Add the directive form of a repetition (`!repeat`)
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
32 __author__ = "Philippe Proulx"
33 __version__ = "0.8.0"
34 __all__ = [
35 "ByteOrder",
36 "parse",
37 "ParseError",
38 "ParseResult",
39 "TextLocation",
40 "LabelsT",
41 "VariablesT",
42 "__author__",
43 "__version__",
44 ]
45
46 import re
47 import abc
48 import ast
49 import sys
50 import enum
51 import math
52 import struct
53 import typing
54 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
55
56
# Location within some source text: a line number and a column number.
#
# Calling the constructor directly raises `NotImplementedError`: this
# module builds instances with TextLocation._create().
class TextLocation:
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass __init__(), which always raises
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({0._line_no}, {0._col_no})".format(self)
84
85
# Base of all items: holds the source text location of the item.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Text location of this item within the source.
    @property
    def text_loc(self):
        return self._text_loc
95
96
# Scalar item: an item exposing its size in bytes.
class _ScalarItem(_Item):
    # Size of this item (bytes); provided by the concrete classes.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
104
105
# Marker base class: an instance may be followed by a post-item
# repetition (checked with isinstance() by the parser).
class _RepableItem:
    pass
109
110
# Item holding a single byte value.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Value of the byte.
    @property
    def val(self):
        return self._val

    # A byte item always has a size of one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
128
129
# Item holding the encoded bytes of a literal string.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes of the string.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({!r}, {!r})".format(self._data, self._text_loc)
147
148
# Byte order; the value of each member is its Normand keyword.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian (`be`).
    BE = "be"

    # Little endian (`le`).
    LE = "le"
157
158
# Item setting the current byte order.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
171
172
# Label item: a name attached to a position in the input.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Name of the label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
186
187
# Item setting the current offset.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset to set (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
201
202
# Item aligning the current offset, with a padding byte value.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment (bits).
    @property
    def val(self):
        return self._val

    # Value of the padding bytes.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        return "_AlignOffset({!r}, {!r}, {!r})".format(
            self._val, self._pad_val, self._text_loc
        )
224
225
# Mixin for items which carry an expression: keeps both the expression
# string and the corresponding AST expression node.
class _ExprMixin:
    def __init__(self, expr_str: str, expr: ast.Expression):
        self._expr_str, self._expr = expr_str, expr

    # Expression as a string.
    @property
    def expr_str(self):
        return self._expr_str

    # AST expression node to evaluate.
    @property
    def expr(self):
        return self._expr
241
242
# Item assigning the result of an expression to a named variable.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # _Item comes first in the MRO: initialize the mixin explicitly
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable to assign.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
264
265
# Fixed-length number item: an expression and a length in bits (which
# may span more than one byte).
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # The mixin isn't reached by super().__init__(): call it explicitly
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        return "_FlNum({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
291
292
# Base of the LEB128 integer items: an expression without a fixed
# length.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the dynamic class name so that subclasses get their own
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
306
307
# Unsigned LEB128 integer item (`uleb128` attribute).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
311
312
# Signed LEB128 integer item (`sleb128` attribute).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
316
317
# Group item: an ordered list of contained items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Items contained in this group.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
331
332
# Repetition item: an item to repeat and a multiplier expression.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # The mixin isn't reached by super().__init__(): call it explicitly
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(
            self._item, self._expr_str, self._expr, self._text_loc
        )
354
355
# Type of any item which carries an expression (for type hints).
_ExprItemT = Union[_FlNum, _Leb128Int, _VarAssign, _Rep]
358
359
# Parsing error: a message and the text location of the problem.
#
# Calling the constructor directly raises `NotImplementedError`: this
# module builds instances with ParseError._create().
class ParseError(RuntimeError):
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Bypass __init__(), which always raises
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, msg: str, text_loc: TextLocation):
        # Initialize the base exception with the message
        super().__init__(msg)
        self._text_loc = text_loc

    # Text location of the error within the source.
    @property
    def text_loc(self):
        return self._text_loc
379
380
# Raises a `ParseError` built from the message `msg` and the text
# location `text_loc`.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
384
385
# Type of a dictionary mapping variable names to their numeric values
# (for type hints).
VariablesT = Dict[str, Union[int, float]]


# Type of a dictionary mapping label names to their integral values
# (for type hints).
LabelsT = Dict[str, int]


# Pattern matching a Python name (identifier).
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
396
397
398 # Normand parser.
399 #
400 # The constructor accepts a Normand input. After building, use the `res`
401 # property to get the resulting main group.
402 class _Parser:
# Builds a parser for the Normand input `normand` and parses it
# immediately.
#
# The keys of `variables` and `labels` seed the sets of known variable
# and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Known names: the parsing methods consult and extend those sets
    self._label_names = set(labels)
    self._var_names = set(variables)

    # Input and cursor state (offset, line number, column number)
    self._normand = normand
    self._at = 0
    self._line_no = 1
    self._col_no = 1

    self._parse()
413
414 # Result (main group).
415 @property
416 def res(self):
417 return self._res
418
# Text location built from the current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
425
426 # Returns `True` if this parser is done parsing.
427 def _is_done(self):
428 return self._at == len(self._normand)
429
430 # Returns `True` if this parser isn't done parsing.
431 def _isnt_done(self):
432 return not self._is_done()
433
# Raises a parse error having the message `msg` and the current text
# location.
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
438
439 # Tries to make the pattern `pat` match the current substring,
440 # returning the match object and updating `self._at`,
441 # `self._line_no`, and `self._col_no` on success.
442 def _try_parse_pat(self, pat: Pattern[str]):
443 m = pat.match(self._normand, self._at)
444
445 if m is None:
446 return
447
448 # Skip matched string
449 self._at += len(m.group(0))
450
451 # Update line number
452 self._line_no += m.group(0).count("\n")
453
454 # Update column number
455 for i in reversed(range(self._at)):
456 if self._normand[i] == "\n" or i == 0:
457 if i == 0:
458 self._col_no = self._at + 1
459 else:
460 self._col_no = self._at - i
461
462 break
463
464 # Return match object
465 return m
466
467 # Expects the pattern `pat` to match the current substring,
468 # returning the match object and updating `self._at`,
469 # `self._line_no`, and `self._col_no` on success, or raising a parse
470 # error with the message `error_msg` on error.
471 def _expect_pat(self, pat: Pattern[str], error_msg: str):
472 # Match
473 m = self._try_parse_pat(pat)
474
475 if m is None:
476 # No match: error
477 self._raise_error(error_msg)
478
479 # Return match object
480 return m
481
482 # Pattern for _skip_ws_and_comments()
483 _ws_or_syms_or_comments_pat = re.compile(
484 r"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
485 )
486
487 # Skips as many whitespaces, insignificant symbol characters, and
488 # comments as possible.
489 def _skip_ws_and_comments(self):
490 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
491
# Pattern for _try_parse_hex_byte()
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, insignificant symbols, and comments), returning a byte
# item on success.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc

    # High nibble
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        # No match
        return

    # Low nibble (mandatory at this point)
    self._skip_ws_and_comments()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Combine both nibbles into the byte value
    val = (int(high.group(0), 16) << 4) | int(low.group(0), 16)
    return _Byte(val, begin_text_loc)
515
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

# Tries to parse a binary byte (`%` followed by eight bits), returning
# a byte item on success.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # No prefix: not a binary byte
        return

    # Accumulate the eight expected bits, most significant first
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        bit_m = self._expect_pat(
            self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
        )
        val = (val << 1) | int(bit_m.group(0))

    return _Byte(val, begin_text_loc)
539
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$\s*")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed by a decimal constant
# within [-128, 255]), returning a byte item on success.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No prefix: not a decimal byte
        return

    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Signed value
    val = int(m.group("val"))

    if m.group("neg") == "-":
        val = -val

    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Store negative values as their two's complement
    return _Byte(val % 256, begin_text_loc)
568
# Tries to parse a byte in any supported form, returning a byte item on
# success.
def _try_parse_byte(self):
    # Try each form in order: hexadecimal, binary, and then decimal
    for try_parse in (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    ):
        item = try_parse()

        if item is not None:
            return item
588
# Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a literal string, returning a string item on success.
#
# The optional prefix (`u16be`, `u16le`, `u32be`, or `u32le`) selects
# the encoding of the resulting data; the default is UTF-8.
#
# Raises a parse error if the closing `"` is missing.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass so that
    # the result of one replacement is never reinterpreted as part of
    # another escape sequence (for example, `\\n` must become a
    # backslash followed by `n`, not a backslash followed by a
    # newline). Unknown escape sequences are kept as is.
    val = self._str_escape_seq_pat.sub(
        lambda esc: self._str_escape_seq_strs.get(esc.group(1), esc.group(0)),
        m.group(0),
    )

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
643
# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(")
_group_suffix_pat = re.compile(r"\)")

# Tries to parse a group (items between `(` and `)`), returning a group
# item on success.
def _try_parse_group(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._group_prefix_pat) is None:
        # No `(`: not a group
        return

    # Contained items
    contained_items = self._parse_items()

    # The group must end here
    self._skip_ws_and_comments()
    self._expect_pat(
        self._group_suffix_pat, "Expecting an item or `)` (end of group)"
    )
    return _Group(contained_items, begin_text_loc)
668
# Returns the normalized version of the expression string `expr_str`
# (stripped, newlines replaced with spaces) and the corresponding AST
# expression node.
#
# Raises a parse error at the text location `text_loc` if the
# expression has an invalid syntax.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    normalized = expr_str.strip().replace("\n", " ")

    try:
        node = ast.parse(normalized, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(normalized),
            text_loc,
        )

    return normalized, node
684
# Patterns for _try_parse_num_and_attr()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse an expression followed by `:` and an attribute (a
# fixed length in bits, `uleb128`, or `sleb128`), returning the
# corresponding number item on success.
def _try_parse_num_and_attr(self):
    begin_text_loc = self._text_loc

    # Expression part, up to and including `:`
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # No match
        return

    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

    # Fixed length?
    m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

    if m_len is not None:
        return _FlNum(expr_str, expr, int(m_len.group(0)), begin_text_loc)

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    # The first group (`u` or `s`) selects the signedness
    item_cls = _ULeb128Int if m_leb128.group(1) == "u" else _SLeb128Int
    return item_cls(expr_str, expr, begin_text_loc)
729
# Pattern for _try_parse_var_assign()
_var_assign_pat = re.compile(
    r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
)

# Tries to parse a variable assignment (`NAME = EXPR`), returning a
# variable assignment item on success.
#
# Raises a parse error if the name is reserved or is the name of a
# known label. Otherwise adds the name to the known variable names.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._var_assign_pat)

    if m is None:
        # No match
        return

    var_name = m.group("name")

    # The name must be neither reserved nor the name of a label
    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), begin_text_loc)

    # Now a known variable name
    self._var_names.add(var_name)

    # Build the item from the expression
    expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)
    return _VarAssign(var_name, expr_str, expr, begin_text_loc)
771
# Pattern for _try_parse_set_bo()
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning a byte
# order setting item on success.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._bo_pat)

    if m is None:
        # No match
        return

    # The pattern only matches `be` or `le`
    bo_name = m.group(0)
    assert bo_name in ("be", "le")
    bo = ByteOrder.BE if bo_name == "be" else ByteOrder.LE
    return _SetBo(bo, begin_text_loc)
793
# Patterns for _try_parse_val_or_var_assign_or_set_bo()
_val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
_val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")

# Tries to parse what's between `{` and `}`: a variable assignment, a
# number, or a byte order setting (tried in this order), returning an
# item on success.
def _try_parse_val_or_var_assign_or_set_bo(self):
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        # No `{`: nothing to parse here
        return

    # Variable assignment item?
    item = self._try_parse_var_assign()

    # Number item?
    if item is None:
        item = self._try_parse_num_and_attr()

    # Byte order setting item?
    if item is None:
        item = self._try_parse_set_bo()

    if item is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a fixed-length number, a variable assignment, or a byte order setting"
        )

    # Closing `}`
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
826
# Common pattern of a constant integer without a sign (hexadecimal
# with a `0x`/`0X` prefix, or decimal)
_pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

# Tries to parse the value of an offset setting (what follows the
# initial `<`), returning an offset setting item on success.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Base 0: int() honours the `0x` prefix, if any
    offset_val = int(m.group(0), 0)
    return _SetOffset(offset_val, begin_text_loc)
844
# Tries to parse a label name (what follows the initial `<`), returning
# a label item on success.
#
# Raises a parse error if the name is reserved, is the name of a known
# label, or is the name of a known variable. Otherwise adds the name to
# the known label names.
def _try_parse_label_name(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        # No match
        return

    label_name = m.group(0)

    # Validate the name
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), begin_text_loc)

    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), begin_text_loc
        )

    # Now a known label name
    self._label_names.add(label_name)
    return _Label(label_name, begin_text_loc)
876
# Patterns for _try_parse_label_or_set_offset()
_label_set_offset_prefix_pat = re.compile(r"<\s*")
_label_set_offset_suffix_pat = re.compile(r"\s*>")

# Tries to parse what's between `<` and `>`: an offset setting or a
# label (tried in this order), returning an item on success.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # No `<`: nothing to parse here
        return

    # Offset setting item?
    item = self._try_parse_set_offset_val()

    # Label item?
    if item is None:
        item = self._try_parse_label_name()

    if item is None:
        # At this point it's invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # Closing `>`
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
903
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@\s*")
_align_offset_val_pat = re.compile(r"(\d+)\s*")
_align_offset_pad_val_prefix_pat = re.compile(r"~\s*")

# Tries to parse an offset alignment (`@` followed by an alignment in
# bits and, optionally, `~` and a padding byte value), returning an
# offset alignment item on success.
#
# The padding byte value is zero when not specified.
def _try_parse_align_offset(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No `@`: not an offset alignment
        return

    # Alignment: must be a positive multiple of eight
    align_text_loc = self._text_loc
    m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align_val = int(m.group(1))

    if align_val <= 0 or align_val % 8 != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align_val
            ),
            align_text_loc,
        )

    # Optional padding byte value
    pad_val = 0

    if self._try_parse_pat(self._align_offset_pad_val_prefix_pat) is not None:
        pad_val_text_loc = self._text_loc
        m = self._expect_pat(self._pos_const_int_pat, "Expecting a byte value")
        pad_val = int(m.group(0), 0)

        # The pattern has no sign: only the upper bound needs checking
        if pad_val > 255:
            _raise_error(
                "Invalid padding byte value {}".format(pad_val),
                pad_val_text_loc,
            )

    return _AlignOffset(align_val, pad_val, begin_text_loc)
954
955 # Patterns for _expect_rep_mul_expr()
956 _rep_expr_prefix_pat = re.compile(r"\{")
957 _rep_expr_pat = re.compile(r"[^}p]+")
958 _rep_expr_suffix_pat = re.compile(r"\}")
959
960 # Parses the multiplier expression of a repetition (block or
961 # post-item) and returns the expression string and AST node.
962 def _expect_rep_mul_expr(self):
963 expr_text_loc = self._text_loc
964
965 # Constant integer?
966 m = self._try_parse_pat(self._pos_const_int_pat)
967
968 if m is None:
969 # Name?
970 m = self._try_parse_pat(_py_name_pat)
971
972 if m is None:
973 # Expression?
974 if self._try_parse_pat(self._rep_expr_prefix_pat) is None:
975 # At this point it's invalid
976 self._raise_error(
977 "Expecting a positive integral multiplier, a name, or `{`"
978 )
979
980 # Expect an expression
981 expr_text_loc = self._text_loc
982 m = self._expect_pat(self._rep_expr_pat, "Expecting an expression")
983 expr_str = m.group(0)
984
985 # Expect `}`
986 self._expect_pat(self._rep_expr_suffix_pat, "Expecting `}`")
987 else:
988 expr_str = m.group(0)
989 else:
990 expr_str = m.group(0)
991
992 return self._ast_expr_from_str(expr_str, expr_text_loc)
993
# Patterns for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b\s*")
_rep_block_end_pat = re.compile(r"!end\b\s*")

# Tries to parse a repetition block (`!repeat` or `!r`, a multiplier,
# items, and then `!end`), returning a repetition item on success.
#
# The items of the block are wrapped into a group item.
def _try_parse_rep_block(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # No `!repeat`/`!r`: not a repetition block
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Items to repeat
    self._skip_ws_and_comments()
    items_text_loc = self._text_loc
    block_items = self._parse_items()

    # The block must end here
    self._skip_ws_and_comments()
    self._expect_pat(
        self._rep_block_end_pat, "Expecting an item or `!end` (end of repetition)"
    )

    repeated = _Group(block_items, items_text_loc)
    return _Rep(repeated, mul_expr_str, mul_expr, begin_text_loc)
1025
# Tries to parse a base item (anything except a post-item repetition),
# returning it on success.
def _try_parse_base_item(self):
    # The order matters: the first method which returns an item wins
    for try_parse in (
        # Byte item?
        self._try_parse_byte,
        # String item?
        self._try_parse_str,
        # Value, variable assignment, or byte order setting item?
        self._try_parse_val_or_var_assign_or_set_bo,
        # Label or offset setting item?
        self._try_parse_label_or_set_offset,
        # Offset alignment item?
        self._try_parse_align_offset,
        # Group item?
        self._try_parse_group,
        # Repetition (block) item?
        self._try_parse_rep_block,
    ):
        item = try_parse()

        if item is not None:
            return item
1070
# Pattern for _try_parse_rep_post()
_rep_post_prefix_pat = re.compile(r"\*")

# Tries to parse a post-item repetition (`*` followed by a multiplier),
# returning the expression string and AST expression node on success.
def _try_parse_rep_post(self):
    if self._try_parse_pat(self._rep_post_prefix_pat) is not None:
        # `*` found: a multiplier is mandatory at this point
        self._skip_ws_and_comments()
        return self._expect_rep_mul_expr()
1085
# Tries to parse an item, possibly followed by a post-item repetition,
# appending the resulting item to `items`.
#
# Returns `True` if an item was appended, or `False` if there's no
# item at the current position.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    item = self._try_parse_base_item()

    if item is None:
        # No item
        return False

    # Only a repeatable item may be followed by a repetition
    if isinstance(item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_ret = self._try_parse_rep_post()

        if rep_ret is not None:
            # Wrap the base item into a repetition item
            mul_expr_str, mul_expr = rep_ret[0], rep_ret[1]
            item = _Rep(item, mul_expr_str, mul_expr, rep_text_loc)

    items.append(item)
    return True
1111
# Parses and returns as many items as possible, stopping at the end of
# the input or at the first position where no item can be parsed.
def _parse_items(self) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    # Keep going while there's input and items keep coming
    while self._isnt_done() and self._try_append_item(parsed_items):
        pass

    return parsed_items
1125
# Parses the whole Normand input, setting `self._res` to the main group
# item on success.
#
# Raises a parse error if something remains after the last parsed item
# (whitespaces, insignificant symbols, and comments excluded).
def _parse(self):
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items
    main_items = self._parse_items()

    # Anything left at this point is unexpected
    self._skip_ws_and_comments()

    if not self._is_done():
        self._raise_error(
            "Unexpected character `{}`".format(self._normand[self._at])
        )

    # Main group item
    self._res = _Group(main_items, self._text_loc)
1147
1148
# Result of parse(): the generated data along with the final state
# (variables, labels, offset, and byte order).
#
# Calling the constructor directly raises `NotImplementedError`: this
# module builds instances with ParseResult._create().
class ParseResult:
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which always raises
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._offset, self._bo = data, offset, bo
        self._vars, self._labels = variables, labels

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order (possibly `None`).
    @property
    def byte_order(self):
        return self._bo
1206
1207
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1212
1213
# The `ICITTE` reserved name: within an expression, it evaluates to the
# current offset, and the expression validator always accepts it.
_icitte_name = "ICITTE"
1216
1217
# Base node visitor.
#
# Calls the _visit_name() method with the identifier of each visited
# `ast.Name` node, except the ones reached while the "parent is a call"
# flag is set.
#
# The flag is set when entering an `ast.Call` node and cleared once the
# children of any node are visited.
class _NodeVisitor(ast.NodeVisitor):
    def __init__(self):
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # The next name node to visit is the called one
            self._parent_is_call = True
        elif node_type is ast.Name:
            if not self._parent_is_call:
                self._visit_name(node.id)

        # Visit the children, and then clear the flag unconditionally
        super().generic_visit(node)
        self._parent_is_call = False

    # Called with the identifier `name` of a reported name node.
    @abc.abstractmethod
    def _visit_name(self, name: str):
        pass
1238
1239
# Expression validator: raises a parse error, at the text location of
# the item, for the first reported name which is neither `ICITTE` nor
# one of the allowed names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str]):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # `ICITTE` is always legal; any other name must be allowed
        if name == _icitte_name or name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )

        # List the legal names (including `ICITTE`) to help the user
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        if legal_names:
            quoted_names = sorted(
                "`{}`".format(legal_name) for legal_name in legal_names
            )
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._item.text_loc)
1269
1270
# Expression visitor collecting, in a set, all the names which the base
# visitor reports.
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # The base class initializes the call tracking flag
        super().__init__()
        self._names = set()  # type: Set[str]

    def _visit_name(self, name: str):
        self._names.add(name)

    # Names collected so far.
    @property
    def names(self):
        return self._names
1283
1284
# Generator state: current variables, labels, offset, and byte order.
#
# The state keeps its own copies of `variables` and `labels` so that
# updating the state doesn't modify the dictionaries of the caller.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Plain values
        self.bo = bo
        self.offset = offset

        # Private copies of the dictionaries
        self.labels = labels.copy()
        self.variables = variables.copy()
1298
1299
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Validate that each repetition and LEB128 integer expression uses
#    only reachable names.
#
# 2. Compute and keep the effective repetition count and LEB128 integer
#    value for each repetition and LEB128 integer instance.
#
# 3. Generate bytes, updating the initial state as it goes which becomes
#    the final state after the operation.
#
#    During the generation, when handling a `_Rep` or `_Leb128Int` item,
#    we already have the effective repetition count or value of the
#    instance.
#
#    When handling a `_Group` item, first update the current labels with
#    all the immediate (not nested) labels, and then handle each
#    contained item. This gives contained item access to "future" outer
#    labels. Then remove the immediate labels from the state so that
#    outer items don't have access to inner labels.
class _Gen:
    def __init__(
        self,
        group: _Group,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Step 1: validation
        self._validate_vl_exprs(group, set(variables.keys()), set(labels.keys()))

        # Step 2: effective values of variable-length item instances
        # (uses its own state so that step 3 starts from the initial one)
        self._vl_instance_vals = self._compute_vl_instance_vals(
            group, _GenState(variables, labels, offset, bo)
        )

        # Step 3: actual generation
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Returns the set of used, non-called names within the AST
    # expression `expr`.
    @staticmethod
    def _names_of_expr(expr: ast.Expression):
        visitor = _ExprNamesVisitor()
        visitor.visit(expr)
        return visitor.names

    # Validates that all the repetition and LEB128 integer expressions
    # within `group` don't refer, directly or indirectly, to subsequent
    # labels.
    #
    # The strategy here is to keep a set of allowed label names, per
    # group, initialized to `allowed_label_names`, and a set of allowed
    # variable names initialized to `allowed_variable_names`.
    #
    # Then, depending on the type of `item`:
    #
    # `_Label`:
    #     Add its name to the local allowed label names: a label
    #     occurring before a repetition, and not within a nested group,
    #     is always reachable.
    #
    # `_VarAssign`:
    #     If all the names within its expression are allowed, then add
    #     its name to the allowed variable names.
    #
    #     Otherwise, remove its name from the allowed variable names (if
    #     it's in there): a variable which refers to an unreachable name
    #     is unreachable itself.
    #
    # `_Rep` and `_Leb128Int`:
    #     Make sure all the names within its expression are allowed.
    #
    # `_Group`:
    #     Call this function for each contained item with a _copy_ of
    #     the current allowed label names and the same current allowed
    #     variable names.
    @staticmethod
    def _validate_vl_exprs(
        item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
    ):
        if type(item) is _Label:
            allowed_label_names.add(item.name)
        elif type(item) is _VarAssign:
            # Check if this variable name is allowed (the set of legal
            # names doesn't change while checking, so build it once)
            legal_names = allowed_label_names | allowed_variable_names | {_icitte_name}
            allowed = True

            for name in _Gen._names_of_expr(item.expr):
                if name not in legal_names:
                    # Not allowed
                    allowed = False
                    break

            if allowed:
                allowed_variable_names.add(item.name)
            elif item.name in allowed_variable_names:
                allowed_variable_names.remove(item.name)
        elif isinstance(item, _Leb128Int):
            # Validate the expression
            _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
                item.expr
            )
        elif type(item) is _Rep:
            # Validate the expression first
            _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
                item.expr
            )

            # Validate inner item
            _Gen._validate_vl_exprs(
                item.item, allowed_variable_names, allowed_label_names
            )
        elif type(item) is _Group:
            # Copy `allowed_label_names` so that this frame cannot
            # access the nested label names.
            group_allowed_label_names = allowed_label_names.copy()

            for subitem in item.items:
                _Gen._validate_vl_exprs(
                    subitem, allowed_variable_names, group_allowed_label_names
                )

    # Evaluates the expression of `item` considering the current
    # generation state `state`.
    #
    # The available names are the current labels, `ICITTE` (current
    # offset), and the current variables, the latter taking precedence.
    #
    # If `allow_float` is `True`, then the type of the result may be
    # `float` too.
    #
    # Raises a parse error if a name is illegal, if the evaluation
    # fails, or if the type of the result is unexpected.
    @staticmethod
    def _eval_item_expr(
        item: _ExprItemT,
        state: _GenState,
        allow_float: bool = False,
    ):
        syms = {}  # type: VariablesT
        syms.update(state.labels)

        # Set the `ICITTE` name to the current offset
        syms[_icitte_name] = state.offset

        # Add the current variables
        syms.update(state.variables)

        # Validate the node and its children
        _ExprValidator(item, set(syms.keys())).visit(item.expr)

        # Compile and evaluate expression node
        try:
            val = eval(compile(item.expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Validate result type
        expected_types = {int}  # type: Set[type]
        type_msg = "`int`"

        if allow_float:
            expected_types.add(float)
            type_msg += " or `float`"

        if type(val) not in expected_types:
            _raise_error_for_item(
                "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                    item.expr_str, type_msg, type(val).__name__
                ),
                item,
            )

        return val

    # Returns the size, in bytes, required to encode the value `val`
    # with LEB128 (signed version if `is_signed` is `True`).
    @staticmethod
    def _leb128_size_for_val(val: int, is_signed: bool):
        if val < 0:
            # Equivalent upper bound.
            #
            # For example, if `val` is -128, then the full integer for
            # this number of bits would be [-128, 127].
            val = -val - 1

        # Number of bits (add one for the sign if needed)
        bits = val.bit_length() + int(is_signed)

        if bits == 0:
            # Zero still needs one byte
            bits = 1

        # Seven bits per byte
        return math.ceil(bits / 7)

    # Returns the offset `offset` aligned according to `item`, of which
    # the `val` property is the alignment in bits.
    @staticmethod
    def _align_offset(offset: int, item: _AlignOffset):
        align_bytes = item.val // 8
        return (offset + align_bytes - 1) // align_bytes * align_bytes

    # Computes the effective value for each repetition and LEB128
    # integer instance, filling `instance_vals` (if not `None`) and
    # returning `instance_vals`.
    #
    # At this point it must be known that, for a given variable-length
    # item, its expression only contains reachable names.
    #
    # When handling a `_Rep` item, this function appends its effective
    # multiplier to `instance_vals` _before_ handling its repeated item.
    #
    # When handling a `_VarAssign` item, this function only evaluates it
    # if all its names are reachable.
    @staticmethod
    def _compute_vl_instance_vals(
        item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
    ):
        if instance_vals is None:
            instance_vals = []

        if isinstance(item, _ScalarItem):
            state.offset += item.size
        elif type(item) is _Label:
            state.labels[item.name] = state.offset
        elif type(item) is _VarAssign:
            # Check if all the names are reachable
            do_eval = True

            for name in _Gen._names_of_expr(item.expr):
                if (
                    name != _icitte_name
                    and name not in state.variables
                    and name not in state.labels
                ):
                    # A name is unknown: cannot evaluate
                    do_eval = False
                    break

            if do_eval:
                # Evaluate the expression and keep the result
                state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
        elif type(item) is _SetOffset:
            state.offset = item.val
        elif type(item) is _AlignOffset:
            state.offset = _Gen._align_offset(state.offset, item)
        elif isinstance(item, _Leb128Int):
            # Evaluate the expression
            val = _Gen._eval_item_expr(item, state)

            # Validate result
            if type(item) is _ULeb128Int and val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add the evaluation result to the variable-length item
            # instance values.
            instance_vals.append(val)

            # Update offset
            state.offset += _Gen._leb128_size_for_val(val, type(item) is _SLeb128Int)
        elif type(item) is _Rep:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state)

            # Validate result
            if val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,}".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add to repetition instance values
            instance_vals.append(val)

            # Process the repeated item `val` times
            for _ in range(val):
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Group:
            # The labels of this group must not leak to outer items
            prev_labels = state.labels.copy()

            # Process each item
            for subitem in item.items:
                _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

            state.labels = prev_labels

        return instance_vals

    # Dry handler for an item which has no effect on the offset.
    def _update_offset_noop(self, item: _Item, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Dry handler for a scalar item: adds its size to `state.offset`.
    def _dry_handle_scalar_item(
        self, item: _ScalarItem, state: _GenState, next_vl_instance: int
    ):
        state.offset += item.size
        return next_vl_instance

    # Dry handler for a LEB128 integer item: adds the encoded size of
    # its precomputed value to `state.offset` and consumes the instance.
    def _dry_handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        state.offset += self._leb128_size_for_val(
            self._vl_instance_vals[next_vl_instance], type(item) is _SLeb128Int
        )

        return next_vl_instance + 1

    # Dry handler for a group item: dry-handles each contained item.
    def _dry_handle_group_item(
        self, item: _Group, state: _GenState, next_vl_instance: int
    ):
        for subitem in item.items:
            next_vl_instance = self._dry_handle_item(subitem, state, next_vl_instance)

        return next_vl_instance

    # Dry handler for a repetition item: dry-handles the repeated item
    # as many times as the precomputed repetition count.
    def _dry_handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        mul = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Dry handler for an offset alignment item: aligns `state.offset`.
    def _dry_handle_align_offset_item(
        self, item: _AlignOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = self._align_offset(state.offset, item)
        return next_vl_instance

    # Dry handler for an offset setting item: sets `state.offset`.
    def _dry_handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Updates `state.offset` considering the generated data of `item`,
    # without generating any, and returns the updated next
    # variable-length item instance.
    def _dry_handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._dry_handle_item_funcs[type(item)](item, state, next_vl_instance)

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
        self._data.append(item.val)
        state.offset += item.size
        return next_vl_instance

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
        self._data += item.data
        state.offset += item.size
        return next_vl_instance

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(
        self, item: _SetBo, state: _GenState, next_vl_instance: int
    ):
        # Update current byte order
        state.bo = item.bo
        return next_vl_instance

    # Handles the variable assignment item `item`.
    def _handle_var_assign_item(
        self, item: _VarAssign, state: _GenState, next_vl_instance: int
    ):
        # Update variable
        state.variables[item.name] = self._eval_item_expr(item, state, True)
        return next_vl_instance

    # Handles the fixed-length integer item `item` having the value
    # `val`, appending the encoded bytes to the generated data.
    #
    # This method doesn't update `state.offset`: the caller does.
    def _handle_fl_int_item(self, val: int, item: _FlNum, state: _GenState):
        # Validate range
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, state.offset
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        data = struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if state.bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert state.bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes
        self._data += data

    # Handles the fixed-length floating point number item `item` having
    # the value `val`, appending the encoded bytes to the generated
    # data.
    #
    # This method doesn't update `state.offset`: the caller does.
    def _handle_fl_float_item(self, val: float, item: _FlNum, state: _GenState):
        # Validate length
        if item.len not in (32, 64):
            _raise_error_for_item(
                "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                    item.len, val
                ),
                item,
            )

        # Encode result
        self._data += struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "f" if item.len == 32 else "d",
            ),
            val,
        )

    # Handles the fixed-length number item `item`.
    def _handle_fl_num_item(
        self, item: _FlNum, state: _GenState, next_vl_instance: int
    ):
        # Compute value
        val = self._eval_item_expr(item, state, True)

        # Validate current byte order
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Handle depending on type
        if type(val) is int:
            self._handle_fl_int_item(val, item, state)
        else:
            assert type(val) is float
            self._handle_fl_float_item(val, item, state)

        # Update offset
        state.offset += item.size

        return next_vl_instance

    # Handles the LEB128 integer item `item`.
    def _handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the precomputed value
        val = self._vl_instance_vals[next_vl_instance]

        # Size in bytes
        size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

        # For each byte
        for _ in range(size):
            # Seven LSBs, MSB of the byte set (continue)
            self._data.append((val & 0x7F) | 0x80)
            val >>= 7

        # Clear MSB of last byte (stop)
        self._data[-1] &= ~0x80

        # Update offset, like the dry handler and
        # _compute_vl_instance_vals() do, so that `ICITTE` and the final
        # offset account for the bytes above.
        state.offset += size

        # Consumed this instance
        return next_vl_instance + 1

    # Handles the group item `item`, only removing the immediate labels
    # from `state.labels` if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self,
        item: _Group,
        state: _GenState,
        next_vl_instance: int,
        remove_immediate_labels: bool = True,
    ):
        # Compute the values of the immediate (not nested) labels. Those
        # labels are reachable by any expression within the group.
        tmp_state = _GenState({}, {}, state.offset, None)
        immediate_label_names = set()  # type: Set[str]
        tmp_next_vl_instance = next_vl_instance

        for subitem in item.items:
            if type(subitem) is _Label:
                # New immediate label
                state.labels[subitem.name] = tmp_state.offset
                immediate_label_names.add(subitem.name)

            tmp_next_vl_instance = self._dry_handle_item(
                subitem, tmp_state, tmp_next_vl_instance
            )

        # Handle each item now with the actual state
        for subitem in item.items:
            next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

        # Remove immediate labels if required so that outer items won't
        # reach inner labels.
        if remove_immediate_labels:
            for name in immediate_label_names:
                del state.labels[name]

        return next_vl_instance

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the precomputed repetition count
        mul = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Handles offset alignment item `item` (adds padding).
    def _handle_align_offset_item(
        self, item: _AlignOffset, state: _GenState, next_vl_instance: int
    ):
        init_offset = state.offset
        state.offset = self._align_offset(state.offset, item)
        self._data += bytes([item.pad_val] * (state.offset - init_offset))
        return next_vl_instance

    # Handles the label item `item`.
    #
    # This is a no-op: _handle_group_item() already sets the values of
    # the immediate labels of a group before handling its items.
    def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Handles the item `item`, returning the updated next
    # variable-length item instance.
    def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._item_handlers[type(item)](item, state, next_vl_instance)

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # Initial state
        self._data = bytearray()

        # Item handlers
        self._item_handlers = {
            _AlignOffset: self._handle_align_offset_item,
            _Byte: self._handle_byte_item,
            _FlNum: self._handle_fl_num_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _SLeb128Int: self._handle_leb128_int_item,
            _Str: self._handle_str_item,
            _ULeb128Int: self._handle_leb128_int_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Dry item handlers (only updates the state offset)
        self._dry_handle_item_funcs = {
            _AlignOffset: self._dry_handle_align_offset_item,
            _Byte: self._dry_handle_scalar_item,
            _FlNum: self._dry_handle_scalar_item,
            _Group: self._dry_handle_group_item,
            _Label: self._update_offset_noop,
            _Rep: self._dry_handle_rep_item,
            _SetBo: self._update_offset_noop,
            _SetOffset: self._dry_handle_set_offset_item,
            _SLeb128Int: self._dry_handle_leb128_int_item,
            _Str: self._dry_handle_scalar_item,
            _ULeb128Int: self._dry_handle_leb128_int_item,
            _VarAssign: self._update_offset_noop,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Handle the group item, _not_ removing the immediate labels
        # because the `labels` property offers them.
        self._handle_group_item(group, state, 0, False)

        # This is actually the final state
        self._final_state = state
1931
1932
# Parses the input string `normand` and returns a `ParseResult`
# instance containing the bytes it encodes as well as the updated
# variables, labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Missing initial dictionaries mean empty ones
    init_variables = {} if init_variables is None else init_variables
    init_labels = {} if init_labels is None else init_labels

    # Parse the text into the main group item
    main_group = _Parser(normand, init_variables, init_labels).res

    # Generate the data and the final state from the main group item
    gen = _Gen(main_group, init_variables, init_labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1972
1973
# Parses the command-line arguments and returns the resulting
# `argparse` namespace.
def _parse_cli_args():
    import argparse

    parser = argparse.ArgumentParser()
    add_arg = parser.add_argument

    # Initial offset and byte order
    add_arg(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    add_arg(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Initial variables and labels (both options may be repeated)
    add_arg(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    add_arg(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    version_str = "Normand {}".format(__version__)
    add_arg("--version", action="version", version=version_str)

    # Optional input path
    add_arg(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return parser.parse_args()
2022
2023
# Raises a `RuntimeError` of which the message is `msg` prefixed with
# `Command-line error: `.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2027
2028
# Converts the list of `NAME=VAL` strings `args` to a dictionary of
# names to integral values.
#
# `VAL` must be a sequence of decimal digits. `args` may be `None`, in
# which case the returned dictionary is empty.
#
# Raises a command-line error for any entry which doesn't have the
# expected form.
def _dict_from_arg(args: Optional[List[str]]):
    name_vals = {}  # type: LabelsT

    for assignment in args or []:
        m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), assignment)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(assignment))

        name, val_str = m.group(1), m.group(2)
        name_vals[name] = int(val_str)

    return name_vals
2046
2047
# CLI entry point without exception handling.
#
# Reads the Normand input from the path given on the command line (or
# from the standard input when there's none), parses it with the
# initial variables, labels, offset, and byte order given on the command
# line, and writes the generated data to the standard output.
#
# On a parse error, prints the location and message of the error and
# exits through _fail(). Command-line errors are raised through
# _raise_cli_error().
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (the message must show the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        prefix = ""

        if args.path is not None:
            # Prefix the location with the absolute input path
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print
    sys.stdout.buffer.write(res.data)
2097
2098
# Prints the message `msg` to the standard error, appending a period
# if it doesn't already end with one, and then exits with status 1.
def _fail(msg: str) -> NoReturn:
    final_msg = msg if msg.endswith(".") else msg + "."
    print(final_msg, file=sys.stderr)
    sys.exit(1)
2106
2107
# CLI entry point: runs _try_run_cli() and reports any exception it
# raises through _fail(), which prints the message and exits with
# status 1.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as err:
        err_msg = str(err)
        _fail(err_msg)
2114
2115
# Run the command-line tool only when executing this module as a
# script (not when importing it).
if __name__ == "__main__":
    _run_cli()
This page took 0.076834 seconds and 4 git commands to generate.