c643429012be389d2433df9a2db687e00d7fb71b
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata, followed by the names which make up the public API.
__author__ = "Philippe Proulx"
__version__ = "0.6.0"
__all__ = [
    "ByteOrder",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLoc",
    "SymbolsT",
    "__author__",
    "__version__",
]
44
45 import re
46 import abc
47 import ast
48 import sys
49 import enum
50 import math
51 import struct
52 from typing import (
53 Any,
54 Set,
55 Dict,
56 List,
57 Tuple,
58 Union,
59 Pattern,
60 Callable,
61 NoReturn,
62 Optional,
63 )
64
65
# Location within the source text (line and column numbers).
#
# Instances are built through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class TextLoc:
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass __init__(), which is deliberately unusable
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Number of the line.
    @property
    def line_no(self):
        return self._line_no

    # Number of the column.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        line_no = self._line_no
        col_no = self._col_no
        return "TextLoc({}, {})".format(line_no, col_no)
93
94
# Base of every item: keeps the source text location of the item.
class _Item:
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Where this item is located within the source text.
    @property
    def text_loc(self):
        return self._text_loc
104
105
# Scalar item: an item which has a size.
class _ScalarItem(_Item):
    # Size of this item (bytes).
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
113
114
115 # A repeatable item.
116 class _RepableItem:
117 pass
118
119
# Item holding a single byte value.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Value of the byte.
    @property
    def val(self):
        return self._val

    # Always a single byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
137
138
# Item holding an already encoded string.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Bytes of the encoded string.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({!r}, {})".format(self._data, self._text_loc)
156
157
# Available byte orders.
@enum.unique
class ByteOrder(enum.Enum):
    # Big-endian byte order.
    BE = "be"

    # Little-endian byte order.
    LE = "le"
166
167
# Item which sets the current byte order.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {})".format(self._bo, self._text_loc)
180
181
# Label item.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Name of the label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {})".format(self._name, self._text_loc)
195
196
# Item which sets the current offset.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Offset value to set.
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {})".format(self._val, self._text_loc)
210
211
212 # Mixin of containing an AST expression and its string.
213 class _ExprMixin:
214 def __init__(self, expr_str: str, expr: ast.Expression):
215 self._expr_str = expr_str
216 self._expr = expr
217
218 # Expression string.
219 @property
220 def expr_str(self):
221 return self._expr_str
222
223 # Expression node to evaluate.
224 @property
225 def expr(self):
226 return self._expr
227
228
# Item which assigns the result of an expression to a variable.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable to assign to.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_VarAssign({!r}, {!r}, {!r}, {})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
247
248
# Number having a fixed length (in bits), possibly spanning more than
# one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length of the number (bits).
    @property
    def len(self):
        return self._len

    # Length of the number converted to whole bytes.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        return "_FlNum({!r}, {!r}, {!r}, {})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
271
272
# Base of the LEB128 integer items.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLoc):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the name of the concrete class so that derived classes
        # don't need their own __repr__()
        cls_name = self.__class__.__name__
        return "{}({!r}, {!r}, {})".format(
            cls_name, self._expr_str, self._expr, self._text_loc
        )
286
287
# LEB128 integer item (unsigned flavor).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
291
292
# LEB128 integer item (signed flavor).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
296
297
# Item which groups other items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

    # Items which this group contains.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {})".format(self._items, self._text_loc)
311
312
# Item which repeats another item; the expression is the multiplier.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item which is repeated.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        return "_Rep({!r}, {!r}, {!r}, {})".format(
            self._item, self._expr_str, self._expr, self._text_loc
        )
331
332
# Type of any item which holds an expression.
_ExprItemT = Union[_FlNum, _Leb128Int, _VarAssign, _Rep]
335
336
# Parsing error: a message with the source text location at which the
# error occurred.
#
# Instances are built through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        # Bypass __init__(), which is deliberately unusable
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Where the error occurred within the source text.
    @property
    def text_loc(self):
        return self._text_loc
356
357
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
361
362
363 # Variable/label dictionary type.
364 SymbolsT = Dict[str, int]
365
366
367 # Python name pattern.
368 _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
369
370
# Normand parser.
#
# The constructor accepts a Normand input and parses it immediately.
# After building, use the `res` property to get the resulting main
# group.
#
# All the _try_parse_*() methods return `None` without consuming
# anything when the current substring doesn't start with what they
# look for; once their prefix matched, they raise a `ParseError` if the
# remaining part is invalid.
class _Parser:
    # Builds a parser to parse the Normand input `normand`, parsing
    # immediately.
    #
    # The keys of `variables` and `labels` are the initially known
    # variable and label names; they're used to detect name conflicts.
    #
    # Raises `ParseError` on any parsing error.
    def __init__(self, normand: str, variables: SymbolsT, labels: SymbolsT):
        self._normand = normand
        self._at = 0
        self._line_no = 1
        self._col_no = 1
        self._label_names = set(labels.keys())
        self._var_names = set(variables.keys())
        self._parse()

    # Result (main group).
    @property
    def res(self):
        return self._res

    # Current text location.
    @property
    def _text_loc(self):
        return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
            self._line_no, self._col_no
        )

    # Returns `True` if this parser is done parsing.
    def _is_done(self):
        return self._at == len(self._normand)

    # Returns `True` if this parser isn't done parsing.
    def _isnt_done(self):
        return not self._is_done()

    # Raises a parse error, creating it using the message `msg` and the
    # current text location.
    def _raise_error(self, msg: str) -> NoReturn:
        _raise_error(msg, self._text_loc)

    # Tries to make the pattern `pat` match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success.
    #
    # Returns `None` if `pat` doesn't match.
    def _try_parse_pat(self, pat: Pattern[str]):
        m = pat.match(self._normand, self._at)

        if m is None:
            return

        # Skip matched string
        self._at = m.end()

        # Update line number
        self._line_no += m.group(0).count("\n")

        # Update column number: the distance between the current
        # position and the last newline before it. Without any newline,
        # str.rfind() returns -1, which makes the column number
        # `self._at + 1` as expected.
        self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

        # Return match object
        return m

    # Expects the pattern `pat` to match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success, or raising a parse
    # error with the message `error_msg` on error.
    def _expect_pat(self, pat: Pattern[str], error_msg: str):
        # Match
        m = self._try_parse_pat(pat)

        if m is None:
            # No match: error
            self._raise_error(error_msg)

        # Return match object
        return m

    # Pattern for _skip_ws_and_comments()
    #
    # A comment starts with `#` and ends with the next `#`, at the end
    # of the line, or at the end of the input.
    _ws_or_syms_or_comments_pat = re.compile(
        r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#|\Z))*"
    )

    # Skips as many whitespaces, insignificant symbol characters, and
    # comments as possible.
    def _skip_ws_and_comments(self):
        self._try_parse_pat(self._ws_or_syms_or_comments_pat)

    # Pattern for _try_parse_hex_byte()
    _nibble_pat = re.compile(r"[A-Fa-f0-9]")

    # Tries to parse a hexadecimal byte, returning a byte item on
    # success.
    def _try_parse_hex_byte(self):
        begin_text_loc = self._text_loc

        # Match initial nibble
        m_high = self._try_parse_pat(self._nibble_pat)

        if m_high is None:
            # No match
            return

        # Expect another nibble (possibly after insignificant text)
        self._skip_ws_and_comments()
        m_low = self._expect_pat(
            self._nibble_pat, "Expecting another hexadecimal nibble"
        )

        # Return item
        return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)

    # Patterns for _try_parse_bin_byte()
    _bin_byte_bit_pat = re.compile(r"[01]")
    _bin_byte_prefix_pat = re.compile(r"%")

    # Tries to parse a binary byte, returning a byte item on success.
    def _try_parse_bin_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
            # No match
            return

        # Expect eight bits (possibly separated by insignificant text)
        bits = []  # type: List[str]

        for _ in range(8):
            self._skip_ws_and_comments()
            m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
            bits.append(m.group(0))

        # Return item
        return _Byte(int("".join(bits), 2), begin_text_loc)

    # Patterns for _try_parse_dec_byte()
    _dec_byte_prefix_pat = re.compile(r"\$\s*")
    _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

    # Tries to parse a decimal byte, returning a byte item on success.
    def _try_parse_dec_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
            # No match
            return

        # Expect the value
        m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

        # Compute value
        val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)

        # Validate: must fit a signed or an unsigned byte
        if val < -128 or val > 255:
            _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

        # Two's complement
        val %= 256

        # Return item
        return _Byte(val, begin_text_loc)

    # Tries to parse a byte, returning a byte item on success.
    def _try_parse_byte(self):
        # Try the hexadecimal, binary, and decimal forms, in this order
        for try_parse in (
            self._try_parse_hex_byte,
            self._try_parse_bin_byte,
            self._try_parse_dec_byte,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Patterns for _try_parse_str()
    _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
    _str_suffix_pat = re.compile(r'"')
    _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
    _str_escape_seq_pat = re.compile(r"\\(.)")

    # Strings corresponding to escape sequence characters
    _str_escape_seq_strs = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "e": "\x1b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
    }

    # Tries to parse a string, returning a string item on success.
    def _try_parse_str(self):
        begin_text_loc = self._text_loc

        # Match prefix
        m = self._try_parse_pat(self._str_prefix_pat)

        if m is None:
            # No match
            return

        # Get encoding
        encoding = "utf8"

        if m.group("len") is not None:
            encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

        # Actual string
        m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

        # Expect end of string
        self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

        # Replace escape sequences in a single left-to-right pass so
        # that an escaped backslash is never combined with the
        # character which follows it (`\\n` is a backslash followed by
        # `n`, not a newline). An unknown escape sequence remains as is.
        escape_seq_strs = self._str_escape_seq_strs
        val = self._str_escape_seq_pat.sub(
            lambda esc_m: escape_seq_strs.get(esc_m.group(1), esc_m.group(0)),
            m.group(0),
        )

        # Encode
        data = val.encode(encoding)

        # Return item
        return _Str(data, begin_text_loc)

    # Patterns for _try_parse_group()
    _group_prefix_pat = re.compile(r"\(")
    _group_suffix_pat = re.compile(r"\)")

    # Tries to parse a group, returning a group item on success.
    def _try_parse_group(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._group_prefix_pat) is None:
            # No match
            return

        # Parse items
        items = self._parse_items()

        # Expect end of group
        self._skip_ws_and_comments()
        self._expect_pat(
            self._group_suffix_pat, "Expecting an item or `)` (end of group)"
        )

        # Return item
        return _Group(items, begin_text_loc)

    # Returns a stripped expression string and an AST expression node
    # from the expression string `expr_str` at text location `text_loc`.
    #
    # Raises a parse error at `text_loc` if `expr_str` isn't a valid
    # Python expression.
    def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
        # Create an expression node from the expression string
        expr_str = expr_str.strip().replace("\n", " ")

        try:
            expr = ast.parse(expr_str, mode="eval")
        except (SyntaxError, ValueError):
            # ast.parse() may also raise `ValueError` when the source
            # contains null bytes
            _raise_error(
                "Invalid expression `{}`: invalid syntax".format(expr_str),
                text_loc,
            )

        return expr_str, expr

    # Patterns for _try_parse_num_and_attr()
    _val_expr_pat = re.compile(r"([^}:]+):\s*")
    _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
    _leb128_int_attr_pat = re.compile(r"(u|s)leb128")

    # Tries to parse a value and attribute (fixed length in bits or
    # `leb128`), returning a value item on success.
    def _try_parse_num_and_attr(self):
        begin_text_loc = self._text_loc

        # Match
        m_expr = self._try_parse_pat(self._val_expr_pat)

        if m_expr is None:
            # No match
            return

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

        # Length?
        m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

        if m_len is not None:
            # Return fixed-length number item
            return _FlNum(expr_str, expr, int(m_len.group(0)), begin_text_loc)

        # LEB128?
        m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

        if m_leb128 is None:
            # At this point it's invalid
            self._raise_error(
                "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
            )

        # Return LEB128 integer item
        cls = _ULeb128Int if m_leb128.group(1) == "u" else _SLeb128Int
        return cls(expr_str, expr, begin_text_loc)

    # Pattern for _try_parse_var_assign()
    _var_assign_pat = re.compile(
        r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
    )

    # Tries to parse a variable assignment, returning a variable
    # assignment item on success.
    def _try_parse_var_assign(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._var_assign_pat)

        if m is None:
            # No match
            return

        # Validate name
        name = m.group("name")

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Existing label named `{}`".format(name), begin_text_loc)

        # Add to known variable names
        self._var_names.add(name)

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

        # Return item
        return _VarAssign(name, expr_str, expr, begin_text_loc)

    # Pattern for _try_parse_set_bo()
    _bo_pat = re.compile(r"[bl]e")

    # Tries to parse a byte order name, returning a byte order setting
    # item on success.
    def _try_parse_set_bo(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._bo_pat)

        if m is None:
            # No match
            return

        # Return corresponding item
        if m.group(0) == "be":
            return _SetBo(ByteOrder.BE, begin_text_loc)
        else:
            assert m.group(0) == "le"
            return _SetBo(ByteOrder.LE, begin_text_loc)

    # Patterns for _try_parse_val_or_var_assign_or_set_bo()
    _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
    _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")

    # Tries to parse a value, a variable assignment, or a byte order
    # setting, returning an item on success.
    def _try_parse_val_or_var_assign_or_set_bo(self):
        # Match prefix
        if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
            # No match
            return

        # Variable assignment item?
        item = self._try_parse_var_assign()

        if item is None:
            # Number item?
            item = self._try_parse_num_and_attr()

            if item is None:
                # Byte order setting item?
                item = self._try_parse_set_bo()

                if item is None:
                    # At this point it's invalid
                    self._raise_error(
                        "Expecting a fixed-length number, a variable assignment, or a byte order setting"
                    )

        # Expect suffix
        self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
        return item

    # Pattern for _try_parse_set_offset_val() and _try_parse_rep()
    _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

    # Tries to parse an offset setting value (after the initial `<`),
    # returning an offset item on success.
    def _try_parse_set_offset_val(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is None:
            # No match
            return

        # Return item (base 0: let int() detect the `0x` prefix)
        return _SetOffset(int(m.group(0), 0), begin_text_loc)

    # Tries to parse a label name (after the initial `<`), returning a
    # label item on success.
    def _try_parse_label_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(_py_name_pat)

        if m is None:
            # No match
            return

        # Validate
        name = m.group(0)

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

        if name in self._var_names:
            _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

        # Add to known label names
        self._label_names.add(name)

        # Return item
        return _Label(name, begin_text_loc)

    # Patterns for _try_parse_label_or_set_offset()
    _label_set_offset_prefix_pat = re.compile(r"<\s*")
    _label_set_offset_suffix_pat = re.compile(r"\s*>")

    # Tries to parse a label or an offset setting, returning an item on
    # success.
    def _try_parse_label_or_set_offset(self):
        # Match prefix
        if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
            # No match
            return

        # Offset setting item?
        item = self._try_parse_set_offset_val()

        if item is None:
            # Label item?
            item = self._try_parse_label_name()

            if item is None:
                # At this point it's invalid
                self._raise_error("Expecting a label name or an offset setting value")

        # Expect suffix
        self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
        return item

    # Tries to parse a base item (anything except a repetition),
    # returning it on success.
    def _try_parse_base_item(self):
        # The order matters: the first kind of item which matches wins
        for try_parse in (
            # Byte item?
            self._try_parse_byte,
            # String item?
            self._try_parse_str,
            # Value, variable assignment, or byte order setting item?
            self._try_parse_val_or_var_assign_or_set_bo,
            # Label or offset setting item?
            self._try_parse_label_or_set_offset,
            # Group item?
            self._try_parse_group,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Patterns for _try_parse_rep()
    _rep_prefix_pat = re.compile(r"\*\s*")
    _rep_expr_prefix_pat = re.compile(r"\{")
    _rep_expr_pat = re.compile(r"[^}]+")
    _rep_expr_suffix_pat = re.compile(r"\}")

    # Tries to parse a repetition, returning the expression string and
    # AST expression node of its multiplier on success.
    #
    # The multiplier is either a constant integer or an expression
    # between `{` and `}`.
    def _try_parse_rep(self):
        # Match prefix
        if self._try_parse_pat(self._rep_prefix_pat) is None:
            # No match
            return

        self._skip_ws_and_comments()

        # Constant integer multiplier?
        expr_str_loc = self._text_loc
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is not None:
            return self._ast_expr_from_str(m.group(0), expr_str_loc)

        # Expression multiplier?
        if self._try_parse_pat(self._rep_expr_prefix_pat) is None:
            # At this point it's invalid
            self._raise_error("Expecting a positive integral multiplier or `{`")

        # Expect an expression
        expr_str_loc = self._text_loc
        m = self._expect_pat(self._rep_expr_pat, "Expecting an expression")
        ret = self._ast_expr_from_str(m.group(0), expr_str_loc)

        # Expect `}`
        self._expect_pat(self._rep_expr_suffix_pat, "Expecting `}`")
        return ret

    # Tries to parse an item, possibly followed by a repetition,
    # returning `True` on success.
    #
    # Appends any parsed item to `items`.
    def _try_append_item(self, items: List[_Item]):
        self._skip_ws_and_comments()

        # Parse a base item
        item = self._try_parse_base_item()

        if item is None:
            # No item
            return False

        # Parse repetition if the base item is repeatable
        if isinstance(item, _RepableItem):
            self._skip_ws_and_comments()
            rep_text_loc = self._text_loc
            rep_ret = self._try_parse_rep()

            if rep_ret is not None:
                # Wrap the base item within a repetition item
                item = _Rep(item, rep_ret[0], rep_ret[1], rep_text_loc)

        items.append(item)
        return True

    # Parses and returns items, skipping whitespaces, insignificant
    # symbols, and comments when allowed, and stopping at the first
    # unknown character.
    def _parse_items(self) -> List[_Item]:
        items = []  # type: List[_Item]

        while self._isnt_done():
            # Try to append item
            if not self._try_append_item(items):
                # Unknown at this point
                break

        return items

    # Parses the whole Normand input, setting `self._res` to the main
    # group item on success.
    def _parse(self):
        if len(self._normand.strip()) == 0:
            # Special case to make sure there's something to consume
            self._res = _Group([], self._text_loc)
            return

        # Parse first level items
        items = self._parse_items()

        # Make sure there's nothing left
        self._skip_ws_and_comments()

        if self._isnt_done():
            self._raise_error(
                "Unexpected character `{}`".format(self._normand[self._at])
            )

        # Set main group item
        self._res = _Group(items, self._text_loc)
1011
1012
# What parse() returns: the generated data with the updated state.
#
# Instances are built through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class ParseResult:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which is deliberately unusable
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def _init(
        self,
        data: bytearray,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Data which was generated.
    @property
    def data(self):
        return self._data

    # Updated variables: names mapped to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Updated main group labels: names mapped to their computed value.
    @property
    def labels(self):
        return self._labels

    # Offset after the update.
    @property
    def offset(self):
        return self._offset

    # Byte order after the update.
    @property
    def byte_order(self):
        return self._bo
1070
1071
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1076
1077
1078 # The `ICITTE` reserved name.
1079 _icitte_name = "ICITTE"
1080
1081
1082 # Base node visitor.
1083 #
1084 # Calls the _visit_name() method for each name node which isn't the name
1085 # of a call.
1086 class _NodeVisitor(ast.NodeVisitor):
1087 def __init__(self):
1088 self._parent_is_call = False
1089
1090 def generic_visit(self, node: ast.AST):
1091 if type(node) is ast.Call:
1092 self._parent_is_call = True
1093 elif type(node) is ast.Name and not self._parent_is_call:
1094 self._visit_name(node.id)
1095
1096 super().generic_visit(node)
1097 self._parent_is_call = False
1098
1099 @abc.abstractmethod
1100 def _visit_name(self, name: str):
1101 ...
1102
1103
# Expression validator: makes sure that an expression only contains
# allowed names.
#
# A visited name which isn't allowed leads to a parse error at the
# text location of the item.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str], icitte_allowed: bool):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names
        self._icitte_allowed = icitte_allowed

    def _visit_name(self, name: str):
        if name == _icitte_name:
            # The reserved name is only legal when explicitly allowed
            if not self._icitte_allowed:
                _raise_error(
                    "Illegal reserved name `{}` in expression `{}`".format(
                        _icitte_name, self._item.expr_str
                    ),
                    self._item.text_loc,
                )

            return

        if name in self._allowed_names:
            # Known and reachable variable/label name
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )

        # Append the legal names, if any, to help the user
        legal_names = self._allowed_names.copy()

        if self._icitte_allowed:
            legal_names.add(_icitte_name)

        if len(legal_names) > 0:
            quoted_names = sorted(
                ["`{}`".format(legal_name) for legal_name in legal_names]
            )
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._item.text_loc)
1143
1144
# Expression visitor which collects all the visited names.
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        self._names = set()  # type: Set[str]
        self._parent_is_call = False

    # Names collected so far.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
1157
1158
# State of a generator.
#
# The state keeps its own shallow copies of `variables` and `labels`,
# so that updating the state doesn't change the dictionaries of the
# caller.
class _GenState:
    def __init__(
        self,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.offset = offset
        self.bo = bo
        self.variables = variables.copy()
        self.labels = labels.copy()
1172
1173
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Validate that each repetition and LEB128 integer expression uses
#    only reachable names (`ICITTE` is only legal within a LEB128
#    integer expression, not within a repetition expression).
#
# 2. Compute and keep the effective repetition count and LEB128 integer
#    value for each repetition and LEB128 integer instance.
#
# 3. Generate bytes, updating the initial state as it goes which becomes
#    the final state after the operation.
#
#    During the generation, when handling a `_Rep` or `_Leb128Int` item,
#    we already have the effective repetition count or value of the
#    instance.
#
#    When handling a `_Group` item, first update the current labels with
#    all the immediate (not nested) labels, and then handle each
#    contained item. This gives contained item access to "future" outer
#    labels. Then remove the immediate labels from the state so that
#    outer items don't have access to inner labels.
class _Gen:
    # Builds the generator and immediately generates the data of
    # `group`.
    #
    # `variables`, `labels`, `offset`, and `bo` form the initial state.
    #
    # Steps 2 and 3 each get their own `_GenState` (which copies
    # `variables` and `labels`) so that the computation step doesn't
    # leak its state into the generation step.
    def __init__(
        self,
        group: _Group,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._validate_vl_exprs(group, set(variables.keys()), set(labels.keys()))
        self._vl_instance_vals = self._compute_vl_instance_vals(
            group, _GenState(variables, labels, offset, bo)
        )
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Returns the set of used, non-called names within the AST
    # expression `expr`.
    @staticmethod
    def _names_of_expr(expr: ast.Expression):
        visitor = _ExprNamesVisitor()
        visitor.visit(expr)
        return visitor.names

    # Validates that all the repetition and LEB128 integer expressions
    # within `item` don't refer, directly or indirectly, to subsequent
    # labels.
    #
    # The strategy here is to keep a set of allowed label names, per
    # group, initialized to `allowed_label_names`, and a set of allowed
    # variable names initialized to `allowed_variable_names`.
    #
    # Both sets are modified in place.
    #
    # Then, depending on the type of `item`:
    #
    # `_Label`:
    #     Add its name to the local allowed label names: a label
    #     occurring before a repetition, and not within a nested group,
    #     is always reachable.
    #
    # `_VarAssign`:
    #     If all the names within its expression are allowed, then add
    #     its name to the allowed variable names.
    #
    #     Otherwise, remove its name from the allowed variable names (if
    #     it's in there): a variable which refers to an unreachable name
    #     is unreachable itself.
    #
    # `_Leb128Int`:
    #     Make sure all the names within its expression are allowed
    #     (`ICITTE` is legal).
    #
    # `_Rep`:
    #     Make sure all the names within its expression are allowed
    #     (`ICITTE` is illegal), and then validate the repeated item.
    #
    # `_Group`:
    #     Call this function for each contained item with a _copy_ of
    #     the current allowed label names and the same current allowed
    #     variable names.
    @staticmethod
    def _validate_vl_exprs(
        item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
    ):
        if type(item) is _Label:
            allowed_label_names.add(item.name)
        elif type(item) is _VarAssign:
            # Check if this variable name is allowed
            allowed = True

            for name in _Gen._names_of_expr(item.expr):
                if name not in (
                    allowed_label_names | allowed_variable_names | {_icitte_name}
                ):
                    # Not allowed
                    allowed = False
                    break

            if allowed:
                allowed_variable_names.add(item.name)
            elif item.name in allowed_variable_names:
                allowed_variable_names.remove(item.name)
        elif isinstance(item, _Leb128Int):
            # Validate the expression (`ICITTE` allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, True
            ).visit(item.expr)
        elif type(item) is _Rep:
            # Validate the expression first (`ICITTE` not allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, False
            ).visit(item.expr)

            # Validate inner item
            _Gen._validate_vl_exprs(
                item.item, allowed_variable_names, allowed_label_names
            )
        elif type(item) is _Group:
            # Copy `allowed_label_names` so that this frame cannot
            # access the nested label names.
            group_allowed_label_names = allowed_label_names.copy()

            for subitem in item.items:
                _Gen._validate_vl_exprs(
                    subitem, allowed_variable_names, group_allowed_label_names
                )

    # Evaluates the expression of `item` considering the current
    # generation state `state`, returning the result.
    #
    # The available names are the current labels and variables; a
    # variable shadows a label having the same name.
    #
    # If `allow_icitte` is `True`, then the `ICITTE` name is available
    # for the expression to evaluate.
    #
    # If `allow_float` is `True`, then the type of the result may be
    # `float` too.
    #
    # Raises an error for `item` if the evaluation fails or if the type
    # of the result is unexpected.
    @staticmethod
    def _eval_item_expr(
        item: _ExprItemT,
        state: _GenState,
        allow_icitte: bool,
        allow_float: bool = False,
    ):
        syms = state.labels.copy()

        # Set the `ICITTE` name to the current offset, if any
        if allow_icitte:
            syms[_icitte_name] = state.offset

        # Add the current variables
        syms.update(state.variables)

        # Validate the node and its children, telling the validator
        # whether or not `ICITTE` is legal for this specific expression.
        _ExprValidator(item, set(syms.keys()), allow_icitte).visit(item.expr)

        # Compile and evaluate expression node
        try:
            val = eval(compile(item.expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Validate result type
        expected_types = {int}  # type: Set[type]
        type_msg = "`int`"

        if allow_float:
            expected_types.add(float)
            type_msg += " or `float`"

        # Exact type check on purpose: `bool`, for example, is refused
        if type(val) not in expected_types:
            _raise_error_for_item(
                "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                    item.expr_str, type_msg, type(val).__name__
                ),
                item,
            )

        return val

    # Returns the size, in bytes, required to encode the value `val`
    # with LEB128 (signed version if `is_signed` is `True`).
    @staticmethod
    def _leb128_size_for_val(val: int, is_signed: bool):
        if val < 0:
            # Equivalent upper bound.
            #
            # For example, if `val` is -128, then the full integer for
            # this number of bits would be [-128, 127].
            val = -val - 1

        # Number of bits (add one for the sign if needed)
        bits = val.bit_length() + int(is_signed)

        # Zero still needs one bit (unsigned zero has a bit length of 0)
        if bits == 0:
            bits = 1

        # Seven bits per byte
        return math.ceil(bits / 7)

    # Computes the effective value for each repetition and LEB128
    # integer instance, filling `instance_vals` (if not `None`) and
    # returning `instance_vals`.
    #
    # If `instance_vals` is `None`, then this function creates and
    # returns a new list.
    #
    # This function updates `state` as it goes.
    #
    # At this point it must be known that, for a given variable-length
    # item, its expression only contains reachable names.
    #
    # When handling a `_Rep` item, this function appends its effective
    # multiplier to `instance_vals` _before_ handling its repeated item.
    #
    # When handling a `_VarAssign` item, this function only evaluates it
    # if all its names are reachable.
    @staticmethod
    def _compute_vl_instance_vals(
        item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
    ):
        if instance_vals is None:
            instance_vals = []

        if isinstance(item, _ScalarItem):
            state.offset += item.size
        elif type(item) is _Label:
            state.labels[item.name] = state.offset
        elif type(item) is _VarAssign:
            # Check if all the names are reachable
            do_eval = True

            for name in _Gen._names_of_expr(item.expr):
                if (
                    name != _icitte_name
                    and name not in state.variables
                    and name not in state.labels
                ):
                    # A name is unknown: cannot evaluate
                    do_eval = False
                    break

            if do_eval:
                # Evaluate the expression and keep the result
                state.variables[item.name] = _Gen._eval_item_expr(
                    item, state, True, True
                )
        elif type(item) is _SetOffset:
            state.offset = item.val
        elif isinstance(item, _Leb128Int):
            # Evaluate the expression
            val = _Gen._eval_item_expr(item, state, True)

            # Validate result
            if type(item) is _ULeb128Int and val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add the evaluation result to the variable-length item
            # instance values.
            instance_vals.append(val)

            # Update offset
            state.offset += _Gen._leb128_size_for_val(val, type(item) is _SLeb128Int)
        elif type(item) is _Rep:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state, False)

            # Validate result
            if val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,}".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add to repetition instance values
            instance_vals.append(val)

            # Process the repeated item `val` times
            for _ in range(val):
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Group:
            # Labels of this group must not be visible once it's done
            prev_labels = state.labels.copy()

            # Process each item
            for subitem in item.items:
                _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

            state.labels = prev_labels

        return instance_vals

    # Size getter for any item which doesn't generate data.
    def _zero_item_size(self, item: _Item, next_vl_instance: int):
        return 0, next_vl_instance

    # Size getter for a scalar item `item` (its size is known).
    def _scalar_item_size(self, item: _ScalarItem, next_vl_instance: int):
        return item.size, next_vl_instance

    # Size getter for the LEB128 integer item `item`, consuming one
    # variable-length instance value.
    def _leb128_int_item_size(self, item: _Leb128Int, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        return (
            self._leb128_size_for_val(
                self._vl_instance_vals[next_vl_instance], type(item) is _SLeb128Int
            ),
            next_vl_instance + 1,
        )

    # Size getter for the group item `item`: sum of the sizes of its
    # contained items.
    def _group_item_size(self, item: _Group, next_vl_instance: int):
        size = 0

        for subitem in item.items:
            subitem_size, next_vl_instance = self._item_size(subitem, next_vl_instance)
            size += subitem_size

        return size, next_vl_instance

    # Size getter for the repetition item `item`: sum of the sizes of
    # all the iterations of its repeated item.
    def _rep_item_size(self, item: _Rep, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        mul = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1
        size = 0

        for _ in range(mul):
            iter_size, next_vl_instance = self._item_size(item.item, next_vl_instance)
            size += iter_size

        return size, next_vl_instance

    # Returns the size of `item` and the new next variable-length
    # instance index.
    def _item_size(self, item: _Item, next_vl_instance: int):
        return self._item_size_funcs[type(item)](item, next_vl_instance)

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
        self._data.append(item.val)
        state.offset += item.size
        return next_vl_instance

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
        self._data += item.data
        state.offset += item.size
        return next_vl_instance

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(
        self, item: _SetBo, state: _GenState, next_vl_instance: int
    ):
        # Update current byte order
        state.bo = item.bo
        return next_vl_instance

    # Handles the variable assignment item `item`.
    #
    # NOTE(review): this only accepts an `int` result whereas
    # _compute_vl_instance_vals() also accepts a `float` one for the
    # same item; confirm which one is intended.
    def _handle_var_assign_item(
        self, item: _VarAssign, state: _GenState, next_vl_instance: int
    ):
        # Update variable
        state.variables[item.name] = self._eval_item_expr(item, state, True)
        return next_vl_instance

    # Handles the fixed-length integer item `item` having the value
    # `val`, appending the encoded bytes to the generated data.
    #
    # The caller is responsible for updating the offset.
    def _handle_fl_int_item(self, val: int, item: _FlNum, state: _GenState):
        # Validate range: from the signed minimum to the unsigned
        # maximum for `item.len` bits.
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, state.offset
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        data = struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if state.bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert state.bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes
        self._data += data

    # Handles the fixed-length floating point number item `item` having
    # the value `val`, appending the encoded bytes to the generated
    # data.
    #
    # The caller is responsible for updating the offset.
    def _handle_fl_float_item(self, val: float, item: _FlNum, state: _GenState):
        # Validate length
        if item.len not in (32, 64):
            _raise_error_for_item(
                "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                    item.len, val
                ),
                item,
            )

        # Encode result
        self._data += struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "f" if item.len == 32 else "d",
            ),
            val,
        )

    # Handles the fixed-length number item `item`.
    def _handle_fl_num_item(
        self, item: _FlNum, state: _GenState, next_vl_instance: int
    ):
        # Compute value
        val = self._eval_item_expr(item, state, True, True)

        # Validate current byte order
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Handle depending on type
        if type(val) is int:
            self._handle_fl_int_item(val, item, state)
        else:
            assert type(val) is float
            self._handle_fl_float_item(val, item, state)

        # Update offset
        state.offset += item.size

        return next_vl_instance

    # Handles the LEB128 integer item `item`.
    def _handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the precomputed value
        val = self._vl_instance_vals[next_vl_instance]

        # Size in bytes
        size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

        # For each byte
        for _ in range(size):
            # Seven LSBs, MSB of the byte set (continue)
            self._data.append((val & 0x7F) | 0x80)
            val >>= 7

        # Clear MSB of last byte (stop)
        self._data[-1] &= ~0x80

        # Update offset so that `ICITTE`, the base offset of the
        # following nested groups, and the final offset take the encoded
        # bytes into account, like _compute_vl_instance_vals() and
        # _leb128_int_item_size() do.
        state.offset += size

        # Consumed this instance
        return next_vl_instance + 1

    # Handles the group item `item`, only removing the immediate labels
    # from `state.labels` if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self,
        item: _Group,
        state: _GenState,
        next_vl_instance: int,
        remove_immediate_labels: bool = True,
    ):
        # Compute the values of the immediate (not nested) labels. Those
        # labels are reachable by any expression within the group.
        #
        # This pass works with a temporary offset and a temporary
        # variable-length instance index so that the actual handling
        # below starts from the untouched ones.
        offset = state.offset
        immediate_label_names = set()  # type: Set[str]
        tmp_next_vl_instance = next_vl_instance

        for subitem in item.items:
            if type(subitem) is _SetOffset:
                # Update offset
                offset = subitem.val
            elif type(subitem) is _Label:
                # New immediate label
                state.labels[subitem.name] = offset
                immediate_label_names.add(subitem.name)

            subitem_size, tmp_next_vl_instance = self._item_size(
                subitem, tmp_next_vl_instance
            )
            offset += subitem_size

        # Handle each item now with the actual state
        for subitem in item.items:
            next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

        # Remove immediate labels if required so that outer items won't
        # reach inner labels.
        if remove_immediate_labels:
            for name in immediate_label_names:
                del state.labels[name]

        return next_vl_instance

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the precomputed repetition count
        mul = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Handles the label item `item`.
    #
    # There's nothing to do: _handle_group_item() already sets the
    # values of the immediate labels of a group before handling its
    # items.
    def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Handles the item `item`, returning the updated next
    # variable-length instance index.
    def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._item_handlers[type(item)](item, state, next_vl_instance)

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # Initial state
        self._data = bytearray()

        # Item handlers
        self._item_handlers = {
            _Byte: self._handle_byte_item,
            _FlNum: self._handle_fl_num_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _SLeb128Int: self._handle_leb128_int_item,
            _Str: self._handle_str_item,
            _ULeb128Int: self._handle_leb128_int_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Item size getters
        self._item_size_funcs = {
            _Byte: self._scalar_item_size,
            _FlNum: self._scalar_item_size,
            _Group: self._group_item_size,
            _Label: self._zero_item_size,
            _Rep: self._rep_item_size,
            _SetBo: self._zero_item_size,
            _SetOffset: self._zero_item_size,
            _SLeb128Int: self._leb128_int_item_size,
            _Str: self._scalar_item_size,
            _ULeb128Int: self._leb128_int_item_size,
            _VarAssign: self._zero_item_size,
        }  # type: Dict[type, Callable[[Any, int], Tuple[int, int]]]

        # Handle the group item, _not_ removing the immediate labels
        # because the `labels` property offers them.
        self._handle_group_item(group, state, 0, False)

        # This is actually the final state
        self._final_state = state
1781
1782
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[SymbolsT] = None,
    init_labels: Optional[SymbolsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Effective initial symbols
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse first, and then generate from the resulting root group
    root_group = _Parser(normand, variables, labels).res
    gen = _Gen(root_group, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1822
1823
# Builds the command-line argument parser, parses the command-line
# arguments with it, and returns the resulting namespace.
def _parse_cli_args():
    import argparse

    parser = argparse.ArgumentParser()

    # Initial offset
    parser.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )

    # Initial byte order
    parser.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Initial variables
    parser.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )

    # Initial labels
    parser.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    parser.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )

    # Optional input path
    parser.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return parser.parse_args()
1872
1873
1874 # Raises a command-line error with the message `msg`.
1875 def _raise_cli_error(msg: str) -> NoReturn:
1876 raise RuntimeError("Command-line error: {}".format(msg))
1877
1878
# Converts the list of `NAME=VAL` strings `args` to a dictionary of
# names to integers and returns it.
#
# `VAL` must only contain decimal digits.
#
# Returns an empty dictionary if `args` is `None`.
#
# Raises a command-line error if an entry doesn't have the expected
# format.
def _dict_from_arg(args: Optional[List[str]]):
    symbols = {}  # type: Dict[str, int]

    if args is None:
        return symbols

    # All the entries share the same pattern
    assign_pat = r"({})=(\d+)$".format(_py_name_pat.pattern)

    for assignment in args:
        m = re.match(assign_pat, assignment)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(assignment))

        name, val_str = m.group(1), m.group(2)
        symbols[name] = int(val_str)

    return symbols
1896
1897
# CLI entry point without exception handling.
#
# Reads the Normand input from the path given on the command line (or
# from the standard input if none), parses it with the initial
# variables, labels, offset, and byte order given on the command line,
# and writes the resulting bytes to the standard output.
#
# Raises a command-line error on an invalid assignment or negative
# offset. On a parsing error, prints its location and message, and then
# exits with status 1.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        prefix = ""

        # Prefix the location with the absolute input path, if any
        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print
    sys.stdout.buffer.write(res.data)
1947
1948
1949 # Prints the exception message `msg` and exits with status 1.
1950 def _fail(msg: str) -> NoReturn:
1951 if not msg.endswith("."):
1952 msg += "."
1953
1954 print(msg, file=sys.stderr)
1955 sys.exit(1)
1956
1957
# CLI entry point.
#
# Runs the command-line tool, reporting the message of any exception
# through _fail() instead of letting it propagate.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as err:
        # Only show the exception message, not the traceback
        err_msg = str(err)
        _fail(err_msg)
1964
1965
# Run the command-line tool when this module is executed as a script.
if __name__ == "__main__":
    _run_cli()
This page took 0.066084 seconds and 3 git commands to generate.