d84dce41afdbf936992dbf9297b69fca090944d3
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
32 __author__ = "Philippe Proulx"
33 __version__ = "0.5.0"
34 __all__ = [
35 "ByteOrder",
36 "parse",
37 "ParseError",
38 "ParseResult",
39 "TextLoc",
40 "SymbolsT",
41 "__author__",
42 "__version__",
43 ]
44
45 import re
46 import abc
47 import ast
48 import sys
49 import enum
50 import math
51 import struct
52 from typing import (
53 Any,
54 Set,
55 Dict,
56 List,
57 Tuple,
58 Union,
59 Pattern,
60 Callable,
61 NoReturn,
62 Optional,
63 )
64
65
# Text location (line and column numbers).
#
# Instances are built with the private _create() class method: calling
# the constructor directly raises `NotImplementedError`.
class TextLoc:
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass the disabled __init__()
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Explicit `self`, like the other disabled constructors of this
    # module.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLoc({}, {})".format(self._line_no, self._col_no)
93
94
# Base of any item: keeps the text location given at construction time.
class _Item:
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Source text location of this item.
    @property
    def text_loc(self):
        return self._text_loc
104
105
# Scalar item: an item which has a size.
class _ScalarItem(_Item):
    # Size of this item (bytes); each concrete scalar item class
    # provides it.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
113
114
115 # A repeatable item.
116 class _RepableItem:
117 pass
118
119
# Single byte item.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Value of this byte.
    @property
    def val(self):
        return self._val

    # A byte item is always one byte long.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
137
138
# String item (already encoded).
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes of this string.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({!r}, {})".format(self._data, self._text_loc)
156
157
# Byte order (the values are the names used in the Normand text).
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
166
167
# Byte order setting item.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {})".format(self._bo, self._text_loc)
180
181
# Label item.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Name of this label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {})".format(self._name, self._text_loc)
195
196
# Offset setting item.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Offset value to set.
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {})".format(self._val, self._text_loc)
210
211
212 # Mixin of containing an AST expression and its string.
213 class _ExprMixin:
214 def __init__(self, expr_str: str, expr: ast.Expression):
215 self._expr_str = expr_str
216 self._expr = expr
217
218 # Expression string.
219 @property
220 def expr_str(self):
221 return self._expr_str
222
223 # Expression node to evaluate.
224 @property
225 def expr(self):
226 return self._expr
227
228
# Variable assignment item: assigns the value of an expression to a
# named variable.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable to assign.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_VarAssign({!r}, {!r}, {!r}, {})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
247
248
# Fixed-length integer item, possibly needing more than one byte.
class _FlInt(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        return "_FlInt({!r}, {!r}, {!r}, {})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
271
272
# LEB128 integer item (common base of the unsigned and signed ones).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLoc):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the concrete class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        return "{}({!r}, {!r}, {})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
286
287
# Unsigned LEB128 integer item (everything is inherited).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
291
292
# Signed LEB128 integer item (everything is inherited).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
296
297
# Group item: contains other items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

    # Items contained in this group.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {})".format(self._items, self._text_loc)
311
312
# Repetition item: an item to repeat and the expression attached to the
# repetition.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        return "_Rep({!r}, {!r}, {!r}, {})".format(
            self._item, self._expr_str, self._expr, self._text_loc
        )
331
332
# Type of any item which contains an expression.
_ExprItemT = Union[_FlInt, _Leb128Int, _VarAssign, _Rep]
335
336
# A parsing error containing a message and a text location.
#
# Instances are built with the private _create() class method: calling
# the constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        # Bypass the disabled __init__()
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, msg: str, text_loc: TextLoc):
        # Initialize the base exception with the message
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location of the error.
    @property
    def text_loc(self):
        return self._text_loc
356
357
# Raises a parsing error built from the message `msg` and the text
# location `text_loc`.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
361
362
363 # Variable/label dictionary type.
364 SymbolsT = Dict[str, int]
365
366
367 # Python name pattern.
368 _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
369
370
371 # Normand parser.
372 #
373 # The constructor accepts a Normand input. After building, use the `res`
374 # property to get the resulting main group.
375 class _Parser:
# Builds a parser for the Normand input `normand` and parses it
# immediately.
#
# The names of `variables` and `labels` are the initially known variable
# and label names.
def __init__(self, normand: str, variables: SymbolsT, labels: SymbolsT):
    self._normand = normand

    # Current position within `normand`
    self._at = 0
    self._line_no = 1
    self._col_no = 1

    # Known names so far
    self._label_names = set(labels)
    self._var_names = set(variables)

    self._parse()
386
# Resulting main group item.
@property
def res(self):
    return self._res
391
# Current text location (new object built from the current line and
# column numbers).
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLoc._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
398
399 # Returns `True` if this parser is done parsing.
400 def _is_done(self):
401 return self._at == len(self._normand)
402
403 # Returns `True` if this parser isn't done parsing.
404 def _isnt_done(self):
405 return not self._is_done()
406
# Raises a parse error having the message `msg` and the current text
# location (delegates to the module-level _raise_error()).
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
411
412 # Tries to make the pattern `pat` match the current substring,
413 # returning the match object and updating `self._at`,
414 # `self._line_no`, and `self._col_no` on success.
415 def _try_parse_pat(self, pat: Pattern[str]):
416 m = pat.match(self._normand, self._at)
417
418 if m is None:
419 return
420
421 # Skip matched string
422 self._at += len(m.group(0))
423
424 # Update line number
425 self._line_no += m.group(0).count("\n")
426
427 # Update column number
428 for i in reversed(range(self._at)):
429 if self._normand[i] == "\n" or i == 0:
430 if i == 0:
431 self._col_no = self._at + 1
432 else:
433 self._col_no = self._at - i
434
435 break
436
437 # Return match object
438 return m
439
440 # Expects the pattern `pat` to match the current substring,
441 # returning the match object and updating `self._at`,
442 # `self._line_no`, and `self._col_no` on success, or raising a parse
443 # error with the message `error_msg` on error.
444 def _expect_pat(self, pat: Pattern[str], error_msg: str):
445 # Match
446 m = self._try_parse_pat(pat)
447
448 if m is None:
449 # No match: error
450 self._raise_error(error_msg)
451
452 # Return match object
453 return m
454
455 # Pattern for _skip_ws_and_comments()
456 _ws_or_syms_or_comments_pat = re.compile(
457 r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
458 )
459
460 # Skips as many whitespaces, insignificant symbol characters, and
461 # comments as possible.
462 def _skip_ws_and_comments(self):
463 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
464
# Pattern for _try_parse_hex_byte()
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces/symbols/comments), returning a byte item on success.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc

    # High nibble
    m_high = self._try_parse_pat(self._nibble_pat)

    if m_high is None:
        # No match
        return

    # The low nibble is mandatory at this point
    self._skip_ws_and_comments()
    m_low = self._expect_pat(
        self._nibble_pat, "Expecting another hexadecimal nibble"
    )

    # Combine both nibbles
    val = (int(m_high.group(0), 16) << 4) | int(m_low.group(0), 16)
    return _Byte(val, begin_text_loc)
488
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

# Tries to parse a binary byte (`%` followed by eight bits, possibly
# separated by whitespaces/symbols/comments), returning a byte item on
# success.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # No prefix: not a binary byte
        return

    # Accumulate eight bits, most significant first
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        m_bit = self._expect_pat(
            self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
        )
        val = (val << 1) | int(m_bit.group(0))

    return _Byte(val, begin_text_loc)
512
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$\s*")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed by a decimal constant
# between -128 and 255), returning a byte item on success.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No prefix: not a decimal byte
        return

    # The value is mandatory at this point
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(m.group("val"))

    if m.group("neg") == "-":
        val = -val

    # Validate the range
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Two's complement for negative values
    return _Byte(val % 256, begin_text_loc)
541
542 # Tries to parse a byte, returning a byte item on success.
543 def _try_parse_byte(self):
544 # Hexadecimal
545 item = self._try_parse_hex_byte()
546
547 if item is not None:
548 return item
549
550 # Binary
551 item = self._try_parse_bin_byte()
552
553 if item is not None:
554 return item
555
556 # Decimal
557 item = self._try_parse_dec_byte()
558
559 if item is not None:
560 return item
561
# Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# Escape sequence candidate: a backslash followed by any character
# (except a newline), like what `_str_str_pat` consumes as a pair.
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a literal string, returning a string item on success.
#
# The string is encoded as UTF-8 by default, or as UTF-16/UTF-32 with
# the given byte order when it has a `u16be`-like prefix.
#
# Raises a parse error if the end of the literal string is missing.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass so that
    # the result of one replacement is never reinterpreted as part of
    # another escape sequence (an escaped backslash followed by `n`
    # must remain a backslash and `n`, not become a newline).
    #
    # Unknown escape sequences are kept as is.
    escape_seq_strs = self._str_escape_seq_strs

    def replace_escape_seq(m_esc):
        return escape_seq_strs.get(m_esc.group(1), m_esc.group(0))

    val = self._str_escape_seq_pat.sub(replace_escape_seq, m.group(0))

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
616
# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(")
_group_suffix_pat = re.compile(r"\)")

# Tries to parse a group (items between `(` and `)`), returning a group
# item on success.
def _try_parse_group(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._group_prefix_pat) is None:
        # No `(`: not a group
        return

    # Contained items
    group_items = self._parse_items()

    # The group must end here
    self._skip_ws_and_comments()
    self._expect_pat(
        self._group_suffix_pat, "Expecting an item or `)` (end of group)"
    )

    return _Group(group_items, begin_text_loc)
641
# Returns the normalized expression string (stripped, newlines replaced
# with spaces) and the corresponding AST expression node for the
# expression string `expr_str`.
#
# Raises a parse error at text location `text_loc` if the expression
# has an invalid syntax.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
    normalized_expr_str = expr_str.strip().replace("\n", " ")

    try:
        expr = ast.parse(normalized_expr_str, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(normalized_expr_str),
            text_loc,
        )

    return normalized_expr_str, expr
657
# Patterns for _try_parse_val_and_attr()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_int_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse a value expression followed by `:` and an attribute
# (fixed length in bits, `uleb128`, or `sleb128`), returning a
# fixed-length or LEB128 integer item on success.
def _try_parse_val_and_attr(self):
    begin_text_loc = self._text_loc

    # Expression part, up to and including `:`
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # No match
        return

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

    # Fixed length?
    m_len = self._try_parse_pat(self._fl_int_len_attr_pat)

    if m_len is not None:
        return _FlInt(expr_str, expr, int(m_len.group(0)), begin_text_loc)

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    # `u` selects the unsigned class, `s` the signed one
    cls = _ULeb128Int if m_leb128.group(1) == "u" else _SLeb128Int
    return cls(expr_str, expr, begin_text_loc)
702
# Pattern for _try_parse_var_assign()
_var_assign_pat = re.compile(
    r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
)

# Tries to parse a variable assignment (`NAME = EXPR`), returning a
# variable assignment item on success.
#
# Raises a parse error if the name is reserved or is the name of a
# known label.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._var_assign_pat)

    if m is None:
        # No match
        return

    var_name = m.group("name")

    # The name must not be reserved
    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    # The name must not be the one of a known label
    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), begin_text_loc)

    # This is now a known variable name
    self._var_names.add(var_name)

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)
    return _VarAssign(var_name, expr_str, expr, begin_text_loc)
744
# Pattern for _try_parse_set_bo()
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning a byte
# order setting item on success.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._bo_pat)

    if m is None:
        # No match
        return

    # The pattern only accepts `be` and `le`, which are exactly the
    # values of the `ByteOrder` enumerators.
    return _SetBo(ByteOrder(m.group(0)), begin_text_loc)
766
767 # Patterns for _try_parse_val_or_bo()
768 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
769 _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")
770
771 # Tries to parse a value, a variable assignment, or a byte order
772 # setting, returning an item on success.
773 def _try_parse_val_or_var_assign_or_set_bo(self):
774 # Match prefix
775 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
776 # No match
777 return
778
779 # Variable assignment item?
780 item = self._try_parse_var_assign()
781
782 if item is None:
783 # Fixed-length value item?
784 item = self._try_parse_val_and_attr()
785
786 if item is None:
787 # Byte order setting item?
788 item = self._try_parse_set_bo()
789
790 if item is None:
791 # At this point it's invalid
792 self._raise_error(
793 "Expecting a fixed-length integer, a variable assignment, or a byte order setting"
794 )
795
796 # Expect suffix
797 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
798 return item
799
# Pattern for _try_parse_set_offset_val() and _try_parse_rep()
_pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

# Tries to parse an offset setting value (after the initial `<`),
# returning an offset item on success.
#
# Raises a parse error if the matched text isn't a valid integer
# literal.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Base 0 follows the Python literal rules: it accepts the `0x`
    # prefix, but rejects a decimal constant having leading zeros (for
    # example `010`), which the pattern above accepts. Report this as
    # a parse error instead of letting `ValueError` escape.
    try:
        val = int(m.group(0), 0)
    except ValueError:
        _raise_error(
            "Invalid offset setting value `{}`".format(m.group(0)), begin_text_loc
        )

    # Return item
    return _SetOffset(val, begin_text_loc)
817
# Tries to parse a label name (after the initial `<`), returning a
# label item on success.
#
# Raises a parse error if the name is reserved, is the name of a known
# label, or is the name of a known variable.
def _try_parse_label_name(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        # No match
        return

    label_name = m.group(0)

    # The name must not be reserved
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
        )

    # The name must be unique amongst labels
    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), begin_text_loc)

    # The name must not be the one of a known variable
    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), begin_text_loc
        )

    # This is now a known label name
    self._label_names.add(label_name)
    return _Label(label_name, begin_text_loc)
849
850 # Patterns for _try_parse_label_or_set_offset()
851 _label_set_offset_prefix_pat = re.compile(r"<\s*")
852 _label_set_offset_suffix_pat = re.compile(r"\s*>")
853
854 # Tries to parse a label or an offset setting, returning an item on
855 # success.
856 def _try_parse_label_or_set_offset(self):
857 # Match prefix
858 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
859 # No match
860 return
861
862 # Offset setting item?
863 item = self._try_parse_set_offset_val()
864
865 if item is None:
866 # Label item?
867 item = self._try_parse_label_name()
868
869 if item is None:
870 # At this point it's invalid
871 self._raise_error("Expecting a label name or an offset setting value")
872
873 # Expect suffix
874 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
875 return item
876
877 # Tries to parse a base item (anything except a repetition),
878 # returning it on success.
879 def _try_parse_base_item(self):
880 # Byte item?
881 item = self._try_parse_byte()
882
883 if item is not None:
884 return item
885
886 # String item?
887 item = self._try_parse_str()
888
889 if item is not None:
890 return item
891
892 # Value, variable assignment, or byte order setting item?
893 item = self._try_parse_val_or_var_assign_or_set_bo()
894
895 if item is not None:
896 return item
897
898 # Label or offset setting item?
899 item = self._try_parse_label_or_set_offset()
900
901 if item is not None:
902 return item
903
904 # Group item?
905 item = self._try_parse_group()
906
907 if item is not None:
908 return item
909
910 # Pattern for _try_parse_rep()
911 _rep_prefix_pat = re.compile(r"\*\s*")
912 _rep_expr_prefix_pat = re.compile(r"\{")
913 _rep_expr_pat = re.compile(r"[^}p]+")
914 _rep_expr_suffix_pat = re.compile(r"\}")
915
916 # Tries to parse a repetition, returning the expression string and
917 # AST expression node on success.
918 def _try_parse_rep(self):
919 # Match prefix
920 if self._try_parse_pat(self._rep_prefix_pat) is None:
921 # No match
922 return
923
924 # Expect and return a decimal multiplier
925 self._skip_ws_and_comments()
926
927 # Integer?
928 m = self._try_parse_pat(self._pos_const_int_pat)
929
930 if m is None:
931 # Expression?
932 if self._try_parse_pat(self._rep_expr_prefix_pat) is None:
933 # At this point it's invalid
934 self._raise_error("Expecting a positive integral multiplier or `{`")
935
936 # Expect an expression
937 expr_str_loc = self._text_loc
938 m = self._expect_pat(self._rep_expr_pat, "Expecting an expression")
939 expr_str = self._ast_expr_from_str(m.group(0), expr_str_loc)
940
941 # Expect `}`
942 self._expect_pat(self._rep_expr_suffix_pat, "Expecting `}`")
943 expr_str = m.group(0)
944 else:
945 expr_str_loc = self._text_loc
946 expr_str = m.group(0)
947
948 return self._ast_expr_from_str(expr_str, expr_str_loc)
949
# Tries to parse an item, possibly followed by a repetition, appending
# the parsed item to `items` and returning `True` on success.
#
# Returns `False`, without modifying `items`, if there's no item.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    item = self._try_parse_base_item()

    if item is None:
        # No item
        return False

    # Only a repeatable item may be followed by a repetition
    if isinstance(item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_ret = self._try_parse_rep()

        if rep_ret is not None:
            # Wrap the base item within a repetition item
            rep_expr_str, rep_expr = rep_ret
            item = _Rep(item, rep_expr_str, rep_expr, rep_text_loc)

    items.append(item)
    return True
975
# Parses and returns items, skipping whitespaces, insignificant symbols,
# and comments when allowed, and stopping at the end of the input or at
# the first unknown character.
def _parse_items(self) -> List[_Item]:
    items = []  # type: List[_Item]

    # Keep appending items while there's input left and an item is
    # recognized.
    while self._isnt_done() and self._try_append_item(items):
        pass

    return items
989
# Parses the whole Normand input, setting `self._res` to the main group
# item on success.
#
# Raises a parse error if something remains after the last recognized
# item.
def _parse(self):
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items
    items = self._parse_items()

    # Nothing significant must remain
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    # Main group item
    self._res = _Group(items, self._text_loc)
1011
1012
# The return type of parse().
#
# Instances are built with the private _create() class method: calling
# the constructor directly raises `NotImplementedError`.
class ParseResult:
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass the disabled __init__()
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(
        self,
        data: bytearray,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order (may be `None`).
    @property
    def byte_order(self):
        return self._bo
1070
1071
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1076
1077
1078 # The `ICITTE` reserved name.
1079 _icitte_name = "ICITTE"
1080
1081
1082 # Base node visitor.
1083 #
1084 # Calls the _visit_name() method for each name node which isn't the name
1085 # of a call.
1086 class _NodeVisitor(ast.NodeVisitor):
1087 def __init__(self):
1088 self._parent_is_call = False
1089
1090 def generic_visit(self, node: ast.AST):
1091 if type(node) is ast.Call:
1092 self._parent_is_call = True
1093 elif type(node) is ast.Name and not self._parent_is_call:
1094 self._visit_name(node.id)
1095
1096 super().generic_visit(node)
1097 self._parent_is_call = False
1098
1099 @abc.abstractmethod
1100 def _visit_name(self, name: str):
1101 ...
1102
1103
# Expression validator: validates that all the names within the
# expression of `item` are allowed.
#
# `allowed_names` are the allowed variable/label names, and
# `icitte_allowed` indicates whether or not the reserved `ICITTE` name
# is allowed too.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str], icitte_allowed: bool):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names
        self._icitte_allowed = icitte_allowed

    # Raises a parse error, at the text location of the item, if `name`
    # isn't allowed.
    def _visit_name(self, name: str):
        if name == _icitte_name:
            if self._icitte_allowed:
                return

            _raise_error(
                "Illegal reserved name `{}` in expression `{}`".format(
                    _icitte_name, self._item.expr_str
                ),
                self._item.text_loc,
            )

        if name in self._allowed_names:
            # Known and reachable name
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )

        # Help the user with the sorted legal names, if any
        legal_names = self._allowed_names.copy()

        if self._icitte_allowed:
            legal_names.add(_icitte_name)

        if len(legal_names) > 0:
            legal_names_str = ", ".join(
                sorted(["`{}`".format(legal_name) for legal_name in legal_names])
            )
            msg += "; the legal names are {{{}}}".format(legal_names_str)

        _raise_error(msg, self._item.text_loc)
1143
1144
# Expression visitor collecting all the visited names into a set.
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # The base class initializes the call tracking flag
        super().__init__()
        self._names = set()  # type: Set[str]

    # Collected names.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
1157
1158
# Generator state: variables, labels, offset, and byte order.
#
# The `variables` and `labels` dictionaries are copied so that updating
# the state doesn't modify the ones given at construction time.
class _GenState:
    def __init__(
        self,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Own copies of the symbols
        self.variables = variables.copy()
        self.labels = labels.copy()

        # Current offset and byte order
        self.offset = offset
        self.bo = bo
1172
1173
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Validate that each repetition and LEB128 integer expression uses
#    only reachable names (`ICITTE` is allowed for a LEB128 integer
#    expression, but not for a repetition expression).
#
# 2. Compute and keep the effective repetition count and LEB128 integer
#    value for each repetition and LEB128 integer instance.
#
# 3. Generate bytes, updating the initial state as it goes which becomes
#    the final state after the operation.
#
#    During the generation, when handling a `_Rep` or `_Leb128Int` item,
#    we already have the effective repetition count or value of the
#    instance.
#
#    When handling a `_Group` item, first update the current labels with
#    all the immediate (not nested) labels, and then handle each
#    contained item. This gives contained item access to "future" outer
#    labels. Then remove the immediate labels from the state so that
#    outer items don't have access to inner labels.
class _Gen:
    # Validates and generates `group` immediately.
    #
    # `variables`, `labels`, `offset`, and `bo` form the initial state.
    # Each pass gets its own `_GenState` (which copies the symbol
    # mappings) so that the precomputation pass doesn't influence the
    # generation pass.
    def __init__(
        self,
        group: _Group,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._validate_vl_exprs(group, set(variables.keys()), set(labels.keys()))
        self._vl_instance_vals = self._compute_vl_instance_vals(
            group, _GenState(variables, labels, offset, bo)
        )
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Returns the set of names which `_ExprNamesVisitor` collects within
    # the AST expression `expr`.
    @staticmethod
    def _names_of_expr(expr: ast.Expression):
        visitor = _ExprNamesVisitor()
        visitor.visit(expr)
        return visitor.names

    # Validates that all the repetition and LEB128 integer expressions
    # within `item` don't refer, directly or indirectly, to subsequent
    # labels.
    #
    # The strategy here is to keep a set of allowed label names, per
    # group, initialized to `allowed_label_names`, and a set of allowed
    # variable names initialized to `allowed_variable_names`. Both sets
    # are modified in place.
    #
    # Then, depending on the type of `item`:
    #
    # `_Label`:
    #     Add its name to the local allowed label names: a label
    #     occurring before a repetition, and not within a nested group,
    #     is always reachable.
    #
    # `_VarAssign`:
    #     If all the names within its expression are allowed, then add
    #     its name to the allowed variable names.
    #
    #     Otherwise, remove its name from the allowed variable names (if
    #     it's in there): a variable which refers to an unreachable name
    #     is unreachable itself.
    #
    # `_Rep` and `_Leb128Int`:
    #     Make sure all the names within its expression are allowed
    #     (`ICITTE` being allowed for `_Leb128Int` only). For `_Rep`,
    #     also validate the repeated item.
    #
    # `_Group`:
    #     Call this function for each contained item with a _copy_ of
    #     the current allowed label names and the same current allowed
    #     variable names.
    @staticmethod
    def _validate_vl_exprs(
        item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
    ):
        if type(item) is _Label:
            allowed_label_names.add(item.name)
        elif type(item) is _VarAssign:
            # Everything this expression may refer to (computed once,
            # not once per name).
            reachable_names = (
                allowed_label_names | allowed_variable_names | {_icitte_name}
            )

            if all(
                name in reachable_names for name in _Gen._names_of_expr(item.expr)
            ):
                # Reachable variable
                allowed_variable_names.add(item.name)
            else:
                # Unreachable variable (no-op if it wasn't allowed)
                allowed_variable_names.discard(item.name)
        elif isinstance(item, _Leb128Int):
            # Validate the expression (`ICITTE` allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, True
            ).visit(item.expr)
        elif type(item) is _Rep:
            # Validate the expression first (`ICITTE` not allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, False
            ).visit(item.expr)

            # Validate inner item
            _Gen._validate_vl_exprs(
                item.item, allowed_variable_names, allowed_label_names
            )
        elif type(item) is _Group:
            # Copy `allowed_label_names` so that this frame cannot
            # access the nested label names.
            group_allowed_label_names = allowed_label_names.copy()

            for subitem in item.items:
                _Gen._validate_vl_exprs(
                    subitem, allowed_variable_names, group_allowed_label_names
                )

    # Evaluates the expression of `item` considering the current
    # generation state `state` and returns the resulting integer.
    #
    # If `allow_icitte` is `True`, then the `ICITTE` name is available
    # for the expression to evaluate.
    #
    # A variable takes precedence over a label (and over `ICITTE`)
    # having the same name because the variables are added last.
    #
    # Reports an error through _raise_error_for_item() if the evaluation
    # fails or if the result isn't exactly of type `int`.
    @staticmethod
    def _eval_item_expr(item: _ExprItemT, state: _GenState, allow_icitte: bool):
        syms = state.labels.copy()

        # Set the `ICITTE` name to the current offset, if any
        if allow_icitte:
            syms[_icitte_name] = state.offset

        # Add the current variables
        syms.update(state.variables)

        # Validate the node and its children
        _ExprValidator(item, set(syms.keys()), True).visit(item.expr)

        # Compile and evaluate expression node.
        #
        # NOTE(review): this runs eval() on an expression coming from
        # the Normand input; it relies on the validation above to
        # restrict what the expression may contain.
        try:
            val = eval(compile(item.expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Validate result
        if type(val) is not int:
            _raise_error_for_item(
                "Invalid expression `{}`: expecting result type `int`, not `{}`".format(
                    item.expr_str, type(val).__name__
                ),
                item,
            )

        return val

    # Returns the size, in bytes, required to encode the value `val`
    # with LEB128 (signed version if `is_signed` is `True`).
    @staticmethod
    def _leb128_size_for_val(val: int, is_signed: bool):
        if val < 0:
            # Equivalent upper bound.
            #
            # For example, if `val` is -128, then the full integer for
            # this number of bits would be [-128, 127].
            val = -val - 1

        # Number of bits (add one for the sign if needed), at least one
        # so that zero still needs a byte.
        bits = max(val.bit_length() + int(is_signed), 1)

        # Seven bits per byte, rounding up
        return (bits + 6) // 7

    # Computes the effective value for each repetition and LEB128
    # integer instance, appending them to `instance_vals` (a new list if
    # `None`) in encounter order, and returning that list.
    #
    # `state` is updated (offset, labels, and variables) as the items
    # are walked.
    #
    # At this point it must be known that, for a given variable-length
    # item, its expression only contains reachable names.
    #
    # When handling a `_Rep` item, this function appends its effective
    # multiplier to `instance_vals` _before_ handling its repeated item.
    #
    # When handling a `_VarAssign` item, this function only evaluates it
    # if all its names are reachable.
    @staticmethod
    def _compute_vl_instance_vals(
        item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
    ):
        if instance_vals is None:
            instance_vals = []

        if isinstance(item, _ScalarItem):
            state.offset += item.size
        elif type(item) is _Label:
            state.labels[item.name] = state.offset
        elif type(item) is _VarAssign:
            # Only evaluate if all the names are currently known
            do_eval = all(
                name == _icitte_name
                or name in state.variables
                or name in state.labels
                for name in _Gen._names_of_expr(item.expr)
            )

            if do_eval:
                # Evaluate the expression and keep the result
                state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
        elif type(item) is _SetOffset:
            state.offset = item.val
        elif isinstance(item, _Leb128Int):
            # Evaluate the expression
            val = _Gen._eval_item_expr(item, state, True)

            # Validate result
            if type(item) is _ULeb128Int and val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add the evaluation result to the variable-length item
            # instance values.
            instance_vals.append(val)

            # Update offset
            state.offset += _Gen._leb128_size_for_val(val, type(item) is _SLeb128Int)
        elif type(item) is _Rep:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state, False)

            # Validate result
            if val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,}".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add to repetition instance values
            instance_vals.append(val)

            # Process the repeated item `val` times
            for _ in range(val):
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Group:
            # Labels of this group must not leak to the outer items
            prev_labels = state.labels.copy()

            # Process each item
            for subitem in item.items:
                _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

            state.labels = prev_labels

        return instance_vals

    # Size getter for an item which doesn't generate any data.
    def _zero_item_size(self, item: _Item, next_vl_instance: int):
        return 0, next_vl_instance

    # Size getter for an item having a fixed `size` property.
    def _scalar_item_size(self, item: _ScalarItem, next_vl_instance: int):
        return item.size, next_vl_instance

    # Size getter for a LEB128 integer item: consumes one
    # variable-length instance value.
    def _leb128_int_item_size(self, item: _Leb128Int, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        return (
            self._leb128_size_for_val(
                self._vl_instance_vals[next_vl_instance], type(item) is _SLeb128Int
            ),
            next_vl_instance + 1,
        )

    # Size getter for a group item: sum of the sizes of its items.
    def _group_item_size(self, item: _Group, next_vl_instance: int):
        size = 0

        for subitem in item.items:
            subitem_size, next_vl_instance = self._item_size(subitem, next_vl_instance)
            size += subitem_size

        return size, next_vl_instance

    # Size getter for a repetition item: consumes one variable-length
    # instance value (the multiplier), and then whatever each iteration
    # of the repeated item consumes.
    def _rep_item_size(self, item: _Rep, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        mul = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1
        size = 0

        for _ in range(mul):
            iter_size, next_vl_instance = self._item_size(item.item, next_vl_instance)
            size += iter_size

        return size, next_vl_instance

    # Returns the size of `item` and the new next variable-length
    # instance index.
    def _item_size(self, item: _Item, next_vl_instance: int):
        return self._item_size_funcs[type(item)](item, next_vl_instance)

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
        self._data.append(item.val)
        state.offset += item.size
        return next_vl_instance

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
        self._data += item.data
        state.offset += item.size
        return next_vl_instance

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(
        self, item: _SetBo, state: _GenState, next_vl_instance: int
    ):
        # Update current byte order
        state.bo = item.bo
        return next_vl_instance

    # Handles the variable assignment item `item`.
    def _handle_var_assign_item(
        self, item: _VarAssign, state: _GenState, next_vl_instance: int
    ):
        # Update variable
        state.variables[item.name] = self._eval_item_expr(item, state, True)
        return next_vl_instance

    # Handles the fixed-length integer item `item`.
    def _handle_fl_int_item(
        self, item: _FlInt, state: _GenState, next_vl_instance: int
    ):
        # Compute value
        val = self._eval_item_expr(item, state, True)

        # Validate range: anything from the smallest signed value to
        # the greatest unsigned value for this length is accepted.
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, state.offset
                ),
                item,
            )

        # A byte order is only needed for more than one byte
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        data = struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if state.bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert state.bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes and update offset
        self._data += data
        state.offset += len(data)
        return next_vl_instance

    # Handles the LEB128 integer item `item`.
    def _handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the precomputed value
        val = self._vl_instance_vals[next_vl_instance]

        # Size in bytes
        size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

        # For each byte
        for _ in range(size):
            # Seven LSBs, MSB of the byte set (continue)
            self._data.append((val & 0x7F) | 0x80)
            val >>= 7

        # Clear MSB of last byte (stop)
        self._data[-1] &= ~0x80

        # Update offset: without this, `ICITTE` for the following items
        # and the final offset would lag behind the generated data (and
        # disagree with _compute_vl_instance_vals()).
        state.offset += size

        # Consumed this instance
        return next_vl_instance + 1

    # Handles the group item `item`, only removing the immediate labels
    # from `state.labels` if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self,
        item: _Group,
        state: _GenState,
        next_vl_instance: int,
        remove_immediate_labels: bool = True,
    ):
        # Compute the values of the immediate (not nested) labels. Those
        # labels are reachable by any expression within the group.
        #
        # This first pass only measures the items: it works on local
        # copies of the offset and of the next instance index.
        offset = state.offset
        immediate_label_names = set()  # type: Set[str]
        tmp_next_vl_instance = next_vl_instance

        for subitem in item.items:
            if type(subitem) is _SetOffset:
                # Update offset
                offset = subitem.val
            elif type(subitem) is _Label:
                # New immediate label
                state.labels[subitem.name] = offset
                immediate_label_names.add(subitem.name)

            subitem_size, tmp_next_vl_instance = self._item_size(
                subitem, tmp_next_vl_instance
            )
            offset += subitem_size

        # Handle each item now with the actual state
        for subitem in item.items:
            next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

        # Remove immediate labels if required so that outer items won't
        # reach inner labels.
        if remove_immediate_labels:
            for name in immediate_label_names:
                del state.labels[name]

        return next_vl_instance

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the precomputed repetition count
        mul = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Handles the label item `item`.
    #
    # There's nothing to do: _handle_group_item() already sets the
    # labels of a group before handling its items.
    def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Handles the item `item`, returning the updated next
    # variable-length instance index.
    def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._item_handlers[type(item)](item, state, next_vl_instance)

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # Initial state
        self._data = bytearray()

        # Item handlers
        self._item_handlers = {
            _Byte: self._handle_byte_item,
            _FlInt: self._handle_fl_int_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _SLeb128Int: self._handle_leb128_int_item,
            _Str: self._handle_str_item,
            _ULeb128Int: self._handle_leb128_int_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Item size getters
        self._item_size_funcs = {
            _Byte: self._scalar_item_size,
            _FlInt: self._scalar_item_size,
            _Group: self._group_item_size,
            _Label: self._zero_item_size,
            _Rep: self._rep_item_size,
            _SetBo: self._zero_item_size,
            _SetOffset: self._zero_item_size,
            _SLeb128Int: self._leb128_int_item_size,
            _Str: self._scalar_item_size,
            _ULeb128Int: self._leb128_int_item_size,
            _VarAssign: self._zero_item_size,
        }  # type: Dict[type, Callable[[Any, int], Tuple[int, int]]]

        # Handle the group item, _not_ removing the immediate labels
        # because the `labels` property offers them.
        self._handle_group_item(group, state, 0, False)

        # This is actually the final state
        self._final_state = state
1731
1732
# Parses the Normand input string `normand`, generates the encoded
# bytes, and returns the corresponding `ParseResult` instance.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[SymbolsT] = None,
    init_labels: Optional[SymbolsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Default to empty symbol mappings
    init_variables = {} if init_variables is None else init_variables
    init_labels = {} if init_labels is None else init_labels

    # Parse the input to get the main group
    main_group = _Parser(normand, init_variables, init_labels).res

    # Generate the data and final state from the main group
    gen = _Gen(main_group, init_variables, init_labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1772
1773
# Builds the command-line argument parser, parses the command-line
# arguments with it, and returns the resulting namespace.
def _parse_cli_args():
    import argparse

    parser = argparse.ArgumentParser()

    # Initial state options
    parser.add_argument(
        "--offset",
        action="store",
        type=int,
        default=0,
        metavar="OFFSET",
        help="initial offset (positive)",
    )
    parser.add_argument(
        "-b",
        "--byte-order",
        type=str,
        choices=["be", "le"],
        metavar="BO",
        help="initial byte order (`be` or `le`)",
    )
    parser.add_argument(
        "--var",
        action="append",
        metavar="NAME=VAL",
        help="add an initial variable (may be repeated)",
    )
    parser.add_argument(
        "-l",
        "--label",
        action="append",
        metavar="NAME=VAL",
        help="add an initial label (may be repeated)",
    )

    # Version
    version_str = "Normand {}".format(__version__)
    parser.add_argument("--version", action="version", version=version_str)

    # Optional input path
    parser.add_argument(
        "path",
        action="store",
        nargs="?",
        metavar="PATH",
        help="input path (none means standard input)",
    )

    # Parse
    return parser.parse_args()
1822
1823
1824 # Raises a command-line error with the message `msg`.
1825 def _raise_cli_error(msg: str) -> NoReturn:
1826 raise RuntimeError("Command-line error: {}".format(msg))
1827
1828
# Converts the list of `NAME=VAL` strings `args` to a dictionary of
# names to integers and returns it.
#
# `VAL` must only contain decimal digits. Returns an empty dictionary
# if `args` is `None`.
#
# Raises a command-line error for any entry which doesn't have the
# expected form.
def _dict_from_arg(args: Optional[List[str]]):
    symbols = {}  # type: Dict[str, int]

    if args is None:
        return symbols

    # Same pattern for all the entries
    assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

    for entry in args:
        match = assign_pat.match(entry)

        if match is None:
            _raise_cli_error("Invalid assignment {}".format(entry))

        symbols[match.group(1)] = int(match.group(2))

    return symbols
1846
1847
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input (from the
# given path, or from the standard input without a path), parses it with
# the requested initial state, and writes the generated bytes to the
# standard output.
#
# Raises a command-line error for an invalid `NAME=VAL` assignment or
# for a negative initial offset. A `ParseError` is reported with
# _fail(), prefixed with the absolute input path (if any) and the text
# location of the error.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (include the offending value in the message)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print
    sys.stdout.buffer.write(res.data)
1897
1898
1899 # Prints the exception message `msg` and exits with status 1.
1900 def _fail(msg: str) -> NoReturn:
1901 if not msg.endswith("."):
1902 msg += "."
1903
1904 print(msg, file=sys.stderr)
1905 sys.exit(1)
1906
1907
# CLI entry point: runs the command-line tool, reporting any exception
# with _fail() (message on the standard error, exit status 1) instead
# of letting it propagate.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        msg = str(exc)
        _fail(msg)
1914
1915
# Run the command-line tool when this module is executed as a script
if "__main__" == __name__:
    _run_cli()
This page took 0.063468 seconds and 3 git commands to generate.