35c2330662ccb40ba67a1ab030e054408615a334
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.4.0"

# Public names of this module (what `from normand import *` exports).
__all__ = [
    "ByteOrder",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLoc",
    "VarsT",
    "__author__",
    "__version__",
]
44
45 import re
46 import abc
47 import ast
48 import sys
49 import enum
50 import math
51 import struct
52 from typing import (
53 Any,
54 Set,
55 Dict,
56 List,
57 Tuple,
58 Union,
59 Pattern,
60 Callable,
61 NoReturn,
62 Optional,
63 )
64
65
# Text location (line and column numbers).
#
# The public constructor is disabled: instances are only built through
# the private _create() class method.
class TextLoc:
    # Builds and returns a text location without going through
    # __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    # Direct instantiation isn't supported.
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self) -> int:
        return self._line_no

    # Column number.
    @property
    def col_no(self) -> int:
        return self._col_no

    def __repr__(self) -> str:
        coords = (self._line_no, self._col_no)
        return "TextLoc({}, {})".format(*coords)
93
94
# Any item: keeps the source text location it was created with.
class _Item:
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self) -> TextLoc:
        return self._text_loc
104
105
# Scalar item: an item having a size in bytes.
class _ScalarItem(_Item):
    # Size of this item (bytes); each concrete scalar item provides its
    # own implementation.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        pass
113
114
# A repeatable item.
#
# This is a marker class without any behaviour: the parser checks
# `isinstance(item, _RepableItem)` to decide whether or not an item may
# be followed by a repetition.
class _RepableItem:
    pass
118
119
# Single byte: a repeatable scalar item which is always one byte long.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self) -> int:
        return self._val

    # Size (bytes): always 1.
    @property
    def size(self) -> int:
        return 1

    def __repr__(self) -> str:
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
137
138
# String: a repeatable scalar item holding already encoded bytes.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self) -> bytes:
        return self._data

    # Size (bytes): the length of the encoded data.
    @property
    def size(self) -> int:
        return len(self._data)

    def __repr__(self) -> str:
        data_repr = repr(self._data)
        return "_Str({}, {})".format(data_repr, self._text_loc)
156
157
# Byte order.
#
# The value of each member is the name the parser matches for a byte
# order setting (`be` or `le`).
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
166
167
# Byte order setting: an item which holds a byte order.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order held by this item.
    @property
    def bo(self) -> ByteOrder:
        return self._bo

    def __repr__(self) -> str:
        bo_repr = repr(self._bo)
        return "_SetBo({}, {})".format(bo_repr, self._text_loc)
180
181
# Label: a named item.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self) -> str:
        return self._name

    def __repr__(self) -> str:
        name_repr = repr(self._name)
        return "_Label({}, {})".format(name_repr, self._text_loc)
195
196
# Offset setting: an item which holds an offset value.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Offset value.
    @property
    def val(self) -> int:
        return self._val

    def __repr__(self) -> str:
        val_repr = repr(self._val)
        return "_SetOffset({}, {})".format(val_repr, self._text_loc)
210
211
# Mixin for the items which contain an AST expression and the string it
# comes from.
#
# Users call `_ExprMixin.__init__()` explicitly, in addition to the
# initializer of their item base.
class _ExprMixin:
    def __init__(self, expr_str: str, expr: ast.Expression):
        self._expr_str, self._expr = expr_str, expr

    # Expression string.
    @property
    def expr_str(self) -> str:
        return self._expr_str

    # Expression node to evaluate.
    @property
    def expr(self) -> ast.Expression:
        return self._expr
227
228
# Variable assignment: a variable name with the expression which
# provides its value.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        # Initialize both bases explicitly: _Item.__init__() doesn't
        # forward to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self) -> str:
        return self._name

    def __repr__(self) -> str:
        parts = (
            repr(self._name),
            repr(self._expr_str),
            repr(self._expr),
            self._text_loc,
        )
        return "_VarAssign({}, {}, {}, {})".format(*parts)
247
248
# Fixed-length integer, possibly needing more than one byte.
#
# The value comes from an expression; the length is expressed in bits.
class _FlInt(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        # Initialize both bases explicitly: _Item.__init__() doesn't
        # forward to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self) -> int:
        return self._len

    # Size (bytes): the length in bits divided by eight (floored).
    @property
    def size(self) -> int:
        return self._len // 8

    def __repr__(self) -> str:
        parts = (
            repr(self._expr_str),
            repr(self._expr),
            repr(self._len),
            self._text_loc,
        )
        return "_FlInt({}, {}, {}, {})".format(*parts)
271
272
# LEB128 integer: base of the unsigned and signed variants.
#
# The value comes from an expression.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLoc):
        # Initialize both bases explicitly: _Item.__init__() doesn't
        # forward to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the concrete class so that subclasses don't need
    # their own __repr__().
    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return "{}({}, {}, {})".format(
            cls_name, repr(self._expr_str), repr(self._expr), self._text_loc
        )
286
287
# Unsigned LEB128 integer.
#
# Adds nothing to `_Leb128Int`: the class itself is what distinguishes
# it from the signed variant.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
291
292
# Signed LEB128 integer.
#
# Adds nothing to `_Leb128Int`: the class itself is what distinguishes
# it from the unsigned variant.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
296
297
# Group of items: a repeatable item which contains other items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self) -> List[_Item]:
        return self._items

    def __repr__(self) -> str:
        items_repr = repr(self._items)
        return "_Group({}, {})".format(items_repr, self._text_loc)
311
312
# Repetition item: an item to repeat with the expression which provides
# the multiplier.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        # Initialize both bases explicitly: _Item.__init__() doesn't
        # forward to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self) -> _Item:
        return self._item

    def __repr__(self) -> str:
        parts = (
            repr(self._item),
            repr(self._expr_str),
            repr(self._expr),
            self._text_loc,
        )
        return "_Rep({}, {}, {}, {})".format(*parts)
331
332
# Expression item type: any of the item types which carry an expression
# (they all use `_ExprMixin`).
_ExprItemT = Union[_FlInt, _Leb128Int, _VarAssign, _Rep]
335
336
# A parsing error containing a message and a text location.
#
# The public constructor is disabled: instances are only built through
# the private _create() class method.
class ParseError(RuntimeError):
    # Builds and returns a parsing error without going through
    # __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    # Direct instantiation isn't supported.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create(): sets the message of
    # the exception and keeps the text location.
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self) -> TextLoc:
        return self._text_loc
356
357
# Raises a parsing error built from the message `msg` and the text
# location `text_loc`; never returns.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
361
362
# Variable/label dictionary type: maps a variable or label name to its
# integral value.
VarsT = Dict[str, int]
365
366
# Python name pattern (ASCII letters, digits, and underscores, not
# starting with a digit); used for both label and variable names.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
369
370
# Normand parser.
#
# The constructor accepts a Normand input. After building, use the `res`
# property to get the resulting main group.
#
# The parser is a hand-written recursive descent parser: each
# _try_parse_*() method either consumes text and returns something, or
# consumes nothing and returns `None` so that the caller may try the
# next alternative. Once an alternative is committed to (its prefix
# matched), any further mismatch raises `ParseError`.
class _Parser:
    # Builds a parser to parse the Normand input `normand`, parsing
    # immediately.
    #
    # The names of `variables` and `labels` initialize the sets of known
    # variable and label names, which serve to detect conflicting and
    # duplicate names while parsing.
    #
    # Raises `ParseError` if `normand` is invalid.
    def __init__(self, normand: str, variables: VarsT, labels: VarsT):
        self._normand = normand
        self._at = 0
        self._line_no = 1
        self._col_no = 1
        self._label_names = set(labels.keys())
        self._var_names = set(variables.keys())
        self._parse()

    # Result (main group).
    @property
    def res(self):
        return self._res

    # Current text location.
    @property
    def _text_loc(self):
        return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
            self._line_no, self._col_no
        )

    # Returns `True` if this parser is done parsing.
    def _is_done(self):
        return self._at == len(self._normand)

    # Returns `True` if this parser isn't done parsing.
    def _isnt_done(self):
        return not self._is_done()

    # Raises a parse error, creating it using the message `msg` and the
    # current text location.
    def _raise_error(self, msg: str) -> NoReturn:
        _raise_error(msg, self._text_loc)

    # Tries to make the pattern `pat` match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success.
    #
    # Returns `None`, without changing the state, if `pat` doesn't
    # match.
    def _try_parse_pat(self, pat: Pattern[str]):
        m = pat.match(self._normand, self._at)

        if m is None:
            return

        matched = m.group(0)

        # Skip matched string
        self._at += len(matched)

        # Update line number
        self._line_no += matched.count("\n")

        # Update column number: distance from the last newline before
        # the current position. Without any preceding newline, rfind()
        # returns -1, which makes the column `self._at + 1` as expected
        # for the first line. This also handles an input of which the
        # very first character is a newline.
        self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

        # Return match object
        return m

    # Expects the pattern `pat` to match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success, or raising a parse
    # error with the message `error_msg` on error.
    def _expect_pat(self, pat: Pattern[str], error_msg: str):
        # Match
        m = self._try_parse_pat(pat)

        if m is None:
            # No match: error
            self._raise_error(error_msg)

        # Return match object
        return m

    # Pattern for _skip_ws_and_comments()
    _ws_or_syms_or_comments_pat = re.compile(
        r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
    )

    # Skips as many whitespaces, insignificant symbol characters, and
    # comments as possible.
    def _skip_ws_and_comments(self):
        self._try_parse_pat(self._ws_or_syms_or_comments_pat)

    # Pattern for _try_parse_hex_byte()
    _nibble_pat = re.compile(r"[A-Fa-f0-9]")

    # Tries to parse a hexadecimal byte, returning a byte item on
    # success.
    #
    # Whitespaces, insignificant symbols, and comments may separate the
    # two nibbles.
    def _try_parse_hex_byte(self):
        begin_text_loc = self._text_loc

        # Match initial nibble
        m_high = self._try_parse_pat(self._nibble_pat)

        if m_high is None:
            # No match
            return

        # Expect another nibble
        self._skip_ws_and_comments()
        m_low = self._expect_pat(
            self._nibble_pat, "Expecting another hexadecimal nibble"
        )

        # Return item
        return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)

    # Patterns for _try_parse_bin_byte()
    _bin_byte_bit_pat = re.compile(r"[01]")
    _bin_byte_prefix_pat = re.compile(r"%")

    # Tries to parse a binary byte, returning a byte item on success.
    #
    # Whitespaces, insignificant symbols, and comments may precede each
    # bit.
    def _try_parse_bin_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
            # No match
            return

        # Expect eight bits
        bits = []  # type: List[str]

        for _ in range(8):
            self._skip_ws_and_comments()
            m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
            bits.append(m.group(0))

        # Return item
        return _Byte(int("".join(bits), 2), begin_text_loc)

    # Patterns for _try_parse_dec_byte()
    _dec_byte_prefix_pat = re.compile(r"\$\s*")
    _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

    # Tries to parse a decimal byte, returning a byte item on success.
    #
    # The accepted range is [-128, 255]; a negative value becomes its
    # two's complement.
    def _try_parse_dec_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
            # No match
            return

        # Expect the value
        m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

        # Compute value
        val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)

        # Validate
        if val < -128 or val > 255:
            _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

        # Two's complement
        val %= 256

        # Return item
        return _Byte(val, begin_text_loc)

    # Tries to parse a byte, returning a byte item on success.
    def _try_parse_byte(self):
        # Hexadecimal
        item = self._try_parse_hex_byte()

        if item is not None:
            return item

        # Binary
        item = self._try_parse_bin_byte()

        if item is not None:
            return item

        # Decimal
        item = self._try_parse_dec_byte()

        if item is not None:
            return item

    # Patterns for _try_parse_str()
    _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
    _str_suffix_pat = re.compile(r'"')
    _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
    _str_escape_seq_pat = re.compile(r"\\(.)")

    # Strings corresponding to escape sequence characters
    _str_escape_seq_strs = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "e": "\x1b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
    }

    # Returns the replacement of the escape sequence match `m` (a
    # backslash followed by one character): the corresponding string for
    # a known escape sequence, or the unchanged text otherwise.
    @classmethod
    def _str_escape_seq_repl(cls, m):
        return cls._str_escape_seq_strs.get(m.group(1), m.group(0))

    # Tries to parse a string, returning a string item on success.
    #
    # The optional prefix selects the UTF-16 or UTF-32 encoding with a
    # given byte order; without it, the encoding is UTF-8.
    def _try_parse_str(self):
        begin_text_loc = self._text_loc

        # Match prefix
        m = self._try_parse_pat(self._str_prefix_pat)

        if m is None:
            # No match
            return

        # Get encoding
        encoding = "utf8"

        if m.group("len") is not None:
            encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

        # Actual string
        m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

        # Expect end of string
        self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

        # Replace escape sequences in a single left-to-right pass so
        # that the character following an escaped backslash is never
        # mistaken for the beginning of another escape sequence (`\\n`
        # is a backslash followed by `n`, not a backslash followed by a
        # newline).
        val = self._str_escape_seq_pat.sub(self._str_escape_seq_repl, m.group(0))

        # Encode
        data = val.encode(encoding)

        # Return item
        return _Str(data, begin_text_loc)

    # Patterns for _try_parse_group()
    _group_prefix_pat = re.compile(r"\(")
    _group_suffix_pat = re.compile(r"\)")

    # Tries to parse a group, returning a group item on success.
    def _try_parse_group(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._group_prefix_pat) is None:
            # No match
            return

        # Parse items
        items = self._parse_items()

        # Expect end of group
        self._skip_ws_and_comments()
        self._expect_pat(
            self._group_suffix_pat, "Expecting an item or `)` (end of group)"
        )

        # Return item
        return _Group(items, begin_text_loc)

    # Returns a stripped expression string and an AST expression node
    # from the expression string `expr_str` at text location `text_loc`.
    #
    # Raises a parse error at `text_loc` if `expr_str` isn't a valid
    # Python expression.
    def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
        # Create an expression node from the expression string
        expr_str = expr_str.strip().replace("\n", " ")

        try:
            expr = ast.parse(expr_str, mode="eval")
        except SyntaxError:
            _raise_error(
                "Invalid expression `{}`: invalid syntax".format(expr_str),
                text_loc,
            )

        return expr_str, expr

    # Patterns for _try_parse_val_and_attr()
    _val_expr_pat = re.compile(r"([^}:]+):\s*")
    _fl_int_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
    _leb128_int_attr_pat = re.compile(r"(u|s)leb128")

    # Tries to parse a value and attribute (fixed length in bits or
    # `leb128`), returning a value item on success.
    def _try_parse_val_and_attr(self):
        begin_text_loc = self._text_loc

        # Match
        m_expr = self._try_parse_pat(self._val_expr_pat)

        if m_expr is None:
            # No match
            return

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

        # Length?
        m_attr = self._try_parse_pat(self._fl_int_len_attr_pat)

        if m_attr is not None:
            # Return fixed-length integer item
            return _FlInt(
                expr_str,
                expr,
                int(m_attr.group(0)),
                begin_text_loc,
            )

        # LEB128?
        m_attr = self._try_parse_pat(self._leb128_int_attr_pat)

        if m_attr is None:
            # At this point it's invalid
            self._raise_error(
                "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
            )

        # Return LEB128 integer item
        cls = _ULeb128Int if m_attr.group(1) == "u" else _SLeb128Int
        return cls(expr_str, expr, begin_text_loc)

    # Pattern for _try_parse_var_assign()
    _var_assign_pat = re.compile(
        r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
    )

    # Tries to parse a variable assignment, returning a variable
    # assignment item on success.
    #
    # The variable name must not be the reserved name nor the name of a
    # known label; it becomes a known variable name.
    def _try_parse_var_assign(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._var_assign_pat)

        if m is None:
            # No match
            return

        # Validate name
        name = m.group("name")

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Existing label named `{}`".format(name), begin_text_loc)

        # Add to known variable names
        self._var_names.add(name)

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

        # Return item
        return _VarAssign(
            name,
            expr_str,
            expr,
            begin_text_loc,
        )

    # Pattern for _try_parse_set_bo()
    _bo_pat = re.compile(r"[bl]e")

    # Tries to parse a byte order name, returning a byte order setting
    # item on success.
    def _try_parse_set_bo(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._bo_pat)

        if m is None:
            # No match
            return

        # Return corresponding item
        if m.group(0) == "be":
            return _SetBo(ByteOrder.BE, begin_text_loc)
        else:
            assert m.group(0) == "le"
            return _SetBo(ByteOrder.LE, begin_text_loc)

    # Patterns for _try_parse_val_or_var_assign_or_set_bo()
    _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
    _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")

    # Tries to parse a value, a variable assignment, or a byte order
    # setting, returning an item on success.
    #
    # The alternatives are tried in this order: variable assignment,
    # value with attribute, and byte order setting.
    def _try_parse_val_or_var_assign_or_set_bo(self):
        # Match prefix
        if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
            # No match
            return

        # Variable assignment item?
        item = self._try_parse_var_assign()

        if item is None:
            # Fixed-length value item?
            item = self._try_parse_val_and_attr()

            if item is None:
                # Byte order setting item?
                item = self._try_parse_set_bo()

                if item is None:
                    # At this point it's invalid
                    self._raise_error(
                        "Expecting a fixed-length integer, a variable assignment, or a byte order setting"
                    )

        # Expect suffix
        self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
        return item

    # Pattern for _try_parse_set_offset_val() and _try_parse_rep()
    _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

    # Tries to parse an offset setting value (after the initial `<`),
    # returning an offset item on success.
    def _try_parse_set_offset_val(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is None:
            # No match
            return

        # Return item
        return _SetOffset(int(m.group(0), 0), begin_text_loc)

    # Tries to parse a label name (after the initial `<`), returning a
    # label item on success.
    #
    # The label name must not be the reserved name, the name of a known
    # label, nor the name of a known variable; it becomes a known label
    # name.
    def _try_parse_label_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(_py_name_pat)

        if m is None:
            # No match
            return

        # Validate
        name = m.group(0)

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

        if name in self._var_names:
            _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

        # Add to known label names
        self._label_names.add(name)

        # Return item
        return _Label(name, begin_text_loc)

    # Patterns for _try_parse_label_or_set_offset()
    _label_set_offset_prefix_pat = re.compile(r"<\s*")
    _label_set_offset_suffix_pat = re.compile(r"\s*>")

    # Tries to parse a label or an offset setting, returning an item on
    # success.
    def _try_parse_label_or_set_offset(self):
        # Match prefix
        if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
            # No match
            return

        # Offset setting item?
        item = self._try_parse_set_offset_val()

        if item is None:
            # Label item?
            item = self._try_parse_label_name()

            if item is None:
                # At this point it's invalid
                self._raise_error("Expecting a label name or an offset setting value")

        # Expect suffix
        self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
        return item

    # Tries to parse a base item (anything except a repetition),
    # returning it on success.
    def _try_parse_base_item(self):
        # Byte item?
        item = self._try_parse_byte()

        if item is not None:
            return item

        # String item?
        item = self._try_parse_str()

        if item is not None:
            return item

        # Value, variable assignment, or byte order setting item?
        item = self._try_parse_val_or_var_assign_or_set_bo()

        if item is not None:
            return item

        # Label or offset setting item?
        item = self._try_parse_label_or_set_offset()

        if item is not None:
            return item

        # Group item?
        item = self._try_parse_group()

        if item is not None:
            return item

    # Patterns for _try_parse_rep()
    _rep_prefix_pat = re.compile(r"\*\s*")
    _rep_expr_prefix_pat = re.compile(r"\{")

    # Anything up to the closing `}`. This must not exclude any other
    # character, otherwise an expression using a name which contains
    # that character can't be parsed.
    _rep_expr_pat = re.compile(r"[^}]+")
    _rep_expr_suffix_pat = re.compile(r"\}")

    # Tries to parse a repetition, returning the expression string and
    # AST expression node on success.
    #
    # The multiplier is either a constant integer or an expression
    # between `{` and `}`.
    def _try_parse_rep(self):
        # Match prefix
        if self._try_parse_pat(self._rep_prefix_pat) is None:
            # No match
            return

        # Expect a multiplier
        self._skip_ws_and_comments()
        mul_text_loc = self._text_loc

        # Integer?
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is not None:
            # The text location is the beginning of the integer
            return self._ast_expr_from_str(m.group(0), mul_text_loc)

        # Expression?
        if self._try_parse_pat(self._rep_expr_prefix_pat) is None:
            # At this point it's invalid
            self._raise_error("Expecting a positive integral multiplier or `{`")

        # Expect an expression, validating it before expecting `}` so
        # that an invalid expression is reported first
        expr_text_loc = self._text_loc
        m = self._expect_pat(self._rep_expr_pat, "Expecting an expression")
        ret = self._ast_expr_from_str(m.group(0), expr_text_loc)

        # Expect `}`
        self._expect_pat(self._rep_expr_suffix_pat, "Expecting `}`")
        return ret

    # Tries to parse an item, possibly followed by a repetition,
    # returning `True` on success.
    #
    # Appends any parsed item to `items`.
    def _try_append_item(self, items: List[_Item]):
        self._skip_ws_and_comments()

        # Parse a base item
        item = self._try_parse_base_item()

        if item is None:
            # No item
            return False

        # Parse repetition if the base item is repeatable
        if isinstance(item, _RepableItem):
            self._skip_ws_and_comments()
            rep_text_loc = self._text_loc
            rep_ret = self._try_parse_rep()

            if rep_ret is not None:
                item = _Rep(item, rep_ret[0], rep_ret[1], rep_text_loc)

        items.append(item)
        return True

    # Parses and returns items, skipping whitespaces, insignificant
    # symbols, and comments when allowed, and stopping at the first
    # unknown character.
    def _parse_items(self) -> List[_Item]:
        items = []  # type: List[_Item]

        while self._isnt_done():
            # Try to append item
            if not self._try_append_item(items):
                # Unknown at this point
                break

        return items

    # Parses the whole Normand input, setting `self._res` to the main
    # group item on success.
    #
    # Raises a parse error if anything remains after the last item.
    def _parse(self):
        if not self._normand.strip():
            # Special case to make sure there's something to consume
            self._res = _Group([], self._text_loc)
            return

        # Parse first level items
        items = self._parse_items()

        # Make sure there's nothing left
        self._skip_ws_and_comments()

        if self._isnt_done():
            self._raise_error(
                "Unexpected character `{}`".format(self._normand[self._at])
            )

        # Set main group item
        self._res = _Group(items, self._text_loc)
1011
1012
# The return type of parse().
#
# The public constructor is disabled: instances are only built through
# the private _create() class method.
class ParseResult:
    # Builds and returns a parse result without going through
    # __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Direct instantiation isn't supported.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._offset, self._bo = data, offset, bo
        self._vars, self._labels = variables, labels

    # Generated data.
    @property
    def data(self) -> bytearray:
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self) -> VarsT:
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self) -> VarsT:
        return self._labels

    # Updated offset.
    @property
    def offset(self) -> int:
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self) -> Optional[ByteOrder]:
        return self._bo
1070
1071
# Raises a parse error built from the message `msg` and the source text
# location of the item `item`; never returns.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1076
1077
# The `ICITTE` reserved name: the parser rejects it as a label name and
# as a variable name, and expression validation only accepts it where
# it's explicitly allowed.
_icitte_name = "ICITTE"
1080
1081
# Base node visitor.
#
# Calls the _visit_name() method for each name node which isn't the name
# of a call.
class _NodeVisitor(ast.NodeVisitor):
    def __init__(self):
        # `True` while the next visited name node is to be skipped
        # because a call node was just entered.
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # Entering a call: flag it before visiting the children.
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            self._visit_name(node.id)

        # Visit the children, and then always clear the flag: this
        # statement order is significant.
        super().generic_visit(node)
        self._parent_is_call = False

    # Called with the identifier of each reported name node.
    @abc.abstractmethod
    def _visit_name(self, name: str):
        pass
1102
1103
# Expression validator: validates that all the names within the
# expression are allowed.
#
# Raises a parse error, at the text location of the item, on the first
# illegal name.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str], icitte_allowed: bool):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names
        self._icitte_allowed = icitte_allowed

    def _visit_name(self, name: str):
        item = self._item

        # The reserved name is either allowed as a whole or illegal.
        if name == _icitte_name:
            if not self._icitte_allowed:
                _raise_error(
                    "Illegal reserved name `{}` in expression `{}`".format(
                        _icitte_name, item.expr_str
                    ),
                    item.text_loc,
                )

            return

        # Any other name must refer to a known and reachable
        # variable/label name.
        if name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, item.expr_str
        )

        # Help the user with the sorted legal names, if any.
        legal_names = self._allowed_names.copy()

        if self._icitte_allowed:
            legal_names.add(_icitte_name)

        if legal_names:
            quoted_names = ["`{}`".format(legal_name) for legal_name in legal_names]
            quoted_names.sort()
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, item.text_loc)
1143
1144
# Expression visitor getting all the contained names (except the names
# of calls, which the base visitor doesn't report).
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # The base initializer only resets the call flag.
        super().__init__()
        self._names = set()  # type: Set[str]

    # Names collected so far.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
1157
1158
# Generator state: variables, labels, offset, and byte order.
#
# The state holds its own copies of `variables` and `labels` so that
# updating it doesn't modify the dictionaries of the caller.
class _GenState:
    def __init__(
        self, variables: VarsT, labels: VarsT, offset: int, bo: Optional[ByteOrder]
    ):
        self.offset, self.bo = offset, bo
        self.variables = variables.copy()
        self.labels = labels.copy()
1168
1169
1170 # Generator of data and final state from a group item.
1171 #
1172 # Generation happens in memory at construction time. After building, use
1173 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1174 # get the resulting context.
1175 #
1176 # The steps of generation are:
1177 #
1178 # 1. Validate that each repetition and LEB128 integer expression uses
1179 # only reachable names and not `ICITTE`.
1180 #
1181 # 2. Compute and keep the effective repetition count and LEB128 integer
1182 # value for each repetition and LEB128 integer instance.
1183 #
1184 # 3. Generate bytes, updating the initial state as it goes which becomes
1185 # the final state after the operation.
1186 #
1187 # During the generation, when handling a `_Rep` or `_Leb128Int` item,
1188 # we already have the effective repetition count or value of the
1189 # instance.
1190 #
# When handling a `_Group` item, first update the current labels with
# all the immediate (not nested) labels, and then handle each
# contained item. This gives the contained items access to "future"
# outer labels. Then remove the immediate labels from the state so that
# outer items don't have access to inner labels.
1196 class _Gen:
# Builds a generator for the group item `group`, performing the whole
# generation immediately.
#
# `variables`, `labels`, `offset`, and `bo` are the initial variables,
# labels, offset, and byte order (`None` if not set).
def __init__(
    self,
    group: _Group,
    variables: VarsT,
    labels: VarsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Step 1: validate the variable-length item expressions, starting
    # from the initial variable and label names.
    init_variable_names = set(variables.keys())
    init_label_names = set(labels.keys())
    self._validate_vl_exprs(group, init_variable_names, init_label_names)

    # Step 2: precompute the value of each repetition and LEB128
    # integer instance using a dedicated, throwaway state.
    precompute_state = _GenState(variables, labels, offset, bo)
    self._vl_instance_vals = self._compute_vl_instance_vals(group, precompute_state)

    # Step 3: generate the bytes from a fresh initial state
    gen_state = _GenState(variables, labels, offset, bo)
    self._gen(group, gen_state)
1210
# Bytes generated by _gen().
@property
def data(self):
    return self._data

# Variables of the final state.
@property
def variables(self):
    return self._final_state.variables

# Labels of the final state (labels of the main group).
@property
def labels(self):
    return self._final_state.labels

# Offset of the final state.
@property
def offset(self):
    return self._final_state.offset

# Byte order of the final state.
@property
def bo(self):
    return self._final_state.bo
1235
# Visits the AST expression `expr` with a new `_ExprNamesVisitor` and
# returns the set of names it collected (the used, non-called names
# within `expr`).
@staticmethod
def _names_of_expr(expr: ast.Expression):
    names_visitor = _ExprNamesVisitor()
    names_visitor.visit(expr)
    return names_visitor.names
1243
# Validates that no repetition or LEB128 integer expression within
# `item` refers, directly or indirectly, to a subsequent label.
#
# `allowed_variable_names` and `allowed_label_names` are the variable
# and label names which are reachable when this function starts; this
# function updates both sets in place as it encounters items.
#
# What happens depends on the type of `item`:
#
# `_Label`:
#     Its name becomes an allowed label name: a label occurring
#     before a repetition, and not within a nested group, is always
#     reachable.
#
# `_VarAssign`:
#     If its expression only uses allowed names (or `ICITTE`), then
#     its name becomes an allowed variable name.
#
#     Otherwise its name stops being an allowed variable name: a
#     variable which refers to an unreachable name is unreachable
#     itself.
#
# `_Leb128Int`:
#     All the names of its expression must be allowed (`ICITTE` is
#     accepted).
#
# `_Rep`:
#     All the names of its expression must be allowed (`ICITTE` is
#     not accepted), and then the repeated item is validated with the
#     same sets.
#
# `_Group`:
#     Each contained item is validated with the same allowed variable
#     names, but with a _copy_ of the allowed label names so that
#     nested labels don't leak to the caller.
#
# Any other item is ignored.
@staticmethod
def _validate_vl_exprs(
    item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
):
    item_type = type(item)

    if item_type is _Label:
        allowed_label_names.add(item.name)
        return

    if item_type is _VarAssign:
        # Everything which this assignment may legally refer to
        reachable_names = (
            allowed_label_names | allowed_variable_names | {_icitte_name}
        )

        if all(name in reachable_names for name in _Gen._names_of_expr(item.expr)):
            # Only reachable names: the variable is reachable too
            allowed_variable_names.add(item.name)
        else:
            # At least one unreachable name: forget the variable
            allowed_variable_names.discard(item.name)

        return

    if isinstance(item, _Leb128Int):
        # `ICITTE` allowed
        leb128_validator = _ExprValidator(
            item, allowed_label_names | allowed_variable_names, True
        )
        leb128_validator.visit(item.expr)
        return

    if item_type is _Rep:
        # `ICITTE` not allowed; validate the expression before the
        # repeated item.
        rep_validator = _ExprValidator(
            item, allowed_label_names | allowed_variable_names, False
        )
        rep_validator.visit(item.expr)
        _Gen._validate_vl_exprs(
            item.item, allowed_variable_names, allowed_label_names
        )
        return

    if item_type is _Group:
        # Work on a copy of the label names so that the caller never
        # sees the labels of this group.
        nested_label_names = allowed_label_names.copy()

        for subitem in item.items:
            _Gen._validate_vl_exprs(
                subitem, allowed_variable_names, nested_label_names
            )
1320
# Evaluates the expression of `item` with the labels and variables of
# the generation state `state`, returning the resulting integer.
#
# If `allow_icitte` is `True`, then the `ICITTE` name is available to
# the expression and evaluates to the current offset.
#
# Reports an error for `item` (through _raise_error_for_item()) if the
# evaluation fails or if the result isn't exactly an `int`.
@staticmethod
def _eval_item_expr(item: _ExprItemT, state: _GenState, allow_icitte: bool):
    # Build the symbols in this order: labels, then `ICITTE`, and
    # finally variables, so that a later entry overrides an earlier
    # one having the same name.
    syms = state.labels.copy()

    if allow_icitte:
        syms[_icitte_name] = state.offset

    syms.update(state.variables)

    # Validate the node and its children against the known symbols
    _ExprValidator(item, set(syms.keys()), True).visit(item.expr)

    # NOTE: eval() receives `None` as its globals, so it runs with the
    # globals of this module; the validation above is presumably what
    # restricts the names which the expression may use.
    try:
        code = compile(item.expr, "", "eval")
        result = eval(code, None, syms)
    except Exception as exc:
        _raise_error_for_item(
            "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
            item,
        )

    # Only a genuine `int` is acceptable (`bool` is rejected too)
    if type(result) is int:
        return result

    _raise_error_for_item(
        "Invalid expression `{}`: expecting result type `int`, not `{}`".format(
            item.expr_str, type(result).__name__
        ),
        item,
    )
1359
1360 # Returns the size, in bytes, required to encode the value `val`
1361 # with LEB128 (signed version if `is_signed` is `True`).
1362 @staticmethod
1363 def _leb128_size_for_val(val: int, is_signed: bool):
1364 if val < 0:
1365 # Equivalent upper bound.
1366 #
1367 # For example, if `val` is -128, then the full integer for
1368 # this number of bits would be [-128, 127].
1369 val = -val - 1
1370
1371 # Number of bits (add one for the sign if needed)
1372 bits = val.bit_length() + int(is_signed)
1373
1374 if bits == 0:
1375 bits = 1
1376
1377 # Seven bits per byte
1378 return math.ceil(bits / 7)
1379
# Computes the effective value of each repetition and LEB128 integer
# instance within `item`, appending them to `instance_vals` in the
# order in which the instances occur, and returns `instance_vals`.
#
# If `instance_vals` is `None`, then this function starts with a new,
# empty list.
#
# `state` is updated while walking the items: offset, labels, and
# variables. The labels of a group are restored once the group is
# processed.
#
# At this point it must be known that, for a given variable-length
# item, its expression only contains reachable names.
#
# For a `_Rep` item, its effective multiplier is appended _before_ the
# values of its repeated item.
#
# A `_VarAssign` item is only evaluated if all the names of its
# expression are known.
@staticmethod
def _compute_vl_instance_vals(
    item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
):
    if instance_vals is None:
        instance_vals = []

    item_type = type(item)

    if isinstance(item, _ScalarItem):
        # Fixed size: just skip it
        state.offset += item.size
    elif item_type is _Label:
        state.labels[item.name] = state.offset
    elif item_type is _VarAssign:
        # Evaluate only if every name is `ICITTE`, a current variable,
        # or a current label.
        can_eval = all(
            name == _icitte_name or name in state.variables or name in state.labels
            for name in _Gen._names_of_expr(item.expr)
        )

        if can_eval:
            state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
    elif item_type is _SetOffset:
        state.offset = item.val
    elif isinstance(item, _Leb128Int):
        leb128_val = _Gen._eval_item_expr(item, state, True)

        # ULEB128 cannot encode a negative value
        if item_type is _ULeb128Int and leb128_val < 0:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                    item.expr_str, leb128_val
                ),
                item,
            )

        # Add the evaluation result to the variable-length item
        # instance values.
        instance_vals.append(leb128_val)

        # Skip the encoded bytes
        state.offset += _Gen._leb128_size_for_val(
            leb128_val, item_type is _SLeb128Int
        )
    elif item_type is _Rep:
        mul = _Gen._eval_item_expr(item, state, False)

        # A repetition count cannot be negative
        if mul < 0:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,}".format(
                    item.expr_str, mul
                ),
                item,
            )

        # The multiplier comes before the values of the repeated item
        instance_vals.append(mul)

        # Process the repeated item `mul` times
        for _ in range(mul):
            _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
    elif item_type is _Group:
        # Remember the outer labels to restore them afterwards
        outer_labels = state.labels.copy()

        for subitem in item.items:
            _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

        state.labels = outer_labels

    return instance_vals
1470
# Size getter for an item which generates no data: returns a size of
# zero and `next_vl_instance` unchanged.
def _zero_item_size(self, item: _Item, next_vl_instance: int):
    return (0, next_vl_instance)
1473
# Size getter for a scalar item: returns the size of `item` and
# `next_vl_instance` unchanged.
def _scalar_item_size(self, item: _ScalarItem, next_vl_instance: int):
    return (item.size, next_vl_instance)
1476
# Size getter for a LEB128 integer item: returns the encoded size of
# the precomputed value at index `next_vl_instance` and the index of
# the following variable-length item instance.
def _leb128_int_item_size(self, item: _Leb128Int, next_vl_instance: int):
    # Read the value at the current index, and only then move to the
    # next one, to honor the order of _compute_vl_instance_vals().
    val = self._vl_instance_vals[next_vl_instance]
    size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)
    return size, next_vl_instance + 1
1487
# Size getter for a group item: returns the sum of the sizes of the
# contained items and the updated next variable-length item instance
# index.
def _group_item_size(self, item: _Group, next_vl_instance: int):
    total_size = 0

    for subitem in item.items:
        size_and_next = self._item_size(subitem, next_vl_instance)
        total_size += size_and_next[0]
        next_vl_instance = size_and_next[1]

    return total_size, next_vl_instance
1496
# Size getter for a repetition item: returns the total size of all the
# iterations of the repeated item and the updated next variable-length
# item instance index.
def _rep_item_size(self, item: _Rep, next_vl_instance: int):
    # The multiplier is stored _before_ the values of the repeated
    # item, to honor the order of _compute_vl_instance_vals().
    count = self._vl_instance_vals[next_vl_instance]
    next_vl_instance += 1
    total_size = 0

    for _ in range(count):
        size_and_next = self._item_size(item.item, next_vl_instance)
        total_size += size_and_next[0]
        next_vl_instance = size_and_next[1]

    return total_size, next_vl_instance
1510
# Returns the size of `item` and the new next variable-length item
# instance index, dispatching on the exact type of `item`.
def _item_size(self, item: _Item, next_vl_instance: int):
    size_func = self._item_size_funcs[type(item)]
    return size_func(item, next_vl_instance)
1514
# Handles the byte item `item`: appends its value to the generated
# data and advances the offset of `state` by the size of `item`.
#
# Returns `next_vl_instance` as is.
def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
    self._data.append(item.val)
    state.offset = state.offset + item.size
    return next_vl_instance
1520
# Handles the string item `item`: appends its data to the generated
# data and advances the offset of `state` by the size of `item`.
#
# Returns `next_vl_instance` as is.
def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
    self._data += item.data
    state.offset = state.offset + item.size
    return next_vl_instance
1526
# Handles the byte order setting item `item`: the byte order of `item`
# becomes the current byte order of `state`.
#
# Returns `next_vl_instance` as is.
def _handle_set_bo_item(
    self, item: _SetBo, state: _GenState, next_vl_instance: int
):
    new_bo = item.bo
    state.bo = new_bo
    return next_vl_instance
1534
# Handles the variable assignment item `item`: evaluates its
# expression (`ICITTE` allowed) and stores the result in the variables
# of `state` under the name of `item`.
#
# Returns `next_vl_instance` as is.
def _handle_var_assign_item(
    self, item: _VarAssign, state: _GenState, next_vl_instance: int
):
    new_val = self._eval_item_expr(item, state, True)
    state.variables[item.name] = new_val
    return next_vl_instance
1542
# Handles the fixed-length integer item `item`: evaluates its
# expression (`ICITTE` allowed), encodes the result on `item.len` bits
# with the current byte order, appends the bytes to the generated data,
# and advances the offset of `state`.
#
# Reports an error for `item` if the value doesn't fit `item.len` bits
# (signed or unsigned) or if there's no current byte order while
# `item.len` is greater than 8.
#
# Returns `next_vl_instance` as is.
def _handle_fl_int_item(
    self, item: _FlInt, state: _GenState, next_vl_instance: int
):
    val = self._eval_item_expr(item, state, True)

    # Accept anything between the lowest signed value and the highest
    # unsigned value for this length.
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not lowest <= val <= highest:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, item.len, item.expr_str, state.offset
            ),
            item,
        )

    # A single byte doesn't need any byte order
    if item.len > 8 and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # No byte order (single byte) is handled like big endian
    is_be = state.bo in (None, ByteOrder.BE)

    # Encode the result on 64 bits (to extend the sign bit whatever
    # the value of `item.len`).
    fmt = (">" if is_be else "<") + ("Q" if val >= 0 else "q")
    encoded = struct.pack(fmt, val)

    # Keep only the requested length
    len_bytes = item.len // 8

    if is_be:
        # Big endian: the least significant bytes are the last ones
        encoded = encoded[-len_bytes:]
    else:
        # Little endian: the least significant bytes are the first ones
        assert state.bo == ByteOrder.LE
        encoded = encoded[:len_bytes]

    # Append to the current bytes and update the offset
    self._data += encoded
    state.offset += len(encoded)
    return next_vl_instance
1592
# Handles the LEB128 integer item `item`: encodes the precomputed value
# at index `next_vl_instance` of `self._vl_instance_vals` (signed
# version if `item` is an `_SLeb128Int`), appends the bytes to the
# generated data, and advances the offset of `state` by the number of
# appended bytes.
#
# Returns the index of the next variable-length item instance.
def _handle_leb128_int_item(
    self, item: _Leb128Int, state: _GenState, next_vl_instance: int
):
    # Get the precomputed value
    val = self._vl_instance_vals[next_vl_instance]

    # Size in bytes
    size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= ~0x80

    # Update the offset like the other data item handlers do, and like
    # _compute_vl_instance_vals() does for the same item: without
    # this, `ICITTE` and the final offset are wrong after any LEB128
    # integer.
    state.offset += size

    # Consumed this instance
    return next_vl_instance + 1
1614
# Handles the group item `item` in two passes:
#
# 1. Compute the offset of each immediate (not nested) label from the
#    item sizes and add those labels to `state.labels`, so that any
#    expression within the group may reach them.
#
# 2. Handle each contained item with the actual state.
#
# The immediate labels are removed from `state.labels` afterwards only
# if `remove_immediate_labels` is `True`.
#
# Returns the updated next variable-length item instance index.
def _handle_group_item(
    self,
    item: _Group,
    state: _GenState,
    next_vl_instance: int,
    remove_immediate_labels: bool = True,
):
    # First pass: works on local copies of the offset and of the
    # instance index so that the actual state is left alone.
    label_offset = state.offset
    sizing_vl_instance = next_vl_instance
    immediate_label_names = set()  # type: Set[str]

    for subitem in item.items:
        subitem_type = type(subitem)

        if subitem_type is _SetOffset:
            # Explicit offset
            label_offset = subitem.val
        elif subitem_type is _Label:
            # New immediate label
            state.labels[subitem.name] = label_offset
            immediate_label_names.add(subitem.name)

        size_and_next = self._item_size(subitem, sizing_vl_instance)
        label_offset += size_and_next[0]
        sizing_vl_instance = size_and_next[1]

    # Second pass: actual handling
    for subitem in item.items:
        next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

    # Remove the immediate labels if required so that outer items
    # won't reach inner labels.
    if remove_immediate_labels:
        for label_name in immediate_label_names:
            del state.labels[label_name]

    return next_vl_instance
1655
# Handles the repetition item `item`: handles its repeated item as
# many times as the precomputed count found at index
# `next_vl_instance` of `self._vl_instance_vals`.
#
# Returns the updated next variable-length item instance index.
def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
    # The precomputed count comes before the values of the repeated
    # item: read it, and consume this instance.
    count = self._vl_instance_vals[next_vl_instance]
    cur_vl_instance = next_vl_instance + 1

    for _ in range(count):
        cur_vl_instance = self._handle_item(item.item, state, cur_vl_instance)

    return cur_vl_instance
1668
# Handles the offset setting item `item`: the value of `item` becomes
# the current offset of `state`.
#
# Returns `next_vl_instance` as is.
def _handle_set_offset_item(
    self, item: _SetOffset, state: _GenState, next_vl_instance: int
):
    new_offset = item.val
    state.offset = new_offset
    return next_vl_instance
1675
# Handles the label item `item`.
#
# There's nothing to do here: _handle_group_item() already sets the
# labels of a group before handling its items.
#
# Returns `next_vl_instance` as is.
def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
    return next_vl_instance
1679
# Handles the item `item` with the handler registered for its exact
# type, returning the updated next variable-length item instance
# index.
def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
    handler = self._item_handlers[type(item)]
    return handler(item, state, next_vl_instance)
1684
# Generates the data (`self._data`) and the final state
# (`self._final_state`) from the group item `group` and the initial
# state `state`.
#
# `state` is updated in place and becomes the final state.
def _gen(self, group: _Group, state: _GenState):
    # Nothing generated yet
    self._data = bytearray()

    # Item type to size getter
    self._item_size_funcs = {
        _Byte: self._scalar_item_size,
        _FlInt: self._scalar_item_size,
        _Group: self._group_item_size,
        _Label: self._zero_item_size,
        _Rep: self._rep_item_size,
        _SetBo: self._zero_item_size,
        _SetOffset: self._zero_item_size,
        _SLeb128Int: self._leb128_int_item_size,
        _Str: self._scalar_item_size,
        _ULeb128Int: self._leb128_int_item_size,
        _VarAssign: self._zero_item_size,
    }  # type: Dict[type, Callable[[Any, int], Tuple[int, int]]]

    # Item type to handler
    self._item_handlers = {
        _Byte: self._handle_byte_item,
        _FlInt: self._handle_fl_int_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Handle the main group starting with the first variable-length
    # item instance, _keeping_ its immediate labels because the
    # `labels` property offers them.
    self._handle_group_item(group, state, 0, False)

    # `state` is now the final state
    self._final_state = state
1727
1728
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes, as well as the final
# variables, labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order (`None` means none).
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Missing dictionaries mean empty ones
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text into a main group, and then generate from it
    main_group = _Parser(normand, variables, labels).res
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1768
1769
# Builds the command-line argument parser, parses the command-line
# arguments, and returns the resulting namespace.
def _parse_cli_args():
    import argparse

    parser = argparse.ArgumentParser()

    # Initial offset
    parser.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )

    # Initial byte order
    parser.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Initial variables
    parser.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )

    # Initial labels
    parser.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    version_str = "Normand {}".format(__version__)
    parser.add_argument("--version", action="version", version=version_str)

    # Optional input path
    parser.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return parser.parse_args()
1818
1819
1820 # Raises a command-line error with the message `msg`.
1821 def _raise_cli_error(msg: str) -> NoReturn:
1822 raise RuntimeError("Command-line error: {}".format(msg))
1823
1824
# Returns a dictionary of names to integers built from the list of
# strings `args` containing `NAME=VAL` entries.
#
# Returns an empty dictionary if `args` is `None`.
#
# Raises a command-line error (see _raise_cli_error()) for any entry
# which isn't a Python name, `=`, and decimal digits.
def _dict_from_arg(args: Optional[List[str]]):
    result = {}  # type: Dict[str, int]

    if args is None:
        return result

    # Same pattern for every entry: build it once
    assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        name = m.group(1)
        result[name] = int(m.group(2))

    return result
1842
1843
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input (from the
# given path, or from the standard input if there's none), parses it
# with the initial variables, labels, offset, and byte order of the
# command line, and writes the resulting bytes to the standard output.
#
# On `ParseError`, prints the location and message of the error with
# _fail(), which exits. Any other error, including a command-line
# error, propagates to the caller.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message now includes the offending value;
    # it used to contain an unformatted `{}` placeholder)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print the raw bytes
    sys.stdout.buffer.write(res.data)
1893
1894
1895 # Prints the exception message `msg` and exits with status 1.
1896 def _fail(msg: str) -> NoReturn:
1897 if not msg.endswith("."):
1898 msg += "."
1899
1900 print(msg, file=sys.stderr)
1901 sys.exit(1)
1902
1903
# CLI entry point: runs _try_run_cli() and converts any exception to a
# message printed by _fail(), which exits with status 1.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        msg = str(exc)
        _fail(msg)
1910
1911
# Run the command-line tool when this module is executed as a script.
if __name__ == "__main__":
    _run_cli()
This page took 0.066471 seconds and 3 git commands to generate.