e2dbe0f3d8d2acd4678f5ddd1534018364865800
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
32 __author__ = "Philippe Proulx"
33 __version__ = "0.10.0"
34 __all__ = [
35 "ByteOrder",
36 "parse",
37 "ParseError",
38 "ParseResult",
39 "TextLocation",
40 "LabelsT",
41 "VariablesT",
42 "__author__",
43 "__version__",
44 ]
45
46 import re
47 import abc
48 import ast
49 import sys
50 import enum
51 import math
52 import struct
53 import typing
54 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
55
56
# Location within a source text, as a line number and a column number.
#
# Instances are built with the private _create() class method; calling
# the constructor directly always raises `NotImplementedError`.
class TextLocation:
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Go through __new__() because __init__() always raises.
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Real initializer, only called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        line_no, col_no = self._line_no, self._col_no
        return "TextLocation({}, {})".format(line_no, col_no)
84
85
# Base class of all items: only records the source text location
# received at construction time.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Source text location of this item.
    @property
    def text_loc(self):
        return self._text_loc
95
96
# Scalar item: an item which has a size in bytes.
class _ScalarItem(_Item):
    # Size of this item (bytes); subclasses provide the actual value.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
104
105
106 # A repeatable item.
107 class _RepableItem:
108 pass
109
110
# Item for a single byte (scalar and repeatable).
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Value of the byte.
    @property
    def val(self):
        return self._val

    # Always one byte.
    @property
    def size(self):
        return 1

    # Shows the byte value in hexadecimal.
    def __repr__(self):
        hex_val = hex(self._val)
        loc_repr = repr(self._text_loc)
        return "_Byte({}, {})".format(hex_val, loc_repr)
128
129
# Item for a string, kept as its already encoded bytes (scalar and
# repeatable).
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({}, {})".format(*map(repr, (self._data, self._text_loc)))
147
148
# Byte order; the value of each member is its lowercase name, and
# `enum.unique` guarantees that no two members share a value.
@enum.unique
class ByteOrder(enum.Enum):
    # Big-endian.
    BE = "be"

    # Little-endian.
    LE = "le"
157
158
# Item which sets the current byte order.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({}, {})".format(*map(repr, (self._bo, self._text_loc)))
171
172
# Label item.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Name of the label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({}, {})".format(*map(repr, (self._name, self._text_loc)))
186
187
# Item which sets the current offset.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset to set (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({}, {})".format(*map(repr, (self._val, self._text_loc)))
201
202
# Item which aligns the current offset, with a given padding byte
# value.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        parts = (self._val, self._pad_val, self._text_loc)
        return "_AlignOffset({}, {}, {})".format(*map(repr, parts))
224
225
226 # Mixin of containing an AST expression and its string.
227 class _ExprMixin:
228 def __init__(self, expr_str: str, expr: ast.Expression):
229 self._expr_str = expr_str
230 self._expr = expr
231
232 # Expression string.
233 @property
234 def expr_str(self):
235 return self._expr_str
236
237 # Expression node to evaluate.
238 @property
239 def expr(self):
240 return self._expr
241
242
# Variable assignment item: a variable name and the expression
# assigned to it.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # super() only reaches _Item.__init__(); the mixin is
        # initialized explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        parts = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({}, {}, {}, {})".format(*map(repr, parts))
264
265
# Fixed-length number item (its value is an expression), possibly
# needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # super() only reaches _Item.__init__(); the mixin is
        # initialized explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight (floored).
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._len, self._text_loc)
        return "_FlNum({}, {}, {}, {})".format(*map(repr, parts))
291
292
# LEB128 integer item (its value is an expression); common base of the
# unsigned and signed variants.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the concrete class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        parts = (self._expr_str, self._expr, self._text_loc)
        return "{}({}, {}, {})".format(type(self).__name__, *map(repr, parts))
306
307
# Unsigned LEB128 integer item; inherits everything from `_Leb128Int`.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
311
312
# Signed LEB128 integer item; inherits everything from `_Leb128Int`.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
316
317
# Group item: a list of contained items (repeatable).
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items (the very list received at construction time).
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({}, {})".format(*map(repr, (self._items, self._text_loc)))
331
332
# Repetition item: an item to repeat and the multiplier expression.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # super() only reaches _Item.__init__(); the mixin is
        # initialized explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        parts = (self._item, self._expr_str, self._expr, self._text_loc)
        return "_Rep({}, {}, {}, {})".format(*map(repr, parts))
354
355
# Conditional item: an item and the condition expression which
# controls it.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # super() only reaches _Item.__init__(); the mixin is
        # initialized explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Conditional item.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        parts = (self._item, self._expr_str, self._expr, self._text_loc)
        return "_Cond({}, {}, {}, {})".format(*map(repr, parts))
377
378
# Expression item type (for type hints): union of the item classes
# which hold an expression through `_ExprMixin`.
_ExprItemT = Union[_FlNum, _Leb128Int, _VarAssign, _Rep, _Cond]
381
382
# Parsing error: a message (the regular exception message) and the
# text location of the error.
#
# Instances are built with the private _create() class method; calling
# the constructor directly always raises `NotImplementedError`.
class ParseError(RuntimeError):
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Go through __new__() because __init__() always raises.
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Real initializer, only called by _create().
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location of the error.
    @property
    def text_loc(self):
        return self._text_loc
402
403
# Builds a parsing error from the message `msg` and the text location
# `text_loc`, and raises it (never returns).
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
407
408
# Variables dictionary type (for type hints): maps a variable name to
# an integer or floating point number value.
VariablesT = Dict[str, Union[int, float]]


# Labels dictionary type (for type hints): maps a label name to an
# integer.
LabelsT = Dict[str, int]


# Python name pattern: an ASCII letter or underscore followed by any
# number of ASCII letters, digits, and underscores.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
419
420
421 # Normand parser.
422 #
423 # The constructor accepts a Normand input. After building, use the `res`
424 # property to get the resulting main group.
425 class _Parser:
# Builds a parser for the Normand input `normand` and parses it right
# away.
#
# Only the keys of `variables` and `labels` are used: they seed the
# sets of already known variable and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    self._normand = normand

    # Names known so far
    self._var_names = set(variables)
    self._label_names = set(labels)

    # Current position: index within `normand`, with the line and
    # column numbers both starting at 1
    self._at = 0
    self._line_no = 1
    self._col_no = 1

    self._parse()
436
437 # Result (main group).
438 @property
439 def res(self):
440 return self._res
441
# Current text location: a new `TextLocation` object built from the
# current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
448
449 # Returns `True` if this parser is done parsing.
450 def _is_done(self):
451 return self._at == len(self._normand)
452
453 # Returns `True` if this parser isn't done parsing.
454 def _isnt_done(self):
455 return not self._is_done()
456
# Raises a parsing error having the message `msg` and the current text
# location (never returns).
def _raise_error(self, msg: str) -> NoReturn:
    here = self._text_loc
    _raise_error(msg, here)
461
462 # Tries to make the pattern `pat` match the current substring,
463 # returning the match object and updating `self._at`,
464 # `self._line_no`, and `self._col_no` on success.
465 def _try_parse_pat(self, pat: Pattern[str]):
466 m = pat.match(self._normand, self._at)
467
468 if m is None:
469 return
470
471 # Skip matched string
472 self._at += len(m.group(0))
473
474 # Update line number
475 self._line_no += m.group(0).count("\n")
476
477 # Update column number
478 for i in reversed(range(self._at)):
479 if self._normand[i] == "\n" or i == 0:
480 if i == 0:
481 self._col_no = self._at + 1
482 else:
483 self._col_no = self._at - i
484
485 break
486
487 # Return match object
488 return m
489
490 # Expects the pattern `pat` to match the current substring,
491 # returning the match object and updating `self._at`,
492 # `self._line_no`, and `self._col_no` on success, or raising a parse
493 # error with the message `error_msg` on error.
494 def _expect_pat(self, pat: Pattern[str], error_msg: str):
495 # Match
496 m = self._try_parse_pat(pat)
497
498 if m is None:
499 # No match: error
500 self._raise_error(error_msg)
501
502 # Return match object
503 return m
504
505 # Pattern for _skip_ws_and_comments()
506 _ws_or_syms_or_comments_pat = re.compile(
507 r"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
508 )
509
510 # Skips as many whitespaces, insignificant symbol characters, and
511 # comments as possible.
512 def _skip_ws_and_comments(self):
513 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
514
# Pattern for _try_parse_hex_byte()
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, insignificant symbols, and comments), returning a byte
# item on success.
#
# Returns `None` if there's no initial nibble; raises a parsing error
# if the second one is missing.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        # No initial nibble: not a hexadecimal byte
        return

    # The low nibble is mandatory at this point
    self._skip_ws_and_comments()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Create item
    digits = high.group(0) + low.group(0)
    return _Byte(int(digits, 16), begin_text_loc)
538
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

# Tries to parse a binary byte (`%` followed by eight bits, possibly
# separated by whitespaces, insignificant symbols, and comments),
# returning a byte item on success.
#
# Returns `None` if there's no `%` prefix; raises a parsing error if
# any of the eight bits is missing.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # No prefix: not a binary byte
        return

    # Accumulate the eight bits, most significant first
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        bit_m = self._expect_pat(
            self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
        )
        val = (val << 1) | int(bit_m.group(0))

    # Create item
    return _Byte(val, begin_text_loc)
562
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$\s*")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed by a possibly negative
# decimal constant), returning a byte item on success.
#
# Returns `None` if there's no `$` prefix; raises a parsing error if
# the constant is missing or outside the [-128, 255] range.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No prefix: not a decimal byte
        return

    # The value is mandatory at this point
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Optional `-` sign followed by the digits
    val = int(m.group("neg") + m.group("val"))

    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Create item, storing a negative value as its two's complement
    return _Byte(val % 256, begin_text_loc)
591
# Tries to parse a byte in any supported form, returning a byte item on
# success, or `None` if nothing matches.
#
# The forms are tried in this order: hexadecimal, binary, decimal.
def _try_parse_byte(self):
    try_parse_funcs = (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            return item
611
# Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# Any escape sequence: a backslash followed by one character
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a literal string, returning a string item (holding the
# encoded bytes) on success.
#
# The string is encoded as UTF-8, or as UTF-16/UTF-32 with the given
# byte order when it has a `u16be`, `u16le`, `u32be`, or `u32le`
# prefix.
#
# Returns `None` if there's no string prefix; raises a parsing error if
# the closing `"` is missing.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Successive str.replace() calls (one per escape character) would
    # be wrong: the backslash resulting from, or belonging to, one
    # escape sequence could combine with the next character. For
    # example `\\n` (escaped backslash, then `n`) would become a
    # backslash followed by a newline.
    #
    # An unknown escape sequence is kept as is.
    def unescape(esc_m: "typing.Match[str]"):
        return self._str_escape_seq_strs.get(esc_m.group(1), esc_m.group(0))

    val = self._str_escape_seq_pat.sub(unescape, m.group(0))

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
666
# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(|!g(roup)?\b")
_group_suffix_paren_pat = re.compile(r"\)")

# Tries to parse a group, returning a group item on success.
#
# A group which starts with `(` must end with `)`; one which starts
# with `!g` or `!group` must end with `!end`.
#
# Returns `None` if there's no group prefix; raises a parsing error if
# the expected end of group is missing.
def _try_parse_group(self):
    begin_text_loc = self._text_loc
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        # No prefix: not a group
        return

    # Contained items
    items = self._parse_items()

    # The expected suffix depends on the prefix
    if m_open.group(0) == "(":
        end_pat, end_str = self._group_suffix_paren_pat, ")"
    else:
        end_pat, end_str = self._block_end_pat, "!end"

    # Expect end of group
    self._skip_ws_and_comments()
    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )

    # Create item
    return _Group(items, begin_text_loc)
699
700 # Returns a stripped expression string and an AST expression node
701 # from the expression string `expr_str` at text location `text_loc`.
702 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
703 # Create an expression node from the expression string
704 expr_str = expr_str.strip().replace("\n", " ")
705
706 try:
707 expr = ast.parse(expr_str, mode="eval")
708 except SyntaxError:
709 _raise_error(
710 "Invalid expression `{}`: invalid syntax".format(expr_str),
711 text_loc,
712 )
713
714 return expr_str, expr
715
# Patterns for _try_parse_num_and_attr()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse a value expression followed by `:` and an attribute
# (fixed length in bits, `uleb128`, or `sleb128`), returning a
# fixed-length number item or an LEB128 integer item on success.
#
# Returns `None` if there's no `expression:` part; raises a parsing
# error if the expression or the attribute is invalid.
def _try_parse_num_and_attr(self):
    begin_text_loc = self._text_loc
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # No match
        return

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

    # Fixed length?
    m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

    if m_len is not None:
        return _FlNum(expr_str, expr, int(m_len.group(0)), begin_text_loc)

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    # `u` selects the unsigned variant, `s` the signed one
    leb128_cls = _ULeb128Int if m_leb128.group(1) == "u" else _SLeb128Int
    return leb128_cls(expr_str, expr, begin_text_loc)
760
# Pattern for _try_parse_var_assign()
#
# `(?!=)` makes sure that the `=` of the assignment isn't the first
# character of an `==` operator: without it, a value expression such as
# `a == 1` would be taken for an assignment to `a` of the (invalid)
# expression `= 1`.
_var_assign_pat = re.compile(
    r"(?P<name>{})\s*=(?!=)\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
)

# Tries to parse a variable assignment (`NAME = EXPR`), returning a
# variable assignment item on success.
#
# On success, the variable name is added to the known variable names.
#
# Returns `None` if there's no assignment; raises a parsing error if
# the name is reserved, if it's the name of an existing label, or if
# the expression is invalid.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._var_assign_pat)

    if m is None:
        # No match
        return

    # Validate name
    name = m.group("name")

    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), begin_text_loc)

    # Add to known variable names
    self._var_names.add(name)

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

    # Return item
    return _VarAssign(
        name,
        expr_str,
        expr,
        begin_text_loc,
    )
802
# Pattern for _try_parse_set_bo()
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning a byte
# order setting item on success, or `None` otherwise.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._bo_pat)

    if m is None:
        # No match
        return

    # The matched name is exactly the value of the corresponding
    # `ByteOrder` member
    return _SetBo(ByteOrder(m.group(0)), begin_text_loc)
824
# Patterns for _try_parse_val_or_var_assign_or_set_bo()
_val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
_val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")

# Tries to parse a value, a variable assignment, or a byte order
# setting (anything between `{` and `}`), returning an item on success.
#
# The content is tried, in this order, as a variable assignment, as a
# number with its attribute, and as a byte order name.
#
# Returns `None` if there's no `{` prefix; raises a parsing error if
# the content is invalid or if the closing `}` is missing.
def _try_parse_val_or_var_assign_or_set_bo(self):
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        # No prefix
        return

    try_parse_funcs = (
        self._try_parse_var_assign,
        self._try_parse_num_and_attr,
        self._try_parse_set_bo,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            break
    else:
        # At this point it's invalid
        self._raise_error(
            "Expecting a fixed-length number, a variable assignment, or a byte order setting"
        )

    # Expect suffix
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
857
# Common positive constant integer pattern (hexadecimal with a `0x` or
# `0X` prefix, or decimal)
_pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

# Tries to parse an offset setting value (after the initial `<`),
# returning an offset setting item on success, or `None` if there's no
# positive constant integer.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Convert with an explicit base: int() with base 0 raises
    # `ValueError` for a decimal constant having leading zeros (for
    # example `010`), which the pattern above accepts.
    val_str = m.group(0)
    base = 16 if val_str[:2] in ("0x", "0X") else 10

    # Return item
    return _SetOffset(int(val_str, base), begin_text_loc)
875
# Tries to parse a label name (after the initial `<`), returning a
# label item on success.
#
# On success, the name is added to the known label names.
#
# Returns `None` if there's no name; raises a parsing error if the name
# is reserved, if it's a duplicate label name, or if it's the name of
# an existing variable.
def _try_parse_label_name(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        # No match
        return

    name = m.group(0)

    # Find why the name is invalid, if it is
    error_msg = None

    if name == _icitte_name:
        error_msg = "`{}` is a reserved label name".format(_icitte_name)
    elif name in self._label_names:
        error_msg = "Duplicate label name `{}`".format(name)
    elif name in self._var_names:
        error_msg = "Existing variable named `{}`".format(name)

    if error_msg is not None:
        _raise_error(error_msg, begin_text_loc)

    # Valid: remember the name and create the item
    self._label_names.add(name)
    return _Label(name, begin_text_loc)
907
# Patterns for _try_parse_label_or_set_offset()
_label_set_offset_prefix_pat = re.compile(r"<\s*")
_label_set_offset_suffix_pat = re.compile(r"\s*>")

# Tries to parse a label or an offset setting (anything between `<` and
# `>`), returning an item on success.
#
# The content is tried as an offset setting value first, then as a
# label name.
#
# Returns `None` if there's no `<` prefix; raises a parsing error if
# the content is invalid or if the closing `>` is missing.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # No prefix
        return

    for try_parse in (self._try_parse_set_offset_val, self._try_parse_label_name):
        item = try_parse()

        if item is not None:
            break
    else:
        # At this point it's invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # Expect suffix
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
934
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@\s*")
_align_offset_val_pat = re.compile(r"(\d+)\s*")
_align_offset_pad_val_prefix_pat = re.compile(r"~\s*")

# Tries to parse an offset alignment (`@`, an alignment in bits, and
# optionally `~` followed by a padding byte value), returning an offset
# alignment item on success.
#
# The padding byte value is 0 when not specified.
#
# Returns `None` if there's no `@` prefix; raises a parsing error if
# the alignment is missing or isn't a positive multiple of eight, or if
# the padding byte value is missing or greater than 255.
def _try_parse_align_offset(self):
    begin_text_loc = self._text_loc

    # Match prefix
    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No match
        return

    align_text_loc = self._text_loc
    m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )

    # Validate alignment
    val = int(m.group(1))

    if val <= 0 or (val % 8) != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                val
            ),
            align_text_loc,
        )

    # Padding value?
    pad_val = 0

    if self._try_parse_pat(self._align_offset_pad_val_prefix_pat) is not None:
        pad_val_text_loc = self._text_loc
        m = self._expect_pat(self._pos_const_int_pat, "Expecting a byte value")

        # Convert with an explicit base: int() with base 0 raises
        # `ValueError` for a decimal constant having leading zeros
        # (for example `007`), which the pattern accepts.
        pad_val_str = m.group(0)
        base = 16 if pad_val_str[:2] in ("0x", "0X") else 10
        pad_val = int(pad_val_str, base)

        # Validate
        if pad_val > 255:
            _raise_error(
                "Invalid padding byte value {}".format(pad_val),
                pad_val_text_loc,
            )

    # Return item
    return _AlignOffset(val, pad_val, begin_text_loc)
985
# Patterns for _expect_rep_cond_expr()
_rep_cond_expr_prefix_pat = re.compile(r"\{")
_rep_cond_expr_pat = re.compile(r"[^}]+")
_rep_cond_expr_suffix_pat = re.compile(r"\}")

# Parses the expression of a conditional block or of a repetition
# (block or post-item) and returns the expression string and AST node.
#
# The expression is, in this order of preference: a positive constant
# integer (only if `accept_int` is true), a name, or anything between
# `{` and `}`.
#
# Raises a parsing error if none of those is found, if the closing `}`
# is missing, or if the expression is invalid.
def _expect_rep_cond_expr(self, accept_int: bool):
    expr_text_loc = self._text_loc

    # Constant integer?
    m = self._try_parse_pat(self._pos_const_int_pat) if accept_int else None

    if m is None:
        # Name?
        m = self._try_parse_pat(_py_name_pat)

    if m is None:
        # Expression between `{` and `}`?
        if self._try_parse_pat(self._rep_cond_expr_prefix_pat) is None:
            # At this point it's invalid
            if accept_int:
                expected = "a positive constant integer, a name, or `{`"
            else:
                expected = "a name or `{`"

            self._raise_error("Expecting {}".format(expected))

        # The expression itself starts after the `{`
        expr_text_loc = self._text_loc
        m = self._expect_pat(self._rep_cond_expr_pat, "Expecting an expression")

        # Expect `}`
        self._expect_pat(self._rep_cond_expr_suffix_pat, "Expecting `}`")

    return self._ast_expr_from_str(m.group(0), expr_text_loc)
1031
# Parses the multiplier expression of a repetition (block or
# post-item), which may be a positive constant integer, and returns the
# expression string and AST node.
def _expect_rep_mul_expr(self):
    return self._expect_rep_cond_expr(accept_int=True)
1036
# Common block end pattern
_block_end_pat = re.compile(r"!end\b\s*")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b\s*")

# Tries to parse a repetition block (`!r` or `!repeat`, a multiplier
# expression, items, and `!end`), returning a repetition item on
# success.
#
# The repeated item is a group which contains the items of the block.
#
# Returns `None` if there's no block prefix; raises a parsing error if
# the multiplier expression is invalid or if `!end` is missing.
def _try_parse_rep_block(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # No prefix: not a repetition block
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    expr_str, expr = self._expect_rep_mul_expr()

    # Items to repeat, wrapped in a group located where they start
    self._skip_ws_and_comments()
    group = _Group(self._parse_items(), self._text_loc) if False else None
    items_text_loc = self._text_loc
    items = self._parse_items()
    group = _Group(items, items_text_loc)

    # Expect end of block
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    # Create item
    return _Rep(group, expr_str, expr, begin_text_loc)
1070
# Pattern for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b\s*")

# Tries to parse a conditional block (`!if`, a condition expression,
# items, and `!end`), returning a conditional item on success.
#
# The conditional item is a group which contains the items of the
# block. Unlike a repetition multiplier, the condition cannot be a
# constant integer.
#
# Returns `None` if there's no block prefix; raises a parsing error if
# the condition expression is invalid or if `!end` is missing.
def _try_parse_cond_block(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # No prefix: not a conditional block
        return

    # Condition expression
    self._skip_ws_and_comments()
    expr_str, expr = self._expect_rep_cond_expr(False)

    # Conditional items, wrapped in a group located where they start
    self._skip_ws_and_comments()
    items_text_loc = self._text_loc
    items = self._parse_items()
    group = _Group(items, items_text_loc)

    # Expect end of block
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of conditional block)",
    )

    # Create item
    return _Cond(group, expr_str, expr, begin_text_loc)
1102
# Tries to parse a base item (anything except a post-item repetition),
# returning it on success, or `None` if nothing matches.
#
# The first parsing method below which returns an item wins.
def _try_parse_base_item(self):
    try_parse_funcs = (
        # Byte item?
        self._try_parse_byte,
        # String item?
        self._try_parse_str,
        # Value, variable assignment, or byte order setting item?
        self._try_parse_val_or_var_assign_or_set_bo,
        # Label or offset setting item?
        self._try_parse_label_or_set_offset,
        # Offset alignment item?
        self._try_parse_align_offset,
        # Group item?
        self._try_parse_group,
        # Repetition (block) item?
        self._try_parse_rep_block,
        # Conditional block item?
        self._try_parse_cond_block,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            return item
1153
# Pattern for _try_parse_rep_post()
_rep_post_prefix_pat = re.compile(r"\*")

# Tries to parse a post-item repetition (`*` followed by a multiplier
# expression), returning the expression string and AST expression node
# on success.
#
# Returns `None` if there's no `*` prefix; raises a parsing error if
# the multiplier expression is missing or invalid.
def _try_parse_rep_post(self):
    if self._try_parse_pat(self._rep_post_prefix_pat) is None:
        # No prefix: no repetition
        return

    # The multiplier expression is mandatory at this point
    self._skip_ws_and_comments()
    expr_str_and_node = self._expect_rep_mul_expr()
    return expr_str_and_node
1168
# Tries to parse an item, possibly followed by a post-item repetition,
# appending whatever is parsed to `items`.
#
# When a repeatable item is followed by a repetition, it's the
# repetition item (wrapping the base item) which is appended.
#
# Returns `True` if an item was appended, or `False` if there's no
# item at the current position.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    item = self._try_parse_base_item()

    if item is None:
        # No item
        return False

    # Only a repeatable item may be followed by a repetition
    if isinstance(item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_ret = self._try_parse_rep_post()

        if rep_ret is not None:
            expr_str, expr = rep_ret
            item = _Rep(item, expr_str, expr, rep_text_loc)

    items.append(item)
    return True
1194
1195 # Parses and returns items, skipping whitespaces, insignificant
1196 # symbols, and comments when allowed, and stopping at the first
1197 # unknown character.
def _parse_items(self) -> List[_Item]:
    parsed = []  # type: List[_Item]

    # Keep appending while there's input left and the last attempt
    # recognized an item: the first unknown element ends the loop.
    while self._isnt_done() and self._try_append_item(parsed):
        pass

    return parsed
1208
1209 # Parses the whole Normand input, setting `self._res` to the main
1210 # group item on success.
def _parse(self):
    # Input made of whitespace only (or empty): the result is an empty
    # main group, without trying to consume anything.
    if not self._normand.strip():
        self._res = _Group([], self._text_loc)
        return

    # First level items
    top_level_items = self._parse_items()

    # Anything left after the trailing whitespaces/comments is
    # something no item parser recognized.
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    # The main group contains all the first level items
    self._res = _Group(top_level_items, self._text_loc)
1230
1231
# The return type of parse(): the generated data with the final state.
#
# Build an instance with the _create() class method only: calling the
# initializer directly raises `NotImplementedError`.
class ParseResult:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns a result without going through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Actual initialization: keeps the parameters as is (no copy).
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._bo = bo
        self._offset = offset
        self._labels = labels
        self._vars = variables
        self._data = data

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1289
1290
# Raises a parse error with the message `msg`, located where the item
# `item` is (its `text_loc` attribute).
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1295
1296
# The `ICITTE` reserved name.
#
# Within an evaluated expression, this name is bound to the current
# offset; expression validation always considers it legal.
_icitte_name = "ICITTE"
1299
1300
# Base node visitor.
#
# Calls the _visit_name() method for each name node which isn't the name
# of a call.
#
# A subclass must implement _visit_name().
class _NodeVisitor(ast.NodeVisitor):
    def __init__(self):
        # `True` while the next visited node directly follows a call node
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # The first visited child is the called function: flag it
            # so that its name isn't reported.
            self._parent_is_call = True
        elif node_type is ast.Name:
            if not self._parent_is_call:
                self._visit_name(node.id)

        # Visit the children, and then reset the flag so that what
        # follows (call arguments, for example) gets reported.
        super().generic_visit(node)
        self._parent_is_call = False

    # Called with the identifier `name` of each reported name node.
    @abc.abstractmethod
    def _visit_name(self, name: str):
        ...
1321
1322
# Expression validator: raises a parse error, located at the item owning
# the expression, as soon as a visited name is neither the reserved
# `ICITTE` name nor one of the allowed names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str]):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # Known and reachable variable/label name: nothing to report
        if name == _icitte_name or name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )

        # Work on a copy so as to leave the caller's set untouched
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        if len(legal_names) > 0:
            # Help the user with the sorted list of legal names
            quoted_names = sorted(
                "`{}`".format(legal_name) for legal_name in legal_names
            )
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._item.text_loc)
1352
1353
# Expression visitor getting all the contained names.
#
# After visiting an expression, the `names` property is the set of all
# the names which the base visitor reported (names which aren't the name
# of a call).
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # Let the base visitor initialize its own state instead of
        # duplicating it here.
        super().__init__()
        self._names = set()  # type: Set[str]

    # Names collected so far.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
1366
1367
# Generator state: current variables, labels, offset, and byte order.
#
# The state owns shallow copies of `variables` and `labels` so that
# updating it never changes the dictionaries of the caller.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Scalars first, and then the copied dictionaries
        self.offset = offset
        self.bo = bo
        self.variables = variables.copy()
        self.labels = labels.copy()
1381
1382
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Validate that each repetition, conditional, and LEB128 integer
#    expression uses only reachable names.
#
# 2. Compute and keep the effective repetition count, conditional value,
#    and LEB128 integer value for each repetition, conditional, and
#    LEB128 integer instance.
#
# 3. Generate bytes, updating the initial state as it goes which becomes
#    the final state after the operation.
#
#    During the generation, when handling a `_Rep`, `_Cond`, or
#    `_Leb128Int` item, we already have the effective repetition count,
#    conditional value, or value of the instance.
#
#    When handling a `_Group` item, first update the current labels with
#    all the immediate (not nested) labels, and then handle each
#    contained item. This gives contained item access to "future" outer
#    labels. Then remove the immediate labels from the state so that
#    outer items don't have access to inner labels.
#
# Steps 2 and 3 both track the current offset: every item handler of
# step 3 must update the offset exactly like step 2 does so that both
# passes agree on what `ICITTE` and the label values are.
class _Gen:
    def __init__(
        self,
        group: _Group,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Step 1
        self._validate_vl_exprs(group, set(variables.keys()), set(labels.keys()))

        # Step 2 (works on its own state)
        self._vl_instance_vals = self._compute_vl_instance_vals(
            group, _GenState(variables, labels, offset, bo)
        )

        # Step 3 (fresh state which becomes the final one)
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Returns the set of used, non-called names within the AST
    # expression `expr`.
    @staticmethod
    def _names_of_expr(expr: ast.Expression):
        visitor = _ExprNamesVisitor()
        visitor.visit(expr)
        return visitor.names

    # Validates that all the repetition, conditional, and LEB128 integer
    # expressions within `group` don't refer, directly or indirectly, to
    # subsequent labels.
    #
    # The strategy here is to keep a set of allowed label names, per
    # group, initialized to `allowed_label_names`, and a set of allowed
    # variable names initialized to `allowed_variable_names`.
    #
    # Then, depending on the type of `item`:
    #
    # `_Label`:
    #     Add its name to the local allowed label names: a label
    #     occurring before a repetition, and not within a nested group,
    #     is always reachable.
    #
    # `_VarAssign`:
    #     If all the names within its expression are allowed, then add
    #     its name to the allowed variable names.
    #
    #     Otherwise, remove its name from the allowed variable names (if
    #     it's in there): a variable which refers to an unreachable name
    #     is unreachable itself.
    #
    # `_Rep`, `_Cond`, and `_Leb128Int`:
    #     Make sure all the names within its expression are allowed.
    #
    # `_Group`:
    #     Call this function for each contained item with a _copy_ of
    #     the current allowed label names and the same current allowed
    #     variable names.
    #
    # Both sets are updated in place.
    @staticmethod
    def _validate_vl_exprs(
        item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
    ):
        if type(item) is _Label:
            allowed_label_names.add(item.name)
        elif type(item) is _VarAssign:
            # Check if this variable name is allowed: all the names of
            # its expression must be reachable.
            reachable_names = (
                allowed_label_names | allowed_variable_names | {_icitte_name}
            )
            allowed = all(
                name in reachable_names for name in _Gen._names_of_expr(item.expr)
            )

            if allowed:
                allowed_variable_names.add(item.name)
            elif item.name in allowed_variable_names:
                allowed_variable_names.remove(item.name)
        elif isinstance(item, _Leb128Int):
            # Validate the expression
            _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
                item.expr
            )
        elif type(item) is _Rep or type(item) is _Cond:
            # Validate the expression first
            _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
                item.expr
            )

            # Validate inner item
            _Gen._validate_vl_exprs(
                item.item, allowed_variable_names, allowed_label_names
            )
        elif type(item) is _Group:
            # Copy `allowed_label_names` so that this frame cannot
            # access the nested label names.
            group_allowed_label_names = allowed_label_names.copy()

            for subitem in item.items:
                _Gen._validate_vl_exprs(
                    subitem, allowed_variable_names, group_allowed_label_names
                )

    # Evaluates the expression of `item` considering the current
    # generation state `state`, and returns the result.
    #
    # The available names are the current labels, the current variables
    # (a variable wins over a label or `ICITTE` having the same name),
    # and `ICITTE` (current offset).
    #
    # A `bool` result is converted to `int`.
    #
    # If `allow_float` is `True`, then the type of the result may be
    # `float` too.
    #
    # Raises a parse error, located at `item`, if the expression uses
    # an unknown name, if its evaluation fails, or if the type of the
    # result is unexpected.
    @staticmethod
    def _eval_item_expr(
        item: _ExprItemT,
        state: _GenState,
        allow_float: bool = False,
    ):
        syms = {}  # type: VariablesT
        syms.update(state.labels)

        # Set the `ICITTE` name to the current offset
        syms[_icitte_name] = state.offset

        # Add the current variables
        syms.update(state.variables)

        # Validate the node and its children
        _ExprValidator(item, set(syms.keys())).visit(item.expr)

        # Compile and evaluate expression node.
        #
        # NOTE: this is eval() on an expression coming from the Normand
        # input. The validation above only checks names: don't process
        # input which you don't trust.
        try:
            val = eval(compile(item.expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Convert `bool` result type to `int` to normalize
        if type(val) is bool:
            val = int(val)

        # Validate result type
        expected_types = {int}  # type: Set[type]
        type_msg = "`int`"

        if allow_float:
            expected_types.add(float)
            type_msg += " or `float`"

        if type(val) not in expected_types:
            _raise_error_for_item(
                "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                    item.expr_str, type_msg, type(val).__name__
                ),
                item,
            )

        return val

    # Returns the size, in bytes, required to encode the value `val`
    # with LEB128 (signed version if `is_signed` is `True`).
    @staticmethod
    def _leb128_size_for_val(val: int, is_signed: bool):
        if val < 0:
            # Equivalent upper bound.
            #
            # For example, if `val` is -128, then the full integer for
            # this number of bits would be [-128, 127].
            val = -val - 1

        # Number of bits (add one for the sign if needed)
        bits = val.bit_length() + int(is_signed)

        if bits == 0:
            # Zero still needs one byte
            bits = 1

        # Seven bits per byte
        return math.ceil(bits / 7)

    # Returns the offset `offset` aligned according to `item`.
    #
    # `item.val` is an alignment in bits: the result is the first
    # multiple of the corresponding number of bytes which is greater
    # than or equal to `offset`.
    @staticmethod
    def _align_offset(offset: int, item: _AlignOffset):
        align_bytes = item.val // 8
        return (offset + align_bytes - 1) // align_bytes * align_bytes

    # Computes the effective value for each repetition, conditional, and
    # LEB128 integer instance, filling `instance_vals` (if not `None`)
    # and returning `instance_vals`.
    #
    # At this point it must be known that, for a given variable-length
    # item, its expression only contains reachable names.
    #
    # When handling a `_Rep` or `_Cond` item, this function appends its
    # effective multiplier/value to `instance_vals` _before_ handling
    # its repeated/conditional item.
    #
    # When handling a `_VarAssign` item, this function only evaluates it
    # if all its names are reachable.
    #
    # This function updates `state` (offset, labels, and variables) as
    # it goes.
    @staticmethod
    def _compute_vl_instance_vals(
        item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
    ):
        if instance_vals is None:
            instance_vals = []

        if isinstance(item, _ScalarItem):
            state.offset += item.size
        elif type(item) is _Label:
            state.labels[item.name] = state.offset
        elif type(item) is _VarAssign:
            # Check if all the names are reachable: an unknown name
            # means we cannot evaluate the expression now.
            do_eval = all(
                name == _icitte_name
                or name in state.variables
                or name in state.labels
                for name in _Gen._names_of_expr(item.expr)
            )

            if do_eval:
                # Evaluate the expression and keep the result
                state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
        elif type(item) is _SetOffset:
            state.offset = item.val
        elif type(item) is _AlignOffset:
            state.offset = _Gen._align_offset(state.offset, item)
        elif isinstance(item, _Leb128Int):
            # Evaluate the expression
            val = _Gen._eval_item_expr(item, state)

            # Validate result
            if type(item) is _ULeb128Int and val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add the evaluation result to the variable-length item
            # instance values.
            instance_vals.append(val)

            # Update offset
            state.offset += _Gen._leb128_size_for_val(val, type(item) is _SLeb128Int)
        elif type(item) is _Rep:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state)

            # Validate result
            if val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,}".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add to variable-length item instance values
            instance_vals.append(val)

            # Process the repeated item `val` times
            for _ in range(val):
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Cond:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state)

            # Add to variable-length item instance values
            instance_vals.append(val)

            # Process the conditional item if needed
            if val:
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Group:
            # Labels added by this group must not outlive it
            prev_labels = state.labels.copy()

            # Process each item
            for subitem in item.items:
                _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

            state.labels = prev_labels

        return instance_vals

    # Dry handler for an item which doesn't change the offset.
    def _update_offset_noop(self, item: _Item, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Dry handler for a scalar item: adds its size to the offset.
    def _dry_handle_scalar_item(
        self, item: _ScalarItem, state: _GenState, next_vl_instance: int
    ):
        state.offset += item.size
        return next_vl_instance

    # Dry handler for a LEB128 integer item: adds the encoded size of
    # its precomputed value to the offset.
    def _dry_handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        state.offset += self._leb128_size_for_val(
            self._vl_instance_vals[next_vl_instance], type(item) is _SLeb128Int
        )

        return next_vl_instance + 1

    # Dry handler for a group item: dry-handles each contained item.
    def _dry_handle_group_item(
        self, item: _Group, state: _GenState, next_vl_instance: int
    ):
        for subitem in item.items:
            next_vl_instance = self._dry_handle_item(subitem, state, next_vl_instance)

        return next_vl_instance

    # Dry handler for a repetition item: dry-handles the repeated item
    # as many times as the precomputed multiplier.
    def _dry_handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        mul = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Dry handler for a conditional item: dry-handles the conditional
    # item if the precomputed value is true.
    def _dry_handle_cond_item(
        self, item: _Cond, state: _GenState, next_vl_instance: int
    ):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        val = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1

        if val:
            next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Dry handler for an offset alignment item.
    def _dry_handle_align_offset_item(
        self, item: _AlignOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = self._align_offset(state.offset, item)
        return next_vl_instance

    # Dry handler for an offset setting item.
    def _dry_handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Updates `state.offset` considering the generated data of `item`,
    # without generating any, and returns the updated next
    # variable-length item instance.
    def _dry_handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._dry_handle_item_funcs[type(item)](item, state, next_vl_instance)

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
        self._data.append(item.val)
        state.offset += item.size
        return next_vl_instance

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
        self._data += item.data
        state.offset += item.size
        return next_vl_instance

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(
        self, item: _SetBo, state: _GenState, next_vl_instance: int
    ):
        # Update current byte order
        state.bo = item.bo
        return next_vl_instance

    # Handles the variable assignment item `item`.
    def _handle_var_assign_item(
        self, item: _VarAssign, state: _GenState, next_vl_instance: int
    ):
        # Update variable
        state.variables[item.name] = self._eval_item_expr(item, state, True)
        return next_vl_instance

    # Handles the fixed-length integer item `item` having the value
    # `val`: appends the encoded bytes to the data.
    #
    # The caller updates the offset.
    def _handle_fl_int_item(self, val: int, item: _FlNum, state: _GenState):
        # Validate range
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, state.offset
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        data = struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if state.bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert state.bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes
        self._data += data

    # Handles the fixed-length floating point number item `item` having
    # the value `val`: appends the encoded bytes to the data.
    #
    # The caller updates the offset.
    def _handle_fl_float_item(self, val: float, item: _FlNum, state: _GenState):
        # Validate length
        if item.len not in (32, 64):
            _raise_error_for_item(
                "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                    item.len, val
                ),
                item,
            )

        # Encode result
        self._data += struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "f" if item.len == 32 else "d",
            ),
            val,
        )

    # Handles the fixed-length number item `item`.
    def _handle_fl_num_item(
        self, item: _FlNum, state: _GenState, next_vl_instance: int
    ):
        # Compute value
        val = self._eval_item_expr(item, state, True)

        # Validate current byte order
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Handle depending on type
        if type(val) is int:
            self._handle_fl_int_item(val, item, state)
        else:
            assert type(val) is float
            self._handle_fl_float_item(val, item, state)

        # Update offset
        state.offset += item.size

        return next_vl_instance

    # Handles the LEB128 integer item `item`.
    def _handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the precomputed value
        val = self._vl_instance_vals[next_vl_instance]

        # Size in bytes
        size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

        # For each byte
        for _ in range(size):
            # Seven LSBs, MSB of the byte set (continue)
            self._data.append((val & 0x7F) | 0x80)
            val >>= 7

        # Clear MSB of last byte (stop)
        self._data[-1] &= ~0x80

        # Update offset, like _compute_vl_instance_vals() and
        # _dry_handle_leb128_int_item() do: without this, what follows
        # (`ICITTE`, alignment padding, nested group labels, and the
        # final offset) would ignore the bytes appended above.
        state.offset += size

        # Consumed this instance
        return next_vl_instance + 1

    # Handles the group item `item`, only removing the immediate labels
    # from `state.labels` if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self,
        item: _Group,
        state: _GenState,
        next_vl_instance: int,
        remove_immediate_labels: bool = True,
    ):
        # Compute the values of the immediate (not nested) labels. Those
        # labels are reachable by any expression within the group.
        #
        # This is a dry run using a temporary state which starts at the
        # current offset: nothing is generated.
        tmp_state = _GenState({}, {}, state.offset, None)
        immediate_label_names = set()  # type: Set[str]
        tmp_next_vl_instance = next_vl_instance

        for subitem in item.items:
            if type(subitem) is _Label:
                # New immediate label
                state.labels[subitem.name] = tmp_state.offset
                immediate_label_names.add(subitem.name)

            tmp_next_vl_instance = self._dry_handle_item(
                subitem, tmp_state, tmp_next_vl_instance
            )

        # Handle each item now with the actual state
        for subitem in item.items:
            next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

        # Remove immediate labels if required so that outer items won't
        # reach inner labels.
        if remove_immediate_labels:
            for name in immediate_label_names:
                del state.labels[name]

        return next_vl_instance

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the precomputed repetition count
        mul = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the conditional item `item`.
    def _handle_cond_item(self, item: _Cond, state: _GenState, next_vl_instance: int):
        # Get the precomputed conditional value
        val = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        if val:
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Handles offset alignment item `item` (adds padding).
    def _handle_align_offset_item(
        self, item: _AlignOffset, state: _GenState, next_vl_instance: int
    ):
        init_offset = state.offset
        state.offset = self._align_offset(state.offset, item)

        # One padding byte per skipped offset
        self._data += bytes([item.pad_val] * (state.offset - init_offset))
        return next_vl_instance

    # Handles the label item `item`.
    #
    # There's nothing to do: _handle_group_item() already set the value
    # of the label.
    def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Handles the item `item`, returning the updated next
    # variable-length item instance.
    def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._item_handlers[type(item)](item, state, next_vl_instance)

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # Initial state
        self._data = bytearray()

        # Item handlers
        self._item_handlers = {
            _AlignOffset: self._handle_align_offset_item,
            _Byte: self._handle_byte_item,
            _Cond: self._handle_cond_item,
            _FlNum: self._handle_fl_num_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _SLeb128Int: self._handle_leb128_int_item,
            _Str: self._handle_str_item,
            _ULeb128Int: self._handle_leb128_int_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Dry item handlers (only updates the state offset)
        self._dry_handle_item_funcs = {
            _AlignOffset: self._dry_handle_align_offset_item,
            _Byte: self._dry_handle_scalar_item,
            _Cond: self._dry_handle_cond_item,
            _FlNum: self._dry_handle_scalar_item,
            _Group: self._dry_handle_group_item,
            _Label: self._update_offset_noop,
            _Rep: self._dry_handle_rep_item,
            _SetBo: self._update_offset_noop,
            _SetOffset: self._dry_handle_set_offset_item,
            _SLeb128Int: self._dry_handle_leb128_int_item,
            _Str: self._dry_handle_scalar_item,
            _ULeb128Int: self._dry_handle_leb128_int_item,
            _VarAssign: self._update_offset_noop,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Handle the group item, _not_ removing the immediate labels
        # because the `labels` property offers them.
        self._handle_group_item(group, state, 0, False)

        # This is actually the final state
        self._final_state = state
2059
2060
# Parses the input string `normand`, generates the bytes it encodes, and
# returns the corresponding `ParseResult` instance.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. No initial variables if `None`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. No initial labels if `None`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Replace missing initial dictionaries with fresh empty ones
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse first to get the main group item, and then generate from it
    main_group = _Parser(normand, variables, labels).res
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    # Wrap the generated data and the final state
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2100
2101
# Parses the command-line arguments and returns the resulting
# `argparse` namespace.
#
# The namespace has the `offset`, `byte_order`, `var`, `label`, and
# `path` attributes.
def _parse_cli_args():
    import argparse

    # Build parser, one argument at a time
    cli_parser = argparse.ArgumentParser()
    add_arg = cli_parser.add_argument

    add_arg(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    add_arg(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    add_arg(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    add_arg(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    add_arg("--version", action="version", version="Normand {}".format(__version__))
    add_arg(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse the actual command line
    return cli_parser.parse_args()
2150
2151
# Raises a command-line error (`RuntimeError`) of which the message is
# `msg` with a `Command-line error: ` prefix.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2155
2156
# Builds and returns a dictionary of names to integers from `args`, a
# list of `NAME=VAL` strings (or `None`, which means no entries).
#
# `NAME` must match the Python name pattern and `VAL` must only contain
# decimal digits: otherwise, this function raises a command-line error.
def _dict_from_arg(args: Optional[List[str]]):
    d = {}  # type: LabelsT

    if args is not None:
        for arg in args:
            # The whole argument must be a single assignment
            m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), arg)

            if m is None:
                _raise_cli_error("Invalid assignment {}".format(arg))

            name = m.group(1)
            d[name] = int(m.group(2))

    return d
2174
2175
# CLI entry point without exception handling.
#
# Reads the Normand input (from the path argument or from the standard
# input), parses it with the initial variables, labels, offset, and
# byte order of the command line, and writes the generated data to the
# standard output.
#
# Raises `RuntimeError` on command-line error. A parse error makes the
# process exit through _fail() with a `[PATH:]LINE:COL - MESSAGE`
# message.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (the message must show the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        prefix = ""

        if args.path is not None:
            # Absolute input path so that the location is unambiguous
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print (raw bytes)
    sys.stdout.buffer.write(res.data)
2225
2226
# Prints the message `msg` to the standard error, adding a final period
# if it's missing, and then exits with status 1.
def _fail(msg: str) -> NoReturn:
    final_msg = msg if msg.endswith(".") else msg + "."
    print(final_msg, file=sys.stderr)
    sys.exit(1)
2234
2235
# CLI entry point: runs the tool and turns any exception into a message
# on the standard error with exit status 1 (see _fail()).
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        err_msg = str(exc)
        _fail(err_msg)
2242
2243
# Only run the command-line tool when this module is executed as a
# script, not when it's imported.
if __name__ == "__main__":
    _run_cli()
This page took 0.076175 seconds and 3 git commands to generate.