Add offset alignment support
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
32 __author__ = "Philippe Proulx"
33 __version__ = "0.7.0"
34 __all__ = [
35 "ByteOrder",
36 "parse",
37 "ParseError",
38 "ParseResult",
39 "TextLoc",
40 "SymbolsT",
41 "__author__",
42 "__version__",
43 ]
44
45 import re
46 import abc
47 import ast
48 import sys
49 import enum
50 import math
51 import struct
52 from typing import (
53 Any,
54 Set,
55 Dict,
56 List,
57 Union,
58 Pattern,
59 Callable,
60 NoReturn,
61 Optional,
62 )
63
64
# Location within the source text, as a line number and a column
# number.
#
# Instances are created through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class TextLoc:
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a text location, bypassing __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    # Effective initializer, only called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number of this location.
    @property
    def line_no(self):
        return self._line_no

    # Column number of this location.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        fields = (self._line_no, self._col_no)
        return "TextLoc({}, {})".format(*fields)
92
93
94 # Any item.
95 class _Item:
96 def __init__(self, text_loc: TextLoc):
97 self._text_loc = text_loc
98
99 # Source text location.
100 @property
101 def text_loc(self):
102 return self._text_loc
103
104
# Scalar item: an item which exposes its size through the `size`
# property, to be provided by concrete subclasses.
class _ScalarItem(_Item):
    # Size of this item (bytes).
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
112
113
114 # A repeatable item.
115 class _RepableItem:
116 pass
117
118
# Single byte item holding the value `val`.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Value of this byte.
    @property
    def val(self):
        return self._val

    # A byte item always has a size of one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
136
137
# String item holding already encoded bytes.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes of this string.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({!r}, {!r})".format(self._data, self._text_loc)
155
156
# Byte order enumeration; the member values are the names which the
# Normand text uses (`be` and `le`).
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
165
166
# Byte order setting item.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
179
180
# Label item.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Name of this label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
194
195
# Offset setting item.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Offset to set (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
209
210
# Offset alignment item: an alignment and the value of the padding
# bytes.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment (bits).
    @property
    def val(self):
        return self._val

    # Value of each padding byte.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fmt = "_AlignOffset({!r}, {!r}, {!r})"
        return fmt.format(self._val, self._pad_val, self._text_loc)
232
233
234 # Mixin of containing an AST expression and its string.
235 class _ExprMixin:
236 def __init__(self, expr_str: str, expr: ast.Expression):
237 self._expr_str = expr_str
238 self._expr = expr
239
240 # Expression string.
241 @property
242 def expr_str(self):
243 return self._expr_str
244
245 # Expression node to evaluate.
246 @property
247 def expr(self):
248 return self._expr
249
250
# Variable assignment item: a variable name and the expression of which
# the result is to be assigned.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable to assign.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fmt = "_VarAssign({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._name, self._expr_str, self._expr, self._text_loc)
272
273
# Fixed-length number item: an expression and a length in bits (the
# encoding possibly requires more than one byte).
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight (floor division).
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        fmt = "_FlNum({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._expr_str, self._expr, self._len, self._text_loc)
299
300
# LEB128 integer item (common base of the unsigned and signed
# variants).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLoc):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the name of the concrete class so that subclasses don't
        # need their own __repr__().
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
314
315
# Unsigned variant of the LEB128 integer item.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
319
320
# Signed variant of the LEB128 integer item.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
324
325
# Group item: an ordered list of contained items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

    # Items contained in this group.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
339
340
# Repetition item: an item to repeat and the expression giving the
# repetition count.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fmt = "_Rep({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._item, self._expr_str, self._expr, self._text_loc)
362
363
364 # Expression item type.
365 _ExprItemT = Union[_FlNum, _Leb128Int, _VarAssign, _Rep]
366
367
# Parsing error: a message and the source text location of the error.
#
# Instances are created through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error, bypassing __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    # Effective initializer, only called by _create().
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location of the error.
    @property
    def text_loc(self):
        return self._text_loc
387
388
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
392
393
394 # Variable/label dictionary type.
395 SymbolsT = Dict[str, Union[int, float]]
396
397
398 # Python name pattern.
399 _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
400
401
402 # Normand parser.
403 #
404 # The constructor accepts a Normand input. After building, use the `res`
405 # property to get the resulting main group.
406 class _Parser:
407 # Builds a parser to parse the Normand input `normand`, parsing
408 # immediately.
409 def __init__(self, normand: str, variables: SymbolsT, labels: SymbolsT):
410 self._normand = normand
411 self._at = 0
412 self._line_no = 1
413 self._col_no = 1
414 self._label_names = set(labels.keys())
415 self._var_names = set(variables.keys())
416 self._parse()
417
# Parsing result (main group item).
@property
def res(self):
    main_group = self._res
    return main_group
422
# Text location built from the current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLoc._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
429
430 # Returns `True` if this parser is done parsing.
431 def _is_done(self):
432 return self._at == len(self._normand)
433
434 # Returns `True` if this parser isn't done parsing.
435 def _isnt_done(self):
436 return not self._is_done()
437
# Raises a parsing error having the message `msg` and located at the
# current text location.
def _raise_error(self, msg: str) -> NoReturn:
    here = self._text_loc
    _raise_error(msg, here)
442
443 # Tries to make the pattern `pat` match the current substring,
444 # returning the match object and updating `self._at`,
445 # `self._line_no`, and `self._col_no` on success.
446 def _try_parse_pat(self, pat: Pattern[str]):
447 m = pat.match(self._normand, self._at)
448
449 if m is None:
450 return
451
452 # Skip matched string
453 self._at += len(m.group(0))
454
455 # Update line number
456 self._line_no += m.group(0).count("\n")
457
458 # Update column number
459 for i in reversed(range(self._at)):
460 if self._normand[i] == "\n" or i == 0:
461 if i == 0:
462 self._col_no = self._at + 1
463 else:
464 self._col_no = self._at - i
465
466 break
467
468 # Return match object
469 return m
470
471 # Expects the pattern `pat` to match the current substring,
472 # returning the match object and updating `self._at`,
473 # `self._line_no`, and `self._col_no` on success, or raising a parse
474 # error with the message `error_msg` on error.
475 def _expect_pat(self, pat: Pattern[str], error_msg: str):
476 # Match
477 m = self._try_parse_pat(pat)
478
479 if m is None:
480 # No match: error
481 self._raise_error(error_msg)
482
483 # Return match object
484 return m
485
486 # Pattern for _skip_ws_and_comments()
487 _ws_or_syms_or_comments_pat = re.compile(
488 r"(?:[\s!/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
489 )
490
491 # Skips as many whitespaces, insignificant symbol characters, and
492 # comments as possible.
493 def _skip_ws_and_comments(self):
494 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
495
# Pattern for _try_parse_hex_byte(): a single hexadecimal nibble
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, insignificant symbols, and comments), returning a byte
# item on success.
#
# Raises a parsing error if a first nibble isn't followed by a second
# one.
def _try_parse_hex_byte(self):
    start_loc = self._text_loc

    # High nibble
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        # Not a hexadecimal byte
        return

    # Low nibble (mandatory at this point)
    self._skip_ws_and_comments()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Combine both nibbles
    val = (int(high.group(0), 16) << 4) | int(low.group(0), 16)
    return _Byte(val, start_loc)
519
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

# Tries to parse a binary byte (`%` followed with exactly eight bits,
# possibly separated by whitespaces, insignificant symbols, and
# comments), returning a byte item on success.
def _try_parse_bin_byte(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # Not a binary byte
        return

    # Accumulate the eight expected bits, most significant bit first
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        bit_m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
        val = (val << 1) | int(bit_m.group(0))

    return _Byte(val, start_loc)
543
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$\s*")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a decimal constant
# between -128 and 255), returning a byte item on success.
#
# A negative value is stored as its two's complement.
def _try_parse_dec_byte(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # Not a decimal byte
        return

    # The value is mandatory at this point
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Signed value
    val = int(m.group("neg") + m.group("val"))

    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), start_loc)

    # Two's complement
    return _Byte(val & 0xFF, start_loc)
572
573 # Tries to parse a byte, returning a byte item on success.
574 def _try_parse_byte(self):
575 # Hexadecimal
576 item = self._try_parse_hex_byte()
577
578 if item is not None:
579 return item
580
581 # Binary
582 item = self._try_parse_bin_byte()
583
584 if item is not None:
585 return item
586
587 # Decimal
588 item = self._try_parse_dec_byte()
589
590 if item is not None:
591 return item
592
# Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# Any escape sequence candidate (backslash followed with one character)
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a literal string, returning a string item on success.
#
# The string may have a `u16be`, `u16le`, `u32be`, or `u32le` prefix
# to select the UTF-16 or UTF-32 encoding with a given byte order;
# without such a prefix, the encoding is UTF-8.
#
# Raises a parsing error if the string isn't terminated with `"`.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass so that
    # the result of one replacement is never taken for the beginning
    # of another escape sequence (for example, `\\n` is a backslash
    # followed with `n`, not a backslash followed with a newline).
    #
    # An unknown escape sequence remains as is.
    escape_seq_strs = self._str_escape_seq_strs
    val = self._str_escape_seq_pat.sub(
        lambda esc_m: escape_seq_strs.get(esc_m.group(1), esc_m.group(0)),
        m.group(0),
    )

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
647
# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(")
_group_suffix_pat = re.compile(r"\)")

# Tries to parse a group (items between `(` and `)`), returning a group
# item on success.
#
# Raises a parsing error if the closing `)` is missing.
def _try_parse_group(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._group_prefix_pat) is None:
        # Not a group
        return

    # Contained items
    contained_items = self._parse_items()

    # The group must end here
    self._skip_ws_and_comments()
    self._expect_pat(
        self._group_suffix_pat, "Expecting an item or `)` (end of group)"
    )

    return _Group(contained_items, start_loc)
672
673 # Returns a stripped expression string and an AST expression node
674 # from the expression string `expr_str` at text location `text_loc`.
675 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
676 # Create an expression node from the expression string
677 expr_str = expr_str.strip().replace("\n", " ")
678
679 try:
680 expr = ast.parse(expr_str, mode="eval")
681 except SyntaxError:
682 _raise_error(
683 "Invalid expression `{}`: invalid syntax".format(expr_str),
684 text_loc,
685 )
686
687 return expr_str, expr
688
# Patterns for _try_parse_num_and_attr()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse an expression followed with `:` and an attribute,
# returning a number item on success.
#
# The attribute is either a length in bits (fixed-length number item)
# or `uleb128`/`sleb128` (LEB128 integer item); anything else raises a
# parsing error.
def _try_parse_num_and_attr(self):
    start_loc = self._text_loc

    # Expression part
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # Not a number with an attribute
        return

    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), start_loc)

    # Fixed length?
    m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

    if m_len is not None:
        return _FlNum(expr_str, expr, int(m_len.group(0)), start_loc)

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    if m_leb128.group(1) == "u":
        item_cls = _ULeb128Int
    else:
        item_cls = _SLeb128Int

    return item_cls(expr_str, expr, start_loc)
733
# Pattern for _try_parse_var_assign()
_var_assign_pat = re.compile(
    r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
)

# Tries to parse a variable assignment (`NAME = EXPR`), returning a
# variable assignment item on success.
#
# Raises a parsing error if the name is reserved or is the name of an
# existing label.
#
# The name is added to the known variable names before the expression
# is parsed.
def _try_parse_var_assign(self):
    start_loc = self._text_loc
    m = self._try_parse_pat(self._var_assign_pat)

    if m is None:
        # Not a variable assignment
        return

    name, raw_expr_str = m.group("name", "expr")

    # Validate the name
    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), start_loc
        )

    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), start_loc)

    # Now a known variable name
    self._var_names.add(name)

    # Parse the expression
    expr_str, expr = self._ast_expr_from_str(raw_expr_str, start_loc)
    return _VarAssign(name, expr_str, expr, start_loc)
775
# Pattern for _try_parse_set_bo()
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning a byte
# order setting item on success.
def _try_parse_set_bo(self):
    start_loc = self._text_loc
    m = self._try_parse_pat(self._bo_pat)

    if m is None:
        # Not a byte order name
        return

    bo_name = m.group(0)

    if bo_name == "be":
        bo = ByteOrder.BE
    else:
        assert bo_name == "le"
        bo = ByteOrder.LE

    return _SetBo(bo, start_loc)
797
798 # Patterns for _try_parse_val_or_bo()
799 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
800 _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")
801
802 # Tries to parse a value, a variable assignment, or a byte order
803 # setting, returning an item on success.
804 def _try_parse_val_or_var_assign_or_set_bo(self):
805 # Match prefix
806 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
807 # No match
808 return
809
810 # Variable assignment item?
811 item = self._try_parse_var_assign()
812
813 if item is None:
814 # Number item?
815 item = self._try_parse_num_and_attr()
816
817 if item is None:
818 # Byte order setting item?
819 item = self._try_parse_set_bo()
820
821 if item is None:
822 # At this point it's invalid
823 self._raise_error(
824 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
825 )
826
827 # Expect suffix
828 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
829 return item
830
# Pattern for _try_parse_set_offset_val() and _try_parse_rep():
# hexadecimal (`0x` prefix) or decimal positive constant integer
_pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

# Tries to parse an offset setting value (after the initial `<`),
# returning an offset setting item on success.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Convert with an explicit base: int() with a base of zero rejects
    # a decimal constant having leading zeros (for example `010`),
    # which the pattern accepts, with a `ValueError` instead of a
    # parsing error.
    val_str = m.group(0)

    if val_str[:2] in ("0x", "0X"):
        val = int(val_str, 16)
    else:
        val = int(val_str)

    # Return item
    return _SetOffset(val, begin_text_loc)
848
# Tries to parse a label name (after the initial `<`), returning a
# label item on success.
#
# Raises a parsing error if the name is reserved, is the name of an
# existing label, or is the name of an existing variable.
def _try_parse_label_name(self):
    start_loc = self._text_loc
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        # Not a label name
        return

    label_name = m.group(0)

    # Validate the name
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), start_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), start_loc)

    if label_name in self._var_names:
        _raise_error("Existing variable named `{}`".format(label_name), start_loc)

    # Now a known label name
    self._label_names.add(label_name)
    return _Label(label_name, start_loc)
880
881 # Patterns for _try_parse_label_or_set_offset()
882 _label_set_offset_prefix_pat = re.compile(r"<\s*")
883 _label_set_offset_suffix_pat = re.compile(r"\s*>")
884
885 # Tries to parse a label or an offset setting, returning an item on
886 # success.
887 def _try_parse_label_or_set_offset(self):
888 # Match prefix
889 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
890 # No match
891 return
892
893 # Offset setting item?
894 item = self._try_parse_set_offset_val()
895
896 if item is None:
897 # Label item?
898 item = self._try_parse_label_name()
899
900 if item is None:
901 # At this point it's invalid
902 self._raise_error("Expecting a label name or an offset setting value")
903
904 # Expect suffix
905 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
906 return item
907
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@\s*")
_align_offset_val_pat = re.compile(r"(\d+)\s*")
_align_offset_pad_val_prefix_pat = re.compile(r"~\s*")

# Tries to parse an offset alignment (`@`, an alignment in bits, and
# optionally `~` followed with a padding byte value), returning an
# offset alignment item on success.
#
# The padding byte value is zero when not specified.
#
# Raises a parsing error if the alignment isn't a positive multiple of
# eight or if the padding byte value is greater than 255.
def _try_parse_align_offset(self):
    begin_text_loc = self._text_loc

    # Match prefix
    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No match
        return

    align_text_loc = self._text_loc
    m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )

    # Validate alignment
    val = int(m.group(1))

    if val <= 0 or (val % 8) != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                val
            ),
            align_text_loc,
        )

    # Padding value?
    pad_val = 0

    if self._try_parse_pat(self._align_offset_pad_val_prefix_pat) is not None:
        pad_val_text_loc = self._text_loc
        m = self._expect_pat(self._pos_const_int_pat, "Expecting a byte value")

        # Convert with an explicit base: int() with a base of zero
        # rejects a decimal constant having leading zeros (for example
        # `007`), which the pattern accepts, with a `ValueError`
        # instead of a parsing error.
        pad_val_str = m.group(0)

        if pad_val_str[:2] in ("0x", "0X"):
            pad_val = int(pad_val_str, 16)
        else:
            pad_val = int(pad_val_str)

        # Validate
        if pad_val > 255:
            _raise_error(
                "Invalid padding byte value {}".format(pad_val),
                pad_val_text_loc,
            )

    # Return item
    return _AlignOffset(val, pad_val, begin_text_loc)
958
959 # Tries to parse a base item (anything except a repetition),
960 # returning it on success.
961 def _try_parse_base_item(self):
962 # Byte item?
963 item = self._try_parse_byte()
964
965 if item is not None:
966 return item
967
968 # String item?
969 item = self._try_parse_str()
970
971 if item is not None:
972 return item
973
974 # Value, variable assignment, or byte order setting item?
975 item = self._try_parse_val_or_var_assign_or_set_bo()
976
977 if item is not None:
978 return item
979
980 # Label or offset setting item?
981 item = self._try_parse_label_or_set_offset()
982
983 if item is not None:
984 return item
985
986 # Offset alignment item?
987 item = self._try_parse_align_offset()
988
989 if item is not None:
990 return item
991
992 # Group item?
993 item = self._try_parse_group()
994
995 if item is not None:
996 return item
997
998 # Pattern for _try_parse_rep()
999 _rep_prefix_pat = re.compile(r"\*\s*")
1000 _rep_expr_prefix_pat = re.compile(r"\{")
1001 _rep_expr_pat = re.compile(r"[^}p]+")
1002 _rep_expr_suffix_pat = re.compile(r"\}")
1003
1004 # Tries to parse a repetition, returning the expression string and
1005 # AST expression node on success.
1006 def _try_parse_rep(self):
1007 # Match prefix
1008 if self._try_parse_pat(self._rep_prefix_pat) is None:
1009 # No match
1010 return
1011
1012 # Expect and return a decimal multiplier
1013 self._skip_ws_and_comments()
1014
1015 # Integer?
1016 m = self._try_parse_pat(self._pos_const_int_pat)
1017
1018 if m is None:
1019 # Expression?
1020 if self._try_parse_pat(self._rep_expr_prefix_pat) is None:
1021 # At this point it's invalid
1022 self._raise_error("Expecting a positive integral multiplier or `{`")
1023
1024 # Expect an expression
1025 expr_str_loc = self._text_loc
1026 m = self._expect_pat(self._rep_expr_pat, "Expecting an expression")
1027 expr_str = self._ast_expr_from_str(m.group(0), expr_str_loc)
1028
1029 # Expect `}`
1030 self._expect_pat(self._rep_expr_suffix_pat, "Expecting `}`")
1031 expr_str = m.group(0)
1032 else:
1033 expr_str_loc = self._text_loc
1034 expr_str = m.group(0)
1035
1036 return self._ast_expr_from_str(expr_str, expr_str_loc)
1037
# Tries to parse a base item, wrapping it into a repetition item if
# it's repeatable and followed with a repetition.
#
# On success, appends the resulting item to `items` and returns
# `True`; returns `False` (leaving `items` as is) if there's no item.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    item = self._try_parse_base_item()

    if item is None:
        # No item
        return False

    if isinstance(item, _RepableItem):
        # A repetition may follow a repeatable item
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_ret = self._try_parse_rep()

        if rep_ret is not None:
            expr_str, expr = rep_ret
            item = _Rep(item, expr_str, expr, rep_text_loc)

    items.append(item)
    return True
1063
1064 # Parses and returns items, skipping whitespaces, insignificant
1065 # symbols, and comments when allowed, and stopping at the first
1066 # unknown character.
1067 def _parse_items(self) -> List[_Item]:
1068 items = [] # type: List[_Item]
1069
1070 while self._isnt_done():
1071 # Try to append item
1072 if not self._try_append_item(items):
1073 # Unknown at this point
1074 break
1075
1076 return items
1077
# Parses the whole Normand input, setting `self._res` to the main group
# item on success.
#
# Raises a parsing error if something which isn't an item remains
# after the first level items.
def _parse(self):
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items
    top_items = self._parse_items()

    # Nothing but skippable text may remain
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_ch = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_ch))

    # Main group item
    self._res = _Group(top_items, self._text_loc)
1099
1100
# Result of parse(): the generated data and the final state (variables,
# labels, offset, and byte order).
#
# Instances are created through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class ParseResult:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing result, bypassing __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        result = cls.__new__(cls)
        result._init(data, variables, labels, offset, bo)
        return result

    # Effective initializer, only called by _create().
    def _init(
        self,
        data: bytearray,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars, self._labels = variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1158
1159
# Raises a parsing error having the message `msg` and located at the
# text location of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1164
1165
1166 # The `ICITTE` reserved name.
1167 _icitte_name = "ICITTE"
1168
1169
1170 # Base node visitor.
1171 #
1172 # Calls the _visit_name() method for each name node which isn't the name
1173 # of a call.
1174 class _NodeVisitor(ast.NodeVisitor):
1175 def __init__(self):
1176 self._parent_is_call = False
1177
1178 def generic_visit(self, node: ast.AST):
1179 if type(node) is ast.Call:
1180 self._parent_is_call = True
1181 elif type(node) is ast.Name and not self._parent_is_call:
1182 self._visit_name(node.id)
1183
1184 super().generic_visit(node)
1185 self._parent_is_call = False
1186
1187 @abc.abstractmethod
1188 def _visit_name(self, name: str):
1189 ...
1190
1191
# Expression validator: raises a parsing error, located at the item, for
# any visited name which isn't allowed.
#
# A name is allowed if it's part of `allowed_names`; the reserved
# `ICITTE` name is allowed only if `icitte_allowed` is true.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str], icitte_allowed: bool):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names
        self._icitte_allowed = icitte_allowed

    def _visit_name(self, name: str):
        if name == _icitte_name:
            # Reserved name: valid only when explicitly allowed
            if not self._icitte_allowed:
                _raise_error(
                    "Illegal reserved name `{}` in expression `{}`".format(
                        _icitte_name, self._item.expr_str
                    ),
                    self._item.text_loc,
                )

            return

        if name in self._allowed_names:
            # Known and reachable variable/label name
            return

        # Unknown or unreachable name: build the error message,
        # listing the legal names if any
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )
        legal_names = self._allowed_names.copy()

        if self._icitte_allowed:
            legal_names.add(_icitte_name)

        if len(legal_names) > 0:
            quoted_names = sorted("`{}`".format(legal_name) for legal_name in legal_names)
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._item.text_loc)
1231
1232
# Expression visitor collecting all the contained non-called names.
#
# After visiting an expression, the `names` property is the set of
# the collected names.
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # Let the base class initialize its own state instead of
        # duplicating it here.
        super().__init__()
        self._names = set()  # type: Set[str]

    # Set of the names collected so far.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
1245
1246
# Generator state: current variables, labels, offset, and byte order.
#
# The state keeps its own shallow copies of `variables` and `labels`,
# so updating the state doesn't modify the dictionaries of the caller.
class _GenState:
    def __init__(
        self,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Plain values
        self.offset, self.bo = offset, bo

        # Private copies of the symbol dictionaries
        self.labels = labels.copy()
        self.variables = variables.copy()
1260
1261
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Validate that each repetition and LEB128 integer expression uses
#    only reachable names (`ICITTE` is legal within a LEB128 integer
#    expression, but not within a repetition expression).
#
# 2. Compute and keep the effective repetition count and LEB128 integer
#    value for each repetition and LEB128 integer instance.
#
# 3. Generate bytes, updating the initial state as it goes which becomes
#    the final state after the operation.
#
#    During the generation, when handling a `_Rep` or `_Leb128Int` item,
#    we already have the effective repetition count or value of the
#    instance.
#
#    When handling a `_Group` item, first update the current labels with
#    all the immediate (not nested) labels, and then handle each
#    contained item. This gives contained item access to "future" outer
#    labels. Then remove the immediate labels from the state so that
#    outer items don't have access to inner labels.
class _Gen:
    def __init__(
        self,
        group: _Group,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Step 1: validate the names of variable-length item expressions
        self._validate_vl_exprs(group, set(variables.keys()), set(labels.keys()))

        # Step 2: precompute the value of each variable-length item
        # instance using a dedicated state.
        self._vl_instance_vals = self._compute_vl_instance_vals(
            group, _GenState(variables, labels, offset, bo)
        )

        # Step 3: generate the bytes using a fresh state
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Returns the set of used, non-called names within the AST
    # expression `expr`.
    @staticmethod
    def _names_of_expr(expr: ast.Expression):
        visitor = _ExprNamesVisitor()
        visitor.visit(expr)
        return visitor.names

    # Validates that all the repetition and LEB128 integer expressions
    # within `group` don't refer, directly or indirectly, to subsequent
    # labels.
    #
    # The strategy here is to keep a set of allowed label names, per
    # group, initialized to `allowed_label_names`, and a set of allowed
    # variable names initialized to `allowed_variable_names`.
    #
    # Then, depending on the type of `item`:
    #
    # `_Label`:
    #     Add its name to the local allowed label names: a label
    #     occurring before a repetition, and not within a nested group,
    #     is always reachable.
    #
    # `_VarAssign`:
    #     If all the names within its expression are allowed, then add
    #     its name to the allowed variable names.
    #
    #     Otherwise, remove its name from the allowed variable names (if
    #     it's in there): a variable which refers to an unreachable name
    #     is unreachable itself.
    #
    # `_Rep` and `_Leb128`:
    #     Make sure all the names within its expression are allowed.
    #
    # `_Group`:
    #     Call this function for each contained item with a _copy_ of
    #     the current allowed label names and the same current allowed
    #     variable names.
    #
    # Both sets are modified in place.
    @staticmethod
    def _validate_vl_exprs(
        item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
    ):
        if type(item) is _Label:
            allowed_label_names.add(item.name)
        elif type(item) is _VarAssign:
            # This variable is reachable if, and only if, all the names
            # of its expression are reachable.
            reachable_names = (
                allowed_label_names | allowed_variable_names | {_icitte_name}
            )

            if _Gen._names_of_expr(item.expr) <= reachable_names:
                allowed_variable_names.add(item.name)
            else:
                allowed_variable_names.discard(item.name)
        elif isinstance(item, _Leb128Int):
            # Validate the expression (`ICITTE` allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, True
            ).visit(item.expr)
        elif type(item) is _Rep:
            # Validate the expression first (`ICITTE` not allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, False
            ).visit(item.expr)

            # Validate inner item
            _Gen._validate_vl_exprs(
                item.item, allowed_variable_names, allowed_label_names
            )
        elif type(item) is _Group:
            # Copy `allowed_label_names` so that this frame cannot
            # access the nested label names.
            group_allowed_label_names = allowed_label_names.copy()

            for subitem in item.items:
                _Gen._validate_vl_exprs(
                    subitem, allowed_variable_names, group_allowed_label_names
                )

    # Evaluates the expression of `item` considering the current
    # generation state `state` and returns the result.
    #
    # If `allow_icitte` is `True`, then the `ICITTE` name is available
    # for the expression to evaluate.
    #
    # If `allow_float` is `True`, then the type of the result may be
    # `float` too.
    #
    # Reports a parse error if the expression uses an illegal name, if
    # its evaluation fails, or if the type of its result is unexpected.
    @staticmethod
    def _eval_item_expr(
        item: _ExprItemT,
        state: _GenState,
        allow_icitte: bool,
        allow_float: bool = False,
    ):
        syms = state.labels.copy()

        # Set the `ICITTE` name to the current offset, if any
        if allow_icitte:
            syms[_icitte_name] = state.offset

        # Add the current variables (a variable wins over a label or
        # `ICITTE` having the same name).
        syms.update(state.variables)

        # Validate the node and its children
        _ExprValidator(item, set(syms.keys()), True).visit(item.expr)

        # Compile and evaluate expression node.
        #
        # NOTE: eval() isn't sandboxed here: the validation above only
        # checks non-called names, so the expression may still call
        # anything reachable from the globals of this module. Only
        # process trusted input.
        try:
            val = eval(compile(item.expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Validate result type
        expected_types = {int}  # type: Set[type]
        type_msg = "`int`"

        if allow_float:
            expected_types.add(float)
            type_msg += " or `float`"

        if type(val) not in expected_types:
            _raise_error_for_item(
                "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                    item.expr_str, type_msg, type(val).__name__
                ),
                item,
            )

        return val

    # Returns the size, in bytes, required to encode the value `val`
    # with LEB128 (signed version if `is_signed` is `True`).
    @staticmethod
    def _leb128_size_for_val(val: int, is_signed: bool):
        if val < 0:
            # Equivalent upper bound.
            #
            # For example, if `val` is -128, then the full integer for
            # this number of bits would be [-128, 127].
            val = -val - 1

        # Number of bits (add one for the sign if needed)
        bits = val.bit_length() + int(is_signed)

        if bits == 0:
            # Zero still needs one byte
            bits = 1

        # Seven bits per byte
        return math.ceil(bits / 7)

    # Returns the offset `offset` aligned according to `item`.
    #
    # `item.val` is an alignment in bits: the result is the smallest
    # multiple of `item.val // 8` bytes which is greater than or equal
    # to `offset`.
    @staticmethod
    def _align_offset(offset: int, item: _AlignOffset):
        align_bytes = item.val // 8
        return (offset + align_bytes - 1) // align_bytes * align_bytes

    # Computes the effective value for each repetition and LEB128
    # integer instance, filling `instance_vals` (if not `None`) and
    # returning `instance_vals`.
    #
    # At this point it must be known that, for a given variable-length
    # item, its expression only contains reachable names.
    #
    # When handling a `_Rep` item, this function appends its effective
    # multiplier to `instance_vals` _before_ handling its repeated item.
    #
    # When handling a `_VarAssign` item, this function only evaluates it
    # if all its names are reachable.
    #
    # This function updates `state` as it goes.
    @staticmethod
    def _compute_vl_instance_vals(
        item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
    ):
        if instance_vals is None:
            instance_vals = []

        if isinstance(item, _ScalarItem):
            state.offset += item.size
        elif type(item) is _Label:
            state.labels[item.name] = state.offset
        elif type(item) is _VarAssign:
            # Only evaluate if all the names are known at this point
            do_eval = all(
                name == _icitte_name
                or name in state.variables
                or name in state.labels
                for name in _Gen._names_of_expr(item.expr)
            )

            if do_eval:
                # Evaluate the expression and keep the result
                state.variables[item.name] = _Gen._eval_item_expr(
                    item, state, True, True
                )
        elif type(item) is _SetOffset:
            state.offset = item.val
        elif type(item) is _AlignOffset:
            state.offset = _Gen._align_offset(state.offset, item)
        elif isinstance(item, _Leb128Int):
            # Evaluate the expression
            val = _Gen._eval_item_expr(item, state, True)

            # Validate result
            if type(item) is _ULeb128Int and val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add the evaluation result to the variable-length item
            # instance values.
            instance_vals.append(val)

            # Update offset
            state.offset += _Gen._leb128_size_for_val(val, type(item) is _SLeb128Int)
        elif type(item) is _Rep:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state, False)

            # Validate result
            if val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,}".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add to repetition instance values
            instance_vals.append(val)

            # Process the repeated item `val` times
            for _ in range(val):
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Group:
            prev_labels = state.labels.copy()

            # Process each item
            for subitem in item.items:
                _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

            # Nested labels aren't reachable from the outside
            state.labels = prev_labels

        return instance_vals

    # Dry handler for an item which doesn't change the offset.
    def _update_offset_noop(self, item: _Item, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Dry handler for a scalar item: advances the offset by its size.
    def _dry_handle_scalar_item(
        self, item: _ScalarItem, state: _GenState, next_vl_instance: int
    ):
        state.offset += item.size
        return next_vl_instance

    # Dry handler for a LEB128 integer item: advances the offset by the
    # encoded size of the precomputed value of this instance.
    def _dry_handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        state.offset += self._leb128_size_for_val(
            self._vl_instance_vals[next_vl_instance], type(item) is _SLeb128Int
        )

        return next_vl_instance + 1

    # Dry handler for a group item: dry-handles each contained item.
    def _dry_handle_group_item(
        self, item: _Group, state: _GenState, next_vl_instance: int
    ):
        for subitem in item.items:
            next_vl_instance = self._dry_handle_item(subitem, state, next_vl_instance)

        return next_vl_instance

    # Dry handler for a repetition item: dry-handles the repeated item
    # as many times as the precomputed multiplier of this instance.
    def _dry_handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        mul = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Dry handler for an offset alignment item: aligns the offset.
    def _dry_handle_align_offset_item(
        self, item: _AlignOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = self._align_offset(state.offset, item)
        return next_vl_instance

    # Dry handler for an offset setting item: sets the offset.
    def _dry_handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Updates `state.offset` considering the generated data of `item`,
    # without generating any, and returns the updated next
    # variable-length item instance.
    def _dry_handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._dry_handle_item_funcs[type(item)](item, state, next_vl_instance)

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
        self._data.append(item.val)
        state.offset += item.size
        return next_vl_instance

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
        self._data += item.data
        state.offset += item.size
        return next_vl_instance

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(
        self, item: _SetBo, state: _GenState, next_vl_instance: int
    ):
        # Update current byte order
        state.bo = item.bo
        return next_vl_instance

    # Handles the variable assignment item `item`.
    def _handle_var_assign_item(
        self, item: _VarAssign, state: _GenState, next_vl_instance: int
    ):
        # Update variable
        state.variables[item.name] = self._eval_item_expr(item, state, True, True)
        return next_vl_instance

    # Handles the fixed-length integer item `item` having the value
    # `val`, appending the encoded bytes to the generated data.
    #
    # This method doesn't update `state.offset`: the caller does.
    def _handle_fl_int_item(self, val: int, item: _FlNum, state: _GenState):
        # Validate range
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, state.offset
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        data = struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if state.bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert state.bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes
        self._data += data

    # Handles the fixed-length floating point number item `item` having
    # the value `val`, appending the encoded bytes to the generated
    # data.
    #
    # This method doesn't update `state.offset`: the caller does.
    def _handle_fl_float_item(self, val: float, item: _FlNum, state: _GenState):
        # Validate length
        if item.len not in (32, 64):
            _raise_error_for_item(
                "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                    item.len, val
                ),
                item,
            )

        # Encode result
        self._data += struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "f" if item.len == 32 else "d",
            ),
            val,
        )

    # Handles the fixed-length number item `item`.
    def _handle_fl_num_item(
        self, item: _FlNum, state: _GenState, next_vl_instance: int
    ):
        # Compute value
        val = self._eval_item_expr(item, state, True, True)

        # Validate current byte order
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Handle depending on type
        if type(val) is int:
            self._handle_fl_int_item(val, item, state)
        else:
            assert type(val) is float
            self._handle_fl_float_item(val, item, state)

        # Update offset
        state.offset += item.size

        return next_vl_instance

    # Handles the LEB128 integer item `item`.
    def _handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the precomputed value
        val = self._vl_instance_vals[next_vl_instance]

        # Size in bytes
        size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

        # For each byte
        for _ in range(size):
            # Seven LSBs, MSB of the byte set (continue)
            self._data.append((val & 0x7F) | 0x80)
            val >>= 7

        # Clear MSB of last byte (stop)
        self._data[-1] &= ~0x80

        # Update offset, like _compute_vl_instance_vals() and
        # _dry_handle_leb128_int_item() do: without this, `ICITTE`,
        # alignment padding, and the final offset would be wrong after
        # this item.
        state.offset += size

        # Consumed this instance
        return next_vl_instance + 1

    # Handles the group item `item`, only removing the immediate labels
    # from `state.labels` if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self,
        item: _Group,
        state: _GenState,
        next_vl_instance: int,
        remove_immediate_labels: bool = True,
    ):
        # Compute the values of the immediate (not nested) labels. Those
        # labels are reachable by any expression within the group.
        tmp_state = _GenState({}, {}, state.offset, None)
        immediate_label_names = set()  # type: Set[str]
        tmp_next_vl_instance = next_vl_instance

        for subitem in item.items:
            if type(subitem) is _Label:
                # New immediate label
                state.labels[subitem.name] = tmp_state.offset
                immediate_label_names.add(subitem.name)

            tmp_next_vl_instance = self._dry_handle_item(
                subitem, tmp_state, tmp_next_vl_instance
            )

        # Handle each item now with the actual state
        for subitem in item.items:
            next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

        # Remove immediate labels if required so that outer items won't
        # reach inner labels.
        if remove_immediate_labels:
            for name in immediate_label_names:
                del state.labels[name]

        return next_vl_instance

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the precomputed repetition count
        mul = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Handles offset alignment item `item` (adds padding).
    def _handle_align_offset_item(
        self, item: _AlignOffset, state: _GenState, next_vl_instance: int
    ):
        init_offset = state.offset
        state.offset = self._align_offset(state.offset, item)

        # One padding byte per skipped offset
        self._data += bytes([item.pad_val] * (state.offset - init_offset))
        return next_vl_instance

    # Handles the label item `item`.
    #
    # Nothing to do: _handle_group_item() already set the value of
    # each immediate label of the group.
    def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Handles the item `item`, returning the updated next
    # variable-length item instance.
    def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._item_handlers[type(item)](item, state, next_vl_instance)

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # Initial state
        self._data = bytearray()

        # Item handlers
        self._item_handlers = {
            _AlignOffset: self._handle_align_offset_item,
            _Byte: self._handle_byte_item,
            _FlNum: self._handle_fl_num_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _SLeb128Int: self._handle_leb128_int_item,
            _Str: self._handle_str_item,
            _ULeb128Int: self._handle_leb128_int_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Dry item handlers (only updates the state offset)
        self._dry_handle_item_funcs = {
            _AlignOffset: self._dry_handle_align_offset_item,
            _Byte: self._dry_handle_scalar_item,
            _FlNum: self._dry_handle_scalar_item,
            _Group: self._dry_handle_group_item,
            _Label: self._update_offset_noop,
            _Rep: self._dry_handle_rep_item,
            _SetBo: self._update_offset_noop,
            _SetOffset: self._dry_handle_set_offset_item,
            _SLeb128Int: self._dry_handle_leb128_int_item,
            _Str: self._dry_handle_scalar_item,
            _ULeb128Int: self._dry_handle_leb128_int_item,
            _VarAssign: self._update_offset_noop,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Handle the group item, _not_ removing the immediate labels
        # because the `labels` property offers them.
        self._handle_group_item(group, state, 0, False)

        # This is actually the final state
        self._final_state = state
1899
1900
# Parses the input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[SymbolsT] = None,
    init_labels: Optional[SymbolsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Effective initial symbols
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text into a main group, and then generate from it
    main_group = _Parser(normand, variables, labels).res
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1940
1941
# Builds the command-line argument parser, parses the command-line
# arguments with it, and returns the resulting namespace.
def _parse_cli_args():
    import argparse

    # Positional arguments and keyword arguments of each
    # add_argument() call, in order.
    arg_specs = [
        (
            ("--offset",),
            dict(
                metavar="OFFSET",
                action="store",
                type=int,
                default=0,
                help="initial offset (positive)",
            ),
        ),
        (
            ("-b", "--byte-order"),
            dict(
                metavar="BO",
                choices=["be", "le"],
                type=str,
                help="initial byte order (`be` or `le`)",
            ),
        ),
        (
            ("--var",),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial variable (may be repeated)",
            ),
        ),
        (
            ("-l", "--label"),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial label (may be repeated)",
            ),
        ),
        (
            ("--version",),
            dict(action="version", version="Normand {}".format(__version__)),
        ),
        (
            ("path",),
            dict(
                metavar="PATH",
                action="store",
                nargs="?",
                help="input path (none means standard input)",
            ),
        ),
    ]

    # Build parser
    parser = argparse.ArgumentParser()

    for name_or_flags, options in arg_specs:
        parser.add_argument(*name_or_flags, **options)

    # Parse
    return parser.parse_args()
1990
1991
# Reports a command-line error: raises `RuntimeError` with a message
# built from `msg`.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
1995
1996
# Converts the list of `NAME=VAL` strings `args` to a dictionary of
# names to integral values and returns it.
#
# `args` may be `None`, in which case the returned dictionary is
# empty.
#
# Reports a command-line error for any entry which isn't a Python name
# followed with `=` and decimal digits.
def _dict_from_arg(args: Optional[List[str]]):
    symbols = {}  # type: SymbolsT

    if args is not None:
        # Same pattern for all the entries
        assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

        for assignment in args:
            match = assign_pat.match(assignment)

            if match is None:
                _raise_cli_error("Invalid assignment {}".format(assignment))

            name, val_str = match.group(1), match.group(2)
            symbols[name] = int(val_str)

    return symbols
2014
2015
# CLI entry point without exception handling.
#
# Reads the Normand input from the path given on the command line (or
# from the standard input if none), parses it using the initial
# variables, labels, offset, and byte order given on the command line,
# and writes the resulting bytes to the standard output.
#
# On a parse error, prints the location and message of the error and
# exits through _fail(). Any other error propagates to the caller.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset
    if args.offset < 0:
        # Include the offending value in the message
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The argument parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        prefix = ""

        if args.path is not None:
            # Prefix the location with the absolute input path
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print
    sys.stdout.buffer.write(res.data)
2065
2066
# Prints the message `msg` to the standard error, making sure it ends
# with a period, and then exits with status 1.
def _fail(msg: str) -> NoReturn:
    final_msg = msg if msg.endswith(".") else msg + "."
    print(final_msg, file=sys.stderr)
    sys.exit(1)
2074
2075
# CLI entry point.
#
# Runs _try_run_cli(), converting any exception to an error message
# printed by _fail(), which also exits with status 1.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        err_msg = str(exc)
        _fail(err_msg)
2082
2083
# Only run the command-line tool when this module is executed as a
# script, not when it's imported.
if __name__ == "__main__":
    _run_cli()
This page took 0.077088 seconds and 4 git commands to generate.