95c76eba003223404969ef8cf13a705be8bf72c0
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
32 __author__ = "Philippe Proulx"
33 __version__ = "0.20.0"
34 __all__ = [
35 "__author__",
36 "__version__",
37 "ByteOrder",
38 "LabelsT",
39 "parse",
40 "ParseError",
41 "ParseErrorMessage",
42 "ParseResult",
43 "TextLocation",
44 "VariablesT",
45 ]
46
47 import re
48 import abc
49 import ast
50 import sys
51 import copy
52 import enum
53 import math
54 import struct
55 import typing
56 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
57
58
59 # Text location (line and column numbers).
class TextLocation:
    # A location within some source text: a line number and a column
    # number.
    #
    # Instances are created through the private _create() class
    # method; calling the constructor directly always raises
    # `NotImplementedError`.

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass __init__() (which always raises) and fill the new
        # instance manually.
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        fields = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*fields)
86
87
88 # Any item.
class _Item:
    # Base of the item classes: only keeps the source text location
    # `text_loc` given at construction time.
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
97
98
99 # Scalar item.
class _ScalarItem(_Item):
    # An item which reports its own size through the `size` property.
    #
    # NOTE(review): `size` is decorated with `abc.abstractmethod`, but
    # no `abc.ABCMeta` metaclass is visible on this class or on
    # `_Item`, so instantiating a subclass which doesn't override
    # `size` doesn't look prevented; confirm if this is intended.

    # Returns the size, in bytes, of this item.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
106
107
108 # A repeatable item.
class _RepableItem:
    # Marker class without any state or behavior: item classes inherit
    # it to indicate that they are repeatable.
    pass
111
112
113 # Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # Item holding a single byte value `val` found at `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # Size of this item: always one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        # The byte value is shown in hexadecimal
        hex_val = hex(self._val)
        text_loc_repr = repr(self._text_loc)
        return "_Byte({}, {})".format(hex_val, text_loc_repr)
130
131
132 # Literal string.
class _LitStr(_ScalarItem, _RepableItem):
    # Item holding the already encoded bytes `data` of a literal
    # string found at `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Size of this item: the number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        parts = map(repr, (self._data, self._text_loc))
        return "_LitStr({}, {})".format(*parts)
149
150
151 # Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # Byte order enumeration; `enum.unique` guarantees that the two
    # members have distinct values.

    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
159
160
161 # Byte order setting.
class _SetBo(_Item):
    # Item which sets the byte order to `bo`.
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        parts = map(repr, (self._bo, self._text_loc))
        return "_SetBo({}, {})".format(*parts)
173
174
175 # Label.
class _Label(_Item):
    # Label item named `name`.
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        parts = map(repr, (self._name, self._text_loc))
        return "_Label({}, {})".format(*parts)
188
189
190 # Offset setting.
class _SetOffset(_Item):
    # Item which sets the offset to `val`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        parts = map(repr, (self._val, self._text_loc))
        return "_SetOffset({}, {})".format(*parts)
203
204
205 # Offset alignment.
class _AlignOffset(_Item):
    # Item which aligns the offset to `val`, keeping the padding byte
    # value `pad_val`.
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        parts = map(repr, (self._val, self._pad_val, self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*parts)
226
227
# Mixin for items which contain an AST expression and its string.
229 class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
245 # Fill until some offset.
class _FillUntil(_Item, _ExprMixin):
    # Item which fills until some offset given by an expression,
    # keeping the padding byte value `pad_val`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # _Item part first, then the expression part
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        parts = map(
            repr, (self._expr_str, self._expr, self._pad_val, self._text_loc)
        )
        return "_FillUntil({}, {}, {}, {})".format(*parts)
266
267
268 # Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Item which assigns the value of an expression to the variable
    # named `name`.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # _Item part first, then the expression part
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        parts = map(repr, (self._name, self._expr_str, self._expr, self._text_loc))
        return "_VarAssign({}, {}, {}, {})".format(*parts)
289
290
291 # Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    # Fixed-length number item: an expression and a length `len` in
    # bits.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # _Item part first, then the expression part
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size in bytes: the length in bits divided by eight (floor
    # division).
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        parts = map(repr, (self._expr_str, self._expr, self._len, self._text_loc))
        return "_FlNum({}, {}, {}, {})".format(*parts)
316
317
318 # LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # LEB128 integer item: only contains an expression.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        # _Item part first, then the expression part
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the name of the concrete class so that subclasses don't
        # need their own __repr__()
        cls_name = self.__class__.__name__
        parts = map(repr, (self._expr_str, self._expr, self._text_loc))
        return "{}({}, {}, {})".format(cls_name, *parts)
331
332
333 # Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself tells an unsigned
    # LEB128 integer item from a signed one.
    pass
336
337
338 # Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself tells a signed
    # LEB128 integer item from an unsigned one.
    pass
341
342
343 # String.
class _Str(_Item, _RepableItem, _ExprMixin):
    # String item: an expression and the name `codec` of the codec to
    # use.
    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        # _Item part first, then the expression part
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._codec = codec

    # Codec name.
    @property
    def codec(self):
        return self._codec

    def __repr__(self):
        # Exactly one argument per placeholder: expression string,
        # expression node, codec name, and text location.
        #
        # A previous version also passed the class name as a first
        # argument, which shifted all the fields and dropped the text
        # location from the output.
        return "_Str({}, {}, {}, {})".format(
            repr(self._expr_str),
            repr(self._expr),
            repr(self._codec),
            repr(self._text_loc),
        )
365
366
367 # Group of items.
class _Group(_Item, _RepableItem):
    # Group item: contains the list of items `items`.
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        parts = map(repr, (self._items, self._text_loc))
        return "_Group({}, {})".format(*parts)
380
381
382 # Repetition item.
class _Rep(_Item, _ExprMixin):
    # Repetition item: the item `item` to repeat and an expression.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # _Item part first, then the expression part
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        parts = map(repr, (self._item, self._expr_str, self._expr, self._text_loc))
        return "_Rep({}, {}, {}, {})".format(*parts)
403
404
405 # Conditional item.
class _Cond(_Item, _ExprMixin):
    # Conditional item: a condition expression, the item `true_item`
    # for a true condition, and the item `false_item` for a false one.
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # _Item part first, then the expression part
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        parts = map(
            repr,
            (
                self._true_item,
                self._false_item,
                self._expr_str,
                self._expr,
                self._text_loc,
            ),
        )
        return "_Cond({}, {}, {}, {}, {})".format(*parts)
438
439
440 # Macro definition item.
class _MacroDef(_Item):
    # Macro definition item: a name, parameter names, and the group
    # `group` of contained items.
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._param_names, self._group = name, param_names, group

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def param_names(self):
        return self._param_names

    # Contained items.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        parts = map(
            repr, (self._name, self._param_names, self._group, self._text_loc)
        )
        return "_MacroDef({}, {}, {}, {})".format(*parts)
472
473
474 # Macro expansion parameter.
class _MacroExpParam:
    # Macro expansion parameter: an expression string, its expression
    # node, and a source text location.
    #
    # This class is standalone: it inherits neither `_Item` nor
    # `_ExprMixin`.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression.
    @property
    def expr(self):
        return self._expr

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        parts = map(repr, (self._expr_str, self._expr, self._text_loc))
        return "_MacroExpParam({}, {}, {})".format(*parts)
500
501
502 # Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    # Macro expansion item: the name `name` and the parameters
    # `params`.
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        parts = map(repr, (self._name, self._params, self._text_loc))
        return "_MacroExp({}, {}, {})".format(*parts)
530
531
532 # A parsing error message: a string and a text location.
class ParseErrorMessage:
    # A message text and the source text location it refers to.
    #
    # Instances are created through the private _create() class
    # method; calling the constructor directly always raises
    # `NotImplementedError`.

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        # Bypass __init__() (which always raises) and fill the new
        # instance manually.
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Message text.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
556
557
558 # A parsing error containing one or more messages (`ParseErrorMessage`).
class ParseError(RuntimeError):
    # Parsing error holding a list of `ParseErrorMessage` objects.
    #
    # Instances are created through the private _create() class
    # method; calling the constructor directly always raises
    # `NotImplementedError`.

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Bypass __init__() (which always raises) and fill the new
        # instance manually.
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def _init(self, msg: str, text_loc: TextLocation):
        # Initialize the `RuntimeError` part with the first message
        super().__init__(msg)

        # The first message is the one given at creation time
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a new message (`msg` at `text_loc`) to the list of
    # messages of this error.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
587
588
589 # Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # Build a new parsing error having the single message `msg` at
    # `text_loc`, and raise it: this function never returns.
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
592
593
594 # Adds a message to the parsing error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    # Delegate to the private _add_msg() method of `exc`, which
    # appends the message `msg` with its location `text_loc` to the
    # messages of `exc`; `exc` isn't raised here.
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
597
598
599 # Appends a message to the parsing error `exc` and reraises it.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    # Append the message `msg` (at `text_loc`) to `exc` itself, then
    # reraise the same exception object: this function never returns.
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
603
604
605 # Returns a normalized version (so as to be parseable by int()) of
606 # the constant integer string `s`, possibly negative, dealing with
607 # any radix suffix.
608 def _norm_const_int(s: str):
609 neg = ""
610 pos = s
611
612 if s.startswith("-"):
613 neg = "-"
614 pos = s[1:]
615
616 for r in "xXoObB":
617 if pos.startswith("0" + r):
618 # Already correct
619 return s
620
621 # Try suffix
622 asm_suf_base = {
623 "h": "x",
624 "H": "x",
625 "q": "o",
626 "Q": "o",
627 "o": "o",
628 "O": "o",
629 "b": "b",
630 "B": "B",
631 }
632
633 for suf in asm_suf_base:
634 if pos[-1] == suf:
635 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
636
637 return s
638
639
640 # Encodes the string `s` using the codec `codec`, raising `ParseError`
641 # with `text_loc` on encoding error.
def _encode_str(s: str, codec: str, text_loc: TextLocation):
    # Returns the bytes of `s` encoded with the codec named `codec`.
    #
    # A `UnicodeEncodeError` is converted to a parsing error located
    # at `text_loc`.
    try:
        encoded = s.encode(codec)
    except UnicodeEncodeError:
        # _raise_error() never returns
        _raise_error(
            "Cannot encode `{}` with the `{}` encoding".format(s, codec), text_loc
        )

    return encoded
649
650
# Variables dictionary type (for type hints): maps a variable name to
# an integer, floating point number, or string value.
VariablesT = Dict[str, Union[int, float, str]]


# Labels dictionary type (for type hints): maps a label name to an
# integer.
LabelsT = Dict[str, int]
657
658
# Common patterns.

# Name: a letter or `_` followed with letters, digits, and `_`
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

# Positive constant integer, ending at a word boundary: with a `0x`,
# `0o`, or `0b` prefix, with an `h`, `q`/`o`, or `b` suffix (any case),
# or plain decimal
_pos_const_int_pat = re.compile(
    r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
)

# Constant integer: an optional `-` (group `neg`) followed with a
# positive constant integer
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))

# Constant floating point number: an optional sign, digits containing
# a `.`, and an optional exponent.
#
# NOTE(review): the final lookahead `(?=\W|)` has an empty alternative,
# so it looks like it always succeeds; confirm whether `(?=\W|$)` was
# intended.
_const_float_pat = re.compile(
    r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
)
668
669
# Macro definition dictionary type (for type hints): maps a name to a
# macro definition item.
_MacroDefsT = Dict[str, _MacroDef]
672
673
674 # Normand parser.
675 #
676 # The constructor accepts a Normand input. After building, use the `res`
677 # property to get the resulting main group.
678 class _Parser:
679 # Builds a parser to parse the Normand input `normand`, parsing
680 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Keeps the Normand input `normand`, resets the parsing state,
    # and parses immediately.
    #
    # Only the names (keys) of `variables` and `labels` are kept.
    self._normand = normand

    # Current position: index within the input, as well as line and
    # column numbers (both starting at 1)
    self._at = 0
    self._line_no = 1
    self._col_no = 1

    # Known label and variable names so far
    self._label_names = set(labels)
    self._var_names = set(variables)

    # No macro definitions yet
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse now
    self._parse()
690
691 # Result (main group).
@property
def res(self):
    # `self._res` isn't set by __init__() itself.
    #
    # NOTE(review): presumably set by _parse(), which isn't visible
    # here; confirm.
    return self._res
695
696 # Macro definitions.
@property
def macro_defs(self):
    # Dictionary created empty by __init__().
    return self._macro_defs
700
701 # Current text location.
@property
def _text_loc(self):
    # Creates a new text location object, on each access, from the
    # current line and column numbers.
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(  # pyright: ignore[reportPrivateUsage]
        line_no, col_no
    )
707
708 # Returns `True` if this parser is done parsing.
709 def _is_done(self):
710 return self._at == len(self._normand)
711
712 # Returns `True` if this parser isn't done parsing.
def _isnt_done(self):
    # Exact negation of _is_done().
    return not self._is_done()
715
716 # Raises a parse error, creating it using the message `msg` and the
717 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Delegate to the module-level _raise_error() function, which
    # always raises, passing the current text location.
    _raise_error(msg, self._text_loc)
720
721 # Tries to make the pattern `pat` match the current substring,
722 # returning the match object and updating `self._at`,
723 # `self._line_no`, and `self._col_no` on success.
724 def _try_parse_pat(self, pat: Pattern[str]):
725 m = pat.match(self._normand, self._at)
726
727 if m is None:
728 return
729
730 # Skip matched string
731 self._at += len(m.group(0))
732
733 # Update line number
734 self._line_no += m.group(0).count("\n")
735
736 # Update column number
737 for i in reversed(range(self._at)):
738 if self._normand[i] == "\n" or i == 0:
739 if i == 0:
740 self._col_no = self._at + 1
741 else:
742 self._col_no = self._at - i
743
744 break
745
746 # Return match object
747 return m
748
749 # Expects the pattern `pat` to match the current substring,
750 # returning the match object and updating `self._at`,
751 # `self._line_no`, and `self._col_no` on success, or raising a parse
752 # error with the message `error_msg` on error.
753 def _expect_pat(self, pat: Pattern[str], error_msg: str):
754 # Match
755 m = self._try_parse_pat(pat)
756
757 if m is None:
758 # No match: error
759 self._raise_error(error_msg)
760
761 # Return match object
762 return m
763
# Patterns for _skip_*()

# A comment: from `#` up to the next `#` or up to the end of the line,
# whichever comes first
_comment_pat = re.compile(r"#[^#]*?(?:$|#)", re.M)

# Any sequence (possibly empty) of whitespaces and comments
_ws_or_comments_pat = re.compile(r"(?:\s|{})*".format(_comment_pat.pattern), re.M)

# Any sequence (possibly empty) of whitespaces, symbol characters
# (any of `/\?&:;.,[]_=|-`), and comments
_ws_or_syms_or_comments_pat = re.compile(
    r"(?:[\s/\\?&:;.,[\]_=|-]|{})*".format(_comment_pat.pattern), re.M
)
770
771 # Skips as many whitespaces and comments as possible, but not
772 # insignificant symbol characters.
def _skip_ws_and_comments(self):
    # The pattern can match an empty string, so this only advances
    # the current position when there's something to skip.
    self._try_parse_pat(self._ws_or_comments_pat)
775
776 # Skips as many whitespaces, insignificant symbol characters, and
777 # comments as possible.
def _skip_ws_and_comments_and_syms(self):
    # The pattern can match an empty string, so this only advances
    # the current position when there's something to skip.
    self._try_parse_pat(self._ws_or_syms_or_comments_pat)
780
# Pattern for _try_parse_hex_byte(): a single hexadecimal digit (any
# case)
_nibble_pat = re.compile(r"[A-Fa-f0-9]")
783
784 # Tries to parse a hexadecimal byte, returning a byte item on
785 # success.
def _try_parse_hex_byte(self):
    # A hexadecimal byte is two nibbles, possibly separated with
    # whitespaces, symbols, and comments.
    #
    # Returns `None` without an initial nibble; a missing second
    # nibble is a parsing error.
    begin_text_loc = self._text_loc

    # Most significant nibble
    m_high = self._try_parse_pat(self._nibble_pat)

    if m_high is None:
        # Not a hexadecimal byte
        return

    # Least significant nibble (required)
    self._skip_ws_and_comments_and_syms()
    m_low = self._expect_pat(
        self._nibble_pat, "Expecting another hexadecimal nibble"
    )

    # Combine both nibbles
    digits = m_high.group(0) + m_low.group(0)
    return _Byte(int(digits, 16), begin_text_loc)
804
# Patterns for _try_parse_bin_byte()

# A single bit
_bin_byte_bit_pat = re.compile(r"[01]")

# One or more `%`
_bin_byte_prefix_pat = re.compile(r"%+")
808
809 # Tries to parse a binary byte, returning a byte item on success.
def _try_parse_bin_byte(self):
    # Binary bytes start with one or more `%`: as many bytes of
    # eight bits each are expected as there are `%` characters.
    #
    # Returns `None` without a `%` prefix, a single byte item for a
    # single `%`, or a group of byte items otherwise.
    begin_text_loc = self._text_loc

    # Match prefix
    m_prefix = self._try_parse_pat(self._bin_byte_prefix_pat)

    if m_prefix is None:
        # Not a binary byte
        return

    # One byte per `%` character
    byte_count = len(m_prefix.group(0))
    items = []  # type: List[_Item]

    while len(items) < byte_count:
        self._skip_ws_and_comments_and_syms()
        byte_text_loc = self._text_loc

        # Expect eight bits, most significant first
        bits = ""

        for _ in range(8):
            self._skip_ws_and_comments_and_syms()
            m_bit = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            bits += m_bit.group(0)

        items.append(_Byte(int(bits, 2), byte_text_loc))

    if len(items) == 1:
        # Single byte: no need for a group
        return items[0]

    # As group
    return _Group(items, begin_text_loc)
844
# Patterns for _try_parse_dec_byte()

# Prefix: `$`
_dec_byte_prefix_pat = re.compile(r"\$")

# Value: an optional `-` (group `neg`) followed with decimal digits
# (group `val`)
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
848
849 # Tries to parse a decimal byte, returning a byte item on success.
def _try_parse_dec_byte(self):
    # A decimal byte is `$` followed with a decimal constant within
    # the range [-128, 255]; a negative value is stored as its
    # two's complement.
    #
    # Returns `None` without a `$` prefix; a missing or out-of-range
    # value is a parsing error.
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # Not a decimal byte
        return

    # Expect the value
    self._skip_ws_and_comments()
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Compute the signed value
    val = int(m.group("val"))

    if m.group("neg") == "-":
        val = -val

    # Validate
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Return item (two's complement for a negative value)
    return _Byte(val % 256, begin_text_loc)
874
875 # Tries to parse a byte, returning a byte item on success.
876 def _try_parse_byte(self):
877 # Hexadecimal
878 item = self._try_parse_hex_byte()
879
880 if item is not None:
881 return item
882
883 # Binary
884 item = self._try_parse_bin_byte()
885
886 if item is not None:
887 return item
888
889 # Decimal
890 item = self._try_parse_dec_byte()
891
892 if item is not None:
893 return item
894
# Strings corresponding to escape sequence characters: the key is the
# character which follows the backslash within a literal string
_lit_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Patterns for _try_parse_lit_str()

# Beginning or end of a literal string: `"`
_lit_str_prefix_suffix_pat = re.compile(r'"')

# Contents of a literal string: any sequence (possibly empty) of
# backslash-character pairs and characters other than `"`
_lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
913
914 # Parses a literal string between double quotes (without an encoding
915 # prefix) and returns the resulting string.
916 def _try_parse_lit_str(self, with_prefix: bool):
917 # Match prefix if needed
918 if with_prefix:
919 if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None:
920 # No match
921 return
922
923 # Expect literal string
924 m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string")
925
926 # Expect end of string
927 self._expect_pat(
928 self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)'
929 )
930
931 # Replace escape sequences
932 val = m.group(0)
933
934 for ec in '0abefnrtv"\\':
935 val = val.replace(r"\{}".format(ec), self._lit_str_escape_seq_strs[ec])
936
937 # Return string
938 return val
939
# Patterns for _try_parse_utf_str_encoding()

# Prefix: `u`
_str_encoding_utf_prefix_pat = re.compile(r"u")

# What follows the prefix: `8`, or `16` or `32` followed with `be` or
# `le`, ending at a word boundary
_str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
943
944 # Tries to parse a UTF encoding specification, returning the Python
945 # codec name on success.
946 def _try_parse_utf_str_encoding(self):
947 # Match prefix
948 if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None:
949 # No match
950 return
951
952 # Expect UTF specification
953 m = self._expect_pat(
954 self._str_encoding_utf_pat,
955 "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
956 )
957
958 # Convert to codec name
959 return {
960 "8": "utf_8",
961 "16be": "utf_16_be",
962 "16le": "utf_16_le",
963 "32be": "utf_32_be",
964 "32le": "utf_32_le",
965 }[m.group(0)]
966
# Patterns for _try_parse_str_encoding()

# General prefix: `s`
_str_encoding_gen_prefix_pat = re.compile(r"s")

# `:` following the general prefix
_str_encoding_colon_pat = re.compile(r":")

# Non-UTF encoding: `latin` followed with a number from 1 to 10,
# ending at a word boundary
_str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b")
971
972 # Tries to parse a string encoding specification, returning the
973 # Python codec name on success.
974 #
975 # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
976 def _try_parse_str_encoding(self, req_gen_prefix: bool = False):
977 # General prefix?
978 if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None:
979 # Expect `:`
980 self._skip_ws_and_comments()
981 self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`")
982
983 # Expect encoding specification
984 self._skip_ws_and_comments()
985
986 # UTF?
987 codec = self._try_parse_utf_str_encoding()
988
989 if codec is not None:
990 return codec
991
992 # Expect Latin
993 m = self._expect_pat(
994 self._str_encoding_non_utf_pat,
995 "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
996 )
997 return m.group(0)
998
999 # UTF?
1000 if not req_gen_prefix:
1001 return self._try_parse_utf_str_encoding()
1002
# Patterns for _try_parse_str()

# Beginning of a literal string: `"`
_lit_str_prefix_pat = re.compile(r'"')

# Beginning of a literal string (`"`) or of an expression (`{`)
_str_prefix_pat = re.compile(r'"|\{')

# Expression: one or more characters other than `}`
_str_expr_pat = re.compile(r"[^}]+")

# End of an expression: `}`
_str_expr_suffix_pat = re.compile(r"\}")
1008
1009 # Tries to parse a string, returning a literal string or string item
1010 # on success.
def _try_parse_str(self):
    # A string is an optional encoding specification followed with
    # either a literal string between double quotes or an
    # expression between `{` and `}`.
    #
    # Without an encoding specification, only a literal string is
    # accepted (encoded as UTF-8), and this method returns `None` if
    # there's no `"`. With an encoding specification, a missing `"`
    # or `{` is a parsing error.
    #
    # Returns a literal string item (data already encoded) or a
    # string item (expression to evaluate later).
    begin_text_loc = self._text_loc

    # Encoding
    codec = self._try_parse_str_encoding()
    self._skip_ws_and_comments()

    # Match prefix
    if codec is not None:
        # Encoding present: expect a string prefix
        m_prefix = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`')
    else:
        # No encoding: only a literal string (UTF-8) is legal
        m_prefix = self._try_parse_pat(self._lit_str_prefix_pat)

        if m_prefix is None:
            return

    if m_prefix.group(0) != '"':
        # Expression: expect it and its `}` suffix
        self._skip_ws_and_comments()
        expr_text_loc = self._text_loc
        m_expr = self._expect_pat(self._str_expr_pat, "Expecting an expression")
        self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(0), expr_text_loc)

        # `{` may only be matched with an encoding specification
        assert codec is not None
        return _Str(expr_str, expr, codec, begin_text_loc)

    # Literal string: the opening `"` is already parsed
    str_text_loc = self._text_loc
    val = self._try_parse_lit_str(False)

    if val is None:
        self._raise_error("Expecting a literal string")

    # Encode string now
    data = _encode_str(val, "utf_8" if codec is None else codec, str_text_loc)
    return _LitStr(data, begin_text_loc)
1061
# Common right parenthesis pattern: `)`
_right_paren_pat = re.compile(r"\)")

# Patterns for _try_parse_group()

# Beginning of a group: `(`, or `!g` or `!group` ending at a word
# boundary
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
1067
1068 # Tries to parse a group, returning a group item on success.
def _try_parse_group(self):
    # A group starts with `(` and ends with `)`, or starts with
    # `!g`/`!group` and ends with whatever `self._block_end_pat`
    # matches (reported as `!end` in the error message).
    #
    # Returns `None` without a group prefix; a missing end of group
    # is a parsing error.
    begin_text_loc = self._text_loc

    # Match prefix
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        # Not a group
        return

    # Parse contained items
    items = self._parse_items()

    # Expect the end of group corresponding to the prefix
    self._skip_ws_and_comments_and_syms()
    is_paren = m_open.group(0) == "("
    end_pat = self._right_paren_pat if is_paren else self._block_end_pat
    end_str = ")" if is_paren else "!end"
    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )

    # Return item
    return _Group(items, begin_text_loc)
1096
1097 # Returns a stripped expression string and an AST expression node
1098 # from the expression string `expr_str` at text location `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    # Normalizes `expr_str` (strips it and replaces its newlines
    # with spaces), parses the result as a Python expression, and
    # returns both the normalized string and the expression node.
    #
    # A syntax error is converted to a parsing error located at
    # `text_loc`.
    norm_expr_str = expr_str.strip().replace("\n", " ")

    try:
        expr = ast.parse(norm_expr_str, mode="eval")
    except SyntaxError:
        # _raise_error() never returns
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(norm_expr_str),
            text_loc,
        )

    return norm_expr_str, expr
1112
# Patterns for _try_parse_val()

# Expression (group 1: one or more characters other than `}` and `:`)
# followed with `:` and optional whitespaces
_val_expr_pat = re.compile(r"([^}:]+):\s*")

# Fixed length, in bits: a multiple of 8 from 8 to 64
_fl_num_len_fmt_pat = re.compile(r"8|16|24|32|40|48|56|64")

# LEB128 format: `uleb128` or `sleb128` (group 1 is `u` or `s`)
_leb128_int_fmt_pat = re.compile(r"(u|s)leb128")
1117
1118 # Tries to parse a value (number or string) and format (fixed length
1119 # in bits, `uleb128`, `sleb128`, or `s:` followed with an encoding
1120 # name), returning an item on success.
def _try_parse_val(self):
    # A value is an expression, `:`, and a format which is tried in
    # this order: a fixed length in bits, `uleb128`/`sleb128`, and
    # a string encoding with the mandatory `s:` prefix.
    #
    # Returns `None` without an expression followed with `:`; an
    # invalid expression or an unknown format is a parsing error.
    begin_text_loc = self._text_loc

    # Match expression and `:`
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # Not a value
        return

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)
    self._skip_ws_and_comments()

    # Fixed length?
    m_len = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if m_len is not None:
        # Return fixed-length number item
        return _FlNum(
            expr_str,
            expr,
            int(m_len.group(0)),
            begin_text_loc,
        )

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_fmt_pat)

    if m_leb128 is not None:
        # Return LEB128 integer item
        cls = _ULeb128Int if m_leb128.group(1) == "u" else _SLeb128Int
        return cls(expr_str, expr, begin_text_loc)

    # String encoding?
    codec = self._try_parse_str_encoding(True)

    if codec is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
        )

    # Return string item
    return _Str(expr_str, expr, codec, begin_text_loc)
1166
# Patterns for _try_parse_var_assign()

# Name (group 1), optional whitespaces, and `=` not followed with
# another `=` (so that `==` isn't taken as an assignment)
_var_assign_name_equal_pat = re.compile(
    r"({})\s*=(?!=)".format(_py_name_pat.pattern)
)

# Expression: one or more characters other than `}`
_var_assign_expr_pat = re.compile(r"[^}]+")
1172
1173 # Tries to parse a variable assignment, returning a variable
1174 # assignment item on success.
def _try_parse_var_assign(self):
    # A variable assignment is a name, `=`, and an expression.
    #
    # Returns `None` without a name followed with `=`. It's a
    # parsing error to use the reserved name `_icitte_name`, to use
    # the name of an existing label, or to omit the expression.
    #
    # On success, the name is added to the known variable names.
    begin_text_loc = self._text_loc

    # Match name and `=`
    m_name = self._try_parse_pat(self._var_assign_name_equal_pat)

    if m_name is None:
        # Not a variable assignment
        return

    # Validate name
    var_name = m_name.group(1)

    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), begin_text_loc)

    # Expect an expression
    self._skip_ws_and_comments()
    m_expr = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m_expr.group(0), begin_text_loc)

    # Add to known variable names
    self._var_names.add(var_name)

    # Return item
    return _VarAssign(var_name, expr_str, expr, begin_text_loc)
1213
# Pattern for _try_parse_set_bo(): `be` or `le`
_bo_pat = re.compile(r"[bl]e")

# Attempts to parse a byte order name at the current position.
#
# Returns a `_SetBo` item carrying `ByteOrder.BE` or `ByteOrder.LE`,
# or `None` when the pattern doesn't match.
def _try_parse_set_bo(self):
    start_loc = self._text_loc
    bo_match = self._try_parse_pat(self._bo_pat)

    if bo_match is None:
        # Not a byte order setting
        return None

    bo_name = bo_match.group(0)

    if bo_name != "be":
        # The pattern only admits `be` and `le`
        assert bo_name == "le"
        return _SetBo(ByteOrder.LE, start_loc)

    return _SetBo(ByteOrder.BE, start_loc)
1235
# Patterns for _try_parse_val_or_var_assign_or_set_bo()
_val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
_val_var_assign_set_bo_suffix_pat = re.compile(r"\}")

# Attempts to parse a `{`...`}` construct: a variable assignment, a
# value, or a byte order setting (tried in this order).
#
# Returns the parsed item, or `None` when there's no `{` at the
# current position. Once `{` is consumed, a parse error is raised if
# none of the alternatives matches or if the closing `}` is missing.
def _try_parse_val_or_var_assign_or_set_bo(self):
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        # No opening `{`
        return None

    self._skip_ws_and_comments()

    # The order matters: the first alternative which yields an item
    # wins.
    alternatives = (
        self._try_parse_var_assign,
        self._try_parse_val,
        self._try_parse_set_bo,
    )

    for try_parse in alternatives:
        item = try_parse()

        if item is not None:
            break
    else:
        # Nothing matched after `{`
        self._raise_error(
            "Expecting a fixed-length number, a string, a variable assignment, or a byte order setting"
        )

    # Closing `}` is mandatory
    self._skip_ws_and_comments()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
1271
# Attempts to parse the value of an offset setting (what follows the
# initial `<`): a positive constant integer.
#
# Returns a `_SetOffset` item, or `None` when the pattern doesn't
# match.
def _try_parse_set_offset_val(self):
    start_loc = self._text_loc
    int_match = self._try_parse_pat(_pos_const_int_pat)

    if int_match is None:
        # Not an offset setting value
        return None

    # Base 0 lets int() infer the radix from the normalized literal
    offset_val = int(_norm_const_int(int_match.group(0)), 0)
    return _SetOffset(offset_val, start_loc)
1286
# Attempts to parse a label name (what follows the initial `<`).
#
# Returns a `_Label` item and records the name in
# `self._label_names`, or returns `None` when there's no Python name
# at the current position.
#
# Raises a parse error if the name is the reserved `ICITTE` name, an
# already known label name, or an already known variable name.
def _try_parse_label_name(self):
    start_loc = self._text_loc
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is None:
        # Not a label name
        return None

    label_name = name_match.group(0)

    # Reject reserved and conflicting names
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), start_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), start_loc)

    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), start_loc
        )

    # Valid: this name is now a known label
    self._label_names.add(label_name)
    return _Label(label_name, start_loc)
1318
# Patterns for _try_parse_label_or_set_offset()
_label_set_offset_prefix_pat = re.compile(r"<")
_label_set_offset_suffix_pat = re.compile(r">")

# Attempts to parse a `<`...`>` construct: an offset setting value
# first, then a label name.
#
# Returns the parsed item, or `None` when there's no `<` at the
# current position. Once `<` is consumed, a parse error is raised if
# neither alternative matches or if the closing `>` is missing.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # No opening `<`
        return None

    self._skip_ws_and_comments()

    # An offset setting value takes precedence over a label name
    item = self._try_parse_set_offset_val()

    if item is None:
        item = self._try_parse_label_name()

    if item is None:
        # Neither matched after `<`
        self._raise_error("Expecting a label name or an offset setting value")

    # Closing `>` is mandatory
    self._skip_ws_and_comments()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1347
# Pattern for _parse_pad_val()
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value (`~` followed by a positive
# constant integer).
#
# Returns the padding byte value, or 0 when there's no `~` at the
# current position. Raises a parse error if `~` isn't followed by a
# positive constant integer or if the value is greater than 255.
def _parse_pad_val(self):
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value: default
        return 0

    self._skip_ws_and_comments()
    val_loc = self._text_loc
    int_match = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    byte_val = int(_norm_const_int(int_match.group(0)), 0)

    # Must fit in a single byte
    if byte_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(byte_val),
            val_loc,
        )

    return byte_val
1376
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Attempts to parse an offset alignment: `@`, a decimal alignment in
# bits, and an optional padding value.
#
# Returns an `_AlignOffset` item, or `None` when there's no `@` at
# the current position. Raises a parse error if the alignment is
# missing or isn't a positive multiple of eight.
def _try_parse_align_offset(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No `@` prefix
        return None

    # Alignment value is mandatory
    self._skip_ws_and_comments()
    align_loc = self._text_loc
    align_match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(align_match.group(0))

    # Only whole, non-empty byte counts are acceptable
    if not (align > 0 and align % 8 == 0):
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            align_loc,
        )

    # Optional padding value (0 if absent)
    pad_byte = self._parse_pad_val()
    return _AlignOffset(align, pad_byte, start_loc)
1415
# Patterns for _expect_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses an expression outside a `{`/`}` context.
#
# The alternatives are tried in this order:
#
# 1. If `accept_const_float` is `True`: a constant floating point
#    number.
#
# 2. If `accept_const_int` is `True`: a constant integer, which may
#    only be negative if `allow_neg_int` is `True` (a parse error is
#    raised otherwise).
#
# 3. A Python name.
#
# 4. If `accept_lit_str` is `True`: a literal string.
#
# 5. A Python expression within `{` and `}`; this one is mandatory
#    when nothing above matched.
#
# Returns what _ast_expr_from_str() returns for the matched text (an
# expression string and AST expression pair, as unpacked by the
# callers).
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    start_loc = self._text_loc

    # 1. Constant floating point number
    if accept_const_float:
        float_match = self._try_parse_pat(_const_float_pat)

        if float_match is not None:
            return self._ast_expr_from_str(float_match.group(0), start_loc)

    # 2. Constant integer
    if accept_const_int:
        int_match = self._try_parse_pat(_const_int_pat)

        if int_match is not None:
            is_neg = int_match.group("neg") == "-"

            if is_neg and not allow_neg_int:
                _raise_error("Expecting a positive constant integer", start_loc)

            return self._ast_expr_from_str(
                _norm_const_int(int_match.group(0)), start_loc
            )

    # 3. Name
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is not None:
        return self._ast_expr_from_str(name_match.group(0), start_loc)

    # 4. Literal string: its Python representation is the expression
    if accept_lit_str:
        lit_str = self._try_parse_lit_str(True)

        if lit_str is not None:
            return self._ast_expr_from_str(repr(lit_str), start_loc)

    # 5. `{` expression `}`: build the error message listing every
    # accepted form, in case `{` is missing.
    accepted = []  # type: List[str]

    if accept_const_int:
        accepted.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        accepted.append("a constant floating point number")

    if accept_lit_str:
        accepted.append("a literal string")

    accepted += ["a name", "or `{`"]

    # With only the two base parts there's no list to punctuate
    sep = " " if len(accepted) == 2 else ", "
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(sep.join(accepted)),
    )

    # Inner expression text, located where it actually starts
    self._skip_ws_and_comments()
    inner_loc = self._text_loc
    inner_match = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # Closing `}` is mandatory
    self._skip_ws_and_comments()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_match.group(0), inner_loc)
1512
# Patterns for _try_parse_fill_until()
#
# NOTE(review): `_fill_until_pad_val_prefix_pat` isn't referenced by
# the code visible here (the padding value is parsed by
# _parse_pad_val() with its own pattern); kept as is.
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Attempts to parse a filling: `+`, an expression (a constant integer
# is accepted), and an optional padding value.
#
# Returns a `_FillUntil` item, or `None` when there's no `+` at the
# current position.
def _try_parse_fill_until(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # No `+` prefix
        return None

    # Expression is mandatory
    self._skip_ws_and_comments()
    until_expr_str, until_expr = self._expect_expr(accept_const_int=True)

    # Optional padding value (0 if absent)
    pad_byte = self._parse_pad_val()
    return _FillUntil(until_expr_str, until_expr, pad_byte, start_loc)
1535
# Parses the multiplier expression shared by repetition blocks and
# post-item repetitions (a constant integer is accepted), returning
# what _expect_expr() returns.
def _expect_rep_mul_expr(self):
    mul_expr = self._expect_expr(accept_const_int=True)
    return mul_expr
1540
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Attempts to parse a repetition block: `!r`/`!repeat`, a multiplier
# expression, items, and `!end`.
#
# Returns a `_Rep` item wrapping the items as a `_Group`, or `None`
# when the prefix doesn't match.
def _try_parse_rep_block(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return None

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Block contents
    self._skip_ws_and_comments_and_syms()
    body_loc = self._text_loc
    body_items = self._parse_items()

    # `!end` is mandatory
    self._skip_ws_and_comments_and_syms()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    body = _Group(body_items, body_loc)
    return _Rep(body, mul_expr_str, mul_expr, start_loc)
1574
# Patterns for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Attempts to parse a conditional block: `!if`, an expression, the
# "true" items, an optional `!else` with the "false" items, and
# `!end`.
#
# Returns a `_Cond` item, or `None` when the `!if` prefix doesn't
# match. Without `!else`, the "false" group is empty and located at
# the beginning of the block.
def _try_parse_cond_block(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return None

    # Condition expression
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" branch
    self._skip_ws_and_comments_and_syms()
    true_loc = self._text_loc
    true_items = self._parse_items()

    # Defaults for a missing "false" branch
    false_loc = start_loc
    false_items = []  # type: List[_Item]

    # Optional "false" branch
    self._skip_ws_and_comments_and_syms()
    has_else = self._try_parse_pat(self._cond_block_else_pat) is not None

    if has_else:
        self._skip_ws_and_comments_and_syms()
        false_loc = self._text_loc
        false_items = self._parse_items()

    # `!end` is mandatory
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_loc)
    false_group = _Group(false_items, false_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, start_loc)
1623
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Pattern for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Pattern for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Attempts to parse a macro definition: `!m`/`!macro`, a name, a
# parenthesized comma-separated list of parameter names, items, and
# `!end`.
#
# On success, registers a `_MacroDef` in `self._macro_defs` under the
# macro name and returns `True`; returns `False` when the prefix
# doesn't match.
#
# Raises a parse error on a duplicate macro name or a duplicate
# parameter name.
def _try_parse_macro_def(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name, which must be new
    self._skip_ws_and_comments()
    name_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_loc)

    # Parameter list
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")
    param_names = []  # type: List[str]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # `)`: end of parameter list
            break

        # Every parameter but the first one follows a comma
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_loc,
            )

        param_names.append(param_name)

    # Parse the macro items with empty sets of known variable and
    # label names, then put back the outer ones.
    self._skip_ws_and_comments_and_syms()
    body_loc = self._text_loc
    outer_var_names = self._var_names.copy()
    outer_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = outer_var_names
    self._label_names = outer_label_names

    # `!end` is mandatory
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Make the macro available to later expansions
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, _Group(body_items, body_loc), start_loc
    )
    return True
1710
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Attempts to parse a macro expansion: `m`, `:`, the name of a known
# macro, and a parenthesized comma-separated list of parameter
# values.
#
# Returns a `_MacroExp` item, or `None` when the `m` prefix doesn't
# match.
#
# Raises a parse error if the macro name is unknown or if the number
# of passed values differs from the number of parameter names of the
# definition.
def _try_parse_macro_exp(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return None

    # `:` separates the prefix from the macro name
    self._skip_ws_and_comments()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Macro name, which must be registered
    self._skip_ws_and_comments()
    name_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), name_loc)

    # Parameter values
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")
    params_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # `)`: end of parameter values
            break

        # Every value but the first one follows a comma
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_loc = self._text_loc
        param_expr_str, param_expr = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        params.append(
            _MacroExpParam(param_expr_str, param_expr, text_loc=param_loc)
        )

    # The expansion must pass exactly what the definition expects
    passed_count = len(params)
    expected_count = len(macro_def.param_names)

    if passed_count != expected_count:
        plural = "" if passed_count == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                passed_count, plural, expected_count
            ),
            params_loc,
        )

    return _MacroExp(macro_name, params, start_loc)
1789
# Attempts to parse a base item (anything except a post-item
# repetition), returning the first item which one of the specialized
# parsers yields, or `None` if none of them matches.
def _try_parse_base_item(self):
    # The order is significant: the first parser returning an item
    # wins.
    item_parsers = (
        self._try_parse_byte,  # byte
        self._try_parse_str,  # string
        self._try_parse_val_or_var_assign_or_set_bo,  # `{`...`}`
        self._try_parse_label_or_set_offset,  # `<`...`>`
        self._try_parse_align_offset,  # offset alignment
        self._try_parse_fill_until,  # filling
        self._try_parse_group,  # group
        self._try_parse_rep_block,  # repetition block
        self._try_parse_cond_block,  # conditional block
        self._try_parse_macro_exp,  # macro expansion
    )

    for try_parse in item_parsers:
        item = try_parse()

        if item is not None:
            return item

    # Nothing recognized
    return None
1852
# Pattern for _try_parse_rep_post()
_rep_post_prefix_pat = re.compile(r"\*")

# Attempts to parse a post-item repetition: `*` followed by a
# multiplier expression.
#
# Returns what _expect_rep_mul_expr() returns, or `None` when there's
# no `*` at the current position.
def _try_parse_rep_post(self):
    star_match = self._try_parse_pat(self._rep_post_prefix_pat)

    if star_match is None:
        # No `*` prefix
        return None

    # The multiplier is mandatory after `*`
    self._skip_ws_and_comments()
    return self._expect_rep_mul_expr()
1867
# Attempts to parse one item, possibly followed by a post-item
# repetition, and appends the result to `items`.
#
# Returns `True` when an item was appended, and `None` when no base
# item could be parsed.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments_and_syms()
    parsed = self._try_parse_base_item()

    if parsed is None:
        # Nothing to append
        return

    # Only repeatable items may be followed by `*` and a multiplier;
    # if so, the repetition item replaces the base item.
    if isinstance(parsed, _RepableItem):
        self._skip_ws_and_comments()
        rep_loc = self._text_loc
        mul = self._try_parse_rep_post()

        if mul is not None:
            mul_expr_str, mul_expr = mul
            parsed = _Rep(parsed, mul_expr_str, mul_expr, text_loc=rep_loc)

    items.append(parsed)
    return True
1892
# Parses items until the input is exhausted or until something which
# is not an item is found, returning the list of parsed items.
#
# If `accept_macro_defs` is `True`, then macro definitions are also
# accepted between items (they get registered, not returned).
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got an item: keep going
            continue

        # Not an item: the only other thing which may continue the
        # loop is a macro definition, when allowed.
        if not (accept_macro_defs and self._try_parse_macro_def()):
            break

    return parsed_items
1912
# Parses the whole Normand input, setting `self._res` to the main
# group item on success.
#
# Raises a parse error if anything remains after the first level
# items (and macro definitions).
def _parse(self):
    if not self._normand.strip():
        # Nothing but whitespace (or nothing at all): empty main
        # group, without consuming anything.
        self._res = _Group([], self._text_loc)
        return

    # First level items; macro definitions are accepted here
    top_items = self._parse_items(True)

    # Whatever is left at this point can't be parsed
    self._skip_ws_and_comments_and_syms()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    self._res = _Group(top_items, self._text_loc)
1934
1935
# The return type of parse().
#
# Instances are built with the private _create() class method; calling
# the class directly raises `NotImplementedError`.
class ParseResult:
    def __init__(self, *args, **kwargs):  # type: ignore
        # Not user-constructible
        raise NotImplementedError

    # Builds an instance without going through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        result = cls.__new__(cls)
        result._init(data, variables, labels, offset, bo)
        return result

    # Stores the result fields as given (no copy is made here).
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order (may be `None`).
    @property
    def byte_order(self):
        return self._bo
1993
1994
# Raises a parse error with the message `msg`, located where the item
# `item` is (its `text_loc` attribute).
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_loc = item.text_loc
    _raise_error(msg, item_loc)
1999
2000
2001 # The `ICITTE` reserved name.
2002 _icitte_name = "ICITTE"
2003
2004
2005 # Base node visitor.
2006 #
2007 # Calls the _visit_name() method for each name node which isn't the name
2008 # of a call.
2009 class _NodeVisitor(ast.NodeVisitor):
2010 def __init__(self):
2011 self._parent_is_call = False
2012
2013 def generic_visit(self, node: ast.AST):
2014 if type(node) is ast.Call:
2015 self._parent_is_call = True
2016 elif type(node) is ast.Name and not self._parent_is_call:
2017 self._visit_name(node.id)
2018
2019 super().generic_visit(node)
2020 self._parent_is_call = False
2021
2022 @abc.abstractmethod
2023 def _visit_name(self, name: str):
2024 ...
2025
2026
# Expression validator: raises a parse error for the first reported
# name which is neither the reserved `ICITTE` name nor one of the
# allowed names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        # The reported legal names always include the reserved one, so
        # there's always something to list.
        legal_names = set(self._allowed_names)
        legal_names.add(_icitte_name)
        legal_names_str = ", ".join(
            sorted("`{}`".format(legal_name) for legal_name in legal_names)
        )
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
2057
2058
# Generator state: variables, labels, current offset, and current byte
# order.
#
# The variable and label mappings are shallow-copied at construction
# time so that updating the state doesn't modify what the caller
# passed.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.variables, self.labels = variables.copy(), labels.copy()
        self.offset, self.bo = offset, bo

    def __repr__(self):
        fields = (self.variables, self.labels, self.offset, self.bo)
        return "_GenState({}, {}, {}, {})".format(*map(repr, fields))
2077
2078
# Fixed-length number item instance: everything needed to generate the
# bytes of a fixed-length number item later.
#
# Read-only record of the item itself, the offset within the generated
# data at which its bytes go, a generation state, and a list of parsing
# error messages.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._item, self._offset_in_data = item, offset_in_data
        self._state, self._parse_error_msgs = state, parse_error_msgs

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the reserved bytes within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state to use when evaluating the item expression.
    @property
    def state(self):
        return self._state

    # Parsing error messages attached to this instance.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
2108
2109
2110 # Generator of data and final state from a group item.
2111 #
2112 # Generation happens in memory at construction time. After building, use
2113 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2114 # get the resulting context.
2115 #
2116 # The steps of generation are:
2117 #
2118 # 1. Handle each item in prefix order.
2119 #
2120 # The handlers append bytes to `self._data` and update some current
2121 # state object (`_GenState` instance).
2122 #
2123 # When handling a fixed-length number item, try to evaluate its
2124 # expression using the current state. If this fails, then it might be
2125 # because the expression refers to a "future" label: save the current
2126 # offset in `self._data` (generated data) and a snapshot of the
2127 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
2128 # object). _gen_fl_num_item_insts() will deal with this later. A
2129 # `_FlNumItemInst` instance also contains a snapshot of the current
2130 # parsing error messages (`self._parse_error_msgs`) which need to be
2131 # taken into account when handling the instance later.
2132 #
2133 # When handling the items of a group, keep a map of immediate label
2134 # names to their offset. Then, after having processed all the items,
2135 # update the relevant saved state snapshots in
2136 # `self._fl_num_item_insts` with those immediate label values.
2137 # _gen_fl_num_item_insts() will deal with this later.
2138 #
2139 # 2. Handle all the fixed-length number item instances of which the
2140 # expression evaluation failed before.
2141 #
2142 # At this point, `self._fl_num_item_insts` contains everything that's
2143 # needed to evaluate the expressions, including the values of
2144 # "future" labels from the point of view of some fixed-length number
2145 # item instance.
2146 #
2147 # If an evaluation fails at this point, then it's a user error. Add
2148 # to the parsing error all the saved parsing error messages of the
2149 # instance. Those additional messages add precious context to the
2150 # error.
2151 class _Gen:
# Builds the generator and immediately generates from `group`,
# starting from a fresh `_GenState` made of `variables`, `labels`,
# `offset`, and `bo`.
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    initial_state = _GenState(variables, labels, offset, bo)
    self._macro_defs = macro_defs

    # Fixed-length number item instances to handle after the first
    # pass, and current parsing error messages
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    # Generation happens at construction time
    self._gen(group, initial_state)
2165
# Generated bytes (`self._data`).
@property
def data(self):
    generated = self._data
    return generated
2170
# Updated variables, taken from the final generation state.
@property
def variables(self):
    final_state = self._final_state
    return final_state.variables
2175
# Updated main group labels, taken from the final generation state.
@property
def labels(self):
    final_state = self._final_state
    return final_state.labels
2180
# Updated offset, taken from the final generation state.
@property
def offset(self):
    final_state = self._final_state
    return final_state.offset
2185
# Updated byte order, taken from the final generation state.
@property
def bo(self):
    final_state = self._final_state
    return final_state.bo
2190
# Evaluates the expression `expr`, of which the original string is
# `expr_str` at the location `text_loc`, considering the generation
# state `state`.
#
# The names available to the expression are the labels of `state`,
# then the reserved `ICITTE` name (current offset), then the variables
# of `state`; a later one replaces an earlier one having the same
# name.
#
# A `bool` result is converted to `int`. The result type must then be
# `int`, or also `float` if `accept_float` is `True`, or also `str` if
# `accept_str` is `True`.
#
# Raises a parse error if the expression uses an illegal name, if its
# evaluation fails, or if the type of its result isn't accepted.
#
# NOTE(review): eval() is called with `None` as globals, which per
# the Python documentation means the globals of this module are
# used; only relevant if expressions can come from untrusted input.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    # Symbols, from lowest to highest precedence
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Check the names before evaluating anything
    validator = _ExprValidator(expr_str, text_loc, set(syms))
    validator.visit(expr)

    try:
        code = compile(expr, "", "eval")
        val = eval(code, None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize: a boolean result is an integer one
    if type(val) is bool:
        val = int(val)

    # Accepted result types
    accepted_types = {int}  # type: Set[type]

    if accept_float:
        accepted_types.add(float)

    if accept_str:
        accepted_types.add(str)

    if type(val) in accepted_types:
        return val

    # Wrong type: list the accepted type names as "A", "A or B", or
    # "A, B, or C".
    type_names = sorted("`{}`".format(t.__name__) for t in accepted_types)

    if len(type_names) > 2:
        type_names[-1] = "or {}".format(type_names[-1])
        msg_expected = ", ".join(type_names)
    elif len(type_names) == 2:
        msg_expected = " or ".join(type_names)
    else:
        msg_expected = type_names[0]

    _raise_error(
        "Invalid expression `{}`: expecting result type {}, not `{}`".format(
            expr_str, msg_expected, type(val).__name__
        ),
        text_loc,
    )
2264
# Evaluates the expression of `item` with _eval_expr(), passing the
# expression string, expression, and text location of `item` along
# with `state`, `accept_float`, and `accept_str`.
@staticmethod
def _eval_item_expr(
    item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign],
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    expr_str, expr, text_loc = item.expr_str, item.expr, item.text_loc
    return _Gen._eval_expr(expr_str, expr, text_loc, state, accept_float, accept_str)
2277
# Handles the byte item `item`: appends its value to the generated
# data and advances the offset of `state` by its size.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    byte_val = item.val
    self._data.append(byte_val)
    state.offset = state.offset + item.size
2282
# Handles the literal string item `item`: appends its data to the
# generated data and advances the offset of `state` by its size.
def _handle_lit_str_item(self, item: _LitStr, state: _GenState):
    encoded = item.data
    self._data += encoded
    state.offset = state.offset + item.size
2287
# Handles the byte order setting item `item`: its byte order becomes
# the current byte order of `state`.
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    new_bo = item.bo
    state.bo = new_bo
2292
# Handles the variable assignment item `item`: evaluates its
# expression (`int`, `float`, and `str` results are accepted) and
# binds the result to the variable name within `state`.
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    new_val = self._eval_item_expr(
        item, state, accept_float=True, accept_str=True
    )
    state.variables[item.name] = new_val
2299
# Handles the fixed-length number item `item`.
#
# Raises a parse error if the item needs more than 8 bits while the
# current byte order of `state` is undefined.
#
# Otherwise, tries to generate the bytes of the item immediately. If
# this fails for any reason, records a `_FlNumItemInst` (item, current
# length of the generated data, deep copies of `state` and of the
# current parsing error messages) so that generation can be retried
# later, and reserves zero bytes in the meantime.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    if item.len > 8 and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        item_bytes = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Deliberately broad: any failure defers this item. Snapshot
        # what's needed to retry.
        data_offset = len(self._data)
        state_snapshot = copy.deepcopy(state)
        msgs_snapshot = copy.deepcopy(self._parse_error_msgs)
        self._fl_num_item_insts.append(
            _FlNumItemInst(item, data_offset, state_snapshot, msgs_snapshot)
        )

        # Placeholder bytes for this instance
        item_bytes = bytes(item.len // 8)

    self._data += item_bytes
    state.offset += len(item_bytes)
2333
2334 # Returns the size, in bytes, required to encode the value `val`
2335 # with LEB128 (signed version if `is_signed` is `True`).
2336 @staticmethod
2337 def _leb128_size_for_val(val: int, is_signed: bool):
2338 if val < 0:
2339 # Equivalent upper bound.
2340 #
2341 # For example, if `val` is -128, then the full integer for
2342 # this number of bits would be [-128, 127].
2343 val = -val - 1
2344
2345 # Number of bits (add one for the sign if needed)
2346 bits = val.bit_length() + int(is_signed)
2347
2348 if bits == 0:
2349 bits = 1
2350
2351 # Seven bits per byte
2352 return math.ceil(bits / 7)
2353
# Handles the LEB128 integer item `item`: evaluates its expression,
# appends the LEB128 encoding of the result to the generated data
# (signed variant when `item` is exactly a `_SLeb128Int`), and
# advances the offset of `state` by the encoded size.
#
# NOTE(review): nothing here rejects a negative result for the
# unsigned variant; confirm whether this is validated elsewhere.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    val = self._eval_item_expr(item, state)
    is_signed = type(item) is _SLeb128Int
    size = self._leb128_size_for_val(val, is_signed)

    # Emit groups of seven bits, least significant group first, each
    # with its continuation bit (MSB) set.
    encoded = bytearray()

    for _ in range(size):
        encoded.append(0x80 | (val & 0x7F))
        val >>= 7

    # The last byte ends the sequence: drop its continuation bit
    encoded[-1] &= 0x7F

    self._data += encoded
    state.offset = state.offset + size
2373
# Handles the string item `item`: evaluates its expression (floating
# point number and string results are accepted), converts the result
# to a string, encodes it with the codec of the item, appends the
# encoded bytes to the generated data, and advances the offset of
# `state` by their length.
def _handle_str_item(self, item: _Str, state: _GenState):
    result = self._eval_item_expr(item, state, accept_float=True, accept_str=True)
    encoded = _encode_str(str(result), item.codec, item.text_loc)
    self._data += encoded
    state.offset = state.offset + len(encoded)
2387
# Handles the group item `item` by handling each of its items in
# order.
#
# Labels which are direct children of the group ("immediate" labels)
# are removed from `state` once the group is done if
# `remove_immediate_labels` is `True`. In all cases, they're added to
# the saved state of every fixed-length number item instance recorded
# while handling this group.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Instances recorded from this index on belong to this group
    insts_start = len(self._fl_num_item_insts)
    local_labels = {}  # type: LabelsT

    for child in item.items:
        if type(child) is _Label:
            # Remember the offset of this immediate label
            local_labels[child.name] = state.offset

        self._handle_item(child, state)

    if remove_immediate_labels:
        # Immediate labels don't outlive their group
        for label_name in local_labels:
            del state.labels[label_name]

    # The postponed instances of this group may refer to any of its
    # immediate labels, even those defined after them.
    pending_insts = self._fl_num_item_insts[insts_start:]

    for inst in pending_insts:
        inst.state.labels.update(local_labels)
2413
# Handles the repetition item `item`: evaluates its expression to get
# the repetition count, raises a parse error if this count is
# negative, and then handles the repeated item that many times with
# the same `state`.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    repeated = item.item

    for _ in range(count):
        self._handle_item(repeated, state)
2431
# Handles the conditional item `item`: evaluates its expression and
# handles either its "true" item (truthy result) or its "false" item
# (falsy result).
def _handle_cond_item(self, item: _Cond, state: _GenState):
    if _Gen._eval_item_expr(item, state):
        selected = item.true_item
    else:
        selected = item.false_item

    self._handle_item(selected, state)
2442
# Creates and returns the state with which to handle the items of the
# macro expanded by `item`.
#
# The new state starts with two empty dictionaries (presumably its
# variables and labels; confirm against `_GenState`) and with the
# offset and byte order of `init_state`. Each parameter expression of
# `item` is evaluated against `init_state` (floating point number and
# string results are accepted) and bound, by position, to the
# corresponding parameter name of the macro definition as a variable
# of the new state.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    exp_state = _GenState({}, {}, init_state.offset, init_state.bo)
    param_names = self._macro_defs[item.name].param_names

    for name, arg in zip(param_names, item.params):
        arg_val = _Gen._eval_expr(
            arg.expr_str,
            arg.expr,
            arg.text_loc,
            init_state,
            accept_float=True,
            accept_str=True,
        )
        exp_state.variables[name] = arg_val

    return exp_state
2464
# Handles the macro expansion item `item`: handles the group of the
# named macro definition with a dedicated state built by
# _eval_macro_exp_params().
#
# While the group is being handled, a "While expanding the macro"
# message is kept on top of `self._parse_error_msgs`. A `ParseError`
# raised during the expansion is passed to _augment_error() with the
# same message text and the text location of `item` (presumably
# re-raising; confirm against its definition).
#
# Only the offset of `state` is updated afterwards: it advances by
# the number of bytes which the expansion appended to `self._data`.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    context_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # Dedicated state for the expansion
        exp_state = self._eval_macro_exp_params(item, state)

        # Remember the data size to measure the expansion
        init_data_size = len(self._data)

        # Push the context message, expand, and pop it
        self._parse_error_msgs.append(
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                context_text, item.text_loc
            )
        )
        macro_group = self._macro_defs[item.name].group
        self._handle_item(macro_group, exp_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        _augment_error(exc, context_text, item.text_loc)

    # Account for the generated bytes
    state.offset = state.offset + (len(self._data) - init_data_size)
2488
# Handles the offset setting item `item`: the value of `item` becomes
# the current offset of `state` (no data is generated).
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    new_offset = item.val
    state.offset = new_offset
2492
# Handles the offset alignment item `item`: rounds the offset of
# `state` up to the next multiple of `item.val // 8` and appends as
# many padding bytes (value `item.pad_val`) as the offset advanced.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    start_offset = state.offset

    # `item.val` is divided by eight to get the alignment in bytes
    unit = item.val // 8

    # Round up to the next multiple of `unit`
    aligned_offset = (start_offset + unit - 1) // unit * unit
    state.offset = aligned_offset

    # Pad the gap
    padding = [item.pad_val] * (aligned_offset - start_offset)
    self._data += bytes(padding)
2499
# Handles the filling item `item`: evaluates its expression to get
# the target offset, raises a parse error if this target is before
# the current offset of `state`, appends padding bytes (value
# `item.pad_val`) up to the target, and makes the target the current
# offset.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    target_offset = _Gen._eval_item_expr(item, state)
    cur_offset = state.offset

    # Filling can't go backwards
    if target_offset < cur_offset:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target_offset, cur_offset
            ),
            item,
        )

    # Pad up to the target
    padding = [item.pad_val] * (target_offset - cur_offset)
    self._data += bytes(padding)

    state.offset = target_offset
2519
# Handles the label item `item`: records the current offset of
# `state` under the name of the label in the labels of `state`.
def _handle_label_item(self, item: _Label, state: _GenState):
    cur_offset = state.offset
    state.labels[item.name] = cur_offset
2523
# Handles the item `item` by dispatching to the handler registered in
# `self._item_handlers` for its exact type, and returns whatever this
# handler returns.
#
# Raises `KeyError` if no handler is registered for the type of
# `item`.
def _handle_item(self, item: _Item, state: _GenState):
    handler = self._item_handlers[type(item)]
    return handler(item, state)
2528
# Generates and returns the bytes of a fixed-length integer item
# instance having the value `val`.
#
# `val` must fit either the signed or the unsigned range of
# `item.len` bits, that is, [-2^(len - 1), 2^len - 1]; otherwise this
# raises a parse error for `item`.
#
# An undefined byte order (`None`) is treated like big endian.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not lowest <= val <= highest:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    big_endian = state.bo in (None, ByteOrder.BE)

    # Always pack on 64 bits so that the sign bit is extended
    # whatever `item.len`, and then trim.
    fmt = (">" if big_endian else "<") + ("Q" if val >= 0 else "q")
    packed = struct.pack(fmt, val)
    len_bytes = item.len // 8

    if big_endian:
        # Least significant bytes are the last ones
        return packed[-len_bytes:]

    # Least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return packed[:len_bytes]
2564
# Generates and returns the bytes of a fixed-length floating point
# number item instance having the value `val`.
#
# Only 32-bit and 64-bit lengths are supported; any other `item.len`
# raises a parse error for `item`.
#
# An undefined byte order (`None`) is treated like big endian.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    if item.len != 32 and item.len != 64:
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # `struct` format: byte order prefix, then single or double
    # precision code.
    bo_prefix = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_code = "f" if item.len == 32 else "d"
    return struct.pack(bo_prefix + type_code, val)
2585
# Generates and returns the bytes of a fixed-length number item
# instance: evaluates the expression of `item` with `state` (a
# floating point number result is accepted) and encodes the result as
# an integer or as a floating point number depending on its exact
# type.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    val = self._eval_item_expr(item, state, accept_float=True)

    if type(val) is not int:
        # Only other accepted result type
        assert type(val) is float
        return self._gen_fl_float_item_inst_data(val, item, state)

    return self._gen_fl_int_item_inst_data(val, item, state)
2598
2599 # Generates the data for all the fixed-length number item instances
2600 # and writes it at the correct offset within `self._data`.
2601 def _gen_fl_num_item_insts(self):
2602 for inst in self._fl_num_item_insts:
2603 # Generate bytes
2604 try:
2605 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
2606 except ParseError as exc:
2607 # Add all the saved parse error messages for this
2608 # instance.
2609 for msg in reversed(inst.parse_error_msgs):
2610 _add_error_msg(exc, msg.text, msg.text_location)
2611
2612 raise
2613
2614 # Insert bytes into `self._data`
2615 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2616
# Generates the data (`self._data`) and the final state
# (`self._final_state`) from the items of `group`, starting from the
# initial state `state`.
#
# `state` is updated in place while handling the items and is the
# object kept as the final state.
def _gen(self, group: _Group, state: _GenState):
    # Both LEB128 item types share the same handler
    leb128_handler = self._handle_leb128_int_item

    # Handler to call for each exact item type
    self._item_handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _LitStr: self._handle_lit_str_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: leb128_handler,
        _Str: self._handle_str_item,
        _ULeb128Int: leb128_handler,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # No data yet
    self._data = bytearray()

    # Handle the root group, keeping its immediate labels in `state`
    # because the `labels` property offers them.
    self._handle_group_item(group, state, remove_immediate_labels=False)

    # `state` is now the final state
    self._final_state = state

    # Everything is known at this point: generate the bytes of the
    # postponed fixed-length number item instances.
    self._gen_fl_num_item_insts()
2653
2654
# Parses the Normand input string `normand` and returns a
# `ParseResult` instance containing the resulting bytes along with the
# final variables, labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to values (see `VariablesT` for the accepted value
# types). A variable name must not be the reserved name `ICITTE`.
# `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved
# name `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order (`None` means
# undefined).
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Replace missing dictionaries with new empty ones
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data from the result
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2696
2697
2698 # Raises a command-line error with the message `msg`.
2699 def _raise_cli_error(msg: str) -> NoReturn:
2700 raise RuntimeError("Command-line error: {}".format(msg))
2701
2702
# Returns the `int` or `float` value out of the CLI assignment value
# string `s` (surrounding whitespace is ignored).
#
# A floating point number is only considered when `is_label` is
# `False`; it's tried before an integer.
#
# Raises a command-line error if `s` is neither.
def _val_from_assign_val_str(s: str, is_label: bool):
    s = s.strip()

    if not is_label:
        # Labels can't be floating point numbers
        float_m = _const_float_pat.fullmatch(s)

        if float_m is not None:
            return float(float_m.group(0))

    int_m = _const_int_pat.fullmatch(s)

    if int_m is not None:
        # Base zero: the base comes from the literal prefix, if any
        return int(_norm_const_int(int_m.group(0)), 0)

    _raise_cli_error(
        "Invalid assignment value `{}`: expecting {}".format(
            s, "an integer" if is_label else "a number"
        )
    )
2722
2723
2724 # Returns a dictionary of string to numbers from the list of strings
2725 # `args` containing `NAME=VAL` entries.
2726 def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
2727 d = {} # type: VariablesT
2728
2729 if args is None:
2730 return d
2731
2732 for arg in args:
2733 m = re.match(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern), arg)
2734
2735 if m is None:
2736 _raise_cli_error("Invalid assignment `{}`".format(arg))
2737
2738 if is_str_only:
2739 val = m.group(2)
2740 else:
2741 val = _val_from_assign_val_str(m.group(2), is_label)
2742
2743 d[m.group(1)] = val
2744
2745 return d
2746
2747
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The Normand input text is read in full from the file at the input
# path, or from the standard input when there's no path.
#
# Raises `RuntimeError` (through _raise_cli_error()) on an invalid
# `NAME=VAL` assignment or on a negative initial offset.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels: string variables are merged after the
    # numeric ones, so a string variable wins over a numeric one having
    # the same name.
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset (report the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # `choices` above only allows `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
2840
2841
# CLI entry point without exception handling: parses the Normand
# input text `normand` with the given initial offset, byte order,
# variables, and labels, and writes the resulting bytes to the
# standard output (binary).
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    res = parse(
        normand,
        init_variables=variables,
        init_labels=labels,
        init_offset=offset,
        init_byte_order=bo,
    )
    sys.stdout.buffer.write(res.data)
2851
2852
2853 # Prints the exception message `msg` and exits with status 1.
2854 def _fail(msg: str) -> NoReturn:
2855 if not msg.endswith("."):
2856 msg += "."
2857
2858 print(msg.strip(), file=sys.stderr)
2859 sys.exit(1)
2860
2861
# CLI entry point: parses the command-line arguments, runs the
# processor, and turns any exception into a message printed by
# _fail() (exit status 1).
#
# For a `ParseError`, prints one line per message of the exception,
# last message first, each one formatted as
# `[ABSOLUTE-PATH:]LINE:COL - TEXT` and terminated with a period
# unless it already ends with `.`, `:`, or `;`.
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        # The first element is the input path: not a processing input
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        path = args[0]
        prefix = "{}:".format(os.path.abspath(path)) if path is not None else ""
        lines = []

        for msg in reversed(exc.messages):
            loc = msg.text_location
            line = "{}{}:{} - {}".format(prefix, loc.line_no, loc.col_no, msg.text)

            if not line.endswith((".", ":", ";")):
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2893
2894
# Run the command-line tool when this module is executed as a script
# (not when it's imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.084063 seconds and 3 git commands to generate.