Add many string features
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
32 __author__ = "Philippe Proulx"
33 __version__ = "0.19.0"
34 __all__ = [
35 "__author__",
36 "__version__",
37 "ByteOrder",
38 "LabelsT",
39 "parse",
40 "ParseError",
41 "ParseErrorMessage",
42 "ParseResult",
43 "TextLocation",
44 "VariablesT",
45 ]
46
47 import re
48 import abc
49 import ast
50 import sys
51 import copy
52 import enum
53 import math
54 import struct
55 import typing
56 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
57
58
59 # Text location (line and column numbers).
class TextLocation:
    # A position (line and column numbers) within the source text.
    #
    # Direct construction is disabled: instances are built with the
    # private `_create()` factory.

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds an instance without going through the disabled `__init__()`.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        inst = cls.__new__(cls)
        inst._init(line_no, col_no)
        return inst

    # Effective initializer used by `_create()`.
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({}, {})".format(self.line_no, self.col_no)
86
87
88 # Any item.
89 class _Item:
90 def __init__(self, text_loc: TextLocation):
91 self._text_loc = text_loc
92
93 # Source text location.
94 @property
95 def text_loc(self):
96 return self._text_loc
97
98
99 # Scalar item.
class _ScalarItem(_Item):
    # An item which can report its own size.

    # Size of this item, in bytes; concrete subclasses provide it.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
106
107
108 # A repeatable item.
109 class _RepableItem:
110 pass
111
112
113 # Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # A single byte value found at some source text location.

    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # Always one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
130
131
132 # Literal string.
class _LitStr(_ScalarItem, _RepableItem):
    # A literal string, kept as its already encoded bytes.

    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_LitStr({!r}, {!r})".format(self._data, self._text_loc)
149
150
151 # Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # Supported byte orders; each value is the lowercase name.

    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
159
160
161 # Byte order setting.
class _SetBo(_Item):
    # Item which sets the byte order.

    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
173
174
175 # Label.
class _Label(_Item):
    # A named label.

    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
188
189
190 # Offset setting.
class _SetOffset(_Item):
    # Item which sets the current offset.

    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
203
204
205 # Offset alignment.
class _AlignOffset(_Item):
    # Item which aligns the current offset, with a padding byte value.

    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        return "_AlignOffset({!r}, {!r}, {!r})".format(
            self._val, self._pad_val, self._text_loc
        )
226
227
228 # Mixin of containing an AST expression and its string.
229 class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
245 # Fill until some offset.
class _FillUntil(_Item, _ExprMixin):
    # Item which fills with a padding byte value until some offset
    # (the offset being given as an expression).

    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        return "_FillUntil({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._pad_val, self._text_loc
        )
266
267
268 # Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Assignment of the value of an expression to a named variable.

    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
289
290
291 # Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    # Fixed-length number given as an expression; its length is in bits.

    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size in bytes: the length in bits floor-divided by eight.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        return "_FlNum({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
316
317
318 # LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # Common base of the LEB128 integer items (value as an expression).

    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Uses the concrete class name so that subclasses don't need
        # their own __repr__().
        return "{}({!r}, {!r}, {!r})".format(
            self.__class__.__name__, self._expr_str, self._expr, self._text_loc
        )
331
332
333 # Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Unsigned flavor; everything is inherited from `_Leb128Int`.
    pass
336
337
338 # Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Signed flavor; everything is inherited from `_Leb128Int`.
    pass
341
342
343 # String.
class _Str(_Item, _RepableItem, _ExprMixin):
    # String item: an expression and the name of the codec with which
    # to encode its value.

    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._codec = codec

    # Codec name.
    @property
    def codec(self):
        return self._codec

    def __repr__(self):
        # Exactly one placeholder per field: the class name is already
        # part of the format string, so it must not also be passed as an
        # argument (doing so shifted every field and silently dropped
        # the text location).
        return "_Str({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._codec, self._text_loc
        )
365
366
367 # Group of items.
class _Group(_Item, _RepableItem):
    # An item which contains a list of items.

    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
380
381
382 # Repetition item.
class _Rep(_Item, _ExprMixin):
    # Repetition: an item to repeat and an expression (presumably the
    # repetition count; not established by this class alone).

    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(
            self._item, self._expr_str, self._expr, self._text_loc
        )
403
404
405 # Conditional item.
class _Cond(_Item, _ExprMixin):
    # Conditional: a condition expression selecting one of two items.

    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        return "_Cond({!r}, {!r}, {!r}, {!r}, {!r})".format(
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
438
439
440 # Macro definition item.
class _MacroDef(_Item):
    # Macro definition: a name, parameter names, and a group of items.

    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name = name
        self._param_names = param_names
        self._group = group

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def param_names(self):
        return self._param_names

    # Contained items.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        return "_MacroDef({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._param_names, self._group, self._text_loc
        )
472
473
474 # Macro expansion parameter.
475 class _MacroExpParam:
476 def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
477 self._expr_str = expr_str
478 self._expr = expr
479 self._text_loc = text_loc
480
481 # Expression string.
482 @property
483 def expr_str(self):
484 return self._expr_str
485
486 # Expression.
487 @property
488 def expr(self):
489 return self._expr
490
491 # Source text location.
492 @property
493 def text_loc(self):
494 return self._text_loc
495
496 def __repr__(self):
497 return "_MacroExpParam({}, {}, {})".format(
498 repr(self._expr_str), repr(self._expr), repr(self._text_loc)
499 )
500
501
502 # Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    # Macro expansion: the macro name and the expansion parameters.

    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        return "_MacroExp({!r}, {!r}, {!r})".format(
            self._name, self._params, self._text_loc
        )
530
531
532 # A parsing error message: a string and a text location.
class ParseErrorMessage:
    # One message of a parsing error: a text and a source text location.
    #
    # Direct construction is disabled: instances are built with the
    # private `_create()` factory.

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds an instance without going through the disabled `__init__()`.
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        inst = cls.__new__(cls)
        inst._init(text, text_loc)
        return inst

    # Effective initializer used by `_create()`.
    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Message text.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
556
557
558 # A parsing error containing one or more messages (`ParseErrorMessage`).
class ParseError(RuntimeError):
    # Parsing error carrying one or more `ParseErrorMessage` objects.
    #
    # Direct construction is disabled: instances are built with the
    # private `_create()` factory.

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds an instance without going through the disabled `__init__()`.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        inst = cls.__new__(cls)
        inst._init(msg, text_loc)
        return inst

    # Effective initializer: sets the exception message and records
    # `msg` as the first message.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a new message built from `msg` and `text_loc`.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
587
588
589 # Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # Creates a `ParseError` from `msg` and `text_loc`, and raises it.
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
592
593
594 # Adds a message to the parsing error `exc`.
595 def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
596 exc._add_msg(msg, text_loc) # pyright: ignore[reportPrivateUsage]
597
598
599 # Appends a message to the parsing error `exc` and reraises it.
600 def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
601 _add_error_msg(exc, msg, text_loc)
602 raise exc
603
604
605 # Returns a normalized version (so as to be parseable by int()) of
606 # the constant integer string `s`, possibly negative, dealing with
607 # any radix suffix.
608 def _norm_const_int(s: str):
609 neg = ""
610 pos = s
611
612 if s.startswith("-"):
613 neg = "-"
614 pos = s[1:]
615
616 for r in "xXoObB":
617 if pos.startswith("0" + r):
618 # Already correct
619 return s
620
621 # Try suffix
622 asm_suf_base = {
623 "h": "x",
624 "H": "x",
625 "q": "o",
626 "Q": "o",
627 "o": "o",
628 "O": "o",
629 "b": "b",
630 "B": "B",
631 }
632
633 for suf in asm_suf_base:
634 if pos[-1] == suf:
635 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
636
637 return s
638
639
640 # Encodes the string `s` using the codec `codec`, raising `ParseError`
641 # with `text_loc` on encoding error.
642 def _encode_str(s: str, codec: str, text_loc: TextLocation):
643 try:
644 return s.encode(codec)
645 except UnicodeEncodeError:
646 _raise_error(
647 "Cannot encode `{}` with the `{}` encoding".format(s, codec), text_loc
648 )
649
650
651 # Variables dictionary type (for type hints).
652 VariablesT = Dict[str, Union[int, float, str]]
653
654
655 # Labels dictionary type (for type hints).
656 LabelsT = Dict[str, int]
657
658
659 # Common patterns.
660 _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
661 _pos_const_int_pat = re.compile(
662 r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
663 )
664 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
665 _const_float_pat = re.compile(
666 r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
667 )
668
669
670 # Macro definition dictionary.
671 _MacroDefsT = Dict[str, _MacroDef]
672
673
674 # Normand parser.
675 #
676 # The constructor accepts a Normand input. After building, use the `res`
677 # property to get the resulting main group.
678 class _Parser:
679 # Builds a parser to parse the Normand input `normand`, parsing
680 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Keeps the Normand input `normand`, resets the cursor to the
    # beginning (line 1, column 1), seeds the known label and variable
    # names from `labels` and `variables`, and parses immediately.
    self._normand = normand
    self._at = 0
    self._line_no, self._col_no = 1, 1
    self._label_names = set(labels)
    self._var_names = set(variables)
    self._macro_defs = {}  # type: _MacroDefsT
    self._parse()
690
691 # Result (main group).
@property
def res(self):
    # Result of the parsing process (main group).
    result = self._res
    return result
695
696 # Macro definitions.
@property
def macro_defs(self):
    # Macro definitions dictionary of this parser.
    defs = self._macro_defs
    return defs
700
701 # Current text location.
@property
def _text_loc(self):
    # New text location object for the current line and column numbers.
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
707
708 # Returns `True` if this parser is done parsing.
709 def _is_done(self):
710 return self._at == len(self._normand)
711
712 # Returns `True` if this parser isn't done parsing.
713 def _isnt_done(self):
714 return not self._is_done()
715
716 # Raises a parse error, creating it using the message `msg` and the
717 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Raises a parsing error with the message `msg`, located at the
    # current text location.
    current_text_loc = self._text_loc
    _raise_error(msg, current_text_loc)
720
721 # Tries to make the pattern `pat` match the current substring,
722 # returning the match object and updating `self._at`,
723 # `self._line_no`, and `self._col_no` on success.
724 def _try_parse_pat(self, pat: Pattern[str]):
725 m = pat.match(self._normand, self._at)
726
727 if m is None:
728 return
729
730 # Skip matched string
731 self._at += len(m.group(0))
732
733 # Update line number
734 self._line_no += m.group(0).count("\n")
735
736 # Update column number
737 for i in reversed(range(self._at)):
738 if self._normand[i] == "\n" or i == 0:
739 if i == 0:
740 self._col_no = self._at + 1
741 else:
742 self._col_no = self._at - i
743
744 break
745
746 # Return match object
747 return m
748
749 # Expects the pattern `pat` to match the current substring,
750 # returning the match object and updating `self._at`,
751 # `self._line_no`, and `self._col_no` on success, or raising a parse
752 # error with the message `error_msg` on error.
753 def _expect_pat(self, pat: Pattern[str], error_msg: str):
754 # Match
755 m = self._try_parse_pat(pat)
756
757 if m is None:
758 # No match: error
759 self._raise_error(error_msg)
760
761 # Return match object
762 return m
763
764 # Pattern for _skip_ws_and_comments()
765 _ws_or_syms_or_comments_pat = re.compile(
766 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
767 )
768
769 # Skips as many whitespaces, insignificant symbol characters, and
770 # comments as possible.
771 def _skip_ws_and_comments(self):
772 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
773
774 # Pattern for _skip_ws()
775 _ws_pat = re.compile(r"\s*")
776
777 # Skips as many whitespaces as possible.
778 def _skip_ws(self):
779 self._try_parse_pat(self._ws_pat)
780
781 # Pattern for _try_parse_hex_byte()
782 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
783
784 # Tries to parse a hexadecimal byte, returning a byte item on
785 # success.
def _try_parse_hex_byte(self):
    # Tries to parse two hexadecimal nibbles (possibly separated by
    # whitespaces, insignificant symbols, and comments), returning a
    # byte item located at the first nibble, or `None` if there's no
    # initial nibble.
    #
    # A missing second nibble is a parsing error.
    begin_text_loc = self._text_loc
    m_high = self._try_parse_pat(self._nibble_pat)

    if m_high is None:
        # No match
        return

    self._skip_ws_and_comments()
    m_low = self._expect_pat(
        self._nibble_pat, "Expecting another hexadecimal nibble"
    )
    hex_digits = m_high.group(0) + m_low.group(0)
    return _Byte(int(hex_digits, 16), begin_text_loc)
804
805 # Patterns for _try_parse_bin_byte()
806 _bin_byte_bit_pat = re.compile(r"[01]")
807 _bin_byte_prefix_pat = re.compile(r"%+")
808
809 # Tries to parse a binary byte, returning a byte item on success.
def _try_parse_bin_byte(self):
    # Tries to parse one or more binary bytes: a run of `%` followed
    # with eight bits per `%`.
    #
    # Returns `None` without the prefix, a single byte item for a
    # single `%`, or a group of byte items otherwise.
    begin_text_loc = self._text_loc
    m_prefix = self._try_parse_pat(self._bin_byte_prefix_pat)

    if m_prefix is None:
        # No match
        return

    byte_items = []  # type: List[_Item]

    # One byte per `%` of the prefix
    for _ in m_prefix.group(0):
        self._skip_ws_and_comments()
        byte_text_loc = self._text_loc
        bit_str = ""

        # Expect eight bits
        for _bit_index in range(8):
            self._skip_ws_and_comments()
            m_bit = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            bit_str += m_bit.group(0)

        byte_items.append(_Byte(int(bit_str, 2), byte_text_loc))

    if len(byte_items) != 1:
        # As group
        return _Group(byte_items, begin_text_loc)

    return byte_items[0]
844
845 # Patterns for _try_parse_dec_byte()
846 _dec_byte_prefix_pat = re.compile(r"\$")
847 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
848
849 # Tries to parse a decimal byte, returning a byte item on success.
def _try_parse_dec_byte(self):
    # Tries to parse a decimal byte (`$` followed with an optionally
    # negative decimal constant), returning a byte item, or `None`
    # without the `$` prefix.
    #
    # The value must be within [-128, 255]; a negative value is stored
    # as its two's complement.
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No match
        return

    self._skip_ws()
    m_val = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(m_val.group("val"))

    if m_val.group("neg") == "-":
        val = -val

    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Two's complement
    return _Byte(val % 256, begin_text_loc)
874
875 # Tries to parse a byte, returning a byte item on success.
876 def _try_parse_byte(self):
877 # Hexadecimal
878 item = self._try_parse_hex_byte()
879
880 if item is not None:
881 return item
882
883 # Binary
884 item = self._try_parse_bin_byte()
885
886 if item is not None:
887 return item
888
889 # Decimal
890 item = self._try_parse_dec_byte()
891
892 if item is not None:
893 return item
894
895 # Strings corresponding to escape sequence characters
896 _lit_str_escape_seq_strs = {
897 "0": "\0",
898 "a": "\a",
899 "b": "\b",
900 "e": "\x1b",
901 "f": "\f",
902 "n": "\n",
903 "r": "\r",
904 "t": "\t",
905 "v": "\v",
906 "\\": "\\",
907 '"': '"',
908 }
909
910 # Patterns for _try_parse_lit_str()
911 _lit_str_prefix_suffix_pat = re.compile(r'"')
912 _lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
913
914 # Parses a literal string between double quotes (without an encoding
915 # prefix) and returns the resulting string.
916 def _try_parse_lit_str(self, with_prefix: bool):
917 # Match prefix if needed
918 if with_prefix:
919 if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None:
920 # No match
921 return
922
923 # Expect literal string
924 m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string")
925
926 # Expect end of string
927 self._expect_pat(
928 self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)'
929 )
930
931 # Replace escape sequences
932 val = m.group(0)
933
934 for ec in '0abefnrtv"\\':
935 val = val.replace(r"\{}".format(ec), self._lit_str_escape_seq_strs[ec])
936
937 # Return string
938 return val
939
940 # Patterns for _try_parse_utf_str_encoding()
941 _str_encoding_utf_prefix_pat = re.compile(r"u")
942 _str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
943
944 # Tries to parse a UTF encoding specification, returning the Python
945 # codec name on success.
946 def _try_parse_utf_str_encoding(self):
947 # Match prefix
948 if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None:
949 # No match
950 return
951
952 # Expect UTF specification
953 m = self._expect_pat(
954 self._str_encoding_utf_pat,
955 "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
956 )
957
958 # Convert to codec name
959 return {
960 "8": "utf_8",
961 "16be": "utf_16_be",
962 "16le": "utf_16_le",
963 "32be": "utf_32_be",
964 "32le": "utf_32_le",
965 }[m.group(0)]
966
967 # Patterns for _try_parse_str_encoding()
968 _str_encoding_gen_prefix_pat = re.compile(r"s")
969 _str_encoding_colon_pat = re.compile(r":")
970 _str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b")
971
972 # Tries to parse a string encoding specification, returning the
973 # Python codec name on success.
974 #
975 # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
976 def _try_parse_str_encoding(self, req_gen_prefix: bool = False):
977 # General prefix?
978 if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None:
979 # Expect `:`
980 self._skip_ws()
981 self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`")
982
983 # Expect encoding specification
984 self._skip_ws()
985
986 # UTF?
987 codec = self._try_parse_utf_str_encoding()
988
989 if codec is not None:
990 return codec
991
992 # Expect Latin
993 m = self._expect_pat(
994 self._str_encoding_non_utf_pat,
995 "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
996 )
997 return m.group(0)
998
999 # UTF?
1000 if not req_gen_prefix:
1001 return self._try_parse_utf_str_encoding()
1002
1003 # Patterns for _try_parse_str()
1004 _lit_str_prefix_pat = re.compile(r'"')
1005 _str_prefix_pat = re.compile(r'"|\{')
1006 _str_expr_pat = re.compile(r"[^}]+")
1007 _str_expr_suffix_pat = re.compile(r"\}")
1008
1009 # Tries to parse a string, returning a literal string or string item
1010 # on success.
def _try_parse_str(self):
    # Tries to parse a string, returning a literal string item or a
    # string item on success.
    #
    # Without an encoding specification, only a literal string (encoded
    # as UTF-8) is possible and `None` is returned when there's no `"`.
    # With one, a literal string (`"`) or an expression (`{`) must
    # follow.
    begin_text_loc = self._text_loc

    # Encoding
    codec = self._try_parse_str_encoding()
    self._skip_ws()

    if codec is not None:
        # Encoding present: expect a string prefix
        m_prefix = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`')
    else:
        # No encoding: only a literal string (UTF-8) is legal
        m_prefix = self._try_parse_pat(self._lit_str_prefix_pat)

        if m_prefix is None:
            return

    if m_prefix.group(0) != '"':
        # Expression between `{` and `}`
        self._skip_ws()
        expr_text_loc = self._text_loc
        m_expr = self._expect_pat(self._str_expr_pat, "Expecting an expression")
        self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")
        expr_str, expr = self._ast_expr_from_str(m_expr.group(0), expr_text_loc)

        # `{` can only be matched when there's an encoding
        assert codec is not None
        return _Str(expr_str, expr, codec, begin_text_loc)

    # Literal string (the opening `"` is already consumed)
    str_text_loc = self._text_loc
    val = self._try_parse_lit_str(False)

    if val is None:
        self._raise_error("Expecting a literal string")

    effective_codec = codec if codec is not None else "utf_8"
    data = _encode_str(val, effective_codec, str_text_loc)
    return _LitStr(data, begin_text_loc)
1061
1062 # Common right parenthesis pattern
1063 _right_paren_pat = re.compile(r"\)")
1064
1065 # Patterns for _try_parse_group()
1066 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
1067
1068 # Tries to parse a group, returning a group item on success.
def _try_parse_group(self):
    # Tries to parse a group, returning a group item on success, or
    # `None` without a group prefix.
    #
    # A group opened with `(` ends with `)`; any other opening ends
    # with what `_block_end_pat` matches (reported as `!end`).
    begin_text_loc = self._text_loc
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        # No match
        return

    # Parse items
    items = self._parse_items()

    # Expect end of group
    self._skip_ws_and_comments()

    if m_open.group(0) == "(":
        end_pat, end_str = self._right_paren_pat, ")"
    else:
        end_pat, end_str = self._block_end_pat, "!end"

    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )
    return _Group(items, begin_text_loc)
1096
1097 # Returns a stripped expression string and an AST expression node
1098 # from the expression string `expr_str` at text location `text_loc`.
1099 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
1100 # Create an expression node from the expression string
1101 expr_str = expr_str.strip().replace("\n", " ")
1102
1103 try:
1104 expr = ast.parse(expr_str, mode="eval")
1105 except SyntaxError:
1106 _raise_error(
1107 "Invalid expression `{}`: invalid syntax".format(expr_str),
1108 text_loc,
1109 )
1110
1111 return expr_str, expr
1112
1113 # Patterns for _try_parse_val()
1114 _val_expr_pat = re.compile(r"([^}:]+):\s*")
1115 _fl_num_len_fmt_pat = re.compile(r"8|16|24|32|40|48|56|64")
1116 _leb128_int_fmt_pat = re.compile(r"(u|s)leb128")
1117
1118 # Tries to parse a value (number or string) and format (fixed length
1119 # in bits, `uleb128`, `sleb128`, or `s:` followed with an encoding
1120 # name), returning an item on success.
def _try_parse_val(self):
    # Tries to parse an expression followed with `:` and a format,
    # returning the corresponding item, or `None` if the expression
    # part doesn't match.
    #
    # The format is tried in this order: fixed length in bits,
    # `uleb128`/`sleb128`, and `s:` followed with an encoding; anything
    # else is a parsing error.
    begin_text_loc = self._text_loc
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # No match
        return

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

    # Fixed length?
    m_fmt = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if m_fmt is not None:
        return _FlNum(expr_str, expr, int(m_fmt.group(0)), begin_text_loc)

    # LEB128?
    m_fmt = self._try_parse_pat(self._leb128_int_fmt_pat)

    if m_fmt is not None:
        cls = _ULeb128Int if m_fmt.group(1) == "u" else _SLeb128Int
        return cls(expr_str, expr, begin_text_loc)

    # String encoding?
    codec = self._try_parse_str_encoding(True)

    if codec is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
        )

    return _Str(expr_str, expr, codec, begin_text_loc)
1165
1166 # Patterns for _try_parse_var_assign()
1167 _var_assign_name_equal_pat = re.compile(
1168 r"({})\s*=(?!=)".format(_py_name_pat.pattern)
1169 )
1170 _var_assign_expr_pat = re.compile(r"[^}]+")
1171
1172 # Tries to parse a variable assignment, returning a variable
1173 # assignment item on success.
def _try_parse_var_assign(self):
    # Tries to parse `NAME = EXPR`, returning a variable assignment
    # item, or `None` if `NAME =` doesn't match.
    #
    # The reserved name (`_icitte_name`) and the name of an existing
    # label are parsing errors. On success, the name joins the known
    # variable names.
    begin_text_loc = self._text_loc
    m_name = self._try_parse_pat(self._var_assign_name_equal_pat)

    if m_name is None:
        # No match
        return

    # Validate name
    name = m_name.group(1)

    if name == _icitte_name:
        msg = "`{}` is a reserved variable name".format(_icitte_name)
        _raise_error(msg, begin_text_loc)

    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), begin_text_loc)

    # Expect an expression
    self._skip_ws()
    m_expr = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(m_expr.group(0), begin_text_loc)

    # Add to known variable names
    self._var_names.add(name)
    return _VarAssign(name, expr_str, expr, begin_text_loc)
1212
1213 # Pattern for _try_parse_set_bo()
1214 _bo_pat = re.compile(r"[bl]e")
1215
1216 # Tries to parse a byte order name, returning a byte order setting
1217 # item on success.
def _try_parse_set_bo(self):
    """Try to parse a byte order name (`be` or `le`).

    Returns a `_SetBo` item holding `ByteOrder.BE` or `ByteOrder.LE` on
    success, or `None` when the text at the current position isn't a
    byte order name.
    """
    start_loc = self._text_loc
    bo_match = self._try_parse_pat(self._bo_pat)

    if bo_match is None:
        # Not a byte order name
        return None

    bo_name = bo_match.group(0)

    if bo_name != "be":
        # The pattern only matches `be` and `le`
        assert bo_name == "le"
        return _SetBo(ByteOrder.LE, start_loc)

    return _SetBo(ByteOrder.BE, start_loc)
1234
1235 # Patterns for _try_parse_val_or_var_assign_or_set_bo()
1236 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1237 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
1238
1239 # Tries to parse a value, a variable assignment, or a byte order
1240 # setting, returning an item on success.
1241 def _try_parse_val_or_var_assign_or_set_bo(self):
1242 # Match prefix
1243 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
1244 # No match
1245 return
1246
1247 self._skip_ws()
1248
1249 # Variable assignment item?
1250 item = self._try_parse_var_assign()
1251
1252 if item is None:
1253 # Value item?
1254 item = self._try_parse_val()
1255
1256 if item is None:
1257 # Byte order setting item?
1258 item = self._try_parse_set_bo()
1259
1260 if item is None:
1261 # At this point it's invalid
1262 self._raise_error(
1263 "Expecting a fixed-length number, a string, a variable assignment, or a byte order setting"
1264 )
1265
1266 # Expect suffix
1267 self._skip_ws()
1268 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
1269 return item
1270
1271 # Tries to parse an offset setting value (after the initial `<`),
1272 # returning an offset item on success.
def _try_parse_set_offset_val(self):
    """Try to parse an offset setting value (after the initial `<`).

    Returns a `_SetOffset` item on success, or `None` when the text at
    the current position isn't a positive constant integer.
    """
    start_loc = self._text_loc
    val_match = self._try_parse_pat(_pos_const_int_pat)

    if val_match is None:
        # Not an offset setting value
        return None

    # Base 0: int() deduces the radix from the normalized literal
    new_offset = int(_norm_const_int(val_match.group(0)), 0)
    return _SetOffset(new_offset, start_loc)
1285
1286 # Tries to parse a label name (after the initial `<`), returning a
1287 # label item on success.
def _try_parse_label_name(self):
    """Try to parse a label name (after the initial `<`).

    Returns a `_Label` item on success, or `None` when the text at the
    current position isn't a name.

    Reports an error with _raise_error() when the name is the reserved
    `ICITTE` name, an already known label name, or an already known
    variable name.
    """
    start_loc = self._text_loc
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is None:
        # Not a label name
        return None

    label_name = name_match.group(0)

    # Reserved?
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), start_loc
        )

    # Already a label?
    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), start_loc)

    # Already a variable?
    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), start_loc
        )

    # Remember this label name
    self._label_names.add(label_name)
    return _Label(label_name, start_loc)
1317
1318 # Patterns for _try_parse_label_or_set_offset()
1319 _label_set_offset_prefix_pat = re.compile(r"<")
1320 _label_set_offset_suffix_pat = re.compile(r">")
1321
1322 # Tries to parse a label or an offset setting, returning an item on
1323 # success.
1324 def _try_parse_label_or_set_offset(self):
1325 # Match prefix
1326 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
1327 # No match
1328 return
1329
1330 # Offset setting item?
1331 self._skip_ws()
1332 item = self._try_parse_set_offset_val()
1333
1334 if item is None:
1335 # Label item?
1336 item = self._try_parse_label_name()
1337
1338 if item is None:
1339 # At this point it's invalid
1340 self._raise_error("Expecting a label name or an offset setting value")
1341
1342 # Expect suffix
1343 self._skip_ws()
1344 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
1345 return item
1346
1347 # Pattern for _parse_pad_val()
1348 _pad_val_prefix_pat = re.compile(r"~")
1349
1350 # Tries to parse a padding value, returning the padding value, or 0
1351 # if none.
def _parse_pad_val(self):
    """Parse an optional padding value (`~` followed by a byte value).

    Returns the padding byte value, or 0 when there's no `~` at the
    current position (after skipping whitespaces).

    Reports an error with _raise_error() when the value is greater
    than 255.
    """
    self._skip_ws()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value: default
        return 0

    self._skip_ws()
    val_loc = self._text_loc
    val_match = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    pad_val = int(_norm_const_int(val_match.group(0)), 0)

    # Must fit in a single byte
    if pad_val > 255:
        _raise_error("Invalid padding byte value {}".format(pad_val), val_loc)

    return pad_val
1375
1376 # Patterns for _try_parse_align_offset()
1377 _align_offset_prefix_pat = re.compile(r"@")
1378 _align_offset_val_pat = re.compile(r"\d+")
1379
1380 # Tries to parse an offset alignment, returning an offset alignment
1381 # item on success.
def _try_parse_align_offset(self):
    """Try to parse an offset alignment (`@`, an alignment in bits,
    and an optional padding value).

    Returns an `_AlignOffset` item on success, or `None` when the text
    at the current position doesn't start with `@`.

    Reports an error with _raise_error() when the alignment isn't a
    positive multiple of eight.
    """
    start_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # Not an offset alignment
        return None

    # Alignment value (bits)
    self._skip_ws()
    align_loc = self._text_loc
    align_match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(align_match.group(0))
    is_valid = align > 0 and align % 8 == 0

    if not is_valid:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            align_loc,
        )

    # Optional padding value
    pad_val = self._parse_pad_val()
    return _AlignOffset(align, pad_val, start_loc)
1414
1415 # Patterns for _expect_expr()
1416 _inner_expr_prefix_pat = re.compile(r"\{")
1417 _inner_expr_pat = re.compile(r"[^}]+")
1418 _inner_expr_suffix_pat = re.compile(r"\}")
1419
1420 # Parses an expression outside a `{`/`}` context.
1421 #
1422 # This function accepts:
1423 #
1424 # • A Python expression within `{` and `}`.
1425 #
1426 # • A Python name.
1427 #
1428 # • If `accept_const_int` is `True`: a constant integer, which may
1429 # be negative if `allow_neg_int` is `True`.
1430 #
1431 # • If `accept_const_float` is `True`: a constant floating point number.
1432 #
1433 # Returns the stripped expression string and AST expression.
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    """Parse an expression located outside a `{`/`}` context.

    The alternatives are tried in this order; the first one which
    matches wins:

    1. If `accept_const_float` is `True`: a constant floating point
       number.
    2. If `accept_const_int` is `True`: a constant integer. A negative
       one is reported as an error with _raise_error() unless
       `allow_neg_int` is `True`.
    3. A Python name.
    4. If `accept_lit_str` is `True`: a literal string (its `repr()`
       becomes the expression).
    5. A Python expression between `{` and `}` (expected at this
       point).

    Returns what `self._ast_expr_from_str()` returns for the accepted
    text; callers unpack it as the expression string and the AST
    expression.
    """
    start_loc = self._text_loc

    # 1. Constant floating point number
    if accept_const_float:
        float_match = self._try_parse_pat(_const_float_pat)

        if float_match is not None:
            return self._ast_expr_from_str(float_match.group(0), start_loc)

    # 2. Constant integer
    if accept_const_int:
        int_match = self._try_parse_pat(_const_int_pat)

        if int_match is not None:
            is_neg = int_match.group("neg") == "-"

            if is_neg and not allow_neg_int:
                _raise_error("Expecting a positive constant integer", start_loc)

            return self._ast_expr_from_str(
                _norm_const_int(int_match.group(0)), start_loc
            )

    # 3. Name
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is not None:
        return self._ast_expr_from_str(name_match.group(0), start_loc)

    # 4. Literal string
    if accept_lit_str:
        lit_str = self._try_parse_lit_str(True)

        if lit_str is not None:
            return self._ast_expr_from_str(repr(lit_str), start_loc)

    # 5. `{` expression `}`: build the list of what would have been
    # accepted for the error message of a missing `{`.
    accepted = []

    if accept_const_int:
        accepted.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        accepted.append("a constant floating point number")

    if accept_lit_str:
        accepted.append("a literal string")

    accepted += ["a name", "or `{`"]

    # Only two parts means "a name or `{`": no comma needed
    separator = " " if len(accepted) == 2 else ", "
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(separator.join(accepted)),
    )

    # Inner expression
    self._skip_ws()
    inner_loc = self._text_loc
    inner_match = self._expect_pat(self._inner_expr_pat, "Expecting an expression")
    inner_str = inner_match.group(0)

    # Closing `}`
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_str, inner_loc)
1511
1512 # Patterns for _try_parse_fill_until()
1513 _fill_until_prefix_pat = re.compile(r"\+")
1514 _fill_until_pad_val_prefix_pat = re.compile(r"~")
1515
1516 # Tries to parse a filling, returning a filling item on success.
def _try_parse_fill_until(self):
    """Try to parse a filling (`+`, an expression, and an optional
    padding value).

    Returns a `_FillUntil` item on success, or `None` when the text at
    the current position doesn't start with `+`.
    """
    start_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # Not a filling
        return None

    # Expression (a constant integer is accepted)
    self._skip_ws()
    until_expr_str, until_expr = self._expect_expr(accept_const_int=True)

    # Optional padding value
    pad_val = self._parse_pad_val()
    return _FillUntil(until_expr_str, until_expr, pad_val, start_loc)
1534
1535 # Parses the multiplier expression of a repetition (block or
1536 # post-item) and returns the expression string and AST node.
1537 def _expect_rep_mul_expr(self):
1538 return self._expect_expr(accept_const_int=True)
1539
1540 # Common block end pattern
1541 _block_end_pat = re.compile(r"!end\b")
1542
1543 # Pattern for _try_parse_rep_block()
1544 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
1545
1546 # Tries to parse a repetition block, returning a repetition item on
1547 # success.
def _try_parse_rep_block(self):
    """Try to parse a repetition block (`!repeat`/`!r`, a multiplier
    expression, items, and `!end`).

    Returns a `_Rep` item wrapping a `_Group` of the block items on
    success, or `None` when the text at the current position doesn't
    start with the block prefix.
    """
    start_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return None

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Block items
    self._skip_ws_and_comments()
    body_loc = self._text_loc
    body_items = self._parse_items()

    # `!end`
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    body = _Group(body_items, body_loc)
    return _Rep(body, mul_expr_str, mul_expr, start_loc)
1573
1574 # Pattern for _try_parse_cond_block()
1575 _cond_block_prefix_pat = re.compile(r"!if\b")
1576 _cond_block_else_pat = re.compile(r"!else\b")
1577
1578 # Tries to parse a conditional block, returning a conditional item
1579 # on success.
def _try_parse_cond_block(self):
    """Try to parse a conditional block (`!if`, an expression, "true"
    items, optionally `!else` and "false" items, and `!end`).

    Returns a `_Cond` item on success, or `None` when the text at the
    current position doesn't start with `!if`.

    Without any `!else` part, the "false" group is empty and gets the
    text location of the beginning of the block.
    """
    start_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return None

    # Condition expression
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" branch
    self._skip_ws_and_comments()
    true_loc = self._text_loc
    true_items = self._parse_items()

    # Default "false" branch: nothing
    false_loc = start_loc
    false_items = []  # type: List[_Item]

    # Optional `!else` branch
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._cond_block_else_pat) is not None:
        self._skip_ws_and_comments()
        false_loc = self._text_loc
        false_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_loc)
    false_group = _Group(false_items, false_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, start_loc)
1622
1623 # Common left parenthesis pattern
1624 _left_paren_pat = re.compile(r"\(")
1625
1626 # Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
1627 _macro_params_comma_pat = re.compile(",")
1628
1629 # Patterns for _try_parse_macro_def()
1630 _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1631
1632 # Tries to parse a macro definition, adding it to `self._macro_defs`
1633 # and returning `True` on success.
def _try_parse_macro_def(self):
    """Try to parse a macro definition block (`!macro`/`!m`, a name,
    parenthesized comma-separated parameter names, items, and `!end`).

    On success, registers a `_MacroDef` object in `self._macro_defs`
    under the macro name and returns `True`. Returns `False` when the
    text at the current position doesn't start with the block prefix.

    Reports an error with _raise_error() for a duplicate macro name or
    a duplicate parameter name.

    The items of the macro are parsed with empty sets of known variable
    and label names; the previous sets are put back afterwards.
    """
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name
    self._skip_ws()
    macro_name_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error(
            "Duplicate macro named `{}`".format(macro_name), macro_name_loc
        )

    # `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names until `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter names
            break

        # A comma is required between two parameter names
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_loc,
            )

        param_names.append(param_name)

    # Items, parsed with their own sets of known names
    self._skip_ws_and_comments()
    body_loc = self._text_loc
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register the macro
    body = _Group(body_items, body_loc)
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, body, start_loc
    )
    return True
1709
1710 # Patterns for _try_parse_macro_exp()
1711 _macro_exp_prefix_pat = re.compile(r"m\b")
1712 _macro_exp_colon_pat = re.compile(r":")
1713
1714 # Tries to parse a macro expansion, returning a macro expansion item
1715 # on success.
def _try_parse_macro_exp(self):
    """Try to parse a macro expansion (`m`, `:`, a macro name, and
    parenthesized comma-separated parameter values).

    Returns a `_MacroExp` item on success, or `None` when the text at
    the current position doesn't start with the `m` prefix.

    Reports an error with _raise_error() when the macro name is unknown
    or when the number of parameter values differs from the number of
    parameter names of the macro definition.
    """
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return None

    # `:`
    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Macro name, which must be known
    self._skip_ws()
    macro_name_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error(
            "Unknown macro name `{}`".format(macro_name), macro_name_loc
        )

    # `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values until `)`
    params_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter values
            break

        # A comma is required between two parameter values
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_loc = self._text_loc
        param_expr = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        params.append(_MacroExpParam(*param_expr, text_loc=param_loc))

    # As many values as the definition has parameter names?
    passed_count = len(params)
    expected_count = len(macro_def.param_names)

    if passed_count != expected_count:
        plural_suffix = "" if passed_count == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                passed_count, plural_suffix, expected_count
            ),
            params_loc,
        )

    return _MacroExp(macro_name, params, start_loc)
1788
1789 # Tries to parse a base item (anything except a repetition),
1790 # returning it on success.
1791 def _try_parse_base_item(self):
1792 # Byte item?
1793 item = self._try_parse_byte()
1794
1795 if item is not None:
1796 return item
1797
1798 # String item?
1799 item = self._try_parse_str()
1800
1801 if item is not None:
1802 return item
1803
1804 # Value, variable assignment, or byte order setting item?
1805 item = self._try_parse_val_or_var_assign_or_set_bo()
1806
1807 if item is not None:
1808 return item
1809
1810 # Label or offset setting item?
1811 item = self._try_parse_label_or_set_offset()
1812
1813 if item is not None:
1814 return item
1815
1816 # Offset alignment item?
1817 item = self._try_parse_align_offset()
1818
1819 if item is not None:
1820 return item
1821
1822 # Filling item?
1823 item = self._try_parse_fill_until()
1824
1825 if item is not None:
1826 return item
1827
1828 # Group item?
1829 item = self._try_parse_group()
1830
1831 if item is not None:
1832 return item
1833
1834 # Repetition block item?
1835 item = self._try_parse_rep_block()
1836
1837 if item is not None:
1838 return item
1839
1840 # Conditional block item?
1841 item = self._try_parse_cond_block()
1842
1843 if item is not None:
1844 return item
1845
1846 # Macro expansion?
1847 item = self._try_parse_macro_exp()
1848
1849 if item is not None:
1850 return item
1851
1852 # Pattern for _try_parse_rep_post()
1853 _rep_post_prefix_pat = re.compile(r"\*")
1854
1855 # Tries to parse a post-item repetition, returning the expression
1856 # string and AST expression node on success.
1857 def _try_parse_rep_post(self):
1858 # Match prefix
1859 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
1860 # No match
1861 return
1862
1863 # Return expression string and AST expression
1864 self._skip_ws_and_comments()
1865 return self._expect_rep_mul_expr()
1866
1867 # Tries to parse an item, possibly followed by a repetition,
1868 # returning `True` on success.
1869 #
1870 # Appends any parsed item to `items`.
def _try_append_item(self, items: List[_Item]):
    """Try to parse an item, possibly followed by a post-item
    repetition, and append it to `items`.

    When the base item is a `_RepableItem` followed by a post-item
    repetition, what's appended is a `_Rep` item wrapping the base
    item.

    Returns `True` when an item was appended, or `None` when there's no
    base item at the current position (after skipping whitespaces and
    comments).
    """
    self._skip_ws_and_comments()
    base_item = self._try_parse_base_item()

    if base_item is None:
        # Nothing to append
        return None

    new_item = base_item

    # Only repeatable items may be followed by a repetition
    if isinstance(base_item, _RepableItem):
        self._skip_ws_and_comments()
        rep_loc = self._text_loc
        rep_expr = self._try_parse_rep_post()

        if rep_expr is not None:
            new_item = _Rep(base_item, *rep_expr, text_loc=rep_loc)

    items.append(new_item)
    return True
1891
1892 # Parses and returns items, skipping whitespaces, insignificant
1893 # symbols, and comments when allowed, and stopping at the first
1894 # unknown character.
1895 #
1896 # Accepts and registers macro definitions if `accept_macro_defs`
1897 # is `True`.
1898 def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
1899 items = [] # type: List[_Item]
1900
1901 while self._isnt_done():
1902 # Try to append item
1903 if not self._try_append_item(items):
1904 if accept_macro_defs and self._try_parse_macro_def():
1905 continue
1906
1907 # Unknown at this point
1908 break
1909
1910 return items
1911
1912 # Parses the whole Normand input, setting `self._res` to the main
1913 # group item on success.
def _parse(self):
    """Parse the whole Normand input.

    Sets `self._res` to the main `_Group` item on success. Reports an
    error with `self._raise_error()` when something remains after the
    first level items (and trailing whitespaces and comments).
    """
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items (macro definitions are accepted here)
    top_items = self._parse_items(True)

    # Nothing must remain
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected))

    # Main group item
    self._res = _Group(top_items, self._text_loc)
1933
1934
1935 # The return type of parse().
class ParseResult:
    """Read-only container for what a parsing operation produced: the
    generated data, the variables, the labels, the offset, and the byte
    order.

    Instances are built with the private _create() class method;
    calling the class directly raises `NotImplementedError`.
    """

    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which always raises
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # The objects are kept as is (no copy)
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    @property
    def data(self):
        """Generated data."""
        return self._data

    @property
    def variables(self):
        """Dictionary of updated variable names to their last computed
        value."""
        return self._vars

    @property
    def labels(self):
        """Dictionary of updated main group label names to their
        computed value."""
        return self._labels

    @property
    def offset(self):
        """Updated offset."""
        return self._offset

    @property
    def byte_order(self):
        """Updated byte order."""
        return self._bo
1992
1993
1994 # Raises a parse error for the item `item`, creating it using the
1995 # message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    """Raise a parse error with the message `msg` at the text location
    of the item `item` (forwards to _raise_error())."""
    item_loc = item.text_loc
    _raise_error(msg, item_loc)
1998
1999
2000 # The `ICITTE` reserved name.
2001 _icitte_name = "ICITTE"
2002
2003
2004 # Base node visitor.
2005 #
2006 # Calls the _visit_name() method for each name node which isn't the name
2007 # of a call.
2008 class _NodeVisitor(ast.NodeVisitor):
2009 def __init__(self):
2010 self._parent_is_call = False
2011
2012 def generic_visit(self, node: ast.AST):
2013 if type(node) is ast.Call:
2014 self._parent_is_call = True
2015 elif type(node) is ast.Name and not self._parent_is_call:
2016 self._visit_name(node.id)
2017
2018 super().generic_visit(node)
2019 self._parent_is_call = False
2020
2021 @abc.abstractmethod
2022 def _visit_name(self, name: str):
2023 ...
2024
2025
2026 # Expression validator: validates that all the names within the
2027 # expression are allowed.
class _ExprValidator(_NodeVisitor):
    """Expression validator: makes sure that each name reported by the
    base visitor is either `ICITTE` or one of the allowed names.

    `expr_str` and `text_loc` are only used to report an error: the
    expression string is part of the message and the text location is
    passed to _raise_error().
    """

    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # Known and reachable variable/label name: nothing to report
        if name == _icitte_name or name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # Show the legal names too (the reserved name is always legal)
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        if legal_names:
            quoted_names = ["`{}`".format(legal_name) for legal_name in legal_names]
            quoted_names.sort()
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._text_loc)
2056
2057
2058 # Generator state.
2059 class _GenState:
2060 def __init__(
2061 self,
2062 variables: VariablesT,
2063 labels: LabelsT,
2064 offset: int,
2065 bo: Optional[ByteOrder],
2066 ):
2067 self.variables = variables.copy()
2068 self.labels = labels.copy()
2069 self.offset = offset
2070 self.bo = bo
2071
2072 def __repr__(self):
2073 return "_GenState({}, {}, {}, {})".format(
2074 repr(self.variables), repr(self.labels), repr(self.offset), repr(self.bo)
2075 )
2076
2077
2078 # Fixed-length number item instance.
2079 class _FlNumItemInst:
2080 def __init__(
2081 self,
2082 item: _FlNum,
2083 offset_in_data: int,
2084 state: _GenState,
2085 parse_error_msgs: List[ParseErrorMessage],
2086 ):
2087 self._item = item
2088 self._offset_in_data = offset_in_data
2089 self._state = state
2090 self._parse_error_msgs = parse_error_msgs
2091
2092 @property
2093 def item(self):
2094 return self._item
2095
2096 @property
2097 def offset_in_data(self):
2098 return self._offset_in_data
2099
2100 @property
2101 def state(self):
2102 return self._state
2103
2104 @property
2105 def parse_error_msgs(self):
2106 return self._parse_error_msgs
2107
2108
2109 # Generator of data and final state from a group item.
2110 #
2111 # Generation happens in memory at construction time. After building, use
2112 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2113 # get the resulting context.
2114 #
2115 # The steps of generation are:
2116 #
2117 # 1. Handle each item in prefix order.
2118 #
2119 # The handlers append bytes to `self._data` and update some current
2120 # state object (`_GenState` instance).
2121 #
2122 # When handling a fixed-length number item, try to evaluate its
2123 # expression using the current state. If this fails, then it might be
2124 # because the expression refers to a "future" label: save the current
2125 # offset in `self._data` (generated data) and a snapshot of the
2126 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
2127 # object). _gen_fl_num_item_insts() will deal with this later. A
2128 # `_FlNumItemInst` instance also contains a snapshot of the current
2129 # parsing error messages (`self._parse_error_msgs`) which need to be
2130 # taken into account when handling the instance later.
2131 #
2132 # When handling the items of a group, keep a map of immediate label
2133 # names to their offset. Then, after having processed all the items,
2134 # update the relevant saved state snapshots in
2135 # `self._fl_num_item_insts` with those immediate label values.
2136 # _gen_fl_num_item_insts() will deal with this later.
2137 #
2138 # 2. Handle all the fixed-length number item instances of which the
2139 # expression evaluation failed before.
2140 #
2141 # At this point, `self._fl_num_item_insts` contains everything that's
2142 # needed to evaluate the expressions, including the values of
2143 # "future" labels from the point of view of some fixed-length number
2144 # item instance.
2145 #
2146 # If an evaluation fails at this point, then it's a user error. Add
2147 # to the parsing error all the saved parsing error messages of the
2148 # instance. Those additional messages add precious context to the
2149 # error.
2150 class _Gen:
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    """Build the generator and immediately run the generation of
    `group`, starting from a new `_GenState` object made of
    `variables`, `labels`, `offset`, and `bo`.

    `macro_defs` is kept as is in `self._macro_defs`.
    """
    self._macro_defs = macro_defs

    # Nothing deferred and no error context yet
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    initial_state = _GenState(variables, labels, offset, bo)
    self._gen(group, initial_state)
2164
2165 # Generated bytes.
@property
def data(self):
    """Generated bytes (`self._data`)."""
    return self._data
2169
2170 # Updated variables.
@property
def variables(self):
    """Updated variables (those of the final state)."""
    return self._final_state.variables
2174
2175 # Updated main group labels.
@property
def labels(self):
    """Updated main group labels (those of the final state)."""
    return self._final_state.labels
2179
2180 # Updated offset.
@property
def offset(self):
    """Updated offset (the one of the final state)."""
    return self._final_state.offset
2184
2185 # Updated byte order.
@property
def bo(self):
    """Updated byte order (the one of the final state)."""
    return self._final_state.bo
2189
2190 # Evaluates the expression `expr` of which the original string is
2191 # `expr_str` at the location `text_loc` considering the current
2192 # generation state `state`.
2193 #
2194 # If `accept_float` is `True`, then the type of the result may be
2195 # `float` too.
2196 #
2197 # If `accept_str` is `True`, then the type of the result may be
2198 # `str` too.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    """Evaluate the expression `expr`, of which the original string is
    `expr_str`, considering the generation state `state`.

    The symbols available to the expression are the labels of `state`,
    then `ICITTE` (the current offset), then the variables of `state`;
    a later symbol replaces an earlier one having the same name.

    The names of the expression are validated with `_ExprValidator`
    before the evaluation. A `bool` result is converted to `int`.

    The result type must be `int`, or also `float` if `accept_float` is
    `True`, or also `str` if `accept_str` is `True`.

    Reports an error with _raise_error() at the location `text_loc`
    when the evaluation raises an exception or when the result type is
    unexpected.

    Returns the result of the evaluation.
    """
    # Labels first, then `ICITTE`, then variables
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # NOTE(review): eval() runs the compiled expression with `syms` as
    # local names; no explicit globals are passed. Confirm that this is
    # acceptable if the Normand input isn't trusted.
    try:
        val = eval(compile(expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(val) is bool:
        val = int(val)

    # Expected result types
    expected_types = {int}  # type: Set[type]

    if accept_float:
        expected_types.add(float)

    if accept_str:
        expected_types.add(str)

    if type(val) in expected_types:
        return val

    # Unexpected result type: build the "A", "A or B", or "A, B, or C"
    # part of the message.
    type_names = sorted(["`{}`".format(t.__name__) for t in expected_types])
    type_count = len(type_names)

    if type_count == 1:
        msg_expected = type_names[0]
    elif type_count == 2:
        msg_expected = " or ".join(type_names)
    else:
        type_names[-1] = "or {}".format(type_names[-1])
        msg_expected = ", ".join(type_names)

    _raise_error(
        "Invalid expression `{}`: expecting result type {}, not `{}`".format(
            expr_str, msg_expected, type(val).__name__
        ),
        text_loc,
    )

    return val
2263
2264 # Forwards to _eval_expr() with the expression and text location of
2265 # `item`.
@staticmethod
def _eval_item_expr(
    item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign],
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    """Evaluate the expression of the item `item` considering the
    generation state `state`.

    Forwards to _eval_expr() with the expression string, the
    expression, and the text location of `item`, and returns its
    result.
    """
    return _Gen._eval_expr(
        item.expr_str,
        item.expr,
        item.text_loc,
        state,
        accept_float=accept_float,
        accept_str=accept_str,
    )
2276
2277 # Handles the byte item `item`.
2278 def _handle_byte_item(self, item: _Byte, state: _GenState):
2279 self._data.append(item.val)
2280 state.offset += item.size
2281
2282 # Handles the literal string item `item`.
2283 def _handle_lit_str_item(self, item: _LitStr, state: _GenState):
2284 self._data += item.data
2285 state.offset += item.size
2286
2287 # Handles the byte order setting item `item`.
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    """Handle the byte order setting item `item`: its byte order
    becomes the current byte order of `state`."""
    # Update current byte order
    state.bo = item.bo
2291
2292 # Handles the variable assignment item `item`.
2293 def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
2294 # Update variable
2295 state.variables[item.name] = self._eval_item_expr(
2296 item, state, accept_float=True, accept_str=True
2297 )
2298
2299 # Handles the fixed-length number item `item`.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    """Handle the fixed-length number item `item`.

    Reports an error when the current byte order of `state` isn't
    defined and `item` needs more than 8 bits.

    The bytes of `item` are generated immediately if possible. If this
    fails (any exception), a `_FlNumItemInst` object holding `item`,
    the current length of the generated data, and deep copies of
    `state` and of the current parsing error messages is saved in
    `self._fl_num_item_insts`, and zero bytes are appended as a
    placeholder.

    In both cases, the offset of `state` is increased by the number of
    appended bytes.
    """
    # A byte order is required for more than one byte
    if item.len > 8 and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        # Immediate evaluation
        item_data = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Keep everything needed to (try to) generate the bytes of
        # this item later.
        deferred_inst = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(deferred_inst)

        # Placeholder bytes reserving the space of this instance
        item_data = b"\x00" * (item.len // 8)

    self._data += item_data
    state.offset += len(item_data)
2332
2333 # Returns the size, in bytes, required to encode the value `val`
2334 # with LEB128 (signed version if `is_signed` is `True`).
2335 @staticmethod
2336 def _leb128_size_for_val(val: int, is_signed: bool):
2337 if val < 0:
2338 # Equivalent upper bound.
2339 #
2340 # For example, if `val` is -128, then the full integer for
2341 # this number of bits would be [-128, 127].
2342 val = -val - 1
2343
2344 # Number of bits (add one for the sign if needed)
2345 bits = val.bit_length() + int(is_signed)
2346
2347 if bits == 0:
2348 bits = 1
2349
2350 # Seven bits per byte
2351 return math.ceil(bits / 7)
2352
2353 # Handles the LEB128 integer item `item`.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    # Compute value
    val = self._eval_item_expr(item, state)

    # Signed (SLEB128) or unsigned (ULEB128) encoding?
    is_signed = type(item) is _SLeb128Int

    # An unsigned LEB128 integer cannot represent a negative value:
    # without this check, the loop below would silently emit bytes
    # which decode to an unrelated positive number.
    if not is_signed and val < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                item.expr_str, val
            ),
            item,
        )

    # Size in bytes
    size = self._leb128_size_for_val(val, is_signed)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= ~0x80

    # Update offset
    state.offset += size
2372
2373 # Handles the string item `item`.
def _handle_str_item(self, item: _Str, state: _GenState):
    # Evaluate the expression, accepting floating point number and
    # string results too: whatever the type of the result, its string
    # form is what gets encoded.
    result = self._eval_item_expr(item, state, accept_float=True, accept_str=True)
    encoded = _encode_str(str(result), item.codec, item.text_loc)

    # Emit the encoded string and move the offset forward
    self._data += encoded
    state.offset += len(encoded)
2386
2387 # Handles the group item `item`, removing the immediate labels from
2388 # `state` at the end if `remove_immediate_labels` is `True`.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Any fixed-length number item instance appended from this index
    # on is postponed while handling this group.
    first_inst_index = len(self._fl_num_item_insts)

    # Labels which are direct children of this group
    group_labels = {}  # type: LabelsT

    for child in item.items:
        if type(child) is _Label:
            # Remember the offset of this immediate label
            group_labels[child.name] = state.offset

        self._handle_item(child, state)

    # The immediate labels are local to the group: drop them from the
    # current state if requested.
    if remove_immediate_labels:
        for label_name in group_labels:
            del state.labels[label_name]

    # The state snapshots taken while handling this group must still
    # see all its immediate labels, including the ones which were
    # defined after a given snapshot.
    for pending_inst in self._fl_num_item_insts[first_inst_index:]:
        pending_inst.state.labels.update(group_labels)
2412
2413 # Handles the repetition item `item`.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    # Number of times to handle the repeated item
    count = _Gen._eval_item_expr(item, state)

    # A negative count is meaningless
    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    # Handle the repeated item `count` times (not at all if zero)
    remaining = count

    while remaining > 0:
        self._handle_item(item.item, state)
        remaining -= 1
2430
2431 # Handles the conditional item `item`.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    # Select the branch to handle from the truthiness of the result
    # of the conditional expression.
    if _Gen._eval_item_expr(item, state):
        chosen_item = item.true_item
    else:
        chosen_item = item.false_item

    # Generate the data of the selected branch only
    self._handle_item(chosen_item, state)
2441
2442 # Evaluates the parameters of the macro expansion item `item`
2443 # considering the initial state `init_state` and returns a new state
2444 # to handle the items of the macro.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    # The expansion starts without any variable or label, but at the
    # offset and with the byte order of the initial state.
    exp_state = _GenState({}, {}, init_state.offset, init_state.bo)

    # Names of the parameters of the macro to expand
    param_names = self._macro_defs[item.name].param_names

    # Bind each parameter name to the result of its expression, which
    # is evaluated within the initial state (not the new one).
    for name, arg in zip(param_names, item.params):
        arg_val = _Gen._eval_expr(
            arg.expr_str,
            arg.expr,
            arg.text_loc,
            init_state,
            accept_float=True,
            accept_str=True,
        )
        exp_state.variables[name] = arg_val

    return exp_state
2463
2464 # Handles the macro expansion item `item`.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    # Context message to add to any parse error occurring while
    # expanding this macro.
    context_text = "While expanding the macro `{}`:".format(item.name)

    # Size of the data before the expansion: evaluating the macro
    # parameters below doesn't generate any data.
    size_before = len(self._data)

    try:
        # Dedicated state for the expansion
        exp_state = self._eval_macro_exp_params(item, state)

        # Keep the context message on the stack while handling the
        # group of the macro so that postponed item instances may
        # save it.
        context_msg = (
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                context_text, item.text_loc
            )
        )
        self._parse_error_msgs.append(context_msg)
        self._handle_item(self._macro_defs[item.name].group, exp_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        _augment_error(exc, context_text, item.text_loc)

    # The offset of the calling state moves forward by the size of
    # the data which the expansion generated.
    state.offset += len(self._data) - size_before
2487
2488 # Handles the offset setting item `item`.
2489 def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
2490 state.offset = item.val
2491
2492 # Handles the offset alignment item `item` (adds padding).
2493 def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
2494 init_offset = state.offset
2495 align_bytes = item.val // 8
2496 state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
2497 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2498
2499 # Handles the filling item `item` (adds padding).
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    # Offset to reach
    target_offset = _Gen._eval_item_expr(item, state)

    # It's not possible to fill backwards
    if state.offset > target_offset:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target_offset, state.offset
            ),
            item,
        )

    # Emit as many padding bytes as needed to reach the target
    pad_len = target_offset - state.offset
    self._data += bytes([item.pad_val] * pad_len)

    # The current offset is now the target
    state.offset = target_offset
2518
2519 # Handles the label item `item`.
2520 def _handle_label_item(self, item: _Label, state: _GenState):
2521 state.labels[item.name] = state.offset
2522
2523 # Handles the item `item` by calling the handler registered for its
2524 # exact type in `self._item_handlers`.
2525 def _handle_item(self, item: _Item, state: _GenState):
2526 return self._item_handlers[type(item)](item, state)
2527
2528 # Generates the data for a fixed-length integer item instance having
2529 # the value `val` and returns it.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Accepted range: from the smallest signed value to the largest
    # unsigned value for this length.
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not lowest <= val <= highest:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    # No byte order means a single byte to encode: big endian works
    is_be = state.bo in (None, ByteOrder.BE)

    # Always encode on 64 bits so that the sign bit is extended
    # whatever the value of `item.len`.
    fmt = (">" if is_be else "<") + ("Q" if val >= 0 else "q")
    full_data = struct.pack(fmt, val)

    # Only keep the requested length
    size = item.len // 8

    if is_be:
        # Big endian: the least significant bytes are the last ones
        return full_data[-size:]

    # Little endian: the least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return full_data[:size]
2563
2564 # Generates the data for a fixed-length floating point number item
2565 # instance having the value `val` and returns it.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # `struct` type character for each supported length (bits)
    type_chars = {32: "f", 64: "d"}

    if item.len not in type_chars:
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # No current byte order falls back to big endian
    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"

    # Encode and return the result
    return struct.pack(bo_char + type_chars[item.len], val)
2584
2585 # Generates the data for a fixed-length number item instance and
2586 # returns it.
2587 def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
2588 # Compute value
2589 val = self._eval_item_expr(item, state, True)
2590
2591 # Handle depending on type
2592 if type(val) is int:
2593 return self._gen_fl_int_item_inst_data(val, item, state)
2594 else:
2595 assert type(val) is float
2596 return self._gen_fl_float_item_inst_data(val, item, state)
2597
2598 # Generates the data for all the fixed-length number item instances
2599 # and writes it at the correct offset within `self._data`.
2600 def _gen_fl_num_item_insts(self):
2601 for inst in self._fl_num_item_insts:
2602 # Generate bytes
2603 try:
2604 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
2605 except ParseError as exc:
2606 # Add all the saved parse error messages for this
2607 # instance.
2608 for msg in reversed(inst.parse_error_msgs):
2609 _add_error_msg(exc, msg.text, msg.text_location)
2610
2611 raise
2612
2613 # Insert bytes into `self._data`
2614 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2615
2616 # Generates the data (`self._data`) and final state
2617 # (`self._final_state`) from `group` and the initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Start without any generated data
    self._data = bytearray()

    # Both LEB128 integer item types share the same handler
    leb128_handler = self._handle_leb128_int_item

    # Item type to handler
    handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _LitStr: self._handle_lit_str_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: leb128_handler,
        _Str: self._handle_str_item,
        _ULeb128Int: leb128_handler,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]
    self._item_handlers = handlers

    # Handle the root group, keeping its immediate labels because the
    # `labels` property offers them.
    self._handle_group_item(group, state, False)

    # `state` is now the final state
    self._final_state = state

    # Everything is known at this point: generate the bytes of all
    # the postponed fixed-length number item instances.
    self._gen_fl_num_item_insts()
2652
2653
2654 # Returns a `ParseResult` instance containing the bytes encoded by the
2655 # input string `normand`.
2656 #
2657 # `init_variables` is a dictionary of initial variable names (valid
2658 # Python names) to integral, floating point, or string values. A
2659 # variable name must not be the reserved name `ICITTE`.
2660 #
2661 # `init_labels` is a dictionary of initial label names (valid Python
2662 # names) to integral values. A label name must not be the reserved name
2663 # `ICITTE`.
2664 #
2665 # `init_offset` is the initial offset.
2666 #
2667 # `init_byte_order` is the initial byte order.
2668 #
2669 # Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # No initial variables/labels means empty dictionaries
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the input, and then generate the data from the resulting
    # root item and macro definitions.
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    # Wrap the generated data and the final state
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2695
2696
2697 # Raises a command-line error with the message `msg`.
2698 def _raise_cli_error(msg: str) -> NoReturn:
2699 raise RuntimeError("Command-line error: {}".format(msg))
2700
2701
2702 # Returns the `int` or `float` value out of a CLI assignment value.
def _val_from_assign_val_str(s: str, is_label: bool):
    text = s.strip()

    # A floating point number is only tried for a variable: for a
    # label, only the integer pattern below applies.
    if not is_label:
        float_m = _const_float_pat.fullmatch(text)

        if float_m is not None:
            return float(float_m.group())

    # Constant integer: normalize it, and then let `int()` deduce the
    # base from the prefix, if any.
    int_m = _const_int_pat.fullmatch(text)

    if int_m is not None:
        return int(_norm_const_int(int_m.group()), 0)

    # Nothing matched
    expected = "an integer" if is_label else "a number"
    _raise_cli_error(
        "Invalid assignment value `{}`: expecting {}".format(text, expected)
    )
2721
2722
2723 # Returns a dictionary of names to numbers (strings if `is_str_only`)
2724 # from the list of strings `args` containing `NAME=VAL` entries.
2725 def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
2726 d = {} # type: VariablesT
2727
2728 if args is None:
2729 return d
2730
2731 for arg in args:
2732 m = re.match(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern), arg)
2733
2734 if m is None:
2735 _raise_cli_error("Invalid assignment `{}`".format(arg))
2736
2737 if is_str_only:
2738 val = m.group(2)
2739 else:
2740 val = _val_from_assign_val_str(m.group(2), is_label)
2741
2742 d[m.group(1)] = val
2743
2744 return d
2745
2746
2747 # Parses the command-line arguments and returns, in this order:
2748 #
2749 # 1. The input file path, or `None` if none.
2750 # 2. The Normand input text.
2751 # 3. The initial offset.
2752 # 4. The initial byte order.
2753 # 5. The initial variables.
2754 # 6. The initial labels.
def _parse_cli_args():
    # Only needed by the command-line tool: import locally so that
    # merely importing this module doesn't load `argparse`.
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels: the string variables are merged last, so
    # they win over a numeric variable having the same name.
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset (include the offending value in the message)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
2839
2840
2841 # CLI entry point without exception handling.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    # Generate the data, and then write the raw bytes to the binary
    # layer of the standard output.
    res = parse(normand, variables, labels, offset, bo)
    sys.stdout.buffer.write(res.data)
2850
2851
2852 # Prints the exception message `msg` and exits with status 1.
2853 def _fail(msg: str) -> NoReturn:
2854 if not msg.endswith("."):
2855 msg += "."
2856
2857 print(msg.strip(), file=sys.stderr)
2858 sys.exit(1)
2859
2860
2861 # CLI entry point.
def _run_cli():
    # Any command-line error is reported as is
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        # The first element is the input path: the remaining ones are
        # the parameters of _run_cli_with_args().
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        # Prefix each line with the absolute input path, if any
        if args[0] is not None:
            prefix = "{}:".format(os.path.abspath(args[0]))
        else:
            prefix = ""

        # One line per message, in reverse order
        lines = []

        for msg in reversed(exc.messages):
            loc = msg.text_location
            line = "{}{}:{} - {}".format(prefix, loc.line_no, loc.col_no, msg.text)

            # Make sure each line ends with some punctuation
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2892
2893
# Run the command-line tool only when this module is executed as a
# script (not when it's imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.089763 seconds and 4 git commands to generate.