Add transformation block support
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.21.0"

# Public names of this module (what `from normand import *` exports).
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseErrorMessage",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
46
47 import re
48 import abc
49 import ast
50 import bz2
51 import sys
52 import copy
53 import enum
54 import gzip
55 import math
56 import base64
57 import quopri
58 import struct
59 import typing
60 import functools
61 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
62
63
# Location within some source text: a line number and a column number.
#
# Direct construction is disabled: instances are built with the private
# _create() class method.
class TextLocation:
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a text location for the line number `line_no`
    # and the column number `col_no`, bypassing the disabled
    # constructor.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    # Actual initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        fields = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*fields)
91
92
# Base of all items: only remembers where the item was found in the
# source text.
class _Item:
    def __init__(self, text_loc: TextLocation):
        # Where this item begins within the source text
        self._text_loc = text_loc

    # Source text location of this item.
    @property
    def text_loc(self):
        loc = self._text_loc
        return loc
102
103
# Scalar item: an item which reports its own size through the `size`
# property.
#
# NOTE(review): `abc.abstractmethod` is only enforced for classes whose
# metaclass derives from `abc.ABCMeta`; no such metaclass is visible
# here, so `size` looks like a documented contract rather than an
# enforced one. Confirm that this is intended.
class _ScalarItem(_Item):
    # Returns the size, in bytes, of this item.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
111
112
# A repeatable item.
#
# This is a marker base class only: it has no state and no behaviour of
# its own.
class _RepableItem:
    pass
116
117
# Item representing exactly one byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)

        # Value of the byte
        self._val = val

    # Value of this byte.
    @property
    def val(self):
        return self._val

    # Size of this item: always a single byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
135
136
# Item holding the already encoded bytes of a literal string.
class _LitStr(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)

        # Encoded string
        self._data = data

    # Encoded bytes of this literal string.
    @property
    def data(self):
        return self._data

    # Size of this item: the number of encoded bytes.
    @property
    def size(self):
        encoded = self._data
        return len(encoded)

    def __repr__(self):
        return "_LitStr({!r}, {!r})".format(self._data, self._text_loc)
154
155
# Byte order.
#
# `enum.unique` guarantees that no two members share the same value.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
164
165
# Item which sets the current byte order.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)

        # Byte order to set
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
178
179
# Item which names a label.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)

        # Name of the label
        self._name = name

    # Name of this label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
193
194
# Item which sets the current offset.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)

        # Offset to set
        self._val = val

    # Offset value to set (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
208
209
# Item which aligns the current offset, holding the alignment and the
# value of the padding bytes.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Value of each padding byte.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (self._val, self._pad_val, self._text_loc)
        return "_AlignOffset({!r}, {!r}, {!r})".format(*fields)
231
232
233 # Mixin of containing an AST expression and its string.
234 class _ExprMixin:
235 def __init__(self, expr_str: str, expr: ast.Expression):
236 self._expr_str = expr_str
237 self._expr = expr
238
239 # Expression string.
240 @property
241 def expr_str(self):
242 return self._expr_str
243
244 # Expression node to evaluate.
245 @property
246 def expr(self):
247 return self._expr
248
249
# Item which fills with padding bytes until some offset given by an
# expression.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

        # Value of each padding byte
        self._pad_val = pad_val

    # Value of each padding byte.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._pad_val, self._text_loc)
        return "_FillUntil({!r}, {!r}, {!r}, {!r})".format(*fields)
271
272
# Item which assigns the result of an expression to a named variable.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

        # Name of the variable to assign
        self._name = name

    # Name of the variable to assign.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(*fields)
294
295
# Number having a fixed length (a count of bits), possibly spanning
# more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

        # Length in bits
        self._len = len

    # Length of this number (bits).
    @property
    def len(self):
        return self._len

    # Size of this number (bytes): the bit length over eight, rounded
    # down.
    @property
    def size(self):
        byte_count = self._len // 8
        return byte_count

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._len, self._text_loc)
        return "_FlNum({!r}, {!r}, {!r}, {!r})".format(*fields)
321
322
# Common base of the LEB128 integer items: the value to encode is given
# by an expression.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the concrete class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        cls_name = self.__class__.__name__
        return "{}({!r}, {!r}, {!r})".format(
            cls_name, self._expr_str, self._expr, self._text_loc
        )
336
337
# Unsigned LEB128 integer.
#
# Adds nothing to `_Leb128Int`: the class itself is what distinguishes
# it from the signed variant.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
341
342
# Signed LEB128 integer.
#
# Adds nothing to `_Leb128Int`: the class itself is what distinguishes
# it from the unsigned variant.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
346
347
# String item: an expression together with the name of the codec to
# use.
class _Str(_Item, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

        # Name of the codec
        self._codec = codec

    # Codec name.
    @property
    def codec(self):
        return self._codec

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._codec, self._text_loc)
        return "_Str({!r}, {!r}, {!r}, {!r})".format(*fields)
369
370
# Item which contains a list of other items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)

        # Contained items
        self._items = items

    # Items contained in this group.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
384
385
# Repetition item: a group of items together with an expression.
class _Rep(_Group, _ExprMixin):
    def __init__(
        self,
        items: List[_Item],
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # Group part first, then expression part
        super().__init__(items, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        fields = (self._items, self._expr_str, self._expr, self._text_loc)
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(*fields)
405
406
# Conditional item: an expression with one group for when the condition
# is true and another one for when it's false.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Group,
        false_item: _Group,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        fields = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({!r}, {!r}, {!r}, {!r}, {!r})".format(*fields)
440
441
# Transformation item: a group of items plus a named function which
# transforms data.
class _Trans(_Group, _RepableItem):
    def __init__(
        self,
        items: List[_Item],
        name: str,
        func: Callable[[Union[bytes, bytearray]], bytes],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._name, self._func = name, func

    # Name of this transformation.
    @property
    def name(self):
        return self._name

    # Returns the result of applying the transformation function to
    # the data `data`.
    def trans(self, data: Union[bytes, bytearray]):
        transform = self._func
        return transform(data)

    def __repr__(self):
        fields = (self._items, self._name, self._func, self._text_loc)
        return "_Trans({!r}, {!r}, {!r}, {!r})".format(*fields)
470
471
# Macro definition item: a named group of items with a list of
# parameter names.
class _MacroDef(_Group):
    def __init__(
        self,
        name: str,
        param_names: List[str],
        items: List[_Item],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._name, self._param_names = name, param_names

    # Name of this macro.
    @property
    def name(self):
        return self._name

    # Names of the parameters of this macro.
    @property
    def param_names(self):
        return self._param_names

    def __repr__(self):
        fields = (self._name, self._param_names, self._items, self._text_loc)
        return "_MacroDef({!r}, {!r}, {!r}, {!r})".format(*fields)
502
503
# Parameter of a macro expansion: an expression (string and node) and
# where it was found in the source text.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr = expr_str, expr
        self._text_loc = text_loc

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression node.
    @property
    def expr(self):
        return self._expr

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._text_loc)
        return "_MacroExpParam({!r}, {!r}, {!r})".format(*fields)
530
531
# Macro expansion item: the name of the macro to expand and the
# parameters of the expansion.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name of the macro to expand.
    @property
    def name(self):
        return self._name

    # Parameters of this expansion.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        fields = (self._name, self._params, self._text_loc)
        return "_MacroExp({!r}, {!r}, {!r})".format(*fields)
560
561
# A single parsing error message: some text and the location, within
# the source text, to which it applies.
#
# Direct construction is disabled: instances are built with the private
# _create() class method.
class ParseErrorMessage:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a message having the text `text` and the
    # source text location `text_loc`, bypassing the disabled
    # constructor.
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    # Actual initializer, called by _create().
    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Message text.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
586
587
# A parsing error: holds one or more messages (`ParseErrorMessage`).
#
# Direct construction is disabled: instances are built with the private
# _create() class method.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error having a single initial
    # message (text `msg`, source text location `text_loc`), bypassing
    # the disabled constructor.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    # Actual initializer, called by _create(): `msg` also becomes the
    # argument of the underlying `RuntimeError`.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a message (text `msg`, source text location `text_loc`)
    # to this error.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
617
618
# Builds a parsing error from the message `msg` and the source text
# location `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
622
623
# Adds a message (text `msg`, source text location `text_loc`) to the
# parsing error `exc`.
#
# Module-level wrapper around the private ParseError._add_msg().
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
627
628
# Adds a message (text `msg`, source text location `text_loc`) to the
# parsing error `exc`, and then raises `exc` again.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    _add_error_msg(exc=exc, msg=msg, text_loc=text_loc)
    raise exc
633
634
635 # Returns a normalized version (so as to be parseable by int()) of
636 # the constant integer string `s`, possibly negative, dealing with
637 # any radix suffix.
638 def _norm_const_int(s: str):
639 neg = ""
640 pos = s
641
642 if s.startswith("-"):
643 neg = "-"
644 pos = s[1:]
645
646 for r in "xXoObB":
647 if pos.startswith("0" + r):
648 # Already correct
649 return s
650
651 # Try suffix
652 asm_suf_base = {
653 "h": "x",
654 "H": "x",
655 "q": "o",
656 "Q": "o",
657 "o": "o",
658 "O": "o",
659 "b": "b",
660 "B": "B",
661 }
662
663 for suf in asm_suf_base:
664 if pos[-1] == suf:
665 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
666
667 return s
668
669
# Returns the bytes of the string `s` encoded with the codec `codec`.
#
# Raises `ParseError`, with the source text location `text_loc`, if `s`
# cannot be encoded.
def _encode_str(s: str, codec: str, text_loc: TextLocation):
    try:
        encoded = s.encode(codec)
    except UnicodeEncodeError:
        msg = "Cannot encode `{}` with the `{}` encoding".format(s, codec)
        _raise_error(msg, text_loc)
    else:
        return encoded
679
680
# Variables dictionary type (for type hints): variable name to value
# (integer, floating point number, or string).
VariablesT = Dict[str, Union[int, float, str]]


# Labels dictionary type (for type hints): label name to integer.
LabelsT = Dict[str, int]


# Common patterns.

# ASCII Python identifier.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

# Positive constant integer: with a radix prefix (`0x`, `0o`, or `0b`),
# with an assembly-style radix suffix (`h`, `q`/`o`, or `b`), or plain
# decimal; must end at a word boundary.
_pos_const_int_pat = re.compile(
    r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
)

# Same as above, with an optional `-` captured as the `neg` group.
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))

# Constant floating point number: optional sign, mandatory decimal
# point, and optional exponent.
#
# NOTE(review): the last alternative of the trailing lookahead
# `(?=\W|)` is empty, so the lookahead always succeeds; confirm whether
# or not an end-of-word check was intended.
_const_float_pat = re.compile(
    r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
)


# Macro definition dictionary: macro name to macro definition item.
_MacroDefsT = Dict[str, _MacroDef]
702
703
704 # Normand parser.
705 #
706 # The constructor accepts a Normand input. After building, use the `res`
707 # property to get the resulting main group.
708 class _Parser:
# Builds a parser for the Normand input `normand` and parses it right
# away.
#
# The names of the initial variables `variables` and labels `labels`
# are the initially known variable and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position within it
    self._normand = normand
    self._at = 0

    # Current text location (line and column numbers start at 1)
    self._line_no, self._col_no = 1, 1

    # Known names so far
    self._label_names = set(labels.keys())
    self._var_names = set(variables.keys())
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse now
    self._parse()
720
# Result (main group).
#
# NOTE(review): `self._res` isn't assigned by the visible part of this
# class; presumably _parse() sets it. Confirm.
@property
def res(self):
    return self._res
725
# Macro definitions: the dictionary which the constructor creates
# empty.
@property
def macro_defs(self):
    return self._macro_defs
730
# Current text location, as a new `TextLocation` object built from the
# current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
737
738 # Returns `True` if this parser is done parsing.
739 def _is_done(self):
740 return self._at == len(self._normand)
741
# Returns `True` if this parser isn't done parsing (exact opposite of
# _is_done()).
def _isnt_done(self):
    return not self._is_done()
745
# Raises a parse error, creating it using the message `msg` and the
# current text location.
#
# Delegates to the module-level _raise_error() function, which this
# method shadows within the class.
def _raise_error(self, msg: str) -> NoReturn:
    _raise_error(msg, self._text_loc)
750
751 # Tries to make the pattern `pat` match the current substring,
752 # returning the match object and updating `self._at`,
753 # `self._line_no`, and `self._col_no` on success.
754 def _try_parse_pat(self, pat: Pattern[str]):
755 m = pat.match(self._normand, self._at)
756
757 if m is None:
758 return
759
760 # Skip matched string
761 self._at += len(m.group(0))
762
763 # Update line number
764 self._line_no += m.group(0).count("\n")
765
766 # Update column number
767 for i in reversed(range(self._at)):
768 if self._normand[i] == "\n" or i == 0:
769 if i == 0:
770 self._col_no = self._at + 1
771 else:
772 self._col_no = self._at - i
773
774 break
775
776 # Return match object
777 return m
778
779 # Expects the pattern `pat` to match the current substring,
780 # returning the match object and updating `self._at`,
781 # `self._line_no`, and `self._col_no` on success, or raising a parse
782 # error with the message `error_msg` on error.
783 def _expect_pat(self, pat: Pattern[str], error_msg: str):
784 # Match
785 m = self._try_parse_pat(pat)
786
787 if m is None:
788 # No match: error
789 self._raise_error(error_msg)
790
791 # Return match object
792 return m
793
# Patterns for _skip_*()

# Comment: from `#` until the next `#` or the end of the line,
# whichever comes first.
_comment_pat = re.compile(r"#[^#]*?(?:$|#)", re.M)

# Any number of whitespaces and comments.
_ws_or_comments_pat = re.compile(r"(?:\s|{})*".format(_comment_pat.pattern), re.M)

# Any number of whitespaces, insignificant symbol characters, and
# comments.
_ws_or_syms_or_comments_pat = re.compile(
    r"(?:[\s/\\?&:;.,[\]_=|-]|{})*".format(_comment_pat.pattern), re.M
)
800
# Skips as many whitespaces and comments as possible, but not
# insignificant symbol characters.
#
# The pattern may match the empty string, so the result of the match
# attempt is ignored.
def _skip_ws_and_comments(self):
    self._try_parse_pat(self._ws_or_comments_pat)
805
# Skips as many whitespaces, insignificant symbol characters, and
# comments as possible.
#
# The pattern may match the empty string, so the result of the match
# attempt is ignored.
def _skip_ws_and_comments_and_syms(self):
    self._try_parse_pat(self._ws_or_syms_or_comments_pat)
810
# Pattern for _try_parse_hex_byte(): a single hexadecimal digit
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, insignificant symbols, and comments), returning a byte
# item on success.
#
# A first nibble without a second one is a parse error.
def _try_parse_hex_byte(self):
    text_loc = self._text_loc

    # High nibble (optional: no match means this isn't a hex byte)
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        return

    # Low nibble (mandatory from now on)
    self._skip_ws_and_comments_and_syms()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Combine both nibbles
    val = int(high.group(0), 16) << 4 | int(low.group(0), 16)
    return _Byte(val, text_loc)
834
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%+")

# Tries to parse binary bytes: one or more `%` followed with eight bits
# per `%`, returning a single byte item (exactly one `%`) or a group of
# byte items (more than one) on success.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    # Prefix (optional: no match means this isn't a binary byte)
    m_prefix = self._try_parse_pat(self._bin_byte_prefix_pat)

    if m_prefix is None:
        return

    # There are as many bytes to expect as there are `%` characters
    byte_count = len(m_prefix.group(0))
    items = []  # type: List[_Item]

    while len(items) < byte_count:
        self._skip_ws_and_comments_and_syms()
        byte_text_loc = self._text_loc
        bit_str = ""

        # Eight mandatory bits, most significant first
        for _ in range(8):
            self._skip_ws_and_comments_and_syms()
            m_bit = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            bit_str += m_bit.group(0)

        items.append(_Byte(int(bit_str, 2), byte_text_loc))

    # A single byte is returned as is, many as a group
    if byte_count == 1:
        return items[0]

    return _Group(items, begin_text_loc)
874
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a decimal constant
# from -128 to 255), returning a byte item on success.
#
# An out-of-range value is a parse error located at the `$` prefix.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    # Prefix (optional: no match means this isn't a decimal byte)
    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        return

    # Mandatory value
    self._skip_ws_and_comments()
    m_val = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(m_val.group("val"))

    if m_val.group("neg") == "-":
        val = -val

    # Validate the range
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Keep the low eight bits (two's complement of a negative value)
    return _Byte(val & 0xFF, begin_text_loc)
904
905 # Tries to parse a byte, returning a byte item on success.
906 def _try_parse_byte(self):
907 # Hexadecimal
908 item = self._try_parse_hex_byte()
909
910 if item is not None:
911 return item
912
913 # Binary
914 item = self._try_parse_bin_byte()
915
916 if item is not None:
917 return item
918
919 # Decimal
920 item = self._try_parse_dec_byte()
921
922 if item is not None:
923 return item
924
925 # Strings corresponding to escape sequence characters
926 _lit_str_escape_seq_strs = {
927 "0": "\0",
928 "a": "\a",
929 "b": "\b",
930 "e": "\x1b",
931 "f": "\f",
932 "n": "\n",
933 "r": "\r",
934 "t": "\t",
935 "v": "\v",
936 "\\": "\\",
937 '"': '"',
938 }
939
940 # Patterns for _try_parse_lit_str()
941 _lit_str_prefix_suffix_pat = re.compile(r'"')
942 _lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
943
944 # Parses a literal string between double quotes (without an encoding
945 # prefix) and returns the resulting string.
946 def _try_parse_lit_str(self, with_prefix: bool):
947 # Match prefix if needed
948 if with_prefix:
949 if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None:
950 # No match
951 return
952
953 # Expect literal string
954 m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string")
955
956 # Expect end of string
957 self._expect_pat(
958 self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)'
959 )
960
961 # Replace escape sequences
962 val = m.group(0)
963
964 for ec in '0abefnrtv"\\':
965 val = val.replace(r"\{}".format(ec), self._lit_str_escape_seq_strs[ec])
966
967 # Return string
968 return val
969
970 # Patterns for _try_parse_utf_str_encoding()
971 _str_encoding_utf_prefix_pat = re.compile(r"u")
972 _str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
973
974 # Tries to parse a UTF encoding specification, returning the Python
975 # codec name on success.
976 def _try_parse_utf_str_encoding(self):
977 # Match prefix
978 if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None:
979 # No match
980 return
981
982 # Expect UTF specification
983 m = self._expect_pat(
984 self._str_encoding_utf_pat,
985 "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
986 )
987
988 # Convert to codec name
989 return {
990 "8": "utf_8",
991 "16be": "utf_16_be",
992 "16le": "utf_16_le",
993 "32be": "utf_32_be",
994 "32le": "utf_32_le",
995 }[m.group(0)]
996
997 # Patterns for _try_parse_str_encoding()
998 _str_encoding_gen_prefix_pat = re.compile(r"s")
999 _str_encoding_colon_pat = re.compile(r":")
1000 _str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b")
1001
1002 # Tries to parse a string encoding specification, returning the
1003 # Python codec name on success.
1004 #
1005 # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
1006 def _try_parse_str_encoding(self, req_gen_prefix: bool = False):
1007 # General prefix?
1008 if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None:
1009 # Expect `:`
1010 self._skip_ws_and_comments()
1011 self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`")
1012
1013 # Expect encoding specification
1014 self._skip_ws_and_comments()
1015
1016 # UTF?
1017 codec = self._try_parse_utf_str_encoding()
1018
1019 if codec is not None:
1020 return codec
1021
1022 # Expect Latin
1023 m = self._expect_pat(
1024 self._str_encoding_non_utf_pat,
1025 "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
1026 )
1027 return m.group(0)
1028
1029 # UTF?
1030 if not req_gen_prefix:
1031 return self._try_parse_utf_str_encoding()
1032
# Patterns for _try_parse_str()
_lit_str_prefix_pat = re.compile(r'"')
_str_prefix_pat = re.compile(r'"|\{')
_str_expr_pat = re.compile(r"[^}]+")
_str_expr_suffix_pat = re.compile(r"\}")

# Tries to parse a string, returning a literal string item (`"..."`,
# encoded immediately) or a string item (`{...}` expression) on
# success.
#
# Without an encoding specification, only a literal string, encoded
# as UTF-8, is possible.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Optional encoding specification
    codec = self._try_parse_str_encoding()
    self._skip_ws_and_comments()

    if codec is not None:
        # Having an encoding makes what follows mandatory
        m_prefix = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`')
    else:
        # No encoding: this is a literal string or not a string at all
        m_prefix = self._try_parse_pat(self._lit_str_prefix_pat)

        if m_prefix is None:
            return

    if m_prefix.group(0) != '"':
        # Expression between `{` and `}`
        self._skip_ws_and_comments()
        expr_text_loc = self._text_loc
        m_expr = self._expect_pat(self._str_expr_pat, "Expecting an expression")
        self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")
        expr_str, expr = self._ast_expr_from_str(m_expr.group(0), expr_text_loc)

        # `{` is only accepted after an encoding specification
        assert codec is not None
        return _Str(expr_str, expr, codec, begin_text_loc)

    # Literal string of which the opening `"` is already parsed
    str_text_loc = self._text_loc
    val = self._try_parse_lit_str(False)

    if val is None:
        self._raise_error("Expecting a literal string")

    # Encode now (UTF-8 by default)
    effective_codec = codec if codec is not None else "utf_8"
    return _LitStr(_encode_str(val, effective_codec, str_text_loc), begin_text_loc)
1091
# Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group, returning a group item on success.
#
# A group opened with `(` must end with `)`; a group opened with `!g`
# or `!group` must end with what `self._block_end_pat` matches.
def _try_parse_group(self):
    begin_text_loc = self._text_loc

    # Prefix (optional: no match means this isn't a group)
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        return

    # Contents
    items = self._parse_items()
    self._skip_ws_and_comments_and_syms()

    # The expected end depends on how the group was opened
    if m_open.group(0) != "(":
        end_pat, end_str = self._block_end_pat, "!end"
    else:
        end_pat, end_str = self._right_paren_pat, ")"

    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )
    return _Group(items, begin_text_loc)
1126
# Returns the normalized version of the expression string `expr_str`
# (surrounding whitespaces removed, newlines replaced with spaces) and
# the AST expression node made from it.
#
# Invalid expression syntax is a parse error at the text location
# `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    norm_expr_str = expr_str.strip()
    norm_expr_str = norm_expr_str.replace("\n", " ")

    try:
        expr = ast.parse(norm_expr_str, mode="eval")
    except SyntaxError:
        msg = "Invalid expression `{}`: invalid syntax".format(norm_expr_str)
        _raise_error(msg, text_loc)

    return norm_expr_str, expr
1142
# Patterns for _try_parse_val()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_fmt_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_fmt_pat = re.compile(r"(u|s)leb128")

# Tries to parse a value (an expression followed with `:`) and its
# format, returning an item on success:
#
# * Fixed length in bits: fixed-length number item.
# * `uleb128` or `sleb128`: LEB128 integer item.
# * `s:` followed with an encoding name: string item.
#
# Any other format is a parse error.
def _try_parse_val(self):
    begin_text_loc = self._text_loc

    # Expression and `:` (optional: no match means this isn't a value)
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        return

    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)
    self._skip_ws_and_comments()

    # Fixed length?
    m_len = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if m_len is not None:
        return _FlNum(expr_str, expr, int(m_len.group(0)), begin_text_loc)

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_fmt_pat)

    if m_leb128 is not None:
        if m_leb128.group(1) == "u":
            return _ULeb128Int(expr_str, expr, begin_text_loc)

        return _SLeb128Int(expr_str, expr, begin_text_loc)

    # String encoding? This is the last possibility.
    codec = self._try_parse_str_encoding(True)

    if codec is None:
        self._raise_error(
            "Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
        )

    return _Str(expr_str, expr, codec, begin_text_loc)
1196
# Patterns for _try_parse_var_assign()
_var_assign_name_equal_pat = re.compile(
    r"({})\s*=(?!=)".format(_py_name_pat.pattern)
)
_var_assign_expr_pat = re.compile(r"[^}]+")

# Tries to parse a variable assignment (a name, `=`, and an
# expression), returning a variable assignment item on success.
#
# The reserved name `_icitte_name` and the name of any known label are
# parse errors. On success the name joins the known variable names.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc

    # Name and `=` (optional: no match means this isn't an assignment)
    m_name = self._try_parse_pat(self._var_assign_name_equal_pat)

    if m_name is None:
        return

    name = m_name.group(1)

    # Reserved name?
    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    # Clashes with a label?
    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), begin_text_loc)

    # Mandatory expression
    self._skip_ws_and_comments()
    m_expr = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(m_expr.group(0), begin_text_loc)

    # The variable is known from now on
    self._var_names.add(name)
    return _VarAssign(name, expr_str, expr, begin_text_loc)
1243
# Pattern for _try_parse_set_bo()
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning the
# matching byte order setting item on success, or `None` if there's
# no match.
def _try_parse_set_bo(self):
    start_loc = self._text_loc
    bo_match = self._try_parse_pat(self._bo_pat)

    if bo_match is None:
        # Not a byte order name
        return

    bo_name = bo_match.group(0)

    if bo_name != "be":
        # The pattern only accepts `be` and `le`
        assert bo_name == "le"
        return _SetBo(ByteOrder.LE, start_loc)

    return _SetBo(ByteOrder.BE, start_loc)
1265
1266 # Patterns for _try_parse_val_or_bo()
1267 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1268 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
1269
1270 # Tries to parse a value, a variable assignment, or a byte order
1271 # setting, returning an item on success.
1272 def _try_parse_val_or_var_assign_or_set_bo(self):
1273 # Match prefix
1274 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
1275 # No match
1276 return
1277
1278 self._skip_ws_and_comments()
1279
1280 # Variable assignment item?
1281 item = self._try_parse_var_assign()
1282
1283 if item is None:
1284 # Value item?
1285 item = self._try_parse_val()
1286
1287 if item is None:
1288 # Byte order setting item?
1289 item = self._try_parse_set_bo()
1290
1291 if item is None:
1292 # At this point it's invalid
1293 self._raise_error(
1294 "Expecting a fixed-length number, a string, a variable assignment, or a byte order setting"
1295 )
1296
1297 # Expect suffix
1298 self._skip_ws_and_comments()
1299 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
1300 return item
1301
# Tries to parse the value of an offset setting (what follows the
# initial `<`): a positive constant integer.
#
# Returns an offset setting item on success, or `None` if there's no
# match.
def _try_parse_set_offset_val(self):
    start_loc = self._text_loc
    int_match = self._try_parse_pat(_pos_const_int_pat)

    if int_match is None:
        # Not a positive constant integer
        return

    # Base 0 lets int() pick the base from the normalized prefix
    offset_val = int(_norm_const_int(int_match.group(0)), 0)
    return _SetOffset(offset_val, start_loc)
1316
# Tries to parse a label name (what follows the initial `<`),
# returning a label item on success, or `None` if there's no name.
#
# Raises a parse error if the name is reserved, is the name of an
# existing label, or is the name of an existing variable. Registers
# the name in `self._label_names` otherwise.
def _try_parse_label_name(self):
    start_loc = self._text_loc
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is None:
        # Not a name
        return

    label_name = name_match.group(0)

    # Ordered validation rules: the first failing one is reported
    checks = (
        (
            label_name == _icitte_name,
            "`{}` is a reserved label name".format(_icitte_name),
        ),
        (
            label_name in self._label_names,
            "Duplicate label name `{}`".format(label_name),
        ),
        (
            label_name in self._var_names,
            "Existing variable named `{}`".format(label_name),
        ),
    )

    for is_invalid, msg in checks:
        if is_invalid:
            _raise_error(msg, start_loc)

    # Remember the label name before returning the item
    self._label_names.add(label_name)
    return _Label(label_name, start_loc)
1348
1349 # Patterns for _try_parse_label_or_set_offset()
1350 _label_set_offset_prefix_pat = re.compile(r"<")
1351 _label_set_offset_suffix_pat = re.compile(r">")
1352
1353 # Tries to parse a label or an offset setting, returning an item on
1354 # success.
1355 def _try_parse_label_or_set_offset(self):
1356 # Match prefix
1357 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
1358 # No match
1359 return
1360
1361 # Offset setting item?
1362 self._skip_ws_and_comments()
1363 item = self._try_parse_set_offset_val()
1364
1365 if item is None:
1366 # Label item?
1367 item = self._try_parse_label_name()
1368
1369 if item is None:
1370 # At this point it's invalid
1371 self._raise_error("Expecting a label name or an offset setting value")
1372
1373 # Expect suffix
1374 self._skip_ws_and_comments()
1375 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
1376 return item
1377
# Pattern for _parse_pad_val()
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value: `~` followed with a positive
# constant integer.
#
# Returns the padding byte value, or 0 if there's no `~`. Raises a
# parse error if the integer is missing after `~` or is greater than
# 255.
def _parse_pad_val(self):
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value
        return 0

    self._skip_ws_and_comments()
    val_loc = self._text_loc
    int_match = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    byte_val = int(_norm_const_int(int_match.group(0)), 0)

    # Must fit in a single byte
    if byte_val > 255:
        _raise_error("Invalid padding byte value {}".format(byte_val), val_loc)

    return byte_val
1406
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Tries to parse an offset alignment: `@`, an alignment in bits, and
# an optional padding value.
#
# Returns an offset alignment item on success, or `None` if there's
# no `@` prefix. Raises a parse error if the alignment is missing or
# isn't a positive multiple of eight.
def _try_parse_align_offset(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No `@`: not for us
        return

    # The alignment itself is mandatory
    self._skip_ws_and_comments()
    align_loc = self._text_loc
    align_match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(align_match.group(0))
    is_valid = align > 0 and align % 8 == 0

    if not is_valid:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            align_loc,
        )

    # Optional padding value (0 if none)
    return _AlignOffset(align, self._parse_pad_val(), start_loc)
1445
# Patterns for _expect_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses an expression outside a `{`/`}` context.
#
# The accepted forms, tried in this order, are:
#
# 1. If `accept_const_float` is `True`: a constant floating point
#    number.
#
# 2. If `accept_const_int` is `True`: a constant integer; a negative
#    one is a parse error unless `allow_neg_int` is `True`.
#
# 3. A Python name.
#
# 4. If `accept_lit_str` is `True`: a literal string.
#
# 5. A Python expression within `{` and `}`.
#
# Returns what self._ast_expr_from_str() returns for the parsed text
# (callers unpack it as an expression string and an AST expression).
# Raises a parse error if none of the accepted forms is found.
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    start_loc = self._text_loc

    # Constant floating point number?
    if accept_const_float:
        float_match = self._try_parse_pat(_const_float_pat)

        if float_match is not None:
            return self._ast_expr_from_str(float_match.group(0), start_loc)

    # Constant integer?
    if accept_const_int:
        int_match = self._try_parse_pat(_const_int_pat)

        if int_match is not None:
            # Reject a negative value when not allowed
            if not allow_neg_int and int_match.group("neg") == "-":
                _raise_error("Expecting a positive constant integer", start_loc)

            return self._ast_expr_from_str(
                _norm_const_int(int_match.group(0)), start_loc
            )

    # Name?
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is not None:
        return self._ast_expr_from_str(name_match.group(0), start_loc)

    # Literal string?
    if accept_lit_str:
        lit_str = self._try_parse_lit_str(True)

        if lit_str is not None:
            return self._ast_expr_from_str(repr(lit_str), start_loc)

    # Only `{` remains possible: describe everything which was
    # acceptable in case it's missing.
    accepted = []  # type: List[str]

    if accept_const_int:
        sign = "" if allow_neg_int else "positive "
        accepted.append("a {}constant integer".format(sign))

    if accept_const_float:
        accepted.append("a constant floating point number")

    if accept_lit_str:
        accepted.append("a literal string")

    accepted += ["a name", "or `{`"]

    # No comma needed with only two alternatives
    sep = " " if len(accepted) == 2 else ", "
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(sep.join(accepted)),
    )

    # Inner expression
    self._skip_ws_and_comments()
    inner_loc = self._text_loc
    inner_match = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # Closing `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_match.group(0), inner_loc)
1542
# Patterns for _try_parse_fill_until()
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Tries to parse a filling: `+`, an expression (which may be a
# constant integer), and an optional padding value.
#
# Returns a filling item on success, or `None` if there's no `+`
# prefix.
def _try_parse_fill_until(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # No `+`: not for us
        return

    # Mandatory expression
    self._skip_ws_and_comments()
    expr_str, expr = self._expect_expr(accept_const_int=True)

    # Optional padding value (0 if none)
    fill_byte = self._parse_pad_val()
    return _FillUntil(expr_str, expr, fill_byte, start_loc)
1565
1566 # Parses the multiplier expression of a repetition (block or
1567 # post-item) and returns the expression string and AST node.
1568 def _expect_rep_mul_expr(self):
1569 return self._expect_expr(accept_const_int=True)
1570
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Tries to parse a repetition block: `!r` or `!repeat`, a multiplier
# expression, items, and `!end`.
#
# Returns a repetition item on success, or `None` if there's no
# block prefix.
def _try_parse_rep_block(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Items to repeat
    self._skip_ws_and_comments_and_syms()
    inner_items = self._parse_items()

    # Mandatory `!end`
    self._skip_ws_and_comments_and_syms()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    return _Rep(inner_items, mul_expr_str, mul_expr, start_loc)
1603
# Patterns for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Tries to parse a conditional block: `!if`, an expression, the
# "true" items, optionally `!else` and the "false" items, and `!end`.
#
# Returns a conditional item on success, or `None` if there's no
# `!if` prefix. Without `!else`, the "false" group is empty and
# located at the beginning of the block.
def _try_parse_cond_block(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return

    # Condition expression
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" branch
    self._skip_ws_and_comments_and_syms()
    true_loc = self._text_loc
    true_items = self._parse_items()

    # "False" branch: empty unless there's an `!else`
    false_loc = start_loc
    false_items = []  # type: List[_Item]
    self._skip_ws_and_comments_and_syms()

    if self._try_parse_pat(self._cond_block_else_pat) is not None:
        self._skip_ws_and_comments_and_syms()
        false_loc = self._text_loc
        false_items = self._parse_items()

    # Mandatory `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_loc)
    false_group = _Group(false_items, false_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, start_loc)
1652
# Patterns for _try_parse_trans_block()
_trans_block_prefix_pat = re.compile(r"!t(?:ransform)?\b")
_trans_block_type_pat = re.compile(
    r"(?:(?:base|b)64(?:u)?|(?:base|b)(?:16|32)|(?:ascii|a|base|b)85(?:p)?|(?:quopri|qp)(?:t)?|gzip|gz|bzip2|bz2)\b"
)

# Tries to parse a transformation block: `!t` or `!transform`, a
# transformation type, items, and `!end`.
#
# Returns a transformation block item (holding the items, a
# descriptive name of the transformation, and the function to apply)
# on success, or `None` if there's no block prefix.
def _try_parse_trans_block(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._trans_block_prefix_pat) is None:
        # Not a transformation block
        return

    # Transformation type
    self._skip_ws_and_comments()
    type_match = self._expect_pat(
        self._trans_block_type_pat, "Expecting a known transformation type"
    )

    # Items to transform
    self._skip_ws_and_comments_and_syms()
    inner_items = self._parse_items()

    # Mandatory `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of transformation block)",
    )

    # Known types (with their aliases), except bzip2 which is the
    # fallback below.
    trans_type = type_match.group(0)
    known_trans = (
        (("base64", "b64"), base64.standard_b64encode, "standard Base64"),
        (("base64u", "b64u"), base64.urlsafe_b64encode, "URL-safe Base64"),
        (("base32", "b32"), base64.b32encode, "Base32"),
        (("base16", "b16"), base64.b16encode, "Base16"),
        (("ascii85", "a85"), base64.a85encode, "Ascii85"),
        (
            ("ascii85p", "a85p"),
            functools.partial(base64.a85encode, pad=True),
            "padded Ascii85",
        ),
        (("base85", "b85"), base64.b85encode, "Base85"),
        (
            ("base85p", "b85p"),
            functools.partial(base64.b85encode, pad=True),
            "padded Base85",
        ),
        (("quopri", "qp"), quopri.encodestring, "MIME quoted-printable"),
        (
            ("quoprit", "qpt"),
            functools.partial(quopri.encodestring, quotetabs=True),
            "MIME quoted-printable (with quoted tabs)",
        ),
        (("gzip", "gz"), gzip.compress, "gzip"),
    )

    for aliases, func, name in known_trans:
        if trans_type in aliases:
            break
    else:
        # Only remaining possibility given the type pattern
        assert trans_type in ("bzip2", "bz2")
        func = bz2.compress
        name = "bzip2"

    return _Trans(inner_items, name, func, start_loc)
1733
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition: `!m` or `!macro`, a name,
# comma-separated parameter names within parentheses, items, and
# `!end`.
#
# Registers the definition in `self._macro_defs` and returns `True`
# on success; returns `False` if there's no definition prefix.
#
# Raises a parse error for a duplicate macro name or a duplicate
# parameter name.
#
# The items of the macro are parsed with initially empty sets of
# known variable and label names; copies of the sets as they were
# before are put back afterwards.
def _try_parse_macro_def(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name, which must be new
    self._skip_ws_and_comments()
    name_loc = self._text_loc
    macro_name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(
        0
    )

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_loc)

    # Opening `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Parameter names until `)`; a comma is required before each
    # one except the first.
    param_names = []  # type: List[str]
    self._skip_ws_and_comments()

    while self._try_parse_pat(self._right_paren_pat) is None:
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_loc,
            )

        param_names.append(param_name)
        self._skip_ws_and_comments()

    # Parse the items of the macro with fresh name sets
    self._skip_ws_and_comments_and_syms()
    saved_names = (self._var_names.copy(), self._label_names.copy())
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    macro_items = self._parse_items()
    self._var_names, self._label_names = saved_names

    # Mandatory `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register the new macro
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, macro_items, start_loc
    )
    return True
1817
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion: `m`, `:`, the name of a known
# macro, and comma-separated parameter values within parentheses.
#
# Returns a macro expansion item on success, or `None` if there's no
# `m` prefix.
#
# Raises a parse error if the macro name is unknown or if the number
# of passed parameters isn't the number the definition expects.
def _try_parse_macro_exp(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return

    # `:`
    self._skip_ws_and_comments()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Macro name, which must be known
    self._skip_ws_and_comments()
    name_loc = self._text_loc
    macro_name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(
        0
    )
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), name_loc)

    # Opening `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Parameter values until `)`; a comma is required before each
    # one except the first.
    params_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]
    self._skip_ws_and_comments()

    while self._try_parse_pat(self._right_paren_pat) is None:
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_loc = self._text_loc
        param_expr_str, param_expr = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        params.append(_MacroExpParam(param_expr_str, param_expr, text_loc=param_loc))
        self._skip_ws_and_comments()

    # The parameter counts must agree
    passed_count = len(params)
    expected_count = len(macro_def.param_names)

    if passed_count != expected_count:
        plural = "s" if passed_count != 1 else ""
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                passed_count, plural, expected_count
            ),
            params_loc,
        )

    return _MacroExp(macro_name, params, start_loc)
1896
1897 # Tries to parse a base item (anything except a repetition),
1898 # returning it on success.
1899 def _try_parse_base_item(self):
1900 # Byte item?
1901 item = self._try_parse_byte()
1902
1903 if item is not None:
1904 return item
1905
1906 # String item?
1907 item = self._try_parse_str()
1908
1909 if item is not None:
1910 return item
1911
1912 # Value, variable assignment, or byte order setting item?
1913 item = self._try_parse_val_or_var_assign_or_set_bo()
1914
1915 if item is not None:
1916 return item
1917
1918 # Label or offset setting item?
1919 item = self._try_parse_label_or_set_offset()
1920
1921 if item is not None:
1922 return item
1923
1924 # Offset alignment item?
1925 item = self._try_parse_align_offset()
1926
1927 if item is not None:
1928 return item
1929
1930 # Filling item?
1931 item = self._try_parse_fill_until()
1932
1933 if item is not None:
1934 return item
1935
1936 # Group item?
1937 item = self._try_parse_group()
1938
1939 if item is not None:
1940 return item
1941
1942 # Repetition block item?
1943 item = self._try_parse_rep_block()
1944
1945 if item is not None:
1946 return item
1947
1948 # Conditional block item?
1949 item = self._try_parse_cond_block()
1950
1951 if item is not None:
1952 return item
1953
1954 # Macro expansion item?
1955 item = self._try_parse_macro_exp()
1956
1957 if item is not None:
1958 return item
1959
1960 # Transformation block item?
1961 item = self._try_parse_trans_block()
1962
1963 if item is not None:
1964 return item
1965
1966 # Pattern for _try_parse_rep_post()
1967 _rep_post_prefix_pat = re.compile(r"\*")
1968
1969 # Tries to parse a post-item repetition, returning the expression
1970 # string and AST expression node on success.
1971 def _try_parse_rep_post(self):
1972 # Match prefix
1973 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
1974 # No match
1975 return
1976
1977 # Return expression string and AST expression
1978 self._skip_ws_and_comments()
1979 return self._expect_rep_mul_expr()
1980
# Tries to parse an item, possibly followed with a post-item
# repetition, appending the result to `items`.
#
# Returns `True` if an item was appended, or `None` if no base item
# could be parsed (in which case `items` is left untouched).
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments_and_syms()
    parsed_item = self._try_parse_base_item()

    if parsed_item is None:
        # Nothing to append
        return

    # Only a repeatable item may be followed with a repetition, in
    # which case a repetition item wraps it.
    if isinstance(parsed_item, _RepableItem):
        self._skip_ws_and_comments()
        rep_loc = self._text_loc
        rep_expr_info = self._try_parse_rep_post()

        if rep_expr_info is not None:
            parsed_item = _Rep([parsed_item], *rep_expr_info, text_loc=rep_loc)

    items.append(parsed_item)
    return True
2005
# Parses and returns as many consecutive items as possible, stopping
# at the end of the input or at the first text which is neither an
# item nor (if `accept_macro_defs` is `True`) a macro definition.
#
# Macro definitions are registered by _try_parse_macro_def(); they
# don't add anything to the returned list.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got an item: keep going
            continue

        if not (accept_macro_defs and self._try_parse_macro_def()):
            # Unknown at this point
            break

    return parsed_items
2025
# Parses the whole Normand input, setting `self._res` to the main
# group item on success.
#
# Raises a parse error if some text remains once no more top-level
# items and macro definitions can be parsed.
def _parse(self):
    if not self._normand.strip():
        # Nothing but whitespace: the main group is empty
        self._res = _Group([], self._text_loc)
        return

    # First-level items; macro definitions are accepted here
    top_items = self._parse_items(True)

    # Whatever remains at this point is unexpected
    self._skip_ws_and_comments_and_syms()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    self._res = _Group(top_items, self._text_loc)
2047
2048
# The return type of parse().
#
# Not meant to be instantiated directly: __init__() always raises
# `NotImplementedError`. Instances come from the _create() class
# method, which bypasses __init__().
class ParseResult:
    # Builds and returns an instance without going through
    # __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Stores the result properties.
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._bo = bo
        self._offset = offset
        self._labels = labels
        self._vars = variables
        self._data = data

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary mapping each updated variable name to its last
    # computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary mapping each updated main group label name to its
    # computed value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
2106
2107
# Raises a parse error having the message `msg` and located where
# the item `item` is (its `text_loc` property).
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
2112
2113
# The `ICITTE` reserved name.
#
# The parser rejects it as a variable name and as a label name, and
# expression evaluation binds it to the current offset.
_icitte_name = "ICITTE"
2116
2117
2118 # Base node visitor.
2119 #
2120 # Calls the _visit_name() method for each name node which isn't the name
2121 # of a call.
2122 class _NodeVisitor(ast.NodeVisitor):
2123 def __init__(self):
2124 self._parent_is_call = False
2125
2126 def generic_visit(self, node: ast.AST):
2127 if type(node) is ast.Call:
2128 self._parent_is_call = True
2129 elif type(node) is ast.Name and not self._parent_is_call:
2130 self._visit_name(node.id)
2131
2132 super().generic_visit(node)
2133 self._parent_is_call = False
2134
2135 @abc.abstractmethod
2136 def _visit_name(self, name: str):
2137 ...
2138
2139
# Expression validator: raises a parse error (located at `text_loc`)
# for the first visited name which is neither the reserved `ICITTE`
# name nor one of `allowed_names`.
#
# The error message includes the sorted list of legal names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._allowed_names = allowed_names
        self._text_loc = text_loc
        self._expr_str = expr_str

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        # The reserved name is always legal, so this set is never
        # empty.
        legal_names = self._allowed_names | {_icitte_name}
        legal_names_str = ", ".join(
            sorted("`{}`".format(legal_name) for legal_name in legal_names)
        )
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
2170
2171
# Generator state: variables, labels, current offset, and current
# byte order.
#
# The `variables` and `labels` dictionaries are copied on
# construction.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.bo = bo
        self.offset = offset
        self.labels = labels.copy()
        self.variables = variables.copy()

    def __repr__(self):
        return "_GenState({!r}, {!r}, {!r}, {!r})".format(
            self.variables, self.labels, self.offset, self.bo
        )
2190
2191
# Fixed-length number item instance: a read-only record of a
# fixed-length number item, an offset within the generated data, a
# generation state, and a list of parse error messages.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._parse_error_msgs = parse_error_msgs
        self._state = state
        self._offset_in_data = offset_in_data
        self._item = item

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state.
    @property
    def state(self):
        return self._state

    # Parse error messages.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
2221
2222
2223 # Generator of data and final state from a group item.
2224 #
2225 # Generation happens in memory at construction time. After building, use
2226 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2227 # get the resulting context.
2228 #
2229 # The steps of generation are:
2230 #
2231 # 1. Handle each item in prefix order.
2232 #
2233 # The handlers append bytes to `self._data` and update some current
2234 # state object (`_GenState` instance).
2235 #
2236 # When handling a fixed-length number item, try to evaluate its
2237 # expression using the current state. If this fails, then it might be
2238 # because the expression refers to a "future" label: save the current
2239 # offset in `self._data` (generated data) and a snapshot of the
2240 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
2241 # object). _gen_fl_num_item_insts() will deal with this later. A
2242 # `_FlNumItemInst` instance also contains a snapshot of the current
2243 # parsing error messages (`self._parse_error_msgs`) which need to be
2244 # taken into account when handling the instance later.
2245 #
2246 # When handling the items of a group, keep a map of immediate label
2247 # names to their offset. Then, after having processed all the items,
2248 # update the relevant saved state snapshots in
2249 # `self._fl_num_item_insts` with those immediate label values.
2250 # _gen_fl_num_item_insts() will deal with this later.
2251 #
2252 # 2. Handle all the fixed-length number item instances of which the
2253 # expression evaluation failed before.
2254 #
2255 # At this point, `self._fl_num_item_insts` contains everything that's
2256 # needed to evaluate the expressions, including the values of
2257 # "future" labels from the point of view of some fixed-length number
2258 # item instance.
2259 #
2260 # If an evaluation fails at this point, then it's a user error. Add
2261 # to the parsing error all the saved parsing error messages of the
2262 # instance. Those additional messages add precious context to the
2263 # error.
2264 class _Gen:
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Bookkeeping used during generation
    self._in_trans = False
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
    self._macro_defs = macro_defs

    # Generate right away from an initial state built from the
    # parameters.
    initial_state = _GenState(variables, labels, offset, bo)
    self._gen(group, initial_state)
2279
# Generated bytes (`self._data`).
@property
def data(self):
    return self._data

# Variables of the final state.
@property
def variables(self):
    return self._final_state.variables

# Labels of the final state (main group labels).
@property
def labels(self):
    return self._final_state.labels

# Offset of the final state.
@property
def offset(self):
    return self._final_state.offset

# Byte order of the final state.
@property
def bo(self):
    return self._final_state.bo
2304
# Evaluates the expression `expr`, of which the original string is
# `expr_str` and the location is `text_loc`, considering the
# generation state `state`.
#
# The names available to the expression are the labels of `state`,
# `ICITTE` (current offset of `state`), and the variables of `state`.
# The expression is validated against those names before being
# evaluated.
#
# A `bool` result is converted to `int`. Then the result must be an
# `int`, or a `float` if `accept_float` is `True`, or a `str` if
# `accept_str` is `True`.
#
# Raises a parse error if the validation or evaluation fails, or if
# the type of the result is unexpected.
#
# NOTE: eval() runs the expression as Python code: only process
# Normand input which you trust.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    # Symbols: labels first, then `ICITTE`, then variables
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # Compile and evaluate the expression node
    try:
        result = eval(compile(expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(result) is bool:
        result = int(result)

    # Accepted result types
    accepted_types = {int}  # type: Set[type]
    optional_types = ((float, accept_float), (str, accept_str))
    accepted_types.update(tp for tp, accepted in optional_types if accepted)

    if type(result) in accepted_types:
        return result

    # Unexpected result type: build a readable list of the accepted
    # ones.
    type_names = sorted("`{}`".format(tp.__name__) for tp in accepted_types)

    if len(type_names) <= 2:
        msg_expected = " or ".join(type_names)
    else:
        last_part = "or {}".format(type_names[-1])
        msg_expected = ", ".join(type_names[:-1] + [last_part])

    _raise_error(
        "Invalid expression `{}`: expecting result type {}, not `{}`".format(
            expr_str, msg_expected, type(result).__name__
        ),
        text_loc,
    )
2378
2379 # Forwards to _eval_expr() with the expression and text location of
2380 # `item`.
2381 @staticmethod
2382 def _eval_item_expr(
2383 item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign],
2384 state: _GenState,
2385 accept_float: bool = False,
2386 accept_str: bool = False,
2387 ):
2388 return _Gen._eval_expr(
2389 item.expr_str, item.expr, item.text_loc, state, accept_float, accept_str
2390 )
2391
2392 # Handles the byte item `item`.
2393 def _handle_byte_item(self, item: _Byte, state: _GenState):
2394 self._data.append(item.val)
2395 state.offset += item.size
2396
2397 # Handles the literal string item `item`.
2398 def _handle_lit_str_item(self, item: _LitStr, state: _GenState):
2399 self._data += item.data
2400 state.offset += item.size
2401
2402 # Handles the byte order setting item `item`.
2403 def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
2404 # Update current byte order
2405 state.bo = item.bo
2406
2407 # Handles the variable assignment item `item`.
2408 def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
2409 # Update variable
2410 state.variables[item.name] = self._eval_item_expr(
2411 item, state, accept_float=True, accept_str=True
2412 )
2413
2414 # Handles the fixed-length number item `item`.
2415 def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
2416 # Validate current byte order
2417 if state.bo is None and item.len > 8:
2418 _raise_error_for_item(
2419 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
2420 item.expr_str
2421 ),
2422 item,
2423 )
2424
2425 # Try an immediate evaluation. If it fails, then keep everything
2426 # needed to (try to) generate the bytes of this item later.
2427 try:
2428 data = self._gen_fl_num_item_inst_data(item, state)
2429 except Exception:
2430 if self._in_trans:
2431 _raise_error_for_item(
2432 "Invalid expression `{}`: failed to evaluate within a transformation block".format(
2433 item.expr_str
2434 ),
2435 item,
2436 )
2437
2438 self._fl_num_item_insts.append(
2439 _FlNumItemInst(
2440 item,
2441 len(self._data),
2442 copy.deepcopy(state),
2443 copy.deepcopy(self._parse_error_msgs),
2444 )
2445 )
2446
2447 # Reserve space in `self._data` for this instance
2448 data = bytes([0] * (item.len // 8))
2449
2450 # Append bytes
2451 self._data += data
2452
2453 # Update offset
2454 state.offset += len(data)
2455
2456 # Returns the size, in bytes, required to encode the value `val`
2457 # with LEB128 (signed version if `is_signed` is `True`).
2458 @staticmethod
2459 def _leb128_size_for_val(val: int, is_signed: bool):
2460 if val < 0:
2461 # Equivalent upper bound.
2462 #
2463 # For example, if `val` is -128, then the full integer for
2464 # this number of bits would be [-128, 127].
2465 val = -val - 1
2466
2467 # Number of bits (add one for the sign if needed)
2468 bits = val.bit_length() + int(is_signed)
2469
2470 if bits == 0:
2471 bits = 1
2472
2473 # Seven bits per byte
2474 return math.ceil(bits / 7)
2475
2476 # Handles the LEB128 integer item `item`.
2477 def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
2478 # Compute value
2479 val = self._eval_item_expr(item, state)
2480
2481 # Size in bytes
2482 size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)
2483
2484 # For each byte
2485 for _ in range(size):
2486 # Seven LSBs, MSB of the byte set (continue)
2487 self._data.append((val & 0x7F) | 0x80)
2488 val >>= 7
2489
2490 # Clear MSB of last byte (stop)
2491 self._data[-1] &= ~0x80
2492
2493 # Update offset
2494 state.offset += size
2495
2496 # Handles the string item `item`.
2497 def _handle_str_item(self, item: _Str, state: _GenState):
2498 # Compute value
2499 val = str(self._eval_item_expr(item, state, accept_float=True, accept_str=True))
2500
2501 # Encode
2502 data = _encode_str(val, item.codec, item.text_loc)
2503
2504 # Add to data
2505 self._data += data
2506
2507 # Update offset
2508 state.offset += len(data)
2509
2510 # Handles the group item `item`, removing the immediate labels from
2511 # `state` at the end if `remove_immediate_labels` is `True`.
2512 def _handle_group_item(
2513 self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
2514 ):
2515 first_fl_num_item_inst_index = len(self._fl_num_item_insts)
2516 immediate_labels = {} # type: LabelsT
2517
2518 # Handle each item
2519 for subitem in item.items:
2520 if type(subitem) is _Label:
2521 # Add to local immediate labels
2522 immediate_labels[subitem.name] = state.offset
2523
2524 self._handle_item(subitem, state)
2525
2526 # Remove immediate labels from current state if needed
2527 if remove_immediate_labels:
2528 for name in immediate_labels:
2529 del state.labels[name]
2530
2531 # Add all immediate labels to all state snapshots since
2532 # `first_fl_num_item_inst_index`.
2533 for inst in self._fl_num_item_insts[first_fl_num_item_inst_index:]:
2534 inst.state.labels.update(immediate_labels)
2535
2536 # Handles the repetition item `item`.
2537 def _handle_rep_item(self, item: _Rep, state: _GenState):
2538 # Compute the repetition count
2539 mul = _Gen._eval_item_expr(item, state)
2540
2541 # Validate result
2542 if mul < 0:
2543 _raise_error_for_item(
2544 "Invalid expression `{}`: unexpected negative result {:,}".format(
2545 item.expr_str, mul
2546 ),
2547 item,
2548 )
2549
2550 # Generate group data `mul` times
2551 for _ in range(mul):
2552 self._handle_group_item(item, state)
2553
2554 # Handles the conditional item `item`.
2555 def _handle_cond_item(self, item: _Cond, state: _GenState):
2556 # Compute the conditional value
2557 val = _Gen._eval_item_expr(item, state)
2558
2559 # Generate selected group data
2560 if val:
2561 self._handle_group_item(item.true_item, state)
2562 else:
2563 self._handle_group_item(item.false_item, state)
2564
2565 # Handles the transformation item `item`.
2566 def _handle_trans_item(self, item: _Trans, state: _GenState):
2567 init_in_trans = self._in_trans
2568 self._in_trans = True
2569 init_data_len = len(self._data)
2570 init_offset = state.offset
2571
2572 # Generate group data
2573 self._handle_group_item(item, state)
2574
2575 # Remove and keep group data
2576 to_trans = self._data[init_data_len:]
2577 del self._data[init_data_len:]
2578
2579 # Encode group data and append to current data
2580 try:
2581 transformed = item.trans(to_trans)
2582 except Exception as exc:
2583 _raise_error_for_item(
2584 "Cannot apply the {} transformation to this data: {}".format(
2585 item.name, exc
2586 ),
2587 item,
2588 )
2589
2590 self._data += transformed
2591
2592 # Update offset and restore
2593 state.offset = init_offset + len(transformed)
2594 self._in_trans = init_in_trans
2595
2596 # Evaluates the parameters of the macro expansion item `item`
2597 # considering the initial state `init_state` and returns a new state
2598 # to handle the items of the macro.
2599 def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
2600 # New state
2601 exp_state = _GenState({}, {}, init_state.offset, init_state.bo)
2602
2603 # Evaluate the parameter expressions
2604 macro_def = self._macro_defs[item.name]
2605
2606 for param_name, param in zip(macro_def.param_names, item.params):
2607 exp_state.variables[param_name] = _Gen._eval_expr(
2608 param.expr_str,
2609 param.expr,
2610 param.text_loc,
2611 init_state,
2612 accept_float=True,
2613 accept_str=True,
2614 )
2615
2616 return exp_state
2617
2618 # Handles the macro expansion item `item`.
2619 def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
2620 parse_error_msg_text = "While expanding the macro `{}`:".format(item.name)
2621
2622 try:
2623 # New state
2624 exp_state = self._eval_macro_exp_params(item, state)
2625
2626 # Process the contained group
2627 init_data_size = len(self._data)
2628 parse_error_msg = (
2629 ParseErrorMessage._create( # pyright: ignore[reportPrivateUsage]
2630 parse_error_msg_text, item.text_loc
2631 )
2632 )
2633 self._parse_error_msgs.append(parse_error_msg)
2634 self._handle_group_item(self._macro_defs[item.name], exp_state)
2635 self._parse_error_msgs.pop()
2636 except ParseError as exc:
2637 _augment_error(exc, parse_error_msg_text, item.text_loc)
2638
2639 # Update state offset and return
2640 state.offset += len(self._data) - init_data_size
2641
2642 # Handles the offset setting item `item`.
2643 def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
2644 state.offset = item.val
2645
2646 # Handles the offset alignment item `item` (adds padding).
2647 def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
2648 init_offset = state.offset
2649 align_bytes = item.val // 8
2650 state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
2651 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2652
2653 # Handles the filling item `item` (adds padding).
2654 def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
2655 # Compute the new offset
2656 new_offset = _Gen._eval_item_expr(item, state)
2657
2658 # Validate the new offset
2659 if new_offset < state.offset:
2660 _raise_error_for_item(
2661 "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
2662 item.expr_str, new_offset, state.offset
2663 ),
2664 item,
2665 )
2666
2667 # Fill
2668 self._data += bytes([item.pad_val] * (new_offset - state.offset))
2669
2670 # Update offset
2671 state.offset = new_offset
2672
2673 # Handles the label item `item`.
2674 def _handle_label_item(self, item: _Label, state: _GenState):
2675 state.labels[item.name] = state.offset
2676
2677 # Handles the item `item`, returning the updated next repetition
2678 # instance.
2679 def _handle_item(self, item: _Item, state: _GenState):
2680 return self._item_handlers[type(item)](item, state)
2681
2682 # Generates the data for a fixed-length integer item instance having
2683 # the value `val` and returns it.
2684 def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
2685 # Validate range
2686 if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
2687 _raise_error_for_item(
2688 "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
2689 val, item.len, item.expr_str
2690 ),
2691 item,
2692 )
2693
2694 # Encode result on 64 bits (to extend the sign bit whatever the
2695 # value of `item.len`).
2696 data = struct.pack(
2697 "{}{}".format(
2698 ">" if state.bo in (None, ByteOrder.BE) else "<",
2699 "Q" if val >= 0 else "q",
2700 ),
2701 val,
2702 )
2703
2704 # Keep only the requested length
2705 len_bytes = item.len // 8
2706
2707 if state.bo in (None, ByteOrder.BE):
2708 # Big endian: keep last bytes
2709 data = data[-len_bytes:]
2710 else:
2711 # Little endian: keep first bytes
2712 assert state.bo == ByteOrder.LE
2713 data = data[:len_bytes]
2714
2715 # Return data
2716 return data
2717
2718 # Generates the data for a fixed-length floating point number item
2719 # instance having the value `val` and returns it.
2720 def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
2721 # Validate length
2722 if item.len not in (32, 64):
2723 _raise_error_for_item(
2724 "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
2725 item.len, val
2726 ),
2727 item,
2728 )
2729
2730 # Encode and return result
2731 return struct.pack(
2732 "{}{}".format(
2733 ">" if state.bo in (None, ByteOrder.BE) else "<",
2734 "f" if item.len == 32 else "d",
2735 ),
2736 val,
2737 )
2738
2739 # Generates the data for a fixed-length number item instance and
2740 # returns it.
2741 def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
2742 # Compute value
2743 val = self._eval_item_expr(item, state, True)
2744
2745 # Handle depending on type
2746 if type(val) is int:
2747 return self._gen_fl_int_item_inst_data(val, item, state)
2748 else:
2749 assert type(val) is float
2750 return self._gen_fl_float_item_inst_data(val, item, state)
2751
2752 # Generates the data for all the fixed-length number item instances
2753 # and writes it at the correct offset within `self._data`.
2754 def _gen_fl_num_item_insts(self):
2755 for inst in self._fl_num_item_insts:
2756 # Generate bytes
2757 try:
2758 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
2759 except ParseError as exc:
2760 # Add all the saved parse error messages for this
2761 # instance.
2762 for msg in reversed(inst.parse_error_msgs):
2763 _add_error_msg(exc, msg.text, msg.text_location)
2764
2765 raise
2766
2767 # Insert bytes into `self._data`
2768 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2769
2770 # Generates the data (`self._data`) and final state
2771 # (`self._final_state`) from `group` and the initial state `state`.
2772 def _gen(self, group: _Group, state: _GenState):
2773 # Initial state
2774 self._data = bytearray()
2775
2776 # Item handlers
2777 self._item_handlers = {
2778 _AlignOffset: self._handle_align_offset_item,
2779 _Byte: self._handle_byte_item,
2780 _Cond: self._handle_cond_item,
2781 _FillUntil: self._handle_fill_until_item,
2782 _FlNum: self._handle_fl_num_item,
2783 _Group: self._handle_group_item,
2784 _Label: self._handle_label_item,
2785 _LitStr: self._handle_lit_str_item,
2786 _MacroExp: self._handle_macro_exp_item,
2787 _Rep: self._handle_rep_item,
2788 _SetBo: self._handle_set_bo_item,
2789 _SetOffset: self._handle_set_offset_item,
2790 _SLeb128Int: self._handle_leb128_int_item,
2791 _Str: self._handle_str_item,
2792 _Trans: self._handle_trans_item,
2793 _ULeb128Int: self._handle_leb128_int_item,
2794 _VarAssign: self._handle_var_assign_item,
2795 } # type: Dict[type, Callable[[Any, _GenState], None]]
2796
2797 # Handle the group item, _not_ removing the immediate labels
2798 # because the `labels` property offers them.
2799 self._handle_group_item(group, state, False)
2800
2801 # This is actually the final state
2802 self._final_state = state
2803
2804 # Generate all the fixed-length number bytes now that we know
2805 # their full state
2806 self._gen_fl_num_item_insts()
2807
2808
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes as well as the final variables,
# labels, offset, and byte order.
#
# `init_variables` maps initial variable names (valid Python names) to
# their values (numbers or strings). A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` maps initial label names (valid Python names) to
# integral values. A label name must not be the reserved name `ICITTE`.
# `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order (`None` means not
# defined yet).
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Default to empty initial symbols
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text first, and then generate the bytes from the
    # resulting main group and macro definitions.
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res, parser.macro_defs, variables, labels, init_offset, init_byte_order
    )

    # Wrap what the generator offers into the public result
    data = gen.data
    final_variables = gen.variables
    final_labels = gen.labels
    final_offset = gen.offset
    final_bo = gen.bo
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        data, final_variables, final_labels, final_offset, final_bo
    )
2850
2851
# Raises a command-line error (`RuntimeError`) of which the message is
# `msg` prefixed with `Command-line error: `.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2855
2856
# Converts the value part `s` of a CLI `NAME=VAL` assignment to a
# number and returns it.
#
# If `is_label` is `True`, then only an integer is accepted; otherwise
# the result may be a `float` too.
#
# Raises a command-line error if `s` is not an expected constant.
def _val_from_assign_val_str(s: str, is_label: bool):
    text = s.strip()

    # A label cannot be a floating point number: only try this form for
    # a variable.
    if not is_label:
        float_m = _const_float_pat.fullmatch(text)

        if float_m is not None:
            return float(float_m.group(0))

    # Constant integer
    int_m = _const_int_pat.fullmatch(text)

    if int_m is not None:
        # Base 0: let int() deduce the base from the prefix
        return int(_norm_const_int(int_m.group(0)), 0)

    # Nothing matched
    if is_label:
        exp = "an integer"
    else:
        exp = "a number"

    _raise_cli_error("Invalid assignment value `{}`: expecting {}".format(text, exp))
2876
2877
# Returns a dictionary of names to values from the list of strings
# `args` containing `NAME=VAL` entries (empty if `args` is `None`).
#
# If `is_str_only` is `True`, then each value is the raw `VAL` string;
# otherwise each value is the number which _val_from_assign_val_str()
# returns for `VAL` and `is_label`.
#
# Raises a command-line error on any malformed entry.
def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
    res = {}  # type: VariablesT

    if args is None:
        return res

    # Same pattern for all the entries: build it once
    assign_pat = re.compile(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment `{}`".format(arg))

        val_str = m.group(2)

        # A later entry having the same name replaces an earlier one
        res[m.group(1)] = (
            val_str if is_str_only else _val_from_assign_val_str(val_str, is_label)
        )

    return res
2900
2901
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# Reads the Normand input text from the standard input when there's no
# input file path.
#
# Raises a command-line error on any invalid assignment or on a
# negative initial offset.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels (a string variable replaces a numeric
    # variable having the same name).
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
2994
2995
# CLI entry point without exception handling: parses `normand` with
# the initial state (`offset`, `bo`, `variables`, and `labels`) and
# writes the generated bytes to the standard output.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    res = parse(normand, variables, labels, offset, bo)

    # Binary output: bypass the text layer
    sys.stdout.buffer.write(res.data)
3005
3006
# Prints the exception message `msg` to the standard error, making
# sure that it ends with a period, and exits with status 1.
def _fail(msg: str) -> NoReturn:
    # Strip _before_ checking the last character so that trailing
    # whitespace doesn't hide an existing final period.
    msg = msg.strip()

    if not msg.endswith("."):
        msg += "."

    print(msg, file=sys.stderr)
    sys.exit(1)
3014
3015
# CLI entry point: runs the tool with the command-line arguments and,
# on any error, prints its message(s) to the standard error and exits
# with status 1.
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        # First element is the input path: the rest is what
        # _run_cli_with_args() expects.
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        path = args[0]

        if path is None:
            prefix = ""
        else:
            prefix = "{}:".format(os.path.abspath(path))

        lines = []  # type: List[str]

        # One line per message, in reverse order
        for msg in reversed(exc.messages):
            loc = msg.text_location
            line = "{}{}:{} - {}".format(prefix, loc.line_no, loc.col_no, msg.text)

            # Terminate the line with a period, unless it already ends
            # with some punctuation.
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
3047
3048
# Run the command-line tool when this module is executed as a script
# (as opposed to imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.112738 seconds and 4 git commands to generate.