Use only naked `{` and `}` for variable assignment
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
32 __author__ = "Philippe Proulx"
33 __version__ = "0.22.0"
34 __all__ = [
35 "__author__",
36 "__version__",
37 "ByteOrder",
38 "LabelsT",
39 "parse",
40 "ParseError",
41 "ParseErrorMessage",
42 "ParseResult",
43 "TextLocation",
44 "VariablesT",
45 ]
46
47 import re
48 import abc
49 import ast
50 import bz2
51 import sys
52 import copy
53 import enum
54 import gzip
55 import math
56 import base64
57 import quopri
58 import struct
59 import typing
60 import functools
61 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
62
63
# Text location: a line number and a column number.
class TextLocation:
    # Private factory: builds an instance without going through
    # __init__(), which always raises.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        inst = cls.__new__(cls)
        inst._init(line_no, col_no)
        return inst

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({}, {})".format(self.line_no, self.col_no)
91
92
# Base of any item: holds the source text location of the item.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Text location of this item within the source.
    @property
    def text_loc(self):
        return self._text_loc
102
103
# Scalar item: an item having a size.
class _ScalarItem(_Item):
    # Size of this item (bytes).
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
111
112
113 # A repeatable item.
114 class _RepableItem:
115 pass
116
117
# Single byte item.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Value of the byte.
    @property
    def val(self):
        return self._val

    # A byte item always has a size of one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, repr(self._text_loc))
135
136
# Literal string item (already encoded).
class _LitStr(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_LitStr({}, {})".format(*map(repr, (self._data, self._text_loc)))
154
155
# Byte order (unique values enforced).
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian
    BE = "be"

    # Little endian
    LE = "le"
164
165
# Byte order setting item.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order carried by this item.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({}, {})".format(*map(repr, (self._bo, self._text_loc)))
178
179
# Label item.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Name of the label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({}, {})".format(*map(repr, (self._name, self._text_loc)))
193
194
# Offset setting item.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({}, {})".format(*map(repr, (self._val, self._text_loc)))
208
209
# Offset alignment item.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Value of a padding byte.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        parts = (self._val, self._pad_val, self._text_loc)
        return "_AlignOffset({}, {}, {})".format(*map(repr, parts))
231
232
233 # Mixin of containing an AST expression and its string.
234 class _ExprMixin:
235 def __init__(self, expr_str: str, expr: ast.Expression):
236 self._expr_str = expr_str
237 self._expr = expr
238
239 # Expression string.
240 @property
241 def expr_str(self):
242 return self._expr_str
243
244 # Expression node to evaluate.
245 @property
246 def expr(self):
247 return self._expr
248
249
# "Fill until some offset" item: an expression and a padding byte value.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # _Item.__init__() only receives the text location: the mixin
        # part is initialized explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Value of a padding byte.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._pad_val, self._text_loc)
        return "_FillUntil({}, {}, {}, {})".format(*map(repr, parts))
271
272
# Variable assignment item: a variable name and an expression.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # _Item.__init__() only receives the text location: the mixin
        # part is initialized explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        parts = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({}, {}, {}, {})".format(*map(repr, parts))
294
295
# Fixed-length number item, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight (floor).
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._len, self._text_loc)
        return "_FlNum({}, {}, {}, {})".format(*map(repr, parts))
321
322
# LEB128 integer item (common base of the unsigned and signed kinds).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the name of the concrete class so that subclasses don't
        # need their own __repr__().
        cls_name = self.__class__.__name__
        parts = (self._expr_str, self._expr, self._text_loc)
        return "{}({}, {}, {})".format(cls_name, *map(repr, parts))
336
337
# Unsigned LEB128 integer item (adds nothing to its base).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
341
342
# Signed LEB128 integer item (adds nothing to its base).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
346
347
# String item: an expression and the name of the codec to encode with.
class _Str(_Item, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._codec = codec

    # Name of the codec.
    @property
    def codec(self):
        return self._codec

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._codec, self._text_loc)
        return "_Str({}, {}, {}, {})".format(*map(repr, parts))
369
370
# Group item: a list of contained items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Items contained in this group.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({}, {})".format(*map(repr, (self._items, self._text_loc)))
384
385
# Repetition item: a group of items with an expression.
class _Rep(_Group, _ExprMixin):
    def __init__(
        self,
        items: List[_Item],
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # Group part first, then the expression mixin part
        super().__init__(items, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        parts = (self._items, self._expr_str, self._expr, self._text_loc)
        return "_Rep({}, {}, {}, {})".format(*map(repr, parts))
405
406
# Conditional item: an expression and one group for each outcome.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Group,
        false_item: _Group,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item to use when the condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item to use when the condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        parts = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({}, {}, {}, {}, {})".format(*map(repr, parts))
440
441
# Transformation item: a group of items, a name, and the function
# which transforms data.
class _Trans(_Group, _RepableItem):
    def __init__(
        self,
        items: List[_Item],
        name: str,
        func: Callable[[Union[bytes, bytearray]], bytes],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._name, self._func = name, func

    # Name of the transformation.
    @property
    def name(self):
        return self._name

    # Returns the result of applying the transformation function to
    # the data `data`.
    def trans(self, data: Union[bytes, bytearray]):
        transform = self._func
        return transform(data)

    def __repr__(self):
        parts = (self._items, self._name, self._func, self._text_loc)
        return "_Trans({}, {}, {}, {})".format(*map(repr, parts))
470
471
# Macro definition item: a name, parameter names, and a group of items.
class _MacroDef(_Group):
    def __init__(
        self,
        name: str,
        param_names: List[str],
        items: List[_Item],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._name, self._param_names = name, param_names

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Names of the parameters.
    @property
    def param_names(self):
        return self._param_names

    def __repr__(self):
        parts = (self._name, self._param_names, self._items, self._text_loc)
        return "_MacroDef({}, {}, {}, {})".format(*map(repr, parts))
502
503
# Macro expansion parameter: an expression (string and AST node) and
# its source text location.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    # Expression as a string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression as an AST node.
    @property
    def expr(self):
        return self._expr

    # Text location of this parameter within the source.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._text_loc)
        return "_MacroExpParam({}, {}, {})".format(*map(repr, parts))
530
531
# Macro expansion item: the name of a macro and expansion parameters.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Expansion parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        parts = (self._name, self._params, self._text_loc)
        return "_MacroExp({}, {}, {})".format(*map(repr, parts))
560
561
# A parsing error message: a text and a source text location.
class ParseErrorMessage:
    # Private factory: builds an instance without going through
    # __init__(), which always raises.
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        inst = cls.__new__(cls)
        inst._init(text, text_loc)
        return inst

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Text of the message.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
586
587
# A parsing error holding one or more messages (`ParseErrorMessage`).
class ParseError(RuntimeError):
    # Private factory: builds an instance without going through
    # __init__(), which always raises.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]

        # The initial message is the first of the list
        self._add_msg(msg, text_loc)

    # Appends a message built from `msg` and `text_loc`.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
617
618
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
622
623
# Adds the message `msg`, located at `text_loc`, to the parsing error
# `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
627
628
# Appends the message `msg`, located at `text_loc`, to the parsing
# error `exc`, and then raises `exc` again.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    _add_error_msg(exc, msg, text_loc)
    raise exc
633
634
635 # Returns a normalized version (so as to be parseable by int()) of
636 # the constant integer string `s`, possibly negative, dealing with
637 # any radix suffix.
638 def _norm_const_int(s: str):
639 neg = ""
640 pos = s
641
642 if s.startswith("-"):
643 neg = "-"
644 pos = s[1:]
645
646 for r in "xXoObB":
647 if pos.startswith("0" + r):
648 # Already correct
649 return s
650
651 # Try suffix
652 asm_suf_base = {
653 "h": "x",
654 "H": "x",
655 "q": "o",
656 "Q": "o",
657 "o": "o",
658 "O": "o",
659 "b": "b",
660 "B": "B",
661 }
662
663 for suf in asm_suf_base:
664 if pos[-1] == suf:
665 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
666
667 return s
668
669
# Returns the string `s` encoded with the codec `codec`.
#
# Raises `ParseError`, located at `text_loc`, when `s` cannot be
# encoded with `codec` (`UnicodeEncodeError`).
def _encode_str(s: str, codec: str, text_loc: TextLocation):
    try:
        data = s.encode(codec)
    except UnicodeEncodeError:
        _raise_error(
            "Cannot encode `{}` with the `{}` encoding".format(s, codec), text_loc
        )
    else:
        return data
679
680
681 # Variables dictionary type (for type hints).
682 VariablesT = Dict[str, Union[int, float, str]]
683
684
685 # Labels dictionary type (for type hints).
686 LabelsT = Dict[str, int]
687
688
689 # Common patterns.
690 _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
691 _pos_const_int_pat = re.compile(
692 r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
693 )
694 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
695 _const_float_pat = re.compile(
696 r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
697 )
698
699
# Type of a dictionary of macro definitions: name to macro definition.
_MacroDefsT = Dict[str, _MacroDef]
702
703
704 # Normand parser.
705 #
706 # The constructor accepts a Normand input. After building, use the `res`
707 # property to get the resulting main group.
708 class _Parser:
# Builds a parser for the Normand input `normand` and parses it
# immediately.
#
# The keys of `variables` and `labels` become the initially known
# variable and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position
    self._normand = normand
    self._at = 0
    self._line_no = 1
    self._col_no = 1

    # Known names
    self._label_names = set(labels)
    self._var_names = set(variables)
    self._macro_defs = {}  # type: _MacroDefsT

    # Base item parsing functions
    self._base_item_parse_funcs = [
        self._try_parse_byte,
        self._try_parse_str,
        self._try_parse_val,
        self._try_parse_var_assign,
        self._try_parse_set_bo,
        self._try_parse_label_or_set_offset,
        self._try_parse_align_offset,
        self._try_parse_fill_until,
        self._try_parse_group,
        self._try_parse_rep_block,
        self._try_parse_cond_block,
        self._try_parse_macro_exp,
        self._try_parse_trans_block,
    ]

    # Parse now
    self._parse()
735
736 # Result (main group).
737 @property
738 def res(self):
739 return self._res
740
741 # Macro definitions.
742 @property
743 def macro_defs(self):
744 return self._macro_defs
745
# Current text location, built from the current line and column
# numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(  # pyright: ignore[reportPrivateUsage]
        line_no, col_no
    )
752
753 # Returns `True` if this parser is done parsing.
754 def _is_done(self):
755 return self._at == len(self._normand)
756
757 # Returns `True` if this parser isn't done parsing.
758 def _isnt_done(self):
759 return not self._is_done()
760
# Raises a parsing error having the message `msg` and located at the
# current text location.
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
765
766 # Tries to make the pattern `pat` match the current substring,
767 # returning the match object and updating `self._at`,
768 # `self._line_no`, and `self._col_no` on success.
769 def _try_parse_pat(self, pat: Pattern[str]):
770 m = pat.match(self._normand, self._at)
771
772 if m is None:
773 return
774
775 # Skip matched string
776 self._at += len(m.group(0))
777
778 # Update line number
779 self._line_no += m.group(0).count("\n")
780
781 # Update column number
782 for i in reversed(range(self._at)):
783 if self._normand[i] == "\n" or i == 0:
784 if i == 0:
785 self._col_no = self._at + 1
786 else:
787 self._col_no = self._at - i
788
789 break
790
791 # Return match object
792 return m
793
794 # Expects the pattern `pat` to match the current substring,
795 # returning the match object and updating `self._at`,
796 # `self._line_no`, and `self._col_no` on success, or raising a parse
797 # error with the message `error_msg` on error.
798 def _expect_pat(self, pat: Pattern[str], error_msg: str):
799 # Match
800 m = self._try_parse_pat(pat)
801
802 if m is None:
803 # No match: error
804 self._raise_error(error_msg)
805
806 # Return match object
807 return m
808
809 # Patterns for _skip_*()
810 _comment_pat = re.compile(r"#[^#]*?(?:$|#)", re.M)
811 _ws_or_comments_pat = re.compile(r"(?:\s|{})*".format(_comment_pat.pattern), re.M)
812 _ws_or_syms_or_comments_pat = re.compile(
813 r"(?:[\s/\\?&:;.,_=|-]|{})*".format(_comment_pat.pattern), re.M
814 )
815
816 # Skips as many whitespaces and comments as possible, but not
817 # insignificant symbol characters.
818 def _skip_ws_and_comments(self):
819 self._try_parse_pat(self._ws_or_comments_pat)
820
821 # Skips as many whitespaces, insignificant symbol characters, and
822 # comments as possible.
823 def _skip_ws_and_comments_and_syms(self):
824 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
825
826 # Pattern for _try_parse_hex_byte()
827 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
828
# Tries to parse a hexadecimal byte (two nibbles, possibly separated
# by whitespaces, insignificant symbols, and comments), returning a
# byte item on success.
#
# Raises a parsing error if a first nibble is found without a second
# one.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc

    # High nibble (optional)
    m_high = self._try_parse_pat(self._nibble_pat)

    if m_high is None:
        # Not a hexadecimal byte
        return

    # Low nibble (mandatory from now on)
    self._skip_ws_and_comments_and_syms()
    m_low = self._expect_pat(
        self._nibble_pat, "Expecting another hexadecimal nibble"
    )

    # Build item from both nibbles
    hex_str = m_high.group(0) + m_low.group(0)
    return _Byte(int(hex_str, 16), begin_text_loc)
849
850 # Patterns for _try_parse_bin_byte()
851 _bin_byte_bit_pat = re.compile(r"[01]")
852 _bin_byte_prefix_pat = re.compile(r"%+")
853
# Tries to parse binary bytes: as many groups of eight bits as there
# are `%` prefix characters.
#
# Returns a single byte item when there's a single `%`, or a group of
# byte items otherwise.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m_prefix = self._try_parse_pat(self._bin_byte_prefix_pat)

    if m_prefix is None:
        # No match
        return

    # One byte per `%` prefix character
    byte_items = []  # type: List[_Item]

    for _byte_index in range(len(m_prefix.group(0))):
        self._skip_ws_and_comments_and_syms()
        byte_text_loc = self._text_loc
        bit_str = ""

        # Expect eight bits
        for _bit_index in range(8):
            self._skip_ws_and_comments_and_syms()
            m_bit = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            bit_str += m_bit.group(0)

        byte_items.append(_Byte(int(bit_str, 2), byte_text_loc))

    if len(byte_items) != 1:
        # More than one byte: as group
        return _Group(byte_items, begin_text_loc)

    # Single byte
    return byte_items[0]
889
890 # Patterns for _try_parse_dec_byte()
891 _dec_byte_prefix_pat = re.compile(r"\$")
892 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
893
# Tries to parse a decimal byte (`$` followed with a decimal constant
# from -128 to 255), returning a byte item on success.
#
# A negative value is stored as its two's complement.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No prefix: not a decimal byte
        return

    # Expect the value
    self._skip_ws_and_comments()
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Compute value
    val = int(m.group("val"))

    if m.group("neg") == "-":
        val = -val

    # Validate
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Return item (two's complement value)
    return _Byte(val % 256, begin_text_loc)
919
# Tries to parse a byte (hexadecimal, then binary, then decimal),
# returning the item of the first form which matches.
def _try_parse_byte(self):
    for try_parse in (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    ):
        item = try_parse()

        if item is not None:
            return item
939
940 # Strings corresponding to escape sequence characters
941 _lit_str_escape_seq_strs = {
942 "0": "\0",
943 "a": "\a",
944 "b": "\b",
945 "e": "\x1b",
946 "f": "\f",
947 "n": "\n",
948 "r": "\r",
949 "t": "\t",
950 "v": "\v",
951 "\\": "\\",
952 '"': '"',
953 }
954
955 # Patterns for _try_parse_lit_str()
956 _lit_str_prefix_suffix_pat = re.compile(r'"')
957 _lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
958
959 # Parses a literal string between double quotes (without an encoding
960 # prefix) and returns the resulting string.
961 def _try_parse_lit_str(self, with_prefix: bool):
962 # Match prefix if needed
963 if with_prefix:
964 if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None:
965 # No match
966 return
967
968 # Expect literal string
969 m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string")
970
971 # Expect end of string
972 self._expect_pat(
973 self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)'
974 )
975
976 # Replace escape sequences
977 val = m.group(0)
978
979 for ec in '0abefnrtv"\\':
980 val = val.replace(r"\{}".format(ec), self._lit_str_escape_seq_strs[ec])
981
982 # Return string
983 return val
984
985 # Patterns for _try_parse_utf_str_encoding()
986 _str_encoding_utf_prefix_pat = re.compile(r"u")
987 _str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
988
989 # Tries to parse a UTF encoding specification, returning the Python
990 # codec name on success.
991 def _try_parse_utf_str_encoding(self):
992 # Match prefix
993 if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None:
994 # No match
995 return
996
997 # Expect UTF specification
998 m = self._expect_pat(
999 self._str_encoding_utf_pat,
1000 "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
1001 )
1002
1003 # Convert to codec name
1004 return {
1005 "8": "utf_8",
1006 "16be": "utf_16_be",
1007 "16le": "utf_16_le",
1008 "32be": "utf_32_be",
1009 "32le": "utf_32_le",
1010 }[m.group(0)]
1011
1012 # Patterns for _try_parse_str_encoding()
1013 _str_encoding_gen_prefix_pat = re.compile(r"s")
1014 _str_encoding_colon_pat = re.compile(r":")
1015 _str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b")
1016
1017 # Tries to parse a string encoding specification, returning the
1018 # Python codec name on success.
1019 #
1020 # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
1021 def _try_parse_str_encoding(self, req_gen_prefix: bool = False):
1022 # General prefix?
1023 if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None:
1024 # Expect `:`
1025 self._skip_ws_and_comments()
1026 self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`")
1027
1028 # Expect encoding specification
1029 self._skip_ws_and_comments()
1030
1031 # UTF?
1032 codec = self._try_parse_utf_str_encoding()
1033
1034 if codec is not None:
1035 return codec
1036
1037 # Expect Latin
1038 m = self._expect_pat(
1039 self._str_encoding_non_utf_pat,
1040 "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
1041 )
1042 return m.group(0)
1043
1044 # UTF?
1045 if not req_gen_prefix:
1046 return self._try_parse_utf_str_encoding()
1047
1048 # Patterns for _try_parse_str()
1049 _lit_str_prefix_pat = re.compile(r'"')
1050 _str_prefix_pat = re.compile(r'"|\{')
1051 _str_expr_pat = re.compile(r"[^}]+")
1052 _str_expr_suffix_pat = re.compile(r"\}")
1053
# Tries to parse a string, returning a literal string item or a string
# item (expression between `{` and `}`) on success.
#
# Without an encoding specification, only a literal string (encoded
# with UTF-8) may match. With one, a literal string or an expression is
# mandatory.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Encoding
    codec = self._try_parse_str_encoding()

    # Prefix
    self._skip_ws_and_comments()

    if codec is not None:
        # Encoding present: expect a string prefix
        m_prefix = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`')
    else:
        # No encoding: only a literal string (UTF-8) is legal
        m_prefix = self._try_parse_pat(self._lit_str_prefix_pat)

        if m_prefix is None:
            return

    if m_prefix.group(0) != '"':
        # Expression: expect it, followed with `}`
        self._skip_ws_and_comments()
        expr_text_loc = self._text_loc
        m_expr = self._expect_pat(self._str_expr_pat, "Expecting an expression")
        self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(0), expr_text_loc)

        # An expression is only reachable with an encoding
        assert codec is not None
        return _Str(expr_str, expr, codec, begin_text_loc)

    # Literal string: expect it
    str_text_loc = self._text_loc
    val = self._try_parse_lit_str(False)

    if val is None:
        self._raise_error("Expecting a literal string")

    # Encode string (UTF-8 without an explicit encoding)
    effective_codec = codec if codec is not None else "utf_8"
    data = _encode_str(val, effective_codec, str_text_loc)

    # Return item
    return _LitStr(data, begin_text_loc)
1106
1107 # Common right parenthesis pattern
1108 _right_paren_pat = re.compile(r"\)")
1109
1110 # Patterns for _try_parse_group()
1111 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
1112
# Tries to parse a group (`(` ... `)`, or `!g`/`!group` ... block end),
# returning a group item on success.
def _try_parse_group(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        # No match
        return

    # Parse items
    items = self._parse_items()

    # The expected end of group depends on how the group was opened
    self._skip_ws_and_comments_and_syms()
    opened_with_paren = m_open.group(0) == "("
    end_pat = self._right_paren_pat if opened_with_paren else self._block_end_pat
    end_str = ")" if opened_with_paren else "!end"
    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )

    # Return item
    return _Group(items, begin_text_loc)
1141
1142 # Returns a stripped expression string and an AST expression node
1143 # from the expression string `expr_str` at text location `text_loc`.
1144 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
1145 # Create an expression node from the expression string
1146 expr_str = expr_str.strip().replace("\n", " ")
1147
1148 try:
1149 expr = ast.parse(expr_str, mode="eval")
1150 except SyntaxError:
1151 _raise_error(
1152 "Invalid expression `{}`: invalid syntax".format(expr_str),
1153 text_loc,
1154 )
1155
1156 return expr_str, expr
1157
1158 # Patterns for _try_parse_val()
1159 _val_prefix_pat = re.compile(r"\[")
1160 _val_expr_pat = re.compile(r"([^\]:]+):")
1161 _fl_num_len_fmt_pat = re.compile(r"8|16|24|32|40|48|56|64")
1162 _leb128_int_fmt_pat = re.compile(r"(u|s)leb128")
1163 _val_suffix_pat = re.compile(r"]")
1164
# Tries to parse a value (`[`, an expression, `:`, a format, and `]`),
# returning an item on success.
#
# The format is, tried in this order: a fixed length in bits,
# `uleb128` or `sleb128`, or `s:` followed with an encoding name.
def _try_parse_val(self):
    if self._try_parse_pat(self._val_prefix_pat) is None:
        # No prefix: not a value
        return

    # Expect expression and `:`
    self._skip_ws_and_comments()
    expr_text_loc = self._text_loc
    m_expr = self._expect_pat(self._val_expr_pat, "Expecting an expression")

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), expr_text_loc)

    # Format
    self._skip_ws_and_comments()
    item = None

    # Fixed length?
    m_fmt = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if m_fmt is not None:
        item = _FlNum(expr_str, expr, int(m_fmt.group(0)), expr_text_loc)

    if item is None:
        # LEB128?
        m_fmt = self._try_parse_pat(self._leb128_int_fmt_pat)

        if m_fmt is not None:
            leb128_cls = _ULeb128Int if m_fmt.group(1) == "u" else _SLeb128Int
            item = leb128_cls(expr_str, expr, expr_text_loc)

    if item is None:
        # String encoding?
        codec = self._try_parse_str_encoding(True)

        if codec is None:
            # At this point it's invalid
            self._raise_error(
                "Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
            )

        item = _Str(expr_str, expr, codec, expr_text_loc)

    # Expect `]`
    self._skip_ws_and_comments()
    self._expect_pat(self._val_suffix_pat, "Expecting `]`")

    # Return item
    return item
1221
# Patterns for _try_parse_var_assign()
_var_assign_prefix_pat = re.compile(r"\{")
_var_assign_equal_pat = re.compile(r"=")
_var_assign_expr_pat = re.compile(r"[^}]+")
_var_assign_suffix_pat = re.compile(r"\}")

# Tries to parse a variable assignment (`{`, a name, `=`, an
# expression, and `}`), returning a variable assignment item on
# success, or `None` if the current position isn't a `{`.
#
# The whole assignment is consumed before the name is validated: the
# name must be neither `ICITTE` nor an existing label name.
def _try_parse_var_assign(self):
    if self._try_parse_pat(self._var_assign_prefix_pat) is None:
        # Not a variable assignment
        return

    # Variable name
    self._skip_ws_and_comments()
    name_text_loc = self._text_loc
    name = self._expect_pat(_py_name_pat, "Expecting a valid Python name").group(0)

    # `=`
    self._skip_ws_and_comments()
    self._expect_pat(self._var_assign_equal_pat, "Expecting `=`")

    # Raw expression text (parsed below, once the name is validated)
    self._skip_ws_and_comments()
    expr_text_loc = self._text_loc
    raw_expr_m = self._expect_pat(
        self._var_assign_expr_pat, "Expecting an expression"
    )

    # `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._var_assign_suffix_pat, "Expecting `}`")

    # Reject reserved and conflicting names
    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), name_text_loc
        )

    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), name_text_loc)

    # Build the expression node
    expr_str, expr = self._ast_expr_from_str(raw_expr_m.group(0), expr_text_loc)

    # The name is now a known variable name
    self._var_names.add(name)
    return _VarAssign(name, expr_str, expr, name_text_loc)
1277
# Pattern for _try_parse_set_bo()
_set_bo_pat = re.compile(r"!([bl]e)\b")

# Tries to parse a byte order setting (`!be` or `!le`), returning a
# byte order setting item on success, or `None` without a match.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    bo_m = self._try_parse_pat(self._set_bo_pat)

    if bo_m is None:
        # Not a byte order setting
        return

    bo_name = bo_m.group(1)

    if bo_name != "be":
        # The pattern only lets `be` and `le` through
        assert bo_name == "le"
        bo = ByteOrder.LE
    else:
        bo = ByteOrder.BE

    return _SetBo(bo, begin_text_loc)
1301
# Tries to parse the value of an offset setting (what follows the
# initial `<`), returning an offset setting item on success, or `None`
# if there's no positive constant integer at the current position.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc
    val_m = self._try_parse_pat(_pos_const_int_pat)

    if val_m is None:
        # Not an offset setting value
        return

    # Base 0 makes int() honour the prefix of the normalized constant
    offset_val = int(_norm_const_int(val_m.group(0)), 0)
    return _SetOffset(offset_val, begin_text_loc)
1316
# Tries to parse a label name (what follows the initial `<`),
# returning a label item on success, or `None` if there's no name at
# the current position.
#
# The name must not be `ICITTE`, an already known label name, or an
# already known variable name.
def _try_parse_label_name(self):
    begin_text_loc = self._text_loc
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is None:
        # Not a label name
        return

    name = name_m.group(0)

    # Reserved name?
    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
        )

    # Already a label?
    if name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

    # Already a variable?
    if name in self._var_names:
        _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

    # The name is now a known label name
    self._label_names.add(name)
    return _Label(name, begin_text_loc)
1348
# Patterns for _try_parse_label_or_set_offset()
_label_set_offset_prefix_pat = re.compile(r"<")
_label_set_offset_suffix_pat = re.compile(r">")

# Tries to parse what's between `<` and `>`: an offset setting value
# first, a label name otherwise.
#
# Returns the corresponding item on success, or `None` if the current
# position isn't a `<`.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # Neither a label nor an offset setting
        return

    self._skip_ws_and_comments()

    # An offset setting value has priority over a label name
    item = self._try_parse_set_offset_val()

    if item is None:
        item = self._try_parse_label_name()

    if item is None:
        # Neither one: invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # Closing `>`
    self._skip_ws_and_comments()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1377
# Pattern for _parse_pad_val()
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value (`~` followed with a positive
# constant integer), returning the padding byte value, or 0 if
# there's no `~` at the current position.
#
# A padding value greater than 255 is an error.
def _parse_pad_val(self):
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value: default
        return 0

    self._skip_ws_and_comments()
    pad_val_text_loc = self._text_loc
    pad_val_m = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    pad_val = int(_norm_const_int(pad_val_m.group(0)), 0)

    # Must fit within a single byte
    if pad_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(pad_val),
            pad_val_text_loc,
        )

    return pad_val
1406
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Tries to parse an offset alignment (`@`, an alignment in bits, and an
# optional padding value), returning an offset alignment item on
# success, or `None` if the current position isn't a `@`.
#
# The alignment must be a positive multiple of eight.
def _try_parse_align_offset(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # Not an offset alignment
        return

    # Alignment value
    self._skip_ws_and_comments()
    align_text_loc = self._text_loc
    align_m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(align_m.group(0))
    is_valid_align = align > 0 and (align % 8) == 0

    if not is_valid_align:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            align_text_loc,
        )

    # Optional padding value, then the item
    pad_val = self._parse_pad_val()
    return _AlignOffset(align, pad_val, begin_text_loc)
1445
# Patterns for _expect_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses an expression outside a `{`/`}` context.
#
# This function tries, in this order:
#
# • If `accept_const_float` is `True`: a constant floating point
#   number.
#
# • If `accept_const_int` is `True`: a constant integer, which may
#   be negative only if `allow_neg_int` is `True`.
#
# • A Python name.
#
# • If `accept_lit_str` is `True`: a literal string.
#
# • A Python expression within `{` and `}`.
#
# Returns the stripped expression string and AST expression.
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    begin_text_loc = self._text_loc

    # Constant floating point number?
    if accept_const_float:
        float_m = self._try_parse_pat(_const_float_pat)

        if float_m is not None:
            return self._ast_expr_from_str(float_m.group(0), begin_text_loc)

    # Constant integer?
    if accept_const_int:
        int_m = self._try_parse_pat(_const_int_pat)

        if int_m is not None:
            # A negative constant must be explicitly allowed
            if int_m.group("neg") == "-" and not allow_neg_int:
                _raise_error(
                    "Expecting a positive constant integer", begin_text_loc
                )

            return self._ast_expr_from_str(
                _norm_const_int(int_m.group(0)), begin_text_loc
            )

    # Name?
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is not None:
        return self._ast_expr_from_str(name_m.group(0), begin_text_loc)

    # Literal string?
    if accept_lit_str:
        lit_str = self._try_parse_lit_str(True)

        if lit_str is not None:
            return self._ast_expr_from_str(repr(lit_str), begin_text_loc)

    # Only `{` remains: list everything that's accepted here, in case
    # it's missing
    accepted = []  # type: List[str]

    if accept_const_int:
        accepted.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        accepted.append("a constant floating point number")

    if accept_lit_str:
        accepted.append("a literal string")

    accepted += ["a name", "or `{`"]

    # "a name or `{`" needs no comma
    sep = " " if len(accepted) == 2 else ", "
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(sep.join(accepted)),
    )

    # Expression text
    self._skip_ws_and_comments()
    expr_text_loc = self._text_loc
    inner_expr_str = self._expect_pat(
        self._inner_expr_pat, "Expecting an expression"
    ).group(0)

    # Closing `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_expr_str, expr_text_loc)
1542
# Patterns for _try_parse_fill_until()
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Tries to parse a filling (`+`, an expression, and an optional
# padding value), returning a filling item on success, or `None` if
# the current position isn't a `+`.
def _try_parse_fill_until(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # Not a filling
        return

    # Expression (a constant integer is fine too)
    self._skip_ws_and_comments()
    until_expr_str, until_expr = self._expect_expr(accept_const_int=True)

    # Optional padding value, then the item
    pad_val = self._parse_pad_val()
    return _FillUntil(until_expr_str, until_expr, pad_val, begin_text_loc)
1565
# Parses the multiplier expression of a repetition (block or
# post-item), accepting a constant integer too, and returns what
# _expect_expr() returns (expression string and AST node).
def _expect_rep_mul_expr(self):
    mul_expr_ret = self._expect_expr(accept_const_int=True)
    return mul_expr_ret
1570
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Tries to parse a repetition block (`!repeat` or `!r`, a multiplier
# expression, items, and `!end`), returning a repetition item on
# success, or `None` if the current position isn't the block prefix.
def _try_parse_rep_block(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Items to repeat
    self._skip_ws_and_comments_and_syms()
    rep_items = self._parse_items()

    # `!end`
    self._skip_ws_and_comments_and_syms()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    return _Rep(rep_items, mul_expr_str, mul_expr, begin_text_loc)
1603
# Pattern for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Tries to parse a conditional block (`!if`, an expression, the
# "true" items, an optional `!else` with the "false" items, and
# `!end`), returning a conditional item on success, or `None` if the
# current position isn't `!if`.
#
# Without `!else`, the "false" group is empty and located at the
# beginning of the block.
def _try_parse_cond_block(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return

    # Condition expression
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" branch
    self._skip_ws_and_comments_and_syms()
    true_text_loc = self._text_loc
    true_items = self._parse_items()

    # "False" branch: empty unless there's an `!else`
    false_text_loc = begin_text_loc
    false_items = []  # type: List[_Item]
    self._skip_ws_and_comments_and_syms()
    has_else = self._try_parse_pat(self._cond_block_else_pat) is not None

    if has_else:
        self._skip_ws_and_comments_and_syms()
        false_text_loc = self._text_loc
        false_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_text_loc)
    false_group = _Group(false_items, false_text_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, begin_text_loc)
1652
# Pattern for _try_parse_trans_block()
_trans_block_prefix_pat = re.compile(r"!t(?:ransform)?\b")
_trans_block_type_pat = re.compile(
    r"(?:(?:base|b)64(?:u)?|(?:base|b)(?:16|32)|(?:ascii|a|base|b)85(?:p)?|(?:quopri|qp)(?:t)?|gzip|gz|bzip2|bz2)\b"
)

# Tries to parse a transformation block (`!transform` or `!t`, a
# transformation type, items, and `!end`), returning a transformation
# block item on success, or `None` if the current position isn't the
# block prefix.
#
# The transformation type selects the encoding/compression function
# and the descriptive name given to the item.
def _try_parse_trans_block(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._trans_block_prefix_pat) is None:
        # Not a transformation block
        return

    # Transformation type
    self._skip_ws_and_comments()
    type_m = self._expect_pat(
        self._trans_block_type_pat, "Expecting a known transformation type"
    )

    # Items to transform
    self._skip_ws_and_comments_and_syms()
    items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of transformation block)",
    )

    # Accepted type spellings to encoding function and descriptive
    # name; bzip2 is the fallback handled after the loop.
    known_trans = (
        (("base64", "b64"), base64.standard_b64encode, "standard Base64"),
        (("base64u", "b64u"), base64.urlsafe_b64encode, "URL-safe Base64"),
        (("base32", "b32"), base64.b32encode, "Base32"),
        (("base16", "b16"), base64.b16encode, "Base16"),
        (("ascii85", "a85"), base64.a85encode, "Ascii85"),
        (
            ("ascii85p", "a85p"),
            functools.partial(base64.a85encode, pad=True),
            "padded Ascii85",
        ),
        (("base85", "b85"), base64.b85encode, "Base85"),
        (
            ("base85p", "b85p"),
            functools.partial(base64.b85encode, pad=True),
            "padded Base85",
        ),
        (("quopri", "qp"), quopri.encodestring, "MIME quoted-printable"),
        (
            ("quoprit", "qpt"),
            functools.partial(quopri.encodestring, quotetabs=True),
            "MIME quoted-printable (with quoted tabs)",
        ),
        (("gzip", "gz"), gzip.compress, "gzip"),
    )
    enc = type_m.group(0)

    for spellings, func, name in known_trans:
        if enc in spellings:
            break
    else:
        # The pattern leaves no other possibility
        assert enc in ("bzip2", "bz2")
        func = bz2.compress
        name = "bzip2"

    return _Trans(items, name, func, begin_text_loc)
1733
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition (`!macro` or `!m`, a name,
# comma-separated parameter names between parentheses, items, and
# `!end`), adding it to `self._macro_defs` and returning `True` on
# success, or returning `False` if the current position isn't the
# block prefix.
#
# The items of the macro are parsed with empty sets of known variable
# and label names; the previous sets are put back afterwards.
def _try_parse_macro_def(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name, which must be new
    self._skip_ws_and_comments()
    name_text_loc = self._text_loc
    name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)

    if name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(name), name_text_loc)

    # `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names until `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter names
            break

        # A comma separates this name from the previous one, if any
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_text_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)

    # Items, parsed with their own sets of known names
    self._skip_ws_and_comments_and_syms()
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register the macro
    self._macro_defs[name] = _MacroDef(name, param_names, items, begin_text_loc)
    return True
1817
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion (`m`, `:`, the name of a known
# macro, and comma-separated parameter values between parentheses),
# returning a macro expansion item on success, or `None` if the
# current position isn't the prefix.
#
# The number of parameter values must match the number of parameter
# names of the macro definition.
def _try_parse_macro_exp(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return

    # `:`
    self._skip_ws_and_comments()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Name of a known macro
    self._skip_ws_and_comments()
    name_text_loc = self._text_loc
    name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)
    macro_def = self._macro_defs.get(name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(name), name_text_loc)

    # `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values until `)`
    params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter values
            break

        # A comma separates this value from the previous one, if any
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_text_loc = self._text_loc
        param_expr_str, param_expr = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        params.append(
            _MacroExpParam(param_expr_str, param_expr, text_loc=param_text_loc)
        )

    # As many values as the definition has parameter names?
    param_count = len(params)
    expected_param_count = len(macro_def.param_names)

    if param_count != expected_param_count:
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                param_count, "" if param_count == 1 else "s", expected_param_count
            ),
            params_text_loc,
        )

    return _MacroExp(name, params, begin_text_loc)
1896
1897 # Tries to parse a base item (anything except a post-item
1898 # repetition), returning it on success.
1899 def _try_parse_base_item(self):
1900 for func in self._base_item_parse_funcs:
1901 item = func()
1902
1903 if item is not None:
1904 return item
1905
# Pattern for _try_parse_rep_post()
_rep_post_prefix_pat = re.compile(r"\*")

# Tries to parse a post-item repetition (`*` followed with a
# multiplier expression), returning the expression string and AST
# expression node on success, or `None` if the current position isn't
# a `*`.
def _try_parse_rep_post(self):
    if self._try_parse_pat(self._rep_post_prefix_pat) is None:
        # Not a post-item repetition
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_ret = self._expect_rep_mul_expr()
    return mul_expr_ret
1920
# Tries to parse an item, possibly followed by a post-item
# repetition, appending it to `items` and returning `True` on
# success, or returning `None` if there's no item.
#
# A repeatable item followed with a post-item repetition is wrapped
# within a repetition item, and it's the latter which is appended.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments_and_syms()
    base_item = self._try_parse_base_item()

    if base_item is None:
        # No item here
        return

    new_item = base_item

    # Only a repeatable item may be followed with a repetition
    if isinstance(base_item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_ret = self._try_parse_rep_post()

        if rep_ret is not None:
            mul_expr_str, mul_expr = rep_ret
            new_item = _Rep(
                [base_item], mul_expr_str, mul_expr, text_loc=rep_text_loc
            )

    items.append(new_item)
    return True
1945
# Parses and returns items until the input is exhausted or until
# what's at the current position is neither an item nor an accepted
# macro definition.
#
# Accepts and registers macro definitions only if
# `accept_macro_defs` is `True`.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got an item: keep going
            continue

        if accept_macro_defs and self._try_parse_macro_def():
            # Got a macro definition: keep going
            continue

        # Unknown at this point
        break

    return parsed_items
1965
# Parses the whole Normand input, setting `self._res` to the main
# group item on success.
#
# It's an error if anything other than whitespaces, comments, and
# insignificant symbols remains after the first level items.
def _parse(self):
    if not self._normand.strip():
        # Nothing to consume: the main group is empty
        self._res = _Group([], self._text_loc)
        return

    # First level items; macro definitions are accepted here
    top_items = self._parse_items(True)

    # There must be nothing significant left
    self._skip_ws_and_comments_and_syms()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    self._res = _Group(top_items, self._text_loc)
1987
1988
# The return type of parse().
#
# Calling the class directly raises `NotImplementedError`; instances
# are built with the _create() class method instead.
class ParseResult:
    # Builds an instance without going through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._offset, self._bo = data, offset, bo
        self._vars, self._labels = variables, labels

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed
    # value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
2046
2047
# Raises a parse error with the message `msg`, located where the item
# `item` is (its `text_loc` property).
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
2052
2053
# The `ICITTE` reserved name.
#
# _Gen._eval_expr() binds this name to the current offset when
# evaluating an expression; the parser refuses it as a variable name
# and as a label name.
_icitte_name = "ICITTE"
2056
2057
2058 # Base node visitor.
2059 #
2060 # Calls the _visit_name() method for each name node which isn't the name
2061 # of a call.
2062 class _NodeVisitor(ast.NodeVisitor):
2063 def __init__(self):
2064 self._parent_is_call = False
2065
2066 def generic_visit(self, node: ast.AST):
2067 if type(node) is ast.Call:
2068 self._parent_is_call = True
2069 elif type(node) is ast.Name and not self._parent_is_call:
2070 self._visit_name(node.id)
2071
2072 super().generic_visit(node)
2073 self._parent_is_call = False
2074
2075 @abc.abstractmethod
2076 def _visit_name(self, name: str):
2077 ...
2078
2079
# Expression validator: validates that all the names within the
# expression are allowed.
#
# A name is allowed if it's `ICITTE` or part of `allowed_names`;
# anything else is reported with _raise_error() at `text_loc`, along
# with the sorted list of legal names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # Known and reachable variable/label name?
        if name == _icitte_name or name in self._allowed_names:
            return

        # `ICITTE` is always legal, so the list is never empty
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        # Sort the quoted names (not the bare ones) to keep the order
        legal_names_str = ", ".join(sorted(map("`{}`".format, legal_names)))
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
2110
2111
2112 # Generator state.
2113 class _GenState:
2114 def __init__(
2115 self,
2116 variables: VariablesT,
2117 labels: LabelsT,
2118 offset: int,
2119 bo: Optional[ByteOrder],
2120 ):
2121 self.variables = variables.copy()
2122 self.labels = labels.copy()
2123 self.offset = offset
2124 self.bo = bo
2125
2126 def __repr__(self):
2127 return "_GenState({}, {}, {}, {})".format(
2128 repr(self.variables), repr(self.labels), repr(self.offset), repr(self.bo)
2129 )
2130
2131
2132 # Fixed-length number item instance.
2133 class _FlNumItemInst:
2134 def __init__(
2135 self,
2136 item: _FlNum,
2137 offset_in_data: int,
2138 state: _GenState,
2139 parse_error_msgs: List[ParseErrorMessage],
2140 ):
2141 self._item = item
2142 self._offset_in_data = offset_in_data
2143 self._state = state
2144 self._parse_error_msgs = parse_error_msgs
2145
2146 @property
2147 def item(self):
2148 return self._item
2149
2150 @property
2151 def offset_in_data(self):
2152 return self._offset_in_data
2153
2154 @property
2155 def state(self):
2156 return self._state
2157
2158 @property
2159 def parse_error_msgs(self):
2160 return self._parse_error_msgs
2161
2162
2163 # Generator of data and final state from a group item.
2164 #
2165 # Generation happens in memory at construction time. After building, use
2166 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2167 # get the resulting context.
2168 #
2169 # The steps of generation are:
2170 #
2171 # 1. Handle each item in prefix order.
2172 #
2173 # The handlers append bytes to `self._data` and update some current
2174 # state object (`_GenState` instance).
2175 #
2176 # When handling a fixed-length number item, try to evaluate its
2177 # expression using the current state. If this fails, then it might be
2178 # because the expression refers to a "future" label: save the current
2179 # offset in `self._data` (generated data) and a snapshot of the
2180 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
2181 # object). _gen_fl_num_item_insts() will deal with this later. A
2182 # `_FlNumItemInst` instance also contains a snapshot of the current
2183 # parsing error messages (`self._parse_error_msgs`) which need to be
2184 # taken into account when handling the instance later.
2185 #
2186 # When handling the items of a group, keep a map of immediate label
2187 # names to their offset. Then, after having processed all the items,
2188 # update the relevant saved state snapshots in
2189 # `self._fl_num_item_insts` with those immediate label values.
2190 # _gen_fl_num_item_insts() will deal with this later.
2191 #
2192 # 2. Handle all the fixed-length number item instances of which the
2193 # expression evaluation failed before.
2194 #
2195 # At this point, `self._fl_num_item_insts` contains everything that's
2196 # needed to evaluate the expressions, including the values of
2197 # "future" labels from the point of view of some fixed-length number
2198 # item instance.
2199 #
2200 # If an evaluation fails at this point, then it's a user error. Add
2201 # to the parsing error all the saved parsing error messages of the
2202 # instance. Those additional messages add precious context to the
2203 # error.
2204 class _Gen:
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Generation happens right here, at construction time: set up
    # the bookkeeping first, and then process `group` starting from
    # the initial state made of `variables`, `labels`, `offset`, and
    # `bo`.
    self._in_trans = False
    self._macro_defs = macro_defs
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
    initial_state = _GenState(variables, labels, offset, bo)
    self._gen(group, initial_state)
2219
# Generated bytes (`self._data`, which the item handlers append to).
@property
def data(self):
    return self._data

# Updated variables (those of the final generation state).
@property
def variables(self):
    return self._final_state.variables

# Updated main group labels (those of the final generation state).
@property
def labels(self):
    return self._final_state.labels

# Updated offset (the one of the final generation state).
@property
def offset(self):
    return self._final_state.offset

# Updated byte order (the one of the final generation state).
@property
def bo(self):
    return self._final_state.bo
2244
# Evaluates the expression `expr` of which the original string is
# `expr_str` at the location `text_loc` considering the current
# generation state `state`.
#
# The names available to the expression are the labels of `state`,
# `ICITTE` (current offset), and the variables of `state`, a variable
# having priority over a label having the same name.
#
# A `bool` result becomes an `int`. The type of the result must then
# be `int`, or:
#
# • `float` too if `accept_float` is `True`.
# • `str` too if `accept_str` is `True`.
#
# NOTE(review): eval() gets `None` as its globals, so per the eval()
# documentation the globals of this module are visible to the
# expression too; confirm this is intended for untrusted input.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    # Labels first, then `ICITTE`, then variables: later ones win
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the names of the node and its children
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # Compile and evaluate the expression node
    try:
        code = compile(expr, "", "eval")
        val = eval(code, None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a boolean result
    if isinstance(val, bool):
        val = int(val)

    # Acceptable result types
    expected_types = {int}  # type: Set[type]

    if accept_float:
        expected_types.add(float)

    if accept_str:
        expected_types.add(str)

    if type(val) in expected_types:
        return val

    # Wrong type: build "`a`", "`a` or `b`", or "`a`, `b`, or `c`"
    quoted_types = sorted("`{}`".format(tp.__name__) for tp in expected_types)
    quoted_type_count = len(quoted_types)

    if quoted_type_count == 1:
        msg_expected = quoted_types[0]
    elif quoted_type_count == 2:
        msg_expected = " or ".join(quoted_types)
    else:
        quoted_types[-1] = "or {}".format(quoted_types[-1])
        msg_expected = ", ".join(quoted_types)

    _raise_error(
        "Invalid expression `{}`: expecting result type {}, not `{}`".format(
            expr_str, msg_expected, type(val).__name__
        ),
        text_loc,
    )

    return val
2318
# Evaluates the expression of `item` with _eval_expr(), using the
# expression string, expression node, and text location which `item`
# carries, and returns the result.
#
# `accept_float` and `accept_str` are forwarded as is.
@staticmethod
def _eval_item_expr(
    item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign],
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    return _Gen._eval_expr(
        expr_str=item.expr_str,
        expr=item.expr,
        text_loc=item.text_loc,
        state=state,
        accept_float=accept_float,
        accept_str=accept_str,
    )
2331
# Handles the byte item `item`: appends its value to the generated
# data and advances the offset of `state` by its size.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    out = self._data
    out.append(item.val)
    state.offset = state.offset + item.size
2336
# Handles the literal string item `item`: appends its data to the
# generated data and advances the offset of `state` by its size.
def _handle_lit_str_item(self, item: _LitStr, state: _GenState):
    str_data = item.data
    self._data += str_data
    state.offset = state.offset + item.size
2341
# Handles the byte order setting item `item`: the byte order of `item`
# becomes the current byte order of `state`.
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    new_bo = item.bo
    state.bo = new_bo
2346
# Handles the variable assignment item `item`: evaluates its expression
# (`int`, `float`, and `str` results are accepted) and stores the
# result as the variable named `item.name` within `state`.
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    new_val = self._eval_item_expr(item, state, accept_float=True, accept_str=True)
    state.variables[item.name] = new_val
2353
# Handles the fixed-length number item `item`.
#
# The bytes are generated immediately when possible. Otherwise, unless
# a transformation block is being processed (this is an error), this
# method records an instance (item, position within the data, and
# copies of `state` and of the current parse error messages) so that
# _gen_fl_num_item_insts() may generate the bytes later, and reserves
# zero bytes meanwhile.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    # A byte order is required as soon as there's more than one byte
    if item.len > 8 and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        # Immediate evaluation attempt
        encoded = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # No deferred evaluation within a transformation block
        if self._in_trans:
            _raise_error_for_item(
                "Invalid expression `{}`: failed to evaluate within a transformation block".format(
                    item.expr_str
                ),
                item,
            )

        # Snapshot what's needed to retry once generation is done
        deferred_inst = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(deferred_inst)

        # Placeholder bytes, overwritten later
        encoded = b"\x00" * (item.len // 8)

    # Append the bytes and advance the offset accordingly
    self._data += encoded
    state.offset += len(encoded)
2395
2396 # Returns the size, in bytes, required to encode the value `val`
2397 # with LEB128 (signed version if `is_signed` is `True`).
2398 @staticmethod
2399 def _leb128_size_for_val(val: int, is_signed: bool):
2400 if val < 0:
2401 # Equivalent upper bound.
2402 #
2403 # For example, if `val` is -128, then the full integer for
2404 # this number of bits would be [-128, 127].
2405 val = -val - 1
2406
2407 # Number of bits (add one for the sign if needed)
2408 bits = val.bit_length() + int(is_signed)
2409
2410 if bits == 0:
2411 bits = 1
2412
2413 # Seven bits per byte
2414 return math.ceil(bits / 7)
2415
# Handles the LEB128 integer item `item`: evaluates its expression,
# appends the encoded bytes to the generated data, and advances the
# offset of `state` by the encoded size.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    remaining = self._eval_item_expr(item, state)

    # Encoded size (bytes); always at least one
    byte_count = self._leb128_size_for_val(remaining, type(item) is _SLeb128Int)

    # Emit groups of seven bits, least significant first, each one
    # with the continuation bit (MSB) set.
    encoded = bytearray()

    for _ in range(byte_count):
        encoded.append((remaining & 0x7F) | 0x80)
        remaining >>= 7

    # The last byte must have its continuation bit cleared
    encoded[-1] &= 0x7F

    self._data += encoded
    state.offset += byte_count
2435
# Handles the string item `item`: evaluates its expression (`int`,
# `float`, and `str` results are accepted), converts the result to a
# string, encodes it with the codec of `item`, appends the bytes to the
# generated data, and advances the offset of `state` accordingly.
def _handle_str_item(self, item: _Str, state: _GenState):
    result = self._eval_item_expr(item, state, accept_float=True, accept_str=True)
    encoded = _encode_str(str(result), item.codec, item.text_loc)
    self._data += encoded
    state.offset += len(encoded)
2449
# Handles the group item `item` by handling each of its items in order.
#
# The immediate labels of the group (labels which are direct items of
# `item`) are removed from `state` at the end if
# `remove_immediate_labels` is `True`. In all cases, they are added to
# the state snapshot of each fixed-length number item instance recorded
# while handling this group.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Instances recorded from now on belong to this group
    insts_start = len(self._fl_num_item_insts)
    own_labels = {}  # type: LabelsT

    for child in item.items:
        # Remember each immediate label with its offset
        if type(child) is _Label:
            own_labels[child.name] = state.offset

        self._handle_item(child, state)

    # Immediate labels are not visible outside the group
    if remove_immediate_labels:
        for label_name in own_labels:
            del state.labels[label_name]

    # Deferred evaluations within this group may refer to any of its
    # immediate labels, even the ones located after the instance.
    pending_insts = self._fl_num_item_insts[insts_start:]

    for pending_inst in pending_insts:
        pending_inst.state.labels.update(own_labels)
2475
# Handles the repetition item `item`: evaluates its expression to get
# the repetition count, which must not be negative, and handles `item`
# as a group that many times.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    count = _Gen._eval_item_expr(item, state)

    # A negative count makes no sense
    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    # One group generation per repetition
    remaining = count

    while remaining > 0:
        self._handle_group_item(item, state)
        remaining -= 1
2493
# Handles the conditional item `item`: evaluates its expression and
# handles, as a group, `item.true_item` if the result is truthy or
# `item.false_item` otherwise.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    cond_val = _Gen._eval_item_expr(item, state)
    selected = item.true_item if cond_val else item.false_item
    self._handle_group_item(selected, state)
2504
# Handles the transformation item `item`: generates the data of `item`
# as a group, replaces it with the result of `item.trans()` applied to
# it, and sets the offset of `state` to the initial offset plus the
# length of the transformed data.
#
# `self._in_trans` is `True` while handling the group and gets its
# previous value back at the end.
def _handle_trans_item(self, item: _Trans, state: _GenState):
    # Save what to restore or to rely on afterwards
    saved_in_trans = self._in_trans
    data_start = len(self._data)
    offset_start = state.offset

    # Generate the group data in "transformation" mode
    self._in_trans = True
    self._handle_group_item(item, state)

    # Take the group data out of the generated data
    raw = self._data[data_start:]
    del self._data[data_start:]

    # Transform it
    try:
        transformed = item.trans(raw)
    except Exception as exc:
        _raise_error_for_item(
            "Cannot apply the {} transformation to this data: {}".format(
                item.name, exc
            ),
            item,
        )

    # Put the transformed data back
    self._data += transformed

    # The offset follows the transformed length, not the original one
    state.offset = offset_start + len(transformed)
    self._in_trans = saved_in_trans
2535
# Creates and returns the state with which to handle the items of the
# macro which the macro expansion item `item` targets.
#
# The new state has no labels, the offset and byte order of
# `init_state`, and one variable per macro parameter, its value being
# the result of evaluating the corresponding parameter expression of
# `item` against `init_state` (`int`, `float`, and `str` results are
# accepted).
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    macro_def = self._macro_defs[item.name]

    # Fresh state: no variables and no labels yet
    exp_state = _GenState({}, {}, init_state.offset, init_state.bo)

    # One variable per (parameter name, parameter expression) pair
    name_param_pairs = zip(macro_def.param_names, item.params)

    for name, param in name_param_pairs:
        param_val = _Gen._eval_expr(
            param.expr_str,
            param.expr,
            param.text_loc,
            init_state,
            accept_float=True,
            accept_str=True,
        )
        exp_state.variables[name] = param_val

    return exp_state
2557
# Handles the macro expansion item `item`: handles the macro definition
# as a group with a dedicated state (see _eval_macro_exp_params()) and
# advances the offset of `state` by the length of the data generated
# meanwhile.
#
# While the macro is being expanded, a "While expanding the macro"
# message is part of the current parse error messages; the same text
# augments any `ParseError` raised during the expansion.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    context_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # State dedicated to the expansion
        exp_state = self._eval_macro_exp_params(item, state)

        # Where the data of the expansion starts
        data_len_before = len(self._data)

        # Push the context message for the duration of the expansion
        context_msg = (
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                context_text, item.text_loc
            )
        )
        self._parse_error_msgs.append(context_msg)

        # Expand
        macro_def = self._macro_defs[item.name]
        self._handle_group_item(macro_def, exp_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        _augment_error(exc, context_text, item.text_loc)

    # The expansion has its own state: report the growth to `state`
    state.offset += len(self._data) - data_len_before
2581
# Handles the offset setting item `item`: the value of `item` becomes
# the current offset of `state` (no data is generated).
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    new_offset = item.val
    state.offset = new_offset
2585
# Handles the offset alignment item `item`: rounds the offset of
# `state` up to the next multiple of `item.val // 8` and appends as
# many padding bytes (`item.pad_val`) as the offset advanced.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    offset_before = state.offset
    alignment = item.val // 8

    # Number of whole alignment units needed to reach the offset
    unit_count = (offset_before + alignment - 1) // alignment
    state.offset = unit_count * alignment

    # Pad up to the aligned offset
    pad_len = state.offset - offset_before
    self._data += bytes([item.pad_val] * pad_len)
2592
# Handles the filling item `item`: evaluates its expression to get the
# offset to reach, which must not be less than the current one, appends
# padding bytes (`item.pad_val`) up to it, and makes it the current
# offset of `state`.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    target_offset = _Gen._eval_item_expr(item, state)

    # Going backwards is not possible
    if state.offset > target_offset:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target_offset, state.offset
            ),
            item,
        )

    # Pad, and then move to the target offset
    pad_len = target_offset - state.offset
    self._data += bytes([item.pad_val] * pad_len)
    state.offset = target_offset
2612
# Handles the label item `item`: the label named `item.name` of `state`
# gets the current offset of `state` as its value.
def _handle_label_item(self, item: _Label, state: _GenState):
    cur_offset = state.offset
    state.labels[item.name] = cur_offset
2616
# Handles the item `item` by calling the handler registered in
# `self._item_handlers` for its exact type, and returns what this
# handler returns.
def _handle_item(self, item: _Item, state: _GenState):
    handler = self._item_handlers[type(item)]
    return handler(item, state)
2621
# Generates and returns the bytes of a fixed-length integer item
# instance (item `item`) having the value `val`, using the byte order
# of `state` (big endian if none).
#
# `val` must be within the union of the signed and unsigned ranges for
# `item.len` bits.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Lowest signed value and highest unsigned value for this length
    min_val = -(2 ** (item.len - 1))
    max_val = 2**item.len - 1

    if not (min_val <= val <= max_val):
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    is_be = state.bo in (None, ByteOrder.BE)

    # Always encode on 64 bits first so that the sign bit is extended
    # whatever the requested length.
    bo_char = ">" if is_be else "<"
    type_char = "Q" if val >= 0 else "q"
    full_data = struct.pack(bo_char + type_char, val)

    # Requested length (bytes)
    byte_count = item.len // 8

    if is_be:
        # Big endian: the least significant bytes are the last ones
        return full_data[-byte_count:]

    # Little endian: the least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return full_data[:byte_count]
2657
# Generates and returns the bytes of a fixed-length floating point
# number item instance (item `item`) having the value `val`, using the
# byte order of `state` (big endian if none).
#
# The length of `item` must be 32 or 64 bits.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # `struct` type character for each supported length (bits)
    type_chars = {32: "f", 64: "d"}

    if item.len not in type_chars:
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    return struct.pack(bo_char + type_chars[item.len], val)
2678
# Generates and returns the bytes of a fixed-length number item
# instance (item `item`): evaluates the expression of `item` against
# `state` and encodes the result as an integer or as a floating point
# number depending on its type.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    result = self._eval_item_expr(item, state, accept_float=True)

    if type(result) is not int:
        # Only `float` remains
        assert type(result) is float
        return self._gen_fl_float_item_inst_data(result, item, state)

    return self._gen_fl_int_item_inst_data(result, item, state)
2691
# Generates the bytes of each recorded fixed-length number item
# instance, using the state saved with it, and writes them over the
# bytes located at the saved position within `self._data`.
#
# On `ParseError`, adds the parse error messages saved with the failing
# instance to the error before raising it again.
def _gen_fl_num_item_insts(self):
    for pending in self._fl_num_item_insts:
        try:
            encoded = self._gen_fl_num_item_inst_data(pending.item, pending.state)
        except ParseError as exc:
            # Restore the context of the instance (for example, the
            # macro expansions it was part of).
            for saved_msg in reversed(pending.parse_error_msgs):
                _add_error_msg(exc, saved_msg.text, saved_msg.text_location)

            raise

        # Overwrite the reserved bytes
        begin = pending.offset_in_data
        end = begin + len(encoded)
        self._data[begin:end] = encoded
2709
# Generates the data (`self._data`) and the final state
# (`self._final_state`) from the group `group` and the initial state
# `state`.
def _gen(self, group: _Group, state: _GenState):
    # Nothing generated yet
    self._data = bytearray()

    # Handler to call for each exact item type
    self._item_handlers = {
        # Raw data
        _Byte: self._handle_byte_item,
        _LitStr: self._handle_lit_str_item,
        _Str: self._handle_str_item,
        # Numbers
        _FlNum: self._handle_fl_num_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _ULeb128Int: self._handle_leb128_int_item,
        # State
        _Label: self._handle_label_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _VarAssign: self._handle_var_assign_item,
        # Padding
        _AlignOffset: self._handle_align_offset_item,
        _FillUntil: self._handle_fill_until_item,
        # Containers
        _Cond: self._handle_cond_item,
        _Group: self._handle_group_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _Trans: self._handle_trans_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # Handle the root group, keeping its immediate labels because the
    # `labels` property offers them.
    self._handle_group_item(group, state, remove_immediate_labels=False)

    # `state` was updated in place: it's now the final state
    self._final_state = state

    # Generate the bytes of the fixed-length numbers of which the
    # evaluation was deferred.
    self._gen_fl_num_item_insts()
2747
2748
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes as well as the final variables,
# labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to their values (none if `None`). A variable name must
# not be the reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values (none if `None`). A label name must not be
# the reserved name `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # The parser and the generator share the same dictionaries
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse (text to items) and then generate (items to bytes)
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2790
2791
2792 # Raises a command-line error with the message `msg`.
2793 def _raise_cli_error(msg: str) -> NoReturn:
2794 raise RuntimeError("Command-line error: {}".format(msg))
2795
2796
# Returns the numeric value out of the CLI assignment value string `s`
# (surrounding whitespace is ignored).
#
# The result is an `int`, or possibly a `float` if `is_label` is
# `False`. Raises a command-line error if `s` is neither.
def _val_from_assign_val_str(s: str, is_label: bool):
    text = s.strip()

    # A label is never a floating point number
    if not is_label:
        float_m = _const_float_pat.fullmatch(text)

        if float_m is not None:
            return float(float_m.group(0))

    int_m = _const_int_pat.fullmatch(text)

    if int_m is not None:
        return int(_norm_const_int(int_m.group(0)), 0)

    # Neither: report what was expected
    if is_label:
        expected = "an integer"
    else:
        expected = "a number"

    _raise_cli_error(
        "Invalid assignment value `{}`: expecting {}".format(text, expected)
    )
2816
2817
# Returns a dictionary of names to values from the list of strings
# `args` containing `NAME=VAL` entries (empty dictionary if `args` is
# `None`).
#
# If `is_str_only` is `True`, then each value is the raw `VAL` string.
# Otherwise, each value is the number which `VAL` represents: see
# _val_from_assign_val_str(), which receives `is_label`.
#
# Raises a command-line error if an entry isn't a valid assignment.
def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
    result = {}  # type: VariablesT

    if args is None:
        return result

    # Same pattern for all the entries
    assign_pat = re.compile(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment `{}`".format(arg))

        name = m.group(1)
        val_str = m.group(2)

        if is_str_only:
            result[name] = val_str
        else:
            result[name] = _val_from_assign_val_str(val_str, is_label)

    return result
2840
2841
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The input text comes from the file at the given path, or from the
# standard input if there's no path.
#
# Raises a command-line error if a `NAME=VAL` option is invalid or if
# the initial offset is negative.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels: string variables are added after the
    # numeric ones, therefore they win on a name clash.
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset (the message must show the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
2934
2935
# CLI entry point without exception handling: parses the Normand input
# text `normand` with the initial offset `offset`, byte order `bo`,
# variables `variables`, and labels `labels`, and writes the resulting
# bytes to the standard output.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    result = parse(normand, variables, labels, offset, bo)
    sys.stdout.buffer.write(result.data)
2945
2946
2947 # Prints the exception message `msg` and exits with status 1.
2948 def _fail(msg: str) -> NoReturn:
2949 if not msg.endswith("."):
2950 msg += "."
2951
2952 print(msg.strip(), file=sys.stderr)
2953 sys.exit(1)
2954
2955
# CLI entry point: parses the command-line arguments, runs the
# processor, and, on any error, prints a message to the standard error
# and exits with status 1.
#
# A `ParseError` becomes one line per message, the last added message
# first, each one with the format `PATH:LINE:COL - TEXT` (without the
# `PATH:` part when reading the standard input).
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        path = args[0]
        prefix = "{}:".format(os.path.abspath(path)) if path is not None else ""
        lines = []  # type: List[str]

        for msg in reversed(exc.messages):
            loc = msg.text_location
            line = "{}{}:{} - {}".format(prefix, loc.line_no, loc.col_no, msg.text)

            # Terminate the line unless it already ends with
            # punctuation.
            if not line.endswith((".", ":", ";")):
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2987
2988
# Run the command-line tool when executing this module as a script.
if "__main__" == __name__:
    _run_cli()
This page took 0.103086 seconds and 4 git commands to generate.