1 # SPDX-FileCopyrightText: 2023 Philippe Proulx <eeppeliteloop@gmail.com>
2 # SPDX-License-Identifier: MIT
4 # The MIT License (MIT)
6 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
8 # Permission is hereby granted, free of charge, to any person obtaining
9 # a copy of this software and associated documentation files (the
10 # "Software"), to deal in the Software without restriction, including
11 # without limitation the rights to use, copy, modify, merge, publish,
12 # distribute, sublicense, and/or sell copies of the Software, and to
13 # permit persons to whom the Software is furnished to do so, subject to
14 # the following conditions:
16 # The above copyright notice and this permission notice shall be
17 # included in all copies or substantial portions of the Software.
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 # This module is the portable Normand processor. It offers both the
28 # parse() function and the command-line tool (run the module itself)
29 # without external dependencies except a `typing` module for Python 3.4.
31 # Feel free to copy this module file to your own project to use Normand.
33 # Upstream repository: <https://github.com/efficios/normand>.
35 __author__
= "Philippe Proulx"
36 __version__
= "0.23.0"
64 from typing
import Any
, Set
, Dict
, List
, Union
, Pattern
, Callable
, NoReturn
, Optional
67 # Text location (line and column numbers).
70 def _create(cls
, line_no
: int, col_no
: int):
71 self
= cls
.__new
__(cls
)
72 self
._init
(line_no
, col_no
)
75 def __init__(*args
, **kwargs
): # type: ignore
76 raise NotImplementedError
78 def _init(self
, line_no
: int, col_no
: int):
79 self
._line
_no
= line_no
93 return "TextLocation({}, {})".format(self
._line
_no
, self
._col
_no
)
98 def __init__(self
, text_loc
: TextLocation
):
99 self
._text
_loc
= text_loc
101 # Source text location.
104 return self
._text
_loc
108 class _ScalarItem(_Item
):
109 # Returns the size, in bytes, of this item.
112 def size(self
) -> int:
122 class _Byte(_ScalarItem
, _RepableItem
):
123 def __init__(self
, val
: int, text_loc
: TextLocation
):
124 super().__init
__(text_loc
)
137 return "_Byte({}, {})".format(hex(self
._val
), repr(self
._text
_loc
))
141 class _LitStr(_ScalarItem
, _RepableItem
):
142 def __init__(self
, data
: bytes
, text_loc
: TextLocation
):
143 super().__init
__(text_loc
)
153 return len(self
._data
)
156 return "_LitStr({}, {})".format(repr(self
._data
), repr(self
._text
_loc
))
161 class ByteOrder(enum
.Enum
):
169 # Byte order setting.
171 def __init__(self
, bo
: ByteOrder
, text_loc
: TextLocation
):
172 super().__init
__(text_loc
)
180 return "_SetBo({}, {})".format(repr(self
._bo
), repr(self
._text
_loc
))
185 def __init__(self
, name
: str, text_loc
: TextLocation
):
186 super().__init
__(text_loc
)
195 return "_Label({}, {})".format(repr(self
._name
), repr(self
._text
_loc
))
199 class _SetOffset(_Item
):
200 def __init__(self
, val
: int, text_loc
: TextLocation
):
201 super().__init
__(text_loc
)
204 # Offset value (bytes).
210 return "_SetOffset({}, {})".format(repr(self
._val
), repr(self
._text
_loc
))
214 class _AlignOffset(_Item
):
215 def __init__(self
, val
: int, pad_val
: int, text_loc
: TextLocation
):
216 super().__init
__(text_loc
)
218 self
._pad
_val
= pad_val
220 # Alignment value (bits).
225 # Padding byte value.
231 return "_AlignOffset({}, {}, {})".format(
232 repr(self
._val
), repr(self
._pad
_val
), repr(self
._text
_loc
)
236 # Mixin of containing an AST expression and its string.
238 def __init__(self
, expr_str
: str, expr
: ast
.Expression
):
239 self
._expr
_str
= expr_str
245 return self
._expr
_str
247 # Expression node to evaluate.
253 # Fill until some offset.
254 class _FillUntil(_Item
, _ExprMixin
):
256 self
, expr_str
: str, expr
: ast
.Expression
, pad_val
: int, text_loc
: TextLocation
258 super().__init
__(text_loc
)
259 _ExprMixin
.__init
__(self
, expr_str
, expr
)
260 self
._pad
_val
= pad_val
262 # Padding byte value.
268 return "_FillUntil({}, {}, {}, {})".format(
269 repr(self
._expr
_str
),
272 repr(self
._text
_loc
),
276 # Variable assignment.
277 class _VarAssign(_Item
, _ExprMixin
):
279 self
, name
: str, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
281 super().__init
__(text_loc
)
282 _ExprMixin
.__init
__(self
, expr_str
, expr
)
291 return "_VarAssign({}, {}, {}, {})".format(
293 repr(self
._expr
_str
),
295 repr(self
._text
_loc
),
299 # Fixed-length number, possibly needing more than one byte.
300 class _FlNum(_ScalarItem
, _RepableItem
, _ExprMixin
):
304 expr
: ast
.Expression
,
306 bo
: Optional
[ByteOrder
],
307 text_loc
: TextLocation
,
309 super().__init
__(text_loc
)
310 _ExprMixin
.__init
__(self
, expr_str
, expr
)
319 # Byte order override.
326 return self
._len
// 8
329 return "_FlNum({}, {}, {}, {}, {})".format(
330 repr(self
._expr
_str
),
334 repr(self
._text
_loc
),
339 class _Leb128Int(_Item
, _RepableItem
, _ExprMixin
):
340 def __init__(self
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
):
341 super().__init
__(text_loc
)
342 _ExprMixin
.__init
__(self
, expr_str
, expr
)
345 return "{}({}, {}, {})".format(
346 self
.__class
__.__name
__,
347 repr(self
._expr
_str
),
349 repr(self
._text
_loc
),
353 # Unsigned LEB128 integer.
354 class _ULeb128Int(_Leb128Int
, _RepableItem
, _ExprMixin
):
358 # Signed LEB128 integer.
359 class _SLeb128Int(_Leb128Int
, _RepableItem
, _ExprMixin
):
364 class _Str(_Item
, _RepableItem
, _ExprMixin
):
366 self
, expr_str
: str, expr
: ast
.Expression
, codec
: str, text_loc
: TextLocation
368 super().__init
__(text_loc
)
369 _ExprMixin
.__init
__(self
, expr_str
, expr
)
378 return "_Str({}, {}, {}, {})".format(
379 repr(self
._expr
_str
),
382 repr(self
._text
_loc
),
387 class _Group(_Item
, _RepableItem
):
388 def __init__(self
, items
: List
[_Item
], text_loc
: TextLocation
):
389 super().__init
__(text_loc
)
398 return "_Group({}, {})".format(repr(self
._items
), repr(self
._text
_loc
))
402 class _Rep(_Group
, _ExprMixin
):
407 expr
: ast
.Expression
,
408 text_loc
: TextLocation
,
410 super().__init
__(items
, text_loc
)
411 _ExprMixin
.__init
__(self
, expr_str
, expr
)
414 return "_Rep({}, {}, {}, {})".format(
416 repr(self
._expr
_str
),
418 repr(self
._text
_loc
),
423 class _Cond(_Item
, _ExprMixin
):
429 expr
: ast
.Expression
,
430 text_loc
: TextLocation
,
432 super().__init
__(text_loc
)
433 _ExprMixin
.__init
__(self
, expr_str
, expr
)
434 self
._true
_item
= true_item
435 self
._false
_item
= false_item
437 # Item when condition is true.
440 return self
._true
_item
442 # Item when condition is false.
444 def false_item(self
):
445 return self
._false
_item
448 return "_Cond({}, {}, {}, {}, {})".format(
449 repr(self
._true
_item
),
450 repr(self
._false
_item
),
451 repr(self
._expr
_str
),
453 repr(self
._text
_loc
),
458 class _Trans(_Group
, _RepableItem
):
463 func
: Callable
[[Union
[bytes
, bytearray
]], bytes
],
464 text_loc
: TextLocation
,
466 super().__init
__(items
, text_loc
)
# Transforms the data `data`.
#
# Calls the function stored in `self._func` with `data` as its only
# argument and returns whatever that function returns.
def trans(self, data: Union[bytes, bytearray]):
    return self._func(data)
479 return "_Trans({}, {}, {}, {})".format(
483 repr(self
._text
_loc
),
487 # Macro definition item.
488 class _MacroDef(_Group
):
492 param_names
: List
[str],
494 text_loc
: TextLocation
,
496 super().__init
__(items
, text_loc
)
498 self
._param
_names
= param_names
507 def param_names(self
):
508 return self
._param
_names
511 return "_MacroDef({}, {}, {}, {})".format(
513 repr(self
._param
_names
),
515 repr(self
._text
_loc
),
519 # Macro expansion parameter.
520 class _MacroExpParam
:
521 def __init__(self
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
):
522 self
._expr
_str
= expr_str
524 self
._text
_loc
= text_loc
529 return self
._expr
_str
536 # Source text location.
539 return self
._text
_loc
542 return "_MacroExpParam({}, {}, {})".format(
543 repr(self
._expr
_str
), repr(self
._expr
), repr(self
._text
_loc
)
547 # Macro expansion item.
548 class _MacroExp(_Item
, _RepableItem
):
552 params
: List
[_MacroExpParam
],
553 text_loc
: TextLocation
,
555 super().__init
__(text_loc
)
557 self
._params
= params
570 return "_MacroExp({}, {}, {})".format(
573 repr(self
._text
_loc
),
577 # A parsing error message: a string and a text location.
578 class ParseErrorMessage
:
580 def _create(cls
, text
: str, text_loc
: TextLocation
):
581 self
= cls
.__new
__(cls
)
582 self
._init
(text
, text_loc
)
585 def __init__(self
, *args
, **kwargs
): # type: ignore
586 raise NotImplementedError
588 def _init(self
, text
: str, text_loc
: TextLocation
):
590 self
._text
_loc
= text_loc
597 # Source text location.
599 def text_location(self
):
600 return self
._text
_loc
603 # A parsing error containing one or more messages (`ParseErrorMessage`).
604 class ParseError(RuntimeError):
606 def _create(cls
, msg
: str, text_loc
: TextLocation
):
607 self
= cls
.__new
__(cls
)
608 self
._init
(msg
, text_loc
)
611 def __init__(self
, *args
, **kwargs
): # type: ignore
612 raise NotImplementedError
614 def _init(self
, msg
: str, text_loc
: TextLocation
):
615 super().__init
__(msg
)
616 self
._msgs
= [] # type: List[ParseErrorMessage]
617 self
._add
_msg
(msg
, text_loc
)
619 def _add_msg(self
, msg
: str, text_loc
: TextLocation
):
621 ParseErrorMessage
._create
( # pyright: ignore[reportPrivateUsage]
626 # Parsing error messages.
628 # The first message is the most specific one.
# Raises a parsing error, forwarding the parameters to the constructor.
#
# `msg` is the error message and `text_loc` is the source text location;
# both are passed as is to ParseError._create(). This function never
# returns normally.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    raise ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
# Adds a message to the parsing error `exc`.
#
# Delegates to the private _add_msg() method of `exc`, passing the
# message `msg` and the source text location `text_loc` unchanged.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
644 # Appends a message to the parsing error `exc` and reraises it.
645 def _augment_error(exc
: ParseError
, msg
: str, text_loc
: TextLocation
) -> NoReturn
:
646 _add_error_msg(exc
, msg
, text_loc
)
650 # Returns a normalized version (so as to be parseable by int()) of
651 # the constant integer string `s`, possibly negative, dealing with
653 def _norm_const_int(s
: str):
657 if s
.startswith("-"):
662 if pos
.startswith("0" + r
):
678 for suf
in asm_suf_base
:
680 s
= "{}0{}{}".format(neg
, asm_suf_base
[suf
], pos
.rstrip(suf
))
685 # Encodes the string `s` using the codec `codec`, raising `ParseError`
686 # with `text_loc` on encoding error.
687 def _encode_str(s
: str, codec
: str, text_loc
: TextLocation
):
689 return s
.encode(codec
)
690 except UnicodeEncodeError:
692 "Cannot encode `{}` with the `{}` encoding".format(s
, codec
), text_loc
# Variables dictionary type (for type hints): string keys mapped to
# integer, floating point number, or string values.
VariablesT = Dict[str, Union[int, float, str]]


# Labels dictionary type (for type hints): string keys mapped to
# integer values.
LabelsT = Dict[str, int]
705 _py_name_pat
= re
.compile(r
"[a-zA-Z_][a-zA-Z0-9_]*")
706 _pos_const_int_pat
= re
.compile(
707 r
"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
# Same as `_pos_const_int_pat`, but accepting an optional leading `-`
# captured as the `neg` group.
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
710 _const_float_pat
= re
.compile(
711 r
"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
# Macro definition dictionary type (for type hints): string keys mapped
# to `_MacroDef` items.
_MacroDefsT = Dict[str, _MacroDef]
721 # The constructor accepts a Normand input. After building, use the `res`
722 # property to get the resulting main group.
724 # Builds a parser to parse the Normand input `normand`, parsing
726 def __init__(self
, normand
: str, variables
: VariablesT
, labels
: LabelsT
):
727 self
._normand
= normand
731 self
._label
_names
= set(labels
.keys())
732 self
._var
_names
= set(variables
.keys())
733 self
._macro
_defs
= {} # type: _MacroDefsT
734 self
._base
_item
_parse
_funcs
= [
735 self
._try
_parse
_byte
,
738 self
._try
_parse
_var
_assign
,
739 self
._try
_parse
_set
_bo
,
740 self
._try
_parse
_label
_or
_set
_offset
,
741 self
._try
_parse
_align
_offset
,
742 self
._try
_parse
_fill
_until
,
743 self
._try
_parse
_group
,
744 self
._try
_parse
_rep
_block
,
745 self
._try
_parse
_cond
_block
,
746 self
._try
_parse
_macro
_exp
,
747 self
._try
_parse
_trans
_block
,
751 # Result (main group).
758 def macro_defs(self
):
759 return self
._macro
_defs
761 # Current text location.
764 return TextLocation
._create
( # pyright: ignore[reportPrivateUsage]
765 self
._line
_no
, self
._col
_no
768 # Returns `True` if this parser is done parsing.
770 return self
._at
== len(self
._normand
)
772 # Returns `True` if this parser isn't done parsing.
773 def _isnt_done(self
):
774 return not self
._is
_done
()
# Raises a parse error, creating it using the message `msg` and the
# current text location (`self._text_loc`).
#
# Never returns normally.
def _raise_error(self, msg: str) -> NoReturn:
    _raise_error(msg, self._text_loc)
781 # Tries to make the pattern `pat` match the current substring,
782 # returning the match object and updating `self._at`,
783 # `self._line_no`, and `self._col_no` on success.
784 def _try_parse_pat(self
, pat
: Pattern
[str]):
785 m
= pat
.match(self
._normand
, self
._at
)
790 # Skip matched string
791 self
._at
+= len(m
.group(0))
794 self
._line
_no
+= m
.group(0).count("\n")
796 # Update column number
797 for i
in reversed(range(self
._at
)):
798 if self
._normand
[i
] == "\n" or i
== 0:
800 self
._col
_no
= self
._at
+ 1
802 self
._col
_no
= self
._at
- i
806 # Return match object
809 # Expects the pattern `pat` to match the current substring,
810 # returning the match object and updating `self._at`,
811 # `self._line_no`, and `self._col_no` on success, or raising a parse
812 # error with the message `error_msg` on error.
813 def _expect_pat(self
, pat
: Pattern
[str], error_msg
: str):
815 m
= self
._try
_parse
_pat
(pat
)
819 self
._raise
_error
(error_msg
)
821 # Return match object
824 # Patterns for _skip_*()
825 _comment_pat
= re
.compile(r
"#[^#]*?(?:$|#)", re
.M
)
826 _ws_or_comments_pat
= re
.compile(r
"(?:\s|{})*".format(_comment_pat
.pattern
), re
.M
)
827 _ws_or_syms_or_comments_pat
= re
.compile(
828 r
"(?:[\s/\\?&:;.,_=|-]|{})*".format(_comment_pat
.pattern
), re
.M
831 # Skips as many whitespaces and comments as possible, but not
832 # insignificant symbol characters.
833 def _skip_ws_and_comments(self
):
834 self
._try
_parse
_pat
(self
._ws
_or
_comments
_pat
)
836 # Skips as many whitespaces, insignificant symbol characters, and
837 # comments as possible.
838 def _skip_ws_and_comments_and_syms(self
):
839 self
._try
_parse
_pat
(self
._ws
_or
_syms
_or
_comments
_pat
)
841 # Pattern for _try_parse_hex_byte()
842 _nibble_pat
= re
.compile(r
"[A-Fa-f0-9]")
844 # Tries to parse a hexadecimal byte, returning a byte item on
846 def _try_parse_hex_byte(self
):
847 begin_text_loc
= self
._text
_loc
849 # Match initial nibble
850 m_high
= self
._try
_parse
_pat
(self
._nibble
_pat
)
856 # Expect another nibble
857 self
._skip
_ws
_and
_comments
_and
_syms
()
858 m_low
= self
._expect
_pat
(
859 self
._nibble
_pat
, "Expecting another hexadecimal nibble"
863 return _Byte(int(m_high
.group(0) + m_low
.group(0), 16), begin_text_loc
)
865 # Patterns for _try_parse_bin_byte()
866 _bin_byte_bit_pat
= re
.compile(r
"[01]")
867 _bin_byte_prefix_pat
= re
.compile(r
"%+")
869 # Tries to parse a binary byte, returning a byte item on success.
870 def _try_parse_bin_byte(self
):
871 begin_text_loc
= self
._text
_loc
874 m
= self
._try
_parse
_pat
(self
._bin
_byte
_prefix
_pat
)
880 # Expect as many bytes as there are `%` prefixes
881 items
= [] # type: List[_Item]
883 for _
in range(len(m
.group(0))):
884 self
._skip
_ws
_and
_comments
_and
_syms
()
885 byte_text_loc
= self
._text
_loc
886 bits
= [] # type: List[str]
890 self
._skip
_ws
_and
_comments
_and
_syms
()
891 m
= self
._expect
_pat
(
892 self
._bin
_byte
_bit
_pat
, "Expecting a bit (`0` or `1`)"
894 bits
.append(m
.group(0))
896 items
.append(_Byte(int("".join(bits
), 2), byte_text_loc
))
903 return _Group(items
, begin_text_loc
)
905 # Patterns for _try_parse_dec_byte()
906 _dec_byte_prefix_pat
= re
.compile(r
"\$")
907 _dec_byte_val_pat
= re
.compile(r
"(?P<neg>-?)(?P<val>\d+)")
909 # Tries to parse a decimal byte, returning a byte item on success.
910 def _try_parse_dec_byte(self
):
911 begin_text_loc
= self
._text
_loc
914 if self
._try
_parse
_pat
(self
._dec
_byte
_prefix
_pat
) is None:
919 self
._skip
_ws
_and
_comments
()
920 m
= self
._expect
_pat
(self
._dec
_byte
_val
_pat
, "Expecting a decimal constant")
923 val
= int(m
.group("val")) * (-1 if m
.group("neg") == "-" else 1)
926 if val
< -128 or val
> 255:
927 _raise_error("Invalid decimal byte value {}".format(val
), begin_text_loc
)
933 return _Byte(val
, begin_text_loc
)
935 # Tries to parse a byte, returning a byte item on success.
936 def _try_parse_byte(self
):
938 item
= self
._try
_parse
_hex
_byte
()
944 item
= self
._try
_parse
_bin
_byte
()
950 item
= self
._try
_parse
_dec
_byte
()
955 # Strings corresponding to escape sequence characters
956 _lit_str_escape_seq_strs
= {
970 # Patterns for _try_parse_lit_str()
971 _lit_str_prefix_suffix_pat
= re
.compile(r
'"')
972 _lit_str_contents_pat
= re
.compile(r
'(?:(?:\\.)|[^"])*')
974 # Parses a literal string between double quotes (without an encoding
975 # prefix) and returns the resulting string.
976 def _try_parse_lit_str(self
, with_prefix
: bool):
977 # Match prefix if needed
979 if self
._try
_parse
_pat
(self
._lit
_str
_prefix
_suffix
_pat
) is None:
983 # Expect literal string
984 m
= self
._expect
_pat
(self
._lit
_str
_contents
_pat
, "Expecting a literal string")
986 # Expect end of string
988 self
._lit
_str
_prefix
_suffix
_pat
, 'Expecting `"` (end of literal string)'
991 # Replace escape sequences
994 for ec
in '0abefnrtv"\\':
995 val
= val
.replace(r
"\{}".format(ec
), self
._lit
_str
_escape
_seq
_strs
[ec
])
1000 # Patterns for _try_parse_utf_str_encoding()
1001 _str_encoding_utf_prefix_pat
= re
.compile(r
"u")
1002 _str_encoding_utf_pat
= re
.compile(r
"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
1004 # Tries to parse a UTF encoding specification, returning the Python
1005 # codec name on success.
1006 def _try_parse_utf_str_encoding(self
):
1008 if self
._try
_parse
_pat
(self
._str
_encoding
_utf
_prefix
_pat
) is None:
1012 # Expect UTF specification
1013 m
= self
._expect
_pat
(
1014 self
._str
_encoding
_utf
_pat
,
1015 "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
1018 # Convert to codec name
1021 "16be": "utf_16_be",
1022 "16le": "utf_16_le",
1023 "32be": "utf_32_be",
1024 "32le": "utf_32_le",
1027 # Patterns for _try_parse_str_encoding()
1028 _str_encoding_gen_prefix_pat
= re
.compile(r
"s")
1029 _str_encoding_colon_pat
= re
.compile(r
":")
1030 _str_encoding_non_utf_pat
= re
.compile(r
"latin(?:[1-9]|10)\b")
1032 # Tries to parse a string encoding specification, returning the
1033 # Python codec name on success.
1035 # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
1036 def _try_parse_str_encoding(self
, req_gen_prefix
: bool = False):
1038 if self
._try
_parse
_pat
(self
._str
_encoding
_gen
_prefix
_pat
) is not None:
1040 self
._skip
_ws
_and
_comments
()
1041 self
._expect
_pat
(self
._str
_encoding
_colon
_pat
, "Expecting `:`")
1043 # Expect encoding specification
1044 self
._skip
_ws
_and
_comments
()
1047 codec
= self
._try
_parse
_utf
_str
_encoding
()
1049 if codec
is not None:
1053 m
= self
._expect
_pat
(
1054 self
._str
_encoding
_non
_utf
_pat
,
1055 "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
1060 if not req_gen_prefix
:
1061 return self
._try
_parse
_utf
_str
_encoding
()
1063 # Patterns for _try_parse_str()
1064 _lit_str_prefix_pat
= re
.compile(r
'"')
1065 _str_prefix_pat
= re
.compile(r
'"|\{')
1066 _str_expr_pat
= re
.compile(r
"[^}]+")
1067 _str_expr_suffix_pat
= re
.compile(r
"\}")
1069 # Tries to parse a string, returning a literal string or string item
1071 def _try_parse_str(self
):
1072 begin_text_loc
= self
._text
_loc
1075 codec
= self
._try
_parse
_str
_encoding
()
1077 # Match prefix (expect if there's an encoding specification)
1078 self
._skip
_ws
_and
_comments
()
1081 # No encoding: only a literal string (UTF-8) is legal
1082 m_prefix
= self
._try
_parse
_pat
(self
._lit
_str
_prefix
_pat
)
1084 if m_prefix
is None:
1087 # Encoding present: expect a string prefix
1088 m_prefix
= self
._expect
_pat
(self
._str
_prefix
_pat
, 'Expecting `"` or `{`')
1090 # Literal string or expression?
1091 prefix
= m_prefix
.group(0)
1094 # Expect literal string
1095 str_text_loc
= self
._text
_loc
1096 val
= self
._try
_parse
_lit
_str
(False)
1099 self
._raise
_error
("Expecting a literal string")
1102 data
= _encode_str(val
, "utf_8" if codec
is None else codec
, str_text_loc
)
1105 return _LitStr(data
, begin_text_loc
)
1108 self
._skip
_ws
_and
_comments
()
1109 expr_text_loc
= self
._text
_loc
1110 m
= self
._expect
_pat
(self
._str
_expr
_pat
, "Expecting an expression")
1113 self
._expect
_pat
(self
._str
_expr
_suffix
_pat
, "Expecting `}`")
1115 # Create an expression node from the expression string
1116 expr_str
, expr
= self
._ast
_expr
_from
_str
(m
.group(0), expr_text_loc
)
1119 assert codec
is not None
1120 return _Str(expr_str
, expr
, codec
, begin_text_loc
)
1122 # Common right parenthesis pattern
1123 _right_paren_pat
= re
.compile(r
"\)")
1125 # Patterns for _try_parse_group()
1126 _group_prefix_pat
= re
.compile(r
"\(|!g(?:roup)?\b")
1128 # Tries to parse a group, returning a group item on success.
1129 def _try_parse_group(self
):
1130 begin_text_loc
= self
._text
_loc
1133 m_open
= self
._try
_parse
_pat
(self
._group
_prefix
_pat
)
1140 items
= self
._parse
_items
()
1142 # Expect end of group
1143 self
._skip
_ws
_and
_comments
_and
_syms
()
1145 if m_open
.group(0) == "(":
1146 pat
= self
._right
_paren
_pat
1149 pat
= self
._block
_end
_pat
1152 self
._expect
_pat
(pat
, "Expecting an item or `{}` (end of group)".format(exp
))
1155 return _Group(items
, begin_text_loc
)
1157 # Returns a stripped expression string and an AST expression node
1158 # from the expression string `expr_str` at text location `text_loc`.
1159 def _ast_expr_from_str(self
, expr_str
: str, text_loc
: TextLocation
):
1160 # Create an expression node from the expression string
1161 expr_str
= expr_str
.strip().replace("\n", " ")
1164 expr
= ast
.parse(expr_str
, mode
="eval")
1167 "Invalid expression `{}`: invalid syntax".format(expr_str
),
1171 return expr_str
, expr
1173 # Returns a `ByteOrder` value from the _valid_ byte order string
1176 def _bo_from_str(bo_str
: str):
1182 # Patterns for _try_parse_val()
1183 _val_prefix_pat
= re
.compile(r
"\[")
1184 _val_expr_pat
= re
.compile(r
"([^\]:]+):")
1185 _fl_num_len_fmt_pat
= re
.compile(r
"(?P<len>8|16|24|32|40|48|56|64)(?P<bo>[bl]e)?")
1186 _leb128_int_fmt_pat
= re
.compile(r
"(u|s)leb128")
1187 _val_suffix_pat
= re
.compile(r
"]")
1189 # Tries to parse a value (number or string) and format (fixed length
1190 # in bits and optional byte order override, `uleb128`, `sleb128`, or
1191 # `s:` followed with an encoding name), returning an item on
1193 def _try_parse_val(self
):
1195 if self
._try
_parse
_pat
(self
._val
_prefix
_pat
) is None:
1199 # Expect expression and `:`
1200 self
._skip
_ws
_and
_comments
()
1201 expr_text_loc
= self
._text
_loc
1202 m
= self
._expect
_pat
(self
._val
_expr
_pat
, "Expecting an expression")
1204 # Create an expression node from the expression string
1205 expr_str
, expr
= self
._ast
_expr
_from
_str
(m
.group(1), expr_text_loc
)
1208 self
._skip
_ws
_and
_comments
()
1209 m_fmt
= self
._try
_parse
_pat
(self
._fl
_num
_len
_fmt
_pat
)
1211 if m_fmt
is not None:
1212 # Byte order override
1213 if m_fmt
.group("bo") is None:
1216 bo
= self
._bo
_from
_str
(m_fmt
.group("bo"))
1218 # Create fixed-length number item
1222 int(m_fmt
.group("len")),
1228 m_fmt
= self
._try
_parse
_pat
(self
._leb
128_int
_fmt
_pat
)
1230 if m_fmt
is not None:
1231 # Create LEB128 integer item
1232 cls
= _ULeb128Int
if m_fmt
.group(1) == "u" else _SLeb128Int
1233 item
= cls(expr_str
, expr
, expr_text_loc
)
1236 codec
= self
._try
_parse
_str
_encoding
(True)
1238 if codec
is not None:
1239 # Create string item
1240 item
= _Str(expr_str
, expr
, codec
, expr_text_loc
)
1242 # At this point it's invalid
1244 "Expecting a fixed length (multiple of eight bits and optional `be` or `le`), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
1248 self
._skip
_ws
_and
_comments
()
1249 m
= self
._expect
_pat
(self
._val
_suffix
_pat
, "Expecting `]`")
1254 # Patterns for _try_parse_var_assign()
1255 _var_assign_prefix_pat
= re
.compile(r
"\{")
1256 _var_assign_equal_pat
= re
.compile(r
"=")
1257 _var_assign_expr_pat
= re
.compile(r
"[^}]+")
1258 _var_assign_suffix_pat
= re
.compile(r
"\}")
1260 # Tries to parse a variable assignment, returning a variable
1261 # assignment item on success.
1262 def _try_parse_var_assign(self
):
1264 if self
._try
_parse
_pat
(self
._var
_assign
_prefix
_pat
) is None:
1269 self
._skip
_ws
_and
_comments
()
1270 name_text_loc
= self
._text
_loc
1271 m
= self
._expect
_pat
(_py_name_pat
, "Expecting a valid Python name")
1275 self
._skip
_ws
_and
_comments
()
1276 self
._expect
_pat
(self
._var
_assign
_equal
_pat
, "Expecting `=`")
1279 self
._skip
_ws
_and
_comments
()
1280 expr_text_loc
= self
._text
_loc
1281 m_expr
= self
._expect
_pat
(self
._var
_assign
_expr
_pat
, "Expecting an expression")
1284 self
._skip
_ws
_and
_comments
()
1285 self
._expect
_pat
(self
._var
_assign
_suffix
_pat
, "Expecting `}`")
1288 if name
== _icitte_name
:
1290 "`{}` is a reserved variable name".format(_icitte_name
), name_text_loc
1293 if name
in self
._label
_names
:
1294 _raise_error("Existing label named `{}`".format(name
), name_text_loc
)
1296 # Create an expression node from the expression string
1297 expr_str
, expr
= self
._ast
_expr
_from
_str
(m_expr
.group(0), expr_text_loc
)
1299 # Add to known variable names
1300 self
._var
_names
.add(name
)
1310 # Pattern for _try_parse_set_bo()
1311 _set_bo_pat
= re
.compile(r
"!([bl]e)\b")
1313 # Tries to parse a byte order setting, returning a byte order
1314 # setting item on success.
1315 def _try_parse_set_bo(self
):
1316 begin_text_loc
= self
._text
_loc
1319 m
= self
._try
_parse
_pat
(self
._set
_bo
_pat
)
1325 # Return corresponding item
1326 if m
.group(1) == "be":
1329 assert m
.group(1) == "le"
1332 return _SetBo(bo
, begin_text_loc
)
1334 # Tries to parse an offset setting value (after the initial `<`),
1335 # returning an offset item on success.
1336 def _try_parse_set_offset_val(self
):
1337 begin_text_loc
= self
._text
_loc
1340 m
= self
._try
_parse
_pat
(_pos_const_int_pat
)
1347 return _SetOffset(int(_norm_const_int(m
.group(0)), 0), begin_text_loc
)
1349 # Tries to parse a label name (after the initial `<`), returning a
1350 # label item on success.
1351 def _try_parse_label_name(self
):
1352 begin_text_loc
= self
._text
_loc
1355 m
= self
._try
_parse
_pat
(_py_name_pat
)
1364 if name
== _icitte_name
:
1366 "`{}` is a reserved label name".format(_icitte_name
), begin_text_loc
1369 if name
in self
._label
_names
:
1370 _raise_error("Duplicate label name `{}`".format(name
), begin_text_loc
)
1372 if name
in self
._var
_names
:
1373 _raise_error("Existing variable named `{}`".format(name
), begin_text_loc
)
1375 # Add to known label names
1376 self
._label
_names
.add(name
)
1379 return _Label(name
, begin_text_loc
)
1381 # Patterns for _try_parse_label_or_set_offset()
1382 _label_set_offset_prefix_pat
= re
.compile(r
"<")
1383 _label_set_offset_suffix_pat
= re
.compile(r
">")
1385 # Tries to parse a label or an offset setting, returning an item on
1387 def _try_parse_label_or_set_offset(self
):
1389 if self
._try
_parse
_pat
(self
._label
_set
_offset
_prefix
_pat
) is None:
1393 # Offset setting item?
1394 self
._skip
_ws
_and
_comments
()
1395 item
= self
._try
_parse
_set
_offset
_val
()
1399 item
= self
._try
_parse
_label
_name
()
1402 # At this point it's invalid
1403 self
._raise
_error
("Expecting a label name or an offset setting value")
1406 self
._skip
_ws
_and
_comments
()
1407 self
._expect
_pat
(self
._label
_set
_offset
_suffix
_pat
, "Expecting `>`")
1410 # Pattern for _parse_pad_val()
1411 _pad_val_prefix_pat
= re
.compile(r
"~")
1413 # Tries to parse a padding value, returning the padding value, or 0
1415 def _parse_pad_val(self
):
1417 self
._skip
_ws
_and
_comments
()
1420 if self
._try
_parse
_pat
(self
._pad
_val
_prefix
_pat
) is not None:
1421 self
._skip
_ws
_and
_comments
()
1422 pad_val_text_loc
= self
._text
_loc
1423 m
= self
._expect
_pat
(
1425 "Expecting a positive constant integer (byte value)",
1429 pad_val
= int(_norm_const_int(m
.group(0)), 0)
1433 "Invalid padding byte value {}".format(pad_val
),
1439 # Patterns for _try_parse_align_offset()
1440 _align_offset_prefix_pat
= re
.compile(r
"@")
1441 _align_offset_val_pat
= re
.compile(r
"\d+")
1443 # Tries to parse an offset alignment, returning an offset alignment
1445 def _try_parse_align_offset(self
):
1446 begin_text_loc
= self
._text
_loc
1449 if self
._try
_parse
_pat
(self
._align
_offset
_prefix
_pat
) is None:
1453 # Expect an alignment
1454 self
._skip
_ws
_and
_comments
()
1455 align_text_loc
= self
._text
_loc
1456 m
= self
._expect
_pat
(
1457 self
._align
_offset
_val
_pat
,
1458 "Expecting an alignment (positive multiple of eight bits)",
1461 # Validate alignment
1462 val
= int(m
.group(0))
1464 if val
<= 0 or (val
% 8) != 0:
1466 "Invalid alignment value {} (not a positive multiple of eight)".format(
1473 pad_val
= self
._parse
_pad
_val
()
1476 return _AlignOffset(val
, pad_val
, begin_text_loc
)
1478 # Patterns for _expect_expr()
1479 _inner_expr_prefix_pat
= re
.compile(r
"\{")
1480 _inner_expr_pat
= re
.compile(r
"[^}]+")
1481 _inner_expr_suffix_pat
= re
.compile(r
"\}")
1483 # Parses an expression outside a `{`/`}` context.
1485 # This function accepts:
1487 # • A Python expression within `{` and `}`.
1489 # • A Python name.
1491 # • If `accept_const_int` is `True`: a constant integer, which may
1492 # be negative if `allow_neg_int` is `True`.
1494 # • If `accept_float` is `True`: a constant floating point number.
1496 # Returns the stripped expression string and AST expression.
1499 accept_const_int
: bool = False,
1500 allow_neg_int
: bool = False,
1501 accept_const_float
: bool = False,
1502 accept_lit_str
: bool = False,
1504 begin_text_loc
= self
._text
_loc
1506 # Constant floating point number?
1507 if accept_const_float
:
1508 m
= self
._try
_parse
_pat
(_const_float_pat
)
1511 return self
._ast
_expr
_from
_str
(m
.group(0), begin_text_loc
)
1514 if accept_const_int
:
1515 m
= self
._try
_parse
_pat
(_const_int_pat
)
1518 # Negative and allowed?
1519 if m
.group("neg") == "-" and not allow_neg_int
:
1521 "Expecting a positive constant integer", begin_text_loc
1524 expr_str
= _norm_const_int(m
.group(0))
1525 return self
._ast
_expr
_from
_str
(expr_str
, begin_text_loc
)
1528 m
= self
._try
_parse
_pat
(_py_name_pat
)
1531 return self
._ast
_expr
_from
_str
(m
.group(0), begin_text_loc
)
1535 val
= self
._try
_parse
_lit
_str
(True)
1538 return self
._ast
_expr
_from
_str
(repr(val
), begin_text_loc
)
1541 msg_accepted_parts
= ["a name", "or `{`"]
1544 msg_accepted_parts
.insert(0, "a literal string")
1546 if accept_const_float
:
1547 msg_accepted_parts
.insert(0, "a constant floating point number")
1549 if accept_const_int
:
1550 msg_pos
= "" if allow_neg_int
else "positive "
1551 msg_accepted_parts
.insert(0, "a {}constant integer".format(msg_pos
))
1553 if len(msg_accepted_parts
) == 2:
1554 msg_accepted
= " ".join(msg_accepted_parts
)
1556 msg_accepted
= ", ".join(msg_accepted_parts
)
1559 self
._inner
_expr
_prefix
_pat
,
1560 "Expecting {}".format(msg_accepted
),
1563 # Expect an expression
1564 self
._skip
_ws
_and
_comments
()
1565 expr_text_loc
= self
._text
_loc
1566 m
= self
._expect
_pat
(self
._inner
_expr
_pat
, "Expecting an expression")
1567 expr_str
= m
.group(0)
1570 self
._skip
_ws
_and
_comments
()
1571 self
._expect
_pat
(self
._inner
_expr
_suffix
_pat
, "Expecting `}`")
1573 return self
._ast
_expr
_from
_str
(expr_str
, expr_text_loc
)
# Patterns for _try_parse_fill_until()
#
# Literal `+`: the prefix which _try_parse_fill_until() must match
# before it expects the filling expression.
_fill_until_prefix_pat = re.compile(r"\+")
# Literal `~`. NOTE(review): going by its name, this introduces a
# custom padding value; confirm where it's consumed.
_fill_until_pad_val_prefix_pat = re.compile(r"~")
1579 # Tries to parse a filling, returning a filling item on success.
1580 def _try_parse_fill_until(self
):
1581 begin_text_loc
= self
._text
_loc
1584 if self
._try
_parse
_pat
(self
._fill
_until
_prefix
_pat
) is None:
1589 self
._skip
_ws
_and
_comments
()
1590 expr_str
, expr
= self
._expect
_expr
(accept_const_int
=True)
1593 pad_val
= self
._parse
_pad
_val
()
1596 return _FillUntil(expr_str
, expr
, pad_val
, begin_text_loc
)
1598 # Parses the multiplier expression of a repetition (block or
1599 # post-item) and returns the expression string and AST node.
1600 def _expect_rep_mul_expr(self
):
1601 return self
._expect
_expr
(accept_const_int
=True)
# Common block end pattern: `!end` followed by a word boundary.
#
# The repetition, conditional, transformation, and macro definition
# block parsers all expect it after the items of the block.
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block(): `!r` or `!repeat` followed by a
# word boundary.
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
1609 # Tries to parse a repetition block, returning a repetition item on
1611 def _try_parse_rep_block(self
):
1612 begin_text_loc
= self
._text
_loc
1615 if self
._try
_parse
_pat
(self
._rep
_block
_prefix
_pat
) is None:
1620 self
._skip
_ws
_and
_comments
()
1621 expr_str
, expr
= self
._expect
_rep
_mul
_expr
()
1624 self
._skip
_ws
_and
_comments
_and
_syms
()
1625 items
= self
._parse
_items
()
1627 # Expect end of block
1628 self
._skip
_ws
_and
_comments
_and
_syms
()
1630 self
._block
_end
_pat
, "Expecting an item or `!end` (end of repetition block)"
1634 return _Rep(items
, expr_str
, expr
, begin_text_loc
)
# Patterns for _try_parse_cond_block()
#
# `!if` followed by a word boundary: beginning of a conditional block.
_cond_block_prefix_pat = re.compile(r"!if\b")
# `!else` followed by a word boundary: optional separator between the
# "true" items and the "false" items.
_cond_block_else_pat = re.compile(r"!else\b")
1640 # Tries to parse a conditional block, returning a conditional item
1642 def _try_parse_cond_block(self
):
1643 begin_text_loc
= self
._text
_loc
1646 if self
._try
_parse
_pat
(self
._cond
_block
_prefix
_pat
) is None:
1651 self
._skip
_ws
_and
_comments
()
1652 expr_str
, expr
= self
._expect
_expr
()
1654 # Parse "true" items
1655 self
._skip
_ws
_and
_comments
_and
_syms
()
1656 true_items_text_loc
= self
._text
_loc
1657 true_items
= self
._parse
_items
()
1658 false_items
= [] # type: List[_Item]
1659 false_items_text_loc
= begin_text_loc
1662 self
._skip
_ws
_and
_comments
_and
_syms
()
1664 if self
._try
_parse
_pat
(self
._cond
_block
_else
_pat
) is not None:
1665 # Parse "false" items
1666 self
._skip
_ws
_and
_comments
_and
_syms
()
1667 false_items_text_loc
= self
._text
_loc
1668 false_items
= self
._parse
_items
()
1670 # Expect end of block
1672 self
._block
_end
_pat
,
1673 "Expecting an item, `!else`, or `!end` (end of conditional block)",
1678 _Group(true_items
, true_items_text_loc
),
1679 _Group(false_items
, false_items_text_loc
),
1685 # Pattern for _try_parse_trans_block()
1686 _trans_block_prefix_pat
= re
.compile(r
"!t(?:ransform)?\b")
1687 _trans_block_type_pat
= re
.compile(
1688 r
"(?:(?:base|b)64(?:u)?|(?:base|b)(?:16|32)|(?:ascii|a|base|b)85(?:p)?|(?:quopri|qp)(?:t)?|gzip|gz|bzip2|bz2)\b"
1691 # Tries to parse a transformation block, returning a transformation
1692 # block item on success.
1693 def _try_parse_trans_block(self
):
1694 begin_text_loc
= self
._text
_loc
1697 if self
._try
_parse
_pat
(self
._trans
_block
_prefix
_pat
) is None:
1702 self
._skip
_ws
_and
_comments
()
1703 m
= self
._expect
_pat
(
1704 self
._trans
_block
_type
_pat
, "Expecting a known transformation type"
1708 self
._skip
_ws
_and
_comments
_and
_syms
()
1709 items
= self
._parse
_items
()
1711 # Expect end of block
1713 self
._block
_end
_pat
,
1714 "Expecting an item or `!end` (end of transformation block)",
1717 # Choose encoding function
1720 if enc
in ("base64", "b64"):
1721 func
= base64
.standard_b64encode
1722 name
= "standard Base64"
1723 elif enc
in ("base64u", "b64u"):
1724 func
= base64
.urlsafe_b64encode
1725 name
= "URL-safe Base64"
1726 elif enc
in ("base32", "b32"):
1727 func
= base64
.b32encode
1729 elif enc
in ("base16", "b16"):
1730 func
= base64
.b16encode
1732 elif enc
in ("ascii85", "a85"):
1733 func
= base64
.a85encode
1735 elif enc
in ("ascii85p", "a85p"):
1736 func
= functools
.partial(base64
.a85encode
, pad
=True)
1737 name
= "padded Ascii85"
1738 elif enc
in ("base85", "b85"):
1739 func
= base64
.b85encode
1741 elif enc
in ("base85p", "b85p"):
1742 func
= functools
.partial(base64
.b85encode
, pad
=True)
1743 name
= "padded Base85"
1744 elif enc
in ("quopri", "qp"):
1745 func
= quopri
.encodestring
1746 name
= "MIME quoted-printable"
1747 elif enc
in ("quoprit", "qpt"):
1748 func
= functools
.partial(quopri
.encodestring
, quotetabs
=True)
1749 name
= "MIME quoted-printable (with quoted tabs)"
1750 elif enc
in ("gzip", "gz"):
1751 func
= gzip
.compress
1754 assert enc
in ("bzip2", "bz2")
# Common left parenthesis pattern: literal `(`, expected after the
# macro name by both _try_parse_macro_def() and _try_parse_macro_exp().
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
#
# Literal `,` which separates parameter names (definition) or parameter
# values (expansion).
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
#
# `!m` or `!macro` followed by a word boundary.
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1775 # Tries to parse a macro definition, adding it to `self._macro_defs`
1776 # and returning `True` on success.
1777 def _try_parse_macro_def(self
):
1778 begin_text_loc
= self
._text
_loc
1781 if self
._try
_parse
_pat
(self
._macro
_def
_prefix
_pat
) is None:
1786 self
._skip
_ws
_and
_comments
()
1787 name_text_loc
= self
._text
_loc
1788 m
= self
._expect
_pat
(_py_name_pat
, "Expecting a valid macro name")
1793 if name
in self
._macro
_defs
:
1794 _raise_error("Duplicate macro named `{}`".format(name
), name_text_loc
)
1797 self
._skip
_ws
_and
_comments
()
1798 self
._expect
_pat
(self
._left
_paren
_pat
, "Expecting `(`")
1800 # Try to parse comma-separated parameter names
1801 param_names
= [] # type: List[str]
1802 expect_comma
= False
1805 self
._skip
_ws
_and
_comments
()
1808 if self
._try
_parse
_pat
(self
._right
_paren
_pat
) is not None:
1814 self
._expect
_pat
(self
._macro
_params
_comma
_pat
, "Expecting `,`")
1816 # Expect parameter name
1817 self
._skip
_ws
_and
_comments
()
1818 param_text_loc
= self
._text
_loc
1819 m
= self
._expect
_pat
(_py_name_pat
, "Expecting valid parameter name")
1821 if m
.group(0) in param_names
:
1823 "Duplicate macro parameter named `{}`".format(m
.group(0)),
1827 param_names
.append(m
.group(0))
1831 self
._skip
_ws
_and
_comments
_and
_syms
()
1832 old_var_names
= self
._var
_names
.copy()
1833 old_label_names
= self
._label
_names
.copy()
1834 self
._var
_names
= set() # type: Set[str]
1835 self
._label
_names
= set() # type: Set[str]
1836 items
= self
._parse
_items
()
1837 self
._var
_names
= old_var_names
1838 self
._label
_names
= old_label_names
1842 self
._block
_end
_pat
, "Expecting an item or `!end` (end of macro block)"
1846 self
._macro
_defs
[name
] = _MacroDef(name
, param_names
, items
, begin_text_loc
)
# Patterns for _try_parse_macro_exp()
#
# The letter `m` followed by a word boundary: beginning of a macro
# expansion.
_macro_exp_prefix_pat = re.compile(r"m\b")
# Literal `:`, expected after the prefix and before the macro name.
_macro_exp_colon_pat = re.compile(r":")
1854 # Tries to parse a macro expansion, returning a macro expansion item
1856 def _try_parse_macro_exp(self
):
1857 begin_text_loc
= self
._text
_loc
1860 if self
._try
_parse
_pat
(self
._macro
_exp
_prefix
_pat
) is None:
1865 self
._skip
_ws
_and
_comments
()
1866 self
._expect
_pat
(self
._macro
_exp
_colon
_pat
, "Expecting `:`")
1868 # Expect a macro name
1869 self
._skip
_ws
_and
_comments
()
1870 name_text_loc
= self
._text
_loc
1871 m
= self
._expect
_pat
(_py_name_pat
, "Expecting a valid macro name")
1875 macro_def
= self
._macro
_defs
.get(name
)
1877 if macro_def
is None:
1878 _raise_error("Unknown macro name `{}`".format(name
), name_text_loc
)
1881 self
._skip
_ws
_and
_comments
()
1882 self
._expect
_pat
(self
._left
_paren
_pat
, "Expecting `(`")
1884 # Try to parse comma-separated parameter values
1885 params_text_loc
= self
._text
_loc
1886 params
= [] # type: List[_MacroExpParam]
1887 expect_comma
= False
1890 self
._skip
_ws
_and
_comments
()
1893 if self
._try
_parse
_pat
(self
._right
_paren
_pat
) is not None:
1899 self
._expect
_pat
(self
._macro
_params
_comma
_pat
, "Expecting `,`")
1901 self
._skip
_ws
_and
_comments
()
1902 param_text_loc
= self
._text
_loc
1906 accept_const_int
=True,
1908 accept_const_float
=True,
1909 accept_lit_str
=True,
1911 text_loc
=param_text_loc
1916 # Validate parameter values
1917 if len(params
) != len(macro_def
.param_names
):
1918 sing_plur
= "" if len(params
) == 1 else "s"
1920 "Macro expansion passes {} parameter{} while the definition expects {}".format(
1921 len(params
), sing_plur
, len(macro_def
.param_names
)
1927 return _MacroExp(name
, params
, begin_text_loc
)
1929 # Tries to parse a base item (anything except a post-item
1930 # repetition), returning it on success.
1931 def _try_parse_base_item(self
):
1932 for func
in self
._base
_item
_parse
_funcs
:
1935 if item
is not None:
# Pattern for _try_parse_rep_post(): literal `*` which introduces the
# multiplier expression of a post-item repetition.
_rep_post_prefix_pat = re.compile(r"\*")
1941 # Tries to parse a post-item repetition, returning the expression
1942 # string and AST expression node on success.
1943 def _try_parse_rep_post(self
):
1945 if self
._try
_parse
_pat
(self
._rep
_post
_prefix
_pat
) is None:
1949 # Return expression string and AST expression
1950 self
._skip
_ws
_and
_comments
()
1951 return self
._expect
_rep
_mul
_expr
()
1953 # Tries to parse an item, possibly followed by a repetition,
1954 # returning `True` on success.
1956 # Appends any parsed item to `items`.
1957 def _try_append_item(self
, items
: List
[_Item
]):
1958 self
._skip
_ws
_and
_comments
_and
_syms
()
1961 item
= self
._try
_parse
_base
_item
()
1966 # Parse repetition if the base item is repeatable
1967 if isinstance(item
, _RepableItem
):
1968 self
._skip
_ws
_and
_comments
()
1969 rep_text_loc
= self
._text
_loc
1970 rep_ret
= self
._try
_parse
_rep
_post
()
1972 if rep_ret
is not None:
1973 item
= _Rep([item
], *rep_ret
, text_loc
=rep_text_loc
)
1978 # Parses and returns items, skipping whitespaces, insignificant
1979 # symbols, and comments when allowed, and stopping at the first
1980 # unknown character.
1982 # Accepts and registers macro definitions if `accept_macro_defs`
1984 def _parse_items(self
, accept_macro_defs
: bool = False) -> List
[_Item
]:
1985 items
= [] # type: List[_Item]
1987 while self
._isnt
_done
():
1988 # Try to append item
1989 if not self
._try
_append
_item
(items
):
1990 if accept_macro_defs
and self
._try
_parse
_macro
_def
():
1993 # Unknown at this point
1998 # Parses the whole Normand input, setting `self._res` to the main
1999 # group item on success.
2001 if len(self
._normand
.strip()) == 0:
2002 # Special case to make sure there's something to consume
2003 self
._res
= _Group([], self
._text
_loc
)
2006 # Parse first level items
2007 items
= self
._parse
_items
(True)
2009 # Make sure there's nothing left
2010 self
._skip
_ws
_and
_comments
_and
_syms
()
2012 if self
._isnt
_done
():
2014 "Unexpected character `{}`".format(self
._normand
[self
._at
])
2017 # Set main group item
2018 self
._res
= _Group(items
, self
._text
_loc
)
2021 # The return type of parse().
2027 variables
: VariablesT
,
2030 bo
: Optional
[ByteOrder
],
2032 self
= cls
.__new
__(cls
)
2033 self
._init
(data
, variables
, labels
, offset
, bo
)
2036 def __init__(self
, *args
, **kwargs
): # type: ignore
2037 raise NotImplementedError
2042 variables
: VariablesT
,
2045 bo
: Optional
[ByteOrder
],
2048 self
._vars
= variables
2049 self
._labels
= labels
2050 self
._offset
= offset
2058 # Dictionary of updated variable names to their last computed value.
2060 def variables(self
):
2063 # Dictionary of updated main group label names to their computed
2074 # Updated byte order.
2076 def byte_order(self
):
2080 # Raises a parse error for the item `item`, creating it using the
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The text location of `item` is the location reported with `msg`;
    # _raise_error() does the actual raising.
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
# The `ICITTE` reserved name.
#
# The expression validator never rejects this name, and expression
# evaluation binds it to the current offset of the generation state.
_icitte_name = "ICITTE"
2090 # Base node visitor.
2092 # Calls the _visit_name() method for each name node which isn't the name
2094 class _NodeVisitor(ast
.NodeVisitor
):
2096 self
._parent
_is
_call
= False
2098 def generic_visit(self
, node
: ast
.AST
):
2099 if type(node
) is ast
.Call
:
2100 self
._parent
_is
_call
= True
2101 elif type(node
) is ast
.Name
and not self
._parent
_is
_call
:
2102 self
._visit
_name
(node
.id)
2104 super().generic_visit(node
)
2105 self
._parent
_is
_call
= False
2108 def _visit_name(self
, name
: str):
2112 # Expression validator: validates that all the names within the
2113 # expression are allowed.
2114 class _ExprValidator(_NodeVisitor
):
2115 def __init__(self
, expr_str
: str, text_loc
: TextLocation
, allowed_names
: Set
[str]):
2117 self
._expr
_str
= expr_str
2118 self
._text
_loc
= text_loc
2119 self
._allowed
_names
= allowed_names
2121 def _visit_name(self
, name
: str):
2122 # Make sure the name refers to a known and reachable
2123 # variable/label name.
2124 if name
!= _icitte_name
and name
not in self
._allowed
_names
:
2125 msg
= "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
2126 name
, self
._expr
_str
2129 allowed_names
= self
._allowed
_names
.copy()
2130 allowed_names
.add(_icitte_name
)
2132 if len(allowed_names
) > 0:
2133 allowed_names_str
= ", ".join(
2134 sorted(["`{}`".format(name
) for name
in allowed_names
])
2136 msg
+= "; the legal names are {{{}}}".format(allowed_names_str
)
2148 variables
: VariablesT
,
2151 bo
: Optional
[ByteOrder
],
2153 self
.variables
= variables
.copy()
2154 self
.labels
= labels
.copy()
2155 self
.offset
= offset
2159 return "_GenState({}, {}, {}, {})".format(
2160 repr(self
.variables
), repr(self
.labels
), repr(self
.offset
), repr(self
.bo
)
2164 # Fixed-length number item instance.
2165 class _FlNumItemInst
:
2169 offset_in_data
: int,
2171 parse_error_msgs
: List
[ParseErrorMessage
],
2174 self
._offset
_in
_data
= offset_in_data
2176 self
._parse
_error
_msgs
= parse_error_msgs
2183 def offset_in_data(self
):
2184 return self
._offset
_in
_data
2191 def parse_error_msgs(self
):
2192 return self
._parse
_error
_msgs
2195 # Generator of data and final state from a group item.
2197 # Generation happens in memory at construction time. After building, use
2198 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2199 # get the resulting context.
2201 # The steps of generation are:
2203 # 1. Handle each item in prefix order.
2205 # The handlers append bytes to `self._data` and update some current
2206 # state object (`_GenState` instance).
2208 # When handling a fixed-length number item, try to evaluate its
2209 # expression using the current state. If this fails, then it might be
2210 # because the expression refers to a "future" label: save the current
2211 # offset in `self._data` (generated data) and a snapshot of the
2212 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
2213 # object). _gen_fl_num_item_insts() will deal with this later. A
2214 # `_FlNumItemInst` instance also contains a snapshot of the current
2215 # parsing error messages (`self._parse_error_msgs`) which need to be
2216 # taken into account when handling the instance later.
2218 # When handling the items of a group, keep a map of immediate label
2219 # names to their offset. Then, after having processed all the items,
2220 # update the relevant saved state snapshots in
2221 # `self._fl_num_item_insts` with those immediate label values.
2222 # _gen_fl_num_item_insts() will deal with this later.
2224 # 2. Handle all the fixed-length number item instances of which the
2225 # expression evaluation failed before.
2227 # At this point, `self._fl_num_item_insts` contains everything that's
2228 # needed to evaluate the expressions, including the values of
2229 # "future" labels from the point of view of some fixed-length number
2232 # If an evaluation fails at this point, then it's a user error. Add
2233 # to the parsing error all the saved parsing error messages of the
2234 # instance. Those additional messages add precious context to the
2240 macro_defs
: _MacroDefsT
,
2241 variables
: VariablesT
,
2244 bo
: Optional
[ByteOrder
],
2246 self
._macro
_defs
= macro_defs
2247 self
._fl
_num
_item
_insts
= [] # type: List[_FlNumItemInst]
2248 self
._parse
_error
_msgs
= [] # type: List[ParseErrorMessage]
2249 self
._in
_trans
= False
2250 self
._gen
(group
, _GenState(variables
, labels
, offset
, bo
))
2257 # Updated variables.
2259 def variables(self
):
2260 return self
._final
_state
.variables
2262 # Updated main group labels.
2265 return self
._final
_state
.labels
2270 return self
._final
_state
.offset
2272 # Updated byte order.
2275 return self
._final
_state
.bo
2277 # Evaluates the expression `expr` of which the original string is
2278 # `expr_str` at the location `text_loc` considering the current
2279 # generation state `state`.
2281 # If `accept_float` is `True`, then the type of the result may be
2284 # If `accept_str` is `True`, then the type of the result may be
2289 expr
: ast
.Expression
,
2290 text_loc
: TextLocation
,
2292 accept_float
: bool = False,
2293 accept_str
: bool = False,
2295 syms
= {} # type: VariablesT
2296 syms
.update(state
.labels
)
2298 # Set the `ICITTE` name to the current offset
2299 syms
[_icitte_name
] = state
.offset
2301 # Add the current variables
2302 syms
.update(state
.variables
)
2304 # Validate the node and its children
2305 _ExprValidator(expr_str
, text_loc
, set(syms
.keys())).visit(expr
)
2307 # Compile and evaluate expression node
2309 val
= eval(compile(expr
, "", "eval"), None, syms
)
2310 except Exception as exc
:
2312 "Failed to evaluate expression `{}`: {}".format(expr_str
, exc
),
2316 # Convert `bool` result type to `int` to normalize
2317 if type(val
) is bool:
2320 # Validate result type
2321 expected_types
= {int}
# type: Set[type]
2324 expected_types
.add(float)
2327 expected_types
.add(str)
2329 if type(val
) not in expected_types
:
2330 expected_types_str
= sorted(
2331 ["`{}`".format(t
.__name
__) for t
in expected_types
]
2334 if len(expected_types_str
) == 1:
2335 msg_expected
= expected_types_str
[0]
2336 elif len(expected_types_str
) == 2:
2337 msg_expected
= " or ".join(expected_types_str
)
2339 expected_types_str
[-1] = "or {}".format(expected_types_str
[-1])
2340 msg_expected
= ", ".join(expected_types_str
)
2343 "Invalid expression `{}`: expecting result type {}, not `{}`".format(
2344 expr_str
, msg_expected
, type(val
).__name
__
2351 # Forwards to _eval_expr() with the expression and text location of
2354 def _eval_item_expr(
2355 item
: Union
[_Cond
, _FillUntil
, _FlNum
, _Leb128Int
, _Rep
, _Str
, _VarAssign
],
2357 accept_float
: bool = False,
2358 accept_str
: bool = False,
2360 return _Gen
._eval
_expr
(
2361 item
.expr_str
, item
.expr
, item
.text_loc
, state
, accept_float
, accept_str
# Handles the byte item `item`.
#
# Appends the value of the item to the generated data, and then
# advances the current offset of `state` by the size of the item.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    byte_val = item.val
    self._data.append(byte_val)

    item_size = item.size
    state.offset += item_size
# Handles the literal string item `item`.
#
# Appends the data of the item to the generated data, and then
# advances the current offset of `state` by the size of the item.
def _handle_lit_str_item(self, item: _LitStr, state: _GenState):
    str_data = item.data
    self._data += str_data

    item_size = item.size
    state.offset += item_size
2374 # Handles the byte order setting item `item`.
2375 def _handle_set_bo_item(self
, item
: _SetBo
, state
: _GenState
):
2376 # Update current byte order
2379 # Handles the variable assignment item `item`.
2380 def _handle_var_assign_item(self
, item
: _VarAssign
, state
: _GenState
):
2382 state
.variables
[item
.name
] = self
._eval
_item
_expr
(
2383 item
, state
, accept_float
=True, accept_str
=True
2386 # Returns the effective byte order to use to encode the fixed-length
2387 # number `item` considering the current state `state`.
def _fl_num_item_effective_bo(item: _FlNum, state: _GenState):
    # A byte order set on the item itself takes precedence; only fall
    # back to the current byte order of `state` when the item has none.
    if item.bo is not None:
        return item.bo

    return state.bo
2392 # Handles the fixed-length number item `item`.
2393 def _handle_fl_num_item(self
, item
: _FlNum
, state
: _GenState
):
2394 # Effective byte order
2395 bo
= self
._fl
_num
_item
_effective
_bo
(item
, state
)
2397 # Validate current byte order
2398 if bo
is None and item
.len > 8:
2399 _raise_error_for_item(
2400 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
2406 # Try an immediate evaluation. If it fails, then keep everything
2407 # needed to (try to) generate the bytes of this item later.
2409 data
= self
._gen
_fl
_num
_item
_inst
_data
(item
, state
)
2412 _raise_error_for_item(
2413 "Invalid expression `{}`: failed to evaluate within a transformation block".format(
2419 self
._fl
_num
_item
_insts
.append(
2423 copy
.deepcopy(state
),
2424 copy
.deepcopy(self
._parse
_error
_msgs
),
2428 # Reserve space in `self._data` for this instance
2429 data
= bytes([0] * (item
.len // 8))
2435 state
.offset
+= len(data
)
2437 # Returns the size, in bytes, required to encode the value `val`
2438 # with LEB128 (signed version if `is_signed` is `True`).
2440 def _leb128_size_for_val(val
: int, is_signed
: bool):
2442 # Equivalent upper bound.
2444 # For example, if `val` is -128, then the full integer for
2445 # this number of bits would be [-128, 127].
2448 # Number of bits (add one for the sign if needed)
2449 bits
= val
.bit_length() + int(is_signed
)
2454 # Seven bits per byte
2455 return math
.ceil(bits
/ 7)
2457 # Handles the LEB128 integer item `item`.
2458 def _handle_leb128_int_item(self
, item
: _Leb128Int
, state
: _GenState
):
2460 val
= self
._eval
_item
_expr
(item
, state
)
2463 size
= self
._leb
128_size
_for
_val
(val
, type(item
) is _SLeb128Int
)
2466 for _
in range(size
):
2467 # Seven LSBs, MSB of the byte set (continue)
2468 self
._data
.append((val
& 0x7F) |
0x80)
2471 # Clear MSB of last byte (stop)
2472 self
._data
[-1] &= ~
0x80
2475 state
.offset
+= size
2477 # Handles the string item `item`.
2478 def _handle_str_item(self
, item
: _Str
, state
: _GenState
):
2480 val
= str(self
._eval
_item
_expr
(item
, state
, accept_float
=True, accept_str
=True))
2483 data
= _encode_str(val
, item
.codec
, item
.text_loc
)
2489 state
.offset
+= len(data
)
2491 # Handles the group item `item`, removing the immediate labels from
2492 # `state` at the end if `remove_immediate_labels` is `True`.
2493 def _handle_group_item(
2494 self
, item
: _Group
, state
: _GenState
, remove_immediate_labels
: bool = True
2496 first_fl_num_item_inst_index
= len(self
._fl
_num
_item
_insts
)
2497 immediate_labels
= {} # type: LabelsT
2500 for subitem
in item
.items
:
2501 if type(subitem
) is _Label
:
2502 # Add to local immediate labels
2503 immediate_labels
[subitem
.name
] = state
.offset
2505 self
._handle
_item
(subitem
, state
)
2507 # Remove immediate labels from current state if needed
2508 if remove_immediate_labels
:
2509 for name
in immediate_labels
:
2510 del state
.labels
[name
]
2512 # Add all immediate labels to all state snapshots since
2513 # `first_fl_num_item_inst_index`.
2514 for inst
in self
._fl
_num
_item
_insts
[first_fl_num_item_inst_index
:]:
2515 inst
.state
.labels
.update(immediate_labels
)
2517 # Handles the repetition item `item`.
2518 def _handle_rep_item(self
, item
: _Rep
, state
: _GenState
):
2519 # Compute the repetition count
2520 mul
= _Gen
._eval
_item
_expr
(item
, state
)
2524 _raise_error_for_item(
2525 "Invalid expression `{}`: unexpected negative result {:,}".format(
2531 # Generate group data `mul` times
2532 for _
in range(mul
):
2533 self
._handle
_group
_item
(item
, state
)
2535 # Handles the conditional item `item`.
2536 def _handle_cond_item(self
, item
: _Cond
, state
: _GenState
):
2537 # Compute the conditional value
2538 val
= _Gen
._eval
_item
_expr
(item
, state
)
2540 # Generate selected group data
2542 self
._handle
_group
_item
(item
.true_item
, state
)
2544 self
._handle
_group
_item
(item
.false_item
, state
)
2546 # Handles the transformation item `item`.
2547 def _handle_trans_item(self
, item
: _Trans
, state
: _GenState
):
2548 init_in_trans
= self
._in
_trans
2549 self
._in
_trans
= True
2550 init_data_len
= len(self
._data
)
2551 init_offset
= state
.offset
2553 # Generate group data
2554 self
._handle
_group
_item
(item
, state
)
2556 # Remove and keep group data
2557 to_trans
= self
._data
[init_data_len
:]
2558 del self
._data
[init_data_len
:]
2560 # Encode group data and append to current data
2562 transformed
= item
.trans(to_trans
)
2563 except Exception as exc
:
2564 _raise_error_for_item(
2565 "Cannot apply the {} transformation to this data: {}".format(
2571 self
._data
+= transformed
2573 # Update offset and restore
2574 state
.offset
= init_offset
+ len(transformed
)
2575 self
._in
_trans
= init_in_trans
2577 # Evaluates the parameters of the macro expansion item `item`
2578 # considering the initial state `init_state` and returns a new state
2579 # to handle the items of the macro.
2580 def _eval_macro_exp_params(self
, item
: _MacroExp
, init_state
: _GenState
):
2582 exp_state
= _GenState({}, {}, init_state
.offset
, init_state
.bo
)
2584 # Evaluate the parameter expressions
2585 macro_def
= self
._macro
_defs
[item
.name
]
2587 for param_name
, param
in zip(macro_def
.param_names
, item
.params
):
2588 exp_state
.variables
[param_name
] = _Gen
._eval
_expr
(
2599 # Handles the macro expansion item `item`.
2600 def _handle_macro_exp_item(self
, item
: _MacroExp
, state
: _GenState
):
2601 parse_error_msg_text
= "While expanding the macro `{}`:".format(item
.name
)
2605 exp_state
= self
._eval
_macro
_exp
_params
(item
, state
)
2607 # Process the contained group
2608 init_data_size
= len(self
._data
)
2610 ParseErrorMessage
._create
( # pyright: ignore[reportPrivateUsage]
2611 parse_error_msg_text
, item
.text_loc
2614 self
._parse
_error
_msgs
.append(parse_error_msg
)
2615 self
._handle
_group
_item
(self
._macro
_defs
[item
.name
], exp_state
)
2616 self
._parse
_error
_msgs
.pop()
2617 except ParseError
as exc
:
2618 _augment_error(exc
, parse_error_msg_text
, item
.text_loc
)
2620 # Update state offset and return
2621 state
.offset
+= len(self
._data
) - init_data_size
# Handles the offset setting item `item`.
#
# Replaces the current offset of `state` with the value of the item;
# nothing is appended to the generated data.
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    new_offset = item.val
    state.offset = new_offset
# Handles the offset alignment item `item` (adds padding).
#
# Rounds the current offset of `state` up to the next multiple of the
# alignment, and appends as many padding bytes (`item.pad_val`) as
# needed to reach it.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    offset_before = state.offset

    # The alignment value of the item is in bits: work with bytes
    step = item.val // 8

    # Round up to the next multiple of `step`
    block_count = (offset_before + step - 1) // step
    state.offset = block_count * step

    # Fill the gap with the padding value
    pad_len = state.offset - offset_before
    self._data += bytes([item.pad_val] * pad_len)
2634 # Handles the filling item `item` (adds padding).
2635 def _handle_fill_until_item(self
, item
: _FillUntil
, state
: _GenState
):
2636 # Compute the new offset
2637 new_offset
= _Gen
._eval
_item
_expr
(item
, state
)
2639 # Validate the new offset
2640 if new_offset
< state
.offset
:
2641 _raise_error_for_item(
2642 "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
2643 item
.expr_str
, new_offset
, state
.offset
2649 self
._data
+= bytes([item
.pad_val
] * (new_offset
- state
.offset
))
2652 state
.offset
= new_offset
# Handles the label item `item`: records the current offset of
# `state` under the name of the label within `state.labels`.
def _handle_label_item(self, item: _Label, state: _GenState):
    label_name = item.name
    state.labels[label_name] = state.offset
# Handles the item `item` with the state `state`.
#
# Looks up the handler registered in `self._item_handlers` for the
# exact type of `item`, calls it with `item` and `state`, and returns
# whatever this handler returns.
def _handle_item(self, item: _Item, state: _GenState):
    handler = self._item_handlers[type(item)]
    return handler(item, state)
2663 # Generates the data for a fixed-length integer item instance having
2664 # the value `val` and the effective byte order `bo` and returns it.
2665 def _gen_fl_int_item_inst_data(
2666 self
, val
: int, bo
: Optional
[ByteOrder
], item
: _FlNum
2669 if val
< -(2 ** (item
.len - 1)) or val
> 2**item
.len - 1:
2670 _raise_error_for_item(
2671 "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
2672 val
, item
.len, item
.expr_str
2677 # Encode result on 64 bits (to extend the sign bit whatever the
2678 # value of `item.len`).
2681 ">" if bo
in (None, ByteOrder
.BE
) else "<",
2682 "Q" if val
>= 0 else "q",
2687 # Keep only the requested length
2688 len_bytes
= item
.len // 8
2690 if bo
in (None, ByteOrder
.BE
):
2691 # Big endian: keep last bytes
2692 data
= data
[-len_bytes
:]
2694 # Little endian: keep first bytes
2695 assert bo
== ByteOrder
.LE
2696 data
= data
[:len_bytes
]
2701 # Generates the data for a fixed-length floating point number item
2702 # instance having the value `val` and the effective byte order `bo`
2704 def _gen_fl_float_item_inst_data(
2705 self
, val
: float, bo
: Optional
[ByteOrder
], item
: _FlNum
2708 if item
.len not in (32, 64):
2709 _raise_error_for_item(
2710 "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
2716 # Encode and return result
2719 ">" if bo
in (None, ByteOrder
.BE
) else "<",
2720 "f" if item
.len == 32 else "d",
# Generates and returns the data for an instance of the fixed-length
# number item `item` considering the state `state`.
#
# The expression of `item` is evaluated first; the integer or the
# floating point number encoder is then selected from the exact type
# of the result.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    # Effective byte order, resolved before evaluating the expression
    effective_bo = self._fl_num_item_effective_bo(item, state)

    # Value to encode
    value = self._eval_item_expr(item, state, True)

    # Anything which isn't exactly an `int` must be a `float`
    if type(value) is not int:
        assert type(value) is float
        return self._gen_fl_float_item_inst_data(value, effective_bo, item)

    return self._gen_fl_int_item_inst_data(value, effective_bo, item)
2741 # Generates the data for all the fixed-length number item instances
2742 # and writes it at the correct offset within `self._data`.
2743 def _gen_fl_num_item_insts(self
):
2744 for inst
in self
._fl
_num
_item
_insts
:
2747 data
= self
._gen
_fl
_num
_item
_inst
_data
(inst
.item
, inst
.state
)
2748 except ParseError
as exc
:
2749 # Add all the saved parse error messages for this
2751 for msg
in reversed(inst
.parse_error_msgs
):
2752 _add_error_msg(exc
, msg
.text
, msg
.text_location
)
2756 # Insert bytes into `self._data`
2757 self
._data
[inst
.offset_in_data
: inst
.offset_in_data
+ len(data
)] = data
# Generates the data (`self._data`) and the final state
# (`self._final_state`) from the group `group` and the initial state
# `state`.
#
# `state` is handed to the item handlers and is the same object which
# ends up being stored as the final state.
def _gen(self, group: _Group, state: _GenState):
    # Start with an empty output buffer
    self._data = bytearray()

    # Dispatch table: exact item type to the method handling it (both
    # LEB128 integer item types share the same handler).
    handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _LitStr: self._handle_lit_str_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _Trans: self._handle_trans_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]
    self._item_handlers = handlers

    # Process the root group, _not_ removing the immediate labels
    # because the `labels` property offers them.
    self._handle_group_item(group, state, False)

    # `state` is now the final state
    self._final_state = state

    # Generate the bytes of the fixed-length number item instances
    # last, once the whole group is handled.
    self._gen_fl_num_item_insts()
2798 # Returns a `ParseResult` instance containing the bytes encoded by the
2799 # input string `normand`.
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to values (integers, floating point numbers, or
# strings). A variable name must not be the reserved name `ICITTE`.
2805 # `init_labels` is a dictionary of initial label names (valid Python
2806 # names) to integral values. A label name must not be the reserved name
2809 # `init_offset` is the initial offset.
2811 # `init_byte_order` is the initial byte order.
2813 # Raises `ParseError` on any parsing error.
2816 init_variables
: Optional
[VariablesT
] = None,
2817 init_labels
: Optional
[LabelsT
] = None,
2818 init_offset
: int = 0,
2819 init_byte_order
: Optional
[ByteOrder
] = None,
2821 if init_variables
is None:
2824 if init_labels
is None:
2827 parser
= _Parser(normand
, init_variables
, init_labels
)
2836 return ParseResult
._create
( # pyright: ignore[reportPrivateUsage]
2837 gen
.data
, gen
.variables
, gen
.labels
, gen
.offset
, gen
.bo
2841 # Raises a command-line error with the message `msg`.
2842 def _raise_cli_error(msg
: str) -> NoReturn
:
2843 raise RuntimeError("Command-line error: {}".format(msg
))
2846 # Returns the `int` or `float` value out of a CLI assignment value.
2847 def _val_from_assign_val_str(s
: str, is_label
: bool):
2850 # Floating point number?
2852 m
= _const_float_pat
.fullmatch(s
)
2855 return float(m
.group(0))
2858 m
= _const_int_pat
.fullmatch(s
)
2861 return int(_norm_const_int(m
.group(0)), 0)
2863 exp
= "an integer" if is_label
else "a number"
2864 _raise_cli_error("Invalid assignment value `{}`: expecting {}".format(s
, exp
))
2867 # Returns a dictionary of string to numbers from the list of strings
2868 # `args` containing `NAME=VAL` entries.
2869 def _dict_from_arg(args
: Optional
[List
[str]], is_label
: bool, is_str_only
: bool):
2870 d
= {} # type: VariablesT
2876 m
= re
.match(r
"({})\s*=\s*(.*)$".format(_py_name_pat
.pattern
), arg
)
2879 _raise_cli_error("Invalid assignment `{}`".format(arg
))
2884 val
= _val_from_assign_val_str(m
.group(2), is_label
)
2891 # Parses the command-line arguments and returns, in this order:
2893 # 1. The input file path, or `None` if none.
2894 # 2. The Normand input text.
2895 # 3. The initial offset.
2896 # 4. The initial byte order.
2897 # 5. The initial variables.
2898 # 6. The initial labels.
2899 def _parse_cli_args():
2903 ap
= argparse
.ArgumentParser()
2910 help="initial offset (positive)",
2916 choices
=["be", "le"],
2918 help="initial byte order (`be` or `le`)",
2925 help="add an initial numeric variable (may be repeated)",
2932 help="add an initial string variable (may be repeated)",
2939 help="add an initial label (may be repeated)",
2942 "--version", action
="version", version
="Normand {}".format(__version__
)
2949 help="input path (none means standard input)",
2953 args
= ap
.parse_args()
2956 if args
.path
is None:
2957 normand
= sys
.stdin
.read()
2959 with
open(args
.path
) as f
:
2962 # Variables and labels
2963 variables
= _dict_from_arg(args
.var
, False, False)
2964 variables
.update(_dict_from_arg(args
.var_str
, False, True))
2965 labels
= _dict_from_arg(args
.label
, True, False)
2969 _raise_cli_error("Invalid negative offset {}")
2971 # Validate and set byte order
2972 bo
= None # type: Optional[ByteOrder]
2974 if args
.byte_order
is not None:
2975 if args
.byte_order
== "be":
2978 assert args
.byte_order
== "le"
2981 # Return input and initial state
2982 return args
.path
, normand
, args
.offset
, bo
, variables
, typing
.cast(LabelsT
, labels
)
2985 # CLI entry point without exception handling.
2986 def _run_cli_with_args(
2989 bo
: Optional
[ByteOrder
],
2990 variables
: VariablesT
,
2993 sys
.stdout
.buffer.write(parse(normand
, variables
, labels
, offset
, bo
).data
)
2996 # Prints the exception message `msg` and exits with status 1.
2997 def _fail(msg
: str) -> NoReturn
:
2998 if not msg
.endswith("."):
3001 print(msg
.strip(), file=sys
.stderr
)
3008 args
= _parse_cli_args()
3009 except Exception as exc
:
3013 _run_cli_with_args(*args
[1:])
3014 except ParseError
as exc
:
3017 prefix
= "" if args
[0] is None else "{}:".format(os
.path
.abspath(args
[0]))
3020 for msg
in reversed(exc
.messages
):
3021 fail_msg
+= "{}{}:{} - {}".format(
3023 msg
.text_location
.line_no
,
3024 msg
.text_location
.col_no
,
3028 if fail_msg
[-1] not in ".:;":
3033 _fail(fail_msg
.strip())
3034 except Exception as exc
:
3038 if __name__
== "__main__":