1 # The MIT License (MIT)
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
28 # Feel free to copy this module file to your own project to use Normand.
30 # Upstream repository: <https://github.com/efficios/normand>.
32 __author__
= "Philippe Proulx"
33 __version__
= "0.10.0"
54 from typing
import Any
, Set
, Dict
, List
, Union
, Pattern
, Callable
, NoReturn
, Optional
57 # Text location (line and column numbers).
60 def _create(cls
, line_no
: int, col_no
: int):
61 self
= cls
.__new
__(cls
)
62 self
._init
(line_no
, col_no
)
65 def __init__(*args
, **kwargs
): # type: ignore
66 raise NotImplementedError
68 def _init(self
, line_no
: int, col_no
: int):
69 self
._line
_no
= line_no
83 return "TextLocation({}, {})".format(self
._line
_no
, self
._col
_no
)
88 def __init__(self
, text_loc
: TextLocation
):
89 self
._text
_loc
= text_loc
91 # Source text location.
98 class _ScalarItem(_Item
):
99 # Returns the size, in bytes, of this item.
102 def size(self
) -> int:
112 class _Byte(_ScalarItem
, _RepableItem
):
113 def __init__(self
, val
: int, text_loc
: TextLocation
):
114 super().__init
__(text_loc
)
127 return "_Byte({}, {})".format(hex(self
._val
), repr(self
._text
_loc
))
131 class _Str(_ScalarItem
, _RepableItem
):
132 def __init__(self
, data
: bytes
, text_loc
: TextLocation
):
133 super().__init
__(text_loc
)
143 return len(self
._data
)
146 return "_Str({}, {})".format(repr(self
._data
), repr(self
._text
_loc
))
151 class ByteOrder(enum
.Enum
):
159 # Byte order setting.
161 def __init__(self
, bo
: ByteOrder
, text_loc
: TextLocation
):
162 super().__init
__(text_loc
)
170 return "_SetBo({}, {})".format(repr(self
._bo
), repr(self
._text
_loc
))
175 def __init__(self
, name
: str, text_loc
: TextLocation
):
176 super().__init
__(text_loc
)
185 return "_Label({}, {})".format(repr(self
._name
), repr(self
._text
_loc
))
189 class _SetOffset(_Item
):
190 def __init__(self
, val
: int, text_loc
: TextLocation
):
191 super().__init
__(text_loc
)
194 # Offset value (bytes).
200 return "_SetOffset({}, {})".format(repr(self
._val
), repr(self
._text
_loc
))
204 class _AlignOffset(_Item
):
205 def __init__(self
, val
: int, pad_val
: int, text_loc
: TextLocation
):
206 super().__init
__(text_loc
)
208 self
._pad
_val
= pad_val
210 # Alignment value (bits).
215 # Padding byte value.
221 return "_AlignOffset({}, {}, {})".format(
222 repr(self
._val
), repr(self
._pad
_val
), repr(self
._text
_loc
)
# Mixin containing an AST expression and its string.
228 def __init__(self
, expr_str
: str, expr
: ast
.Expression
):
229 self
._expr
_str
= expr_str
235 return self
._expr
_str
237 # Expression node to evaluate.
243 # Variable assignment.
244 class _VarAssign(_Item
, _ExprMixin
):
246 self
, name
: str, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
248 super().__init
__(text_loc
)
249 _ExprMixin
.__init
__(self
, expr_str
, expr
)
258 return "_VarAssign({}, {}, {}, {})".format(
260 repr(self
._expr
_str
),
262 repr(self
._text
_loc
),
266 # Fixed-length number, possibly needing more than one byte.
267 class _FlNum(_ScalarItem
, _RepableItem
, _ExprMixin
):
269 self
, expr_str
: str, expr
: ast
.Expression
, len: int, text_loc
: TextLocation
271 super().__init
__(text_loc
)
272 _ExprMixin
.__init
__(self
, expr_str
, expr
)
282 return self
._len
// 8
285 return "_FlNum({}, {}, {}, {})".format(
286 repr(self
._expr
_str
),
289 repr(self
._text
_loc
),
294 class _Leb128Int(_Item
, _RepableItem
, _ExprMixin
):
295 def __init__(self
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
):
296 super().__init
__(text_loc
)
297 _ExprMixin
.__init
__(self
, expr_str
, expr
)
300 return "{}({}, {}, {})".format(
301 self
.__class
__.__name
__,
302 repr(self
._expr
_str
),
304 repr(self
._text
_loc
),
308 # Unsigned LEB128 integer.
309 class _ULeb128Int(_Leb128Int
, _RepableItem
, _ExprMixin
):
313 # Signed LEB128 integer.
314 class _SLeb128Int(_Leb128Int
, _RepableItem
, _ExprMixin
):
319 class _Group(_Item
, _RepableItem
):
320 def __init__(self
, items
: List
[_Item
], text_loc
: TextLocation
):
321 super().__init
__(text_loc
)
330 return "_Group({}, {})".format(repr(self
._items
), repr(self
._text
_loc
))
334 class _Rep(_Item
, _ExprMixin
):
336 self
, item
: _Item
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
338 super().__init
__(text_loc
)
339 _ExprMixin
.__init
__(self
, expr_str
, expr
)
348 return "_Rep({}, {}, {}, {})".format(
350 repr(self
._expr
_str
),
352 repr(self
._text
_loc
),
357 class _Cond(_Item
, _ExprMixin
):
359 self
, item
: _Item
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
361 super().__init
__(text_loc
)
362 _ExprMixin
.__init
__(self
, expr_str
, expr
)
371 return "_Cond({}, {}, {}, {})".format(
373 repr(self
._expr
_str
),
375 repr(self
._text
_loc
),
379 # Expression item type.
380 _ExprItemT
= Union
[_FlNum
, _Leb128Int
, _VarAssign
, _Rep
, _Cond
]
383 # A parsing error containing a message and a text location.
384 class ParseError(RuntimeError):
386 def _create(cls
, msg
: str, text_loc
: TextLocation
):
387 self
= cls
.__new
__(cls
)
388 self
._init
(msg
, text_loc
)
391 def __init__(self
, *args
, **kwargs
): # type: ignore
392 raise NotImplementedError
def _init(self, msg: str, text_loc: TextLocation):
    # Forward the message `msg` to the parent initializer so that it
    # becomes the message of this exception.
    super().__init__(msg)

    # Keep the source text location of this error.
    self._text_loc = text_loc
398 # Source text location.
401 return self
._text
_loc
# Raises a `ParseError` created from the message `msg` and the text
# location `text_loc`; never returns.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
409 # Variables dictionary type (for type hints).
410 VariablesT
= Dict
[str, Union
[int, float]]
413 # Labels dictionary type (for type hints).
414 LabelsT
= Dict
[str, int]
417 # Python name pattern.
418 _py_name_pat
= re
.compile(r
"[a-zA-Z_][a-zA-Z0-9_]*")
423 # The constructor accepts a Normand input. After building, use the `res`
424 # property to get the resulting main group.
426 # Builds a parser to parse the Normand input `normand`, parsing
428 def __init__(self
, normand
: str, variables
: VariablesT
, labels
: LabelsT
):
429 self
._normand
= normand
433 self
._label
_names
= set(labels
.keys())
434 self
._var
_names
= set(variables
.keys())
437 # Result (main group).
442 # Current text location.
445 return TextLocation
._create
( # pyright: ignore[reportPrivateUsage]
446 self
._line
_no
, self
._col
_no
449 # Returns `True` if this parser is done parsing.
451 return self
._at
== len(self
._normand
)
453 # Returns `True` if this parser isn't done parsing.
454 def _isnt_done(self
):
455 return not self
._is
_done
()
# Raises a parse error with the message `msg`, located at the current
# text location of this parser (`self._text_loc`).
def _raise_error(self, msg: str) -> NoReturn:
    current_text_loc = self._text_loc
    _raise_error(msg, current_text_loc)
462 # Tries to make the pattern `pat` match the current substring,
463 # returning the match object and updating `self._at`,
464 # `self._line_no`, and `self._col_no` on success.
465 def _try_parse_pat(self
, pat
: Pattern
[str]):
466 m
= pat
.match(self
._normand
, self
._at
)
471 # Skip matched string
472 self
._at
+= len(m
.group(0))
475 self
._line
_no
+= m
.group(0).count("\n")
477 # Update column number
478 for i
in reversed(range(self
._at
)):
479 if self
._normand
[i
] == "\n" or i
== 0:
481 self
._col
_no
= self
._at
+ 1
483 self
._col
_no
= self
._at
- i
487 # Return match object
490 # Expects the pattern `pat` to match the current substring,
491 # returning the match object and updating `self._at`,
492 # `self._line_no`, and `self._col_no` on success, or raising a parse
493 # error with the message `error_msg` on error.
494 def _expect_pat(self
, pat
: Pattern
[str], error_msg
: str):
496 m
= self
._try
_parse
_pat
(pat
)
500 self
._raise
_error
(error_msg
)
502 # Return match object
505 # Pattern for _skip_ws_and_comments()
506 _ws_or_syms_or_comments_pat
= re
.compile(
507 r
"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
510 # Skips as many whitespaces, insignificant symbol characters, and
511 # comments as possible.
512 def _skip_ws_and_comments(self
):
513 self
._try
_parse
_pat
(self
._ws
_or
_syms
_or
_comments
_pat
)
515 # Pattern for _try_parse_hex_byte()
516 _nibble_pat
= re
.compile(r
"[A-Fa-f0-9]")
518 # Tries to parse a hexadecimal byte, returning a byte item on
520 def _try_parse_hex_byte(self
):
521 begin_text_loc
= self
._text
_loc
523 # Match initial nibble
524 m_high
= self
._try
_parse
_pat
(self
._nibble
_pat
)
530 # Expect another nibble
531 self
._skip
_ws
_and
_comments
()
532 m_low
= self
._expect
_pat
(
533 self
._nibble
_pat
, "Expecting another hexadecimal nibble"
537 return _Byte(int(m_high
.group(0) + m_low
.group(0), 16), begin_text_loc
)
539 # Patterns for _try_parse_bin_byte()
540 _bin_byte_bit_pat
= re
.compile(r
"[01]")
541 _bin_byte_prefix_pat
= re
.compile(r
"%")
543 # Tries to parse a binary byte, returning a byte item on success.
544 def _try_parse_bin_byte(self
):
545 begin_text_loc
= self
._text
_loc
548 if self
._try
_parse
_pat
(self
._bin
_byte
_prefix
_pat
) is None:
553 bits
= [] # type: List[str]
556 self
._skip
_ws
_and
_comments
()
557 m
= self
._expect
_pat
(self
._bin
_byte
_bit
_pat
, "Expecting a bit (`0` or `1`)")
558 bits
.append(m
.group(0))
561 return _Byte(int("".join(bits
), 2), begin_text_loc
)
563 # Patterns for _try_parse_dec_byte()
564 _dec_byte_prefix_pat
= re
.compile(r
"\$\s*")
565 _dec_byte_val_pat
= re
.compile(r
"(?P<neg>-?)(?P<val>\d+)")
567 # Tries to parse a decimal byte, returning a byte item on success.
568 def _try_parse_dec_byte(self
):
569 begin_text_loc
= self
._text
_loc
572 if self
._try
_parse
_pat
(self
._dec
_byte
_prefix
_pat
) is None:
577 m
= self
._expect
_pat
(self
._dec
_byte
_val
_pat
, "Expecting a decimal constant")
580 val
= int(m
.group("val")) * (-1 if m
.group("neg") == "-" else 1)
583 if val
< -128 or val
> 255:
584 _raise_error("Invalid decimal byte value {}".format(val
), begin_text_loc
)
590 return _Byte(val
, begin_text_loc
)
592 # Tries to parse a byte, returning a byte item on success.
593 def _try_parse_byte(self
):
595 item
= self
._try
_parse
_hex
_byte
()
601 item
= self
._try
_parse
_bin
_byte
()
607 item
= self
._try
_parse
_dec
_byte
()
612 # Patterns for _try_parse_str()
613 _str_prefix_pat
= re
.compile(r
'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
614 _str_suffix_pat
= re
.compile(r
'"')
615 _str_str_pat
= re
.compile(r
'(?:(?:\\.)|[^"])*')
617 # Strings corresponding to escape sequence characters
618 _str_escape_seq_strs
= {
632 # Tries to parse a string, returning a string item on success.
633 def _try_parse_str(self
):
634 begin_text_loc
= self
._text
_loc
637 m
= self
._try
_parse
_pat
(self
._str
_prefix
_pat
)
646 if m
.group("len") is not None:
647 encoding
= "utf_{}_{}".format(m
.group("len"), m
.group("bo"))
650 m
= self
._expect
_pat
(self
._str
_str
_pat
, "Expecting a literal string")
652 # Expect end of string
653 self
._expect
_pat
(self
._str
_suffix
_pat
, 'Expecting `"` (end of literal string)')
655 # Replace escape sequences
658 for ec
in '0abefnrtv"\\':
659 val
= val
.replace(r
"\{}".format(ec
), self
._str
_escape
_seq
_strs
[ec
])
662 data
= val
.encode(encoding
)
665 return _Str(data
, begin_text_loc
)
667 # Patterns for _try_parse_group()
668 _group_prefix_pat
= re
.compile(r
"\(|!g(roup)?\b")
669 _group_suffix_paren_pat
= re
.compile(r
"\)")
671 # Tries to parse a group, returning a group item on success.
672 def _try_parse_group(self
):
673 begin_text_loc
= self
._text
_loc
676 m_open
= self
._try
_parse
_pat
(self
._group
_prefix
_pat
)
683 items
= self
._parse
_items
()
685 # Expect end of group
686 self
._skip
_ws
_and
_comments
()
688 if m_open
.group(0) == "(":
689 pat
= self
._group
_suffix
_paren
_pat
692 pat
= self
._block
_end
_pat
695 self
._expect
_pat
(pat
, "Expecting an item or `{}` (end of group)".format(exp
))
698 return _Group(items
, begin_text_loc
)
700 # Returns a stripped expression string and an AST expression node
701 # from the expression string `expr_str` at text location `text_loc`.
702 def _ast_expr_from_str(self
, expr_str
: str, text_loc
: TextLocation
):
703 # Create an expression node from the expression string
704 expr_str
= expr_str
.strip().replace("\n", " ")
707 expr
= ast
.parse(expr_str
, mode
="eval")
710 "Invalid expression `{}`: invalid syntax".format(expr_str
),
714 return expr_str
, expr
716 # Patterns for _try_parse_num_and_attr()
717 _val_expr_pat
= re
.compile(r
"([^}:]+):\s*")
718 _fl_num_len_attr_pat
= re
.compile(r
"8|16|24|32|40|48|56|64")
719 _leb128_int_attr_pat
= re
.compile(r
"(u|s)leb128")
721 # Tries to parse a value and attribute (fixed length in bits or
722 # `leb128`), returning a value item on success.
723 def _try_parse_num_and_attr(self
):
724 begin_text_loc
= self
._text
_loc
727 m_expr
= self
._try
_parse
_pat
(self
._val
_expr
_pat
)
733 # Create an expression node from the expression string
734 expr_str
, expr
= self
._ast
_expr
_from
_str
(m_expr
.group(1), begin_text_loc
)
737 m_attr
= self
._try
_parse
_pat
(self
._fl
_num
_len
_attr
_pat
)
741 m_attr
= self
._try
_parse
_pat
(self
._leb
128_int
_attr
_pat
)
744 # At this point it's invalid
746 "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
749 # Return LEB128 integer item
750 cls
= _ULeb128Int
if m_attr
.group(1) == "u" else _SLeb128Int
751 return cls(expr_str
, expr
, begin_text_loc
)
753 # Return fixed-length number item
757 int(m_attr
.group(0)),
# Pattern for _try_parse_var_assign()
762 _var_assign_pat
= re
.compile(
763 r
"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat
.pattern
)
766 # Tries to parse a variable assignment, returning a variable
767 # assignment item on success.
768 def _try_parse_var_assign(self
):
769 begin_text_loc
= self
._text
_loc
772 m
= self
._try
_parse
_pat
(self
._var
_assign
_pat
)
779 name
= m
.group("name")
781 if name
== _icitte_name
:
783 "`{}` is a reserved variable name".format(_icitte_name
), begin_text_loc
786 if name
in self
._label
_names
:
787 _raise_error("Existing label named `{}`".format(name
), begin_text_loc
)
789 # Add to known variable names
790 self
._var
_names
.add(name
)
792 # Create an expression node from the expression string
793 expr_str
, expr
= self
._ast
_expr
_from
_str
(m
.group("expr"), begin_text_loc
)
803 # Pattern for _try_parse_set_bo()
804 _bo_pat
= re
.compile(r
"[bl]e")
806 # Tries to parse a byte order name, returning a byte order setting
808 def _try_parse_set_bo(self
):
809 begin_text_loc
= self
._text
_loc
812 m
= self
._try
_parse
_pat
(self
._bo
_pat
)
818 # Return corresponding item
819 if m
.group(0) == "be":
820 return _SetBo(ByteOrder
.BE
, begin_text_loc
)
822 assert m
.group(0) == "le"
823 return _SetBo(ByteOrder
.LE
, begin_text_loc
)
# Patterns for _try_parse_val_or_var_assign_or_set_bo()
826 _val_var_assign_set_bo_prefix_pat
= re
.compile(r
"\{\s*")
827 _val_var_assign_set_bo_suffix_pat
= re
.compile(r
"\s*}")
829 # Tries to parse a value, a variable assignment, or a byte order
830 # setting, returning an item on success.
831 def _try_parse_val_or_var_assign_or_set_bo(self
):
833 if self
._try
_parse
_pat
(self
._val
_var
_assign
_set
_bo
_prefix
_pat
) is None:
837 # Variable assignment item?
838 item
= self
._try
_parse
_var
_assign
()
842 item
= self
._try
_parse
_num
_and
_attr
()
845 # Byte order setting item?
846 item
= self
._try
_parse
_set
_bo
()
849 # At this point it's invalid
851 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
855 self
._expect
_pat
(self
._val
_var
_assign
_set
_bo
_suffix
_pat
, "Expecting `}`")
858 # Common positive constant integer pattern
859 _pos_const_int_pat
= re
.compile(r
"0[Xx][A-Fa-f0-9]+|\d+")
861 # Tries to parse an offset setting value (after the initial `<`),
862 # returning an offset item on success.
863 def _try_parse_set_offset_val(self
):
864 begin_text_loc
= self
._text
_loc
867 m
= self
._try
_parse
_pat
(self
._pos
_const
_int
_pat
)
874 return _SetOffset(int(m
.group(0), 0), begin_text_loc
)
876 # Tries to parse a label name (after the initial `<`), returning a
877 # label item on success.
878 def _try_parse_label_name(self
):
879 begin_text_loc
= self
._text
_loc
882 m
= self
._try
_parse
_pat
(_py_name_pat
)
891 if name
== _icitte_name
:
893 "`{}` is a reserved label name".format(_icitte_name
), begin_text_loc
896 if name
in self
._label
_names
:
897 _raise_error("Duplicate label name `{}`".format(name
), begin_text_loc
)
899 if name
in self
._var
_names
:
900 _raise_error("Existing variable named `{}`".format(name
), begin_text_loc
)
902 # Add to known label names
903 self
._label
_names
.add(name
)
906 return _Label(name
, begin_text_loc
)
908 # Patterns for _try_parse_label_or_set_offset()
909 _label_set_offset_prefix_pat
= re
.compile(r
"<\s*")
910 _label_set_offset_suffix_pat
= re
.compile(r
"\s*>")
912 # Tries to parse a label or an offset setting, returning an item on
914 def _try_parse_label_or_set_offset(self
):
916 if self
._try
_parse
_pat
(self
._label
_set
_offset
_prefix
_pat
) is None:
920 # Offset setting item?
921 item
= self
._try
_parse
_set
_offset
_val
()
925 item
= self
._try
_parse
_label
_name
()
928 # At this point it's invalid
929 self
._raise
_error
("Expecting a label name or an offset setting value")
932 self
._expect
_pat
(self
._label
_set
_offset
_suffix
_pat
, "Expecting `>`")
935 # Patterns for _try_parse_align_offset()
936 _align_offset_prefix_pat
= re
.compile(r
"@\s*")
937 _align_offset_val_pat
= re
.compile(r
"(\d+)\s*")
938 _align_offset_pad_val_prefix_pat
= re
.compile(r
"~\s*")
940 # Tries to parse an offset alignment, returning an offset alignment
942 def _try_parse_align_offset(self
):
943 begin_text_loc
= self
._text
_loc
946 if self
._try
_parse
_pat
(self
._align
_offset
_prefix
_pat
) is None:
950 align_text_loc
= self
._text
_loc
951 m
= self
._expect
_pat
(
952 self
._align
_offset
_val
_pat
,
953 "Expecting an alignment (positive multiple of eight bits)",
957 val
= int(m
.group(1))
959 if val
<= 0 or (val
% 8) != 0:
961 "Invalid alignment value {} (not a positive multiple of eight)".format(
970 if self
._try
_parse
_pat
(self
._align
_offset
_pad
_val
_prefix
_pat
) is not None:
971 pad_val_text_loc
= self
._text
_loc
972 m
= self
._expect
_pat
(self
._pos
_const
_int
_pat
, "Expecting a byte value")
975 pad_val
= int(m
.group(0), 0)
979 "Invalid padding byte value {}".format(pad_val
),
984 return _AlignOffset(val
, pad_val
, begin_text_loc
)
# Patterns for _expect_rep_cond_expr()
987 _rep_cond_expr_prefix_pat
= re
.compile(r
"\{")
988 _rep_cond_expr_pat
= re
.compile(r
"[^}]+")
989 _rep_cond_expr_suffix_pat
= re
.compile(r
"\}")
991 # Parses the expression of a conditional block or of a repetition
992 # (block or post-item) and returns the expression string and AST
994 def _expect_rep_cond_expr(self
, accept_int
: bool):
995 expr_text_loc
= self
._text
_loc
1001 m
= self
._try
_parse
_pat
(self
._pos
_const
_int
_pat
)
1005 m
= self
._try
_parse
_pat
(_py_name_pat
)
1009 if self
._try
_parse
_pat
(self
._rep
_cond
_expr
_prefix
_pat
) is None:
1011 mid_msg
= "a positive constant integer, a name, or `{`"
1013 mid_msg
= "a name or `{`"
1015 # At this point it's invalid
1016 self
._raise
_error
("Expecting {}".format(mid_msg
))
1018 # Expect an expression
1019 expr_text_loc
= self
._text
_loc
1020 m
= self
._expect
_pat
(self
._rep
_cond
_expr
_pat
, "Expecting an expression")
1021 expr_str
= m
.group(0)
1024 self
._expect
_pat
(self
._rep
_cond
_expr
_suffix
_pat
, "Expecting `}`")
1026 expr_str
= m
.group(0)
1028 expr_str
= m
.group(0)
1030 return self
._ast
_expr
_from
_str
(expr_str
, expr_text_loc
)
1032 # Parses the multiplier expression of a repetition (block or
1033 # post-item) and returns the expression string and AST node.
1034 def _expect_rep_mul_expr(self
):
1035 return self
._expect
_rep
_cond
_expr
(True)
1037 # Common block end pattern
1038 _block_end_pat
= re
.compile(r
"!end\b\s*")
1040 # Pattern for _try_parse_rep_block()
1041 _rep_block_prefix_pat
= re
.compile(r
"!r(?:epeat)?\b\s*")
1043 # Tries to parse a repetition block, returning a repetition item on
1045 def _try_parse_rep_block(self
):
1046 begin_text_loc
= self
._text
_loc
1049 if self
._try
_parse
_pat
(self
._rep
_block
_prefix
_pat
) is None:
1054 self
._skip
_ws
_and
_comments
()
1055 expr_str
, expr
= self
._expect
_rep
_mul
_expr
()
1058 self
._skip
_ws
_and
_comments
()
1059 items_text_loc
= self
._text
_loc
1060 items
= self
._parse
_items
()
1062 # Expect end of block
1063 self
._skip
_ws
_and
_comments
()
1065 self
._block
_end
_pat
, "Expecting an item or `!end` (end of repetition block)"
1069 return _Rep(_Group(items
, items_text_loc
), expr_str
, expr
, begin_text_loc
)
1071 # Pattern for _try_parse_cond_block()
1072 _cond_block_prefix_pat
= re
.compile(r
"!if\b\s*")
1074 # Tries to parse a conditional block, returning a conditional item
1076 def _try_parse_cond_block(self
):
1077 begin_text_loc
= self
._text
_loc
1080 if self
._try
_parse
_pat
(self
._cond
_block
_prefix
_pat
) is None:
1085 self
._skip
_ws
_and
_comments
()
1086 expr_str
, expr
= self
._expect
_rep
_cond
_expr
(False)
1089 self
._skip
_ws
_and
_comments
()
1090 items_text_loc
= self
._text
_loc
1091 items
= self
._parse
_items
()
1093 # Expect end of block
1094 self
._skip
_ws
_and
_comments
()
1096 self
._block
_end
_pat
,
1097 "Expecting an item or `!end` (end of conditional block)",
1101 return _Cond(_Group(items
, items_text_loc
), expr_str
, expr
, begin_text_loc
)
1103 # Tries to parse a base item (anything except a repetition),
1104 # returning it on success.
1105 def _try_parse_base_item(self
):
1107 item
= self
._try
_parse
_byte
()
1109 if item
is not None:
1113 item
= self
._try
_parse
_str
()
1115 if item
is not None:
1118 # Value, variable assignment, or byte order setting item?
1119 item
= self
._try
_parse
_val
_or
_var
_assign
_or
_set
_bo
()
1121 if item
is not None:
1124 # Label or offset setting item?
1125 item
= self
._try
_parse
_label
_or
_set
_offset
()
1127 if item
is not None:
1130 # Offset alignment item?
1131 item
= self
._try
_parse
_align
_offset
()
1133 if item
is not None:
1137 item
= self
._try
_parse
_group
()
1139 if item
is not None:
1142 # Repetition (block) item?
1143 item
= self
._try
_parse
_rep
_block
()
1145 if item
is not None:
1148 # Conditional block item?
1149 item
= self
._try
_parse
_cond
_block
()
1151 if item
is not None:
1154 # Pattern for _try_parse_rep_post()
1155 _rep_post_prefix_pat
= re
.compile(r
"\*")
1157 # Tries to parse a post-item repetition, returning the expression
1158 # string and AST expression node on success.
1159 def _try_parse_rep_post(self
):
1161 if self
._try
_parse
_pat
(self
._rep
_post
_prefix
_pat
) is None:
1165 # Return expression string and AST expression
1166 self
._skip
_ws
_and
_comments
()
1167 return self
._expect
_rep
_mul
_expr
()
1169 # Tries to parse an item, possibly followed by a repetition,
1170 # returning `True` on success.
1172 # Appends any parsed item to `items`.
1173 def _try_append_item(self
, items
: List
[_Item
]):
1174 self
._skip
_ws
_and
_comments
()
1177 item
= self
._try
_parse
_base
_item
()
1183 # Parse repetition if the base item is repeatable
1184 if isinstance(item
, _RepableItem
):
1185 self
._skip
_ws
_and
_comments
()
1186 rep_text_loc
= self
._text
_loc
1187 rep_ret
= self
._try
_parse
_rep
_post
()
1189 if rep_ret
is not None:
1190 item
= _Rep(item
, rep_ret
[0], rep_ret
[1], rep_text_loc
)
1195 # Parses and returns items, skipping whitespaces, insignificant
1196 # symbols, and comments when allowed, and stopping at the first
1197 # unknown character.
1198 def _parse_items(self
) -> List
[_Item
]:
1199 items
= [] # type: List[_Item]
1201 while self
._isnt
_done
():
1202 # Try to append item
1203 if not self
._try
_append
_item
(items
):
1204 # Unknown at this point
1209 # Parses the whole Normand input, setting `self._res` to the main
1210 # group item on success.
1212 if len(self
._normand
.strip()) == 0:
1213 # Special case to make sure there's something to consume
1214 self
._res
= _Group([], self
._text
_loc
)
1217 # Parse first level items
1218 items
= self
._parse
_items
()
1220 # Make sure there's nothing left
1221 self
._skip
_ws
_and
_comments
()
1223 if self
._isnt
_done
():
1225 "Unexpected character `{}`".format(self
._normand
[self
._at
])
1228 # Set main group item
1229 self
._res
= _Group(items
, self
._text
_loc
)
1232 # The return type of parse().
1238 variables
: VariablesT
,
1241 bo
: Optional
[ByteOrder
],
1243 self
= cls
.__new
__(cls
)
1244 self
._init
(data
, variables
, labels
, offset
, bo
)
1247 def __init__(self
, *args
, **kwargs
): # type: ignore
1248 raise NotImplementedError
1253 variables
: VariablesT
,
1256 bo
: Optional
[ByteOrder
],
1259 self
._vars
= variables
1260 self
._labels
= labels
1261 self
._offset
= offset
1269 # Dictionary of updated variable names to their last computed value.
1271 def variables(self
):
1274 # Dictionary of updated main group label names to their computed
1285 # Updated byte order.
1287 def byte_order(self
):
1291 # Raises a parse error for the item `item`, creating it using the
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The error is located at the text location of `item`
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1297 # The `ICITTE` reserved name.
1298 _icitte_name
= "ICITTE"
1301 # Base node visitor.
1303 # Calls the _visit_name() method for each name node which isn't the name
1305 class _NodeVisitor(ast
.NodeVisitor
):
1307 self
._parent
_is
_call
= False
1309 def generic_visit(self
, node
: ast
.AST
):
1310 if type(node
) is ast
.Call
:
1311 self
._parent
_is
_call
= True
1312 elif type(node
) is ast
.Name
and not self
._parent
_is
_call
:
1313 self
._visit
_name
(node
.id)
1315 super().generic_visit(node
)
1316 self
._parent
_is
_call
= False
1319 def _visit_name(self
, name
: str):
1323 # Expression validator: validates that all the names within the
1324 # expression are allowed.
1325 class _ExprValidator(_NodeVisitor
):
1326 def __init__(self
, item
: _ExprItemT
, allowed_names
: Set
[str]):
1329 self
._allowed
_names
= allowed_names
1331 def _visit_name(self
, name
: str):
1332 # Make sure the name refers to a known and reachable
1333 # variable/label name.
1334 if name
!= _icitte_name
and name
not in self
._allowed
_names
:
1335 msg
= "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
1336 name
, self
._item
.expr_str
1339 allowed_names
= self
._allowed
_names
.copy()
1340 allowed_names
.add(_icitte_name
)
1342 if len(allowed_names
) > 0:
1343 allowed_names_str
= ", ".join(
1344 sorted(["`{}`".format(name
) for name
in allowed_names
])
1346 msg
+= "; the legal names are {{{}}}".format(allowed_names_str
)
1350 self
._item
.text_loc
,
1354 # Expression visitor getting all the contained names.
1355 class _ExprNamesVisitor(_NodeVisitor
):
1357 self
._parent
_is
_call
= False
1358 self
._names
= set() # type: Set[str]
1364 def _visit_name(self
, name
: str):
1365 self
._names
.add(name
)
1372 variables
: VariablesT
,
1375 bo
: Optional
[ByteOrder
],
1377 self
.variables
= variables
.copy()
1378 self
.labels
= labels
.copy()
1379 self
.offset
= offset
1383 # Generator of data and final state from a group item.
1385 # Generation happens in memory at construction time. After building, use
1386 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1387 # get the resulting context.
1389 # The steps of generation are:
1391 # 1. Validate that each repetition, conditional, and LEB128 integer
1392 # expression uses only reachable names.
1394 # 2. Compute and keep the effective repetition count, conditional value,
1395 # and LEB128 integer value for each repetition and LEB128 integer
1398 # 3. Generate bytes, updating the initial state as it goes which becomes
1399 # the final state after the operation.
1401 # During the generation, when handling a `_Rep`, `_Cond`, or
1402 # `_Leb128Int` item, we already have the effective repetition count,
1403 # conditional value, or value of the instance.
1405 # When handling a `_Group` item, first update the current labels with
1406 # all the immediate (not nested) labels, and then handle each
# contained item. This gives contained items access to "future" outer
1408 # labels. Then remove the immediate labels from the state so that
1409 # outer items don't have access to inner labels.
1414 variables
: VariablesT
,
1417 bo
: Optional
[ByteOrder
],
1419 self
._validate
_vl
_exprs
(group
, set(variables
.keys()), set(labels
.keys()))
1420 self
._vl
_instance
_vals
= self
._compute
_vl
_instance
_vals
(
1421 group
, _GenState(variables
, labels
, offset
, bo
)
1423 self
._gen
(group
, _GenState(variables
, labels
, offset
, bo
))
1430 # Updated variables.
def variables(self):
    # The variables live in the final generation state
    final_state = self._final_state
    return final_state.variables
1435 # Updated main group labels.
1438 return self
._final
_state
.labels
1443 return self
._final
_state
.offset
1445 # Updated byte order.
1448 return self
._final
_state
.bo
1450 # Returns the set of used, non-called names within the AST
1451 # expression `expr`.
1453 def _names_of_expr(expr
: ast
.Expression
):
1454 visitor
= _ExprNamesVisitor()
1456 return visitor
.names
1458 # Validates that all the repetition, conditional, and LEB128 integer
1459 # expressions within `group` don't refer, directly or indirectly, to
1460 # subsequent labels.
1462 # The strategy here is to keep a set of allowed label names, per
1463 # group, initialized to `allowed_label_names`, and a set of allowed
1464 # variable names initialized to `allowed_variable_names`.
1466 # Then, depending on the type of `item`:
1469 # Add its name to the local allowed label names: a label
1470 # occurring before a repetition, and not within a nested group,
1471 # is always reachable.
1474 # If all the names within its expression are allowed, then add
1475 # its name to the allowed variable names.
1477 # Otherwise, remove its name from the allowed variable names (if
1478 # it's in there): a variable which refers to an unreachable name
1479 # is unreachable itself.
# `_Rep`, `_Cond`, and `_Leb128Int`:
1482 # Make sure all the names within its expression are allowed.
1485 # Call this function for each contained item with a _copy_ of
1486 # the current allowed label names and the same current allowed
1489 def _validate_vl_exprs(
1490 item
: _Item
, allowed_variable_names
: Set
[str], allowed_label_names
: Set
[str]
1492 if type(item
) is _Label
:
1493 allowed_label_names
.add(item
.name
)
1494 elif type(item
) is _VarAssign
:
1495 # Check if this variable name is allowed
1498 for name
in _Gen
._names
_of
_expr
(item
.expr
):
1500 allowed_label_names | allowed_variable_names | {_icitte_name}
1507 allowed_variable_names
.add(item
.name
)
1508 elif item
.name
in allowed_variable_names
:
1509 allowed_variable_names
.remove(item
.name
)
1510 elif isinstance(item
, _Leb128Int
):
1511 # Validate the expression
1512 _ExprValidator(item
, allowed_label_names | allowed_variable_names
).visit(
1515 elif type(item
) is _Rep
or type(item
) is _Cond
:
1516 # Validate the expression first
1517 _ExprValidator(item
, allowed_label_names | allowed_variable_names
).visit(
1521 # Validate inner item
1522 _Gen
._validate
_vl
_exprs
(
1523 item
.item
, allowed_variable_names
, allowed_label_names
1525 elif type(item
) is _Group
:
1526 # Copy `allowed_label_names` so that this frame cannot
1527 # access the nested label names.
1528 group_allowed_label_names
= allowed_label_names
.copy()
1530 for subitem
in item
.items
:
1531 _Gen
._validate
_vl
_exprs
(
1532 subitem
, allowed_variable_names
, group_allowed_label_names
1535 # Evaluates the expression of `item` considering the current
1536 # generation state `state`.
1538 # If `allow_float` is `True`, then the type of the result may be
1541 def _eval_item_expr(
1544 allow_float
: bool = False,
1546 syms
= {} # type: VariablesT
1547 syms
.update(state
.labels
)
1549 # Set the `ICITTE` name to the current offset
1550 syms
[_icitte_name
] = state
.offset
1552 # Add the current variables
1553 syms
.update(state
.variables
)
1555 # Validate the node and its children
1556 _ExprValidator(item
, set(syms
.keys())).visit(item
.expr
)
1558 # Compile and evaluate expression node
1560 val
= eval(compile(item
.expr
, "", "eval"), None, syms
)
1561 except Exception as exc
:
1562 _raise_error_for_item(
1563 "Failed to evaluate expression `{}`: {}".format(item
.expr_str
, exc
),
1567 # Convert `bool` result type to `int` to normalize
1568 if type(val
) is bool:
1571 # Validate result type
1572 expected_types
= {int}
# type: Set[type]
1576 expected_types
.add(float)
1577 type_msg
+= " or `float`"
1579 if type(val
) not in expected_types
:
1580 _raise_error_for_item(
1581 "Invalid expression `{}`: expecting result type {}, not `{}`".format(
1582 item
.expr_str
, type_msg
, type(val
).__name
__
1589 # Returns the size, in bytes, required to encode the value `val`
1590 # with LEB128 (signed version if `is_signed` is `True`).
1592 def _leb128_size_for_val(val
: int, is_signed
: bool):
1594 # Equivalent upper bound.
1596 # For example, if `val` is -128, then the full integer for
1597 # this number of bits would be [-128, 127].
1600 # Number of bits (add one for the sign if needed)
1601 bits
= val
.bit_length() + int(is_signed
)
1606 # Seven bits per byte
1607 return math
.ceil(bits
/ 7)
1609 # Returns the offset `offset` aligned according to `item`.
def _align_offset(offset: int, item: _AlignOffset):
    # `item.val` divided by eight gives the alignment step in bytes
    step = item.val // 8

    # Count how many whole steps are needed to reach or pass `offset`
    # (for a positive `step`), then convert back to an offset
    whole_steps = (offset + step - 1) // step
    return whole_steps * step
1615 # Computes the effective value for each repetition, conditional, and
1616 # LEB128 integer instance, filling `instance_vals` (if not `None`)
1617 # and returning `instance_vals`.
1619 # At this point it must be known that, for a given variable-length
1620 # item, its expression only contains reachable names.
1622 # When handling a `_Rep` or `_Cond` item, this function appends its
1623 # effective multiplier/value to `instance_vals` _before_ handling
1624 # its repeated/conditional item.
1626 # When handling a `_VarAssign` item, this function only evaluates it
1627 # if all its names are reachable.
1629 def _compute_vl_instance_vals(
1630 item
: _Item
, state
: _GenState
, instance_vals
: Optional
[List
[int]] = None
1632 if instance_vals
is None:
1635 if isinstance(item
, _ScalarItem
):
1636 state
.offset
+= item
.size
1637 elif type(item
) is _Label
:
1638 state
.labels
[item
.name
] = state
.offset
1639 elif type(item
) is _VarAssign
:
1640 # Check if all the names are reachable
1643 for name
in _Gen
._names
_of
_expr
(item
.expr
):
1645 name
!= _icitte_name
1646 and name
not in state
.variables
1647 and name
not in state
.labels
1649 # A name is unknown: cannot evaluate
1654 # Evaluate the expression and keep the result
1655 state
.variables
[item
.name
] = _Gen
._eval
_item
_expr
(item
, state
, True)
1656 elif type(item
) is _SetOffset
:
1657 state
.offset
= item
.val
1658 elif type(item
) is _AlignOffset
:
1659 state
.offset
= _Gen
._align
_offset
(state
.offset
, item
)
1660 elif isinstance(item
, _Leb128Int
):
1661 # Evaluate the expression
1662 val
= _Gen
._eval
_item
_expr
(item
, state
)
1665 if type(item
) is _ULeb128Int
and val
< 0:
1666 _raise_error_for_item(
1667 "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
1673 # Add the evaluation result to the to variable-length item
1675 instance_vals
.append(val
)
1678 state
.offset
+= _Gen
._leb
128_size
_for
_val
(val
, type(item
) is _SLeb128Int
)
1679 elif type(item
) is _Rep
:
1680 # Evaluate the expression and keep the result
1681 val
= _Gen
._eval
_item
_expr
(item
, state
)
1685 _raise_error_for_item(
1686 "Invalid expression `{}`: unexpected negative result {:,}".format(
1692 # Add to variable-length item instance values
1693 instance_vals
.append(val
)
1695 # Process the repeated item `val` times
1696 for _
in range(val
):
1697 _Gen
._compute
_vl
_instance
_vals
(item
.item
, state
, instance_vals
)
1698 elif type(item
) is _Cond
:
1699 # Evaluate the expression and keep the result
1700 val
= _Gen
._eval
_item
_expr
(item
, state
)
1702 # Add to variable-length item instance values
1703 instance_vals
.append(val
)
1705 # Process the conditional item if needed
1707 _Gen
._compute
_vl
_instance
_vals
(item
.item
, state
, instance_vals
)
1708 elif type(item
) is _Group
:
1709 prev_labels
= state
.labels
.copy()
1712 for subitem
in item
.items
:
1713 _Gen
._compute
_vl
_instance
_vals
(subitem
, state
, instance_vals
)
1715 state
.labels
= prev_labels
1717 return instance_vals
# Dry handler for the items which don't change the offset: leaves
# `state` untouched and returns `next_vl_instance` as is.
def _update_offset_noop(self, item: _Item, state: _GenState, next_vl_instance: int):
    return next_vl_instance
# Dry handler for the scalar item `item`: advances `state.offset` by
# `item.size` without generating any data, and returns
# `next_vl_instance` as is (a scalar item consumes no variable-length
# item instance).
def _dry_handle_scalar_item(
    self, item: _ScalarItem, state: _GenState, next_vl_instance: int
):
    state.offset += item.size
    return next_vl_instance
# Dry handler for the LEB128 integer item `item`: advances
# `state.offset` by the encoded size of its precomputed value without
# generating any data, and returns the index of the next
# variable-length item instance.
def _dry_handle_leb128_int_item(
    self, item: _Leb128Int, state: _GenState, next_vl_instance: int
):
    # Get the value from `self._vl_instance_vals` _before_
    # incrementing `next_vl_instance` to honor the order of
    # _compute_vl_instance_vals().
    state.offset += self._leb128_size_for_val(
        self._vl_instance_vals[next_vl_instance], type(item) is _SLeb128Int
    )

    # Consumed this instance
    return next_vl_instance + 1
# Dry handler for the group item `item`: dry-handles each of its
# subitems in order, threading the next variable-length item instance
# index through them, and returns the updated index.
def _dry_handle_group_item(
    self, item: _Group, state: _GenState, next_vl_instance: int
):
    for subitem in item.items:
        next_vl_instance = self._dry_handle_item(subitem, state, next_vl_instance)

    return next_vl_instance
# Dry handler for the repetition item `item`: dry-handles the repeated
# item as many times as the precomputed multiplier says, and returns
# the updated next variable-length item instance index.
def _dry_handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
    # Get the value from `self._vl_instance_vals` _before_
    # incrementing `next_vl_instance` to honor the order of
    # _compute_vl_instance_vals().
    mul = self._vl_instance_vals[next_vl_instance]
    next_vl_instance += 1

    for _ in range(mul):
        next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)

    return next_vl_instance
# Dry handler for the conditional item `item`: dry-handles the
# conditional item only if its precomputed value is true, and returns
# the updated next variable-length item instance index.
def _dry_handle_cond_item(
    self, item: _Cond, state: _GenState, next_vl_instance: int
):
    # Get the value from `self._vl_instance_vals` _before_
    # incrementing `next_vl_instance` to honor the order of
    # _compute_vl_instance_vals().
    val = self._vl_instance_vals[next_vl_instance]
    next_vl_instance += 1

    if val:
        next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)

    return next_vl_instance
# Dry handler for the offset alignment item `item`: aligns
# `state.offset` according to `item` without generating any padding,
# and returns `next_vl_instance` as is.
def _dry_handle_align_offset_item(
    self, item: _AlignOffset, state: _GenState, next_vl_instance: int
):
    state.offset = self._align_offset(state.offset, item)
    return next_vl_instance
# Dry handler for the offset setting item `item`: sets `state.offset`
# to `item.val` and returns `next_vl_instance` as is.
def _dry_handle_set_offset_item(
    self, item: _SetOffset, state: _GenState, next_vl_instance: int
):
    state.offset = item.val
    return next_vl_instance
# Updates `state.offset` considering the generated data of `item`,
# without generating any, and returns the updated next
# variable-length item instance.
#
# Dispatches on the exact type of `item` through
# `self._dry_handle_item_funcs`.
def _dry_handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
    return self._dry_handle_item_funcs[type(item)](item, state, next_vl_instance)
# Handles the byte item `item`: appends `item.val` to the generated
# data and advances `state.offset` by `item.size`.
def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
    self._data.append(item.val)
    state.offset += item.size
    return next_vl_instance
# Handles the string item `item`: appends `item.data` to the generated
# data and advances `state.offset` by `item.size`.
def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
    self._data += item.data
    state.offset += item.size
    return next_vl_instance
1804 # Handles the byte order setting item `item`.
1805 def _handle_set_bo_item(
1806 self
, item
: _SetBo
, state
: _GenState
, next_vl_instance
: int
1808 # Update current byte order
1810 return next_vl_instance
# Handles the variable assignment item `item`: evaluates its
# expression with the current state and stores the result as the
# variable named `item.name`.
def _handle_var_assign_item(
    self, item: _VarAssign, state: _GenState, next_vl_instance: int
):
    # The third argument of _eval_item_expr() is its `allow_float`
    # flag: a variable may hold a `float` value
    state.variables[item.name] = self._eval_item_expr(item, state, True)
    return next_vl_instance
1820 # Handles the fixed-length integer item `item`.
1821 def _handle_fl_int_item(self
, val
: int, item
: _FlNum
, state
: _GenState
):
1823 if val
< -(2 ** (item
.len - 1)) or val
> 2**item
.len - 1:
1824 _raise_error_for_item(
1825 "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
1826 val
, item
.len, item
.expr_str
, state
.offset
1831 # Encode result on 64 bits (to extend the sign bit whatever the
1832 # value of `item.len`).
1835 ">" if state
.bo
in (None, ByteOrder
.BE
) else "<",
1836 "Q" if val
>= 0 else "q",
1841 # Keep only the requested length
1842 len_bytes
= item
.len // 8
1844 if state
.bo
in (None, ByteOrder
.BE
):
1845 # Big endian: keep last bytes
1846 data
= data
[-len_bytes
:]
1848 # Little endian: keep first bytes
1849 assert state
.bo
== ByteOrder
.LE
1850 data
= data
[:len_bytes
]
1852 # Append to current bytes and update offset
# Handles the fixed-length floating point number item `item`.
1856 def _handle_fl_float_item(self
, val
: float, item
: _FlNum
, state
: _GenState
):
1858 if item
.len not in (32, 64):
1859 _raise_error_for_item(
1860 "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
1867 self
._data
+= struct
.pack(
1869 ">" if state
.bo
in (None, ByteOrder
.BE
) else "<",
1870 "f" if item
.len == 32 else "d",
1875 # Handles the fixed-length number item `item`.
1876 def _handle_fl_num_item(
1877 self
, item
: _FlNum
, state
: _GenState
, next_vl_instance
: int
1880 val
= self
._eval
_item
_expr
(item
, state
, True)
1882 # Validate current byte order
1883 if state
.bo
is None and item
.len > 8:
1884 _raise_error_for_item(
1885 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
1891 # Handle depending on type
1892 if type(val
) is int:
1893 self
._handle
_fl
_int
_item
(val
, item
, state
)
1895 assert type(val
) is float
1896 self
._handle
_fl
_float
_item
(val
, item
, state
)
1899 state
.offset
+= item
.size
1901 return next_vl_instance
# Handles the LEB128 integer item `item`: appends the LEB128 encoding
# of its precomputed value to the generated data and returns the index
# of the next variable-length item instance.
def _handle_leb128_int_item(
    self, item: _Leb128Int, state: _GenState, next_vl_instance: int
):
    # Get the precomputed value
    val = self._vl_instance_vals[next_vl_instance]

    # Size in bytes of the encoding
    size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)

        # Next group of seven bits (arithmetic shift: keeps the sign
        # of a negative value)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= ~0x80

    # Keep the current offset in sync with the generated data, like
    # _dry_handle_leb128_int_item() and the other data handlers do
    state.offset += size

    # Consumed this instance
    return next_vl_instance + 1
# Handles the group item `item`, only removing the immediate labels
# from `state.labels` if `remove_immediate_labels` is `True`.
#
# Returns the updated next variable-length item instance.
def _handle_group_item(
    self,
    item: _Group,
    state: _GenState,
    next_vl_instance: int,
    remove_immediate_labels: bool = True,
):
    # Compute the values of the immediate (not nested) labels. Those
    # labels are reachable by any expression within the group.
    #
    # This is a dry run on a scratch state which only starts at the
    # current offset: no data is generated and `state.offset` doesn't
    # move.
    tmp_state = _GenState({}, {}, state.offset, None)
    immediate_label_names = set()  # type: Set[str]
    tmp_next_vl_instance = next_vl_instance

    for subitem in item.items:
        if type(subitem) is _Label:
            # New immediate label
            state.labels[subitem.name] = tmp_state.offset
            immediate_label_names.add(subitem.name)

        tmp_next_vl_instance = self._dry_handle_item(
            subitem, tmp_state, tmp_next_vl_instance
        )

    # Handle each item now with the actual state
    for subitem in item.items:
        next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

    # Remove immediate labels if required so that outer items won't
    # reach inner labels.
    if remove_immediate_labels:
        for name in immediate_label_names:
            del state.labels[name]

    return next_vl_instance
# Handles the repetition item `item`: handles the repeated item as
# many times as the precomputed repetition count says, and returns the
# updated next variable-length item instance.
def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
    # Get the precomputed repetition count
    mul = self._vl_instance_vals[next_vl_instance]

    # Consumed this instance
    next_vl_instance += 1

    for _ in range(mul):
        next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

    return next_vl_instance
# Handles the conditional item `item`: handles the conditional item
# only if the precomputed conditional value is true, and returns the
# updated next variable-length item instance.
def _handle_cond_item(self, item: _Cond, state: _GenState, next_vl_instance: int):
    # Get the precomputed conditional value
    val = self._vl_instance_vals[next_vl_instance]

    # Consumed this instance
    next_vl_instance += 1

    if val:
        next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

    return next_vl_instance
# Handles the offset setting item `item`: sets `state.offset` to
# `item.val` (no data is generated).
def _handle_set_offset_item(
    self, item: _SetOffset, state: _GenState, next_vl_instance: int
):
    state.offset = item.val
    return next_vl_instance
# Handles offset alignment item `item` (adds padding).
#
# Aligns `state.offset` according to `item`, and appends one
# `item.pad_val` byte to the generated data for each byte skipped.
def _handle_align_offset_item(
    self, item: _AlignOffset, state: _GenState, next_vl_instance: int
):
    init_offset = state.offset
    state.offset = self._align_offset(state.offset, item)
    self._data += bytes([item.pad_val] * (state.offset - init_offset))
    return next_vl_instance
# Handles the label item `item`.
#
# There's nothing to do here: _handle_group_item() already sets the
# values of the immediate labels of a group before handling its items.
def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
    return next_vl_instance
# Handles the item `item`, returning the updated next variable-length
# item instance.
#
# Dispatches on the exact type of `item` through
# `self._item_handlers`.
def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
    return self._item_handlers[type(item)](item, state, next_vl_instance)
# Generates the data (`self._data`) and final state
# (`self._final_state`) from `group` and the initial state `state`.
#
# `state` is modified in place while generating: it's the very object
# which becomes `self._final_state`.
def _gen(self, group: _Group, state: _GenState):
    # Generated data, initially empty
    self._data = bytearray()

    # Item handlers (generate data and update the state), keyed by
    # exact item type
    self._item_handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Dry item handlers (only updates the state offset)
    self._dry_handle_item_funcs = {
        _AlignOffset: self._dry_handle_align_offset_item,
        _Byte: self._dry_handle_scalar_item,
        _Cond: self._dry_handle_cond_item,
        _FlNum: self._dry_handle_scalar_item,
        _Group: self._dry_handle_group_item,
        _Label: self._update_offset_noop,
        _Rep: self._dry_handle_rep_item,
        _SetBo: self._update_offset_noop,
        _SetOffset: self._dry_handle_set_offset_item,
        _SLeb128Int: self._dry_handle_leb128_int_item,
        _Str: self._dry_handle_scalar_item,
        _ULeb128Int: self._dry_handle_leb128_int_item,
        _VarAssign: self._update_offset_noop,
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Handle the group item, _not_ removing the immediate labels
    # because the `labels` property offers them.
    self._handle_group_item(group, state, 0, False)

    # This is actually the final state
    self._final_state = state
2061 # Returns a `ParseResult` instance containing the bytes encoded by the
2062 # input string `normand`.
2064 # `init_variables` is a dictionary of initial variable names (valid
2065 # Python names) to integral values. A variable name must not be the
2066 # reserved name `ICITTE`.
2068 # `init_labels` is a dictionary of initial label names (valid Python
2069 # names) to integral values. A label name must not be the reserved name
2072 # `init_offset` is the initial offset.
2074 # `init_byte_order` is the initial byte order.
2076 # Raises `ParseError` on any parsing error.
2079 init_variables
: Optional
[VariablesT
] = None,
2080 init_labels
: Optional
[LabelsT
] = None,
2081 init_offset
: int = 0,
2082 init_byte_order
: Optional
[ByteOrder
] = None,
2084 if init_variables
is None:
2087 if init_labels
is None:
2091 _Parser(normand
, init_variables
, init_labels
).res
,
2097 return ParseResult
._create
( # pyright: ignore[reportPrivateUsage]
2098 gen
.data
, gen
.variables
, gen
.labels
, gen
.offset
, gen
.bo
2102 # Parses the command-line arguments.
2103 def _parse_cli_args():
2107 ap
= argparse
.ArgumentParser()
2114 help="initial offset (positive)",
2120 choices
=["be", "le"],
2122 help="initial byte order (`be` or `le`)",
2128 help="add an initial variable (may be repeated)",
2135 help="add an initial label (may be repeated)",
2138 "--version", action
="version", version
="Normand {}".format(__version__
)
2145 help="input path (none means standard input)",
2149 return ap
.parse_args()
# Raises a command-line error with the message `msg`.
#
# The error is a `RuntimeError` of which the message is `msg` prefixed
# with `Command-line error: `; this function never returns.
def _raise_cli_error(msg: str) -> NoReturn:
    raise RuntimeError("Command-line error: {}".format(msg))
2157 # Returns a dictionary of string to integers from the list of strings
2158 # `args` containing `NAME=VAL` entries.
2159 def _dict_from_arg(args
: Optional
[List
[str]]):
2160 d
= {} # type: LabelsT
2166 m
= re
.match(r
"({})=(\d+)$".format(_py_name_pat
.pattern
), arg
)
2169 _raise_cli_error("Invalid assignment {}".format(arg
))
2171 d
[m
.group(1)] = int(m
.group(2))
2176 # CLI entry point without exception handling.
2181 args
= _parse_cli_args()
2184 if args
.path
is None:
2185 normand
= sys
.stdin
.read()
2187 with
open(args
.path
) as f
:
2190 # Variables and labels
2191 variables
= typing
.cast(VariablesT
, _dict_from_arg(args
.var
))
2192 labels
= _dict_from_arg(args
.label
)
2196 _raise_cli_error("Invalid negative offset {}")
2198 # Validate and set byte order
2199 bo
= None # type: Optional[ByteOrder]
2201 if args
.byte_order
is not None:
2202 if args
.byte_order
== "be":
2205 assert args
.byte_order
== "le"
2210 res
= parse(normand
, variables
, labels
, args
.offset
, bo
)
2211 except ParseError
as exc
:
2214 if args
.path
is not None:
2215 prefix
= "{}:".format(os
.path
.abspath(args
.path
))
2218 "{}{}:{} - {}".format(
2219 prefix
, exc
.text_loc
.line_no
, exc
.text_loc
.col_no
, str(exc
)
2224 sys
.stdout
.buffer.write(res
.data
)
# Prints the exception message `msg` and exits with status 1.
#
# The message goes to the standard error stream; a final period is
# appended to `msg` if it doesn't already end with one.
def _fail(msg: str) -> NoReturn:
    if not msg.endswith("."):
        msg += "."

    print(msg, file=sys.stderr)
    sys.exit(1)
2240 except Exception as exc
:
2244 if __name__
== "__main__":