1 # The MIT License (MIT)
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
28 # Feel free to copy this module file to your own project to use Normand.
30 # Upstream repository: <https://github.com/efficios/normand>.
32 __author__
= "Philippe Proulx"
54 from typing
import Any
, Set
, Dict
, List
, Union
, Pattern
, Callable
, NoReturn
, Optional
57 # Text location (line and column numbers).
60 def _create(cls
, line_no
: int, col_no
: int):
61 self
= cls
.__new
__(cls
)
62 self
._init
(line_no
, col_no
)
65 def __init__(*args
, **kwargs
): # type: ignore
66 raise NotImplementedError
68 def _init(self
, line_no
: int, col_no
: int):
69 self
._line
_no
= line_no
83 return "TextLocation({}, {})".format(self
._line
_no
, self
._col
_no
)
def __init__(self, text_loc: TextLocation):
    # Remember where, in the source text, this item was found.
    self._text_loc = text_loc
91 # Source text location.
98 class _ScalarItem(_Item
):
99 # Returns the size, in bytes, of this item.
102 def size(self
) -> int:
112 class _Byte(_ScalarItem
, _RepableItem
):
113 def __init__(self
, val
: int, text_loc
: TextLocation
):
114 super().__init
__(text_loc
)
127 return "_Byte({}, {})".format(hex(self
._val
), repr(self
._text
_loc
))
131 class _Str(_ScalarItem
, _RepableItem
):
132 def __init__(self
, data
: bytes
, text_loc
: TextLocation
):
133 super().__init
__(text_loc
)
143 return len(self
._data
)
146 return "_Str({}, {})".format(repr(self
._data
), repr(self
._text
_loc
))
151 class ByteOrder(enum
.Enum
):
159 # Byte order setting.
161 def __init__(self
, bo
: ByteOrder
, text_loc
: TextLocation
):
162 super().__init
__(text_loc
)
170 return "_SetBo({}, {})".format(repr(self
._bo
), repr(self
._text
_loc
))
175 def __init__(self
, name
: str, text_loc
: TextLocation
):
176 super().__init
__(text_loc
)
185 return "_Label({}, {})".format(repr(self
._name
), repr(self
._text
_loc
))
189 class _SetOffset(_Item
):
190 def __init__(self
, val
: int, text_loc
: TextLocation
):
191 super().__init
__(text_loc
)
194 # Offset value (bytes).
200 return "_SetOffset({}, {})".format(repr(self
._val
), repr(self
._text
_loc
))
204 class _AlignOffset(_Item
):
205 def __init__(self
, val
: int, pad_val
: int, text_loc
: TextLocation
):
206 super().__init
__(text_loc
)
208 self
._pad
_val
= pad_val
210 # Alignment value (bits).
215 # Padding byte value.
221 return "_AlignOffset({}, {}, {})".format(
222 repr(self
._val
), repr(self
._pad
_val
), repr(self
._text
_loc
)
226 # Mixin containing an AST expression and its string.
228 def __init__(self
, expr_str
: str, expr
: ast
.Expression
):
229 self
._expr
_str
= expr_str
235 return self
._expr
_str
237 # Expression node to evaluate.
243 # Variable assignment.
244 class _VarAssign(_Item
, _ExprMixin
):
246 self
, name
: str, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
248 super().__init
__(text_loc
)
249 _ExprMixin
.__init
__(self
, expr_str
, expr
)
258 return "_VarAssign({}, {}, {}, {})".format(
260 repr(self
._expr
_str
),
262 repr(self
._text
_loc
),
266 # Fixed-length number, possibly needing more than one byte.
267 class _FlNum(_ScalarItem
, _RepableItem
, _ExprMixin
):
269 self
, expr_str
: str, expr
: ast
.Expression
, len: int, text_loc
: TextLocation
271 super().__init
__(text_loc
)
272 _ExprMixin
.__init
__(self
, expr_str
, expr
)
282 return self
._len
// 8
285 return "_FlNum({}, {}, {}, {})".format(
286 repr(self
._expr
_str
),
289 repr(self
._text
_loc
),
294 class _Leb128Int(_Item
, _RepableItem
, _ExprMixin
):
def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
    # Initialize the item part with the source text location.
    super().__init__(text_loc)

    # Initialize the expression mixin part explicitly with the
    # expression string and its AST node.
    _ExprMixin.__init__(self, expr_str, expr)
300 return "{}({}, {}, {})".format(
301 self
.__class
__.__name
__,
302 repr(self
._expr
_str
),
304 repr(self
._text
_loc
),
308 # Unsigned LEB128 integer.
309 class _ULeb128Int(_Leb128Int
, _RepableItem
, _ExprMixin
):
313 # Signed LEB128 integer.
314 class _SLeb128Int(_Leb128Int
, _RepableItem
, _ExprMixin
):
319 class _Group(_Item
, _RepableItem
):
320 def __init__(self
, items
: List
[_Item
], text_loc
: TextLocation
):
321 super().__init
__(text_loc
)
330 return "_Group({}, {})".format(repr(self
._items
), repr(self
._text
_loc
))
334 class _Rep(_Item
, _ExprMixin
):
336 self
, item
: _Item
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLocation
338 super().__init
__(text_loc
)
339 _ExprMixin
.__init
__(self
, expr_str
, expr
)
348 return "_Rep({}, {}, {}, {})".format(
350 repr(self
._expr
_str
),
352 repr(self
._text
_loc
),
356 # Expression item type.
357 _ExprItemT
= Union
[_FlNum
, _Leb128Int
, _VarAssign
, _Rep
]
360 # A parsing error containing a message and a text location.
361 class ParseError(RuntimeError):
363 def _create(cls
, msg
: str, text_loc
: TextLocation
):
364 self
= cls
.__new
__(cls
)
365 self
._init
(msg
, text_loc
)
368 def __init__(self
, *args
, **kwargs
): # type: ignore
369 raise NotImplementedError
def _init(self, msg: str, text_loc: TextLocation):
    # Let the base exception keep the message.
    super().__init__(msg)

    # Keep the source text location at which the error occurred.
    self._text_loc = text_loc
375 # Source text location.
378 return self
._text
_loc
381 # Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # Build the parsing error through its private factory, and then
    # raise it.
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
386 # Variables dictionary type (for type hints).
387 VariablesT
= Dict
[str, Union
[int, float]]
390 # Labels dictionary type (for type hints).
391 LabelsT
= Dict
[str, int]
394 # Python name pattern.
395 _py_name_pat
= re
.compile(r
"[a-zA-Z_][a-zA-Z0-9_]*")
400 # The constructor accepts a Normand input. After building, use the `res`
401 # property to get the resulting main group.
403 # Builds a parser to parse the Normand input `normand`, parsing
405 def __init__(self
, normand
: str, variables
: VariablesT
, labels
: LabelsT
):
406 self
._normand
= normand
410 self
._label
_names
= set(labels
.keys())
411 self
._var
_names
= set(variables
.keys())
414 # Result (main group).
419 # Current text location.
422 return TextLocation
._create
( # pyright: ignore[reportPrivateUsage]
423 self
._line
_no
, self
._col
_no
426 # Returns `True` if this parser is done parsing.
428 return self
._at
== len(self
._normand
)
430 # Returns `True` if this parser isn't done parsing.
431 def _isnt_done(self
):
432 return not self
._is
_done
()
434 # Raises a parse error, creating it using the message `msg` and the
435 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Delegate to the module-level function, reporting the error at the
    # current text location of this parser.
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
439 # Tries to make the pattern `pat` match the current substring,
440 # returning the match object and updating `self._at`,
441 # `self._line_no`, and `self._col_no` on success.
442 def _try_parse_pat(self
, pat
: Pattern
[str]):
443 m
= pat
.match(self
._normand
, self
._at
)
448 # Skip matched string
449 self
._at
+= len(m
.group(0))
452 self
._line
_no
+= m
.group(0).count("\n")
454 # Update column number
455 for i
in reversed(range(self
._at
)):
456 if self
._normand
[i
] == "\n" or i
== 0:
458 self
._col
_no
= self
._at
+ 1
460 self
._col
_no
= self
._at
- i
464 # Return match object
467 # Expects the pattern `pat` to match the current substring,
468 # returning the match object and updating `self._at`,
469 # `self._line_no`, and `self._col_no` on success, or raising a parse
470 # error with the message `error_msg` on error.
471 def _expect_pat(self
, pat
: Pattern
[str], error_msg
: str):
473 m
= self
._try
_parse
_pat
(pat
)
477 self
._raise
_error
(error_msg
)
479 # Return match object
482 # Pattern for _skip_ws_and_comments()
483 _ws_or_syms_or_comments_pat
= re
.compile(
484 r
"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
487 # Skips as many whitespaces, insignificant symbol characters, and
488 # comments as possible.
489 def _skip_ws_and_comments(self
):
490 self
._try
_parse
_pat
(self
._ws
_or
_syms
_or
_comments
_pat
)
492 # Pattern for _try_parse_hex_byte()
493 _nibble_pat
= re
.compile(r
"[A-Fa-f0-9]")
495 # Tries to parse a hexadecimal byte, returning a byte item on
497 def _try_parse_hex_byte(self
):
498 begin_text_loc
= self
._text
_loc
500 # Match initial nibble
501 m_high
= self
._try
_parse
_pat
(self
._nibble
_pat
)
507 # Expect another nibble
508 self
._skip
_ws
_and
_comments
()
509 m_low
= self
._expect
_pat
(
510 self
._nibble
_pat
, "Expecting another hexadecimal nibble"
514 return _Byte(int(m_high
.group(0) + m_low
.group(0), 16), begin_text_loc
)
516 # Patterns for _try_parse_bin_byte()
517 _bin_byte_bit_pat
= re
.compile(r
"[01]")
518 _bin_byte_prefix_pat
= re
.compile(r
"%")
520 # Tries to parse a binary byte, returning a byte item on success.
521 def _try_parse_bin_byte(self
):
522 begin_text_loc
= self
._text
_loc
525 if self
._try
_parse
_pat
(self
._bin
_byte
_prefix
_pat
) is None:
530 bits
= [] # type: List[str]
533 self
._skip
_ws
_and
_comments
()
534 m
= self
._expect
_pat
(self
._bin
_byte
_bit
_pat
, "Expecting a bit (`0` or `1`)")
535 bits
.append(m
.group(0))
538 return _Byte(int("".join(bits
), 2), begin_text_loc
)
540 # Patterns for _try_parse_dec_byte()
541 _dec_byte_prefix_pat
= re
.compile(r
"\$\s*")
542 _dec_byte_val_pat
= re
.compile(r
"(?P<neg>-?)(?P<val>\d+)")
544 # Tries to parse a decimal byte, returning a byte item on success.
545 def _try_parse_dec_byte(self
):
546 begin_text_loc
= self
._text
_loc
549 if self
._try
_parse
_pat
(self
._dec
_byte
_prefix
_pat
) is None:
554 m
= self
._expect
_pat
(self
._dec
_byte
_val
_pat
, "Expecting a decimal constant")
557 val
= int(m
.group("val")) * (-1 if m
.group("neg") == "-" else 1)
560 if val
< -128 or val
> 255:
561 _raise_error("Invalid decimal byte value {}".format(val
), begin_text_loc
)
567 return _Byte(val
, begin_text_loc
)
569 # Tries to parse a byte, returning a byte item on success.
570 def _try_parse_byte(self
):
572 item
= self
._try
_parse
_hex
_byte
()
578 item
= self
._try
_parse
_bin
_byte
()
584 item
= self
._try
_parse
_dec
_byte
()
589 # Patterns for _try_parse_str()
590 _str_prefix_pat
= re
.compile(r
'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
591 _str_suffix_pat
= re
.compile(r
'"')
592 _str_str_pat
= re
.compile(r
'(?:(?:\\.)|[^"])*')
594 # Strings corresponding to escape sequence characters
595 _str_escape_seq_strs
= {
609 # Tries to parse a string, returning a string item on success.
610 def _try_parse_str(self
):
611 begin_text_loc
= self
._text
_loc
614 m
= self
._try
_parse
_pat
(self
._str
_prefix
_pat
)
623 if m
.group("len") is not None:
624 encoding
= "utf_{}_{}".format(m
.group("len"), m
.group("bo"))
627 m
= self
._expect
_pat
(self
._str
_str
_pat
, "Expecting a literal string")
629 # Expect end of string
630 self
._expect
_pat
(self
._str
_suffix
_pat
, 'Expecting `"` (end of literal string)')
632 # Replace escape sequences
635 for ec
in '0abefnrtv"\\':
636 val
= val
.replace(r
"\{}".format(ec
), self
._str
_escape
_seq
_strs
[ec
])
639 data
= val
.encode(encoding
)
642 return _Str(data
, begin_text_loc
)
644 # Patterns for _try_parse_group()
645 _group_prefix_pat
= re
.compile(r
"\(")
646 _group_suffix_pat
= re
.compile(r
"\)")
648 # Tries to parse a group, returning a group item on success.
649 def _try_parse_group(self
):
650 begin_text_loc
= self
._text
_loc
653 if self
._try
_parse
_pat
(self
._group
_prefix
_pat
) is None:
658 items
= self
._parse
_items
()
660 # Expect end of group
661 self
._skip
_ws
_and
_comments
()
663 self
._group
_suffix
_pat
, "Expecting an item or `)` (end of group)"
667 return _Group(items
, begin_text_loc
)
669 # Returns a stripped expression string and an AST expression node
670 # from the expression string `expr_str` at text location `text_loc`.
671 def _ast_expr_from_str(self
, expr_str
: str, text_loc
: TextLocation
):
672 # Create an expression node from the expression string
673 expr_str
= expr_str
.strip().replace("\n", " ")
676 expr
= ast
.parse(expr_str
, mode
="eval")
679 "Invalid expression `{}`: invalid syntax".format(expr_str
),
683 return expr_str
, expr
685 # Patterns for _try_parse_num_and_attr()
686 _val_expr_pat
= re
.compile(r
"([^}:]+):\s*")
687 _fl_num_len_attr_pat
= re
.compile(r
"8|16|24|32|40|48|56|64")
688 _leb128_int_attr_pat
= re
.compile(r
"(u|s)leb128")
690 # Tries to parse a value and attribute (fixed length in bits or
691 # `leb128`), returning a value item on success.
692 def _try_parse_num_and_attr(self
):
693 begin_text_loc
= self
._text
_loc
696 m_expr
= self
._try
_parse
_pat
(self
._val
_expr
_pat
)
702 # Create an expression node from the expression string
703 expr_str
, expr
= self
._ast
_expr
_from
_str
(m_expr
.group(1), begin_text_loc
)
706 m_attr
= self
._try
_parse
_pat
(self
._fl
_num
_len
_attr
_pat
)
710 m_attr
= self
._try
_parse
_pat
(self
._leb
128_int
_attr
_pat
)
713 # At this point it's invalid
715 "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
718 # Return LEB128 integer item
719 cls
= _ULeb128Int
if m_attr
.group(1) == "u" else _SLeb128Int
720 return cls(expr_str
, expr
, begin_text_loc
)
722 # Return fixed-length number item
726 int(m_attr
.group(0)),
730 # Pattern for _try_parse_var_assign()
731 _var_assign_pat
= re
.compile(
732 r
"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat
.pattern
)
735 # Tries to parse a variable assignment, returning a variable
736 # assignment item on success.
737 def _try_parse_var_assign(self
):
738 begin_text_loc
= self
._text
_loc
741 m
= self
._try
_parse
_pat
(self
._var
_assign
_pat
)
748 name
= m
.group("name")
750 if name
== _icitte_name
:
752 "`{}` is a reserved variable name".format(_icitte_name
), begin_text_loc
755 if name
in self
._label
_names
:
756 _raise_error("Existing label named `{}`".format(name
), begin_text_loc
)
758 # Add to known variable names
759 self
._var
_names
.add(name
)
761 # Create an expression node from the expression string
762 expr_str
, expr
= self
._ast
_expr
_from
_str
(m
.group("expr"), begin_text_loc
)
772 # Pattern for _try_parse_set_bo()
773 _bo_pat
= re
.compile(r
"[bl]e")
775 # Tries to parse a byte order name, returning a byte order setting
777 def _try_parse_set_bo(self
):
778 begin_text_loc
= self
._text
_loc
781 m
= self
._try
_parse
_pat
(self
._bo
_pat
)
787 # Return corresponding item
788 if m
.group(0) == "be":
789 return _SetBo(ByteOrder
.BE
, begin_text_loc
)
791 assert m
.group(0) == "le"
792 return _SetBo(ByteOrder
.LE
, begin_text_loc
)
794 # Patterns for _try_parse_val_or_var_assign_or_set_bo()
795 _val_var_assign_set_bo_prefix_pat
= re
.compile(r
"\{\s*")
796 _val_var_assign_set_bo_suffix_pat
= re
.compile(r
"\s*}")
798 # Tries to parse a value, a variable assignment, or a byte order
799 # setting, returning an item on success.
800 def _try_parse_val_or_var_assign_or_set_bo(self
):
802 if self
._try
_parse
_pat
(self
._val
_var
_assign
_set
_bo
_prefix
_pat
) is None:
806 # Variable assignment item?
807 item
= self
._try
_parse
_var
_assign
()
811 item
= self
._try
_parse
_num
_and
_attr
()
814 # Byte order setting item?
815 item
= self
._try
_parse
_set
_bo
()
818 # At this point it's invalid
820 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
824 self
._expect
_pat
(self
._val
_var
_assign
_set
_bo
_suffix
_pat
, "Expecting `}`")
827 # Common positive constant integer pattern
828 _pos_const_int_pat
= re
.compile(r
"0[Xx][A-Fa-f0-9]+|\d+")
830 # Tries to parse an offset setting value (after the initial `<`),
831 # returning an offset item on success.
832 def _try_parse_set_offset_val(self
):
833 begin_text_loc
= self
._text
_loc
836 m
= self
._try
_parse
_pat
(self
._pos
_const
_int
_pat
)
843 return _SetOffset(int(m
.group(0), 0), begin_text_loc
)
845 # Tries to parse a label name (after the initial `<`), returning a
846 # label item on success.
847 def _try_parse_label_name(self
):
848 begin_text_loc
= self
._text
_loc
851 m
= self
._try
_parse
_pat
(_py_name_pat
)
860 if name
== _icitte_name
:
862 "`{}` is a reserved label name".format(_icitte_name
), begin_text_loc
865 if name
in self
._label
_names
:
866 _raise_error("Duplicate label name `{}`".format(name
), begin_text_loc
)
868 if name
in self
._var
_names
:
869 _raise_error("Existing variable named `{}`".format(name
), begin_text_loc
)
871 # Add to known label names
872 self
._label
_names
.add(name
)
875 return _Label(name
, begin_text_loc
)
877 # Patterns for _try_parse_label_or_set_offset()
878 _label_set_offset_prefix_pat
= re
.compile(r
"<\s*")
879 _label_set_offset_suffix_pat
= re
.compile(r
"\s*>")
881 # Tries to parse a label or an offset setting, returning an item on
883 def _try_parse_label_or_set_offset(self
):
885 if self
._try
_parse
_pat
(self
._label
_set
_offset
_prefix
_pat
) is None:
889 # Offset setting item?
890 item
= self
._try
_parse
_set
_offset
_val
()
894 item
= self
._try
_parse
_label
_name
()
897 # At this point it's invalid
898 self
._raise
_error
("Expecting a label name or an offset setting value")
901 self
._expect
_pat
(self
._label
_set
_offset
_suffix
_pat
, "Expecting `>`")
904 # Patterns for _try_parse_align_offset()
905 _align_offset_prefix_pat
= re
.compile(r
"@\s*")
906 _align_offset_val_pat
= re
.compile(r
"(\d+)\s*")
907 _align_offset_pad_val_prefix_pat
= re
.compile(r
"~\s*")
909 # Tries to parse an offset alignment, returning an offset alignment
911 def _try_parse_align_offset(self
):
912 begin_text_loc
= self
._text
_loc
915 if self
._try
_parse
_pat
(self
._align
_offset
_prefix
_pat
) is None:
919 align_text_loc
= self
._text
_loc
920 m
= self
._expect
_pat
(
921 self
._align
_offset
_val
_pat
,
922 "Expecting an alignment (positive multiple of eight bits)",
926 val
= int(m
.group(1))
928 if val
<= 0 or (val
% 8) != 0:
930 "Invalid alignment value {} (not a positive multiple of eight)".format(
939 if self
._try
_parse
_pat
(self
._align
_offset
_pad
_val
_prefix
_pat
) is not None:
940 pad_val_text_loc
= self
._text
_loc
941 m
= self
._expect
_pat
(self
._pos
_const
_int
_pat
, "Expecting a byte value")
944 pad_val
= int(m
.group(0), 0)
948 "Invalid padding byte value {}".format(pad_val
),
953 return _AlignOffset(val
, pad_val
, begin_text_loc
)
955 # Patterns for _expect_rep_mul_expr()
956 _rep_expr_prefix_pat
= re
.compile(r
"\{")
957 _rep_expr_pat
= re
.compile(r
"[^}p]+")
958 _rep_expr_suffix_pat
= re
.compile(r
"\}")
960 # Parses the multiplier expression of a repetition (block or
961 # post-item) and returns the expression string and AST node.
962 def _expect_rep_mul_expr(self
):
963 expr_text_loc
= self
._text
_loc
966 m
= self
._try
_parse
_pat
(self
._pos
_const
_int
_pat
)
970 m
= self
._try
_parse
_pat
(_py_name_pat
)
974 if self
._try
_parse
_pat
(self
._rep
_expr
_prefix
_pat
) is None:
975 # At this point it's invalid
977 "Expecting a positive integral multiplier, a name, or `{`"
980 # Expect an expression
981 expr_text_loc
= self
._text
_loc
982 m
= self
._expect
_pat
(self
._rep
_expr
_pat
, "Expecting an expression")
983 expr_str
= m
.group(0)
986 self
._expect
_pat
(self
._rep
_expr
_suffix
_pat
, "Expecting `}`")
988 expr_str
= m
.group(0)
990 expr_str
= m
.group(0)
992 return self
._ast
_expr
_from
_str
(expr_str
, expr_text_loc
)
994 # Pattern for _try_parse_rep_block()
995 _rep_block_prefix_pat
= re
.compile(r
"!r(?:epeat)?\b\s*")
996 _rep_block_end_pat
= re
.compile(r
"!end\b\s*")
998 # Tries to parse a repetition block, returning a repetition item on
1000 def _try_parse_rep_block(self
):
1001 begin_text_loc
= self
._text
_loc
1004 if self
._try
_parse
_pat
(self
._rep
_block
_prefix
_pat
) is None:
1009 self
._skip
_ws
_and
_comments
()
1010 expr_str
, expr
= self
._expect
_rep
_mul
_expr
()
1013 self
._skip
_ws
_and
_comments
()
1014 items_text_loc
= self
._text
_loc
1015 items
= self
._parse
_items
()
1017 # Expect end of block
1018 self
._skip
_ws
_and
_comments
()
1020 self
._rep
_block
_end
_pat
, "Expecting an item or `!end` (end of repetition)"
1024 return _Rep(_Group(items
, items_text_loc
), expr_str
, expr
, begin_text_loc
)
1026 # Tries to parse a base item (anything except a repetition),
1027 # returning it on success.
1028 def _try_parse_base_item(self
):
1030 item
= self
._try
_parse
_byte
()
1032 if item
is not None:
1036 item
= self
._try
_parse
_str
()
1038 if item
is not None:
1041 # Value, variable assignment, or byte order setting item?
1042 item
= self
._try
_parse
_val
_or
_var
_assign
_or
_set
_bo
()
1044 if item
is not None:
1047 # Label or offset setting item?
1048 item
= self
._try
_parse
_label
_or
_set
_offset
()
1050 if item
is not None:
1053 # Offset alignment item?
1054 item
= self
._try
_parse
_align
_offset
()
1056 if item
is not None:
1060 item
= self
._try
_parse
_group
()
1062 if item
is not None:
1065 # Repetition (block) item?
1066 item
= self
._try
_parse
_rep
_block
()
1068 if item
is not None:
1071 # Pattern for _try_parse_rep_post()
1072 _rep_post_prefix_pat
= re
.compile(r
"\*")
1074 # Tries to parse a post-item repetition, returning the expression
1075 # string and AST expression node on success.
1076 def _try_parse_rep_post(self
):
1078 if self
._try
_parse
_pat
(self
._rep
_post
_prefix
_pat
) is None:
1082 # Return expression string and AST expression
1083 self
._skip
_ws
_and
_comments
()
1084 return self
._expect
_rep
_mul
_expr
()
1086 # Tries to parse an item, possibly followed by a repetition,
1087 # returning `True` on success.
1089 # Appends any parsed item to `items`.
1090 def _try_append_item(self
, items
: List
[_Item
]):
1091 self
._skip
_ws
_and
_comments
()
1094 item
= self
._try
_parse
_base
_item
()
1100 # Parse repetition if the base item is repeatable
1101 if isinstance(item
, _RepableItem
):
1102 self
._skip
_ws
_and
_comments
()
1103 rep_text_loc
= self
._text
_loc
1104 rep_ret
= self
._try
_parse
_rep
_post
()
1106 if rep_ret
is not None:
1107 item
= _Rep(item
, rep_ret
[0], rep_ret
[1], rep_text_loc
)
1112 # Parses and returns items, skipping whitespaces, insignificant
1113 # symbols, and comments when allowed, and stopping at the first
1114 # unknown character.
1115 def _parse_items(self
) -> List
[_Item
]:
1116 items
= [] # type: List[_Item]
1118 while self
._isnt
_done
():
1119 # Try to append item
1120 if not self
._try
_append
_item
(items
):
1121 # Unknown at this point
1126 # Parses the whole Normand input, setting `self._res` to the main
1127 # group item on success.
1129 if len(self
._normand
.strip()) == 0:
1130 # Special case to make sure there's something to consume
1131 self
._res
= _Group([], self
._text
_loc
)
1134 # Parse first level items
1135 items
= self
._parse
_items
()
1137 # Make sure there's nothing left
1138 self
._skip
_ws
_and
_comments
()
1140 if self
._isnt
_done
():
1142 "Unexpected character `{}`".format(self
._normand
[self
._at
])
1145 # Set main group item
1146 self
._res
= _Group(items
, self
._text
_loc
)
1149 # The return type of parse().
1155 variables
: VariablesT
,
1158 bo
: Optional
[ByteOrder
],
1160 self
= cls
.__new
__(cls
)
1161 self
._init
(data
, variables
, labels
, offset
, bo
)
1164 def __init__(self
, *args
, **kwargs
): # type: ignore
1165 raise NotImplementedError
1170 variables
: VariablesT
,
1173 bo
: Optional
[ByteOrder
],
1176 self
._vars
= variables
1177 self
._labels
= labels
1178 self
._offset
= offset
1186 # Dictionary of updated variable names to their last computed value.
1188 def variables(self
):
1191 # Dictionary of updated main group label names to their computed
1202 # Updated byte order.
1204 def byte_order(self
):
1208 # Raises a parse error for the item `item`, creating it using the
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The error is reported at the text location of `item`.
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1214 # The `ICITTE` reserved name.
1215 _icitte_name
= "ICITTE"
1218 # Base node visitor.
1220 # Calls the _visit_name() method for each name node which isn't the name
1222 class _NodeVisitor(ast
.NodeVisitor
):
1224 self
._parent
_is
_call
= False
1226 def generic_visit(self
, node
: ast
.AST
):
1227 if type(node
) is ast
.Call
:
1228 self
._parent
_is
_call
= True
1229 elif type(node
) is ast
.Name
and not self
._parent
_is
_call
:
1230 self
._visit
_name
(node
.id)
1232 super().generic_visit(node
)
1233 self
._parent
_is
_call
= False
1236 def _visit_name(self
, name
: str):
1240 # Expression validator: validates that all the names within the
1241 # expression are allowed.
1242 class _ExprValidator(_NodeVisitor
):
1243 def __init__(self
, item
: _ExprItemT
, allowed_names
: Set
[str]):
1246 self
._allowed
_names
= allowed_names
1248 def _visit_name(self
, name
: str):
1249 # Make sure the name refers to a known and reachable
1250 # variable/label name.
1251 if name
!= _icitte_name
and name
not in self
._allowed
_names
:
1252 msg
= "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
1253 name
, self
._item
.expr_str
1256 allowed_names
= self
._allowed
_names
.copy()
1257 allowed_names
.add(_icitte_name
)
1259 if len(allowed_names
) > 0:
1260 allowed_names_str
= ", ".join(
1261 sorted(["`{}`".format(name
) for name
in allowed_names
])
1263 msg
+= "; the legal names are {{{}}}".format(allowed_names_str
)
1267 self
._item
.text_loc
,
1271 # Expression visitor getting all the contained names.
1272 class _ExprNamesVisitor(_NodeVisitor
):
1274 self
._parent
_is
_call
= False
1275 self
._names
= set() # type: Set[str]
1281 def _visit_name(self
, name
: str):
1282 self
._names
.add(name
)
1289 variables
: VariablesT
,
1292 bo
: Optional
[ByteOrder
],
1294 self
.variables
= variables
.copy()
1295 self
.labels
= labels
.copy()
1296 self
.offset
= offset
1300 # Generator of data and final state from a group item.
1302 # Generation happens in memory at construction time. After building, use
1303 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1304 # get the resulting context.
1306 # The steps of generation are:
1308 # 1. Validate that each repetition and LEB128 integer expression uses
1309 # only reachable names.
1311 # 2. Compute and keep the effective repetition count and LEB128 integer
1312 # value for each repetition and LEB128 integer instance.
1314 # 3. Generate bytes, updating the initial state as it goes which becomes
1315 # the final state after the operation.
1317 # During the generation, when handling a `_Rep` or `_Leb128Int` item,
1318 # we already have the effective repetition count or value of the
1321 # When handling a `_Group` item, first update the current labels with
1322 # all the immediate (not nested) labels, and then handle each
1323 # contained item. This gives contained item access to "future" outer
1324 # labels. Then remove the immediate labels from the state so that
1325 # outer items don't have access to inner labels.
1330 variables
: VariablesT
,
1333 bo
: Optional
[ByteOrder
],
1335 self
._validate
_vl
_exprs
(group
, set(variables
.keys()), set(labels
.keys()))
1336 self
._vl
_instance
_vals
= self
._compute
_vl
_instance
_vals
(
1337 group
, _GenState(variables
, labels
, offset
, bo
)
1339 self
._gen
(group
, _GenState(variables
, labels
, offset
, bo
))
1346 # Updated variables.
def variables(self):
    # The variables are the ones of the final generation state.
    final_state = self._final_state
    return final_state.variables
1351 # Updated main group labels.
1354 return self
._final
_state
.labels
1359 return self
._final
_state
.offset
1361 # Updated byte order.
1364 return self
._final
_state
.bo
1366 # Returns the set of used, non-called names within the AST
1367 # expression `expr`.
1369 def _names_of_expr(expr
: ast
.Expression
):
1370 visitor
= _ExprNamesVisitor()
1372 return visitor
.names
1374 # Validates that all the repetition and LEB128 integer expressions
1375 # within `group` don't refer, directly or indirectly, to subsequent
1378 # The strategy here is to keep a set of allowed label names, per
1379 # group, initialized to `allowed_label_names`, and a set of allowed
1380 # variable names initialized to `allowed_variable_names`.
1382 # Then, depending on the type of `item`:
1385 # Add its name to the local allowed label names: a label
1386 # occurring before a repetition, and not within a nested group,
1387 # is always reachable.
1390 # If all the names within its expression are allowed, then add
1391 # its name to the allowed variable names.
1393 # Otherwise, remove its name from the allowed variable names (if
1394 # it's in there): a variable which refers to an unreachable name
1395 # is unreachable itself.
1397 # `_Rep` and `_Leb128`:
1398 # Make sure all the names within its expression are allowed.
1401 # Call this function for each contained item with a _copy_ of
1402 # the current allowed label names and the same current allowed
1405 def _validate_vl_exprs(
1406 item
: _Item
, allowed_variable_names
: Set
[str], allowed_label_names
: Set
[str]
1408 if type(item
) is _Label
:
1409 allowed_label_names
.add(item
.name
)
1410 elif type(item
) is _VarAssign
:
1411 # Check if this variable name is allowed
1414 for name
in _Gen
._names
_of
_expr
(item
.expr
):
1416 allowed_label_names | allowed_variable_names | {_icitte_name}
1423 allowed_variable_names
.add(item
.name
)
1424 elif item
.name
in allowed_variable_names
:
1425 allowed_variable_names
.remove(item
.name
)
1426 elif isinstance(item
, _Leb128Int
):
1427 # Validate the expression
1428 _ExprValidator(item
, allowed_label_names | allowed_variable_names
).visit(
1431 elif type(item
) is _Rep
:
1432 # Validate the expression first
1433 _ExprValidator(item
, allowed_label_names | allowed_variable_names
).visit(
1437 # Validate inner item
1438 _Gen
._validate
_vl
_exprs
(
1439 item
.item
, allowed_variable_names
, allowed_label_names
1441 elif type(item
) is _Group
:
1442 # Copy `allowed_label_names` so that this frame cannot
1443 # access the nested label names.
1444 group_allowed_label_names
= allowed_label_names
.copy()
1446 for subitem
in item
.items
:
1447 _Gen
._validate
_vl
_exprs
(
1448 subitem
, allowed_variable_names
, group_allowed_label_names
1451 # Evaluates the expression of `item` considering the current
1452 # generation state `state`.
1454 # If `allow_float` is `True`, then the type of the result may be
1457 def _eval_item_expr(
1460 allow_float
: bool = False,
1462 syms
= {} # type: VariablesT
1463 syms
.update(state
.labels
)
1465 # Set the `ICITTE` name to the current offset
1466 syms
[_icitte_name
] = state
.offset
1468 # Add the current variables
1469 syms
.update(state
.variables
)
1471 # Validate the node and its children
1472 _ExprValidator(item
, set(syms
.keys())).visit(item
.expr
)
1474 # Compile and evaluate expression node
1476 val
= eval(compile(item
.expr
, "", "eval"), None, syms
)
1477 except Exception as exc
:
1478 _raise_error_for_item(
1479 "Failed to evaluate expression `{}`: {}".format(item
.expr_str
, exc
),
1483 # Validate result type
1484 expected_types
= {int}
# type: Set[type]
1488 expected_types
.add(float)
1489 type_msg
+= " or `float`"
1491 if type(val
) not in expected_types
:
1492 _raise_error_for_item(
1493 "Invalid expression `{}`: expecting result type {}, not `{}`".format(
1494 item
.expr_str
, type_msg
, type(val
).__name
__
1501 # Returns the size, in bytes, required to encode the value `val`
1502 # with LEB128 (signed version if `is_signed` is `True`).
1504 def _leb128_size_for_val(val
: int, is_signed
: bool):
1506 # Equivalent upper bound.
1508 # For example, if `val` is -128, then the full integer for
1509 # this number of bits would be [-128, 127].
1512 # Number of bits (add one for the sign if needed)
1513 bits
= val
.bit_length() + int(is_signed
)
1518 # Seven bits per byte
1519 return math
.ceil(bits
/ 7)
1521 # Returns the offset `offset` aligned according to `item`.
def _align_offset(offset: int, item: _AlignOffset):
    # Alignment step in bytes (`item.val` divided by eight)
    step = item.val // 8

    # Go past `offset` by one step minus one, then drop the remainder:
    # this yields the smallest multiple of `step` which is greater than
    # or equal to `offset` (for a positive `step`).
    bumped = offset + step - 1
    return bumped - bumped % step
1527 # Computes the effective value for each repetition and LEB128
1528 # integer instance, filling `instance_vals` (if not `None`) and
1529 # returning `instance_vals`.
1531 # At this point it must be known that, for a given variable-length
1532 # item, its expression only contains reachable names.
1534 # When handling a `_Rep` item, this function appends its effective
1535 # multiplier to `instance_vals` _before_ handling its repeated item.
1537 # When handling a `_VarAssign` item, this function only evaluates it
1538 # if all its names are reachable.
1540 def _compute_vl_instance_vals(
1541 item
: _Item
, state
: _GenState
, instance_vals
: Optional
[List
[int]] = None
1543 if instance_vals
is None:
1546 if isinstance(item
, _ScalarItem
):
1547 state
.offset
+= item
.size
1548 elif type(item
) is _Label
:
1549 state
.labels
[item
.name
] = state
.offset
1550 elif type(item
) is _VarAssign
:
1551 # Check if all the names are reachable
1554 for name
in _Gen
._names
_of
_expr
(item
.expr
):
1556 name
!= _icitte_name
1557 and name
not in state
.variables
1558 and name
not in state
.labels
1560 # A name is unknown: cannot evaluate
1565 # Evaluate the expression and keep the result
1566 state
.variables
[item
.name
] = _Gen
._eval
_item
_expr
(item
, state
, True)
1567 elif type(item
) is _SetOffset
:
1568 state
.offset
= item
.val
1569 elif type(item
) is _AlignOffset
:
1570 state
.offset
= _Gen
._align
_offset
(state
.offset
, item
)
1571 elif isinstance(item
, _Leb128Int
):
1572 # Evaluate the expression
1573 val
= _Gen
._eval
_item
_expr
(item
, state
)
1576 if type(item
) is _ULeb128Int
and val
< 0:
1577 _raise_error_for_item(
1578 "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
# Add the evaluation result to the variable-length item
1586 instance_vals
.append(val
)
1589 state
.offset
+= _Gen
._leb
128_size
_for
_val
(val
, type(item
) is _SLeb128Int
)
1590 elif type(item
) is _Rep
:
1591 # Evaluate the expression and keep the result
1592 val
= _Gen
._eval
_item
_expr
(item
, state
)
1596 _raise_error_for_item(
1597 "Invalid expression `{}`: unexpected negative result {:,}".format(
1603 # Add to repetition instance values
1604 instance_vals
.append(val
)
1606 # Process the repeated item `val` times
1607 for _
in range(val
):
1608 _Gen
._compute
_vl
_instance
_vals
(item
.item
, state
, instance_vals
)
1609 elif type(item
) is _Group
:
1610 prev_labels
= state
.labels
.copy()
1613 for subitem
in item
.items
:
1614 _Gen
._compute
_vl
_instance
_vals
(subitem
, state
, instance_vals
)
1616 state
.labels
= prev_labels
1618 return instance_vals
# Dry handler which does nothing: leaves `state` as is and returns
# `next_vl_instance` unchanged (no variable-length instance consumed).
def _update_offset_noop(self, item: _Item, state: _GenState, next_vl_instance: int):
    return next_vl_instance
# Dry handler for a scalar item: moves `state.offset` forward by
# `item.size` and returns `next_vl_instance` unchanged.
def _dry_handle_scalar_item(
    self, item: _ScalarItem, state: _GenState, next_vl_instance: int
):
    state.offset = state.offset + item.size
    return next_vl_instance
# Dry handler for a LEB128 integer item: moves `state.offset` forward
# by the encoded size of the precomputed value at index
# `next_vl_instance`, and returns the index of the next instance.
def _dry_handle_leb128_int_item(
    self, item: _Leb128Int, state: _GenState, next_vl_instance: int
):
    # Read the value _before_ moving to the next instance to honor the
    # order of _compute_vl_instance_vals().
    val = self._vl_instance_vals[next_vl_instance]
    is_signed = type(item) is _SLeb128Int
    state.offset += self._leb128_size_for_val(val, is_signed)

    # Consumed this instance
    return next_vl_instance + 1
# Dry handler for a group item: dry-handles each item of `item.items`
# in order, threading the next variable-length instance index through
# the calls, and returns the resulting index.
def _dry_handle_group_item(
    self, item: _Group, state: _GenState, next_vl_instance: int
):
    cursor = next_vl_instance

    for child in item.items:
        cursor = self._dry_handle_item(child, state, cursor)

    return cursor
# Dry handler for a repetition item: reads the precomputed multiplier at
# index `next_vl_instance`, then dry-handles `item.item` that many
# times, and returns the updated next variable-length instance index.
def _dry_handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
    # The multiplier comes _before_ the instances of the repeated item
    # to honor the order of _compute_vl_instance_vals().
    count = self._vl_instance_vals[next_vl_instance]
    cursor = next_vl_instance + 1

    for _ in range(count):
        cursor = self._dry_handle_item(item.item, state, cursor)

    return cursor
# Dry handler for an offset alignment item: replaces `state.offset`
# with its aligned value (see _align_offset()) without generating any
# padding, and returns `next_vl_instance` unchanged.
def _dry_handle_align_offset_item(
    self, item: _AlignOffset, state: _GenState, next_vl_instance: int
):
    aligned = self._align_offset(state.offset, item)
    state.offset = aligned
    return next_vl_instance
# Dry handler for an offset setting item: makes `item.val` the current
# offset and returns `next_vl_instance` unchanged.
def _dry_handle_set_offset_item(
    self, item: _SetOffset, state: _GenState, next_vl_instance: int
):
    new_offset = item.val
    state.offset = new_offset
    return next_vl_instance
# Dry-handles `item`: updates `state.offset` as if the data of `item`
# was generated, without generating any, and returns the updated next
# variable-length item instance.
#
# The handler is the entry of `self._dry_handle_item_funcs` for the
# exact type of `item`.
def _dry_handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
    dry_handler = self._dry_handle_item_funcs[type(item)]
    return dry_handler(item, state, next_vl_instance)
# Handles the byte item `item`: appends `item.val` to the generated
# data and moves the current offset forward by `item.size`.
def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
    out = self._data
    out.append(item.val)
    state.offset = state.offset + item.size
    return next_vl_instance
# Handles the string item `item`: appends `item.data` to the generated
# data and moves the current offset forward by `item.size`.
def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
    payload = item.data
    self._data += payload
    state.offset = state.offset + item.size
    return next_vl_instance
1691 # Handles the byte order setting item `item`.
1692 def _handle_set_bo_item(
1693 self
, item
: _SetBo
, state
: _GenState
, next_vl_instance
: int
1695 # Update current byte order
1697 return next_vl_instance
# Handles the variable assignment item `item`: evaluates its expression
# (a `float` result is allowed) and stores the result in
# `state.variables` under `item.name`.
def _handle_var_assign_item(
    self, item: _VarAssign, state: _GenState, next_vl_instance: int
):
    result = self._eval_item_expr(item, state, True)
    state.variables[item.name] = result
    return next_vl_instance
1707 # Handles the fixed-length integer item `item`.
1708 def _handle_fl_int_item(self
, val
: int, item
: _FlNum
, state
: _GenState
):
1710 if val
< -(2 ** (item
.len - 1)) or val
> 2**item
.len - 1:
1711 _raise_error_for_item(
1712 "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
1713 val
, item
.len, item
.expr_str
, state
.offset
1718 # Encode result on 64 bits (to extend the sign bit whatever the
1719 # value of `item.len`).
1722 ">" if state
.bo
in (None, ByteOrder
.BE
) else "<",
1723 "Q" if val
>= 0 else "q",
1728 # Keep only the requested length
1729 len_bytes
= item
.len // 8
1731 if state
.bo
in (None, ByteOrder
.BE
):
1732 # Big endian: keep last bytes
1733 data
= data
[-len_bytes
:]
1735 # Little endian: keep first bytes
1736 assert state
.bo
== ByteOrder
.LE
1737 data
= data
[:len_bytes
]
1739 # Append to current bytes and update offset
# Handles the fixed-length floating point number item `item`.
1743 def _handle_fl_float_item(self
, val
: float, item
: _FlNum
, state
: _GenState
):
1745 if item
.len not in (32, 64):
1746 _raise_error_for_item(
1747 "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
1754 self
._data
+= struct
.pack(
1756 ">" if state
.bo
in (None, ByteOrder
.BE
) else "<",
1757 "f" if item
.len == 32 else "d",
1762 # Handles the fixed-length number item `item`.
1763 def _handle_fl_num_item(
1764 self
, item
: _FlNum
, state
: _GenState
, next_vl_instance
: int
1767 val
= self
._eval
_item
_expr
(item
, state
, True)
1769 # Validate current byte order
1770 if state
.bo
is None and item
.len > 8:
1771 _raise_error_for_item(
1772 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
1778 # Handle depending on type
1779 if type(val
) is int:
1780 self
._handle
_fl
_int
_item
(val
, item
, state
)
1782 assert type(val
) is float
1783 self
._handle
_fl
_float
_item
(val
, item
, state
)
1786 state
.offset
+= item
.size
1788 return next_vl_instance
1790 # Handles the LEB128 integer item `item`.
1791 def _handle_leb128_int_item(
1792 self
, item
: _Leb128Int
, state
: _GenState
, next_vl_instance
: int
1794 # Get the precomputed value
1795 val
= self
._vl
_instance
_vals
[next_vl_instance
]
1798 size
= self
._leb
128_size
_for
_val
(val
, type(item
) is _SLeb128Int
)
1801 for _
in range(size
):
1802 # Seven LSBs, MSB of the byte set (continue)
1803 self
._data
.append((val
& 0x7F) |
0x80)
1806 # Clear MSB of last byte (stop)
1807 self
._data
[-1] &= ~
0x80
1809 # Consumed this instance
1810 return next_vl_instance
+ 1
1812 # Handles the group item `item`, only removing the immediate labels
1813 # from `state.labels` if `remove_immediate_labels` is `True`.
1814 def _handle_group_item(
1818 next_vl_instance
: int,
1819 remove_immediate_labels
: bool = True,
1821 # Compute the values of the immediate (not nested) labels. Those
1822 # labels are reachable by any expression within the group.
1823 tmp_state
= _GenState({}, {}, state
.offset
, None)
1824 immediate_label_names
= set() # type: Set[str]
1825 tmp_next_vl_instance
= next_vl_instance
1827 for subitem
in item
.items
:
1828 if type(subitem
) is _Label
:
1829 # New immediate label
1830 state
.labels
[subitem
.name
] = tmp_state
.offset
1831 immediate_label_names
.add(subitem
.name
)
1833 tmp_next_vl_instance
= self
._dry
_handle
_item
(
1834 subitem
, tmp_state
, tmp_next_vl_instance
1837 # Handle each item now with the actual state
1838 for subitem
in item
.items
:
1839 next_vl_instance
= self
._handle
_item
(subitem
, state
, next_vl_instance
)
1841 # Remove immediate labels if required so that outer items won't
1842 # reach inner labels.
1843 if remove_immediate_labels
:
1844 for name
in immediate_label_names
:
1845 del state
.labels
[name
]
1847 return next_vl_instance
# Handles the repetition item `item`: reads the precomputed repetition
# count at index `next_vl_instance`, then handles `item.item` that many
# times, and returns the updated next variable-length instance index.
def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
    # Precomputed repetition count; reading it consumes this instance
    count = self._vl_instance_vals[next_vl_instance]
    cursor = next_vl_instance + 1

    for _ in range(count):
        cursor = self._handle_item(item.item, state, cursor)

    return cursor
# Handles the offset setting item `item`: makes `item.val` the current
# offset (no data is generated).
def _handle_set_offset_item(
    self, item: _SetOffset, state: _GenState, next_vl_instance: int
):
    new_offset = item.val
    state.offset = new_offset
    return next_vl_instance
# Handles the offset alignment item `item`: aligns the current offset
# (see _align_offset()) and appends one padding byte `item.pad_val` to
# the generated data for each byte skipped.
def _handle_align_offset_item(
    self, item: _AlignOffset, state: _GenState, next_vl_instance: int
):
    before = state.offset
    state.offset = self._align_offset(before, item)

    # As many padding bytes as the offset moved
    pad_len = state.offset - before
    self._data += bytes([item.pad_val] * pad_len)
    return next_vl_instance
# Handles the label item `item`: nothing to generate and nothing to
# update here, so this only returns `next_vl_instance` unchanged.
def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
    return next_vl_instance
# Handles the item `item`, returning what its handler returns (the
# updated next variable-length instance index).
#
# The handler is the entry of `self._item_handlers` for the exact type
# of `item`.
def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
    handler = self._item_handlers[type(item)]
    return handler(item, state, next_vl_instance)
# Generates the data (`self._data`) and the final state
# (`self._final_state`) from the group `group`, starting from the
# initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Output buffer, initially empty
    self._data = bytearray()

    # Handlers shared by more than one item type
    handle_leb128 = self._handle_leb128_int_item
    dry_scalar = self._dry_handle_scalar_item
    dry_leb128 = self._dry_handle_leb128_int_item
    dry_noop = self._update_offset_noop

    # One row per item type: the type itself, its generating handler,
    # and its dry handler (which only updates the state offset).
    rows = (
        (
            _AlignOffset,
            self._handle_align_offset_item,
            self._dry_handle_align_offset_item,
        ),
        (_Byte, self._handle_byte_item, dry_scalar),
        (_FlNum, self._handle_fl_num_item, dry_scalar),
        (_Group, self._handle_group_item, self._dry_handle_group_item),
        (_Label, self._handle_label_item, dry_noop),
        (_Rep, self._handle_rep_item, self._dry_handle_rep_item),
        (_SetBo, self._handle_set_bo_item, dry_noop),
        (_SetOffset, self._handle_set_offset_item, self._dry_handle_set_offset_item),
        (_SLeb128Int, handle_leb128, dry_leb128),
        (_Str, self._handle_str_item, dry_scalar),
        (_ULeb128Int, handle_leb128, dry_leb128),
        (_VarAssign, self._handle_var_assign_item, dry_noop),
    )

    # Item handlers
    self._item_handlers = {
        item_type: handler for item_type, handler, _ in rows
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Dry item handlers (only updates the state offset)
    self._dry_handle_item_funcs = {
        item_type: dry_handler for item_type, _, dry_handler in rows
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Handle the group item, _not_ removing the immediate labels
    # because the `labels` property offers them.
    self._handle_group_item(group, state, 0, False)

    # `state` was updated in place: this is actually the final state
    self._final_state = state
1933 # Returns a `ParseResult` instance containing the bytes encoded by the
1934 # input string `normand`.
1936 # `init_variables` is a dictionary of initial variable names (valid
1937 # Python names) to integral values. A variable name must not be the
1938 # reserved name `ICITTE`.
1940 # `init_labels` is a dictionary of initial label names (valid Python
1941 # names) to integral values. A label name must not be the reserved name
1944 # `init_offset` is the initial offset.
1946 # `init_byte_order` is the initial byte order.
1948 # Raises `ParseError` on any parsing error.
1951 init_variables
: Optional
[VariablesT
] = None,
1952 init_labels
: Optional
[LabelsT
] = None,
1953 init_offset
: int = 0,
1954 init_byte_order
: Optional
[ByteOrder
] = None,
1956 if init_variables
is None:
1959 if init_labels
is None:
1963 _Parser(normand
, init_variables
, init_labels
).res
,
1969 return ParseResult
._create
( # pyright: ignore[reportPrivateUsage]
1970 gen
.data
, gen
.variables
, gen
.labels
, gen
.offset
, gen
.bo
1974 # Parses the command-line arguments.
1975 def _parse_cli_args():
1979 ap
= argparse
.ArgumentParser()
1986 help="initial offset (positive)",
1992 choices
=["be", "le"],
1994 help="initial byte order (`be` or `le`)",
2000 help="add an initial variable (may be repeated)",
2007 help="add an initial label (may be repeated)",
2010 "--version", action
="version", version
="Normand {}".format(__version__
)
2017 help="input path (none means standard input)",
2021 return ap
.parse_args()
# Raises a command-line error: a `RuntimeError` of which the message is
# `msg` with a `Command-line error: ` prefix.
def _raise_cli_error(msg: str) -> NoReturn:
    text = "Command-line error: {}".format(msg)
    raise RuntimeError(text)
2029 # Returns a dictionary of string to integers from the list of strings
2030 # `args` containing `NAME=VAL` entries.
2031 def _dict_from_arg(args
: Optional
[List
[str]]):
2032 d
= {} # type: LabelsT
2038 m
= re
.match(r
"({})=(\d+)$".format(_py_name_pat
.pattern
), arg
)
2041 _raise_cli_error("Invalid assignment {}".format(arg
))
2043 d
[m
.group(1)] = int(m
.group(2))
2048 # CLI entry point without exception handling.
2053 args
= _parse_cli_args()
2056 if args
.path
is None:
2057 normand
= sys
.stdin
.read()
2059 with
open(args
.path
) as f
:
2062 # Variables and labels
2063 variables
= typing
.cast(VariablesT
, _dict_from_arg(args
.var
))
2064 labels
= _dict_from_arg(args
.label
)
2068 _raise_cli_error("Invalid negative offset {}")
2070 # Validate and set byte order
2071 bo
= None # type: Optional[ByteOrder]
2073 if args
.byte_order
is not None:
2074 if args
.byte_order
== "be":
2077 assert args
.byte_order
== "le"
2082 res
= parse(normand
, variables
, labels
, args
.offset
, bo
)
2083 except ParseError
as exc
:
2086 if args
.path
is not None:
2087 prefix
= "{}:".format(os
.path
.abspath(args
.path
))
2090 "{}{}:{} - {}".format(
2091 prefix
, exc
.text_loc
.line_no
, exc
.text_loc
.col_no
, str(exc
)
2096 sys
.stdout
.buffer.write(res
.data
)
2099 # Prints the exception message `msg` and exits with status 1.
2100 def _fail(msg
: str) -> NoReturn
:
2101 if not msg
.endswith("."):
2104 print(msg
, file=sys
.stderr
)
2112 except Exception as exc
:
2116 if __name__
== "__main__":