1 # The MIT License (MIT)
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
28 # Feel free to copy this module file to your own project to use Normand.
30 # Upstream repository: <https://github.com/efficios/normand>.
32 __author__
= "Philippe Proulx"
65 # Text location (line and column numbers).
68 def _create(cls
, line_no
: int, col_no
: int):
69 self
= cls
.__new
__(cls
)
70 self
._init
(line_no
, col_no
)
# Direct construction is forbidden: this initializer unconditionally
# raises, whatever the arguments. The visible _create() factory builds
# instances with `cls.__new__()` and _init() instead of calling this.
def __init__(*args, **kwargs):  # type: ignore
    raise NotImplementedError()
76 def _init(self
, line_no
: int, col_no
: int):
77 self
._line
_no
= line_no
91 return "TextLoc({}, {})".format(self
._line
_no
, self
._col
_no
)
# Builds the item, keeping `text_loc` (its source text location) in
# `self._text_loc`.
def __init__(self, text_loc: TextLoc):
    self._text_loc = text_loc
99 # Source text location.
102 return self
._text
_loc
106 class _ScalarItem(_Item
):
107 # Returns the size, in bytes, of this item.
110 def size(self
) -> int:
120 class _Byte(_ScalarItem
, _RepableItem
):
121 def __init__(self
, val
: int, text_loc
: TextLoc
):
122 super().__init
__(text_loc
)
135 return "_Byte({}, {})".format(hex(self
._val
), self
._text
_loc
)
139 class _Str(_ScalarItem
, _RepableItem
):
140 def __init__(self
, data
: bytes
, text_loc
: TextLoc
):
141 super().__init
__(text_loc
)
151 return len(self
._data
)
154 return "_Str({}, {})".format(repr(self
._data
), self
._text
_loc
)
159 class ByteOrder(enum
.Enum
):
167 # Byte order setting.
169 def __init__(self
, bo
: ByteOrder
, text_loc
: TextLoc
):
170 super().__init
__(text_loc
)
178 return "_SetBo({}, {})".format(repr(self
._bo
), self
._text
_loc
)
183 def __init__(self
, name
: str, text_loc
: TextLoc
):
184 super().__init
__(text_loc
)
193 return "_Label({}, {})".format(repr(self
._name
), self
._text
_loc
)
197 class _SetOffset(_Item
):
198 def __init__(self
, val
: int, text_loc
: TextLoc
):
199 super().__init
__(text_loc
)
208 return "_SetOffset({}, {})".format(repr(self
._val
), self
._text
_loc
)
# Mixin containing an AST expression and its string.
213 def __init__(self
, expr_str
: str, expr
: ast
.Expression
):
214 self
._expr
_str
= expr_str
220 return self
._expr
_str
222 # Expression node to evaluate.
228 # Variable assignment.
229 class _VarAssign(_Item
, _ExprMixin
):
231 self
, name
: str, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLoc
233 super().__init
__(text_loc
)
234 _ExprMixin
.__init
__(self
, expr_str
, expr
)
243 return "_VarAssign({}, {}, {}, {})".format(
244 repr(self
._name
), repr(self
._expr
_str
), repr(self
._expr
), self
._text
_loc
248 # Value, possibly needing more than one byte.
249 class _Val(_ScalarItem
, _RepableItem
, _ExprMixin
):
251 self
, expr_str
: str, expr
: ast
.Expression
, len: int, text_loc
: TextLoc
253 super().__init
__(text_loc
)
254 _ExprMixin
.__init
__(self
, expr_str
, expr
)
264 return self
._len
// 8
267 return "_Val({}, {}, {}, {})".format(
268 repr(self
._expr
_str
), repr(self
._expr
), repr(self
._len
), self
._text
_loc
273 class _Group(_Item
, _RepableItem
):
274 def __init__(self
, items
: List
[_Item
], text_loc
: TextLoc
):
275 super().__init
__(text_loc
)
284 return "_Group({}, {})".format(repr(self
._items
), self
._text
_loc
)
288 class _Rep(_Item
, _ExprMixin
):
290 self
, item
: _Item
, expr_str
: str, expr
: ast
.Expression
, text_loc
: TextLoc
292 super().__init
__(text_loc
)
293 _ExprMixin
.__init
__(self
, expr_str
, expr
)
302 return "_Rep({}, {}, {}, {})".format(
303 repr(self
._item
), repr(self
._expr
_str
), repr(self
._expr
), self
._text
_loc
# Expression item type: any of the item classes which also inherit
# `_ExprMixin`, that is, which hold an expression string and its AST
# node.
_ExprItemT = Union[_Val, _VarAssign, _Rep]
311 # A parsing error containing a message and a text location.
312 class ParseError(RuntimeError):
314 def _create(cls
, msg
: str, text_loc
: TextLoc
):
315 self
= cls
.__new
__(cls
)
316 self
._init
(msg
, text_loc
)
# Direct construction is forbidden: this initializer unconditionally
# raises, whatever the arguments. The visible _create() factory builds
# instances with `cls.__new__()` and _init() instead of calling this.
def __init__(self, *args, **kwargs):  # type: ignore
    raise NotImplementedError()
# Actual initializer, called by _create() on a bare instance: forwards
# the message `msg` to the base exception class and keeps the source
# text location `text_loc` in `self._text_loc`.
def _init(self, msg: str, text_loc: TextLoc):
    super().__init__(msg)
    self._text_loc = text_loc
326 # Source text location.
329 return self
._text
_loc
# Raises a `ParseError` built from the message `msg` and the text
# location `text_loc`.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    # `ParseError.__init__()` always raises: build the instance through
    # the private factory instead.
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
# Variable/label dictionary type: maps a variable or label name to its
# integral value.
VarsT = Dict[str, int]
# Python name pattern: an ASCII letter or underscore followed by any
# number of ASCII letters, digits, and underscores.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
347 # The constructor accepts a Normand input. After building, use the `res`
348 # property to get the resulting main group.
350 # Builds a parser to parse the Normand input `normand`, parsing
352 def __init__(self
, normand
: str, variables
: VarsT
, labels
: VarsT
):
353 self
._normand
= normand
357 self
._label
_names
= set(labels
.keys())
358 self
._var
_names
= set(variables
.keys())
361 # Result (main group).
366 # Current text location.
369 return TextLoc
._create
( # pyright: ignore[reportPrivateUsage]
370 self
._line
_no
, self
._col
_no
373 # Returns `True` if this parser is done parsing.
375 return self
._at
== len(self
._normand
)
# Opposite of _is_done(): returns `True` while there's still something
# to parse.
def _isnt_done(self):
    if self._is_done():
        return False

    return True
# Raises a parse error built from the message `msg` and the current
# text location of this parser.
def _raise_error(self, msg: str) -> NoReturn:
    # Within a method body, the bare name `_raise_error` resolves to
    # the module-level function, not to this method.
    current_loc = self._text_loc
    _raise_error(msg, current_loc)
386 # Tries to make the pattern `pat` match the current substring,
387 # returning the match object and updating `self._at`,
388 # `self._line_no`, and `self._col_no` on success.
389 def _try_parse_pat(self
, pat
: Pattern
[str]):
390 m
= pat
.match(self
._normand
, self
._at
)
395 # Skip matched string
396 self
._at
+= len(m
.group(0))
399 self
._line
_no
+= m
.group(0).count("\n")
401 # Update column number
402 for i
in reversed(range(self
._at
)):
403 if self
._normand
[i
] == "\n" or i
== 0:
405 self
._col
_no
= self
._at
+ 1
407 self
._col
_no
= self
._at
- i
411 # Return match object
414 # Expects the pattern `pat` to match the current substring,
415 # returning the match object and updating `self._at`,
416 # `self._line_no`, and `self._col_no` on success, or raising a parse
417 # error with the message `error_msg` on error.
418 def _expect_pat(self
, pat
: Pattern
[str], error_msg
: str):
420 m
= self
._try
_parse
_pat
(pat
)
424 self
._raise
_error
(error_msg
)
426 # Return match object
429 # Pattern for _skip_ws_and_comments()
430 _ws_or_syms_or_comments_pat
= re
.compile(
431 r
"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
# Consumes the longest possible run of whitespaces, insignificant
# symbol characters, and comments at the current position.
def _skip_ws_and_comments(self):
    # The match object is irrelevant here: only the position update
    # performed by _try_parse_pat() matters.
    skippable_pat = self._ws_or_syms_or_comments_pat
    self._try_parse_pat(skippable_pat)
# Pattern for _try_parse_hex_byte(): a single hexadecimal digit
# (nibble), lowercase or uppercase.
_nibble_pat = re.compile(r"[A-Fa-f0-9]")
442 # Tries to parse a hexadecimal byte, returning a byte item on
444 def _try_parse_hex_byte(self
):
445 begin_text_loc
= self
._text
_loc
447 # Match initial nibble
448 m_high
= self
._try
_parse
_pat
(self
._nibble
_pat
)
454 # Expect another nibble
455 self
._skip
_ws
_and
_comments
()
456 m_low
= self
._expect
_pat
(
457 self
._nibble
_pat
, "Expecting another hexadecimal nibble"
461 return _Byte(int(m_high
.group(0) + m_low
.group(0), 16), begin_text_loc
)
# Patterns for _try_parse_bin_byte(): a single bit (`0` or `1`) and
# the `%` prefix which introduces a binary byte.
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")
467 # Tries to parse a binary byte, returning a byte item on success.
468 def _try_parse_bin_byte(self
):
469 begin_text_loc
= self
._text
_loc
472 if self
._try
_parse
_pat
(self
._bin
_byte
_prefix
_pat
) is None:
477 bits
= [] # type: List[str]
480 self
._skip
_ws
_and
_comments
()
481 m
= self
._expect
_pat
(self
._bin
_byte
_bit
_pat
, "Expecting a bit (`0` or `1`)")
482 bits
.append(m
.group(0))
485 return _Byte(int("".join(bits
), 2), begin_text_loc
)
# Patterns for _try_parse_dec_byte():
#
# * Prefix: `$` followed by optional whitespaces.
# * Value: optional minus sign (group `neg`) followed by decimal
#   digits (group `val`).
_dec_byte_prefix_pat = re.compile(r"\$\s*")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
491 # Tries to parse a decimal byte, returning a byte item on success.
492 def _try_parse_dec_byte(self
):
493 begin_text_loc
= self
._text
_loc
496 if self
._try
_parse
_pat
(self
._dec
_byte
_prefix
_pat
) is None:
501 m
= self
._expect
_pat
(self
._dec
_byte
_val
_pat
, "Expecting a decimal constant")
504 val
= int(m
.group("val")) * (-1 if m
.group("neg") == "-" else 1)
507 if val
< -128 or val
> 255:
508 _raise_error("Invalid decimal byte value {}".format(val
), begin_text_loc
)
514 return _Byte(val
, begin_text_loc
)
516 # Tries to parse a byte, returning a byte item on success.
517 def _try_parse_byte(self
):
519 item
= self
._try
_parse
_hex
_byte
()
525 item
= self
._try
_parse
_bin
_byte
()
531 item
= self
._try
_parse
_dec
_byte
()
# Patterns for _try_parse_str():
#
# * Prefix: optional encoding specifier (`u`, then `16` or `32` as
#   group `len`, then `be` or `le` as group `bo`), optional
#   whitespaces, and the opening `"`.
# * Suffix: the closing `"`.
# * Contents: any run of backslash-escaped characters and characters
#   other than `"` (possibly empty).
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
541 # Strings corresponding to escape sequence characters
542 _str_escape_seq_strs
= {
556 # Tries to parse a string, returning a string item on success.
557 def _try_parse_str(self
):
558 begin_text_loc
= self
._text
_loc
561 m
= self
._try
_parse
_pat
(self
._str
_prefix
_pat
)
570 if m
.group("len") is not None:
571 encoding
= "utf_{}_{}".format(m
.group("len"), m
.group("bo"))
574 m
= self
._expect
_pat
(self
._str
_str
_pat
, "Expecting a literal string")
576 # Expect end of string
577 self
._expect
_pat
(self
._str
_suffix
_pat
, 'Expecting `"` (end of literal string)')
579 # Replace escape sequences
582 for ec
in '0abefnrtv"\\':
583 val
= val
.replace(r
"\{}".format(ec
), self
._str
_escape
_seq
_strs
[ec
])
586 data
= val
.encode(encoding
)
589 return _Str(data
, begin_text_loc
)
# Patterns for _try_parse_group(): the opening `(` and the closing `)`
# of a group.
_group_prefix_pat = re.compile(r"\(")
_group_suffix_pat = re.compile(r"\)")
595 # Tries to parse a group, returning a group item on success.
596 def _try_parse_group(self
):
597 begin_text_loc
= self
._text
_loc
600 if self
._try
_parse
_pat
(self
._group
_prefix
_pat
) is None:
605 items
= self
._parse
_items
()
607 # Expect end of group
608 self
._skip
_ws
_and
_comments
()
610 self
._group
_suffix
_pat
, "Expecting an item or `)` (end of group)"
614 return _Group(items
, begin_text_loc
)
616 # Returns a stripped expression string and an AST expression node
617 # from the expression string `expr_str` at text location `text_loc`.
618 def _ast_expr_from_str(self
, expr_str
: str, text_loc
: TextLoc
):
619 # Create an expression node from the expression string
620 expr_str
= expr_str
.strip().replace("\n", " ")
623 expr
= ast
.parse(expr_str
, mode
="eval")
626 "Invalid expression `{}`: invalid syntax".format(expr_str
),
630 return expr_str
, expr
# Patterns for _try_parse_val_and_len():
#
# * Expression: one or more characters other than `}` and `:`
#   (group 1), followed by `:`.
# * Length: optional whitespaces followed by a multiple of 8 from 8
#   to 64 (group 1).
_val_expr_pat = re.compile(r"([^}:]+):")
_val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)")
636 # Tries to parse a value and length, returning a value item on
638 def _try_parse_val_and_len(self
):
639 begin_text_loc
= self
._text
_loc
642 m_expr
= self
._try
_parse
_pat
(self
._val
_expr
_pat
)
649 m_len
= self
._expect
_pat
(
650 self
._val
_len
_pat
, "Expecting a length (multiple of eight bits)"
653 # Create an expression node from the expression string
654 expr_str
, expr
= self
._ast
_expr
_from
_str
(m_expr
.group(1), begin_text_loc
)
# Pattern for _try_parse_var_assign()
665 _var_assign_pat
= re
.compile(
666 r
"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat
.pattern
)
669 # Tries to parse a variable assignment, returning a variable
670 # assignment item on success.
671 def _try_parse_var_assign(self
):
672 begin_text_loc
= self
._text
_loc
675 m
= self
._try
_parse
_pat
(self
._var
_assign
_pat
)
682 name
= m
.group("name")
684 if name
== _icitte_name
:
686 "`{}` is a reserved variable name".format(_icitte_name
), begin_text_loc
689 if name
in self
._label
_names
:
690 _raise_error("Existing label named `{}`".format(name
), begin_text_loc
)
692 # Add to known variable names
693 self
._var
_names
.add(name
)
695 # Create an expression node from the expression string
696 expr_str
, expr
= self
._ast
_expr
_from
_str
(m
.group("expr"), begin_text_loc
)
# Pattern for _try_parse_set_bo(): a byte order name, either `be` or
# `le`.
_bo_pat = re.compile(r"[bl]e")
709 # Tries to parse a byte order name, returning a byte order setting
711 def _try_parse_set_bo(self
):
712 begin_text_loc
= self
._text
_loc
715 m
= self
._try
_parse
_pat
(self
._bo
_pat
)
721 # Return corresponding item
722 if m
.group(0) == "be":
723 return _SetBo(ByteOrder
.BE
, begin_text_loc
)
725 assert m
.group(0) == "le"
726 return _SetBo(ByteOrder
.LE
, begin_text_loc
)
# Patterns for _try_parse_val_or_var_assign_or_set_bo(): the opening
# `{` followed by optional whitespaces, and optional whitespaces
# followed by the closing `}`.
_val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
_val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")
732 # Tries to parse a value, a variable assignment, or a byte order
733 # setting, returning an item on success.
734 def _try_parse_val_or_var_assign_or_set_bo(self
):
736 if self
._try
_parse
_pat
(self
._val
_var
_assign
_set
_bo
_prefix
_pat
) is None:
740 # Variable assignment item?
741 item
= self
._try
_parse
_var
_assign
()
745 item
= self
._try
_parse
_val
_and
_len
()
748 # Byte order setting item?
749 item
= self
._try
_parse
_set
_bo
()
752 # At this point it's invalid
754 "Expecting a value, a variable assignment, or a byte order setting"
758 self
._expect
_pat
(self
._val
_var
_assign
_set
_bo
_suffix
_pat
, "Expecting `}`")
# Pattern for _try_parse_set_offset_val() and _try_parse_rep(): a
# positive constant integer, either hexadecimal (`0x` or `0X` prefix)
# or decimal.
_pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")
764 # Tries to parse an offset setting value (after the initial `<`),
765 # returning an offset item on success.
766 def _try_parse_set_offset_val(self
):
767 begin_text_loc
= self
._text
_loc
770 m
= self
._try
_parse
_pat
(self
._pos
_const
_int
_pat
)
777 return _SetOffset(int(m
.group(0), 0), begin_text_loc
)
779 # Tries to parse a label name (after the initial `<`), returning a
780 # label item on success.
781 def _try_parse_label_name(self
):
782 begin_text_loc
= self
._text
_loc
785 m
= self
._try
_parse
_pat
(_py_name_pat
)
794 if name
== _icitte_name
:
796 "`{}` is a reserved label name".format(_icitte_name
), begin_text_loc
799 if name
in self
._label
_names
:
800 _raise_error("Duplicate label name `{}`".format(name
), begin_text_loc
)
802 if name
in self
._var
_names
:
803 _raise_error("Existing variable named `{}`".format(name
), begin_text_loc
)
805 # Add to known label names
806 self
._label
_names
.add(name
)
809 return _Label(name
, begin_text_loc
)
# Patterns for _try_parse_label_or_set_offset(): the opening `<`
# followed by optional whitespaces, and optional whitespaces followed
# by the closing `>`.
_label_set_offset_prefix_pat = re.compile(r"<\s*")
_label_set_offset_suffix_pat = re.compile(r"\s*>")
815 # Tries to parse a label or an offset setting, returning an item on
817 def _try_parse_label_or_set_offset(self
):
819 if self
._try
_parse
_pat
(self
._label
_set
_offset
_prefix
_pat
) is None:
823 # Offset setting item?
824 item
= self
._try
_parse
_set
_offset
_val
()
828 item
= self
._try
_parse
_label
_name
()
831 # At this point it's invalid
832 self
._raise
_error
("Expecting a label name or an offset setting value")
835 self
._expect
_pat
(self
._label
_set
_offset
_suffix
_pat
, "Expecting `>`")
838 # Tries to parse a base item (anything except a repetition),
839 # returning it on success.
840 def _try_parse_base_item(self
):
842 item
= self
._try
_parse
_byte
()
848 item
= self
._try
_parse
_str
()
853 # Value, variable assignment, or byte order setting item?
854 item
= self
._try
_parse
_val
_or
_var
_assign
_or
_set
_bo
()
859 # Label or offset setting item?
860 item
= self
._try
_parse
_label
_or
_set
_offset
()
866 item
= self
._try
_parse
_group
()
# Patterns for _try_parse_rep():
#
# * Repetition prefix: `*` followed by optional whitespaces.
# * Expression prefix: the opening `{`.
# * Expression: one or more characters other than `}`.
#
#   This class must only exclude `}`: excluding any other character
#   (the previous pattern also excluded the letter `p`) makes valid
#   expressions such as `{ repeat }` fail with "Expecting `}`".
# * Expression suffix: the closing `}`.
_rep_prefix_pat = re.compile(r"\*\s*")
_rep_expr_prefix_pat = re.compile(r"\{")
_rep_expr_pat = re.compile(r"[^}]+")
_rep_expr_suffix_pat = re.compile(r"\}")
877 # Tries to parse a repetition, returning the expression string and
878 # AST expression node on success.
879 def _try_parse_rep(self
):
881 if self
._try
_parse
_pat
(self
._rep
_prefix
_pat
) is None:
885 # Expect and return a decimal multiplier
886 self
._skip
_ws
_and
_comments
()
889 m
= self
._try
_parse
_pat
(self
._pos
_const
_int
_pat
)
893 if self
._try
_parse
_pat
(self
._rep
_expr
_prefix
_pat
) is None:
894 # At this point it's invalid
895 self
._raise
_error
("Expecting a positive integral multiplier or `{`")
897 # Expect an expression
898 expr_str_loc
= self
._text
_loc
899 m
= self
._expect
_pat
(self
._rep
_expr
_pat
, "Expecting an expression")
900 expr_str
= self
._ast
_expr
_from
_str
(m
.group(0), expr_str_loc
)
903 self
._expect
_pat
(self
._rep
_expr
_suffix
_pat
, "Expecting `}`")
904 expr_str
= m
.group(0)
906 expr_str_loc
= self
._text
_loc
907 expr_str
= m
.group(0)
909 return self
._ast
_expr
_from
_str
(expr_str
, expr_str_loc
)
911 # Tries to parse an item, possibly followed by a repetition,
912 # returning `True` on success.
914 # Appends any parsed item to `items`.
915 def _try_append_item(self
, items
: List
[_Item
]):
916 self
._skip
_ws
_and
_comments
()
919 item
= self
._try
_parse
_base
_item
()
925 # Parse repetition if the base item is repeatable
926 if isinstance(item
, _RepableItem
):
927 self
._skip
_ws
_and
_comments
()
928 rep_text_loc
= self
._text
_loc
929 rep_ret
= self
._try
_parse
_rep
()
931 if rep_ret
is not None:
932 item
= _Rep(item
, rep_ret
[0], rep_ret
[1], rep_text_loc
)
937 # Parses and returns items, skipping whitespaces, insignificant
938 # symbols, and comments when allowed, and stopping at the first
940 def _parse_items(self
) -> List
[_Item
]:
941 items
= [] # type: List[_Item]
943 while self
._isnt
_done
():
945 if not self
._try
_append
_item
(items
):
946 # Unknown at this point
951 # Parses the whole Normand input, setting `self._res` to the main
952 # group item on success.
954 if len(self
._normand
.strip()) == 0:
955 # Special case to make sure there's something to consume
956 self
._res
= _Group([], self
._text
_loc
)
959 # Parse first level items
960 items
= self
._parse
_items
()
962 # Make sure there's nothing left
963 self
._skip
_ws
_and
_comments
()
965 if self
._isnt
_done
():
967 "Unexpected character `{}`".format(self
._normand
[self
._at
])
970 # Set main group item
971 self
._res
= _Group(items
, self
._text
_loc
)
974 # The return type of parse().
983 bo
: Optional
[ByteOrder
],
985 self
= cls
.__new
__(cls
)
986 self
._init
(data
, variables
, labels
, offset
, bo
)
# Direct construction is forbidden: this initializer unconditionally
# raises, whatever the arguments. The visible _create() factory builds
# instances with `cls.__new__()` and _init() instead of calling this.
def __init__(self, *args, **kwargs):  # type: ignore
    raise NotImplementedError()
998 bo
: Optional
[ByteOrder
],
1001 self
._vars
= variables
1002 self
._labels
= labels
1003 self
._offset
= offset
1011 # Dictionary of updated variable names to their last computed value.
1013 def variables(self
):
1016 # Dictionary of updated main group label names to their computed
1027 # Updated byte order.
1029 def byte_order(self
):
1033 # Raises a parse error for the item `item`, creating it using the
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The error is located where `item` was found in the source text
    item_loc = item.text_loc
    _raise_error(msg, item_loc)
# The `ICITTE` reserved name: the parser rejects it as a variable or
# label name, and expressions may only use it where it's allowed.
_icitte_name = "ICITTE"
1043 # Base node visitor.
1045 # Calls the _visit_name() method for each name node which isn't the name
1047 class _NodeVisitor(ast
.NodeVisitor
):
1049 self
._parent
_is
_call
= False
# Visits `node` and its children, calling _visit_name() for each name
# node reached while `self._parent_is_call` is clear.
def generic_visit(self, node: ast.AST):
    if type(node) is ast.Call:
        # Set the flag so that a name node visited next, while the
        # flag is still set, isn't reported.
        self._parent_is_call = True
    elif type(node) is ast.Name and not self._parent_is_call:
        # Name node reached while the flag is clear: report it
        self._visit_name(node.id)

    # Visit the children first, and only then clear the flag: it's
    # shared by the whole traversal, so it's cleared as soon as the
    # visit of any node completes.
    super().generic_visit(node)
    self._parent_is_call = False
1061 def _visit_name(self
, name
: str):
1065 # Expression validator: validates that all the names within the
1066 # expression are allowed.
1067 class _ExprValidator(_NodeVisitor
):
1068 def __init__(self
, item
: _ExprItemT
, allowed_names
: Set
[str], icitte_allowed
: bool):
1071 self
._allowed
_names
= allowed_names
1072 self
._icitte
_allowed
= icitte_allowed
1074 def _visit_name(self
, name
: str):
1075 # Make sure the name refers to a known and reachable
1076 # variable/label name.
1077 if name
== _icitte_name
and not self
._icitte
_allowed
:
1079 "Illegal reserved name `{}` in expression `{}`".format(
1080 _icitte_name
, self
._item
.expr_str
1082 self
._item
.text_loc
,
1084 elif name
!= _icitte_name
and name
not in self
._allowed
_names
:
1085 msg
= "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
1086 name
, self
._item
.expr_str
1089 if len(self
._allowed
_names
) > 0:
1090 allowed_names
= self
._allowed
_names
.copy()
1092 if self
._icitte
_allowed
:
1093 allowed_names
.add(_icitte_name
)
1095 allowed_names_str
= ", ".join(
1096 sorted(["`{}`".format(name
) for name
in allowed_names
])
1098 msg
+= "; the legal names are {{{}}}".format(allowed_names_str
)
1102 self
._item
.text_loc
,
1106 # Expression visitor getting all the contained names.
1107 class _ExprNamesVisitor(_NodeVisitor
):
1109 self
._parent
_is
_call
= False
1110 self
._names
= set() # type: Set[str]
# Records `name` in the set of names found so far (`self._names`).
def _visit_name(self, name: str):
    self._names.update((name,))
1123 self
, variables
: VarsT
, labels
: VarsT
, offset
: int, bo
: Optional
[ByteOrder
]
1125 self
.variables
= variables
.copy()
1126 self
.labels
= labels
.copy()
1127 self
.offset
= offset
1131 # Generator of data and final state from a group item.
1133 # Generation happens in memory at construction time. After building, use
1134 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1135 # get the resulting context.
1137 # The steps of generation are:
1139 # 1. Validate that each repetition expression uses only reachable names
1142 # 2. Compute and keep the effective repetition count for each repetition
1145 # 3. Generate bytes, updating the initial state as it goes which becomes
1146 # the final state after the operation.
1148 # During the generation, when handling a `_Rep` item, we already have
1149 # the effective repetition count of the instance.
1151 # When handling a `_Group` item, first update the current labels with
1152 # all the immediate (not nested) labels, and then handle each
# contained item. This gives the contained items access to "future" outer
1154 # labels. Then remove the immediate labels from the state so that
1155 # outer items don't have access to inner labels.
1163 bo
: Optional
[ByteOrder
],
1165 self
._validate
_rep
_exprs
(group
, set(variables
.keys()), set(labels
.keys()))
1166 self
._rep
_instance
_vals
= self
._compute
_rep
_instance
_vals
(
1167 group
, _GenState(variables
, labels
, offset
, bo
)
1169 self
._gen
(group
, _GenState(variables
, labels
, offset
, bo
))
1176 # Updated variables.
def variables(self):
    # Variables as found in the final generation state
    final_state = self._final_state
    return final_state.variables
1181 # Updated main group labels.
1184 return self
._final
_state
.labels
1189 return self
._final
_state
.offset
1191 # Updated byte order.
1194 return self
._final
_state
.bo
1196 # Returns the set of used, non-called names within the AST
1197 # expression `expr`.
1199 def _names_of_expr(expr
: ast
.Expression
):
1200 visitor
= _ExprNamesVisitor()
1202 return visitor
.names
1204 # Validates that all the repetition expressions within `group` don't
1205 # refer, directly or indirectly, to subsequent labels.
1207 # The strategy here is to keep a set of allowed label names, per
1208 # group, initialized to `allowed_label_names`, and a set of allowed
1209 # variable names initialized to `allowed_variable_names`.
1211 # Then, depending on the type of `item`:
1214 # Add its name to the local allowed label names: a label
1215 # occurring before a repetition, and not within a nested group,
1216 # is always reachable.
1219 # If all the names within its expression are allowed, then add
1220 # its name to the allowed variable names.
1222 # Otherwise, remove its name from the allowed variable names (if
1223 # it's in there): a variable which refers to an unreachable name
1224 # is unreachable itself.
1227 # Make sure all the names within its expression are allowed.
1230 # Call this function for each contained item with a _copy_ of
1231 # the current allowed label names and the same current allowed
1234 def _validate_rep_exprs(
1235 item
: _Item
, allowed_variable_names
: Set
[str], allowed_label_names
: Set
[str]
1237 if type(item
) is _Label
:
1238 allowed_label_names
.add(item
.name
)
1239 elif type(item
) is _VarAssign
:
1240 # Check if this variable name is allowed
1243 for name
in _Gen
._names
_of
_expr
(item
.expr
):
1245 allowed_label_names | allowed_variable_names | {_icitte_name}
1252 allowed_variable_names
.add(item
.name
)
1253 elif item
.name
in allowed_variable_names
:
1254 allowed_variable_names
.remove(item
.name
)
1255 elif type(item
) is _Rep
:
1256 # Validate the expression first
1258 item
, allowed_label_names | allowed_variable_names
, False
1261 # Validate inner item
1262 _Gen
._validate
_rep
_exprs
(
1263 item
.item
, allowed_variable_names
, allowed_label_names
1265 elif type(item
) is _Group
:
1266 # Copy `allowed_label_names` so that this frame cannot
1267 # access the nested label names.
1268 group_allowed_label_names
= allowed_label_names
.copy()
1270 for subitem
in item
.items
:
1271 _Gen
._validate
_rep
_exprs
(
1272 subitem
, allowed_variable_names
, group_allowed_label_names
1275 # Evaluates the expression of `item` considering the current
1276 # generation state `state`.
1278 # If `allow_icitte` is `True`, then the `ICITTE` name is available
1279 # for the expression to evaluate.
1281 def _eval_item_expr(item
: _ExprItemT
, state
: _GenState
, allow_icitte
: bool):
1282 syms
= state
.labels
.copy()
1284 # Set the `ICITTE` name to the current offset, if any
1286 syms
[_icitte_name
] = state
.offset
1288 # Add the current variables
1289 syms
.update(state
.variables
)
1291 # Validate the node and its children
1292 _ExprValidator(item
, set(syms
.keys()), True).visit(item
.expr
)
1294 # Compile and evaluate expression node
1296 val
= eval(compile(item
.expr
, "", "eval"), None, syms
)
1297 except Exception as exc
:
1298 _raise_error_for_item(
1299 "Failed to evaluate expression `{}`: {}".format(item
.expr_str
, exc
),
1304 if type(val
) is not int:
1305 _raise_error_for_item(
1306 "Invalid expression `{}`: expecting result type `int`, not `{}`".format(
1307 item
.expr_str
, type(val
).__name
__
1314 # Computes the effective value (multiplier) for each repetition
1315 # instance, filling `instance_vals` (if not `None`) and returning
1318 # At this point it must be known that, for a given repetition, its
1319 # expression only contains reachable names.
1321 # When handling a `_Rep` item, this function appends its effective
1322 # multiplier to `instance_vals` _before_ handling its repeated item.
1324 # When handling a `_VarAssign` item, this function only evaluates it if
1325 # all its names are reachable.
1327 def _compute_rep_instance_vals(
1328 item
: _Item
, state
: _GenState
, instance_vals
: Optional
[List
[int]] = None
1330 if instance_vals
is None:
1333 if isinstance(item
, _ScalarItem
):
1334 state
.offset
+= item
.size
1335 elif type(item
) is _Label
:
1336 state
.labels
[item
.name
] = state
.offset
1337 elif type(item
) is _VarAssign
:
1338 # Check if all the names are reachable
1341 for name
in _Gen
._names
_of
_expr
(item
.expr
):
1343 name
!= _icitte_name
1344 and name
not in state
.variables
1345 and name
not in state
.labels
1347 # A name is unknown: cannot evaluate
1352 # Evaluate the expression and keep the result
1353 state
.variables
[item
.name
] = _Gen
._eval
_item
_expr
(item
, state
, True)
1354 elif type(item
) is _SetOffset
:
1355 state
.offset
= item
.val
1356 elif type(item
) is _Rep
:
1357 # Evaluate the expression and keep the result
1358 val
= _Gen
._eval
_item
_expr
(item
, state
, False)
1362 _raise_error_for_item(
1363 "Invalid expression `{}`: unexpected negative result {:,}".format(
1369 # Add to repetition instance values
1370 instance_vals
.append(val
)
1372 # Process the repeated item `val` times
1373 for _
in range(val
):
1374 _Gen
._compute
_rep
_instance
_vals
(item
.item
, state
, instance_vals
)
1375 elif type(item
) is _Group
:
1376 prev_labels
= state
.labels
.copy()
1379 for subitem
in item
.items
:
1380 _Gen
._compute
_rep
_instance
_vals
(subitem
, state
, instance_vals
)
1382 state
.labels
= prev_labels
1384 return instance_vals
# Size function which always reports a size of zero, returning
# `next_rep_instance` as is (`item` isn't examined).
def _zero_item_size(self, item: _Item, next_rep_instance: int):
    no_size = 0
    return no_size, next_rep_instance
# Size function for a scalar item: reports the `size` property of
# `item`, returning `next_rep_instance` as is.
def _scalar_item_size(self, item: _ScalarItem, next_rep_instance: int):
    scalar_size = item.size
    return scalar_size, next_rep_instance
1392 def _group_item_size(self
, item
: _Group
, next_rep_instance
: int):
1395 for subitem
in item
.items
:
1396 subitem_size
, next_rep_instance
= self
._item
_size
(
1397 subitem
, next_rep_instance
1399 size
+= subitem_size
1401 return size
, next_rep_instance
1403 def _rep_item_size(self
, item
: _Rep
, next_rep_instance
: int):
1404 # Get the value from `self._rep_instance_vals` _before_
1405 # incrementing `next_rep_instance` to honor the order of
1406 # _compute_rep_instance_vals().
1407 mul
= self
._rep
_instance
_vals
[next_rep_instance
]
1408 next_rep_instance
+= 1
1411 for _
in range(mul
):
1412 iter_size
, next_rep_instance
= self
._item
_size
(item
.item
, next_rep_instance
)
1415 return size
, next_rep_instance
# Computes the size of `item`, returning it along with the new next
# repetition instance.
def _item_size(self, item: _Item, next_rep_instance: int):
    # Dispatch on the exact type of `item`
    size_func = self._item_size_funcs[type(item)]
    return size_func(item, next_rep_instance)
# Handles the byte item `item`: appends its value to the generated
# data and advances the current offset by its size.
def _handle_byte_item(self, item: _Byte, state: _GenState, next_rep_instance: int):
    generated = self._data
    generated.append(item.val)
    state.offset = state.offset + item.size

    # `next_rep_instance` is passed through untouched
    return next_rep_instance
# Handles the string item `item`: appends its encoded data to the
# generated data and advances the current offset by its size.
def _handle_str_item(self, item: _Str, state: _GenState, next_rep_instance: int):
    encoded = item.data
    self._data += encoded
    state.offset = state.offset + item.size

    # `next_rep_instance` is passed through untouched
    return next_rep_instance
1433 # Handles the byte order setting item `item`.
1434 def _handle_set_bo_item(
1435 self
, item
: _SetBo
, state
: _GenState
, next_rep_instance
: int
1437 # Update current byte order
1439 return next_rep_instance
1441 # Handles the variable assignment item `item`.
1442 def _handle_var_assign_item(
1443 self
, item
: _VarAssign
, state
: _GenState
, next_rep_instance
: int
1446 state
.variables
[item
.name
] = self
._eval
_item
_expr
(item
, state
, True)
1447 return next_rep_instance
1449 # Handles the value item `item`.
def _handle_val_item(self, item: _Val, state: _GenState, next_rep_instance: int):
    # Compute value
    val = self._eval_item_expr(item, state, True)

    # Validate range: accept anything from the lowest signed value to
    # the highest unsigned value which fits `item.len` bits.
    if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, item.len, item.expr_str, state.offset
            ),
            item,
        )

    # Encode result on 64 bits (to extend the sign bit whatever the
    # value of `item.len`).
    #
    # A value to encode on more than 8 bits needs a defined byte order.
    if state.bo is None and item.len > 8:
        _raise_error_for_item(
            "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # An undefined byte order (8-bit value) is packed as big endian;
    # `Q` is the unsigned 64-bit format and `q` the signed one.
    data = struct.pack(
        "{}{}".format(
            ">" if state.bo in (None, ByteOrder.BE) else "<",
            "Q" if val >= 0 else "q",
        ),
        val,
    )

    # Keep only the requested length
    len_bytes = item.len // 8

    if state.bo in (None, ByteOrder.BE):
        # Big endian: keep last bytes
        data = data[-len_bytes:]
    else:
        # Little endian: keep first bytes
        assert state.bo == ByteOrder.LE
        data = data[:len_bytes]

    # Append to current bytes and update offset
    self._data += data
    state.offset += len(data)
    return next_rep_instance
1497 # Handles the group item `item`, only removing the immediate labels
1498 # from `state.labels` if `remove_immediate_labels` is `True`.
def _handle_group_item(
    self,
    item: _Group,
    state: _GenState,
    next_rep_instance: int,
    remove_immediate_labels: bool = True,
):
    # Compute the values of the immediate (not nested) labels. Those
    # labels are reachable by any expression within the group.
    #
    # This first pass only measures the items: it works with a local
    # offset and a temporary repetition instance index so that
    # `state.offset` and `next_rep_instance` remain untouched until the
    # second pass.
    offset = state.offset
    immediate_label_names = set()  # type: Set[str]
    tmp_next_rep_instance = next_rep_instance

    for subitem in item.items:
        if type(subitem) is _SetOffset:
            # Update offset
            offset = subitem.val
        elif type(subitem) is _Label:
            # New immediate label
            state.labels[subitem.name] = offset
            immediate_label_names.add(subitem.name)
        else:
            # Any other item: advance the local offset by its size
            subitem_size, tmp_next_rep_instance = self._item_size(
                subitem, tmp_next_rep_instance
            )
            offset += subitem_size

    # Handle each item now with the actual state
    for subitem in item.items:
        next_rep_instance = self._handle_item(subitem, state, next_rep_instance)

    # Remove immediate labels if required so that outer items won't
    # reach inner labels.
    if remove_immediate_labels:
        for name in immediate_label_names:
            del state.labels[name]

    return next_rep_instance
1538 # Handles the repetition item `item`.
1539 def _handle_rep_item(self
, item
: _Rep
, state
: _GenState
, next_rep_instance
: int):
1540 mul
= self
._rep
_instance
_vals
[next_rep_instance
]
1541 next_rep_instance
+= 1
1543 for _
in range(mul
):
1544 next_rep_instance
= self
._handle
_item
(item
.item
, state
, next_rep_instance
)
1546 return next_rep_instance
1548 # Handles the offset setting item `item`.
1549 def _handle_set_offset_item(
1550 self
, item
: _SetOffset
, state
: _GenState
, next_rep_instance
: int
1552 state
.offset
= item
.val
1553 return next_rep_instance
1555 # Handles the label item `item`.
1556 def _handle_label_item(
1557 self
, item
: _Label
, state
: _GenState
, next_rep_instance
: int
1559 return next_rep_instance
# Handles the item `item`, returning the updated next repetition
# instance.
1563 def _handle_item(self
, item
: _Item
, state
: _GenState
, next_rep_instance
: int):
1564 return self
._item
_handlers
[type(item
)](item
, state
, next_rep_instance
)
1566 # Generates the data (`self._data`) and final state
1567 # (`self._final_state`) from `group` and the initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Start from an empty output buffer
    self._data = bytearray()

    # Item type to handler method; _handle_item() uses this table
    self._item_handlers = {
        _Byte: self._handle_byte_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _Str: self._handle_str_item,
        _Val: self._handle_val_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Item type to size method; _item_size() uses this table
    self._item_size_funcs = {
        _Byte: self._scalar_item_size,
        _Group: self._group_item_size,
        _Label: self._zero_item_size,
        _Rep: self._rep_item_size,
        _SetBo: self._zero_item_size,
        _SetOffset: self._zero_item_size,
        _Str: self._scalar_item_size,
        _Val: self._scalar_item_size,
        _VarAssign: self._zero_item_size,
    }  # type: Dict[type, Callable[[Any, int], Tuple[int, int]]]

    # Handle the group item, starting with the first repetition
    # instance and _not_ removing the immediate labels because the
    # `labels` property offers them.
    self._handle_group_item(group, state, 0, remove_immediate_labels=False)

    # The handlers updated `state` in place: it's now the final state
    self._final_state = state
1606 # Returns a `ParseResult` instance containing the bytes encoded by the
1607 # input string `normand`.
1609 # `init_variables` is a dictionary of initial variable names (valid
1610 # Python names) to integral values. A variable name must not be the
1611 # reserved name `ICITTE`.
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`.
1617 # `init_offset` is the initial offset.
1619 # `init_byte_order` is the initial byte order.
1621 # Raises `ParseError` on any parsing error.
1624 init_variables
: Optional
[VarsT
] = None,
1625 init_labels
: Optional
[VarsT
] = None,
1626 init_offset
: int = 0,
1627 init_byte_order
: Optional
[ByteOrder
] = None,
1629 if init_variables
is None:
1632 if init_labels
is None:
1636 _Parser(normand
, init_variables
, init_labels
).res
,
1642 return ParseResult
._create
( # pyright: ignore[reportPrivateUsage]
1643 gen
.data
, gen
.variables
, gen
.labels
, gen
.offset
, gen
.bo
1647 # Parses the command-line arguments.
1648 def _parse_cli_args():
1652 ap
= argparse
.ArgumentParser()
1659 help="initial offset (positive)",
1665 choices
=["be", "le"],
1667 help="initial byte order (`be` or `le`)",
1673 help="add an initial variable (may be repeated)",
1680 help="add an initial label (may be repeated)",
1683 "--version", action
="version", version
="Normand {}".format(__version__
)
1690 help="input path (none means standard input)",
1694 return ap
.parse_args()
1697 # Raises a command-line error with the message `msg`.
1698 def _raise_cli_error(msg
: str) -> NoReturn
:
1699 raise RuntimeError("Command-line error: {}".format(msg
))
1702 # Returns a dictionary of string to integers from the list of strings
1703 # `args` containing `NAME=VAL` entries.
1704 def _dict_from_arg(args
: Optional
[List
[str]]):
1705 d
= {} # type: Dict[str, int]
1711 m
= re
.match(r
"({})=(\d+)$".format(_py_name_pat
.pattern
), arg
)
1714 _raise_cli_error("Invalid assignment {}".format(arg
))
1716 d
[m
.group(1)] = int(m
.group(2))
1721 # CLI entry point without exception handling.
1726 args
= _parse_cli_args()
1729 if args
.path
is None:
1730 normand
= sys
.stdin
.read()
1732 with
open(args
.path
) as f
:
1735 # Variables and labels
1736 variables
= _dict_from_arg(args
.var
)
1737 labels
= _dict_from_arg(args
.label
)
1741 _raise_cli_error("Invalid negative offset {}")
1743 # Validate and set byte order
1744 bo
= None # type: Optional[ByteOrder]
1746 if args
.byte_order
is not None:
1747 if args
.byte_order
== "be":
1750 assert args
.byte_order
== "le"
1755 res
= parse(normand
, variables
, labels
, args
.offset
, bo
)
1756 except ParseError
as exc
:
1759 if args
.path
is not None:
1760 prefix
= "{}:".format(os
.path
.abspath(args
.path
))
1763 "{}{}:{} - {}".format(
1764 prefix
, exc
.text_loc
.line_no
, exc
.text_loc
.col_no
, str(exc
)
1769 sys
.stdout
.buffer.write(res
.data
)
1772 # Prints the exception message `msg` and exits with status 1.
1773 def _fail(msg
: str) -> NoReturn
:
1774 if not msg
.endswith("."):
1777 print(msg
, file=sys
.stderr
)
1785 except Exception as exc
:
1789 if __name__
== "__main__":