X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=normand%2Fnormand.py;h=c44373d9d722d5f6e919f0d86ae1dfd498baaf55;hb=refs%2Ftags%2Fv0.12.0;hp=c4c0e2f93b1b53859337f6a9b3b0a85ee212d380;hpb=0e8e316956be8862871e36da87781b14f05da733;p=normand.git diff --git a/normand/normand.py b/normand/normand.py index c4c0e2f..c44373d 100644 --- a/normand/normand.py +++ b/normand/normand.py @@ -21,30 +21,42 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# This module is the portable Normand processor. It offers both the +# parse() function and the command-line tool (run the module itself) +# without external dependencies except a `typing` module for Python 3.4. +# +# Feel free to copy this module file to your own project to use Normand. +# +# Upstream repository: . + __author__ = "Philippe Proulx" -__version__ = "0.2.0" +__version__ = "0.12.0" __all__ = [ + "__author__", + "__version__", "ByteOrder", + "LabelsT", "parse", "ParseError", "ParseResult", - "TextLoc", - "VarsT", - "__author__", - "__version__", + "TextLocation", + "VariablesT", ] import re import abc import ast import sys +import copy import enum +import math import struct -from typing import Any, Dict, List, Union, Pattern, Callable, NoReturn, Optional +import typing +from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional # Text location (line and column numbers). -class TextLoc: +class TextLocation: @classmethod def _create(cls, line_no: int, col_no: int): self = cls.__new__(cls) @@ -68,10 +80,13 @@ class TextLoc: def col_no(self): return self._col_no + def __repr__(self): + return "TextLocation({}, {})".format(self._line_no, self._col_no) + # Any item. class _Item: - def __init__(self, text_loc: TextLoc): + def __init__(self, text_loc: TextLocation): self._text_loc = text_loc # Source text location. @@ -79,6 +94,9 @@ class _Item: def text_loc(self): return self._text_loc + +# Scalar item. +class _ScalarItem(_Item): # Returns the size, in bytes, of this item. @property @abc.abstractmethod @@ -87,13 +105,13 @@ class _Item: # A repeatable item. -class _RepableItem(_Item): +class _RepableItem: pass # Single byte. -class _Byte(_RepableItem): - def __init__(self, val: int, text_loc: TextLoc): +class _Byte(_ScalarItem, _RepableItem): + def __init__(self, val: int, text_loc: TextLocation): super().__init__(text_loc) self._val = val @@ -107,12 +125,12 @@ class _Byte(_RepableItem): return 1 def __repr__(self): - return "_Byte({}, {})".format(hex(self._val), self._text_loc) + return "_Byte({}, {})".format(hex(self._val), repr(self._text_loc)) # String. -class _Str(_RepableItem): - def __init__(self, data: bytes, text_loc: TextLoc): +class _Str(_ScalarItem, _RepableItem): + def __init__(self, data: bytes, text_loc: TextLocation): super().__init__(text_loc) self._data = data @@ -126,7 +144,7 @@ class _Str(_RepableItem): return len(self._data) def __repr__(self): - return "_Str({}, {})".format(repr(self._data), self._text_loc) + return "_Str({}, {})".format(repr(self._data), repr(self._text_loc)) # Byte order. @@ -139,9 +157,9 @@ class ByteOrder(enum.Enum): LE = "le" -# Byte order. -class _Bo(_Item): - def __init__(self, bo: ByteOrder, text_loc: TextLoc): +# Byte order setting. +class _SetBo(_Item): + def __init__(self, bo: ByteOrder, text_loc: TextLocation): super().__init__(text_loc) self._bo = bo @@ -149,14 +167,13 @@ class _Bo(_Item): def bo(self): return self._bo - @property - def size(self): - return 0 + def __repr__(self): + return "_SetBo({}, {})".format(repr(self._bo), repr(self._text_loc)) # Label. class _Label(_Item): - def __init__(self, name: str, text_loc: TextLoc): + def __init__(self, name: str, text_loc: TextLocation): super().__init__(text_loc) self._name = name @@ -165,31 +182,46 @@ class _Label(_Item): def name(self): return self._name + def __repr__(self): + return "_Label({}, {})".format(repr(self._name), repr(self._text_loc)) + + +# Offset setting. +class _SetOffset(_Item): + def __init__(self, val: int, text_loc: TextLocation): + super().__init__(text_loc) + self._val = val + + # Offset value (bytes). @property - def size(self): - return 0 + def val(self): + return self._val def __repr__(self): - return "_Label({}, {})".format(repr(self._name), self._text_loc) + return "_SetOffset({}, {})".format(repr(self._val), repr(self._text_loc)) -# Offset. -class _Offset(_Item): - def __init__(self, val: int, text_loc: TextLoc): +# Offset alignment. +class _AlignOffset(_Item): + def __init__(self, val: int, pad_val: int, text_loc: TextLocation): super().__init__(text_loc) self._val = val + self._pad_val = pad_val - # Offset value. + # Alignment value (bits). @property def val(self): return self._val + # Padding byte value. @property - def size(self): - return 0 + def pad_val(self): + return self._pad_val def __repr__(self): - return "_Offset({}, {})".format(repr(self._val), self._text_loc) + return "_AlignOffset({}, {}, {})".format( + repr(self._val), repr(self._pad_val), repr(self._text_loc) + ) # Mixin of containing an AST expression and its string. @@ -209,10 +241,33 @@ class _ExprMixin: return self._expr -# Variable. -class _Var(_Item, _ExprMixin): +# Fill until some offset. +class _FillUntil(_Item, _ExprMixin): + def __init__( + self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation + ): + super().__init__(text_loc) + _ExprMixin.__init__(self, expr_str, expr) + self._pad_val = pad_val + + # Padding byte value. + @property + def pad_val(self): + return self._pad_val + + def __repr__(self): + return "_FillUntil({}, {}, {}, {})".format( + repr(self._expr_str), + repr(self._expr), + repr(self._pad_val), + repr(self._text_loc), + ) + + +# Variable assignment. +class _VarAssign(_Item, _ExprMixin): def __init__( - self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc + self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation ): super().__init__(text_loc) _ExprMixin.__init__(self, expr_str, expr) @@ -223,20 +278,19 @@ class _Var(_Item, _ExprMixin): def name(self): return self._name - @property - def size(self): - return 0 - def __repr__(self): - return "_Var({}, {}, {}, {})".format( - repr(self._name), repr(self._expr_str), repr(self._expr), self._text_loc + return "_VarAssign({}, {}, {}, {})".format( + repr(self._name), + repr(self._expr_str), + repr(self._expr), + repr(self._text_loc), ) -# Value, possibly needing more than one byte. -class _Val(_RepableItem, _ExprMixin): +# Fixed-length number, possibly needing more than one byte. +class _FlNum(_ScalarItem, _RepableItem, _ExprMixin): def __init__( - self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc + self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation ): super().__init__(text_loc) _ExprMixin.__init__(self, expr_str, expr) @@ -252,66 +306,196 @@ class _Val(_RepableItem, _ExprMixin): return self._len // 8 def __repr__(self): - return "_Val({}, {}, {}, {})".format( - repr(self._expr_str), repr(self._expr), repr(self._len), self._text_loc + return "_FlNum({}, {}, {}, {})".format( + repr(self._expr_str), + repr(self._expr), + repr(self._len), + repr(self._text_loc), + ) + + +# LEB128 integer. +class _Leb128Int(_Item, _RepableItem, _ExprMixin): + def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation): + super().__init__(text_loc) + _ExprMixin.__init__(self, expr_str, expr) + + def __repr__(self): + return "{}({}, {}, {})".format( + self.__class__.__name__, + repr(self._expr_str), + repr(self._expr), + repr(self._text_loc), ) -# Expression item type. -_ExprItemT = Union[_Val, _Var] +# Unsigned LEB128 integer. +class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin): + pass + + +# Signed LEB128 integer. +class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin): + pass # Group of items. -class _Group(_RepableItem): - def __init__(self, items: List[_Item], text_loc: TextLoc): +class _Group(_Item, _RepableItem): + def __init__(self, items: List[_Item], text_loc: TextLocation): super().__init__(text_loc) self._items = items - self._size = sum([item.size for item in self._items]) # Contained items. @property def items(self): return self._items - @property - def size(self): - return self._size - def __repr__(self): - return "_Group({}, {})".format(repr(self._items), self._text_loc) + return "_Group({}, {})".format(repr(self._items), repr(self._text_loc)) # Repetition item. -class _Rep(_Item): - def __init__(self, item: _RepableItem, mul: int, text_loc: TextLoc): +class _Rep(_Item, _ExprMixin): + def __init__( + self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation + ): super().__init__(text_loc) + _ExprMixin.__init__(self, expr_str, expr) self._item = item - self._mul = mul # Item to repeat. @property def item(self): return self._item - # Repetition multiplier. + def __repr__(self): + return "_Rep({}, {}, {}, {})".format( + repr(self._item), + repr(self._expr_str), + repr(self._expr), + repr(self._text_loc), + ) + + +# Conditional item. +class _Cond(_Item, _ExprMixin): + def __init__( + self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation + ): + super().__init__(text_loc) + _ExprMixin.__init__(self, expr_str, expr) + self._item = item + + # Conditional item. + @property + def item(self): + return self._item + + def __repr__(self): + return "_Cond({}, {}, {}, {})".format( + repr(self._item), + repr(self._expr_str), + repr(self._expr), + repr(self._text_loc), + ) + + +# Macro definition item. +class _MacroDef(_Item): + def __init__( + self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation + ): + super().__init__(text_loc) + self._name = name + self._param_names = param_names + self._group = group + + # Name. + @property + def name(self): + return self._name + + # Parameters. @property - def mul(self): - return self._mul + def param_names(self): + return self._param_names + # Contained items. @property - def size(self): - return self._item.size * self._mul + def group(self): + return self._group + + def __repr__(self): + return "_MacroDef({}, {}, {}, {})".format( + repr(self._name), + repr(self._param_names), + repr(self._group), + repr(self._text_loc), + ) + + +# Macro expansion parameter. +class _MacroExpParam: + def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation): + self._expr_str = expr_str + self._expr = expr + self._text_loc = text_loc + + # Expression string. + @property + def expr_str(self): + return self._expr_str + + # Expression. + @property + def expr(self): + return self._expr + + # Source text location. + @property + def text_loc(self): + return self._text_loc + + def __repr__(self): + return "_MacroExpParam({}, {}, {})".format( + repr(self._expr_str), repr(self._expr), repr(self._text_loc) + ) + + +# Macro expansion item. +class _MacroExp(_Item, _RepableItem): + def __init__( + self, + name: str, + params: List[_MacroExpParam], + text_loc: TextLocation, + ): + super().__init__(text_loc) + self._name = name + self._params = params + + # Name. + @property + def name(self): + return self._name + + # Parameters. + @property + def params(self): + return self._params def __repr__(self): - return "_Rep({}, {}, {})".format( - repr(self._item), repr(self._mul), self._text_loc + return "_MacroExp({}, {}, {})".format( + repr(self._name), + repr(self._params), + repr(self._text_loc), ) # A parsing error containing a message and a text location. class ParseError(RuntimeError): @classmethod - def _create(cls, msg: str, text_loc: TextLoc): + def _create(cls, msg: str, text_loc: TextLocation): self = cls.__new__(cls) self._init(msg, text_loc) return self @@ -319,7 +503,7 @@ class ParseError(RuntimeError): def __init__(self, *args, **kwargs): # type: ignore raise NotImplementedError - def _init(self, msg: str, text_loc: TextLoc): + def _init(self, msg: str, text_loc: TextLocation): super().__init__(msg) self._text_loc = text_loc @@ -330,18 +514,26 @@ class ParseError(RuntimeError): # Raises a parsing error, forwarding the parameters to the constructor. -def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn: +def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn: raise ParseError._create(msg, text_loc) # pyright: ignore[reportPrivateUsage] -# Variable (and label) dictionary type. -VarsT = Dict[str, int] +# Variables dictionary type (for type hints). +VariablesT = Dict[str, Union[int, float]] + + +# Labels dictionary type (for type hints). +LabelsT = Dict[str, int] # Python name pattern. _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*") +# Macro definition dictionary. +_MacroDefsT = Dict[str, _MacroDef] + + # Normand parser. # # The constructor accepts a Normand input. After building, use the `res` @@ -349,13 +541,14 @@ _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*") class _Parser: # Builds a parser to parse the Normand input `normand`, parsing # immediately. - def __init__(self, normand: str, variables: VarsT, labels: VarsT): + def __init__(self, normand: str, variables: VariablesT, labels: LabelsT): self._normand = normand self._at = 0 self._line_no = 1 self._col_no = 1 self._label_names = set(labels.keys()) self._var_names = set(variables.keys()) + self._macro_defs = {} # type: _MacroDefsT self._parse() # Result (main group). @@ -363,10 +556,15 @@ class _Parser: def res(self): return self._res + # Macro definitions. + @property + def macro_defs(self): + return self._macro_defs + # Current text location. @property def _text_loc(self): - return TextLoc._create( # pyright: ignore[reportPrivateUsage] + return TextLocation._create( # pyright: ignore[reportPrivateUsage] self._line_no, self._col_no ) @@ -428,7 +626,7 @@ class _Parser: # Pattern for _skip_ws_and_comments() _ws_or_syms_or_comments_pat = re.compile( - r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*" + r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*" ) # Skips as many whitespaces, insignificant symbol characters, and @@ -436,6 +634,13 @@ class _Parser: def _skip_ws_and_comments(self): self._try_parse_pat(self._ws_or_syms_or_comments_pat) + # Pattern for _skip_ws() + _ws_pat = re.compile(r"\s*") + + # Skips as many whitespaces as possible. + def _skip_ws(self): + self._try_parse_pat(self._ws_pat) + # Pattern for _try_parse_hex_byte() _nibble_pat = re.compile(r"[A-Fa-f0-9]") @@ -485,7 +690,7 @@ class _Parser: return _Byte(int("".join(bits), 2), begin_text_loc) # Patterns for _try_parse_dec_byte() - _dec_byte_prefix_pat = re.compile(r"\$\s*") + _dec_byte_prefix_pat = re.compile(r"\$") _dec_byte_val_pat = re.compile(r"(?P-?)(?P\d+)") # Tries to parse a decimal byte, returning a byte item on success. @@ -498,6 +703,7 @@ class _Parser: return # Expect the value + self._skip_ws() m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant") # Compute value @@ -508,7 +714,7 @@ class _Parser: _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc) # Two's complement - val = val % 256 + val %= 256 # Return item return _Byte(val, begin_text_loc) @@ -588,16 +794,20 @@ class _Parser: # Return item return _Str(data, begin_text_loc) + # Common right parenthesis pattern + _right_paren_pat = re.compile(r"\)") + # Patterns for _try_parse_group() - _group_prefix_pat = re.compile(r"\(") - _group_suffix_pat = re.compile(r"\)") + _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b") # Tries to parse a group, returning a group item on success. def _try_parse_group(self): begin_text_loc = self._text_loc # Match prefix - if self._try_parse_pat(self._group_prefix_pat) is None: + m_open = self._try_parse_pat(self._group_prefix_pat) + + if m_open is None: # No match return @@ -606,16 +816,22 @@ class _Parser: # Expect end of group self._skip_ws_and_comments() - self._expect_pat( - self._group_suffix_pat, "Expecting an item or `)` (end of group)" - ) + + if m_open.group(0) == "(": + pat = self._right_paren_pat + exp = ")" + else: + pat = self._block_end_pat + exp = "!end" + + self._expect_pat(pat, "Expecting an item or `{}` (end of group)".format(exp)) # Return item return _Group(items, begin_text_loc) # Returns a stripped expression string and an AST expression node # from the expression string `expr_str` at text location `text_loc`. - def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc): + def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation): # Create an expression node from the expression string expr_str = expr_str.strip().replace("\n", " ") @@ -629,13 +845,14 @@ class _Parser: return expr_str, expr - # Patterns for _try_parse_val_and_len() - _val_expr_pat = re.compile(r"([^}:]+):") - _val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)") + # Patterns for _try_parse_num_and_attr() + _val_expr_pat = re.compile(r"([^}:]+):\s*") + _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64") + _leb128_int_attr_pat = re.compile(r"(u|s)leb128") - # Tries to parse a value and length, returning a value item on - # success. - def _try_parse_val_and_len(self): + # Tries to parse a value and attribute (fixed length in bits or + # `leb128`), returning a value item on success. + def _try_parse_num_and_attr(self): begin_text_loc = self._text_loc # Match @@ -645,40 +862,52 @@ class _Parser: # No match return - # Expect a length - m_len = self._expect_pat( - self._val_len_pat, "Expecting a length (multiple of eight bits)" - ) - # Create an expression node from the expression string expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc) - # Return item - return _Val( - expr_str, - expr, - int(m_len.group(1)), - begin_text_loc, - ) + # Length? + m_attr = self._try_parse_pat(self._fl_num_len_attr_pat) - # Patterns for _try_parse_val_and_len() - _var_pat = re.compile( - r"(?P{})\s*=\s*(?P[^}}]+)".format(_py_name_pat.pattern) - ) + if m_attr is None: + # LEB128? + m_attr = self._try_parse_pat(self._leb128_int_attr_pat) + + if m_attr is None: + # At this point it's invalid + self._raise_error( + "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`" + ) - # Tries to parse a variable, returning a variable item on success. - def _try_parse_var(self): + # Return LEB128 integer item + cls = _ULeb128Int if m_attr.group(1) == "u" else _SLeb128Int + return cls(expr_str, expr, begin_text_loc) + else: + # Return fixed-length number item + return _FlNum( + expr_str, + expr, + int(m_attr.group(0)), + begin_text_loc, + ) + + # Patterns for _try_parse_var_assign() + _var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern)) + _var_assign_expr_pat = re.compile(r"[^}]+") + + # Tries to parse a variable assignment, returning a variable + # assignment item on success. + def _try_parse_var_assign(self): begin_text_loc = self._text_loc # Match - m = self._try_parse_pat(self._var_pat) + m = self._try_parse_pat(self._var_assign_name_equal_pat) if m is None: # No match return # Validate name - name = m.group("name") + name = m.group(1) if name == _icitte_name: _raise_error( @@ -688,26 +917,30 @@ class _Parser: if name in self._label_names: _raise_error("Existing label named `{}`".format(name), begin_text_loc) - # Add to known variable names - self._var_names.add(name) + # Expect an expression + self._skip_ws() + m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression") # Create an expression node from the expression string - expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc) + expr_str, expr = self._ast_expr_from_str(m.group(0), begin_text_loc) + + # Add to known variable names + self._var_names.add(name) # Return item - return _Var( + return _VarAssign( name, expr_str, expr, begin_text_loc, ) - # Pattern for _try_parse_bo_name() + # Pattern for _try_parse_set_bo() _bo_pat = re.compile(r"[bl]e") - # Tries to parse a byte order name, returning a byte order item on - # success. - def _try_parse_bo_name(self): + # Tries to parse a byte order name, returning a byte order setting + # item on success. + def _try_parse_set_bo(self): begin_text_loc = self._text_loc # Match @@ -719,48 +952,54 @@ class _Parser: # Return corresponding item if m.group(0) == "be": - return _Bo(ByteOrder.BE, begin_text_loc) + return _SetBo(ByteOrder.BE, begin_text_loc) else: assert m.group(0) == "le" - return _Bo(ByteOrder.LE, begin_text_loc) + return _SetBo(ByteOrder.LE, begin_text_loc) # Patterns for _try_parse_val_or_bo() - _val_var_bo_prefix_pat = re.compile(r"\{\s*") - _val_var_bo_suffix_pat = re.compile(r"\s*}") + _val_var_assign_set_bo_prefix_pat = re.compile(r"\{") + _val_var_assign_set_bo_suffix_pat = re.compile(r"\}") - # Tries to parse a value, a variable, or a byte order, returning an - # item on success. - def _try_parse_val_or_var_or_bo(self): + # Tries to parse a value, a variable assignment, or a byte order + # setting, returning an item on success. + def _try_parse_val_or_var_assign_or_set_bo(self): # Match prefix - if self._try_parse_pat(self._val_var_bo_prefix_pat) is None: + if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None: # No match return - # Variable item? - item = self._try_parse_var() + self._skip_ws() + + # Variable assignment item? + item = self._try_parse_var_assign() if item is None: - # Value item? - item = self._try_parse_val_and_len() + # Number item? + item = self._try_parse_num_and_attr() if item is None: - # Byte order item? - item = self._try_parse_bo_name() + # Byte order setting item? + item = self._try_parse_set_bo() if item is None: # At this point it's invalid - self._raise_error("Expecting a value, a variable, or a byte order") + self._raise_error( + "Expecting a fixed-length number, a variable assignment, or a byte order setting" + ) # Expect suffix - self._expect_pat(self._val_var_bo_suffix_pat, "Expecting `}`") + self._skip_ws() + self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`") return item - # Pattern for _try_parse_offset_val() and _try_parse_rep() + # Common constant integer patterns _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+") + _const_int_pat = re.compile(r"(?P-)?(?:{})".format(_pos_const_int_pat.pattern)) - # Tries to parse an offset value (after the initial `<`), returning - # an offset item on success. - def _try_parse_offset_val(self): + # Tries to parse an offset setting value (after the initial `<`), + # returning an offset item on success. + def _try_parse_set_offset_val(self): begin_text_loc = self._text_loc # Match @@ -771,7 +1010,7 @@ class _Parser: return # Return item - return _Offset(int(m.group(0), 0), begin_text_loc) + return _SetOffset(int(m.group(0), 0), begin_text_loc) # Tries to parse a label name (after the initial `<`), returning a # label item on success. @@ -805,19 +1044,21 @@ class _Parser: # Return item return _Label(name, begin_text_loc) - # Patterns for _try_parse_label_or_offset() - _label_offset_prefix_pat = re.compile(r"<\s*") - _label_offset_suffix_pat = re.compile(r"\s*>") + # Patterns for _try_parse_label_or_set_offset() + _label_set_offset_prefix_pat = re.compile(r"<") + _label_set_offset_suffix_pat = re.compile(r">") - # Tries to parse a label or an offset, returning an item on success. - def _try_parse_label_or_offset(self): + # Tries to parse a label or an offset setting, returning an item on + # success. + def _try_parse_label_or_set_offset(self): # Match prefix - if self._try_parse_pat(self._label_offset_prefix_pat) is None: + if self._try_parse_pat(self._label_set_offset_prefix_pat) is None: # No match return - # Offset item? - item = self._try_parse_offset_val() + # Offset setting item? + self._skip_ws() + item = self._try_parse_set_offset_val() if item is None: # Label item? @@ -825,12 +1066,392 @@ class _Parser: if item is None: # At this point it's invalid - self._raise_error("Expecting a label name or an offset value") + self._raise_error("Expecting a label name or an offset setting value") # Expect suffix - self._expect_pat(self._label_offset_suffix_pat, "Expecting `>`") + self._skip_ws() + self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`") return item + # Pattern for _parse_pad_val() + _pad_val_prefix_pat = re.compile(r"~") + + # Tries to parse a padding value, returning the padding value, or 0 + # if none. + def _parse_pad_val(self): + # Padding value? + self._skip_ws() + pad_val = 0 + + if self._try_parse_pat(self._pad_val_prefix_pat) is not None: + self._skip_ws() + pad_val_text_loc = self._text_loc + m = self._expect_pat( + self._pos_const_int_pat, + "Expecting a positive constant integer (byte value)", + ) + + # Validate + pad_val = int(m.group(0), 0) + + if pad_val > 255: + _raise_error( + "Invalid padding byte value {}".format(pad_val), + pad_val_text_loc, + ) + + return pad_val + + # Patterns for _try_parse_align_offset() + _align_offset_prefix_pat = re.compile(r"@") + _align_offset_val_pat = re.compile(r"\d+") + + # Tries to parse an offset alignment, returning an offset alignment + # item on success. + def _try_parse_align_offset(self): + begin_text_loc = self._text_loc + + # Match prefix + if self._try_parse_pat(self._align_offset_prefix_pat) is None: + # No match + return + + # Expect an alignment + self._skip_ws() + align_text_loc = self._text_loc + m = self._expect_pat( + self._align_offset_val_pat, + "Expecting an alignment (positive multiple of eight bits)", + ) + + # Validate alignment + val = int(m.group(0)) + + if val <= 0 or (val % 8) != 0: + _raise_error( + "Invalid alignment value {} (not a positive multiple of eight)".format( + val + ), + align_text_loc, + ) + + # Padding value + pad_val = self._parse_pad_val() + + # Return item + return _AlignOffset(val, pad_val, begin_text_loc) + + # Patterns for _try_parse_fill_until() + _fill_until_prefix_pat = re.compile(r"\+") + _fill_until_pad_val_prefix_pat = re.compile(r"~") + + # Tries to parse a filling, returning a filling item on success. + def _try_parse_fill_until(self): + begin_text_loc = self._text_loc + + # Match prefix + if self._try_parse_pat(self._fill_until_prefix_pat) is None: + # No match + return + + # Expect expression + self._skip_ws() + expr_str, expr = self._expect_const_int_name_expr(True) + + # Padding value + pad_val = self._parse_pad_val() + + # Return item + return _FillUntil(expr_str, expr, pad_val, begin_text_loc) + + # Patterns for _expect_rep_mul_expr() + _inner_expr_prefix_pat = re.compile(r"\{") + _inner_expr_pat = re.compile(r"[^}]+") + _inner_expr_suffix_pat = re.compile(r"\}") + + # Parses a constant integer if `accept_const_int` is `True` + # (possibly negative if `allow_neg` is `True`), a name, or an + # expression within `{` and `}`. + def _expect_const_int_name_expr( + self, accept_const_int: bool, allow_neg: bool = False + ): + expr_text_loc = self._text_loc + + # Constant integer? + m = None + + if accept_const_int: + m = self._try_parse_pat(self._const_int_pat) + + if m is None: + # Name? + m = self._try_parse_pat(_py_name_pat) + + if m is None: + # Expression? + if self._try_parse_pat(self._inner_expr_prefix_pat) is None: + pos_msg = "" if allow_neg else "positive " + + if accept_const_int: + mid_msg = "a {}constant integer, a name, or `{{`".format( + pos_msg + ) + else: + mid_msg = "a name or `{`" + + # At this point it's invalid + self._raise_error("Expecting {}".format(mid_msg)) + + # Expect an expression + self._skip_ws() + expr_text_loc = self._text_loc + m = self._expect_pat(self._inner_expr_pat, "Expecting an expression") + expr_str = m.group(0) + + # Expect `}` + self._skip_ws() + self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`") + else: + expr_str = m.group(0) + else: + if m.group("neg") == "-" and not allow_neg: + _raise_error("Expecting a positive constant integer", expr_text_loc) + + expr_str = m.group(0) + + return self._ast_expr_from_str(expr_str, expr_text_loc) + + # Parses the multiplier expression of a repetition (block or + # post-item) and returns the expression string and AST node. + def _expect_rep_mul_expr(self): + return self._expect_const_int_name_expr(True) + + # Common block end pattern + _block_end_pat = re.compile(r"!end\b") + + # Pattern for _try_parse_rep_block() + _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b") + + # Tries to parse a repetition block, returning a repetition item on + # success. + def _try_parse_rep_block(self): + begin_text_loc = self._text_loc + + # Match prefix + if self._try_parse_pat(self._rep_block_prefix_pat) is None: + # No match + return + + # Expect expression + self._skip_ws_and_comments() + expr_str, expr = self._expect_rep_mul_expr() + + # Parse items + self._skip_ws_and_comments() + items_text_loc = self._text_loc + items = self._parse_items() + + # Expect end of block + self._skip_ws_and_comments() + self._expect_pat( + self._block_end_pat, "Expecting an item or `!end` (end of repetition block)" + ) + + # Return item + return _Rep(_Group(items, items_text_loc), expr_str, expr, begin_text_loc) + + # Pattern for _try_parse_cond_block() + _cond_block_prefix_pat = re.compile(r"!if\b") + + # Tries to parse a conditional block, returning a conditional item + # on success. + def _try_parse_cond_block(self): + begin_text_loc = self._text_loc + + # Match prefix + if self._try_parse_pat(self._cond_block_prefix_pat) is None: + # No match + return + + # Expect expression + self._skip_ws_and_comments() + expr_str, expr = self._expect_const_int_name_expr(False) + + # Parse items + self._skip_ws_and_comments() + items_text_loc = self._text_loc + items = self._parse_items() + + # Expect end of block + self._skip_ws_and_comments() + self._expect_pat( + self._block_end_pat, + "Expecting an item or `!end` (end of conditional block)", + ) + + # Return item + return _Cond(_Group(items, items_text_loc), expr_str, expr, begin_text_loc) + + # Common left parenthesis pattern + _left_paren_pat = re.compile(r"\(") + + # Patterns for _try_parse_macro_def() and _try_parse_macro_exp() + _macro_params_comma_pat = re.compile(",") + + # Patterns for _try_parse_macro_def() + _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b") + + # Tries to parse a macro definition, adding it to `self._macro_defs` + # and returning `True` on success. + def _try_parse_macro_def(self): + begin_text_loc = self._text_loc + + # Match prefix + if self._try_parse_pat(self._macro_def_prefix_pat) is None: + # No match + return False + + # Expect a name + self._skip_ws() + name_text_loc = self._text_loc + m = self._expect_pat(_py_name_pat, "Expecting a valid macro name") + + # Validate name + name = m.group(0) + + if name in self._macro_defs: + _raise_error("Duplicate macro named `{}`".format(name), name_text_loc) + + # Expect `(` + self._skip_ws() + self._expect_pat(self._left_paren_pat, "Expecting `(`") + + # Try to parse comma-separated parameter names + param_names = [] # type: List[str] + expect_comma = False + + while True: + self._skip_ws() + + # End? + if self._try_parse_pat(self._right_paren_pat) is not None: + # End + break + + # Comma? + if expect_comma: + self._expect_pat(self._macro_params_comma_pat, "Expecting `,`") + + # Expect parameter name + self._skip_ws() + param_text_loc = self._text_loc + m = self._expect_pat(_py_name_pat, "Expecting valid parameter name") + + if m.group(0) in param_names: + _raise_error( + "Duplicate macro parameter named `{}`".format(m.group(0)), + param_text_loc, + ) + + param_names.append(m.group(0)) + expect_comma = True + + # Expect items + self._skip_ws_and_comments() + items_text_loc = self._text_loc + old_var_names = self._var_names.copy() + old_label_names = self._label_names.copy() + self._var_names = set() # type: Set[str] + self._label_names = set() # type: Set[str] + items = self._parse_items() + self._var_names = old_var_names + self._label_names = old_label_names + + # Expect suffix + self._expect_pat( + self._block_end_pat, "Expecting an item or `!end` (end of macro block)" + ) + + # Register macro + self._macro_defs[name] = _MacroDef( + name, param_names, _Group(items, items_text_loc), begin_text_loc + ) + + return True + + # Patterns for _try_parse_macro_exp() + _macro_exp_prefix_pat = re.compile(r"m\b") + _macro_exp_colon_pat = re.compile(r":") + + # Tries to parse a macro expansion, returning a macro expansion item + # on success. + def _try_parse_macro_exp(self): + begin_text_loc = self._text_loc + + # Match prefix + if self._try_parse_pat(self._macro_exp_prefix_pat) is None: + # No match + return + + # Expect `:` + self._skip_ws() + self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`") + + # Expect a macro name + self._skip_ws() + name_text_loc = self._text_loc + m = self._expect_pat(_py_name_pat, "Expecting a valid macro name") + + # Validate name + name = m.group(0) + macro_def = self._macro_defs.get(name) + + if macro_def is None: + _raise_error("Unknown macro name `{}`".format(name), name_text_loc) + + # Expect `(` + self._skip_ws() + self._expect_pat(self._left_paren_pat, "Expecting `(`") + + # Try to parse comma-separated parameter values + params_text_loc = self._text_loc + params = [] # type: List[_MacroExpParam] + expect_comma = False + + while True: + self._skip_ws() + + # End? + if self._try_parse_pat(self._right_paren_pat) is not None: + # End + break + + # Expect a Value + if expect_comma: + self._expect_pat(self._macro_params_comma_pat, "Expecting `,`") + + self._skip_ws() + param_text_loc = self._text_loc + params.append( + _MacroExpParam( + *self._expect_const_int_name_expr(True, True), param_text_loc + ) + ) + expect_comma = True + + # Validate parameter values + if len(params) != len(macro_def.param_names): + sing_plur = "" if len(params) == 1 else "s" + _raise_error( + "Macro expansion passes {} parameter{} while the definition expects {}".format( + len(params), sing_plur, len(macro_def.param_names) + ), + params_text_loc, + ) + + # Return item + return _MacroExp(name, params, begin_text_loc) + # Tries to parse a base item (anything except a repetition), # returning it on success. def _try_parse_base_item(self): @@ -846,14 +1467,26 @@ class _Parser: if item is not None: return item - # Value, variable, or byte order item? - item = self._try_parse_val_or_var_or_bo() + # Value, variable assignment, or byte order setting item? + item = self._try_parse_val_or_var_assign_or_set_bo() if item is not None: return item - # Label or offset item? - item = self._try_parse_label_or_offset() + # Label or offset setting item? + item = self._try_parse_label_or_set_offset() + + if item is not None: + return item + + # Offset alignment item? + item = self._try_parse_align_offset() + + if item is not None: + return item + + # Filling item? + item = self._try_parse_fill_until() if item is not None: return item @@ -864,23 +1497,38 @@ class _Parser: if item is not None: return item - # Pattern for _try_parse_rep() - _rep_prefix_pat = re.compile(r"\*\s*") + # Repetition block item? + item = self._try_parse_rep_block() + + if item is not None: + return item + + # Conditional block item? + item = self._try_parse_cond_block() + + if item is not None: + return item + + # Macro expansion? + item = self._try_parse_macro_exp() - # Tries to parse a repetition, returning the multiplier on success, - # or 1 otherwise. - def _try_parse_rep(self): + if item is not None: + return item + + # Pattern for _try_parse_rep_post() + _rep_post_prefix_pat = re.compile(r"\*") + + # Tries to parse a post-item repetition, returning the expression + # string and AST expression node on success. + def _try_parse_rep_post(self): # Match prefix - if self._try_parse_pat(self._rep_prefix_pat) is None: + if self._try_parse_pat(self._rep_post_prefix_pat) is None: # No match - return 1 + return - # Expect and return a decimal multiplier + # Return expression string and AST expression self._skip_ws_and_comments() - m = self._expect_pat( - self._pos_const_int_pat, "Expecting a positive integral multiplier" - ) - return int(m.group(0), 0) + return self._expect_rep_mul_expr() # Tries to parse an item, possibly followed by a repetition, # returning `True` on success. @@ -889,25 +1537,20 @@ class _Parser: def _try_append_item(self, items: List[_Item]): self._skip_ws_and_comments() - # Parse a base item + # Base item item = self._try_parse_base_item() if item is None: - # No item - return False + return # Parse repetition if the base item is repeatable if isinstance(item, _RepableItem): self._skip_ws_and_comments() rep_text_loc = self._text_loc - rep = self._try_parse_rep() + rep_ret = self._try_parse_rep_post() - if rep == 0: - # No item, but that's okay - return True - elif rep > 1: - # Convert to repetition item - item = _Rep(item, rep, rep_text_loc) + if rep_ret is not None: + item = _Rep(item, *rep_ret, rep_text_loc) items.append(item) return True @@ -915,12 +1558,18 @@ class _Parser: # Parses and returns items, skipping whitespaces, insignificant # symbols, and comments when allowed, and stopping at the first # unknown character. - def _parse_items(self) -> List[_Item]: + # + # Accepts and registers macro definitions if `accept_macro_defs` + # is `True`. + def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]: items = [] # type: List[_Item] while self._isnt_done(): # Try to append item if not self._try_append_item(items): + if accept_macro_defs and self._try_parse_macro_def(): + continue + # Unknown at this point break @@ -935,7 +1584,7 @@ class _Parser: return # Parse first level items - items = self._parse_items() + items = self._parse_items(True) # Make sure there's nothing left self._skip_ws_and_comments() @@ -955,8 +1604,8 @@ class ParseResult: def _create( cls, data: bytearray, - variables: VarsT, - labels: VarsT, + variables: VariablesT, + labels: LabelsT, offset: int, bo: Optional[ByteOrder], ): @@ -970,8 +1619,8 @@ class ParseResult: def _init( self, data: bytearray, - variables: VarsT, - labels: VarsT, + variables: VariablesT, + labels: LabelsT, offset: int, bo: Optional[ByteOrder], ): @@ -1018,74 +1667,148 @@ def _raise_error_for_item(msg: str, item: _Item) -> NoReturn: _icitte_name = "ICITTE" -# Value expression validator. -class _ExprValidator(ast.NodeVisitor): - def __init__(self, item: _ExprItemT, syms: VarsT): - self._item = item - self._syms = syms +# Base node visitor. +# +# Calls the _visit_name() method for each name node which isn't the name +# of a call. +class _NodeVisitor(ast.NodeVisitor): + def __init__(self): self._parent_is_call = False def generic_visit(self, node: ast.AST): if type(node) is ast.Call: self._parent_is_call = True elif type(node) is ast.Name and not self._parent_is_call: - # Make sure the name refers to a known label name - if node.id != _icitte_name and node.id not in self._syms: - _raise_error( - "Unknown variable/label name `{}` in expression `{}`".format( - node.id, self._item.expr_str - ), - self._item.text_loc, - ) - - # TODO: Restrict the set of allowed node types + self._visit_name(node.id) super().generic_visit(node) self._parent_is_call = False + @abc.abstractmethod + def _visit_name(self, name: str): + ... + -# Keeper of labels for a given group instance. -# -# A group instance is one iteration of a given group. -class _GroupInstanceLabels: - def __init__(self): - self._instance_labels = {} # type: Dict[_Group, Dict[int, VarsT]] +# Expression validator: validates that all the names within the +# expression are allowed. +class _ExprValidator(_NodeVisitor): + def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]): + super().__init__() + self._expr_str = expr_str + self._text_loc = text_loc + self._allowed_names = allowed_names + + def _visit_name(self, name: str): + # Make sure the name refers to a known and reachable + # variable/label name. + if name != _icitte_name and name not in self._allowed_names: + msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format( + name, self._expr_str + ) + + allowed_names = self._allowed_names.copy() + allowed_names.add(_icitte_name) + + if len(allowed_names) > 0: + allowed_names_str = ", ".join( + sorted(["`{}`".format(name) for name in allowed_names]) + ) + msg += "; the legal names are {{{}}}".format(allowed_names_str) + + _raise_error( + msg, + self._text_loc, + ) - # Assigns the labels `labels` to a new instance of `group`. - def add(self, group: _Group, labels: VarsT): - if group not in self._instance_labels: - self._instance_labels[group] = {} - spec_instance_labels = self._instance_labels[group] - spec_instance_labels[len(spec_instance_labels)] = labels.copy() +# Generator state. +class _GenState: + def __init__( + self, + variables: VariablesT, + labels: LabelsT, + offset: int, + bo: Optional[ByteOrder], + ): + self.variables = variables.copy() + self.labels = labels.copy() + self.offset = offset + self.bo = bo + + def __repr__(self): + return "_GenState({}, {}, {}, {})".format( + repr(self.variables), repr(self.labels), repr(self.offset), repr(self.bo) + ) + + +# Fixed-length number item instance. +class _FlNumItemInst: + def __init__(self, item: _FlNum, offset_in_data: int, state: _GenState): + self._item = item + self._offset_in_data = offset_in_data + self._state = state + + @property + def item(self): + return self._item + + @property + def offset_in_data(self): + return self._offset_in_data - # Returns the labels (not a copy) of the instance `instance_index` - # of the group `group`. - def labels(self, group: _Group, instance_index: int): - return self._instance_labels[group][instance_index] + @property + def state(self): + return self._state -# Generator of data and labels from a group item. +# Generator of data and final state from a group item. # # Generation happens in memory at construction time. After building, use # the `data`, `variables`, `labels`, `offset`, and `bo` properties to # get the resulting context. +# +# The steps of generation are: +# +# 1. Handle each item in prefix order. +# +# The handlers append bytes to `self._data` and update some current +# state object (`_GenState` instance). +# +# When handling a fixed-length number item, try to evaluate its +# expression using the current state. If this fails, then it might be +# because the expression refers to a "future" label: save the current +# offset in `self._data` (generated data) and a snapshot of the +# current state within `self._fl_num_item_insts` (`_FlNumItemInst` +# object). _gen_fl_num_item_insts() will deal with this later. +# +# When handling the items of a group, keep a map of immediate label +# names to their offset. Then, after having processed all the items, +# update the relevant saved state snapshots in +# `self._fl_num_item_insts` with those immediate label values. +# _gen_fl_num_item_insts() will deal with this later. +# +# 2. Handle all the fixed-length number item instances of which the +# expression evaluation failed before. +# +# At this point, `self._fl_num_item_insts` contains everything that's +# needed to evaluate the expressions, including the values of +# "future" labels from the point of view of some fixed-length number +# item instance. +# +# If an evaluation fails at this point, then it's a user error. class _Gen: def __init__( self, group: _Group, - variables: VarsT, - labels: VarsT, + macro_defs: _MacroDefsT, + variables: VariablesT, + labels: LabelsT, offset: int, bo: Optional[ByteOrder], ): - self._group_instance_labels = _GroupInstanceLabels() - self._resolve_labels(group, offset, labels.copy()) - self._vars = variables.copy() - self._offset = offset - self._bo = bo - self._main_group = group - self._gen() + self._macro_defs = macro_defs + self._fl_num_item_insts = [] # type: List[_FlNumItemInst] + self._gen(group, _GenState(variables, labels, offset, bo)) # Generated bytes. @property @@ -1095,137 +1818,323 @@ class _Gen: # Updated variables. @property def variables(self): - return self._vars + return self._final_state.variables # Updated main group labels. @property def labels(self): - return self._group_instance_labels.labels(self._main_group, 0) + return self._final_state.labels # Updated offset. @property def offset(self): - return self._offset + return self._final_state.offset # Updated byte order. @property def bo(self): - return self._bo + return self._final_state.bo - # Fills `self._group_instance_labels` with the labels for each group - # instance in `item`, starting at current offset `offset` with the - # current labels `labels`. + # Evaluates the expression `expr` of which the original string is + # `expr_str` at the location `text_loc` considering the current + # generation state `state`. # - # Returns the new current offset. - def _resolve_labels(self, item: _Item, offset: int, labels: VarsT) -> int: - if type(item) is _Group: - # First pass: compute immediate labels of this instance - group_labels = labels.copy() - group_offset = offset - - for subitem in item.items: - if type(subitem) is _Offset: - group_offset = subitem.val - elif type(subitem) is _Label: - assert subitem.name not in group_labels - group_labels[subitem.name] = group_offset - else: - group_offset += subitem.size - - # Add to group instance labels - self._group_instance_labels.add(item, group_labels) - - # Second pass: handle each item - for subitem in item.items: - offset = self._resolve_labels(subitem, offset, group_labels) - elif type(item) is _Rep: - for _ in range(item.mul): - offset = self._resolve_labels(item.item, offset, labels) - elif type(item) is _Offset: - offset = item.val - else: - offset += item.size - - return offset - - def _handle_byte_item(self, item: _Byte): - self._data.append(item.val) - self._offset += item.size - - def _handle_str_item(self, item: _Str): - self._data += item.data - self._offset += item.size - - def _handle_bo_item(self, item: _Bo): - self._bo = item.bo - - def _eval_expr(self, item: _ExprItemT): - # Get the labels of the current group instance as the initial - # symbols (copied because we're adding stuff). - assert self._cur_group is not None - syms = self._group_instance_labels.labels( - self._cur_group, self._group_instance_indexes[self._cur_group] - ).copy() + # If `allow_float` is `True`, then the type of the result may be + # `float` too. + @staticmethod + def _eval_expr( + expr_str: str, + expr: ast.Expression, + text_loc: TextLocation, + state: _GenState, + allow_float: bool = False, + ): + syms = {} # type: VariablesT + syms.update(state.labels) - # Set the `ICITTE` name to the current offset (before encoding) - syms[_icitte_name] = self._offset + # Set the `ICITTE` name to the current offset + syms[_icitte_name] = state.offset # Add the current variables - syms.update(self._vars) + syms.update(state.variables) # Validate the node and its children - _ExprValidator(item, syms).visit(item.expr) + _ExprValidator(expr_str, text_loc, set(syms.keys())).visit(expr) # Compile and evaluate expression node try: - val = eval(compile(item.expr, "", "eval"), None, syms) + val = eval(compile(expr, "", "eval"), None, syms) except Exception as exc: + _raise_error( + "Failed to evaluate expression `{}`: {}".format(expr_str, exc), + text_loc, + ) + + # Convert `bool` result type to `int` to normalize + if type(val) is bool: + val = int(val) + + # Validate result type + expected_types = {int} # type: Set[type] + type_msg = "`int`" + + if allow_float: + expected_types.add(float) + type_msg += " or `float`" + + if type(val) not in expected_types: + _raise_error( + "Invalid expression `{}`: expecting result type {}, not `{}`".format( + expr_str, type_msg, type(val).__name__ + ), + text_loc, + ) + + return val + + # Evaluates the expression of `item` considering the current + # generation state `state`. + # + # If `allow_float` is `True`, then the type of the result may be + # `float` too. + @staticmethod + def _eval_item_expr( + item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond], + state: _GenState, + allow_float: bool = False, + ): + return _Gen._eval_expr( + item.expr_str, item.expr, item.text_loc, state, allow_float + ) + + # Handles the byte item `item`. + def _handle_byte_item(self, item: _Byte, state: _GenState): + self._data.append(item.val) + state.offset += item.size + + # Handles the string item `item`. + def _handle_str_item(self, item: _Str, state: _GenState): + self._data += item.data + state.offset += item.size + + # Handles the byte order setting item `item`. + def _handle_set_bo_item(self, item: _SetBo, state: _GenState): + # Update current byte order + state.bo = item.bo + + # Handles the variable assignment item `item`. + def _handle_var_assign_item(self, item: _VarAssign, state: _GenState): + # Update variable + state.variables[item.name] = self._eval_item_expr(item, state, True) + + # Handles the fixed-length number item `item`. + def _handle_fl_num_item(self, item: _FlNum, state: _GenState): + # Validate current byte order + if state.bo is None and item.len > 8: _raise_error_for_item( - "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc), + "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format( + item.expr_str + ), item, ) + # Try an immediate evaluation. If it fails, then keep everything + # needed to (try to) generate the bytes of this item later. + try: + data = self._gen_fl_num_item_inst_data(item, state) + except Exception: + self._fl_num_item_insts.append( + _FlNumItemInst(item, len(self._data), copy.deepcopy(state)) + ) + + # Reserve space in `self._data` for this instance + data = bytes([0] * (item.len // 8)) + + # Append bytes + self._data += data + + # Update offset + state.offset += len(data) + + # Returns the size, in bytes, required to encode the value `val` + # with LEB128 (signed version if `is_signed` is `True`). + @staticmethod + def _leb128_size_for_val(val: int, is_signed: bool): + if val < 0: + # Equivalent upper bound. + # + # For example, if `val` is -128, then the full integer for + # this number of bits would be [-128, 127]. + val = -val - 1 + + # Number of bits (add one for the sign if needed) + bits = val.bit_length() + int(is_signed) + + if bits == 0: + bits = 1 + + # Seven bits per byte + return math.ceil(bits / 7) + + # Handles the LEB128 integer item `item`. + def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState): + # Compute value + val = self._eval_item_expr(item, state, False) + + # Size in bytes + size = self._leb128_size_for_val(val, type(item) is _SLeb128Int) + + # For each byte + for _ in range(size): + # Seven LSBs, MSB of the byte set (continue) + self._data.append((val & 0x7F) | 0x80) + val >>= 7 + + # Clear MSB of last byte (stop) + self._data[-1] &= ~0x80 + + # Update offset + state.offset += size + + # Handles the group item `item`, removing the immediate labels from + # `state` at the end if `remove_immediate_labels` is `True`. + def _handle_group_item( + self, item: _Group, state: _GenState, remove_immediate_labels: bool = True + ): + first_fl_num_item_inst_index = len(self._fl_num_item_insts) + immediate_labels = {} # type: LabelsT + + # Handle each item + for subitem in item.items: + if type(subitem) is _Label: + # Add to local immediate labels + immediate_labels[subitem.name] = state.offset + + self._handle_item(subitem, state) + + # Remove immediate labels from current state if needed + if remove_immediate_labels: + for name in immediate_labels: + del state.labels[name] + + # Add all immediate labels to all state snapshots since + # `first_fl_num_item_inst_index`. + for inst in self._fl_num_item_insts[first_fl_num_item_inst_index:]: + inst.state.labels.update(immediate_labels) + + # Handles the repetition item `item`. + def _handle_rep_item(self, item: _Rep, state: _GenState): + # Compute the repetition count + mul = _Gen._eval_item_expr(item, state) + # Validate result - if type(val) is not int: + if mul < 0: _raise_error_for_item( - "Invalid expression `{}`: unexpected result type `{}`".format( - item.expr_str, type(val).__name__ + "Invalid expression `{}`: unexpected negative result {:,}".format( + item.expr_str, mul ), item, ) - return val + # Generate item data `mul` times + for _ in range(mul): + self._handle_item(item.item, state) - def _handle_var_item(self, item: _Var): - # Update variable - self._vars[item.name] = self._eval_expr(item) + # Handles the conditional item `item`. + def _handle_cond_item(self, item: _Rep, state: _GenState): + # Compute the conditional value + val = _Gen._eval_item_expr(item, state) - def _handle_val_item(self, item: _Val): - # Compute value - val = self._eval_expr(item) + # Generate item data if needed + if val: + self._handle_item(item.item, state) - # Validate range - if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1: + # Evaluates the parameters of the macro expansion item `item` + # considering the initial state `init_state` and returns a new state + # to handle the items of the macro. + def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState): + # New state + exp_state = _GenState({}, {}, init_state.offset, init_state.bo) + + # Evaluate the parameter expressions + macro_def = self._macro_defs[item.name] + + for param_name, param in zip(macro_def.param_names, item.params): + exp_state.variables[param_name] = _Gen._eval_expr( + param.expr_str, param.expr, param.text_loc, init_state, True + ) + + return exp_state + + # Handles the macro expansion item `item`. + def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState): + # New state + exp_state = self._eval_macro_exp_params(item, state) + + # Process the contained group + init_data_size = len(self._data) + self._handle_item(self._macro_defs[item.name].group, exp_state) + + # Update state offset and return + state.offset += len(self._data) - init_data_size + + # Handles the offset setting item `item`. + def _handle_set_offset_item(self, item: _SetOffset, state: _GenState): + state.offset = item.val + + # Handles the offset alignment item `item` (adds padding). + def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState): + init_offset = state.offset + align_bytes = item.val // 8 + state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes + self._data += bytes([item.pad_val] * (state.offset - init_offset)) + + # Handles the filling item `item` (adds padding). + def _handle_fill_until_item(self, item: _FillUntil, state: _GenState): + # Compute the new offset + new_offset = _Gen._eval_item_expr(item, state) + + # Validate the new offset + if new_offset < state.offset: _raise_error_for_item( - "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format( - val, item.len, item.expr_str, self._offset + "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format( + item.expr_str, new_offset, state.offset ), item, ) - # Encode result on 64 bits (to extend the sign bit whatever the - # value of `item.len`). - if self._bo is None and item.len > 8: + # Fill + self._data += bytes([item.pad_val] * (new_offset - state.offset)) + + # Update offset + state.offset = new_offset + + # Handles the label item `item`. + def _handle_label_item(self, item: _Label, state: _GenState): + state.labels[item.name] = state.offset + + # Handles the item `item`, returning the updated next repetition + # instance. + def _handle_item(self, item: _Item, state: _GenState): + return self._item_handlers[type(item)](item, state) + + # Generates the data for a fixed-length integer item instance having + # the value `val` and returns it. + def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState): + # Validate range + if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1: _raise_error_for_item( - "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format( - item.expr_str + "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format( + val, item.len, item.expr_str ), item, ) + # Encode result on 64 bits (to extend the sign bit whatever the + # value of `item.len`). data = struct.pack( "{}{}".format( - ">" if self._bo in (None, ByteOrder.BE) else "<", + ">" if state.bo in (None, ByteOrder.BE) else "<", "Q" if val >= 0 else "q", ), val, @@ -1234,67 +2143,96 @@ class _Gen: # Keep only the requested length len_bytes = item.len // 8 - if self._bo in (None, ByteOrder.BE): + if state.bo in (None, ByteOrder.BE): # Big endian: keep last bytes data = data[-len_bytes:] else: # Little endian: keep first bytes - assert self._bo == ByteOrder.LE + assert state.bo == ByteOrder.LE data = data[:len_bytes] - # Append to current bytes and update offset - self._data += data - self._offset += len(data) - - def _handle_group_item(self, item: _Group): - # Update the instance index of `item` - if item not in self._group_instance_indexes: - self._group_instance_indexes[item] = 0 - else: - self._group_instance_indexes[item] += 1 - - # Changed current group - old_cur_group = self._cur_group - self._cur_group = item - - # Handle each item - for subitem in item.items: - self._handle_item(subitem) - - # Restore current group - self._cur_group = old_cur_group + # Return data + return data - def _handle_rep_item(self, item: _Rep): - for _ in range(item.mul): - self._handle_item(item.item) + # Generates the data for a fixed-length floating point number item + # instance having the value `val` and returns it. + def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState): + # Validate length + if item.len not in (32, 64): + _raise_error_for_item( + "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format( + item.len, val + ), + item, + ) - def _handle_offset_item(self, item: _Offset): - self._offset = item.val + # Encode and return result + return struct.pack( + "{}{}".format( + ">" if state.bo in (None, ByteOrder.BE) else "<", + "f" if item.len == 32 else "d", + ), + val, + ) - def _handle_item(self, item: _Item): - if type(item) in self._item_handlers: - self._item_handlers[type(item)](item) + # Generates the data for a fixed-length number item instance and + # returns it. + def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState): + # Compute value + val = self._eval_item_expr(item, state, True) - def _gen(self): + # Handle depending on type + if type(val) is int: + return self._gen_fl_int_item_inst_data(val, item, state) + else: + assert type(val) is float + return self._gen_fl_float_item_inst_data(val, item, state) + + # Generates the data for all the fixed-length number item instances + # and writes it at the correct offset within `self._data`. + def _gen_fl_num_item_insts(self): + for inst in self._fl_num_item_insts: + # Generate bytes + data = self._gen_fl_num_item_inst_data(inst.item, inst.state) + + # Insert bytes into `self._data` + self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data + + # Generates the data (`self._data`) and final state + # (`self._final_state`) from `group` and the initial state `state`. + def _gen(self, group: _Group, state: _GenState): # Initial state self._data = bytearray() - self._group_instance_indexes = {} # type: Dict[_Group, int] - self._cur_group = None # Item handlers self._item_handlers = { + _AlignOffset: self._handle_align_offset_item, _Byte: self._handle_byte_item, - _Str: self._handle_str_item, - _Bo: self._handle_bo_item, - _Val: self._handle_val_item, - _Var: self._handle_var_item, + _Cond: self._handle_cond_item, + _FillUntil: self._handle_fill_until_item, + _FlNum: self._handle_fl_num_item, _Group: self._handle_group_item, + _Label: self._handle_label_item, + _MacroExp: self._handle_macro_exp_item, _Rep: self._handle_rep_item, - _Offset: self._handle_offset_item, - } # type: Dict[type, Callable[[Any], None]] + _SetBo: self._handle_set_bo_item, + _SetOffset: self._handle_set_offset_item, + _SLeb128Int: self._handle_leb128_int_item, + _Str: self._handle_str_item, + _ULeb128Int: self._handle_leb128_int_item, + _VarAssign: self._handle_var_assign_item, + } # type: Dict[type, Callable[[Any, _GenState], None]] + + # Handle the group item, _not_ removing the immediate labels + # because the `labels` property offers them. + self._handle_group_item(group, state, False) - # Handle the group item - self._handle_item(self._main_group) + # This is actually the final state + self._final_state = state + + # Generate all the fixed-length number bytes now that we know + # their full state + self._gen_fl_num_item_insts() # Returns a `ParseResult` instance containing the bytes encoded by the @@ -1315,8 +2253,8 @@ class _Gen: # Raises `ParseError` on any parsing error. def parse( normand: str, - init_variables: Optional[VarsT] = None, - init_labels: Optional[VarsT] = None, + init_variables: Optional[VariablesT] = None, + init_labels: Optional[LabelsT] = None, init_offset: int = 0, init_byte_order: Optional[ByteOrder] = None, ): @@ -1326,8 +2264,10 @@ def parse( if init_labels is None: init_labels = {} + parser = _Parser(normand, init_variables, init_labels) gen = _Gen( - _Parser(normand, init_variables, init_labels).res, + parser.res, + parser.macro_defs, init_variables, init_labels, init_offset, @@ -1396,7 +2336,7 @@ def _raise_cli_error(msg: str) -> NoReturn: # Returns a dictionary of string to integers from the list of strings # `args` containing `NAME=VAL` entries. def _dict_from_arg(args: Optional[List[str]]): - d = {} # type: Dict[str, int] + d = {} # type: LabelsT if args is None: return d @@ -1407,6 +2347,8 @@ def _dict_from_arg(args: Optional[List[str]]): if m is None: _raise_cli_error("Invalid assignment {}".format(arg)) + d[m.group(1)] = int(m.group(2)) + return d @@ -1425,7 +2367,7 @@ def _try_run_cli(): normand = f.read() # Variables and labels - variables = _dict_from_arg(args.var) + variables = typing.cast(VariablesT, _dict_from_arg(args.var)) labels = _dict_from_arg(args.label) # Validate offset