Rename `normand.VarsT` to `normand.SymbolsT`
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.5.0"

# Names exported by `from normand import *`.
__all__ = [
    "ByteOrder",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLoc",
    "SymbolsT",
    "__author__",
    "__version__",
]
44
45import re
46import abc
47import ast
48import sys
49import enum
05f81895 50import math
71aaa3f7 51import struct
2adf4336
PP
52from typing import (
53 Any,
54 Set,
55 Dict,
56 List,
57 Tuple,
58 Union,
59 Pattern,
60 Callable,
61 NoReturn,
62 Optional,
63)
71aaa3f7
PP
64
65
# Text location (line and column numbers).
#
# The constructor is disabled (it raises `NotImplementedError`): build
# instances with the private _create() class method instead.
class TextLoc:
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns a text location without going through the
    # disabled constructor.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    # Actual initialization.
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        fmt = "TextLoc({}, {})"
        return fmt.format(self._line_no, self._col_no)
93
71aaa3f7
PP
94
95# Any item.
96class _Item:
97 def __init__(self, text_loc: TextLoc):
98 self._text_loc = text_loc
99
100 # Source text location.
101 @property
102 def text_loc(self):
103 return self._text_loc
104
2adf4336
PP
105
# Scalar item.
class _ScalarItem(_Item):
    # Size of this item, in bytes.
    #
    # Each concrete scalar item class provides its own implementation.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
113
114
115# A repeatable item.
2adf4336 116class _RepableItem:
71aaa3f7
PP
117 pass
118
119
# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # A byte item always has a size of one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
137
138
# String.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        data_repr = repr(self._data)
        return "_Str({}, {})".format(data_repr, self._text_loc)
156
157
# Byte order.
#
# The value of each member is the byte order name.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
166
167
2adf4336
PP
# Byte order setting.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        bo_repr = repr(self._bo)
        return "_SetBo({}, {})".format(bo_repr, self._text_loc)
71aaa3f7
PP
180
181
# Label.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        name_repr = repr(self._name)
        return "_Label({}, {})".format(name_repr, self._text_loc)
195
196
2adf4336
PP
# Offset setting.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Offset value.
    @property
    def val(self):
        return self._val

    def __repr__(self):
        val_repr = repr(self._val)
        return "_SetOffset({}, {})".format(val_repr, self._text_loc)
71aaa3f7
PP
210
211
212# Mixin of containing an AST expression and its string.
213class _ExprMixin:
214 def __init__(self, expr_str: str, expr: ast.Expression):
215 self._expr_str = expr_str
216 self._expr = expr
217
218 # Expression string.
219 @property
220 def expr_str(self):
221 return self._expr_str
222
223 # Expression node to evaluate.
224 @property
225 def expr(self):
226 return self._expr
227
228
2adf4336
PP
# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable to assign.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        parts = (
            repr(self._name),
            repr(self._expr_str),
            repr(self._expr),
            self._text_loc,
        )
        return "_VarAssign({}, {}, {}, {})".format(*parts)
247
248
05f81895
PP
# Fixed-length integer, possibly needing more than one byte.
class _FlInt(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        parts = (
            repr(self._expr_str),
            repr(self._expr),
            repr(self._len),
            self._text_loc,
        )
        return "_FlInt({}, {}, {}, {})".format(*parts)
271
272
05f81895
PP
# LEB128 integer.
#
# Common base of the unsigned and signed variants below; the textual
# representation uses the name of the concrete class.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLoc):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        cls_name = self.__class__.__name__
        parts = (cls_name, repr(self._expr_str), repr(self._expr), self._text_loc)
        return "{}({}, {}, {})".format(*parts)


# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass


# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
296
297
# Group of items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        items_repr = repr(self._items)
        return "_Group({}, {})".format(items_repr, self._text_loc)
311
312
# Repetition item.
#
# Wraps the item to repeat along with the expression giving the number
# of repetitions.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        parts = (
            repr(self._item),
            repr(self._expr_str),
            repr(self._expr),
            self._text_loc,
        )
        return "_Rep({}, {}, {}, {})".format(*parts)
331
332
# Expression item type: any item class which contains an expression
# (see `_ExprMixin`).
_ExprItemT = Union[_FlInt, _Leb128Int, _VarAssign, _Rep]
2adf4336
PP
335
336
71aaa3f7
PP
# A parsing error containing a message and a text location.
#
# The constructor is disabled (it raises `NotImplementedError`): build
# instances with the private _create() class method instead.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns a parsing error without going through the
    # disabled constructor.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    # Actual initialization: the message goes to the base exception.
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
356
357
# Raises a parsing error, forwarding the parameters to the constructor.
#
# Never returns.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
361
362
# Variable/label dictionary type (symbol name to integral value).
SymbolsT = Dict[str, int]
71aaa3f7
PP
365
366
367# Python name pattern.
368_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
369
370
371# Normand parser.
372#
373# The constructor accepts a Normand input. After building, use the `res`
374# property to get the resulting main group.
375class _Parser:
376 # Builds a parser to parse the Normand input `normand`, parsing
377 # immediately.
1b8aa84a 378 def __init__(self, normand: str, variables: SymbolsT, labels: SymbolsT):
71aaa3f7
PP
379 self._normand = normand
380 self._at = 0
381 self._line_no = 1
382 self._col_no = 1
383 self._label_names = set(labels.keys())
384 self._var_names = set(variables.keys())
385 self._parse()
386
387 # Result (main group).
388 @property
389 def res(self):
390 return self._res
391
392 # Current text location.
393 @property
394 def _text_loc(self):
395 return TextLoc._create( # pyright: ignore[reportPrivateUsage]
396 self._line_no, self._col_no
397 )
398
399 # Returns `True` if this parser is done parsing.
400 def _is_done(self):
401 return self._at == len(self._normand)
402
403 # Returns `True` if this parser isn't done parsing.
404 def _isnt_done(self):
405 return not self._is_done()
406
407 # Raises a parse error, creating it using the message `msg` and the
408 # current text location.
409 def _raise_error(self, msg: str) -> NoReturn:
410 _raise_error(msg, self._text_loc)
411
412 # Tries to make the pattern `pat` match the current substring,
413 # returning the match object and updating `self._at`,
414 # `self._line_no`, and `self._col_no` on success.
415 def _try_parse_pat(self, pat: Pattern[str]):
416 m = pat.match(self._normand, self._at)
417
418 if m is None:
419 return
420
421 # Skip matched string
422 self._at += len(m.group(0))
423
424 # Update line number
425 self._line_no += m.group(0).count("\n")
426
427 # Update column number
428 for i in reversed(range(self._at)):
429 if self._normand[i] == "\n" or i == 0:
430 if i == 0:
431 self._col_no = self._at + 1
432 else:
433 self._col_no = self._at - i
434
435 break
436
437 # Return match object
438 return m
439
440 # Expects the pattern `pat` to match the current substring,
441 # returning the match object and updating `self._at`,
442 # `self._line_no`, and `self._col_no` on success, or raising a parse
443 # error with the message `error_msg` on error.
444 def _expect_pat(self, pat: Pattern[str], error_msg: str):
445 # Match
446 m = self._try_parse_pat(pat)
447
448 if m is None:
449 # No match: error
450 self._raise_error(error_msg)
451
452 # Return match object
453 return m
454
455 # Pattern for _skip_ws_and_comments()
456 _ws_or_syms_or_comments_pat = re.compile(
457 r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
458 )
459
460 # Skips as many whitespaces, insignificant symbol characters, and
461 # comments as possible.
462 def _skip_ws_and_comments(self):
463 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
464
465 # Pattern for _try_parse_hex_byte()
466 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
467
468 # Tries to parse a hexadecimal byte, returning a byte item on
469 # success.
470 def _try_parse_hex_byte(self):
0e8e3169
PP
471 begin_text_loc = self._text_loc
472
71aaa3f7
PP
473 # Match initial nibble
474 m_high = self._try_parse_pat(self._nibble_pat)
475
476 if m_high is None:
477 # No match
478 return
479
480 # Expect another nibble
481 self._skip_ws_and_comments()
482 m_low = self._expect_pat(
483 self._nibble_pat, "Expecting another hexadecimal nibble"
484 )
485
486 # Return item
0e8e3169 487 return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)
71aaa3f7
PP
488
489 # Patterns for _try_parse_bin_byte()
490 _bin_byte_bit_pat = re.compile(r"[01]")
491 _bin_byte_prefix_pat = re.compile(r"%")
492
493 # Tries to parse a binary byte, returning a byte item on success.
494 def _try_parse_bin_byte(self):
0e8e3169
PP
495 begin_text_loc = self._text_loc
496
71aaa3f7
PP
497 # Match prefix
498 if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
499 # No match
500 return
501
502 # Expect eight bits
503 bits = [] # type: List[str]
504
505 for _ in range(8):
506 self._skip_ws_and_comments()
507 m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
508 bits.append(m.group(0))
509
510 # Return item
0e8e3169 511 return _Byte(int("".join(bits), 2), begin_text_loc)
71aaa3f7
PP
512
513 # Patterns for _try_parse_dec_byte()
514 _dec_byte_prefix_pat = re.compile(r"\$\s*")
515 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
516
517 # Tries to parse a decimal byte, returning a byte item on success.
518 def _try_parse_dec_byte(self):
0e8e3169
PP
519 begin_text_loc = self._text_loc
520
71aaa3f7
PP
521 # Match prefix
522 if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
523 # No match
524 return
525
526 # Expect the value
527 m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
528
529 # Compute value
530 val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)
531
532 # Validate
533 if val < -128 or val > 255:
0e8e3169 534 _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)
71aaa3f7
PP
535
536 # Two's complement
05f81895 537 val %= 256
71aaa3f7
PP
538
539 # Return item
0e8e3169 540 return _Byte(val, begin_text_loc)
71aaa3f7
PP
541
542 # Tries to parse a byte, returning a byte item on success.
543 def _try_parse_byte(self):
544 # Hexadecimal
545 item = self._try_parse_hex_byte()
546
547 if item is not None:
548 return item
549
550 # Binary
551 item = self._try_parse_bin_byte()
552
553 if item is not None:
554 return item
555
556 # Decimal
557 item = self._try_parse_dec_byte()
558
559 if item is not None:
560 return item
561
562 # Patterns for _try_parse_str()
563 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
564 _str_suffix_pat = re.compile(r'"')
565 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
566
567 # Strings corresponding to escape sequence characters
568 _str_escape_seq_strs = {
569 "0": "\0",
570 "a": "\a",
571 "b": "\b",
572 "e": "\x1b",
573 "f": "\f",
574 "n": "\n",
575 "r": "\r",
576 "t": "\t",
577 "v": "\v",
578 "\\": "\\",
579 '"': '"',
580 }
581
582 # Tries to parse a string, returning a string item on success.
583 def _try_parse_str(self):
0e8e3169
PP
584 begin_text_loc = self._text_loc
585
71aaa3f7
PP
586 # Match prefix
587 m = self._try_parse_pat(self._str_prefix_pat)
588
589 if m is None:
590 # No match
591 return
592
593 # Get encoding
594 encoding = "utf8"
595
596 if m.group("len") is not None:
597 encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))
598
599 # Actual string
600 m = self._expect_pat(self._str_str_pat, "Expecting a literal string")
601
602 # Expect end of string
603 self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')
604
605 # Replace escape sequences
606 val = m.group(0)
607
608 for ec in '0abefnrtv"\\':
609 val = val.replace(r"\{}".format(ec), self._str_escape_seq_strs[ec])
610
611 # Encode
612 data = val.encode(encoding)
613
614 # Return item
0e8e3169 615 return _Str(data, begin_text_loc)
71aaa3f7
PP
616
617 # Patterns for _try_parse_group()
618 _group_prefix_pat = re.compile(r"\(")
619 _group_suffix_pat = re.compile(r"\)")
620
621 # Tries to parse a group, returning a group item on success.
622 def _try_parse_group(self):
0e8e3169
PP
623 begin_text_loc = self._text_loc
624
71aaa3f7
PP
625 # Match prefix
626 if self._try_parse_pat(self._group_prefix_pat) is None:
627 # No match
628 return
629
630 # Parse items
631 items = self._parse_items()
632
633 # Expect end of group
634 self._skip_ws_and_comments()
635 self._expect_pat(
636 self._group_suffix_pat, "Expecting an item or `)` (end of group)"
637 )
638
639 # Return item
0e8e3169 640 return _Group(items, begin_text_loc)
71aaa3f7
PP
641
642 # Returns a stripped expression string and an AST expression node
643 # from the expression string `expr_str` at text location `text_loc`.
644 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
645 # Create an expression node from the expression string
646 expr_str = expr_str.strip().replace("\n", " ")
647
648 try:
649 expr = ast.parse(expr_str, mode="eval")
650 except SyntaxError:
651 _raise_error(
652 "Invalid expression `{}`: invalid syntax".format(expr_str),
653 text_loc,
654 )
655
656 return expr_str, expr
657
05f81895
PP
658 # Patterns for _try_parse_val_and_attr()
659 _val_expr_pat = re.compile(r"([^}:]+):\s*")
660 _fl_int_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
661 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
71aaa3f7 662
05f81895
PP
663 # Tries to parse a value and attribute (fixed length in bits or
664 # `leb128`), returning a value item on success.
665 def _try_parse_val_and_attr(self):
71aaa3f7
PP
666 begin_text_loc = self._text_loc
667
668 # Match
669 m_expr = self._try_parse_pat(self._val_expr_pat)
670
671 if m_expr is None:
672 # No match
673 return
674
71aaa3f7
PP
675 # Create an expression node from the expression string
676 expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)
677
05f81895
PP
678 # Length?
679 m_attr = self._try_parse_pat(self._fl_int_len_attr_pat)
680
681 if m_attr is None:
682 # LEB128?
683 m_attr = self._try_parse_pat(self._leb128_int_attr_pat)
684
685 if m_attr is None:
686 # At this point it's invalid
687 self._raise_error(
688 "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
689 )
690
691 # Return LEB128 integer item
692 cls = _ULeb128Int if m_attr.group(1) == "u" else _SLeb128Int
693 return cls(expr_str, expr, begin_text_loc)
694 else:
695 # Return fixed-length integer item
696 return _FlInt(
697 expr_str,
698 expr,
699 int(m_attr.group(0)),
700 begin_text_loc,
701 )
71aaa3f7 702
05f81895 703 # Patterns for _try_parse_val_and_attr()
2adf4336 704 _var_assign_pat = re.compile(
71aaa3f7
PP
705 r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
706 )
707
2adf4336
PP
708 # Tries to parse a variable assignment, returning a variable
709 # assignment item on success.
710 def _try_parse_var_assign(self):
71aaa3f7
PP
711 begin_text_loc = self._text_loc
712
713 # Match
2adf4336 714 m = self._try_parse_pat(self._var_assign_pat)
71aaa3f7
PP
715
716 if m is None:
717 # No match
718 return
719
720 # Validate name
721 name = m.group("name")
722
723 if name == _icitte_name:
0e8e3169
PP
724 _raise_error(
725 "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
726 )
71aaa3f7
PP
727
728 if name in self._label_names:
0e8e3169 729 _raise_error("Existing label named `{}`".format(name), begin_text_loc)
71aaa3f7
PP
730
731 # Add to known variable names
732 self._var_names.add(name)
733
734 # Create an expression node from the expression string
735 expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)
736
737 # Return item
2adf4336 738 return _VarAssign(
71aaa3f7
PP
739 name,
740 expr_str,
741 expr,
0e8e3169 742 begin_text_loc,
71aaa3f7
PP
743 )
744
2adf4336 745 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
746 _bo_pat = re.compile(r"[bl]e")
747
2adf4336
PP
748 # Tries to parse a byte order name, returning a byte order setting
749 # item on success.
750 def _try_parse_set_bo(self):
0e8e3169
PP
751 begin_text_loc = self._text_loc
752
71aaa3f7
PP
753 # Match
754 m = self._try_parse_pat(self._bo_pat)
755
756 if m is None:
757 # No match
758 return
759
760 # Return corresponding item
761 if m.group(0) == "be":
2adf4336 762 return _SetBo(ByteOrder.BE, begin_text_loc)
71aaa3f7
PP
763 else:
764 assert m.group(0) == "le"
2adf4336 765 return _SetBo(ByteOrder.LE, begin_text_loc)
71aaa3f7
PP
766
767 # Patterns for _try_parse_val_or_bo()
2adf4336
PP
768 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
769 _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")
71aaa3f7 770
2adf4336
PP
771 # Tries to parse a value, a variable assignment, or a byte order
772 # setting, returning an item on success.
773 def _try_parse_val_or_var_assign_or_set_bo(self):
71aaa3f7 774 # Match prefix
2adf4336 775 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
71aaa3f7
PP
776 # No match
777 return
778
2adf4336
PP
779 # Variable assignment item?
780 item = self._try_parse_var_assign()
71aaa3f7
PP
781
782 if item is None:
05f81895
PP
783 # Fixed-length value item?
784 item = self._try_parse_val_and_attr()
71aaa3f7
PP
785
786 if item is None:
2adf4336
PP
787 # Byte order setting item?
788 item = self._try_parse_set_bo()
71aaa3f7
PP
789
790 if item is None:
791 # At this point it's invalid
2adf4336 792 self._raise_error(
05f81895 793 "Expecting a fixed-length integer, a variable assignment, or a byte order setting"
2adf4336 794 )
71aaa3f7
PP
795
796 # Expect suffix
2adf4336 797 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
71aaa3f7
PP
798 return item
799
2adf4336 800 # Pattern for _try_parse_set_offset_val() and _try_parse_rep()
71aaa3f7
PP
801 _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")
802
2adf4336
PP
803 # Tries to parse an offset setting value (after the initial `<`),
804 # returning an offset item on success.
805 def _try_parse_set_offset_val(self):
0e8e3169
PP
806 begin_text_loc = self._text_loc
807
71aaa3f7
PP
808 # Match
809 m = self._try_parse_pat(self._pos_const_int_pat)
810
811 if m is None:
812 # No match
813 return
814
815 # Return item
2adf4336 816 return _SetOffset(int(m.group(0), 0), begin_text_loc)
71aaa3f7
PP
817
818 # Tries to parse a label name (after the initial `<`), returning a
819 # label item on success.
820 def _try_parse_label_name(self):
0e8e3169
PP
821 begin_text_loc = self._text_loc
822
71aaa3f7
PP
823 # Match
824 m = self._try_parse_pat(_py_name_pat)
825
826 if m is None:
827 # No match
828 return
829
830 # Validate
831 name = m.group(0)
832
833 if name == _icitte_name:
0e8e3169
PP
834 _raise_error(
835 "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
836 )
71aaa3f7
PP
837
838 if name in self._label_names:
0e8e3169 839 _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)
71aaa3f7
PP
840
841 if name in self._var_names:
0e8e3169 842 _raise_error("Existing variable named `{}`".format(name), begin_text_loc)
71aaa3f7
PP
843
844 # Add to known label names
845 self._label_names.add(name)
846
847 # Return item
0e8e3169 848 return _Label(name, begin_text_loc)
71aaa3f7 849
2adf4336
PP
850 # Patterns for _try_parse_label_or_set_offset()
851 _label_set_offset_prefix_pat = re.compile(r"<\s*")
852 _label_set_offset_suffix_pat = re.compile(r"\s*>")
71aaa3f7 853
2adf4336
PP
854 # Tries to parse a label or an offset setting, returning an item on
855 # success.
856 def _try_parse_label_or_set_offset(self):
71aaa3f7 857 # Match prefix
2adf4336 858 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
71aaa3f7
PP
859 # No match
860 return
861
2adf4336
PP
862 # Offset setting item?
863 item = self._try_parse_set_offset_val()
71aaa3f7
PP
864
865 if item is None:
866 # Label item?
867 item = self._try_parse_label_name()
868
869 if item is None:
870 # At this point it's invalid
2adf4336 871 self._raise_error("Expecting a label name or an offset setting value")
71aaa3f7
PP
872
873 # Expect suffix
2adf4336 874 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
71aaa3f7
PP
875 return item
876
877 # Tries to parse a base item (anything except a repetition),
878 # returning it on success.
879 def _try_parse_base_item(self):
880 # Byte item?
881 item = self._try_parse_byte()
882
883 if item is not None:
884 return item
885
886 # String item?
887 item = self._try_parse_str()
888
889 if item is not None:
890 return item
891
2adf4336
PP
892 # Value, variable assignment, or byte order setting item?
893 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
894
895 if item is not None:
896 return item
897
2adf4336
PP
898 # Label or offset setting item?
899 item = self._try_parse_label_or_set_offset()
71aaa3f7
PP
900
901 if item is not None:
902 return item
903
904 # Group item?
905 item = self._try_parse_group()
906
907 if item is not None:
908 return item
909
910 # Pattern for _try_parse_rep()
911 _rep_prefix_pat = re.compile(r"\*\s*")
2adf4336
PP
912 _rep_expr_prefix_pat = re.compile(r"\{")
913 _rep_expr_pat = re.compile(r"[^}p]+")
914 _rep_expr_suffix_pat = re.compile(r"\}")
71aaa3f7 915
2adf4336
PP
916 # Tries to parse a repetition, returning the expression string and
917 # AST expression node on success.
71aaa3f7 918 def _try_parse_rep(self):
71aaa3f7
PP
919 # Match prefix
920 if self._try_parse_pat(self._rep_prefix_pat) is None:
921 # No match
2adf4336 922 return
71aaa3f7
PP
923
924 # Expect and return a decimal multiplier
925 self._skip_ws_and_comments()
2adf4336
PP
926
927 # Integer?
928 m = self._try_parse_pat(self._pos_const_int_pat)
929
930 if m is None:
931 # Expression?
932 if self._try_parse_pat(self._rep_expr_prefix_pat) is None:
933 # At this point it's invalid
934 self._raise_error("Expecting a positive integral multiplier or `{`")
935
936 # Expect an expression
937 expr_str_loc = self._text_loc
938 m = self._expect_pat(self._rep_expr_pat, "Expecting an expression")
939 expr_str = self._ast_expr_from_str(m.group(0), expr_str_loc)
940
941 # Expect `}`
942 self._expect_pat(self._rep_expr_suffix_pat, "Expecting `}`")
943 expr_str = m.group(0)
944 else:
945 expr_str_loc = self._text_loc
946 expr_str = m.group(0)
947
948 return self._ast_expr_from_str(expr_str, expr_str_loc)
71aaa3f7 949
1ca7b5e1
PP
950 # Tries to parse an item, possibly followed by a repetition,
951 # returning `True` on success.
952 #
953 # Appends any parsed item to `items`.
954 def _try_append_item(self, items: List[_Item]):
71aaa3f7
PP
955 self._skip_ws_and_comments()
956
957 # Parse a base item
958 item = self._try_parse_base_item()
959
960 if item is None:
961 # No item
1ca7b5e1 962 return False
71aaa3f7
PP
963
964 # Parse repetition if the base item is repeatable
965 if isinstance(item, _RepableItem):
0e8e3169
PP
966 self._skip_ws_and_comments()
967 rep_text_loc = self._text_loc
2adf4336 968 rep_ret = self._try_parse_rep()
71aaa3f7 969
2adf4336
PP
970 if rep_ret is not None:
971 item = _Rep(item, rep_ret[0], rep_ret[1], rep_text_loc)
71aaa3f7 972
1ca7b5e1
PP
973 items.append(item)
974 return True
71aaa3f7
PP
975
976 # Parses and returns items, skipping whitespaces, insignificant
977 # symbols, and comments when allowed, and stopping at the first
978 # unknown character.
979 def _parse_items(self) -> List[_Item]:
980 items = [] # type: List[_Item]
981
982 while self._isnt_done():
1ca7b5e1
PP
983 # Try to append item
984 if not self._try_append_item(items):
985 # Unknown at this point
986 break
71aaa3f7
PP
987
988 return items
989
990 # Parses the whole Normand input, setting `self._res` to the main
991 # group item on success.
992 def _parse(self):
993 if len(self._normand.strip()) == 0:
994 # Special case to make sure there's something to consume
995 self._res = _Group([], self._text_loc)
996 return
997
998 # Parse first level items
999 items = self._parse_items()
1000
1001 # Make sure there's nothing left
1002 self._skip_ws_and_comments()
1003
1004 if self._isnt_done():
1005 self._raise_error(
1006 "Unexpected character `{}`".format(self._normand[self._at])
1007 )
1008
1009 # Set main group item
1010 self._res = _Group(items, self._text_loc)
1011
1012
# The return type of parse().
#
# The constructor is disabled (it raises `NotImplementedError`): build
# instances with the private _create() class method instead.
class ParseResult:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns a parsing result without going through the
    # disabled constructor.
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Actual initialization: keeps the parameters as is.
    def _init(
        self,
        data: bytearray,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._offset, self._bo = data, offset, bo
        self._vars, self._labels = variables, labels

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1070
1071
# Raises a parse error for the item `item`, creating it using the
# message `msg`.
#
# The text location of the error is the one of `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1076
1077
1078# The `ICITTE` reserved name.
1079_icitte_name = "ICITTE"
1080
1081
2adf4336
PP
1082# Base node visitor.
1083#
1084# Calls the _visit_name() method for each name node which isn't the name
1085# of a call.
1086class _NodeVisitor(ast.NodeVisitor):
1087 def __init__(self):
71aaa3f7
PP
1088 self._parent_is_call = False
1089
1090 def generic_visit(self, node: ast.AST):
1091 if type(node) is ast.Call:
1092 self._parent_is_call = True
1093 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1094 self._visit_name(node.id)
71aaa3f7
PP
1095
1096 super().generic_visit(node)
1097 self._parent_is_call = False
1098
2adf4336
PP
1099 @abc.abstractmethod
1100 def _visit_name(self, name: str):
1101 ...
1102
71aaa3f7 1103
2adf4336
PP
# Expression validator: validates that all the names within the
# expression are allowed.
#
# Raises a parse error at the text location of the item on the first
# illegal name.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str], icitte_allowed: bool):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names
        self._icitte_allowed = icitte_allowed

    def _visit_name(self, name: str):
        # Make sure the name refers to a known and reachable
        # variable/label name.
        if name == _icitte_name:
            if not self._icitte_allowed:
                _raise_error(
                    "Illegal reserved name `{}` in expression `{}`".format(
                        _icitte_name, self._item.expr_str
                    ),
                    self._item.text_loc,
                )

            # Allowed reserved name
            return

        if name in self._allowed_names:
            # Known name
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )

        # Work on a copy so as to leave the set of allowed names as is
        legal_names = self._allowed_names.copy()

        if self._icitte_allowed:
            legal_names.add(_icitte_name)

        if legal_names:
            # Help the user with the sorted list of legal names
            quoted_names = ["`{}`".format(legal_name) for legal_name in legal_names]
            quoted_names.sort()
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._item.text_loc)
1143
1144
# Expression visitor getting all the contained names.
#
# After visiting an expression, the `names` property is the set of all
# the non-callee names found within it.
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # Let the base class initialize its own traversal state instead
        # of duplicating it here.
        super().__init__()
        self._names = set()  # type: Set[str]

    # Names collected so far.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
71aaa3f7 1157
71aaa3f7 1158
2adf4336
PP
# Generator state.
#
# Holds the current variables, labels, offset, and byte order. The
# `variables` and `labels` dictionaries are shallow copies of the
# provided ones so that updating the state doesn't modify the caller's
# dictionaries.
class _GenState:
    def __init__(
        self,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Scalars first, then private copies of the symbol dictionaries
        self.offset = offset
        self.bo = bo
        self.variables = variables.copy()
        self.labels = labels.copy()
71aaa3f7
PP
1172
1173
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Validate that each repetition and LEB128 integer expression uses
#    only reachable names (`ICITTE` is legal within a LEB128 integer
#    expression, but not within a repetition expression).
#
# 2. Compute and keep the effective repetition count and LEB128 integer
#    value for each repetition and LEB128 integer instance.
#
# 3. Generate bytes, updating the initial state as it goes which becomes
#    the final state after the operation.
#
#    During the generation, when handling a `_Rep` or `_Leb128Int` item,
#    we already have the effective repetition count or value of the
#    instance.
#
#    When handling a `_Group` item, first update the current labels with
#    all the immediate (not nested) labels, and then handle each
#    contained item. This gives contained items access to "future" outer
#    labels. Then remove the immediate labels from the state so that
#    outer items don't have access to inner labels.
class _Gen:
    def __init__(
        self,
        group: _Group,
        variables: SymbolsT,
        labels: SymbolsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Step 1: validate the variable-length item expressions
        self._validate_vl_exprs(group, set(variables.keys()), set(labels.keys()))

        # Step 2: compute the variable-length instance values using a
        # scratch state.
        self._vl_instance_vals = self._compute_vl_instance_vals(
            group, _GenState(variables, labels, offset, bo)
        )

        # Step 3: generate the bytes using a fresh state
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Returns the set of used, non-called names within the AST
    # expression `expr`.
    @staticmethod
    def _names_of_expr(expr: ast.Expression):
        visitor = _ExprNamesVisitor()
        visitor.visit(expr)
        return visitor.names

    # Validates that all the repetition and LEB128 integer expressions
    # within `group` don't refer, directly or indirectly, to subsequent
    # labels.
    #
    # The strategy here is to keep a set of allowed label names, per
    # group, initialized to `allowed_label_names`, and a set of allowed
    # variable names initialized to `allowed_variable_names`.
    #
    # Then, depending on the type of `item`:
    #
    # `_Label`:
    #     Add its name to the local allowed label names: a label
    #     occurring before a repetition, and not within a nested group,
    #     is always reachable.
    #
    # `_VarAssign`:
    #     If all the names within its expression are allowed, then add
    #     its name to the allowed variable names.
    #
    #     Otherwise, remove its name from the allowed variable names (if
    #     it's in there): a variable which refers to an unreachable name
    #     is unreachable itself.
    #
    # `_Rep` and `_Leb128Int`:
    #     Make sure all the names within its expression are allowed.
    #
    # `_Group`:
    #     Call this function for each contained item with a _copy_ of
    #     the current allowed label names and the same current allowed
    #     variable names.
    #
    # Both sets are modified in place.
    @staticmethod
    def _validate_vl_exprs(
        item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
    ):
        if type(item) is _Label:
            allowed_label_names.add(item.name)
        elif type(item) is _VarAssign:
            # Check if this variable name is allowed
            allowed = True

            for name in _Gen._names_of_expr(item.expr):
                if name not in (
                    allowed_label_names | allowed_variable_names | {_icitte_name}
                ):
                    # Not allowed
                    allowed = False
                    break

            if allowed:
                allowed_variable_names.add(item.name)
            elif item.name in allowed_variable_names:
                allowed_variable_names.remove(item.name)
        elif isinstance(item, _Leb128Int):
            # Validate the expression (`ICITTE` allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, True
            ).visit(item.expr)
        elif type(item) is _Rep:
            # Validate the expression first (`ICITTE` not allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, False
            ).visit(item.expr)

            # Validate inner item
            _Gen._validate_vl_exprs(
                item.item, allowed_variable_names, allowed_label_names
            )
        elif type(item) is _Group:
            # Copy `allowed_label_names` so that this frame cannot
            # access the nested label names.
            group_allowed_label_names = allowed_label_names.copy()

            for subitem in item.items:
                _Gen._validate_vl_exprs(
                    subitem, allowed_variable_names, group_allowed_label_names
                )

    # Evaluates the expression of `item` considering the current
    # generation state `state`.
    #
    # If `allow_icitte` is `True`, then the `ICITTE` name is available
    # for the expression to evaluate.
    #
    # Returns the resulting integer, raising a parse error if the
    # evaluation fails or if the result isn't exactly an `int`.
    @staticmethod
    def _eval_item_expr(item: _ExprItemT, state: _GenState, allow_icitte: bool):
        syms = state.labels.copy()

        # Set the `ICITTE` name to the current offset, if any
        if allow_icitte:
            syms[_icitte_name] = state.offset

        # Add the current variables (a variable shadows a label or
        # `ICITTE` entry having the same name).
        syms.update(state.variables)

        # Validate the node and its children
        _ExprValidator(item, set(syms.keys()), True).visit(item.expr)

        # Compile and evaluate expression node.
        #
        # NOTE(review): eval() runs with this module's globals and the
        # validator above doesn't check callee names, so this isn't a
        # sandbox; confirm that Normand input is always trusted.
        try:
            val = eval(compile(item.expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Validate result
        if type(val) is not int:
            _raise_error_for_item(
                "Invalid expression `{}`: expecting result type `int`, not `{}`".format(
                    item.expr_str, type(val).__name__
                ),
                item,
            )

        return val

    # Returns the size, in bytes, required to encode the value `val`
    # with LEB128 (signed version if `is_signed` is `True`).
    @staticmethod
    def _leb128_size_for_val(val: int, is_signed: bool):
        if val < 0:
            # Equivalent upper bound.
            #
            # For example, if `val` is -128, then the full integer for
            # this number of bits would be [-128, 127].
            val = -val - 1

        # Number of bits (add one for the sign if needed)
        bits = val.bit_length() + int(is_signed)

        # Zero still needs one byte
        if bits == 0:
            bits = 1

        # Seven bits per byte
        return math.ceil(bits / 7)

    # Computes the effective value for each repetition and LEB128
    # integer instance, filling `instance_vals` (if not `None`) and
    # returning `instance_vals`.
    #
    # At this point it must be known that, for a given variable-length
    # item, its expression only contains reachable names.
    #
    # When handling a `_Rep` item, this function appends its effective
    # multiplier to `instance_vals` _before_ handling its repeated item.
    #
    # When handling a `_VarAssign` item, this function only evaluates it
    # if all its names are reachable.
    #
    # This function updates `state` (offset, labels, and variables) as
    # it goes.
    @staticmethod
    def _compute_vl_instance_vals(
        item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
    ):
        if instance_vals is None:
            instance_vals = []

        if isinstance(item, _ScalarItem):
            state.offset += item.size
        elif type(item) is _Label:
            state.labels[item.name] = state.offset
        elif type(item) is _VarAssign:
            # Check if all the names are reachable
            do_eval = True

            for name in _Gen._names_of_expr(item.expr):
                if (
                    name != _icitte_name
                    and name not in state.variables
                    and name not in state.labels
                ):
                    # A name is unknown: cannot evaluate
                    do_eval = False
                    break

            if do_eval:
                # Evaluate the expression and keep the result
                state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
        elif type(item) is _SetOffset:
            state.offset = item.val
        elif isinstance(item, _Leb128Int):
            # Evaluate the expression
            val = _Gen._eval_item_expr(item, state, True)

            # Validate result
            if type(item) is _ULeb128Int and val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add the evaluation result to the variable-length item
            # instance values.
            instance_vals.append(val)

            # Update offset
            state.offset += _Gen._leb128_size_for_val(val, type(item) is _SLeb128Int)
        elif type(item) is _Rep:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state, False)

            # Validate result
            if val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,}".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add to repetition instance values
            instance_vals.append(val)

            # Process the repeated item `val` times
            for _ in range(val):
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Group:
            prev_labels = state.labels.copy()

            # Process each item
            for subitem in item.items:
                _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

            # Nested labels aren't reachable from the outside
            state.labels = prev_labels

        return instance_vals

    # Size getter for an item which doesn't generate any data.
    def _zero_item_size(self, item: _Item, next_vl_instance: int):
        return 0, next_vl_instance

    # Size getter for an item having a known size (`item.size`).
    def _scalar_item_size(self, item: _ScalarItem, next_vl_instance: int):
        return item.size, next_vl_instance

    # Size getter for a LEB128 integer item, consuming one
    # variable-length instance value.
    def _leb128_int_item_size(self, item: _Leb128Int, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        return (
            self._leb128_size_for_val(
                self._vl_instance_vals[next_vl_instance], type(item) is _SLeb128Int
            ),
            next_vl_instance + 1,
        )

    # Size getter for a group item: the sum of the sizes of its items.
    def _group_item_size(self, item: _Group, next_vl_instance: int):
        size = 0

        for subitem in item.items:
            subitem_size, next_vl_instance = self._item_size(subitem, next_vl_instance)
            size += subitem_size

        return size, next_vl_instance

    # Size getter for a repetition item, consuming one variable-length
    # instance value (the multiplier) and then whatever each iteration
    # of the repeated item consumes.
    def _rep_item_size(self, item: _Rep, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        mul = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1
        size = 0

        for _ in range(mul):
            iter_size, next_vl_instance = self._item_size(item.item, next_vl_instance)
            size += iter_size

        return size, next_vl_instance

    # Returns the size of `item` and the new next variable-length
    # instance.
    def _item_size(self, item: _Item, next_vl_instance: int):
        return self._item_size_funcs[type(item)](item, next_vl_instance)

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
        self._data.append(item.val)
        state.offset += item.size
        return next_vl_instance

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
        self._data += item.data
        state.offset += item.size
        return next_vl_instance

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(
        self, item: _SetBo, state: _GenState, next_vl_instance: int
    ):
        # Update current byte order
        state.bo = item.bo
        return next_vl_instance

    # Handles the variable assignment item `item`.
    def _handle_var_assign_item(
        self, item: _VarAssign, state: _GenState, next_vl_instance: int
    ):
        # Update variable
        state.variables[item.name] = self._eval_item_expr(item, state, True)
        return next_vl_instance

    # Handles the fixed-length integer item `item`.
    def _handle_fl_int_item(
        self, item: _FlInt, state: _GenState, next_vl_instance: int
    ):
        # Compute value
        val = self._eval_item_expr(item, state, True)

        # Validate range
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, state.offset
                ),
                item,
            )

        # A byte order is required to encode on more than one byte
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        data = struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if state.bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert state.bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes and update offset
        self._data += data
        state.offset += len(data)
        return next_vl_instance

    # Handles the LEB128 integer item `item`.
    def _handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the precomputed value
        val = self._vl_instance_vals[next_vl_instance]

        # Size in bytes
        size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

        # For each byte
        for _ in range(size):
            # Seven LSBs, MSB of the byte set (continue)
            self._data.append((val & 0x7F) | 0x80)
            val >>= 7

        # Clear MSB of last byte (stop)
        self._data[-1] &= ~0x80

        # Update offset, like any other handler which generates data, so
        # that `ICITTE` and the final offset stay consistent with the
        # label offsets (which already account for this size).
        state.offset += size

        # Consumed this instance
        return next_vl_instance + 1

    # Handles the group item `item`, only removing the immediate labels
    # from `state.labels` if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self,
        item: _Group,
        state: _GenState,
        next_vl_instance: int,
        remove_immediate_labels: bool = True,
    ):
        # Compute the values of the immediate (not nested) labels. Those
        # labels are reachable by any expression within the group.
        #
        # This first pass works on a temporary offset and a temporary
        # next variable-length instance so as to leave the actual ones
        # untouched for the second pass.
        offset = state.offset
        immediate_label_names = set()  # type: Set[str]
        tmp_next_vl_instance = next_vl_instance

        for subitem in item.items:
            if type(subitem) is _SetOffset:
                # Update offset
                offset = subitem.val
            elif type(subitem) is _Label:
                # New immediate label
                state.labels[subitem.name] = offset
                immediate_label_names.add(subitem.name)

            subitem_size, tmp_next_vl_instance = self._item_size(
                subitem, tmp_next_vl_instance
            )
            offset += subitem_size

        # Handle each item now with the actual state
        for subitem in item.items:
            next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

        # Remove immediate labels if required so that outer items won't
        # reach inner labels.
        if remove_immediate_labels:
            for name in immediate_label_names:
                del state.labels[name]

        return next_vl_instance

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the precomputed repetition count
        mul = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Handles the label item `item`.
    #
    # There's nothing to do: _handle_group_item() already sets the
    # values of the immediate labels of a group.
    def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Handles the item `item`, returning the updated next
    # variable-length instance.
    def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._item_handlers[type(item)](item, state, next_vl_instance)

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # Initial state
        self._data = bytearray()

        # Item handlers
        self._item_handlers = {
            _Byte: self._handle_byte_item,
            _FlInt: self._handle_fl_int_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _SLeb128Int: self._handle_leb128_int_item,
            _Str: self._handle_str_item,
            _ULeb128Int: self._handle_leb128_int_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Item size getters
        self._item_size_funcs = {
            _Byte: self._scalar_item_size,
            _FlInt: self._scalar_item_size,
            _Group: self._group_item_size,
            _Label: self._zero_item_size,
            _Rep: self._rep_item_size,
            _SetBo: self._zero_item_size,
            _SetOffset: self._zero_item_size,
            _SLeb128Int: self._leb128_int_item_size,
            _Str: self._scalar_item_size,
            _ULeb128Int: self._leb128_int_item_size,
            _VarAssign: self._zero_item_size,
        }  # type: Dict[type, Callable[[Any, int], Tuple[int, int]]]

        # Handle the group item, _not_ removing the immediate labels
        # because the `labels` property offers them.
        self._handle_group_item(group, state, 0, False)

        # This is actually the final state
        self._final_state = state
71aaa3f7
PP
1731
1732
# Returns a `ParseResult` instance containing the bytes encoded by the
# input string `normand`.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[SymbolsT] = None,
    init_labels: Optional[SymbolsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Default to empty symbol dictionaries
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text first, and then generate from the resulting group
    main_group = _Parser(normand, variables, labels).res
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1772
1773
# Parses the command-line arguments, returning the resulting
# `argparse` namespace (`offset`, `byte_order`, `var`, `label`, and
# `path` attributes).
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()

    # Initial state options
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Initial symbols: each option may be repeated
    ap.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    version_str = "Normand {}".format(__version__)
    ap.add_argument("--version", action="version", version=version_str)

    # Optional input path
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    parsed_args = ap.parse_args()
    return parsed_args
1822
1823
1824# Raises a command-line error with the message `msg`.
1825def _raise_cli_error(msg: str) -> NoReturn:
1826 raise RuntimeError("Command-line error: {}".format(msg))
1827
1828
# Returns a dictionary of string to integers from the list of strings
# `args` containing `NAME=VAL` entries.
#
# `NAME` must match the Python name pattern and `VAL` must be one or
# more decimal digits; anything else raises a command-line error.
#
# Returns an empty dictionary if `args` is `None`.
def _dict_from_arg(args: Optional[List[str]]):
    d = {}  # type: Dict[str, int]

    if args is None:
        return d

    # Same pattern for all the entries
    assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        name, val_str = m.group(1), m.group(2)
        d[name] = int(val_str)

    return d
1846
1847
# CLI entry point without exception handling.
#
# Reads the Normand input (from the path argument or from the standard
# input), parses it with the initial state given by the command-line
# options, and writes the generated bytes to the standard output.
#
# On parse error, prints the location and message and exits with
# status 1 (see _fail()). Other errors propagate to the caller.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (actually include the offending value in the
    # message).
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print
    sys.stdout.buffer.write(res.data)
1897
1898
1899# Prints the exception message `msg` and exits with status 1.
1900def _fail(msg: str) -> NoReturn:
1901 if not msg.endswith("."):
1902 msg += "."
1903
1904 print(msg, file=sys.stderr)
1905 sys.exit(1)
1906
1907
# CLI entry point.
#
# Runs the command-line tool, converting any exception to its message
# printed to the standard error with exit status 1 (see _fail()).
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        err_msg = str(exc)
        _fail(err_msg)
1914
1915
# Run the command-line tool when executing this module as a script.
if __name__ == "__main__":
    _run_cli()
This page took 0.099176 seconds and 4 git commands to generate.