Add LEB128 integer support
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
05f81895 33__version__ = "0.4.0"
71aaa3f7
PP
34__all__ = [
35 "ByteOrder",
36 "parse",
37 "ParseError",
38 "ParseResult",
39 "TextLoc",
40 "VarsT",
41 "__author__",
42 "__version__",
43]
44
45import re
46import abc
47import ast
48import sys
49import enum
05f81895 50import math
71aaa3f7 51import struct
2adf4336
PP
52from typing import (
53 Any,
54 Set,
55 Dict,
56 List,
57 Tuple,
58 Union,
59 Pattern,
60 Callable,
61 NoReturn,
62 Optional,
63)
71aaa3f7
PP
64
65
66# Text location (line and column numbers).
class TextLoc:
    # Direct instantiation is refused: instances are only built through
    # the private _create() class method.
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a text location for line `line_no` and column
    # `col_no`, bypassing __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    # Actual initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLoc({0._line_no}, {0._col_no})".format(self)
93
71aaa3f7
PP
94
95# Any item.
class _Item:
    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    # Keeps `text_loc`, the location of this item within the source
    # text, available afterwards through the `text_loc` property.
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc
104
2adf4336
PP
105
106# Scalar item.
class _ScalarItem(_Item):
    # Returns the size, in bytes, of this item.
    #
    # Declared abstract: the concrete scalar item classes (for example
    # `_Byte` and `_Str`) provide the actual value.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
113
114
115# A repeatable item.
class _RepableItem:
    # Marker base class without any behaviour of its own: the parser
    # only looks for a repetition after a base item which is an
    # instance of this class.
    pass
118
119
120# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # Builds a byte item having the value `val`, found at `text_loc`.
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # A byte item always occupies exactly one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
137
138
139# String.
class _Str(_ScalarItem, _RepableItem):
    # Builds a string item from its already encoded bytes `data`, found
    # at `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # The size of a string item is its number of encoded bytes.
    @property
    def size(self):
        encoded = self._data
        return len(encoded)

    def __repr__(self):
        return "_Str({!r}, {})".format(self._data, self._text_loc)
156
157
158# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # The member values are the lowercase names (`be` and `le`).

    BE = "be"  # Big endian.
    LE = "le"  # Little endian.
166
167
2adf4336
PP
168# Byte order setting.
class _SetBo(_Item):
    # Builds a byte order setting item selecting `bo`, found at
    # `text_loc`.
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {})".format(self._bo, self._text_loc)
71aaa3f7
PP
180
181
182# Label.
class _Label(_Item):
    # Builds a label item named `name`, found at `text_loc`.
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {})".format(self._name, self._text_loc)
195
196
2adf4336
PP
197# Offset setting.
class _SetOffset(_Item):
    # Builds an offset setting item for the offset `val`, found at
    # `text_loc`.
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Offset value.
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {})".format(self._val, self._text_loc)
71aaa3f7
PP
210
211
212# Mixin of containing an AST expression and its string.
213class _ExprMixin:
214 def __init__(self, expr_str: str, expr: ast.Expression):
215 self._expr_str = expr_str
216 self._expr = expr
217
218 # Expression string.
219 @property
220 def expr_str(self):
221 return self._expr_str
222
223 # Expression node to evaluate.
224 @property
225 def expr(self):
226 return self._expr
227
228
2adf4336
PP
229# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Builds an item assigning the expression (`expr_str` and `expr`)
    # to the variable named `name`, found at `text_loc`.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)

        # _Item.__init__() doesn't chain: initialize the mixin explicitly
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_VarAssign({!r}, {!r}, {!r}, {})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
247
248
05f81895
PP
249# Fixed-length integer, possibly needing more than one byte.
class _FlInt(_ScalarItem, _RepableItem, _ExprMixin):
    # Builds a fixed-length integer item of `len` bits from the
    # expression (`expr_str` and `expr`), found at `text_loc`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)

        # The item initializer doesn't chain: initialize the mixin
        # explicitly
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size in bytes: the length in bits divided by eight (floor).
    @property
    def size(self):
        bit_len = self._len
        return bit_len // 8

    def __repr__(self):
        return "_FlInt({!r}, {!r}, {!r}, {})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
271
272
05f81895
PP
273# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # Builds a LEB128 integer item from the expression (`expr_str` and
    # `expr`), found at `text_loc`.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLoc):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the effective class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        cls_name = type(self).__name__
        return "{}({!r}, {!r}, {})".format(
            cls_name, self._expr_str, self._expr, self._text_loc
        )
286
287
288# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the distinct type is what the parser
    # selects for the `uleb128` attribute.
    pass
291
292
293# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the distinct type is what the parser
    # selects for the `sleb128` attribute.
    pass
296
297
71aaa3f7 298# Group of items.
class _Group(_Item, _RepableItem):
    # Builds a group item containing `items`, found at `text_loc`.
    #
    # The list is kept as is (not copied).
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {})".format(self._items, self._text_loc)
311
312
313# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    # Builds a repetition item repeating `item` as many times as the
    # expression (`expr_str` and `expr`) indicates, found at `text_loc`.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        return "_Rep({!r}, {!r}, {!r}, {})".format(
            self._item, self._expr_str, self._expr, self._text_loc
        )
331
332
2adf4336 333# Expression item type.
05f81895 334_ExprItemT = Union[_FlInt, _Leb128Int, _VarAssign, _Rep]
2adf4336
PP
335
336
71aaa3f7
PP
337# A parsing error containing a message and a text location.
class ParseError(RuntimeError):
    # Direct instantiation is refused: instances are only built through
    # the private _create() class method.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error having the message `msg` and
    # the text location `text_loc`, bypassing __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    # Actual initializer, called by _create(): the message goes to the
    # base exception.
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
356
357
358# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    # Never returns: always raises the parsing error built from `msg`
    # and `text_loc`.
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
361
362
2adf4336 363# Variable/label dictionary type.
71aaa3f7
PP
364VarsT = Dict[str, int]
365
366
367# Python name pattern.
368_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
369
370
371# Normand parser.
372#
373# The constructor accepts a Normand input. After building, use the `res`
374# property to get the resulting main group.
375class _Parser:
376 # Builds a parser to parse the Normand input `normand`, parsing
377 # immediately.
378 def __init__(self, normand: str, variables: VarsT, labels: VarsT):
379 self._normand = normand
380 self._at = 0
381 self._line_no = 1
382 self._col_no = 1
383 self._label_names = set(labels.keys())
384 self._var_names = set(variables.keys())
385 self._parse()
386
387 # Result (main group).
388 @property
389 def res(self):
390 return self._res
391
392 # Current text location.
393 @property
394 def _text_loc(self):
395 return TextLoc._create( # pyright: ignore[reportPrivateUsage]
396 self._line_no, self._col_no
397 )
398
399 # Returns `True` if this parser is done parsing.
400 def _is_done(self):
401 return self._at == len(self._normand)
402
403 # Returns `True` if this parser isn't done parsing.
404 def _isnt_done(self):
405 return not self._is_done()
406
407 # Raises a parse error, creating it using the message `msg` and the
408 # current text location.
409 def _raise_error(self, msg: str) -> NoReturn:
410 _raise_error(msg, self._text_loc)
411
412 # Tries to make the pattern `pat` match the current substring,
413 # returning the match object and updating `self._at`,
414 # `self._line_no`, and `self._col_no` on success.
415 def _try_parse_pat(self, pat: Pattern[str]):
416 m = pat.match(self._normand, self._at)
417
418 if m is None:
419 return
420
421 # Skip matched string
422 self._at += len(m.group(0))
423
424 # Update line number
425 self._line_no += m.group(0).count("\n")
426
427 # Update column number
428 for i in reversed(range(self._at)):
429 if self._normand[i] == "\n" or i == 0:
430 if i == 0:
431 self._col_no = self._at + 1
432 else:
433 self._col_no = self._at - i
434
435 break
436
437 # Return match object
438 return m
439
440 # Expects the pattern `pat` to match the current substring,
441 # returning the match object and updating `self._at`,
442 # `self._line_no`, and `self._col_no` on success, or raising a parse
443 # error with the message `error_msg` on error.
444 def _expect_pat(self, pat: Pattern[str], error_msg: str):
445 # Match
446 m = self._try_parse_pat(pat)
447
448 if m is None:
449 # No match: error
450 self._raise_error(error_msg)
451
452 # Return match object
453 return m
454
455 # Pattern for _skip_ws_and_comments()
456 _ws_or_syms_or_comments_pat = re.compile(
457 r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
458 )
459
460 # Skips as many whitespaces, insignificant symbol characters, and
461 # comments as possible.
462 def _skip_ws_and_comments(self):
463 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
464
465 # Pattern for _try_parse_hex_byte()
466 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
467
468 # Tries to parse a hexadecimal byte, returning a byte item on
469 # success.
470 def _try_parse_hex_byte(self):
0e8e3169
PP
471 begin_text_loc = self._text_loc
472
71aaa3f7
PP
473 # Match initial nibble
474 m_high = self._try_parse_pat(self._nibble_pat)
475
476 if m_high is None:
477 # No match
478 return
479
480 # Expect another nibble
481 self._skip_ws_and_comments()
482 m_low = self._expect_pat(
483 self._nibble_pat, "Expecting another hexadecimal nibble"
484 )
485
486 # Return item
0e8e3169 487 return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)
71aaa3f7
PP
488
489 # Patterns for _try_parse_bin_byte()
490 _bin_byte_bit_pat = re.compile(r"[01]")
491 _bin_byte_prefix_pat = re.compile(r"%")
492
493 # Tries to parse a binary byte, returning a byte item on success.
494 def _try_parse_bin_byte(self):
0e8e3169
PP
495 begin_text_loc = self._text_loc
496
71aaa3f7
PP
497 # Match prefix
498 if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
499 # No match
500 return
501
502 # Expect eight bits
503 bits = [] # type: List[str]
504
505 for _ in range(8):
506 self._skip_ws_and_comments()
507 m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
508 bits.append(m.group(0))
509
510 # Return item
0e8e3169 511 return _Byte(int("".join(bits), 2), begin_text_loc)
71aaa3f7
PP
512
513 # Patterns for _try_parse_dec_byte()
514 _dec_byte_prefix_pat = re.compile(r"\$\s*")
515 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
516
517 # Tries to parse a decimal byte, returning a byte item on success.
518 def _try_parse_dec_byte(self):
0e8e3169
PP
519 begin_text_loc = self._text_loc
520
71aaa3f7
PP
521 # Match prefix
522 if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
523 # No match
524 return
525
526 # Expect the value
527 m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
528
529 # Compute value
530 val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)
531
532 # Validate
533 if val < -128 or val > 255:
0e8e3169 534 _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)
71aaa3f7
PP
535
536 # Two's complement
05f81895 537 val %= 256
71aaa3f7
PP
538
539 # Return item
0e8e3169 540 return _Byte(val, begin_text_loc)
71aaa3f7
PP
541
542 # Tries to parse a byte, returning a byte item on success.
543 def _try_parse_byte(self):
544 # Hexadecimal
545 item = self._try_parse_hex_byte()
546
547 if item is not None:
548 return item
549
550 # Binary
551 item = self._try_parse_bin_byte()
552
553 if item is not None:
554 return item
555
556 # Decimal
557 item = self._try_parse_dec_byte()
558
559 if item is not None:
560 return item
561
562 # Patterns for _try_parse_str()
563 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
564 _str_suffix_pat = re.compile(r'"')
565 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
566
567 # Strings corresponding to escape sequence characters
568 _str_escape_seq_strs = {
569 "0": "\0",
570 "a": "\a",
571 "b": "\b",
572 "e": "\x1b",
573 "f": "\f",
574 "n": "\n",
575 "r": "\r",
576 "t": "\t",
577 "v": "\v",
578 "\\": "\\",
579 '"': '"',
580 }
581
582 # Tries to parse a string, returning a string item on success.
583 def _try_parse_str(self):
0e8e3169
PP
584 begin_text_loc = self._text_loc
585
71aaa3f7
PP
586 # Match prefix
587 m = self._try_parse_pat(self._str_prefix_pat)
588
589 if m is None:
590 # No match
591 return
592
593 # Get encoding
594 encoding = "utf8"
595
596 if m.group("len") is not None:
597 encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))
598
599 # Actual string
600 m = self._expect_pat(self._str_str_pat, "Expecting a literal string")
601
602 # Expect end of string
603 self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')
604
605 # Replace escape sequences
606 val = m.group(0)
607
608 for ec in '0abefnrtv"\\':
609 val = val.replace(r"\{}".format(ec), self._str_escape_seq_strs[ec])
610
611 # Encode
612 data = val.encode(encoding)
613
614 # Return item
0e8e3169 615 return _Str(data, begin_text_loc)
71aaa3f7
PP
616
617 # Patterns for _try_parse_group()
618 _group_prefix_pat = re.compile(r"\(")
619 _group_suffix_pat = re.compile(r"\)")
620
621 # Tries to parse a group, returning a group item on success.
622 def _try_parse_group(self):
0e8e3169
PP
623 begin_text_loc = self._text_loc
624
71aaa3f7
PP
625 # Match prefix
626 if self._try_parse_pat(self._group_prefix_pat) is None:
627 # No match
628 return
629
630 # Parse items
631 items = self._parse_items()
632
633 # Expect end of group
634 self._skip_ws_and_comments()
635 self._expect_pat(
636 self._group_suffix_pat, "Expecting an item or `)` (end of group)"
637 )
638
639 # Return item
0e8e3169 640 return _Group(items, begin_text_loc)
71aaa3f7
PP
641
642 # Returns a stripped expression string and an AST expression node
643 # from the expression string `expr_str` at text location `text_loc`.
644 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
645 # Create an expression node from the expression string
646 expr_str = expr_str.strip().replace("\n", " ")
647
648 try:
649 expr = ast.parse(expr_str, mode="eval")
650 except SyntaxError:
651 _raise_error(
652 "Invalid expression `{}`: invalid syntax".format(expr_str),
653 text_loc,
654 )
655
656 return expr_str, expr
657
05f81895
PP
658 # Patterns for _try_parse_val_and_attr()
659 _val_expr_pat = re.compile(r"([^}:]+):\s*")
660 _fl_int_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
661 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
71aaa3f7 662
05f81895
PP
663 # Tries to parse a value and attribute (fixed length in bits or
664 # `leb128`), returning a value item on success.
665 def _try_parse_val_and_attr(self):
71aaa3f7
PP
666 begin_text_loc = self._text_loc
667
668 # Match
669 m_expr = self._try_parse_pat(self._val_expr_pat)
670
671 if m_expr is None:
672 # No match
673 return
674
71aaa3f7
PP
675 # Create an expression node from the expression string
676 expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)
677
05f81895
PP
678 # Length?
679 m_attr = self._try_parse_pat(self._fl_int_len_attr_pat)
680
681 if m_attr is None:
682 # LEB128?
683 m_attr = self._try_parse_pat(self._leb128_int_attr_pat)
684
685 if m_attr is None:
686 # At this point it's invalid
687 self._raise_error(
688 "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
689 )
690
691 # Return LEB128 integer item
692 cls = _ULeb128Int if m_attr.group(1) == "u" else _SLeb128Int
693 return cls(expr_str, expr, begin_text_loc)
694 else:
695 # Return fixed-length integer item
696 return _FlInt(
697 expr_str,
698 expr,
699 int(m_attr.group(0)),
700 begin_text_loc,
701 )
71aaa3f7 702
05f81895 703 # Pattern for _try_parse_var_assign()
2adf4336 704 _var_assign_pat = re.compile(
71aaa3f7
PP
705 r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
706 )
707
2adf4336
PP
708 # Tries to parse a variable assignment, returning a variable
709 # assignment item on success.
710 def _try_parse_var_assign(self):
71aaa3f7
PP
711 begin_text_loc = self._text_loc
712
713 # Match
2adf4336 714 m = self._try_parse_pat(self._var_assign_pat)
71aaa3f7
PP
715
716 if m is None:
717 # No match
718 return
719
720 # Validate name
721 name = m.group("name")
722
723 if name == _icitte_name:
0e8e3169
PP
724 _raise_error(
725 "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
726 )
71aaa3f7
PP
727
728 if name in self._label_names:
0e8e3169 729 _raise_error("Existing label named `{}`".format(name), begin_text_loc)
71aaa3f7
PP
730
731 # Add to known variable names
732 self._var_names.add(name)
733
734 # Create an expression node from the expression string
735 expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)
736
737 # Return item
2adf4336 738 return _VarAssign(
71aaa3f7
PP
739 name,
740 expr_str,
741 expr,
0e8e3169 742 begin_text_loc,
71aaa3f7
PP
743 )
744
2adf4336 745 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
746 _bo_pat = re.compile(r"[bl]e")
747
2adf4336
PP
748 # Tries to parse a byte order name, returning a byte order setting
749 # item on success.
750 def _try_parse_set_bo(self):
0e8e3169
PP
751 begin_text_loc = self._text_loc
752
71aaa3f7
PP
753 # Match
754 m = self._try_parse_pat(self._bo_pat)
755
756 if m is None:
757 # No match
758 return
759
760 # Return corresponding item
761 if m.group(0) == "be":
2adf4336 762 return _SetBo(ByteOrder.BE, begin_text_loc)
71aaa3f7
PP
763 else:
764 assert m.group(0) == "le"
2adf4336 765 return _SetBo(ByteOrder.LE, begin_text_loc)
71aaa3f7
PP
766
767 # Patterns for _try_parse_val_or_var_assign_or_set_bo()
2adf4336
PP
768 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
769 _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")
71aaa3f7 770
2adf4336
PP
771 # Tries to parse a value, a variable assignment, or a byte order
772 # setting, returning an item on success.
773 def _try_parse_val_or_var_assign_or_set_bo(self):
71aaa3f7 774 # Match prefix
2adf4336 775 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
71aaa3f7
PP
776 # No match
777 return
778
2adf4336
PP
779 # Variable assignment item?
780 item = self._try_parse_var_assign()
71aaa3f7
PP
781
782 if item is None:
05f81895
PP
783 # Fixed-length value item?
784 item = self._try_parse_val_and_attr()
71aaa3f7
PP
785
786 if item is None:
2adf4336
PP
787 # Byte order setting item?
788 item = self._try_parse_set_bo()
71aaa3f7
PP
789
790 if item is None:
791 # At this point it's invalid
2adf4336 792 self._raise_error(
05f81895 793 "Expecting a fixed-length integer, a variable assignment, or a byte order setting"
2adf4336 794 )
71aaa3f7
PP
795
796 # Expect suffix
2adf4336 797 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
71aaa3f7
PP
798 return item
799
2adf4336 800 # Pattern for _try_parse_set_offset_val() and _try_parse_rep()
71aaa3f7
PP
801 _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")
802
2adf4336
PP
803 # Tries to parse an offset setting value (after the initial `<`),
804 # returning an offset item on success.
805 def _try_parse_set_offset_val(self):
0e8e3169
PP
806 begin_text_loc = self._text_loc
807
71aaa3f7
PP
808 # Match
809 m = self._try_parse_pat(self._pos_const_int_pat)
810
811 if m is None:
812 # No match
813 return
814
815 # Return item
2adf4336 816 return _SetOffset(int(m.group(0), 0), begin_text_loc)
71aaa3f7
PP
817
818 # Tries to parse a label name (after the initial `<`), returning a
819 # label item on success.
820 def _try_parse_label_name(self):
0e8e3169
PP
821 begin_text_loc = self._text_loc
822
71aaa3f7
PP
823 # Match
824 m = self._try_parse_pat(_py_name_pat)
825
826 if m is None:
827 # No match
828 return
829
830 # Validate
831 name = m.group(0)
832
833 if name == _icitte_name:
0e8e3169
PP
834 _raise_error(
835 "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
836 )
71aaa3f7
PP
837
838 if name in self._label_names:
0e8e3169 839 _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)
71aaa3f7
PP
840
841 if name in self._var_names:
0e8e3169 842 _raise_error("Existing variable named `{}`".format(name), begin_text_loc)
71aaa3f7
PP
843
844 # Add to known label names
845 self._label_names.add(name)
846
847 # Return item
0e8e3169 848 return _Label(name, begin_text_loc)
71aaa3f7 849
2adf4336
PP
850 # Patterns for _try_parse_label_or_set_offset()
851 _label_set_offset_prefix_pat = re.compile(r"<\s*")
852 _label_set_offset_suffix_pat = re.compile(r"\s*>")
71aaa3f7 853
2adf4336
PP
854 # Tries to parse a label or an offset setting, returning an item on
855 # success.
856 def _try_parse_label_or_set_offset(self):
71aaa3f7 857 # Match prefix
2adf4336 858 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
71aaa3f7
PP
859 # No match
860 return
861
2adf4336
PP
862 # Offset setting item?
863 item = self._try_parse_set_offset_val()
71aaa3f7
PP
864
865 if item is None:
866 # Label item?
867 item = self._try_parse_label_name()
868
869 if item is None:
870 # At this point it's invalid
2adf4336 871 self._raise_error("Expecting a label name or an offset setting value")
71aaa3f7
PP
872
873 # Expect suffix
2adf4336 874 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
71aaa3f7
PP
875 return item
876
877 # Tries to parse a base item (anything except a repetition),
878 # returning it on success.
879 def _try_parse_base_item(self):
880 # Byte item?
881 item = self._try_parse_byte()
882
883 if item is not None:
884 return item
885
886 # String item?
887 item = self._try_parse_str()
888
889 if item is not None:
890 return item
891
2adf4336
PP
892 # Value, variable assignment, or byte order setting item?
893 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
894
895 if item is not None:
896 return item
897
2adf4336
PP
898 # Label or offset setting item?
899 item = self._try_parse_label_or_set_offset()
71aaa3f7
PP
900
901 if item is not None:
902 return item
903
904 # Group item?
905 item = self._try_parse_group()
906
907 if item is not None:
908 return item
909
910 # Pattern for _try_parse_rep()
911 _rep_prefix_pat = re.compile(r"\*\s*")
2adf4336
PP
912 _rep_expr_prefix_pat = re.compile(r"\{")
913 _rep_expr_pat = re.compile(r"[^}p]+")
914 _rep_expr_suffix_pat = re.compile(r"\}")
71aaa3f7 915
2adf4336
PP
916 # Tries to parse a repetition, returning the expression string and
917 # AST expression node on success.
71aaa3f7 918 def _try_parse_rep(self):
71aaa3f7
PP
919 # Match prefix
920 if self._try_parse_pat(self._rep_prefix_pat) is None:
921 # No match
2adf4336 922 return
71aaa3f7
PP
923
924 # Expect and return a decimal multiplier
925 self._skip_ws_and_comments()
2adf4336
PP
926
927 # Integer?
928 m = self._try_parse_pat(self._pos_const_int_pat)
929
930 if m is None:
931 # Expression?
932 if self._try_parse_pat(self._rep_expr_prefix_pat) is None:
933 # At this point it's invalid
934 self._raise_error("Expecting a positive integral multiplier or `{`")
935
936 # Expect an expression
937 expr_str_loc = self._text_loc
938 m = self._expect_pat(self._rep_expr_pat, "Expecting an expression")
939 expr_str = self._ast_expr_from_str(m.group(0), expr_str_loc)
940
941 # Expect `}`
942 self._expect_pat(self._rep_expr_suffix_pat, "Expecting `}`")
943 expr_str = m.group(0)
944 else:
945 expr_str_loc = self._text_loc
946 expr_str = m.group(0)
947
948 return self._ast_expr_from_str(expr_str, expr_str_loc)
71aaa3f7 949
1ca7b5e1
PP
950 # Tries to parse an item, possibly followed by a repetition,
951 # returning `True` on success.
952 #
953 # Appends any parsed item to `items`.
954 def _try_append_item(self, items: List[_Item]):
71aaa3f7
PP
955 self._skip_ws_and_comments()
956
957 # Parse a base item
958 item = self._try_parse_base_item()
959
960 if item is None:
961 # No item
1ca7b5e1 962 return False
71aaa3f7
PP
963
964 # Parse repetition if the base item is repeatable
965 if isinstance(item, _RepableItem):
0e8e3169
PP
966 self._skip_ws_and_comments()
967 rep_text_loc = self._text_loc
2adf4336 968 rep_ret = self._try_parse_rep()
71aaa3f7 969
2adf4336
PP
970 if rep_ret is not None:
971 item = _Rep(item, rep_ret[0], rep_ret[1], rep_text_loc)
71aaa3f7 972
1ca7b5e1
PP
973 items.append(item)
974 return True
71aaa3f7
PP
975
976 # Parses and returns items, skipping whitespaces, insignificant
977 # symbols, and comments when allowed, and stopping at the first
978 # unknown character.
979 def _parse_items(self) -> List[_Item]:
980 items = [] # type: List[_Item]
981
982 while self._isnt_done():
1ca7b5e1
PP
983 # Try to append item
984 if not self._try_append_item(items):
985 # Unknown at this point
986 break
71aaa3f7
PP
987
988 return items
989
990 # Parses the whole Normand input, setting `self._res` to the main
991 # group item on success.
992 def _parse(self):
993 if len(self._normand.strip()) == 0:
994 # Special case to make sure there's something to consume
995 self._res = _Group([], self._text_loc)
996 return
997
998 # Parse first level items
999 items = self._parse_items()
1000
1001 # Make sure there's nothing left
1002 self._skip_ws_and_comments()
1003
1004 if self._isnt_done():
1005 self._raise_error(
1006 "Unexpected character `{}`".format(self._normand[self._at])
1007 )
1008
1009 # Set main group item
1010 self._res = _Group(items, self._text_loc)
1011
1012
1013# The return type of parse().
class ParseResult:
    # Direct instantiation is refused: instances are only built through
    # the private _create() class method.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parse result from the generated data `data`
    # and the final variables, labels, offset, and byte order, bypassing
    # __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Actual initializer, called by _create(): keeps the parameters as
    # they are (no copy).
    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars, self._labels = variables, labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1070
1071
1072# Raises a parse error for the item `item`, creating it using the
1073# message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The error location is the source text location of `item`
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1076
1077
1078# The `ICITTE` reserved name.
1079_icitte_name = "ICITTE"
1080
1081
2adf4336
PP
1082# Base node visitor.
1083#
1084# Calls the _visit_name() method for each name node which isn't the name
1085# of a call.
1086class _NodeVisitor(ast.NodeVisitor):
1087 def __init__(self):
71aaa3f7
PP
1088 self._parent_is_call = False
1089
1090 def generic_visit(self, node: ast.AST):
1091 if type(node) is ast.Call:
1092 self._parent_is_call = True
1093 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1094 self._visit_name(node.id)
71aaa3f7
PP
1095
1096 super().generic_visit(node)
1097 self._parent_is_call = False
1098
2adf4336
PP
1099 @abc.abstractmethod
1100 def _visit_name(self, name: str):
1101 ...
1102
71aaa3f7 1103
2adf4336
PP
1104# Expression validator: validates that all the names within the
1105# expression are allowed.
class _ExprValidator(_NodeVisitor):
    # Builds a validator for the expression of `item`: the names of
    # `allowed_names` are legal, as well as the reserved name if
    # `icitte_allowed` is true.
    def __init__(self, item: _ExprItemT, allowed_names: Set[str], icitte_allowed: bool):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names
        self._icitte_allowed = icitte_allowed

    # Makes sure `name` refers to a known and reachable variable/label
    # name, raising a parse error at the location of the item otherwise.
    def _visit_name(self, name: str):
        item = self._item

        if name == _icitte_name:
            # Reserved name: legal only when explicitly allowed
            if not self._icitte_allowed:
                _raise_error(
                    "Illegal reserved name `{}` in expression `{}`".format(
                        _icitte_name, item.expr_str
                    ),
                    item.text_loc,
                )

            return

        if name in self._allowed_names:
            # Legal name
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, item.expr_str
        )

        # List the legal names, if any, to help the user (working on a
        # copy so as not to alter the set of allowed names)
        legal_names = self._allowed_names.copy()

        if self._icitte_allowed:
            legal_names.add(_icitte_name)

        if len(legal_names) > 0:
            quoted_names = sorted(
                ["`{}`".format(legal_name) for legal_name in legal_names]
            )
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, item.text_loc)
1143
1144
# Expression visitor collecting all the contained names.
#
# After visiting an expression, the `names` property is the set of all
# the names, except names of calls, found within it.
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # Let the base class initialize its own state instead of
        # duplicating it here.
        super().__init__()
        self._names = set()  # type: Set[str]

    # Names collected so far.
    @property
    def names(self):
        return self._names

    # Records the name `name`.
    def _visit_name(self, name: str):
        self._names.add(name)
71aaa3f7 1157
71aaa3f7 1158
2adf4336
PP
# Generator state: current variables, labels, offset, and byte order.
#
# The state keeps its own copies of `variables` and `labels` so that
# updating it doesn't modify the dictionaries of the caller.
class _GenState:
    def __init__(
        self, variables: VarsT, labels: VarsT, offset: int, bo: Optional[ByteOrder]
    ):
        self.variables, self.labels = variables.copy(), labels.copy()
        self.offset, self.bo = offset, bo
71aaa3f7
PP
1168
1169
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Validate that each repetition and LEB128 integer expression uses
#    only reachable names (`ICITTE` being illegal within a repetition
#    expression, but legal within a LEB128 integer expression).
#
# 2. Compute and keep the effective repetition count and LEB128 integer
#    value for each repetition and LEB128 integer instance.
#
# 3. Generate bytes, updating the initial state as it goes which becomes
#    the final state after the operation.
#
#    During the generation, when handling a `_Rep` or `_Leb128Int` item,
#    we already have the effective repetition count or value of the
#    instance.
#
#    When handling a `_Group` item, first update the current labels with
#    all the immediate (not nested) labels, and then handle each
#    contained item. This gives contained item access to "future" outer
#    labels. Then remove the immediate labels from the state so that
#    outer items don't have access to inner labels.
class _Gen:
    def __init__(
        self,
        group: _Group,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Step 1: validate the variable-length item expressions
        self._validate_vl_exprs(group, set(variables.keys()), set(labels.keys()))

        # Step 2: compute the value of each variable-length item
        # instance using a dedicated (throwaway) state.
        self._vl_instance_vals = self._compute_vl_instance_vals(
            group, _GenState(variables, labels, offset, bo)
        )

        # Step 3: generate the bytes from a fresh state
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Returns the set of used, non-called names within the AST
    # expression `expr`.
    @staticmethod
    def _names_of_expr(expr: ast.Expression):
        visitor = _ExprNamesVisitor()
        visitor.visit(expr)
        return visitor.names

    # Validates that all the repetition and LEB128 integer expressions
    # within `group` don't refer, directly or indirectly, to subsequent
    # labels.
    #
    # The strategy here is to keep a set of allowed label names, per
    # group, initialized to `allowed_label_names`, and a set of allowed
    # variable names initialized to `allowed_variable_names`.
    #
    # Then, depending on the type of `item`:
    #
    # `_Label`:
    #     Add its name to the local allowed label names: a label
    #     occurring before a repetition, and not within a nested group,
    #     is always reachable.
    #
    # `_VarAssign`:
    #     If all the names within its expression are allowed, then add
    #     its name to the allowed variable names.
    #
    #     Otherwise, remove its name from the allowed variable names (if
    #     it's in there): a variable which refers to an unreachable name
    #     is unreachable itself.
    #
    # `_Rep` and `_Leb128Int`:
    #     Make sure all the names within its expression are allowed
    #     (`ICITTE` is only legal for `_Leb128Int`). For `_Rep`, also
    #     validate the repeated item.
    #
    # `_Group`:
    #     Call this function for each contained item with a _copy_ of
    #     the current allowed label names and the same current allowed
    #     variable names.
    @staticmethod
    def _validate_vl_exprs(
        item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
    ):
        if type(item) is _Label:
            allowed_label_names.add(item.name)
        elif type(item) is _VarAssign:
            # Check if this variable name is allowed
            allowed = True

            for name in _Gen._names_of_expr(item.expr):
                if name not in (
                    allowed_label_names | allowed_variable_names | {_icitte_name}
                ):
                    # Not allowed
                    allowed = False
                    break

            if allowed:
                allowed_variable_names.add(item.name)
            elif item.name in allowed_variable_names:
                allowed_variable_names.remove(item.name)
        elif isinstance(item, _Leb128Int):
            # Validate the expression (`ICITTE` allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, True
            ).visit(item.expr)
        elif type(item) is _Rep:
            # Validate the expression first (`ICITTE` not allowed)
            _ExprValidator(
                item, allowed_label_names | allowed_variable_names, False
            ).visit(item.expr)

            # Validate inner item
            _Gen._validate_vl_exprs(
                item.item, allowed_variable_names, allowed_label_names
            )
        elif type(item) is _Group:
            # Copy `allowed_label_names` so that this frame cannot
            # access the nested label names.
            group_allowed_label_names = allowed_label_names.copy()

            for subitem in item.items:
                _Gen._validate_vl_exprs(
                    subitem, allowed_variable_names, group_allowed_label_names
                )

    # Evaluates the expression of `item` considering the current
    # generation state `state`, returning the resulting integer.
    #
    # If `allow_icitte` is `True`, then the `ICITTE` name is available
    # for the expression to evaluate.
    #
    # Raises a parse error if a name of the expression is unknown, if
    # the evaluation itself fails, or if the result isn't an `int`.
    @staticmethod
    def _eval_item_expr(item: _ExprItemT, state: _GenState, allow_icitte: bool):
        syms = state.labels.copy()

        # Set the `ICITTE` name to the current offset, if any
        if allow_icitte:
            syms[_icitte_name] = state.offset

        # Add the current variables (a variable takes precedence over a
        # label or `ICITTE` having the same name)
        syms.update(state.variables)

        # Validate the node and its children
        _ExprValidator(item, set(syms.keys()), True).visit(item.expr)

        # Compile and evaluate expression node.
        #
        # NOTE(review): this relies on eval(); the validation above only
        # restricts non-called names, so a Normand input must be
        # considered as trusted as Python code.
        try:
            val = eval(compile(item.expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Validate result
        if type(val) is not int:
            _raise_error_for_item(
                "Invalid expression `{}`: expecting result type `int`, not `{}`".format(
                    item.expr_str, type(val).__name__
                ),
                item,
            )

        return val

    # Returns the size, in bytes, required to encode the value `val`
    # with LEB128 (signed version if `is_signed` is `True`).
    @staticmethod
    def _leb128_size_for_val(val: int, is_signed: bool):
        if val < 0:
            # Equivalent upper bound.
            #
            # For example, if `val` is -128, then the full integer for
            # this number of bits would be [-128, 127].
            val = -val - 1

        # Number of bits (add one for the sign if needed), at least one
        # so that zero still requires one byte.
        bits = max(1, val.bit_length() + int(is_signed))

        # Seven bits per byte: ceiling division using only integer
        # arithmetic (exact whatever the magnitude of `val`).
        return (bits + 6) // 7

    # Computes the effective value for each repetition and LEB128
    # integer instance, filling `instance_vals` (if not `None`) and
    # returning `instance_vals`.
    #
    # At this point it must be known that, for a given variable-length
    # item, its expression only contains reachable names.
    #
    # When handling a `_Rep` item, this function appends its effective
    # multiplier to `instance_vals` _before_ handling its repeated item.
    #
    # When handling a `_VarAssign` item, this function only evaluates it
    # if all its names are reachable.
    #
    # This function updates `state` (offset, labels, and variables) as
    # it goes to mirror what the generation will do.
    @staticmethod
    def _compute_vl_instance_vals(
        item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
    ):
        if instance_vals is None:
            instance_vals = []

        if isinstance(item, _ScalarItem):
            state.offset += item.size
        elif type(item) is _Label:
            state.labels[item.name] = state.offset
        elif type(item) is _VarAssign:
            # Check if all the names are reachable
            do_eval = True

            for name in _Gen._names_of_expr(item.expr):
                if (
                    name != _icitte_name
                    and name not in state.variables
                    and name not in state.labels
                ):
                    # A name is unknown: cannot evaluate
                    do_eval = False
                    break

            if do_eval:
                # Evaluate the expression and keep the result
                state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
        elif type(item) is _SetOffset:
            state.offset = item.val
        elif isinstance(item, _Leb128Int):
            # Evaluate the expression
            val = _Gen._eval_item_expr(item, state, True)

            # Validate result
            if type(item) is _ULeb128Int and val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add the evaluation result to the variable-length item
            # instance values.
            instance_vals.append(val)

            # Update offset
            state.offset += _Gen._leb128_size_for_val(val, type(item) is _SLeb128Int)
        elif type(item) is _Rep:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state, False)

            # Validate result
            if val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,}".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add to repetition instance values
            instance_vals.append(val)

            # Process the repeated item `val` times
            for _ in range(val):
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Group:
            prev_labels = state.labels.copy()

            # Process each item
            for subitem in item.items:
                _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

            # Labels defined within the group aren't visible outside
            state.labels = prev_labels

        return instance_vals

    # Size getter for an item which generates no data.
    def _zero_item_size(self, item: _Item, next_vl_instance: int):
        return 0, next_vl_instance

    # Size getter for a scalar item (known size).
    def _scalar_item_size(self, item: _ScalarItem, next_vl_instance: int):
        return item.size, next_vl_instance

    # Size getter for a LEB128 integer item.
    def _leb128_int_item_size(self, item: _Leb128Int, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        return (
            self._leb128_size_for_val(
                self._vl_instance_vals[next_vl_instance], type(item) is _SLeb128Int
            ),
            next_vl_instance + 1,
        )

    # Size getter for a group item: sum of the sizes of its items.
    def _group_item_size(self, item: _Group, next_vl_instance: int):
        size = 0

        for subitem in item.items:
            subitem_size, next_vl_instance = self._item_size(subitem, next_vl_instance)
            size += subitem_size

        return size, next_vl_instance

    # Size getter for a repetition item: sum of the sizes of all the
    # instances of its repeated item.
    def _rep_item_size(self, item: _Rep, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        mul = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1
        size = 0

        for _ in range(mul):
            iter_size, next_vl_instance = self._item_size(item.item, next_vl_instance)
            size += iter_size

        return size, next_vl_instance

    # Returns the size of `item` and the new next variable-length item
    # instance index.
    def _item_size(self, item: _Item, next_vl_instance: int):
        return self._item_size_funcs[type(item)](item, next_vl_instance)

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
        self._data.append(item.val)
        state.offset += item.size
        return next_vl_instance

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
        self._data += item.data
        state.offset += item.size
        return next_vl_instance

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(
        self, item: _SetBo, state: _GenState, next_vl_instance: int
    ):
        # Update current byte order
        state.bo = item.bo
        return next_vl_instance

    # Handles the variable assignment item `item`.
    def _handle_var_assign_item(
        self, item: _VarAssign, state: _GenState, next_vl_instance: int
    ):
        # Update variable
        state.variables[item.name] = self._eval_item_expr(item, state, True)
        return next_vl_instance

    # Handles the fixed-length integer item `item`.
    def _handle_fl_int_item(
        self, item: _FlInt, state: _GenState, next_vl_instance: int
    ):
        # Compute value
        val = self._eval_item_expr(item, state, True)

        # Validate range
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, state.offset
                ),
                item,
            )

        # A byte order is required to encode more than one byte
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        data = struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if state.bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert state.bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes and update offset
        self._data += data
        state.offset += len(data)
        return next_vl_instance

    # Handles the LEB128 integer item `item`.
    def _handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the precomputed value
        val = self._vl_instance_vals[next_vl_instance]

        # Size in bytes
        size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

        # For each byte
        for _ in range(size):
            # Seven LSBs, MSB of the byte set (continue)
            self._data.append((val & 0x7F) | 0x80)
            val >>= 7

        # Clear MSB of last byte (stop)
        self._data[-1] &= ~0x80

        # Update offset: without this, `ICITTE` and the final offset
        # would ignore the bytes of this item, disagreeing with
        # _compute_vl_instance_vals() and _leb128_int_item_size().
        state.offset += size

        # Consumed this instance
        return next_vl_instance + 1

    # Handles the group item `item`, only removing the immediate labels
    # from `state.labels` if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self,
        item: _Group,
        state: _GenState,
        next_vl_instance: int,
        remove_immediate_labels: bool = True,
    ):
        # Compute the values of the immediate (not nested) labels. Those
        # labels are reachable by any expression within the group.
        #
        # This first pass uses temporary offset and instance index
        # values: the actual ones are updated by the second pass.
        offset = state.offset
        immediate_label_names = set()  # type: Set[str]
        tmp_next_vl_instance = next_vl_instance

        for subitem in item.items:
            if type(subitem) is _SetOffset:
                # Update offset
                offset = subitem.val
            elif type(subitem) is _Label:
                # New immediate label
                state.labels[subitem.name] = offset
                immediate_label_names.add(subitem.name)

            subitem_size, tmp_next_vl_instance = self._item_size(
                subitem, tmp_next_vl_instance
            )
            offset += subitem_size

        # Handle each item now with the actual state
        for subitem in item.items:
            next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

        # Remove immediate labels if required so that outer items won't
        # reach inner labels.
        if remove_immediate_labels:
            for name in immediate_label_names:
                del state.labels[name]

        return next_vl_instance

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the precomputed repetition count
        mul = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Handles the label item `item`.
    #
    # There's nothing to do here: _handle_group_item() already set the
    # value of the label.
    def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Handles the item `item`, returning the updated next
    # variable-length item instance index.
    def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._item_handlers[type(item)](item, state, next_vl_instance)

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # Initial state
        self._data = bytearray()

        # Item handlers
        self._item_handlers = {
            _Byte: self._handle_byte_item,
            _FlInt: self._handle_fl_int_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _SLeb128Int: self._handle_leb128_int_item,
            _Str: self._handle_str_item,
            _ULeb128Int: self._handle_leb128_int_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Item size getters
        self._item_size_funcs = {
            _Byte: self._scalar_item_size,
            _FlInt: self._scalar_item_size,
            _Group: self._group_item_size,
            _Label: self._zero_item_size,
            _Rep: self._rep_item_size,
            _SetBo: self._zero_item_size,
            _SetOffset: self._zero_item_size,
            _SLeb128Int: self._leb128_int_item_size,
            _Str: self._scalar_item_size,
            _ULeb128Int: self._leb128_int_item_size,
            _VarAssign: self._zero_item_size,
        }  # type: Dict[type, Callable[[Any, int], Tuple[int, int]]]

        # Handle the group item, _not_ removing the immediate labels
        # because the `labels` property offers them.
        self._handle_group_item(group, state, 0, False)

        # This is actually the final state
        self._final_state = state
71aaa3f7
PP
1727
1728
# Returns a `ParseResult` instance containing the bytes encoded by the
# input string `normand`.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text first, and then generate from the resulting group
    main_group = _Parser(normand, variables, labels).res
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1768
1769
# Parses the command-line arguments, returning the resulting `argparse`
# namespace (`offset`, `byte_order`, `var`, `label`, and `path`
# attributes).
def _parse_cli_args():
    import argparse

    # Positional/optional flags and keyword arguments of each argument,
    # in registration order.
    arg_specs = [
        (
            ("--offset",),
            dict(
                metavar="OFFSET",
                action="store",
                type=int,
                default=0,
                help="initial offset (positive)",
            ),
        ),
        (
            ("-b", "--byte-order"),
            dict(
                metavar="BO",
                choices=["be", "le"],
                type=str,
                help="initial byte order (`be` or `le`)",
            ),
        ),
        (
            ("--var",),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial variable (may be repeated)",
            ),
        ),
        (
            ("-l", "--label"),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial label (may be repeated)",
            ),
        ),
        (
            ("--version",),
            dict(action="version", version="Normand {}".format(__version__)),
        ),
        (
            ("path",),
            dict(
                metavar="PATH",
                action="store",
                nargs="?",
                help="input path (none means standard input)",
            ),
        ),
    ]

    # Build parser
    parser = argparse.ArgumentParser()

    for flags, kwargs in arg_specs:
        parser.add_argument(*flags, **kwargs)

    # Parse
    return parser.parse_args()
1818
1819
1820# Raises a command-line error with the message `msg`.
1821def _raise_cli_error(msg: str) -> NoReturn:
1822 raise RuntimeError("Command-line error: {}".format(msg))
1823
1824
1825# Returns a dictionary of string to integers from the list of strings
1826# `args` containing `NAME=VAL` entries.
1827def _dict_from_arg(args: Optional[List[str]]):
1828 d = {} # type: Dict[str, int]
1829
1830 if args is None:
1831 return d
1832
1833 for arg in args:
1834 m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), arg)
1835
1836 if m is None:
1837 _raise_cli_error("Invalid assignment {}".format(arg))
1838
2e1c1acd
PP
1839 d[m.group(1)] = int(m.group(2))
1840
71aaa3f7
PP
1841 return d
1842
1843
# CLI entry point without exception handling.
#
# Reads the Normand text from the path given on the command line (or
# from the standard input if there's none), parses it with the initial
# variables, labels, offset, and byte order requested by the user, and
# writes the resulting bytes to the standard output.
#
# A `ParseError` makes the process exit through _fail(); any other
# exception is left to the caller.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset
    if args.offset < 0:
        # Include the offending value within the message
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the message with the absolute input path, if any, and
        # then with the line and column numbers of the error.
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print
    sys.stdout.buffer.write(res.data)
1893
1894
1895# Prints the exception message `msg` and exits with status 1.
1896def _fail(msg: str) -> NoReturn:
1897 if not msg.endswith("."):
1898 msg += "."
1899
1900 print(msg, file=sys.stderr)
1901 sys.exit(1)
1902
1903
# CLI entry point.
#
# Runs _try_run_cli(), converting any exception to a message printed
# by _fail() (which exits with status 1).
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        failure_msg = str(exc)
        _fail(failure_msg)
1910
1911
# Run the command-line tool when this module is executed as a script
# (not when it's imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.138871 seconds and 4 git commands to generate.