Add the directive form of a group (`!group`)
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
261c5ecf 33__version__ = "0.10.0"
71aaa3f7
PP
34__all__ = [
35 "ByteOrder",
36 "parse",
37 "ParseError",
38 "ParseResult",
e57a18e1
PP
39 "TextLocation",
40 "LabelsT",
41 "VariablesT",
71aaa3f7
PP
42 "__author__",
43 "__version__",
44]
45
46import re
47import abc
48import ast
49import sys
50import enum
05f81895 51import math
71aaa3f7 52import struct
e57a18e1
PP
53import typing
54from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
55
56
57# Text location (line and column numbers).
class TextLocation:
    """Text location made of a line number and a column number.

    Instances are built through `_create()`; calling the class directly
    raises `NotImplementedError`.
    """

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Go around the disabled `__init__()`, then fill in the fields.
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        fields = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*fields)
2adf4336 84
71aaa3f7
PP
85
86# Any item.
class _Item:
    """Base of the parsed items; keeps the text location it is given."""

    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    @property
    def text_loc(self):
        """Source text location."""
        return self._text_loc
95
2adf4336
PP
96
97# Scalar item.
class _ScalarItem(_Item):
    """Item which exposes a `size` property."""

    @property
    @abc.abstractmethod
    def size(self) -> int:
        """Size, in bytes, of this item."""
104
105
106# A repeatable item.
2adf4336 107class _RepableItem:
71aaa3f7
PP
108 pass
109
110
111# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    """Single byte item."""

    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        """Byte value."""
        return self._val

    @property
    def size(self):
        # A byte item always occupies a single byte.
        return 1

    def __repr__(self):
        val_str = hex(self._val)
        loc_str = repr(self._text_loc)
        return "_Byte({}, {})".format(val_str, loc_str)
71aaa3f7
PP
128
129
130# String.
class _Str(_ScalarItem, _RepableItem):
    """String item holding already encoded bytes."""

    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    @property
    def data(self):
        """Encoded bytes."""
        return self._data

    @property
    def size(self):
        # The size is the number of encoded bytes.
        return len(self._data)

    def __repr__(self):
        data_str = repr(self._data)
        loc_str = repr(self._text_loc)
        return "_Str({}, {})".format(data_str, loc_str)
71aaa3f7
PP
147
148
149# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    """Byte order."""

    BE = "be"  # Big endian
    LE = "le"  # Little endian
157
158
2adf4336
PP
159# Byte order setting.
class _SetBo(_Item):
    """Byte order setting item."""

    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    @property
    def bo(self):
        """Byte order to set."""
        return self._bo

    def __repr__(self):
        bo_str = repr(self._bo)
        loc_str = repr(self._text_loc)
        return "_SetBo({}, {})".format(bo_str, loc_str)
71aaa3f7
PP
171
172
173# Label.
class _Label(_Item):
    """Label item."""

    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    @property
    def name(self):
        """Label name."""
        return self._name

    def __repr__(self):
        name_str = repr(self._name)
        loc_str = repr(self._text_loc)
        return "_Label({}, {})".format(name_str, loc_str)
71aaa3f7
PP
186
187
2adf4336
PP
188# Offset setting.
class _SetOffset(_Item):
    """Offset setting item."""

    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        """Offset value (bytes)."""
        return self._val

    def __repr__(self):
        val_str = repr(self._val)
        loc_str = repr(self._text_loc)
        return "_SetOffset({}, {})".format(val_str, loc_str)
201
202
203# Offset alignment.
class _AlignOffset(_Item):
    """Offset alignment item."""

    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    @property
    def val(self):
        """Alignment value (bits)."""
        return self._val

    @property
    def pad_val(self):
        """Padding byte value."""
        return self._pad_val

    def __repr__(self):
        parts = (repr(self._val), repr(self._pad_val), repr(self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*parts)
71aaa3f7
PP
224
225
226# Mixin of containing an AST expression and its string.
227class _ExprMixin:
228 def __init__(self, expr_str: str, expr: ast.Expression):
229 self._expr_str = expr_str
230 self._expr = expr
231
232 # Expression string.
233 @property
234 def expr_str(self):
235 return self._expr_str
236
237 # Expression node to evaluate.
238 @property
239 def expr(self):
240 return self._expr
241
242
2adf4336
PP
243# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    """Variable assignment item: a name and the expression to assign."""

    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    @property
    def name(self):
        """Name."""
        return self._name

    def __repr__(self):
        parts = [
            repr(part)
            for part in (self._name, self._expr_str, self._expr, self._text_loc)
        ]
        return "_VarAssign({}, {}, {}, {})".format(*parts)
264
265
269f6eb3
PP
266# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    """Fixed-length number item, possibly needing more than one byte."""

    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    @property
    def len(self):
        """Length (bits)."""
        return self._len

    @property
    def size(self):
        # Whole number of bytes covered by the length in bits.
        return self._len // 8

    def __repr__(self):
        parts = [
            repr(part)
            for part in (self._expr_str, self._expr, self._len, self._text_loc)
        ]
        return "_FlNum({}, {}, {}, {})".format(*parts)
291
292
05f81895
PP
293# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    """LEB128 integer item (common base of both signedness variants)."""

    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Uses the concrete class name so that subclasses get their own.
        cls_name = self.__class__.__name__
        parts = [repr(part) for part in (self._expr_str, self._expr, self._text_loc)]
        return "{}({}, {}, {})".format(cls_name, *parts)
306
307
308# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    """Unsigned LEB128 integer item."""
311
312
313# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    """Signed LEB128 integer item."""
316
317
71aaa3f7 318# Group of items.
class _Group(_Item, _RepableItem):
    """Group of items."""

    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    @property
    def items(self):
        """Contained items."""
        return self._items

    def __repr__(self):
        items_str = repr(self._items)
        loc_str = repr(self._text_loc)
        return "_Group({}, {})".format(items_str, loc_str)
71aaa3f7
PP
331
332
333# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    """Repetition item: an item to repeat and its multiplier expression."""

    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    @property
    def item(self):
        """Item to repeat."""
        return self._item

    def __repr__(self):
        parts = [
            repr(part)
            for part in (self._item, self._expr_str, self._expr, self._text_loc)
        ]
        return "_Rep({}, {}, {}, {})".format(*parts)
354
355
27d52a19
PP
356# Conditional item.
class _Cond(_Item, _ExprMixin):
    """Conditional item: an item and its condition expression."""

    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    @property
    def item(self):
        """Conditional item."""
        return self._item

    def __repr__(self):
        parts = [
            repr(part)
            for part in (self._item, self._expr_str, self._expr, self._text_loc)
        ]
        return "_Cond({}, {}, {}, {})".format(*parts)
377
378
2adf4336 379# Expression item type.
27d52a19 380_ExprItemT = Union[_FlNum, _Leb128Int, _VarAssign, _Rep, _Cond]
2adf4336
PP
381
382
71aaa3f7
PP
383# A parsing error containing a message and a text location.
class ParseError(RuntimeError):
    """Parsing error carrying a message and a text location.

    Instances are built through `_create()`; calling the class directly
    raises `NotImplementedError`.
    """

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Go around the disabled `__init__()`, then fill in the fields.
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._text_loc = text_loc

    @property
    def text_loc(self):
        """Source text location."""
        return self._text_loc
402
403
404# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    """Raise a `ParseError` built from `msg` and `text_loc`."""
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
407
408
e57a18e1
PP
409# Variables dictionary type (for type hints).
410VariablesT = Dict[str, Union[int, float]]
411
412
413# Labels dictionary type (for type hints).
414LabelsT = Dict[str, int]
71aaa3f7
PP
415
416
417# Python name pattern.
418_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
419
420
421# Normand parser.
422#
423# The constructor accepts a Normand input. After building, use the `res`
424# property to get the resulting main group.
425class _Parser:
426 # Builds a parser to parse the Normand input `normand`, parsing
427 # immediately.
e57a18e1 428 def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
71aaa3f7
PP
429 self._normand = normand
430 self._at = 0
431 self._line_no = 1
432 self._col_no = 1
433 self._label_names = set(labels.keys())
434 self._var_names = set(variables.keys())
435 self._parse()
436
437 # Result (main group).
438 @property
439 def res(self):
440 return self._res
441
442 # Current text location.
443 @property
444 def _text_loc(self):
e57a18e1 445 return TextLocation._create( # pyright: ignore[reportPrivateUsage]
71aaa3f7
PP
446 self._line_no, self._col_no
447 )
448
449 # Returns `True` if this parser is done parsing.
450 def _is_done(self):
451 return self._at == len(self._normand)
452
453 # Returns `True` if this parser isn't done parsing.
454 def _isnt_done(self):
455 return not self._is_done()
456
457 # Raises a parse error, creating it using the message `msg` and the
458 # current text location.
459 def _raise_error(self, msg: str) -> NoReturn:
460 _raise_error(msg, self._text_loc)
461
462 # Tries to make the pattern `pat` match the current substring,
463 # returning the match object and updating `self._at`,
464 # `self._line_no`, and `self._col_no` on success.
465 def _try_parse_pat(self, pat: Pattern[str]):
466 m = pat.match(self._normand, self._at)
467
468 if m is None:
469 return
470
471 # Skip matched string
472 self._at += len(m.group(0))
473
474 # Update line number
475 self._line_no += m.group(0).count("\n")
476
477 # Update column number
478 for i in reversed(range(self._at)):
479 if self._normand[i] == "\n" or i == 0:
480 if i == 0:
481 self._col_no = self._at + 1
482 else:
483 self._col_no = self._at - i
484
485 break
486
487 # Return match object
488 return m
489
490 # Expects the pattern `pat` to match the current substring,
491 # returning the match object and updating `self._at`,
492 # `self._line_no`, and `self._col_no` on success, or raising a parse
493 # error with the message `error_msg` on error.
494 def _expect_pat(self, pat: Pattern[str], error_msg: str):
495 # Match
496 m = self._try_parse_pat(pat)
497
498 if m is None:
499 # No match: error
500 self._raise_error(error_msg)
501
502 # Return match object
503 return m
504
505 # Pattern for _skip_ws_and_comments()
506 _ws_or_syms_or_comments_pat = re.compile(
e57a18e1 507 r"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
71aaa3f7
PP
508 )
509
510 # Skips as many whitespaces, insignificant symbol characters, and
511 # comments as possible.
512 def _skip_ws_and_comments(self):
513 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
514
515 # Pattern for _try_parse_hex_byte()
516 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
517
518 # Tries to parse a hexadecimal byte, returning a byte item on
519 # success.
520 def _try_parse_hex_byte(self):
0e8e3169
PP
521 begin_text_loc = self._text_loc
522
71aaa3f7
PP
523 # Match initial nibble
524 m_high = self._try_parse_pat(self._nibble_pat)
525
526 if m_high is None:
527 # No match
528 return
529
530 # Expect another nibble
531 self._skip_ws_and_comments()
532 m_low = self._expect_pat(
533 self._nibble_pat, "Expecting another hexadecimal nibble"
534 )
535
536 # Return item
0e8e3169 537 return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)
71aaa3f7
PP
538
539 # Patterns for _try_parse_bin_byte()
540 _bin_byte_bit_pat = re.compile(r"[01]")
541 _bin_byte_prefix_pat = re.compile(r"%")
542
543 # Tries to parse a binary byte, returning a byte item on success.
544 def _try_parse_bin_byte(self):
0e8e3169
PP
545 begin_text_loc = self._text_loc
546
71aaa3f7
PP
547 # Match prefix
548 if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
549 # No match
550 return
551
552 # Expect eight bits
553 bits = [] # type: List[str]
554
555 for _ in range(8):
556 self._skip_ws_and_comments()
557 m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
558 bits.append(m.group(0))
559
560 # Return item
0e8e3169 561 return _Byte(int("".join(bits), 2), begin_text_loc)
71aaa3f7
PP
562
563 # Patterns for _try_parse_dec_byte()
564 _dec_byte_prefix_pat = re.compile(r"\$\s*")
565 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
566
567 # Tries to parse a decimal byte, returning a byte item on success.
568 def _try_parse_dec_byte(self):
0e8e3169
PP
569 begin_text_loc = self._text_loc
570
71aaa3f7
PP
571 # Match prefix
572 if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
573 # No match
574 return
575
576 # Expect the value
577 m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
578
579 # Compute value
580 val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)
581
582 # Validate
583 if val < -128 or val > 255:
0e8e3169 584 _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)
71aaa3f7
PP
585
586 # Two's complement
05f81895 587 val %= 256
71aaa3f7
PP
588
589 # Return item
0e8e3169 590 return _Byte(val, begin_text_loc)
71aaa3f7
PP
591
592 # Tries to parse a byte, returning a byte item on success.
593 def _try_parse_byte(self):
594 # Hexadecimal
595 item = self._try_parse_hex_byte()
596
597 if item is not None:
598 return item
599
600 # Binary
601 item = self._try_parse_bin_byte()
602
603 if item is not None:
604 return item
605
606 # Decimal
607 item = self._try_parse_dec_byte()
608
609 if item is not None:
610 return item
611
612 # Patterns for _try_parse_str()
613 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
614 _str_suffix_pat = re.compile(r'"')
615 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
616
617 # Strings corresponding to escape sequence characters
618 _str_escape_seq_strs = {
619 "0": "\0",
620 "a": "\a",
621 "b": "\b",
622 "e": "\x1b",
623 "f": "\f",
624 "n": "\n",
625 "r": "\r",
626 "t": "\t",
627 "v": "\v",
628 "\\": "\\",
629 '"': '"',
630 }
631
632 # Tries to parse a string, returning a string item on success.
633 def _try_parse_str(self):
0e8e3169
PP
634 begin_text_loc = self._text_loc
635
71aaa3f7
PP
636 # Match prefix
637 m = self._try_parse_pat(self._str_prefix_pat)
638
639 if m is None:
640 # No match
641 return
642
643 # Get encoding
644 encoding = "utf8"
645
646 if m.group("len") is not None:
647 encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))
648
649 # Actual string
650 m = self._expect_pat(self._str_str_pat, "Expecting a literal string")
651
652 # Expect end of string
653 self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')
654
655 # Replace escape sequences
656 val = m.group(0)
657
658 for ec in '0abefnrtv"\\':
659 val = val.replace(r"\{}".format(ec), self._str_escape_seq_strs[ec])
660
661 # Encode
662 data = val.encode(encoding)
663
664 # Return item
0e8e3169 665 return _Str(data, begin_text_loc)
71aaa3f7
PP
666
667 # Patterns for _try_parse_group()
261c5ecf
PP
668 _group_prefix_pat = re.compile(r"\(|!g(roup)?\b")
669 _group_suffix_paren_pat = re.compile(r"\)")
71aaa3f7
PP
670
671 # Tries to parse a group, returning a group item on success.
672 def _try_parse_group(self):
0e8e3169
PP
673 begin_text_loc = self._text_loc
674
71aaa3f7 675 # Match prefix
261c5ecf
PP
676 m_open = self._try_parse_pat(self._group_prefix_pat)
677
678 if m_open is None:
71aaa3f7
PP
679 # No match
680 return
681
682 # Parse items
683 items = self._parse_items()
684
685 # Expect end of group
686 self._skip_ws_and_comments()
261c5ecf
PP
687
688 if m_open.group(0) == "(":
689 pat = self._group_suffix_paren_pat
690 exp = ")"
691 else:
692 pat = self._block_end_pat
693 exp = "!end"
694
695 self._expect_pat(pat, "Expecting an item or `{}` (end of group)".format(exp))
71aaa3f7
PP
696
697 # Return item
0e8e3169 698 return _Group(items, begin_text_loc)
71aaa3f7
PP
699
700 # Returns a stripped expression string and an AST expression node
701 # from the expression string `expr_str` at text location `text_loc`.
e57a18e1 702 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
71aaa3f7
PP
703 # Create an expression node from the expression string
704 expr_str = expr_str.strip().replace("\n", " ")
705
706 try:
707 expr = ast.parse(expr_str, mode="eval")
708 except SyntaxError:
709 _raise_error(
710 "Invalid expression `{}`: invalid syntax".format(expr_str),
711 text_loc,
712 )
713
714 return expr_str, expr
715
269f6eb3 716 # Patterns for _try_parse_num_and_attr()
05f81895 717 _val_expr_pat = re.compile(r"([^}:]+):\s*")
269f6eb3 718 _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
05f81895 719 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
71aaa3f7 720
05f81895
PP
721 # Tries to parse a value and attribute (fixed length in bits or
722 # `leb128`), returning a value item on success.
269f6eb3 723 def _try_parse_num_and_attr(self):
71aaa3f7
PP
724 begin_text_loc = self._text_loc
725
726 # Match
727 m_expr = self._try_parse_pat(self._val_expr_pat)
728
729 if m_expr is None:
730 # No match
731 return
732
71aaa3f7
PP
733 # Create an expression node from the expression string
734 expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)
735
05f81895 736 # Length?
269f6eb3 737 m_attr = self._try_parse_pat(self._fl_num_len_attr_pat)
05f81895
PP
738
739 if m_attr is None:
740 # LEB128?
741 m_attr = self._try_parse_pat(self._leb128_int_attr_pat)
742
743 if m_attr is None:
744 # At this point it's invalid
745 self._raise_error(
746 "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
747 )
748
749 # Return LEB128 integer item
750 cls = _ULeb128Int if m_attr.group(1) == "u" else _SLeb128Int
751 return cls(expr_str, expr, begin_text_loc)
752 else:
269f6eb3
PP
753 # Return fixed-length number item
754 return _FlNum(
05f81895
PP
755 expr_str,
756 expr,
757 int(m_attr.group(0)),
758 begin_text_loc,
759 )
71aaa3f7 760
269f6eb3 761 # Pattern for _try_parse_var_assign()
2adf4336 762 _var_assign_pat = re.compile(
71aaa3f7
PP
763 r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
764 )
765
2adf4336
PP
766 # Tries to parse a variable assignment, returning a variable
767 # assignment item on success.
768 def _try_parse_var_assign(self):
71aaa3f7
PP
769 begin_text_loc = self._text_loc
770
771 # Match
2adf4336 772 m = self._try_parse_pat(self._var_assign_pat)
71aaa3f7
PP
773
774 if m is None:
775 # No match
776 return
777
778 # Validate name
779 name = m.group("name")
780
781 if name == _icitte_name:
0e8e3169
PP
782 _raise_error(
783 "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
784 )
71aaa3f7
PP
785
786 if name in self._label_names:
0e8e3169 787 _raise_error("Existing label named `{}`".format(name), begin_text_loc)
71aaa3f7
PP
788
789 # Add to known variable names
790 self._var_names.add(name)
791
792 # Create an expression node from the expression string
793 expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)
794
795 # Return item
2adf4336 796 return _VarAssign(
71aaa3f7
PP
797 name,
798 expr_str,
799 expr,
0e8e3169 800 begin_text_loc,
71aaa3f7
PP
801 )
802
2adf4336 803 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
804 _bo_pat = re.compile(r"[bl]e")
805
2adf4336
PP
806 # Tries to parse a byte order name, returning a byte order setting
807 # item on success.
808 def _try_parse_set_bo(self):
0e8e3169
PP
809 begin_text_loc = self._text_loc
810
71aaa3f7
PP
811 # Match
812 m = self._try_parse_pat(self._bo_pat)
813
814 if m is None:
815 # No match
816 return
817
818 # Return corresponding item
819 if m.group(0) == "be":
2adf4336 820 return _SetBo(ByteOrder.BE, begin_text_loc)
71aaa3f7
PP
821 else:
822 assert m.group(0) == "le"
2adf4336 823 return _SetBo(ByteOrder.LE, begin_text_loc)
71aaa3f7
PP
824
825 # Patterns for _try_parse_val_or_var_assign_or_set_bo()
2adf4336
PP
826 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
827 _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")
71aaa3f7 828
2adf4336
PP
829 # Tries to parse a value, a variable assignment, or a byte order
830 # setting, returning an item on success.
831 def _try_parse_val_or_var_assign_or_set_bo(self):
71aaa3f7 832 # Match prefix
2adf4336 833 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
71aaa3f7
PP
834 # No match
835 return
836
2adf4336
PP
837 # Variable assignment item?
838 item = self._try_parse_var_assign()
71aaa3f7
PP
839
840 if item is None:
269f6eb3
PP
841 # Number item?
842 item = self._try_parse_num_and_attr()
71aaa3f7
PP
843
844 if item is None:
2adf4336
PP
845 # Byte order setting item?
846 item = self._try_parse_set_bo()
71aaa3f7
PP
847
848 if item is None:
849 # At this point it's invalid
2adf4336 850 self._raise_error(
269f6eb3 851 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
2adf4336 852 )
71aaa3f7
PP
853
854 # Expect suffix
2adf4336 855 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
71aaa3f7
PP
856 return item
857
e57a18e1 858 # Common positive constant integer pattern
71aaa3f7
PP
859 _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")
860
2adf4336
PP
861 # Tries to parse an offset setting value (after the initial `<`),
862 # returning an offset item on success.
863 def _try_parse_set_offset_val(self):
0e8e3169
PP
864 begin_text_loc = self._text_loc
865
71aaa3f7
PP
866 # Match
867 m = self._try_parse_pat(self._pos_const_int_pat)
868
869 if m is None:
870 # No match
871 return
872
873 # Return item
2adf4336 874 return _SetOffset(int(m.group(0), 0), begin_text_loc)
71aaa3f7
PP
875
876 # Tries to parse a label name (after the initial `<`), returning a
877 # label item on success.
878 def _try_parse_label_name(self):
0e8e3169
PP
879 begin_text_loc = self._text_loc
880
71aaa3f7
PP
881 # Match
882 m = self._try_parse_pat(_py_name_pat)
883
884 if m is None:
885 # No match
886 return
887
888 # Validate
889 name = m.group(0)
890
891 if name == _icitte_name:
0e8e3169
PP
892 _raise_error(
893 "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
894 )
71aaa3f7
PP
895
896 if name in self._label_names:
0e8e3169 897 _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)
71aaa3f7
PP
898
899 if name in self._var_names:
0e8e3169 900 _raise_error("Existing variable named `{}`".format(name), begin_text_loc)
71aaa3f7
PP
901
902 # Add to known label names
903 self._label_names.add(name)
904
905 # Return item
0e8e3169 906 return _Label(name, begin_text_loc)
71aaa3f7 907
2adf4336
PP
908 # Patterns for _try_parse_label_or_set_offset()
909 _label_set_offset_prefix_pat = re.compile(r"<\s*")
910 _label_set_offset_suffix_pat = re.compile(r"\s*>")
71aaa3f7 911
2adf4336
PP
912 # Tries to parse a label or an offset setting, returning an item on
913 # success.
914 def _try_parse_label_or_set_offset(self):
71aaa3f7 915 # Match prefix
2adf4336 916 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
71aaa3f7
PP
917 # No match
918 return
919
2adf4336
PP
920 # Offset setting item?
921 item = self._try_parse_set_offset_val()
71aaa3f7
PP
922
923 if item is None:
924 # Label item?
925 item = self._try_parse_label_name()
926
927 if item is None:
928 # At this point it's invalid
2adf4336 929 self._raise_error("Expecting a label name or an offset setting value")
71aaa3f7
PP
930
931 # Expect suffix
2adf4336 932 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
71aaa3f7
PP
933 return item
934
676f6189
PP
935 # Patterns for _try_parse_align_offset()
936 _align_offset_prefix_pat = re.compile(r"@\s*")
937 _align_offset_val_pat = re.compile(r"(\d+)\s*")
938 _align_offset_pad_val_prefix_pat = re.compile(r"~\s*")
939
940 # Tries to parse an offset alignment, returning an offset alignment
941 # item on success.
942 def _try_parse_align_offset(self):
943 begin_text_loc = self._text_loc
944
945 # Match prefix
946 if self._try_parse_pat(self._align_offset_prefix_pat) is None:
947 # No match
948 return
949
950 align_text_loc = self._text_loc
951 m = self._expect_pat(
952 self._align_offset_val_pat,
953 "Expecting an alignment (positive multiple of eight bits)",
954 )
955
956 # Validate alignment
957 val = int(m.group(1))
958
959 if val <= 0 or (val % 8) != 0:
960 _raise_error(
961 "Invalid alignment value {} (not a positive multiple of eight)".format(
962 val
963 ),
964 align_text_loc,
965 )
966
967 # Padding value?
968 pad_val = 0
969
970 if self._try_parse_pat(self._align_offset_pad_val_prefix_pat) is not None:
971 pad_val_text_loc = self._text_loc
972 m = self._expect_pat(self._pos_const_int_pat, "Expecting a byte value")
973
974 # Validate
975 pad_val = int(m.group(0), 0)
976
977 if pad_val > 255:
978 _raise_error(
979 "Invalid padding byte value {}".format(pad_val),
980 pad_val_text_loc,
981 )
982
983 # Return item
984 return _AlignOffset(val, pad_val, begin_text_loc)
985
e57a18e1 986 # Patterns for _expect_rep_cond_expr()
27d52a19
PP
987 _rep_cond_expr_prefix_pat = re.compile(r"\{")
988 _rep_cond_expr_pat = re.compile(r"[^}]+")
989 _rep_cond_expr_suffix_pat = re.compile(r"\}")
990
991 # Parses the expression of a conditional block or of a repetition
992 # (block or post-item) and returns the expression string and AST
993 # node.
994 def _expect_rep_cond_expr(self, accept_int: bool):
e57a18e1
PP
995 expr_text_loc = self._text_loc
996
997 # Constant integer?
27d52a19
PP
998 m = None
999
1000 if accept_int:
1001 m = self._try_parse_pat(self._pos_const_int_pat)
e57a18e1
PP
1002
1003 if m is None:
1004 # Name?
1005 m = self._try_parse_pat(_py_name_pat)
1006
1007 if m is None:
1008 # Expression?
27d52a19
PP
1009 if self._try_parse_pat(self._rep_cond_expr_prefix_pat) is None:
1010 if accept_int:
1011 mid_msg = "a positive constant integer, a name, or `{`"
1012 else:
1013 mid_msg = "a name or `{`"
1014
e57a18e1 1015 # At this point it's invalid
27d52a19 1016 self._raise_error("Expecting {}".format(mid_msg))
e57a18e1
PP
1017
1018 # Expect an expression
1019 expr_text_loc = self._text_loc
27d52a19 1020 m = self._expect_pat(self._rep_cond_expr_pat, "Expecting an expression")
e57a18e1
PP
1021 expr_str = m.group(0)
1022
1023 # Expect `}`
27d52a19 1024 self._expect_pat(self._rep_cond_expr_suffix_pat, "Expecting `}`")
e57a18e1
PP
1025 else:
1026 expr_str = m.group(0)
1027 else:
1028 expr_str = m.group(0)
1029
1030 return self._ast_expr_from_str(expr_str, expr_text_loc)
1031
27d52a19
PP
1032 # Parses the multiplier expression of a repetition (block or
1033 # post-item) and returns the expression string and AST node.
1034 def _expect_rep_mul_expr(self):
1035 return self._expect_rep_cond_expr(True)
1036
1037 # Common block end pattern
1038 _block_end_pat = re.compile(r"!end\b\s*")
1039
e57a18e1
PP
1040 # Pattern for _try_parse_rep_block()
1041 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b\s*")
e57a18e1
PP
1042
1043 # Tries to parse a repetition block, returning a repetition item on
1044 # success.
1045 def _try_parse_rep_block(self):
1046 begin_text_loc = self._text_loc
1047
1048 # Match prefix
1049 if self._try_parse_pat(self._rep_block_prefix_pat) is None:
1050 # No match
1051 return
1052
1053 # Expect expression
1054 self._skip_ws_and_comments()
1055 expr_str, expr = self._expect_rep_mul_expr()
1056
1057 # Parse items
1058 self._skip_ws_and_comments()
1059 items_text_loc = self._text_loc
1060 items = self._parse_items()
1061
1062 # Expect end of block
1063 self._skip_ws_and_comments()
1064 self._expect_pat(
27d52a19 1065 self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
e57a18e1
PP
1066 )
1067
1068 # Return item
1069 return _Rep(_Group(items, items_text_loc), expr_str, expr, begin_text_loc)
1070
27d52a19
PP
# Prefix of a conditional block (`!if`), used by
# _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b\s*")

# Tries to parse a conditional block: prefix, condition expression,
# items, and the closing `!end`.
#
# Returns a conditional item wrapping a group of the parsed items on
# success, or `None` if the prefix doesn't match.
def _try_parse_cond_block(self):
    start_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return None

    # Condition expression
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_rep_cond_expr(False)

    # Conditional items
    self._skip_ws_and_comments()
    inner_text_loc = self._text_loc
    inner_items = self._parse_items()

    # Mandatory block terminator
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of conditional block)",
    )

    inner_group = _Group(inner_items, inner_text_loc)
    return _Cond(inner_group, cond_expr_str, cond_expr, start_text_loc)
1102
71aaa3f7
PP
1103 # Tries to parse a base item (anything except a repetition),
1104 # returning it on success.
1105 def _try_parse_base_item(self):
1106 # Byte item?
1107 item = self._try_parse_byte()
1108
1109 if item is not None:
1110 return item
1111
1112 # String item?
1113 item = self._try_parse_str()
1114
1115 if item is not None:
1116 return item
1117
2adf4336
PP
1118 # Value, variable assignment, or byte order setting item?
1119 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
1120
1121 if item is not None:
1122 return item
1123
2adf4336
PP
1124 # Label or offset setting item?
1125 item = self._try_parse_label_or_set_offset()
71aaa3f7 1126
676f6189
PP
1127 if item is not None:
1128 return item
1129
1130 # Offset alignment item?
1131 item = self._try_parse_align_offset()
1132
71aaa3f7
PP
1133 if item is not None:
1134 return item
1135
1136 # Group item?
1137 item = self._try_parse_group()
1138
1139 if item is not None:
1140 return item
1141
e57a18e1
PP
1142 # Repetition (block) item?
1143 item = self._try_parse_rep_block()
71aaa3f7 1144
e57a18e1
PP
1145 if item is not None:
1146 return item
1147
27d52a19
PP
1148 # Conditional block item?
1149 item = self._try_parse_cond_block()
1150
1151 if item is not None:
1152 return item
1153
e57a18e1
PP
1154 # Pattern for _try_parse_rep_post()
1155 _rep_post_prefix_pat = re.compile(r"\*")
1156
1157 # Tries to parse a post-item repetition, returning the expression
1158 # string and AST expression node on success.
1159 def _try_parse_rep_post(self):
71aaa3f7 1160 # Match prefix
e57a18e1 1161 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
71aaa3f7 1162 # No match
2adf4336 1163 return
71aaa3f7 1164
e57a18e1 1165 # Return expression string and AST expression
71aaa3f7 1166 self._skip_ws_and_comments()
e57a18e1 1167 return self._expect_rep_mul_expr()
71aaa3f7 1168
1ca7b5e1
PP
# Tries to parse one item, possibly followed by a post-item
# repetition (`*` and a multiplier expression).
#
# On success, appends the parsed item (wrapped in a repetition item if
# a repetition follows) to `items` and returns `True`. Returns `False`
# without touching `items` if no item could be parsed.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    parsed_item = self._try_parse_base_item()

    if parsed_item is None:
        # Nothing recognized here
        return False

    # Only repeatable items may be followed by a repetition
    if isinstance(parsed_item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_res = self._try_parse_rep_post()

        if rep_res is not None:
            mul_expr_str, mul_expr = rep_res
            parsed_item = _Rep(parsed_item, mul_expr_str, mul_expr, rep_text_loc)

    items.append(parsed_item)
    return True
71aaa3f7
PP
1194
1195 # Parses and returns items, skipping whitespaces, insignificant
1196 # symbols, and comments when allowed, and stopping at the first
1197 # unknown character.
1198 def _parse_items(self) -> List[_Item]:
1199 items = [] # type: List[_Item]
1200
1201 while self._isnt_done():
1ca7b5e1
PP
1202 # Try to append item
1203 if not self._try_append_item(items):
1204 # Unknown at this point
1205 break
71aaa3f7
PP
1206
1207 return items
1208
# Parses the whole Normand input, setting `self._res` to the main
# group item on success.
#
# Raises a parse error (through `self._raise_error()`) if anything
# remains after the first level items.
def _parse(self):
    if not self._normand.strip():
        # Blank input: nothing to consume, the main group is empty
        self._res = _Group([], self._text_loc)
        return

    # First level items
    top_items = self._parse_items()

    # Whatever remains past trailing whitespaces/comments is unknown
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    # Main group item
    self._res = _Group(top_items, self._text_loc)
1230
1231
# The return type of parse().
#
# Not directly constructible: calling `ParseResult()` raises
# `NotImplementedError`. Instances are built internally with
# `ParseResult._create()`.
class ParseResult:
    # Builds an instance without going through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1289
1290
# Raises a parse error for the item `item`, creating it using the
# message `msg` and the text location of `item` (`item.text_loc`).
#
# Never returns.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    _raise_error(msg, item.text_loc)
1295
1296
# The `ICITTE` reserved name.
#
# Within an expression, this name evaluates to the current offset, and
# expression validation always accepts it.
_icitte_name = "ICITTE"
1299
1300
2adf4336
PP
1301# Base node visitor.
1302#
1303# Calls the _visit_name() method for each name node which isn't the name
1304# of a call.
1305class _NodeVisitor(ast.NodeVisitor):
1306 def __init__(self):
71aaa3f7
PP
1307 self._parent_is_call = False
1308
1309 def generic_visit(self, node: ast.AST):
1310 if type(node) is ast.Call:
1311 self._parent_is_call = True
1312 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1313 self._visit_name(node.id)
71aaa3f7
PP
1314
1315 super().generic_visit(node)
1316 self._parent_is_call = False
1317
2adf4336
PP
1318 @abc.abstractmethod
1319 def _visit_name(self, name: str):
1320 ...
1321
71aaa3f7 1322
2adf4336
PP
# Expression validator: validates that all the (non-called) names
# within the expression of an item are allowed.
#
# The reserved name `ICITTE` is always allowed. Any other name must be
# part of `allowed_names`, otherwise visiting raises a parse error
# located at the item and listing the legal names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str]):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        # Build the list of legal names for the message. `ICITTE` is
        # always part of it, so the list is never empty.
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)
        legal_names_str = ", ".join(
            sorted(["`{}`".format(legal_name) for legal_name in legal_names])
        )
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._item.text_loc)
1352
1353
# Expression visitor collecting all the contained (non-called) names.
#
# After visiting, the `names` property is the set of collected names.
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # The base initializer sets up the call tracking state
        super().__init__()
        self._names = set()  # type: Set[str]

    # Names collected so far.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
71aaa3f7 1366
71aaa3f7 1367
2adf4336
PP
1368# Generator state.
1369class _GenState:
1370 def __init__(
1b8aa84a 1371 self,
e57a18e1
PP
1372 variables: VariablesT,
1373 labels: LabelsT,
1b8aa84a
PP
1374 offset: int,
1375 bo: Optional[ByteOrder],
2adf4336
PP
1376 ):
1377 self.variables = variables.copy()
1378 self.labels = labels.copy()
1379 self.offset = offset
1380 self.bo = bo
71aaa3f7
PP
1381
1382
2adf4336 1383# Generator of data and final state from a group item.
71aaa3f7
PP
1384#
1385# Generation happens in memory at construction time. After building, use
1386# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1387# get the resulting context.
2adf4336
PP
1388#
1389# The steps of generation are:
1390#
27d52a19
PP
1391# 1. Validate that each repetition, conditional, and LEB128 integer
1392# expression uses only reachable names.
2adf4336 1393#
27d52a19
PP
1394# 2. Compute and keep the effective repetition count, conditional value,
1395# and LEB128 integer value for each repetition and LEB128 integer
1396# instance.
2adf4336
PP
1397#
1398# 3. Generate bytes, updating the initial state as it goes which becomes
1399# the final state after the operation.
1400#
27d52a19
PP
1401# During the generation, when handling a `_Rep`, `_Cond`, or
1402# `_Leb128Int` item, we already have the effective repetition count,
1403# conditional value, or value of the instance.
2adf4336
PP
1404#
# When handling a `_Group` item, first update the current labels with
# all the immediate (not nested) labels, and then handle each
# contained item. This gives the contained items access to "future"
# outer labels. Then remove the immediate labels from the state so
# that outer items don't have access to inner labels.
71aaa3f7
PP
1410class _Gen:
# Generates the data for `group` starting from the initial variables
# `variables`, labels `labels`, offset `offset`, and byte order `bo`.
#
# All the work happens here, in three steps; read the results with
# the properties of this object afterwards.
def __init__(
    self,
    group: _Group,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Step 1: validate the names of the variable-length item
    # expressions.
    init_variable_names = set(variables.keys())
    init_label_names = set(labels.keys())
    self._validate_vl_exprs(group, init_variable_names, init_label_names)

    # Step 2: compute the value of each variable-length item instance
    # using a dedicated state.
    vl_state = _GenState(variables, labels, offset, bo)
    self._vl_instance_vals = self._compute_vl_instance_vals(group, vl_state)

    # Step 3: generate the data using a fresh state
    self._gen(group, _GenState(variables, labels, offset, bo))
71aaa3f7
PP
1424
# Generated bytes (the byte array which _gen() fills).
@property
def data(self):
    return self._data

# Updated variables (from the final generation state).
@property
def variables(self):
    return self._final_state.variables

# Updated main group labels (from the final generation state).
@property
def labels(self):
    return self._final_state.labels

# Updated offset (from the final generation state).
@property
def offset(self):
    return self._final_state.offset

# Updated byte order (from the final generation state).
@property
def bo(self):
    return self._final_state.bo
1449
# Returns the set of used, non-called names within the AST expression
# `expr`, as collected by an `_ExprNamesVisitor`.
@staticmethod
def _names_of_expr(expr: ast.Expression):
    names_visitor = _ExprNamesVisitor()
    names_visitor.visit(expr)
    return names_visitor.names
1457
27d52a19
PP
# Validates that all the repetition, conditional, and LEB128 integer
# expressions within `item` don't refer, directly or indirectly, to
# subsequent labels.
#
# `allowed_variable_names` and `allowed_label_names` are the names
# which are reachable when `item` is handled; this function updates
# both sets in place as it goes.
#
# Depending on the type of `item`:
#
# `_Label`:
#     Add its name to the allowed label names: a label occurring
#     before a repetition, and not within a nested group, is always
#     reachable.
#
# `_VarAssign`:
#     If all the names within its expression are allowed, then add
#     its name to the allowed variable names.
#
#     Otherwise, remove its name from the allowed variable names (if
#     it's in there): a variable which refers to an unreachable name
#     is unreachable itself.
#
# `_Leb128Int`:
#     Make sure all the names within its expression are allowed.
#
# `_Rep` and `_Cond`:
#     Make sure all the names within its expression are allowed, and
#     then validate the inner item with the same sets.
#
# `_Group`:
#     Validate each contained item with a _copy_ of the current
#     allowed label names and the same current allowed variable
#     names.
@staticmethod
def _validate_vl_exprs(
    item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
):
    if type(item) is _Label:
        allowed_label_names.add(item.name)
        return

    if type(item) is _VarAssign:
        # The variable is reachable only if every name of its
        # expression is.
        reachable_names = (
            allowed_label_names | allowed_variable_names | {_icitte_name}
        )

        if _Gen._names_of_expr(item.expr).issubset(reachable_names):
            allowed_variable_names.add(item.name)
        else:
            allowed_variable_names.discard(item.name)

        return

    if isinstance(item, _Leb128Int):
        # Validate the expression
        _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
            item.expr
        )
        return

    if type(item) is _Rep or type(item) is _Cond:
        # Validate the expression first
        _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
            item.expr
        )

        # Then the inner item
        _Gen._validate_vl_exprs(item.item, allowed_variable_names, allowed_label_names)
        return

    if type(item) is _Group:
        # Work on a copy of the label names so that the caller's frame
        # cannot access the nested label names.
        inner_label_names = allowed_label_names.copy()

        for subitem in item.items:
            _Gen._validate_vl_exprs(subitem, allowed_variable_names, inner_label_names)
71aaa3f7 1534
2adf4336
PP
# Evaluates the expression of `item` considering the current
# generation state `state`, and returns the result.
#
# The result must be an `int` (a `bool` result is converted to `int`).
# If `allow_float` is `True`, then the type of the result may be
# `float` too.
#
# Raises a parse error for `item` if the expression uses an illegal
# name, fails to evaluate, or has an unexpected result type.
#
# NOTE(review): the expression goes through eval(); only non-called
# names are validated. Confirm that Normand input is trusted.
@staticmethod
def _eval_item_expr(
    item: _ExprItemT,
    state: _GenState,
    allow_float: bool = False,
):
    # Available symbols: current labels, then `ICITTE` (current
    # offset), and then current variables. A later entry overrides an
    # earlier one having the same name.
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(item, set(syms.keys())).visit(item.expr)

    # Compile and evaluate expression node
    try:
        val = eval(compile(item.expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error_for_item(
            "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
            item,
        )

    # Normalize: a `bool` result becomes an `int`
    if type(val) is bool:
        val = int(val)

    # Validate result type
    if allow_float:
        valid_types = (int, float)  # type: Tuple[type, ...]
        type_msg = "`int` or `float`"
    else:
        valid_types = (int,)
        type_msg = "`int`"

    if type(val) not in valid_types:
        _raise_error_for_item(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                item.expr_str, type_msg, type(val).__name__
            ),
            item,
        )

    return val
1588
05f81895
PP
1589 # Returns the size, in bytes, required to encode the value `val`
1590 # with LEB128 (signed version if `is_signed` is `True`).
1591 @staticmethod
1592 def _leb128_size_for_val(val: int, is_signed: bool):
1593 if val < 0:
1594 # Equivalent upper bound.
1595 #
1596 # For example, if `val` is -128, then the full integer for
1597 # this number of bits would be [-128, 127].
1598 val = -val - 1
1599
1600 # Number of bits (add one for the sign if needed)
1601 bits = val.bit_length() + int(is_signed)
1602
1603 if bits == 0:
1604 bits = 1
1605
1606 # Seven bits per byte
1607 return math.ceil(bits / 7)
1608
676f6189
PP
1609 # Returns the offset `offset` aligned according to `item`.
1610 @staticmethod
1611 def _align_offset(offset: int, item: _AlignOffset):
1612 align_bytes = item.val // 8
1613 return (offset + align_bytes - 1) // align_bytes * align_bytes
1614
27d52a19
PP
# Computes the effective value for each repetition, conditional, and
# LEB128 integer instance within `item`, appending them to
# `instance_vals` (a new list if `None`) and returning
# `instance_vals`.
#
# `state` is updated (offset, labels, and variables) as items are
# processed.
#
# At this point it must be known that, for a given variable-length
# item, its expression only contains reachable names.
#
# When handling a `_Rep` or `_Cond` item, this function appends its
# effective multiplier/value to `instance_vals` _before_ handling its
# repeated/conditional item.
#
# When handling a `_VarAssign` item, this function only evaluates it
# if all its names are reachable.
@staticmethod
def _compute_vl_instance_vals(
    item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
):
    if instance_vals is None:
        instance_vals = []

    item_type = type(item)

    if isinstance(item, _ScalarItem):
        state.offset += item.size
    elif item_type is _Label:
        state.labels[item.name] = state.offset
    elif item_type is _VarAssign:
        # Evaluate only if every name of the expression is known
        all_known = all(
            name == _icitte_name or name in state.variables or name in state.labels
            for name in _Gen._names_of_expr(item.expr)
        )

        if all_known:
            # Evaluate the expression and keep the result
            state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
    elif item_type is _SetOffset:
        state.offset = item.val
    elif item_type is _AlignOffset:
        state.offset = _Gen._align_offset(state.offset, item)
    elif isinstance(item, _Leb128Int):
        # Evaluate the expression
        val = _Gen._eval_item_expr(item, state)

        # ULEB128 cannot encode a negative value
        if item_type is _ULeb128Int and val < 0:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                    item.expr_str, val
                ),
                item,
            )

        # Add the evaluation result to the variable-length item
        # instance values.
        instance_vals.append(val)

        # Update offset
        state.offset += _Gen._leb128_size_for_val(val, item_type is _SLeb128Int)
    elif item_type is _Rep:
        # Evaluate the expression and keep the result
        val = _Gen._eval_item_expr(item, state)

        # A repetition count cannot be negative
        if val < 0:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,}".format(
                    item.expr_str, val
                ),
                item,
            )

        # Add to variable-length item instance values
        instance_vals.append(val)

        # Process the repeated item `val` times
        for _ in range(val):
            _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
    elif item_type is _Cond:
        # Evaluate the expression and keep the result
        val = _Gen._eval_item_expr(item, state)

        # Add to variable-length item instance values
        instance_vals.append(val)

        # Process the conditional item if needed
        if val:
            _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
    elif item_type is _Group:
        # Labels added within the group are dropped once it's processed
        saved_labels = state.labels.copy()

        for subitem in item.items:
            _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

        state.labels = saved_labels

    return instance_vals
1718
676f6189
PP
# Dry handler for an item which doesn't change the offset: returns
# `next_vl_instance` as is and leaves `state` untouched.
def _update_offset_noop(self, item: _Item, state: _GenState, next_vl_instance: int):
    return next_vl_instance

# Dry handler for a scalar item: advances `state.offset` by the size
# of `item` and returns `next_vl_instance` as is.
def _dry_handle_scalar_item(
    self, item: _ScalarItem, state: _GenState, next_vl_instance: int
):
    state.offset += item.size
    return next_vl_instance
2adf4336 1727
676f6189
PP
# Dry handler for a LEB128 integer item: advances `state.offset` by
# the encoded size of the precomputed value of this instance and
# returns the index of the next variable-length item instance.
def _dry_handle_leb128_int_item(
    self, item: _Leb128Int, state: _GenState, next_vl_instance: int
):
    # Get the value from `self._vl_instance_vals` _before_
    # incrementing `next_vl_instance` to honor the order of
    # _compute_vl_instance_vals().
    val = self._vl_instance_vals[next_vl_instance]
    is_signed = type(item) is _SLeb128Int
    state.offset += self._leb128_size_for_val(val, is_signed)

    # Consumed this instance
    return next_vl_instance + 1
2adf4336 1739
676f6189
PP
# Dry handler for a group item: dry-handles each contained item in
# order, threading the next variable-length item instance index
# through them, and returns the updated index.
def _dry_handle_group_item(
    self, item: _Group, state: _GenState, next_vl_instance: int
):
    for subitem in item.items:
        next_vl_instance = self._dry_handle_item(subitem, state, next_vl_instance)

    return next_vl_instance
2adf4336 1747
676f6189 1748 def _dry_handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
05f81895
PP
1749 # Get the value from `self._vl_instance_vals` _before_
1750 # incrementing `next_vl_instance` to honor the order of
1751 # _compute_vl_instance_vals().
1752 mul = self._vl_instance_vals[next_vl_instance]
1753 next_vl_instance += 1
2adf4336
PP
1754
1755 for _ in range(mul):
676f6189 1756 next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)
2adf4336 1757
676f6189 1758 return next_vl_instance
2adf4336 1759
27d52a19
PP
1760 def _dry_handle_cond_item(
1761 self, item: _Cond, state: _GenState, next_vl_instance: int
1762 ):
1763 # Get the value from `self._vl_instance_vals` _before_
1764 # incrementing `next_vl_instance` to honor the order of
1765 # _compute_vl_instance_vals().
1766 val = self._vl_instance_vals[next_vl_instance]
1767 next_vl_instance += 1
1768
1769 if val:
1770 next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)
1771
1772 return next_vl_instance
1773
676f6189
PP
# Dry handler for an offset alignment item: aligns `state.offset`
# according to `item` and returns `next_vl_instance` as is.
def _dry_handle_align_offset_item(
    self, item: _AlignOffset, state: _GenState, next_vl_instance: int
):
    state.offset = self._align_offset(state.offset, item)
    return next_vl_instance

# Dry handler for an offset setting item: sets `state.offset` to the
# value of `item` and returns `next_vl_instance` as is.
def _dry_handle_set_offset_item(
    self, item: _SetOffset, state: _GenState, next_vl_instance: int
):
    state.offset = item.val
    return next_vl_instance

# Updates `state.offset` considering the generated data of `item`,
# without generating any, and returns the updated next
# variable-length item instance.
#
# Dispatches on the exact type of `item` through
# `self._dry_handle_item_funcs`.
def _dry_handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
    return self._dry_handle_item_funcs[type(item)](item, state, next_vl_instance)
2adf4336
PP
1791
# Handles the byte item `item`: appends its value to the generated
# data and advances the offset by its size.
def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
    self._data.append(item.val)
    state.offset += item.size
    return next_vl_instance

# Handles the string item `item`: appends its data to the generated
# data and advances the offset by its size.
def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
    self._data += item.data
    state.offset += item.size
    return next_vl_instance

# Handles the byte order setting item `item`.
def _handle_set_bo_item(
    self, item: _SetBo, state: _GenState, next_vl_instance: int
):
    # Update current byte order
    state.bo = item.bo
    return next_vl_instance

# Handles the variable assignment item `item`: evaluates its
# expression (the result may be a `float`) and stores the result as
# the value of the variable.
def _handle_var_assign_item(
    self, item: _VarAssign, state: _GenState, next_vl_instance: int
):
    # Update variable
    state.variables[item.name] = self._eval_item_expr(item, state, True)
    return next_vl_instance
71aaa3f7 1819
# Handles the fixed-length integer item `item` having the value `val`:
# validates the range of `val` and appends its encoding to the
# generated data.
#
# This method doesn't update `state.offset`.
def _handle_fl_int_item(self, val: int, item: _FlNum, state: _GenState):
    # Accepted range: from the smallest signed to the largest unsigned
    # value which fits `item.len` bits.
    min_val = -(2 ** (item.len - 1))
    max_val = 2**item.len - 1

    if not min_val <= val <= max_val:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, item.len, item.expr_str, state.offset
            ),
            item,
        )

    # No byte order set means big endian here
    is_be = state.bo in (None, ByteOrder.BE)

    # Encode result on 64 bits (to extend the sign bit whatever the
    # value of `item.len`).
    fmt = "{}{}".format(">" if is_be else "<", "Q" if val >= 0 else "q")
    encoded = struct.pack(fmt, val)

    # Keep only the requested length
    byte_count = item.len // 8

    if is_be:
        # Big endian: keep last bytes
        encoded = encoded[-byte_count:]
    else:
        # Little endian: keep first bytes
        assert state.bo == ByteOrder.LE
        encoded = encoded[:byte_count]

    # Append to current bytes
    self._data += encoded
269f6eb3
PP
1854
# Handles the fixed-length floating point number item `item` having
# the value `val`: validates the length of `item` (32 or 64 bits) and
# appends the encoding of `val` to the generated data.
#
# This method doesn't update `state.offset`.
def _handle_fl_float_item(self, val: float, item: _FlNum, state: _GenState):
    # Validate length
    if item.len not in (32, 64):
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # No byte order set means big endian here
    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"

    # Single precision for 32 bits, double precision for 64 bits
    type_char = "f" if item.len == 32 else "d"

    # Encode result
    self._data += struct.pack("{}{}".format(bo_char, type_char), val)
1874
1875 # Handles the fixed-length number item `item`.
1876 def _handle_fl_num_item(
1877 self, item: _FlNum, state: _GenState, next_vl_instance: int
1878 ):
1879 # Compute value
e57a18e1 1880 val = self._eval_item_expr(item, state, True)
269f6eb3
PP
1881
1882 # Validate current byte order
1883 if state.bo is None and item.len > 8:
1884 _raise_error_for_item(
1885 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
1886 item.expr_str
1887 ),
1888 item,
1889 )
1890
1891 # Handle depending on type
1892 if type(val) is int:
1893 self._handle_fl_int_item(val, item, state)
1894 else:
1895 assert type(val) is float
1896 self._handle_fl_float_item(val, item, state)
1897
1898 # Update offset
1899 state.offset += item.size
1900
05f81895
PP
1901 return next_vl_instance
1902
# Handles the LEB128 integer item `item`: appends the LEB128 encoding
# of the precomputed value of this instance to the generated data and
# advances the offset by the encoded size.
#
# Returns the index of the next variable-length item instance.
def _handle_leb128_int_item(
    self, item: _Leb128Int, state: _GenState, next_vl_instance: int
):
    # Get the precomputed value
    val = self._vl_instance_vals[next_vl_instance]

    # Size in bytes
    size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= ~0x80

    # Update offset, like the dry handler and
    # _compute_vl_instance_vals() do. Otherwise `ICITTE`, alignment
    # padding, and the final offset would be wrong after this item.
    state.offset += size

    # Consumed this instance
    return next_vl_instance + 1
71aaa3f7 1924
2adf4336
PP
# Handles the group item `item`, only removing the immediate labels
# from `state.labels` if `remove_immediate_labels` is `True`.
#
# Works in two passes: a dry pass to compute the values of the
# immediate labels, and then the actual handling of each contained
# item.
#
# Returns the updated next variable-length item instance index.
def _handle_group_item(
    self,
    item: _Group,
    state: _GenState,
    next_vl_instance: int,
    remove_immediate_labels: bool = True,
):
    # Pass 1: compute the values of the immediate (not nested) labels
    # with a scratch state which only tracks the offset. Those labels
    # are reachable by any expression within the group.
    dry_state = _GenState({}, {}, state.offset, None)
    own_label_names = set()  # type: Set[str]
    dry_next_vl_instance = next_vl_instance

    for subitem in item.items:
        if type(subitem) is _Label:
            # New immediate label
            state.labels[subitem.name] = dry_state.offset
            own_label_names.add(subitem.name)

        dry_next_vl_instance = self._dry_handle_item(
            subitem, dry_state, dry_next_vl_instance
        )

    # Pass 2: handle each item now with the actual state
    for subitem in item.items:
        next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

    # Remove immediate labels if required so that outer items won't
    # reach inner labels.
    if remove_immediate_labels:
        for label_name in own_label_names:
            del state.labels[label_name]

    return next_vl_instance
71aaa3f7 1961
2adf4336 1962 # Handles the repetition item `item`.
05f81895
PP
1963 def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
1964 # Get the precomputed repetition count
1965 mul = self._vl_instance_vals[next_vl_instance]
1966
1967 # Consumed this instance
1968 next_vl_instance += 1
71aaa3f7 1969
2adf4336 1970 for _ in range(mul):
05f81895 1971 next_vl_instance = self._handle_item(item.item, state, next_vl_instance)
71aaa3f7 1972
05f81895 1973 return next_vl_instance
71aaa3f7 1974
27d52a19
PP
1975 # Handles the conditional item `item`.
1976 def _handle_cond_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
1977 # Get the precomputed conditional value
1978 val = self._vl_instance_vals[next_vl_instance]
1979
1980 # Consumed this instance
1981 next_vl_instance += 1
1982
1983 if val:
1984 next_vl_instance = self._handle_item(item.item, state, next_vl_instance)
1985
1986 return next_vl_instance
1987
2adf4336
PP
1988 # Handles the offset setting item `item`.
1989 def _handle_set_offset_item(
05f81895 1990 self, item: _SetOffset, state: _GenState, next_vl_instance: int
2adf4336
PP
1991 ):
1992 state.offset = item.val
05f81895 1993 return next_vl_instance
2adf4336 1994
676f6189
PP
1995 # Handles offset alignment item `item` (adds padding).
1996 def _handle_align_offset_item(
1997 self, item: _AlignOffset, state: _GenState, next_vl_instance: int
1998 ):
1999 init_offset = state.offset
2000 state.offset = self._align_offset(state.offset, item)
2001 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2002 return next_vl_instance
2003
# Handles the label item `item`.
#
# Nothing to do here: _handle_group_item() already sets the value of
# each immediate label of a group before handling its items.
def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
    return next_vl_instance

# Handles the item `item`, returning the updated next variable-length
# item instance.
#
# Dispatches on the exact type of `item` through
# `self._item_handlers`.
def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
    return self._item_handlers[type(item)](item, state, next_vl_instance)
2adf4336
PP
2012
# Generates the data (`self._data`) and final state
# (`self._final_state`) from `group` and the initial state `state`.
#
# `state` is updated in place during the generation and becomes the
# final state.
def _gen(self, group: _Group, state: _GenState):
    # Initial state
    self._data = bytearray()

    # Item handlers, keyed by exact item type (see _handle_item())
    self._item_handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Dry item handlers (only updates the state offset), keyed by
    # exact item type (see _dry_handle_item())
    self._dry_handle_item_funcs = {
        _AlignOffset: self._dry_handle_align_offset_item,
        _Byte: self._dry_handle_scalar_item,
        _Cond: self._dry_handle_cond_item,
        _FlNum: self._dry_handle_scalar_item,
        _Group: self._dry_handle_group_item,
        _Label: self._update_offset_noop,
        _Rep: self._dry_handle_rep_item,
        _SetBo: self._update_offset_noop,
        _SetOffset: self._dry_handle_set_offset_item,
        _SLeb128Int: self._dry_handle_leb128_int_item,
        _Str: self._dry_handle_scalar_item,
        _ULeb128Int: self._dry_handle_leb128_int_item,
        _VarAssign: self._update_offset_noop,
    }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

    # Handle the group item, _not_ removing the immediate labels
    # because the `labels` property offers them. The first
    # variable-length item instance to consume is the one at index 0.
    self._handle_group_item(group, state, 0, False)

    # This is actually the final state
    self._final_state = state
71aaa3f7
PP
2059
2060
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the bytes which it encodes.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values; `None` means no initial variables.
# A variable name must not be the reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values; `None` means no initial labels. A label
# name must not be the reserved name `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Fall back to empty dictionaries; the same dictionary objects are
    # handed to both the parser and the generator.
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text first, then generate from the parser's result
    root = _Parser(normand, variables, labels).res
    gen = _Gen(root, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2100
2101
# Builds the command-line argument parser and returns the parsed
# command-line arguments.
def _parse_cli_args():
    import argparse

    ap = argparse.ArgumentParser()
    add = ap.add_argument

    # Initial generation state
    add(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    add(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Repeatable `NAME=VAL` assignments
    add(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    add(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    add("--version", action="version", version="Normand {}".format(__version__))

    # Optional input path
    add(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return ap.parse_args()
2150
2151
2152# Raises a command-line error with the message `msg`.
2153def _raise_cli_error(msg: str) -> NoReturn:
2154 raise RuntimeError("Command-line error: {}".format(msg))
2155
2156
2157# Returns a dictionary of string to integers from the list of strings
2158# `args` containing `NAME=VAL` entries.
2159def _dict_from_arg(args: Optional[List[str]]):
e57a18e1 2160 d = {} # type: LabelsT
71aaa3f7
PP
2161
2162 if args is None:
2163 return d
2164
2165 for arg in args:
2166 m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), arg)
2167
2168 if m is None:
2169 _raise_cli_error("Invalid assignment {}".format(arg))
2170
2e1c1acd
PP
2171 d[m.group(1)] = int(m.group(2))
2172
71aaa3f7
PP
2173 return d
2174
2175
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input (from the
# given path, or from the standard input if there's none), and writes
# the resulting bytes to the standard output.
#
# Raises a command-line error (`RuntimeError`) if an initial
# variable/label assignment is malformed or if the initial offset is
# negative. On a `ParseError`, prints its location and message through
# _fail(), which exits with status 1.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (report the offending value in the message)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # argparse already restricts the choices to `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        # _fail() never returns, so `res` is always bound below
        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print (raw bytes, hence the underlying binary buffer)
    sys.stdout.buffer.write(res.data)
2225
2226
2227# Prints the exception message `msg` and exits with status 1.
2228def _fail(msg: str) -> NoReturn:
2229 if not msg.endswith("."):
2230 msg += "."
2231
2232 print(msg, file=sys.stderr)
2233 sys.exit(1)
2234
2235
# CLI entry point: runs the command-line tool, reporting any exception
# through _fail() (message on the standard error stream, exit status 1).
def _run_cli():
    try:
        _try_run_cli()
    except Exception as err:
        # Top-level boundary: only the exception message is shown
        reason = str(err)
        _fail(reason)
2242
2243
# Run the command-line tool when this module is executed as a script
if __name__ == "__main__":
    _run_cli()
This page took 0.126034 seconds and 4 git commands to generate.