Add macro support
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
320644e2 33__version__ = "0.11.0"
71aaa3f7 34__all__ = [
320644e2
PP
35 "__author__",
36 "__version__",
71aaa3f7 37 "ByteOrder",
320644e2 38 "LabelsT",
71aaa3f7
PP
39 "parse",
40 "ParseError",
41 "ParseResult",
e57a18e1 42 "TextLocation",
e57a18e1 43 "VariablesT",
71aaa3f7
PP
44]
45
46import re
47import abc
48import ast
49import sys
320644e2 50import copy
71aaa3f7 51import enum
05f81895 52import math
71aaa3f7 53import struct
e57a18e1
PP
54import typing
55from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
56
57
58# Text location (line and column numbers).
class TextLocation:
    # Direct construction is forbidden: always raises
    # `NotImplementedError`. Instances come from _create().
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a text location for the line number `line_no`
    # and the column number `col_no`, bypassing __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    # Actual initialization, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        fields = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*fields)
2adf4336 85
71aaa3f7
PP
86
87# Any item.
class _Item:
    # Builds an item found at the source text location `text_loc`.
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Source text location (as given at construction time).
    @property
    def text_loc(self):
        return self._text_loc
96
2adf4336
PP
97
98# Scalar item.
class _ScalarItem(_Item):
    # Size of this item (bytes); concrete scalar items provide the
    # actual value.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
105
106
107# A repeatable item.
2adf4336 108class _RepableItem:
71aaa3f7
PP
109 pass
110
111
112# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # Builds a byte item having the value `val`, found at the text
    # location `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # Size (bytes): a byte item is always a single byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        fields = (hex(self._val), repr(self._text_loc))
        return "_Byte({}, {})".format(*fields)
71aaa3f7
PP
129
130
131# String.
class _Str(_ScalarItem, _RepableItem):
    # Builds a string item holding the already encoded bytes `data`,
    # found at the text location `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Size (bytes): length of the encoded data.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        fields = (repr(self._data), repr(self._text_loc))
        return "_Str({}, {})".format(*fields)
71aaa3f7
PP
148
149
150# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian (most significant byte first).
    BE = "be"

    # Little endian (least significant byte first).
    LE = "le"
158
159
2adf4336
PP
160# Byte order setting.
class _SetBo(_Item):
    # Builds a byte order setting item for the byte order `bo`, found
    # at the text location `text_loc`.
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        fields = (repr(self._bo), repr(self._text_loc))
        return "_SetBo({}, {})".format(*fields)
71aaa3f7
PP
172
173
174# Label.
class _Label(_Item):
    # Builds a label item named `name`, found at the text location
    # `text_loc`.
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (repr(self._name), repr(self._text_loc))
        return "_Label({}, {})".format(*fields)
71aaa3f7
PP
187
188
2adf4336
PP
189# Offset setting.
class _SetOffset(_Item):
    # Builds an offset setting item for the offset `val`, found at the
    # text location `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        fields = (repr(self._val), repr(self._text_loc))
        return "_SetOffset({}, {})".format(*fields)
202
203
204# Offset alignment.
class _AlignOffset(_Item):
    # Builds an offset alignment item for the alignment `val` and the
    # padding byte value `pad_val`, found at the text location
    # `text_loc`.
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (repr(self._val), repr(self._pad_val), repr(self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*fields)
71aaa3f7
PP
225
226
227# Mixin of containing an AST expression and its string.
228class _ExprMixin:
229 def __init__(self, expr_str: str, expr: ast.Expression):
230 self._expr_str = expr_str
231 self._expr = expr
232
233 # Expression string.
234 @property
235 def expr_str(self):
236 return self._expr_str
237
238 # Expression node to evaluate.
239 @property
240 def expr(self):
241 return self._expr
242
243
2adf4336
PP
244# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Builds a variable assignment item: the variable named `name`
    # receives the result of the expression `expr` (of which the string
    # is `expr_str`), the whole found at the text location `text_loc`.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable to assign.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (
            repr(self._name),
            repr(self._expr_str),
            repr(self._expr),
            repr(self._text_loc),
        )
        return "_VarAssign({}, {}, {}, {})".format(*fields)
265
266
269f6eb3
PP
267# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    # Builds a fixed-length number item of `len` bits of which the value
    # is the result of the expression `expr` (string `expr_str`), found
    # at the text location `text_loc`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight (floor).
    @property
    def size(self):
        bit_len = self._len
        return bit_len // 8

    def __repr__(self):
        fields = (
            repr(self._expr_str),
            repr(self._expr),
            repr(self._len),
            repr(self._text_loc),
        )
        return "_FlNum({}, {}, {}, {})".format(*fields)
292
293
05f81895
PP
294# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # Builds an LEB128 integer item of which the value is the result of
    # the expression `expr` (string `expr_str`), found at the text
    # location `text_loc`.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the concrete class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        fields = (
            self.__class__.__name__,
            repr(self._expr_str),
            repr(self._expr),
            repr(self._text_loc),
        )
        return "{}({}, {}, {})".format(*fields)
307
308
309# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: only the class itself tells the
    # unsigned variant apart.
    pass
312
313
314# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: only the class itself tells the
    # signed variant apart.
    pass
317
318
71aaa3f7 319# Group of items.
class _Group(_Item, _RepableItem):
    # Builds a group item containing the items `items`, found at the
    # text location `text_loc`.
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        fields = (repr(self._items), repr(self._text_loc))
        return "_Group({}, {})".format(*fields)
71aaa3f7
PP
332
333
334# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    # Builds a repetition item for the item `item`; the expression
    # `expr` (string `expr_str`) is the multiplier. `text_loc` is where
    # the repetition was found.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fields = (
            repr(self._item),
            repr(self._expr_str),
            repr(self._expr),
            repr(self._text_loc),
        )
        return "_Rep({}, {}, {}, {})".format(*fields)
355
356
27d52a19
PP
357# Conditional item.
class _Cond(_Item, _ExprMixin):
    # Builds a conditional item for the item `item`; the expression
    # `expr` (string `expr_str`) is the condition. `text_loc` is where
    # the conditional was found.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Conditional item.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fields = (
            repr(self._item),
            repr(self._expr_str),
            repr(self._expr),
            repr(self._text_loc),
        )
        return "_Cond({}, {}, {}, {})".format(*fields)
378
379
320644e2
PP
380# Macro definition item.
class _MacroDef(_Item):
    # Builds a macro definition item named `name`, having the parameter
    # names `param_names` and the body `group`, found at the text
    # location `text_loc`.
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._param_names, self._group = name, param_names, group

    # Name.
    @property
    def name(self):
        return self._name

    # Parameter names.
    @property
    def param_names(self):
        return self._param_names

    # Contained items (as a group).
    @property
    def group(self):
        return self._group

    def __repr__(self):
        fields = (
            repr(self._name),
            repr(self._param_names),
            repr(self._group),
            repr(self._text_loc),
        )
        return "_MacroDef({}, {}, {}, {})".format(*fields)
412
413
414# Macro expansion parameter.
class _MacroExpParam:
    # Builds a macro expansion parameter from the expression `expr`
    # (string `expr_str`), found at the text location `text_loc`.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression.
    @property
    def expr(self):
        return self._expr

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        fields = (repr(self._expr_str), repr(self._expr), repr(self._text_loc))
        return "_MacroExpParam({}, {}, {})".format(*fields)
440
441
442# Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    # Builds a macro expansion item for the macro named `name` with the
    # parameters `params`, found at the text location `text_loc`.
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        fields = (repr(self._name), repr(self._params), repr(self._text_loc))
        return "_MacroExp({}, {}, {})".format(*fields)
2adf4336
PP
470
471
71aaa3f7
PP
472# A parsing error containing a message and a text location.
class ParseError(RuntimeError):
    # Direct construction is forbidden: always raises
    # `NotImplementedError`. Instances come from _create().
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error having the message `msg` and
    # the text location `text_loc`, bypassing __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    # Actual initialization, called by _create(): the message goes to
    # the base exception class.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
491
492
493# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # Never returns: always raises a `ParseError` built from `msg` and
    # `text_loc`.
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
496
497
e57a18e1
PP
498# Variables dictionary type (for type hints).
499VariablesT = Dict[str, Union[int, float]]
500
501
502# Labels dictionary type (for type hints).
503LabelsT = Dict[str, int]
71aaa3f7
PP
504
505
506# Python name pattern.
507_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
508
509
320644e2
PP
510# Macro definition dictionary.
511_MacroDefsT = Dict[str, _MacroDef]
512
513
71aaa3f7
PP
514# Normand parser.
515#
516# The constructor accepts a Normand input. After building, use the `res`
517# property to get the resulting main group.
518class _Parser:
519 # Builds a parser to parse the Normand input `normand`, parsing
520 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input text and current position within it (offset, line number,
    # and column number)
    self._normand = normand
    self._at = 0
    self._line_no, self._col_no = 1, 1

    # Known names: start with the initial labels and variables
    self._label_names = set(labels)
    self._var_names = set(variables)

    # No macro definitions yet
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse immediately
    self._parse()
530
531 # Result (main group).
@property
def res(self):
    # `self._res` isn't assigned within this block; presumably the
    # parsing step does it.
    main_group = self._res
    return main_group
535
320644e2
PP
536 # Macro definitions.
@property
def macro_defs(self):
    # Same dictionary object as the one held by this parser (not a
    # copy).
    defs = self._macro_defs
    return defs
540
71aaa3f7
PP
541 # Current text location.
@property
def _text_loc(self):
    # New text location object, built from the current line and column
    # numbers, on each access.
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
547
548 # Returns `True` if this parser is done parsing.
549 def _is_done(self):
550 return self._at == len(self._normand)
551
552 # Returns `True` if this parser isn't done parsing.
553 def _isnt_done(self):
554 return not self._is_done()
555
556 # Raises a parse error, creating it using the message `msg` and the
557 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Delegate to the module-level _raise_error() with the text location
    # at which this parser currently is.
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
560
561 # Tries to make the pattern `pat` match the current substring,
562 # returning the match object and updating `self._at`,
563 # `self._line_no`, and `self._col_no` on success.
564 def _try_parse_pat(self, pat: Pattern[str]):
565 m = pat.match(self._normand, self._at)
566
567 if m is None:
568 return
569
570 # Skip matched string
571 self._at += len(m.group(0))
572
573 # Update line number
574 self._line_no += m.group(0).count("\n")
575
576 # Update column number
577 for i in reversed(range(self._at)):
578 if self._normand[i] == "\n" or i == 0:
579 if i == 0:
580 self._col_no = self._at + 1
581 else:
582 self._col_no = self._at - i
583
584 break
585
586 # Return match object
587 return m
588
589 # Expects the pattern `pat` to match the current substring,
590 # returning the match object and updating `self._at`,
591 # `self._line_no`, and `self._col_no` on success, or raising a parse
592 # error with the message `error_msg` on error.
593 def _expect_pat(self, pat: Pattern[str], error_msg: str):
594 # Match
595 m = self._try_parse_pat(pat)
596
597 if m is None:
598 # No match: error
599 self._raise_error(error_msg)
600
601 # Return match object
602 return m
603
604 # Pattern for _skip_ws_and_comments()
605 _ws_or_syms_or_comments_pat = re.compile(
e57a18e1 606 r"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
71aaa3f7
PP
607 )
608
609 # Skips as many whitespaces, insignificant symbol characters, and
610 # comments as possible.
611 def _skip_ws_and_comments(self):
612 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
613
320644e2
PP
614 # Pattern for _skip_ws()
615 _ws_pat = re.compile(r"\s*")
616
617 # Skips as many whitespaces as possible.
618 def _skip_ws(self):
619 self._try_parse_pat(self._ws_pat)
620
71aaa3f7
PP
621 # Pattern for _try_parse_hex_byte()
622 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
623
624 # Tries to parse a hexadecimal byte, returning a byte item on
625 # success.
626 def _try_parse_hex_byte(self):
0e8e3169
PP
627 begin_text_loc = self._text_loc
628
71aaa3f7
PP
629 # Match initial nibble
630 m_high = self._try_parse_pat(self._nibble_pat)
631
632 if m_high is None:
633 # No match
634 return
635
636 # Expect another nibble
637 self._skip_ws_and_comments()
638 m_low = self._expect_pat(
639 self._nibble_pat, "Expecting another hexadecimal nibble"
640 )
641
642 # Return item
0e8e3169 643 return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)
71aaa3f7
PP
644
645 # Patterns for _try_parse_bin_byte()
646 _bin_byte_bit_pat = re.compile(r"[01]")
647 _bin_byte_prefix_pat = re.compile(r"%")
648
649 # Tries to parse a binary byte, returning a byte item on success.
650 def _try_parse_bin_byte(self):
0e8e3169
PP
651 begin_text_loc = self._text_loc
652
71aaa3f7
PP
653 # Match prefix
654 if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
655 # No match
656 return
657
658 # Expect eight bits
659 bits = [] # type: List[str]
660
661 for _ in range(8):
662 self._skip_ws_and_comments()
663 m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
664 bits.append(m.group(0))
665
666 # Return item
0e8e3169 667 return _Byte(int("".join(bits), 2), begin_text_loc)
71aaa3f7
PP
668
669 # Patterns for _try_parse_dec_byte()
320644e2 670 _dec_byte_prefix_pat = re.compile(r"\$")
71aaa3f7
PP
671 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
672
673 # Tries to parse a decimal byte, returning a byte item on success.
674 def _try_parse_dec_byte(self):
0e8e3169
PP
675 begin_text_loc = self._text_loc
676
71aaa3f7
PP
677 # Match prefix
678 if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
679 # No match
680 return
681
682 # Expect the value
320644e2 683 self._skip_ws()
71aaa3f7
PP
684 m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
685
686 # Compute value
687 val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)
688
689 # Validate
690 if val < -128 or val > 255:
0e8e3169 691 _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)
71aaa3f7
PP
692
693 # Two's complement
05f81895 694 val %= 256
71aaa3f7
PP
695
696 # Return item
0e8e3169 697 return _Byte(val, begin_text_loc)
71aaa3f7
PP
698
699 # Tries to parse a byte, returning a byte item on success.
700 def _try_parse_byte(self):
701 # Hexadecimal
702 item = self._try_parse_hex_byte()
703
704 if item is not None:
705 return item
706
707 # Binary
708 item = self._try_parse_bin_byte()
709
710 if item is not None:
711 return item
712
713 # Decimal
714 item = self._try_parse_dec_byte()
715
716 if item is not None:
717 return item
718
719 # Patterns for _try_parse_str()
720 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
721 _str_suffix_pat = re.compile(r'"')
722 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
723
724 # Strings corresponding to escape sequence characters
725 _str_escape_seq_strs = {
726 "0": "\0",
727 "a": "\a",
728 "b": "\b",
729 "e": "\x1b",
730 "f": "\f",
731 "n": "\n",
732 "r": "\r",
733 "t": "\t",
734 "v": "\v",
735 "\\": "\\",
736 '"': '"',
737 }
738
739 # Tries to parse a string, returning a string item on success.
def _try_parse_str(self):
    # Tries to parse a literal string (optionally prefixed with `u16be`,
    # `u16le`, `u32be`, or `u32le` to select a UTF-16/UTF-32 encoding
    # instead of the default UTF-8), returning a string item on success
    # or `None` if the current text doesn't start a literal string.
    #
    # Raises a parse error if the literal string isn't terminated.
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Replacing one escape sequence kind after the other over the whole
    # string is wrong: the second backslash of an escaped backslash
    # could be taken as the start of another escape sequence (`\\n`
    # would become a backslash followed by a newline instead of a
    # backslash followed by `n`).
    #
    # An unknown escape sequence is kept as is.
    escape_seq_strs = self._str_escape_seq_strs

    def unescape(esc_m):
        return escape_seq_strs.get(esc_m.group(1), esc_m.group(0))

    val = re.sub(r"\\(.)", unescape, m.group(0))

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
71aaa3f7 773
320644e2
PP
774 # Common right parenthesis pattern
775 _right_paren_pat = re.compile(r"\)")
776
71aaa3f7 777 # Patterns for _try_parse_group()
320644e2 778 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
71aaa3f7
PP
779
780 # Tries to parse a group, returning a group item on success.
781 def _try_parse_group(self):
0e8e3169
PP
782 begin_text_loc = self._text_loc
783
71aaa3f7 784 # Match prefix
261c5ecf
PP
785 m_open = self._try_parse_pat(self._group_prefix_pat)
786
787 if m_open is None:
71aaa3f7
PP
788 # No match
789 return
790
791 # Parse items
792 items = self._parse_items()
793
794 # Expect end of group
795 self._skip_ws_and_comments()
261c5ecf
PP
796
797 if m_open.group(0) == "(":
320644e2 798 pat = self._right_paren_pat
261c5ecf
PP
799 exp = ")"
800 else:
801 pat = self._block_end_pat
802 exp = "!end"
803
804 self._expect_pat(pat, "Expecting an item or `{}` (end of group)".format(exp))
71aaa3f7
PP
805
806 # Return item
0e8e3169 807 return _Group(items, begin_text_loc)
71aaa3f7
PP
808
809 # Returns a stripped expression string and an AST expression node
810 # from the expression string `expr_str` at text location `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    # Normalize: strip, and then put everything on a single line
    expr_str = " ".join(expr_str.strip().split("\n"))

    # Create an expression node from the normalized expression string;
    # a syntax error becomes a parse error located at `text_loc`
    try:
        expr = ast.parse(expr_str, mode="eval")
    except SyntaxError:
        msg = "Invalid expression `{}`: invalid syntax".format(expr_str)
        _raise_error(msg, text_loc)

    return expr_str, expr
824
269f6eb3 825 # Patterns for _try_parse_num_and_attr()
05f81895 826 _val_expr_pat = re.compile(r"([^}:]+):\s*")
269f6eb3 827 _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
05f81895 828 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
71aaa3f7 829
05f81895
PP
830 # Tries to parse a value and attribute (fixed length in bits or
831 # `leb128`), returning a value item on success.
269f6eb3 832 def _try_parse_num_and_attr(self):
71aaa3f7
PP
833 begin_text_loc = self._text_loc
834
835 # Match
836 m_expr = self._try_parse_pat(self._val_expr_pat)
837
838 if m_expr is None:
839 # No match
840 return
841
71aaa3f7
PP
842 # Create an expression node from the expression string
843 expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)
844
05f81895 845 # Length?
269f6eb3 846 m_attr = self._try_parse_pat(self._fl_num_len_attr_pat)
05f81895
PP
847
848 if m_attr is None:
849 # LEB128?
850 m_attr = self._try_parse_pat(self._leb128_int_attr_pat)
851
852 if m_attr is None:
853 # At this point it's invalid
854 self._raise_error(
855 "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
856 )
857
858 # Return LEB128 integer item
859 cls = _ULeb128Int if m_attr.group(1) == "u" else _SLeb128Int
860 return cls(expr_str, expr, begin_text_loc)
861 else:
269f6eb3
PP
862 # Return fixed-length number item
863 return _FlNum(
05f81895
PP
864 expr_str,
865 expr,
866 int(m_attr.group(0)),
867 begin_text_loc,
868 )
71aaa3f7 869
320644e2
PP
870 # Patterns for _try_parse_var_assign()
871 _var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
872 _var_assign_expr_pat = re.compile(r"[^}]+")
71aaa3f7 873
2adf4336
PP
874 # Tries to parse a variable assignment, returning a variable
875 # assignment item on success.
876 def _try_parse_var_assign(self):
71aaa3f7
PP
877 begin_text_loc = self._text_loc
878
879 # Match
320644e2 880 m = self._try_parse_pat(self._var_assign_name_equal_pat)
71aaa3f7
PP
881
882 if m is None:
883 # No match
884 return
885
886 # Validate name
320644e2 887 name = m.group(1)
71aaa3f7
PP
888
889 if name == _icitte_name:
0e8e3169
PP
890 _raise_error(
891 "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
892 )
71aaa3f7
PP
893
894 if name in self._label_names:
0e8e3169 895 _raise_error("Existing label named `{}`".format(name), begin_text_loc)
71aaa3f7 896
320644e2
PP
897 # Expect an expression
898 self._skip_ws()
899 m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
71aaa3f7
PP
900
901 # Create an expression node from the expression string
320644e2
PP
902 expr_str, expr = self._ast_expr_from_str(m.group(0), begin_text_loc)
903
904 # Add to known variable names
905 self._var_names.add(name)
71aaa3f7
PP
906
907 # Return item
2adf4336 908 return _VarAssign(
71aaa3f7
PP
909 name,
910 expr_str,
911 expr,
0e8e3169 912 begin_text_loc,
71aaa3f7
PP
913 )
914
2adf4336 915 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
916 _bo_pat = re.compile(r"[bl]e")
917
2adf4336
PP
918 # Tries to parse a byte order name, returning a byte order setting
919 # item on success.
920 def _try_parse_set_bo(self):
0e8e3169
PP
921 begin_text_loc = self._text_loc
922
71aaa3f7
PP
923 # Match
924 m = self._try_parse_pat(self._bo_pat)
925
926 if m is None:
927 # No match
928 return
929
930 # Return corresponding item
931 if m.group(0) == "be":
2adf4336 932 return _SetBo(ByteOrder.BE, begin_text_loc)
71aaa3f7
PP
933 else:
934 assert m.group(0) == "le"
2adf4336 935 return _SetBo(ByteOrder.LE, begin_text_loc)
71aaa3f7
PP
936
937 # Patterns for _try_parse_val_or_bo()
320644e2
PP
938 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
939 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
71aaa3f7 940
2adf4336
PP
941 # Tries to parse a value, a variable assignment, or a byte order
942 # setting, returning an item on success.
943 def _try_parse_val_or_var_assign_or_set_bo(self):
71aaa3f7 944 # Match prefix
2adf4336 945 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
71aaa3f7
PP
946 # No match
947 return
948
320644e2
PP
949 self._skip_ws()
950
2adf4336
PP
951 # Variable assignment item?
952 item = self._try_parse_var_assign()
71aaa3f7
PP
953
954 if item is None:
269f6eb3
PP
955 # Number item?
956 item = self._try_parse_num_and_attr()
71aaa3f7
PP
957
958 if item is None:
2adf4336
PP
959 # Byte order setting item?
960 item = self._try_parse_set_bo()
71aaa3f7
PP
961
962 if item is None:
963 # At this point it's invalid
2adf4336 964 self._raise_error(
269f6eb3 965 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
2adf4336 966 )
71aaa3f7
PP
967
968 # Expect suffix
320644e2 969 self._skip_ws()
2adf4336 970 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
71aaa3f7
PP
971 return item
972
320644e2 973 # Common constant integer patterns
71aaa3f7 974 _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")
320644e2 975 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
71aaa3f7 976
2adf4336
PP
977 # Tries to parse an offset setting value (after the initial `<`),
978 # returning an offset item on success.
def _try_parse_set_offset_val(self):
    # Tries to parse a positive constant integer (decimal, or
    # hexadecimal with the `0x`/`0X` prefix) as an offset value,
    # returning an offset setting item on success or `None` if the
    # current text isn't such a constant.
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Convert with an explicit base: int() with base 0 rejects a decimal
    # constant with leading zeros (for example `010`), which the pattern
    # accepts, with a `ValueError` instead of a parse error.
    val_str = m.group(0)
    val = int(val_str, 16 if val_str[:2].lower() == "0x" else 10)

    # Return item
    return _SetOffset(val, begin_text_loc)
71aaa3f7
PP
991
992 # Tries to parse a label name (after the initial `<`), returning a
993 # label item on success.
def _try_parse_label_name(self):
    # The item is located where the name starts
    label_text_loc = self._text_loc

    # Name (optional: no match means this isn't a label)
    m_name = self._try_parse_pat(_py_name_pat)

    if m_name is None:
        return

    label_name = m_name.group(0)

    # The name must be neither the reserved one, nor the name of a known
    # label, nor the name of a known variable
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), label_text_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), label_text_loc)

    if label_name in self._var_names:
        _raise_error("Existing variable named `{}`".format(label_name), label_text_loc)

    # The label is known from now on
    self._label_names.add(label_name)

    return _Label(label_name, label_text_loc)
71aaa3f7 1023
2adf4336 1024 # Patterns for _try_parse_label_or_set_offset()
320644e2
PP
1025 _label_set_offset_prefix_pat = re.compile(r"<")
1026 _label_set_offset_suffix_pat = re.compile(r">")
71aaa3f7 1027
2adf4336
PP
1028 # Tries to parse a label or an offset setting, returning an item on
1029 # success.
1030 def _try_parse_label_or_set_offset(self):
71aaa3f7 1031 # Match prefix
2adf4336 1032 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
71aaa3f7
PP
1033 # No match
1034 return
1035
2adf4336 1036 # Offset setting item?
320644e2 1037 self._skip_ws()
2adf4336 1038 item = self._try_parse_set_offset_val()
71aaa3f7
PP
1039
1040 if item is None:
1041 # Label item?
1042 item = self._try_parse_label_name()
1043
1044 if item is None:
1045 # At this point it's invalid
2adf4336 1046 self._raise_error("Expecting a label name or an offset setting value")
71aaa3f7
PP
1047
1048 # Expect suffix
320644e2 1049 self._skip_ws()
2adf4336 1050 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
71aaa3f7
PP
1051 return item
1052
676f6189 1053 # Patterns for _try_parse_align_offset()
320644e2
PP
1054 _align_offset_prefix_pat = re.compile(r"@")
1055 _align_offset_val_pat = re.compile(r"\d+")
1056 _align_offset_pad_val_prefix_pat = re.compile(r"~")
676f6189
PP
1057
1058 # Tries to parse an offset alignment, returning an offset alignment
1059 # item on success.
def _try_parse_align_offset(self):
    # Tries to parse an offset alignment (`@`, an alignment in bits,
    # and then, optionally, `~` and a padding byte value), returning an
    # offset alignment item on success or `None` if the current text
    # doesn't start with `@`.
    #
    # Raises a parse error if the alignment isn't a positive multiple of
    # eight or if the padding byte value is greater than 255.
    begin_text_loc = self._text_loc

    # Match prefix
    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No match
        return

    self._skip_ws()

    # Expect an alignment
    align_text_loc = self._text_loc
    m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )

    # Validate alignment
    val = int(m.group(0))

    if val <= 0 or (val % 8) != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                val
            ),
            align_text_loc,
        )

    # Padding value?
    self._skip_ws()
    pad_val = 0

    if self._try_parse_pat(self._align_offset_pad_val_prefix_pat) is not None:
        self._skip_ws()
        pad_val_text_loc = self._text_loc
        m = self._expect_pat(self._pos_const_int_pat, "Expecting a byte value")

        # Convert with an explicit base: int() with base 0 rejects a
        # decimal constant with leading zeros (for example `010`), which
        # the pattern accepts, with a `ValueError` instead of a parse
        # error.
        pad_val_str = m.group(0)
        pad_val = int(pad_val_str, 16 if pad_val_str[:2].lower() == "0x" else 10)

        # Validate
        if pad_val > 255:
            _raise_error(
                "Invalid padding byte value {}".format(pad_val),
                pad_val_text_loc,
            )

    # Return item
    return _AlignOffset(val, pad_val, begin_text_loc)
1108
e57a18e1 1109 # Patterns for _expect_rep_mul_expr()
320644e2
PP
1110 _inner_expr_prefix_pat = re.compile(r"\{")
1111 _inner_expr_pat = re.compile(r"[^}]+")
1112 _inner_expr_suffix_pat = re.compile(r"\}")
1113
1114 # Parses a constant integer if `accept_const_int` is `True`
1115 # (possibly negative if `allow_neg` is `True`), a name, or an
1116 # expression within `{` and `}`.
1117 def _expect_const_int_name_expr(
1118 self, accept_const_int: bool, allow_neg: bool = False
1119 ):
e57a18e1
PP
1120 expr_text_loc = self._text_loc
1121
1122 # Constant integer?
27d52a19
PP
1123 m = None
1124
320644e2
PP
1125 if accept_const_int:
1126 m = self._try_parse_pat(self._const_int_pat)
e57a18e1
PP
1127
1128 if m is None:
1129 # Name?
1130 m = self._try_parse_pat(_py_name_pat)
1131
1132 if m is None:
1133 # Expression?
320644e2
PP
1134 if self._try_parse_pat(self._inner_expr_prefix_pat) is None:
1135 pos_msg = "" if allow_neg else "positive "
1136
1137 if accept_const_int:
1138 mid_msg = "a {}constant integer, a name, or `{{`".format(
1139 pos_msg
1140 )
27d52a19
PP
1141 else:
1142 mid_msg = "a name or `{`"
1143
e57a18e1 1144 # At this point it's invalid
27d52a19 1145 self._raise_error("Expecting {}".format(mid_msg))
e57a18e1
PP
1146
1147 # Expect an expression
320644e2 1148 self._skip_ws()
e57a18e1 1149 expr_text_loc = self._text_loc
320644e2 1150 m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")
e57a18e1
PP
1151 expr_str = m.group(0)
1152
1153 # Expect `}`
320644e2
PP
1154 self._skip_ws()
1155 self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
e57a18e1
PP
1156 else:
1157 expr_str = m.group(0)
1158 else:
320644e2
PP
1159 if m.group("neg") == "-" and not allow_neg:
1160 _raise_error("Expecting a positive constant integer", expr_text_loc)
1161
e57a18e1
PP
1162 expr_str = m.group(0)
1163
1164 return self._ast_expr_from_str(expr_str, expr_text_loc)
1165
27d52a19
PP
1166 # Parses the multiplier expression of a repetition (block or
1167 # post-item) and returns the expression string and AST node.
1168 def _expect_rep_mul_expr(self):
320644e2 1169 return self._expect_const_int_name_expr(True)
27d52a19
PP
1170
1171 # Common block end pattern
320644e2 1172 _block_end_pat = re.compile(r"!end\b")
27d52a19 1173
e57a18e1 1174 # Pattern for _try_parse_rep_block()
320644e2 1175 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
e57a18e1
PP
1176
1177 # Tries to parse a repetition block, returning a repetition item on
1178 # success.
1179 def _try_parse_rep_block(self):
1180 begin_text_loc = self._text_loc
1181
1182 # Match prefix
1183 if self._try_parse_pat(self._rep_block_prefix_pat) is None:
1184 # No match
1185 return
1186
1187 # Expect expression
1188 self._skip_ws_and_comments()
1189 expr_str, expr = self._expect_rep_mul_expr()
1190
1191 # Parse items
1192 self._skip_ws_and_comments()
1193 items_text_loc = self._text_loc
1194 items = self._parse_items()
1195
1196 # Expect end of block
1197 self._skip_ws_and_comments()
1198 self._expect_pat(
27d52a19 1199 self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
e57a18e1
PP
1200 )
1201
1202 # Return item
1203 return _Rep(_Group(items, items_text_loc), expr_str, expr, begin_text_loc)
1204
27d52a19 1205 # Pattern for _try_parse_cond_block()
320644e2 1206 _cond_block_prefix_pat = re.compile(r"!if\b")
27d52a19
PP
1207
1208 # Tries to parse a conditional block, returning a conditional item
1209 # on success.
1210 def _try_parse_cond_block(self):
1211 begin_text_loc = self._text_loc
1212
1213 # Match prefix
1214 if self._try_parse_pat(self._cond_block_prefix_pat) is None:
1215 # No match
1216 return
1217
1218 # Expect expression
1219 self._skip_ws_and_comments()
320644e2 1220 expr_str, expr = self._expect_const_int_name_expr(False)
27d52a19
PP
1221
1222 # Parse items
1223 self._skip_ws_and_comments()
1224 items_text_loc = self._text_loc
1225 items = self._parse_items()
1226
1227 # Expect end of block
1228 self._skip_ws_and_comments()
1229 self._expect_pat(
1230 self._block_end_pat,
1231 "Expecting an item or `!end` (end of conditional block)",
1232 )
1233
1234 # Return item
1235 return _Cond(_Group(items, items_text_loc), expr_str, expr, begin_text_loc)
1236
320644e2
PP
1237 # Common left parenthesis pattern
1238 _left_paren_pat = re.compile(r"\(")
1239
1240 # Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
1241 _macro_params_comma_pat = re.compile(",")
1242
1243 # Patterns for _try_parse_macro_def()
1244 _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1245
1246 # Tries to parse a macro definition, adding it to `self._macro_defs`
1247 # and returning `True` on success.
1248 def _try_parse_macro_def(self):
1249 begin_text_loc = self._text_loc
1250
1251 # Match prefix
1252 if self._try_parse_pat(self._macro_def_prefix_pat) is None:
1253 # No match
1254 return False
1255
1256 # Expect a name
1257 self._skip_ws()
1258 name_text_loc = self._text_loc
1259 m = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
1260
1261 # Validate name
1262 name = m.group(0)
1263
1264 if name in self._macro_defs:
1265 _raise_error("Duplicate macro named `{}`".format(name), name_text_loc)
1266
1267 # Expect `(`
1268 self._skip_ws()
1269 self._expect_pat(self._left_paren_pat, "Expecting `(`")
1270
1271 # Try to parse comma-separated parameter names
1272 param_names = [] # type: List[str]
1273 expect_comma = False
1274
1275 while True:
1276 self._skip_ws()
1277
1278 # End?
1279 if self._try_parse_pat(self._right_paren_pat) is not None:
1280 # End
1281 break
1282
1283 # Comma?
1284 if expect_comma:
1285 self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")
1286
1287 # Expect parameter name
1288 self._skip_ws()
1289 param_text_loc = self._text_loc
1290 m = self._expect_pat(_py_name_pat, "Expecting valid parameter name")
1291
1292 if m.group(0) in param_names:
1293 _raise_error(
1294 "Duplicate macro parameter named `{}`".format(m.group(0)),
1295 param_text_loc,
1296 )
1297
1298 param_names.append(m.group(0))
1299 expect_comma = True
1300
1301 # Expect items
1302 self._skip_ws_and_comments()
1303 items_text_loc = self._text_loc
1304 old_var_names = self._var_names.copy()
1305 old_label_names = self._label_names.copy()
1306 self._var_names = set() # type: Set[str]
1307 self._label_names = set() # type: Set[str]
1308 items = self._parse_items()
1309 self._var_names = old_var_names
1310 self._label_names = old_label_names
1311
1312 # Expect suffix
1313 self._expect_pat(
1314 self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
1315 )
1316
1317 # Register macro
1318 self._macro_defs[name] = _MacroDef(
1319 name, param_names, _Group(items, items_text_loc), begin_text_loc
1320 )
1321
1322 return True
1323
1324 # Patterns for _try_parse_macro_exp()
1325 _macro_exp_prefix_pat = re.compile(r"m\b")
1326 _macro_exp_colon_pat = re.compile(r":")
1327
1328 # Tries to parse a macro expansion, returning a macro expansion item
1329 # on success.
1330 def _try_parse_macro_exp(self):
1331 begin_text_loc = self._text_loc
1332
1333 # Match prefix
1334 if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
1335 # No match
1336 return
1337
1338 # Expect `:`
1339 self._skip_ws()
1340 self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")
1341
1342 # Expect a macro name
1343 self._skip_ws()
1344 name_text_loc = self._text_loc
1345 m = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
1346
1347 # Validate name
1348 name = m.group(0)
1349 macro_def = self._macro_defs.get(name)
1350
1351 if macro_def is None:
1352 _raise_error("Unknown macro name `{}`".format(name), name_text_loc)
1353
1354 # Expect `(`
1355 self._skip_ws()
1356 self._expect_pat(self._left_paren_pat, "Expecting `(`")
1357
1358 # Try to parse comma-separated parameter values
1359 params_text_loc = self._text_loc
1360 params = [] # type: List[_MacroExpParam]
1361 expect_comma = False
1362
1363 while True:
1364 self._skip_ws()
1365
1366 # End?
1367 if self._try_parse_pat(self._right_paren_pat) is not None:
1368 # End
1369 break
1370
1371 # Expect a Value
1372 if expect_comma:
1373 self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")
1374
1375 self._skip_ws()
1376 param_text_loc = self._text_loc
1377 params.append(
1378 _MacroExpParam(
1379 *self._expect_const_int_name_expr(True, True), param_text_loc
1380 )
1381 )
1382 expect_comma = True
1383
1384 # Validate parameter values
1385 if len(params) != len(macro_def.param_names):
1386 sing_plur = "" if len(params) == 1 else "s"
1387 _raise_error(
1388 "Macro expansion passes {} parameter{} while the definition expects {}".format(
1389 len(params), sing_plur, len(macro_def.param_names)
1390 ),
1391 params_text_loc,
1392 )
1393
1394 # Return item
1395 return _MacroExp(name, params, begin_text_loc)
1396
71aaa3f7
PP
1397 # Tries to parse a base item (anything except a repetition),
1398 # returning it on success.
1399 def _try_parse_base_item(self):
1400 # Byte item?
1401 item = self._try_parse_byte()
1402
1403 if item is not None:
1404 return item
1405
1406 # String item?
1407 item = self._try_parse_str()
1408
1409 if item is not None:
1410 return item
1411
2adf4336
PP
1412 # Value, variable assignment, or byte order setting item?
1413 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
1414
1415 if item is not None:
1416 return item
1417
2adf4336
PP
1418 # Label or offset setting item?
1419 item = self._try_parse_label_or_set_offset()
71aaa3f7 1420
676f6189
PP
1421 if item is not None:
1422 return item
1423
1424 # Offset alignment item?
1425 item = self._try_parse_align_offset()
1426
71aaa3f7
PP
1427 if item is not None:
1428 return item
1429
1430 # Group item?
1431 item = self._try_parse_group()
1432
1433 if item is not None:
1434 return item
1435
320644e2 1436 # Repetition block item?
e57a18e1 1437 item = self._try_parse_rep_block()
71aaa3f7 1438
e57a18e1
PP
1439 if item is not None:
1440 return item
1441
27d52a19
PP
1442 # Conditional block item?
1443 item = self._try_parse_cond_block()
1444
1445 if item is not None:
1446 return item
1447
320644e2
PP
1448 # Macro expansion?
1449 item = self._try_parse_macro_exp()
1450
1451 if item is not None:
1452 return item
1453
e57a18e1
PP
1454 # Pattern for _try_parse_rep_post()
1455 _rep_post_prefix_pat = re.compile(r"\*")
1456
1457 # Tries to parse a post-item repetition, returning the expression
1458 # string and AST expression node on success.
1459 def _try_parse_rep_post(self):
71aaa3f7 1460 # Match prefix
e57a18e1 1461 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
71aaa3f7 1462 # No match
2adf4336 1463 return
71aaa3f7 1464
e57a18e1 1465 # Return expression string and AST expression
71aaa3f7 1466 self._skip_ws_and_comments()
e57a18e1 1467 return self._expect_rep_mul_expr()
71aaa3f7 1468
1ca7b5e1
PP
1469 # Tries to parse an item, possibly followed by a repetition,
1470 # returning `True` on success.
1471 #
1472 # Appends any parsed item to `items`.
1473 def _try_append_item(self, items: List[_Item]):
71aaa3f7
PP
1474 self._skip_ws_and_comments()
1475
320644e2 1476 # Base item
71aaa3f7
PP
1477 item = self._try_parse_base_item()
1478
1479 if item is None:
320644e2 1480 return
71aaa3f7
PP
1481
1482 # Parse repetition if the base item is repeatable
1483 if isinstance(item, _RepableItem):
0e8e3169
PP
1484 self._skip_ws_and_comments()
1485 rep_text_loc = self._text_loc
e57a18e1 1486 rep_ret = self._try_parse_rep_post()
71aaa3f7 1487
2adf4336 1488 if rep_ret is not None:
320644e2 1489 item = _Rep(item, *rep_ret, rep_text_loc)
71aaa3f7 1490
1ca7b5e1
PP
1491 items.append(item)
1492 return True
71aaa3f7
PP
1493
1494 # Parses and returns items, skipping whitespaces, insignificant
1495 # symbols, and comments when allowed, and stopping at the first
1496 # unknown character.
320644e2
PP
1497 #
1498 # Accepts and registers macro definitions if `accept_macro_defs`
1499 # is `True`.
1500 def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
71aaa3f7
PP
1501 items = [] # type: List[_Item]
1502
1503 while self._isnt_done():
1ca7b5e1
PP
1504 # Try to append item
1505 if not self._try_append_item(items):
320644e2
PP
1506 if accept_macro_defs and self._try_parse_macro_def():
1507 continue
1508
1ca7b5e1
PP
1509 # Unknown at this point
1510 break
71aaa3f7
PP
1511
1512 return items
1513
1514 # Parses the whole Normand input, setting `self._res` to the main
1515 # group item on success.
1516 def _parse(self):
1517 if len(self._normand.strip()) == 0:
1518 # Special case to make sure there's something to consume
1519 self._res = _Group([], self._text_loc)
1520 return
1521
1522 # Parse first level items
320644e2 1523 items = self._parse_items(True)
71aaa3f7
PP
1524
1525 # Make sure there's nothing left
1526 self._skip_ws_and_comments()
1527
1528 if self._isnt_done():
1529 self._raise_error(
1530 "Unexpected character `{}`".format(self._normand[self._at])
1531 )
1532
1533 # Set main group item
1534 self._res = _Group(items, self._text_loc)
1535
1536
1537# The return type of parse().
class ParseResult:
    # Read-only view of what parse() produced: the generated data and
    # the updated variables, labels, offset, and byte order.
    #
    # Instances are built with the private _create() class method;
    # calling the constructor directly raises `NotImplementedError`.

    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which is disabled
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # The objects are kept as is (no copy)
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1594
1595
1596# Raises a parse error for the item `item`, creating it using the
1597# message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The error location is the text location of `item`
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1600
1601
1602# The `ICITTE` reserved name.
1603_icitte_name = "ICITTE"
1604
1605
2adf4336
PP
1606# Base node visitor.
1607#
1608# Calls the _visit_name() method for each name node which isn't the name
1609# of a call.
1610class _NodeVisitor(ast.NodeVisitor):
1611 def __init__(self):
71aaa3f7
PP
1612 self._parent_is_call = False
1613
1614 def generic_visit(self, node: ast.AST):
1615 if type(node) is ast.Call:
1616 self._parent_is_call = True
1617 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1618 self._visit_name(node.id)
71aaa3f7
PP
1619
1620 super().generic_visit(node)
1621 self._parent_is_call = False
1622
2adf4336
PP
1623 @abc.abstractmethod
1624 def _visit_name(self, name: str):
1625 ...
1626
71aaa3f7 1627
2adf4336
PP
1628# Expression validator: validates that all the names within the
1629# expression are allowed.
class _ExprValidator(_NodeVisitor):
    # Validates that each reported name of an expression is either
    # `ICITTE` or one of `allowed_names`.
    #
    # `expr_str` and `text_loc` are only used to build the error.
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # Known and reachable variable/label name: nothing to report
        if name == _icitte_name or name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # `ICITTE` is always legal, so there's always at least one
        # legal name to show.
        legal_names = self._allowed_names | {_icitte_name}
        legal_names_str = ", ".join(
            sorted("`{}`".format(legal_name) for legal_name in legal_names)
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(
            msg,
            self._text_loc,
        )
1658
1659
2adf4336
PP
1660# Generator state.
1661class _GenState:
1662 def __init__(
1b8aa84a 1663 self,
e57a18e1
PP
1664 variables: VariablesT,
1665 labels: LabelsT,
1b8aa84a
PP
1666 offset: int,
1667 bo: Optional[ByteOrder],
2adf4336
PP
1668 ):
1669 self.variables = variables.copy()
1670 self.labels = labels.copy()
1671 self.offset = offset
1672 self.bo = bo
71aaa3f7 1673
320644e2
PP
1674 def __repr__(self):
1675 return "_GenState({}, {}, {}, {})".format(
1676 repr(self.variables), repr(self.labels), repr(self.offset), repr(self.bo)
1677 )
1678
1679
1680# Fixed-length number item instance.
1681class _FlNumItemInst:
1682 def __init__(self, item: _FlNum, offset_in_data: int, state: _GenState):
1683 self._item = item
1684 self._offset_in_data = offset_in_data
1685 self._state = state
1686
1687 @property
1688 def item(self):
1689 return self._item
1690
1691 @property
1692 def offset_in_data(self):
1693 return self._offset_in_data
1694
1695 @property
1696 def state(self):
1697 return self._state
1698
71aaa3f7 1699
2adf4336 1700# Generator of data and final state from a group item.
71aaa3f7
PP
1701#
1702# Generation happens in memory at construction time. After building, use
1703# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1704# get the resulting context.
2adf4336
PP
1705#
1706# The steps of generation are:
1707#
320644e2
PP
1708# 1. Handle each item in prefix order.
1709#
1710# The handlers append bytes to `self._data` and update some current
1711# state object (`_GenState` instance).
1712#
1713# When handling a fixed-length number item, try to evaluate its
1714# expression using the current state. If this fails, then it might be
1715# because the expression refers to a "future" label: save the current
1716# offset in `self._data` (generated data) and a snapshot of the
1717# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
1718# object). _gen_fl_num_item_insts() will deal with this later.
2adf4336 1719#
320644e2
PP
1720# When handling the items of a group, keep a map of immediate label
1721# names to their offset. Then, after having processed all the items,
1722# update the relevant saved state snapshots in
1723# `self._fl_num_item_insts` with those immediate label values.
1724# _gen_fl_num_item_insts() will deal with this later.
2adf4336 1725#
320644e2
PP
1726# 2. Handle all the fixed-length number item instances of which the
1727# expression evaluation failed before.
2adf4336 1728#
320644e2
PP
1729# At this point, `self._fl_num_item_insts` contains everything that's
1730# needed to evaluate the expressions, including the values of
1731# "future" labels from the point of view of some fixed-length number
1732# item instance.
2adf4336 1733#
320644e2 1734# If an evaluation fails at this point, then it's a user error.
71aaa3f7
PP
1735class _Gen:
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Generates everything immediately from `group`, starting with a
    # new state built from `variables`, `labels`, `offset`, and `bo`.
    self._macro_defs = macro_defs

    # Fixed-length number item instances to generate at the end
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]

    init_state = _GenState(variables, labels, offset, bo)
    self._gen(group, init_state)
71aaa3f7
PP
1748
1749 # Generated bytes.
@property
def data(self):
    # Bytes generated by _gen()
    generated = self._data
    return generated
1753
1754 # Updated variables.
@property
def variables(self):
    # Variables of the final generation state
    final_state = self._final_state
    return final_state.variables
71aaa3f7
PP
1758
1759 # Updated main group labels.
@property
def labels(self):
    # Labels of the final generation state
    final_state = self._final_state
    return final_state.labels
71aaa3f7
PP
1763
1764 # Updated offset.
@property
def offset(self):
    # Offset of the final generation state
    final_state = self._final_state
    return final_state.offset
71aaa3f7
PP
1768
1769 # Updated byte order.
@property
def bo(self):
    # Byte order of the final generation state
    final_state = self._final_state
    return final_state.bo
1773
320644e2
PP
1774 # Evaluates the expression `expr` of which the original string is
1775 # `expr_str` at the location `text_loc` considering the current
2adf4336
PP
1776 # generation state `state`.
1777 #
269f6eb3
PP
1778 # If `allow_float` is `True`, then the type of the result may be
1779 # `float` too.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    # Evaluates `expr` (original string `expr_str`, found at
    # `text_loc`) with the labels, `ICITTE` (current offset), and
    # variables of `state` as symbols.
    #
    # Returns an `int` (a `bool` result is converted), or possibly a
    # `float` if `allow_float` is `True`. Any evaluation failure or
    # unexpected result type is reported through _raise_error().

    # Later entries win: a variable hides a label or `ICITTE` having
    # the same name.
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the names of the expression
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # NOTE(review): with `None` as the globals, eval() appears to fall
    # back to the globals of this module (including builtins); confirm
    # that expressions only come from trusted Normand input.
    try:
        val = eval(compile(expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(val) is bool:
        val = int(val)

    # Validate the exact result type (subclasses aren't accepted)
    if allow_float:
        accepted_types, type_msg = (int, float), "`int` or `float`"
    else:
        accepted_types, type_msg = (int,), "`int`"

    if type(val) not in accepted_types:
        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, type_msg, type(val).__name__
            ),
            text_loc,
        )

    return val
1830
320644e2
PP
1831 # Evaluates the expression of `item` considering the current
1832 # generation state `state`.
1833 #
1834 # If `allow_float` is `True`, then the type of the result may be
1835 # `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    # Same as _eval_expr(), taking the expression string, expression
    # node, and text location from `item`.
    expr_str, expr, text_loc = item.expr_str, item.expr, item.text_loc
    return _Gen._eval_expr(expr_str, expr, text_loc, state, allow_float)
1845
1846 # Handles the byte item `item`.
1847 def _handle_byte_item(self, item: _Byte, state: _GenState):
1848 self._data.append(item.val)
1849 state.offset += item.size
1850
1851 # Handles the string item `item`.
1852 def _handle_str_item(self, item: _Str, state: _GenState):
1853 self._data += item.data
1854 state.offset += item.size
1855
1856 # Handles the byte order setting item `item`.
1857 def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
1858 # Update current byte order
1859 state.bo = item.bo
1860
1861 # Handles the variable assignment item `item`.
1862 def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
1863 # Update variable
1864 state.variables[item.name] = self._eval_item_expr(item, state, True)
1865
1866 # Handles the fixed-length number item `item`.
1867 def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
1868 # Validate current byte order
1869 if state.bo is None and item.len > 8:
1870 _raise_error_for_item(
1871 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
1872 item.expr_str
1873 ),
1874 item,
1875 )
1876
1877 # Try an immediate evaluation. If it fails, then keep everything
1878 # needed to (try to) generate the bytes of this item later.
1879 try:
1880 data = self._gen_fl_num_item_inst_data(item, state)
1881 except Exception:
1882 self._fl_num_item_insts.append(
1883 _FlNumItemInst(item, len(self._data), copy.deepcopy(state))
1884 )
1885
1886 # Reserve space in `self._data` for this instance
1887 data = bytes([0] * (item.len // 8))
1888
1889 # Append bytes
1890 self._data += data
1891
1892 # Update offset
1893 state.offset += len(data)
1894
05f81895
PP
1895 # Returns the size, in bytes, required to encode the value `val`
1896 # with LEB128 (signed version if `is_signed` is `True`).
1897 @staticmethod
1898 def _leb128_size_for_val(val: int, is_signed: bool):
1899 if val < 0:
1900 # Equivalent upper bound.
1901 #
1902 # For example, if `val` is -128, then the full integer for
1903 # this number of bits would be [-128, 127].
1904 val = -val - 1
1905
1906 # Number of bits (add one for the sign if needed)
1907 bits = val.bit_length() + int(is_signed)
1908
1909 if bits == 0:
1910 bits = 1
1911
1912 # Seven bits per byte
1913 return math.ceil(bits / 7)
1914
320644e2
PP
1915 # Handles the LEB128 integer item `item`.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    # Evaluates the expression of the LEB128 integer item `item`
    # (`int` result only), appends its LEB128 encoding to the generated
    # data, and advances the offset accordingly.
    #
    # A negative value for an unsigned LEB128 item is an error: the
    # unsigned encoding cannot represent it, and encoding it anyway
    # would produce bytes which decode to an unrelated positive value.
    val = self._eval_item_expr(item, state, False)
    is_signed = type(item) is _SLeb128Int

    if val < 0 and not is_signed:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,} for an unsigned LEB128 integer".format(
                item.expr_str, val
            ),
            item,
        )

    # Size in bytes
    size = self._leb128_size_for_val(val, is_signed)

    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= 0x7F

    # Update offset
    state.offset += size
27d52a19 1934
320644e2
PP
1935 # Handles the group item `item`, removing the immediate labels from
1936 # `state` at the end if `remove_immediate_labels` is `True`.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Handles each item of the group `item` in order.
    #
    # The labels found directly within the group (immediate labels)
    # are removed from `state` afterwards if `remove_immediate_labels`
    # is `True`. In all cases, they are added to the state snapshot of
    # each fixed-length number item instance saved while handling this
    # group, so that such an instance may refer to a label located
    # after it.
    first_inst_index = len(self._fl_num_item_insts)
    group_labels = {}  # type: LabelsT

    for subitem in item.items:
        if type(subitem) is _Label:
            # Offset of the label is the current one
            group_labels[subitem.name] = state.offset

        self._handle_item(subitem, state)

    if remove_immediate_labels:
        for label_name in group_labels:
            del state.labels[label_name]

    # Only the instances saved since `first_inst_index` belong to
    # this group.
    new_insts = self._fl_num_item_insts[first_inst_index:]

    for inst in new_insts:
        inst.state.labels.update(group_labels)
2adf4336 1960
320644e2
PP
1961 # Handles the repetition item `item`.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    # Evaluates the multiplier expression of the repetition item
    # `item`, and then handles the repeated item that many times.
    #
    # A negative multiplier is reported as an error.
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    repeated_item = item.item

    for _ in range(count):
        self._handle_item(repeated_item, state)
2adf4336 1978
320644e2
PP
1979 # Handles the conditional item `item`.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    # Evaluates the expression of the conditional item `item` and
    # handles the contained item only if the result is true (nonzero).
    val = _Gen._eval_item_expr(item, state)

    # Generate item data if needed
    if val:
        self._handle_item(item.item, state)
2adf4336 1987
320644e2
PP
1988 # Evaluates the parameters of the macro expansion item `item`
1989 # considering the initial state `init_state` and returns a new state
1990 # to handle the items of the macro.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    # Creates and returns the state with which to handle the items of
    # the macro expanded by `item`.
    #
    # The new state has no labels, shares the offset and byte order of
    # `init_state`, and has one variable per macro parameter: the
    # value of the corresponding parameter expression evaluated with
    # `init_state` (`float` results allowed).
    macro_def = self._macro_defs[item.name]
    new_state = _GenState({}, {}, init_state.offset, init_state.bo)

    for name, exp_param in zip(macro_def.param_names, item.params):
        new_state.variables[name] = _Gen._eval_expr(
            exp_param.expr_str, exp_param.expr, exp_param.text_loc, init_state, True
        )

    return new_state
2adf4336 2004
320644e2
PP
2005 # Handles the macro expansion item `item`.
2006 def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
2007 # New state
2008 exp_state = self._eval_macro_exp_params(item, state)
27d52a19 2009
320644e2
PP
2010 # Process the contained group
2011 init_data_size = len(self._data)
2012 self._handle_item(self._macro_defs[item.name].group, exp_state)
27d52a19 2013
320644e2
PP
2014 # Update state offset and return
2015 state.offset += len(self._data) - init_data_size
676f6189 2016
320644e2
PP
2017 # Handles the offset setting item `item`.
2018 def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
676f6189 2019 state.offset = item.val
2adf4336 2020
320644e2
PP
2021 # Handles offset alignment item `item` (adds padding).
2022 def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
2023 init_offset = state.offset
2024 align_bytes = item.val // 8
2025 state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
2026 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2adf4336 2027
320644e2
PP
2028 # Handles the label item `item`.
2029 def _handle_label_item(self, item: _Label, state: _GenState):
2030 state.labels[item.name] = state.offset
2adf4336 2031
320644e2
PP
# Handles the item `item` by calling the handler which
# `self._item_handlers` has for its exact type.
2034 def _handle_item(self, item: _Item, state: _GenState):
2035 return self._item_handlers[type(item)](item, state)
71aaa3f7 2036
320644e2
PP
2037 # Generates the data for a fixed-length integer item instance having
2038 # the value `val` and returns it.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Returns the `item.len // 8` bytes which encode the integer `val`
    # for the fixed-length number item `item`, using the byte order of
    # `state` (big endian if it's `None`).
    #
    # `val` may be anything from the lowest signed value to the highest
    # unsigned value for `item.len` bits; a negative value is encoded
    # as two's complement. Anything outside that range is reported as
    # an error.
    #
    # The encoding relies on int.to_bytes() so that the result always
    # has the requested length, whatever `item.len` is (packing on
    # 64 bits and truncating only works up to 64 bits).

    # Validate range
    if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    if state.bo in (None, ByteOrder.BE):
        byte_order = "big"
    else:
        assert state.bo == ByteOrder.LE
        byte_order = "little"

    # The range check above guarantees that `val` fits: unsigned when
    # positive or zero, signed when negative.
    return val.to_bytes(item.len // 8, byte_order, signed=val < 0)
269f6eb3 2072
320644e2
PP
2073 # Generates the data for a fixed-length floating point number item
2074 # instance having the value `val` and returns it.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # Returns the bytes which encode the floating point number `val`
    # for the fixed-length number item `item`, using the byte order of
    # `state` (big endian if it's `None`).
    #
    # Only the 32-bit and 64-bit lengths are valid; any other length is
    # reported as an error.
    if item.len not in (32, 64):
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # struct format: byte order character, then `f` (32 bits) or
    # `d` (64 bits)
    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"
    return struct.pack("{}{}".format(bo_char, type_char), val)
2093
320644e2
PP
2094 # Generates the data for a fixed-length number item instance and
2095 # returns it.
2096 def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
269f6eb3 2097 # Compute value
e57a18e1 2098 val = self._eval_item_expr(item, state, True)
269f6eb3 2099
269f6eb3
PP
2100 # Handle depending on type
2101 if type(val) is int:
320644e2 2102 return self._gen_fl_int_item_inst_data(val, item, state)
269f6eb3
PP
2103 else:
2104 assert type(val) is float
320644e2 2105 return self._gen_fl_float_item_inst_data(val, item, state)
05f81895 2106
320644e2
PP
2107 # Generates the data for all the fixed-length number item instances
2108 # and writes it at the correct offset within `self._data`.
def _gen_fl_num_item_insts(self):
    # For each recorded fixed-length number item instance, encodes its
    # bytes and overwrites the matching region of `self._data`, which
    # starts at the `offset_in_data` attribute of the instance.
    for fl_num_inst in self._fl_num_item_insts:
        encoded = self._gen_fl_num_item_inst_data(fl_num_inst.item, fl_num_inst.state)

        # Replace exactly `len(encoded)` bytes starting at the offset
        begin = fl_num_inst.offset_in_data
        end = begin + len(encoded)
        self._data[begin:end] = encoded
2adf4336
PP
2116
2117 # Generates the data (`self._data`) and final state
2118 # (`self._final_state`) from `group` and the initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Fills `self._data` and sets `self._final_state` by handling
    # `group` starting from `state`.
    #
    # The order matters: the group is handled first, and only then are
    # the fixed-length number bytes generated.

    # Start from an empty output buffer
    self._data = bytearray()

    # Dispatch table: item type to the bound method which handles it
    handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]
    self._item_handlers = handlers

    # Handle the root group; `False` means to keep its immediate labels
    # because the `labels` property offers them.
    self._handle_group_item(group, state, False)

    # Keep `state` as the final state once the root group is handled
    self._final_state = state

    # The full state of every fixed-length number item instance is
    # known at this point: generate their bytes.
    self._gen_fl_num_item_insts()
2151
71aaa3f7
PP
2152
# Parses the Normand input string `normand`, generates the bytes it
# encodes, and returns the outcome as a `ParseResult` instance.
#
# `init_variables`: initial variables, as a dictionary of names (valid
# Python names) to integral values; `None` means no initial variables.
# A name must not be the reserved name `ICITTE`.
#
# `init_labels`: initial labels, as a dictionary of names (valid Python
# names) to integral values; `None` means no initial labels. A name
# must not be the reserved name `ICITTE`.
#
# `init_offset`: initial offset.
#
# `init_byte_order`: initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Replace missing dictionaries with fresh empty ones (never shared
    # between calls).
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse first, then generate from the parsing results
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2194
2195
# Builds the command-line argument parser of the tool, parses the
# arguments of the current process with it, and returns the resulting
# `argparse` namespace.
def _parse_cli_args():
    import argparse

    # Specifications of the arguments as (names, keyword arguments)
    # pairs, in the order in which they are added to the parser.
    arg_specs = [
        (
            ("--offset",),
            {
                "metavar": "OFFSET",
                "action": "store",
                "type": int,
                "default": 0,
                "help": "initial offset (positive)",
            },
        ),
        (
            ("-b", "--byte-order"),
            {
                "metavar": "BO",
                "choices": ["be", "le"],
                "type": str,
                "help": "initial byte order (`be` or `le`)",
            },
        ),
        (
            ("--var",),
            {
                "metavar": "NAME=VAL",
                "action": "append",
                "help": "add an initial variable (may be repeated)",
            },
        ),
        (
            ("-l", "--label"),
            {
                "metavar": "NAME=VAL",
                "action": "append",
                "help": "add an initial label (may be repeated)",
            },
        ),
        (
            ("--version",),
            {
                "action": "version",
                "version": "Normand {}".format(__version__),
            },
        ),
        (
            ("path",),
            {
                "metavar": "PATH",
                "action": "store",
                "nargs": "?",
                "help": "input path (none means standard input)",
            },
        ),
    ]

    # Build the parser
    arg_parser = argparse.ArgumentParser()

    for names, options in arg_specs:
        arg_parser.add_argument(*names, **options)

    # Parse
    return arg_parser.parse_args()
2244
2245
# Raises a `RuntimeError` of which the message is `msg` prefixed with
# `Command-line error: `; never returns.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2249
2250
# Converts `args`, a list of `NAME=VAL` strings, to a dictionary which
# maps each `NAME` to `VAL` converted to an integer, and returns it.
#
# `NAME` must match `_py_name_pat` and `VAL` must be made of decimal
# digits only; reports any other entry with _raise_cli_error().
#
# Returns an empty dictionary when `args` is `None`.
def _dict_from_arg(args: Optional[List[str]]):
    entries = {}  # type: LabelsT

    # `None` (option never provided) is the same as no entries at all
    for entry in args or ():
        match = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), entry)

        if match is None:
            _raise_cli_error("Invalid assignment {}".format(entry))

        # Group 1 is the name, group 2 is the value
        entries[match.group(1)] = int(match.group(2))

    return entries
2268
2269
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input (the file
# at the `PATH` argument, or the standard input when there's none),
# generates the bytes with parse(), and writes them to the standard
# output.
#
# Raises `RuntimeError` (through _raise_cli_error()) on an invalid
# command-line argument.
#
# Reports a `ParseError` through _fail() with a message having the
# `[ABS-PATH:]LINE:COL - MESSAGE` form.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (include the offending value in the message)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order (`None` when the option is missing)
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The argument parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print the generated bytes as is (binary stream of the standard
    # output)
    sys.stdout.buffer.write(res.data)
2319
2320
# Prints `msg` to the standard error, adding a final period if it
# doesn't already end with one, and then exits with status 1.
def _fail(msg: str) -> NoReturn:
    text = msg if msg.endswith(".") else msg + "."
    print(text, file=sys.stderr)
    sys.exit(1)
2328
2329
# CLI entry point: runs _try_run_cli() and converts any exception it
# raises to a failure message through _fail().
def _run_cli():
    try:
        _try_run_cli()
    except Exception as err:
        # Top-level boundary: show the message of the exception instead
        # of a traceback.
        _fail(str(err))
2336
2337
# Run the command-line tool when this module is executed as a script
# (not when it's imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.123681 seconds and 4 git commands to generate.