Add `!else` support for conditional block
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.14.0"

# Names which make up the public API of this module.
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
45
46import re
47import abc
48import ast
49import sys
320644e2 50import copy
71aaa3f7 51import enum
05f81895 52import math
71aaa3f7 53import struct
e57a18e1
PP
54import typing
55from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
56
57
58# Text location (line and column numbers).
class TextLocation:
    # Direct construction is disabled: instances are only built
    # through the private _create() class method.
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass __init__() (which always raises), then initialize
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({}, {})".format(self._line_no, self._col_no)
2adf4336 85
71aaa3f7
PP
86
87# Any item.
class _Item:
    def __init__(self, text_loc: TextLocation):
        # Remember where this item was found in the source text
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        loc = self._text_loc
        return loc
96
2adf4336
PP
97
98# Scalar item.
class _ScalarItem(_Item):
    # Size of this item (bytes); each concrete scalar item class
    # provides its own implementation.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        pass
105
106
107# A repeatable item.
class _RepableItem:
    # Marker class only: it adds no state and no behaviour.
    pass
110
111
112# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # A single byte always has a size of one.
    @property
    def size(self):
        return 1

    def __repr__(self):
        # The value is shown in hexadecimal
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
71aaa3f7
PP
129
130
131# String.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Size is the number of encoded bytes.
    @property
    def size(self):
        encoded = self._data
        return len(encoded)

    def __repr__(self):
        return "_Str({!r}, {!r})".format(self._data, self._text_loc)
71aaa3f7
PP
148
149
150# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    BE = "be"  # Big endian
    LE = "le"  # Little endian
158
159
2adf4336
PP
160# Byte order setting.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
71aaa3f7
PP
172
173
174# Label.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
71aaa3f7
PP
187
188
2adf4336
PP
189# Offset setting.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
202
203
204# Offset alignment.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (self._val, self._pad_val, self._text_loc)
        return "_AlignOffset({!r}, {!r}, {!r})".format(*fields)
71aaa3f7
PP
225
226
227# Mixin of containing an AST expression and its string.
class _ExprMixin:
    def __init__(self, expr_str: str, expr: ast.Expression):
        # Keep both the textual form and the AST node of the expression
        self._expr_str, self._expr = expr_str, expr

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression node to evaluate.
    @property
    def expr(self):
        return self._expr
242
243
25ca454b
PP
244# Fill until some offset.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # Both bases are initialized explicitly: the item part first,
        # then the expression mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._pad_val, self._text_loc)
        return "_FillUntil({!r}, {!r}, {!r}, {!r})".format(*fields)
265
266
2adf4336
PP
267# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases are initialized explicitly: the item part first,
        # then the expression mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(*fields)
288
289
269f6eb3
PP
290# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # Both bases are initialized explicitly: the item part first,
        # then the expression mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight.
    @property
    def size(self):
        bit_len = self._len
        return bit_len // 8

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._len, self._text_loc)
        return "_FlNum({!r}, {!r}, {!r}, {!r})".format(*fields)
315
316
05f81895
PP
317# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        # Both bases are initialized explicitly: the item part first,
        # then the expression mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Uses the name of the concrete class so that subclasses
        # inherit a correct representation.
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
330
331
332# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Everything is inherited; only the type itself matters.
    pass
335
336
337# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Everything is inherited; only the type itself matters.
    pass
340
341
71aaa3f7 342# Group of items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
71aaa3f7
PP
355
356
357# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases are initialized explicitly: the item part first,
        # then the expression mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fields = (self._item, self._expr_str, self._expr, self._text_loc)
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(*fields)
378
379
27d52a19
PP
380# Conditional item.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # Both bases are initialized explicitly: the item part first,
        # then the expression mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        fields = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({!r}, {!r}, {!r}, {!r}, {!r})".format(*fields)
413
414
320644e2
PP
415# Macro definition item.
class _MacroDef(_Item):
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._param_names, self._group = name, param_names, group

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def param_names(self):
        return self._param_names

    # Contained items.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        fields = (self._name, self._param_names, self._group, self._text_loc)
        return "_MacroDef({!r}, {!r}, {!r}, {!r})".format(*fields)
447
448
449# Macro expansion parameter.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression.
    @property
    def expr(self):
        return self._expr

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._text_loc)
        return "_MacroExpParam({!r}, {!r}, {!r})".format(*fields)
475
476
477# Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        fields = (self._name, self._params, self._text_loc)
        return "_MacroExp({!r}, {!r}, {!r})".format(*fields)
2adf4336
PP
505
506
71aaa3f7
PP
507# A parsing error containing a message and a text location.
class ParseError(RuntimeError):
    # Direct construction is disabled: instances are only built
    # through the private _create() class method.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Bypass __init__() (which always raises), then initialize
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def _init(self, msg: str, text_loc: TextLocation):
        # The message goes to the base exception class
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
526
527
528# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # Build the error first, then raise it: this function never returns.
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
531
532
e57a18e1
PP
533# Variables dictionary type (for type hints).
# Maps a variable name to its numeric value (for type hints).
VariablesT = Dict[str, Union[int, float]]


# Maps a label name to its integer value (for type hints).
LabelsT = Dict[str, int]


# Matches a Python name.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")


# Maps a macro name to its definition item.
_MacroDefsT = Dict[str, _MacroDef]
547
548
71aaa3f7
PP
549# Normand parser.
550#
551# The constructor accepts a Normand input. After building, use the `res`
552# property to get the resulting main group.
553class _Parser:
554 # Builds a parser to parse the Normand input `normand`, parsing
555 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input text and cursor state (offset, then line/column numbers)
    self._normand = normand
    self._at = 0
    self._line_no, self._col_no = 1, 1

    # Names which are already taken by the initial variables and labels
    self._var_names = set(variables.keys())
    self._label_names = set(labels.keys())

    # No macro definitions are known before parsing
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse immediately
    self._parse()
565
566 # Result (main group).
@property
def res(self):
    # Reads `self._res`, which isn't assigned by the constructor itself
    result = self._res
    return result
570
320644e2
PP
571 # Macro definitions.
@property
def macro_defs(self):
    # The dictionary itself is returned (no copy)
    defs = self._macro_defs
    return defs
575
71aaa3f7
PP
576 # Current text location.
@property
def _text_loc(self):
    # A new location object is created on each access from the
    # current line and column numbers.
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
582
583 # Returns `True` if this parser is done parsing.
def _is_done(self):
    # Done when the cursor sits exactly at the end of the input
    end = len(self._normand)
    return end == self._at
586
587 # Returns `True` if this parser isn't done parsing.
def _isnt_done(self):
    # Plain negation of _is_done()
    done = self._is_done()
    return not done
590
591 # Raises a parse error, creating it using the message `msg` and the
592 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Delegates to the module-level function with the current location
    here = self._text_loc
    _raise_error(msg, here)
595
596 # Tries to make the pattern `pat` match the current substring,
597 # returning the match object and updating `self._at`,
598 # `self._line_no`, and `self._col_no` on success.
def _try_parse_pat(self, pat: Pattern[str]):
    # Tries to match `pat` exactly at the current position
    # (`self._at`) of the input.
    #
    # Returns `None`, leaving the state of this parser untouched, if
    # `pat` doesn't match. Otherwise, moves `self._at` past the matched
    # text, updates `self._line_no` and `self._col_no` accordingly, and
    # returns the match object.
    m = pat.match(self._normand, self._at)

    if m is None:
        return

    matched = m.group(0)

    # Skip matched string
    self._at += len(matched)

    # Update line number
    self._line_no += matched.count("\n")

    # Update column number.
    #
    # The column is the distance between the new position and the last
    # newline located before it. str.rfind() returns -1 when there's no
    # such newline, which naturally yields the 1-based column on the
    # first line. This also covers an input starting with a newline
    # (index 0), a case which must not be confused with "no newline".
    last_nl_at = self._normand.rfind("\n", 0, self._at)
    self._col_no = self._at - last_nl_at

    # Return match object
    return m
623
624 # Expects the pattern `pat` to match the current substring,
625 # returning the match object and updating `self._at`,
626 # `self._line_no`, and `self._col_no` on success, or raising a parse
627 # error with the message `error_msg` on error.
def _expect_pat(self, pat: Pattern[str], error_msg: str):
    m = self._try_parse_pat(pat)

    if m is not None:
        # Match: return match object
        return m

    # No match: error
    self._raise_error(error_msg)
638
639 # Pattern for _skip_ws_and_comments()
_ws_or_syms_or_comments_pat = re.compile(
    r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
)

# Consumes the longest possible run of whitespaces, insignificant
# symbol characters, and comments found at the current position.
def _skip_ws_and_comments(self):
    pat = self._ws_or_syms_or_comments_pat
    self._try_parse_pat(pat)
648
320644e2
PP
649 # Pattern for _skip_ws()
_ws_pat = re.compile(r"\s*")

# Consumes the longest possible run of whitespaces found at the
# current position.
def _skip_ws(self):
    pat = self._ws_pat
    self._try_parse_pat(pat)
655
71aaa3f7
PP
656 # Pattern for _try_parse_hex_byte()
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, insignificant symbols, and comments), returning a byte
# item on success.
def _try_parse_hex_byte(self):
    start_loc = self._text_loc
    m_hi = self._try_parse_pat(self._nibble_pat)

    if m_hi is None:
        # Not a hexadecimal byte
        return

    # A first nibble must be followed by a second one
    self._skip_ws_and_comments()
    m_lo = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Combine both nibbles (high one first)
    hi = int(m_hi.group(0), 16)
    lo = int(m_lo.group(0), 16)
    return _Byte((hi << 4) | lo, start_loc)
71aaa3f7
PP
679
680 # Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

# Tries to parse a binary byte (`%` followed by exactly eight bits),
# returning a byte item on success.
def _try_parse_bin_byte(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # Not a binary byte
        return

    # Accumulate the eight expected bits, most significant one first
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        m_bit = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
        val = (val << 1) | int(m_bit.group(0))

    return _Byte(val, start_loc)
71aaa3f7
PP
703
704 # Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed by a possibly negative
# decimal constant), returning a byte item on success.
def _try_parse_dec_byte(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # Not a decimal byte
        return

    # The prefix must be followed by the value
    self._skip_ws()
    m_val = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Signed value
    val = int(m_val.group("val"))

    if m_val.group("neg") == "-":
        val = -val

    # Only what fits in a signed or unsigned byte is accepted
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), start_loc)

    # Negative values are stored as two's complement
    return _Byte(val % 256, start_loc)
71aaa3f7
PP
733
734 # Tries to parse a byte, returning a byte item on success.
def _try_parse_byte(self):
    # Try each byte notation in order: hexadecimal, binary, and then
    # decimal; the first one to produce an item wins.
    byte_parsers = (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    )

    for try_parse in byte_parsers:
        item = try_parse()

        if item is not None:
            return item
753
754 # Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# A backslash followed by any character (except a newline)
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a literal string, with an optional `u16be`, `u16le`,
# `u32be`, or `u32le` encoding prefix (UTF-8 without any), returning a
# string item on success.
#
# Returns `None` if there's no literal string at the current position.
# Raises a parse error if the literal string isn't terminated.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences.
    #
    # This needs to be a single left-to-right pass: replacing one kind
    # of escape sequence after the other would, for example, wrongly
    # turn an escaped backslash followed by `n` into a backslash
    # followed by a newline. Unknown escape sequences remain as is.
    escape_seq_strs = self._str_escape_seq_strs
    val = self._str_escape_seq_pat.sub(
        lambda esc_m: escape_seq_strs.get(esc_m.group(1), esc_m.group(0)),
        m.group(0),
    )

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
71aaa3f7 808
320644e2
PP
809 # Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group, which starts with either `(` or `!group`
# (or `!g`), returning a group item on success.
def _try_parse_group(self):
    start_loc = self._text_loc
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        # Not a group
        return

    # Contained items
    items = self._parse_items()
    self._skip_ws_and_comments()

    # The expected terminator depends on how the group was opened
    if m_open.group(0) != "(":
        end_pat, end_str = self._block_end_pat, "!end"
    else:
        end_pat, end_str = self._right_paren_pat, ")"

    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )
    return _Group(items, start_loc)
71aaa3f7
PP
843
844 # Returns a stripped expression string and an AST expression node
845 # from the expression string `expr_str` at text location `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    # Normalize: no surrounding whitespaces, and each newline becomes
    # a single space.
    expr_str = " ".join(expr_str.strip().split("\n"))

    try:
        return expr_str, ast.parse(expr_str, mode="eval")
    except SyntaxError:
        # Report a parse error located at `text_loc`
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(expr_str),
            text_loc,
        )
859
269f6eb3 860 # Patterns for _try_parse_num_and_attr()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse an expression followed by `:` and an attribute, which
# is either a fixed length (bits) or `uleb128`/`sleb128`, returning the
# corresponding number item on success.
def _try_parse_num_and_attr(self):
    start_loc = self._text_loc
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # Not a number with an attribute
        return

    # Expression node
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), start_loc)

    # Fixed length?
    m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

    if m_len is not None:
        return _FlNum(expr_str, expr, int(m_len.group(0)), start_loc)

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        # Neither one: invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    # The first character of the attribute selects the signedness
    if m_leb128.group(1) == "u":
        item_cls = _ULeb128Int
    else:
        item_cls = _SLeb128Int

    return item_cls(expr_str, expr, start_loc)
71aaa3f7 904
320644e2
PP
905 # Patterns for _try_parse_var_assign()
_var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
_var_assign_expr_pat = re.compile(r"[^}]+")

# Tries to parse a variable assignment (`NAME = EXPR`), returning a
# variable assignment item on success and recording the variable name
# as known.
def _try_parse_var_assign(self):
    start_loc = self._text_loc
    m_name = self._try_parse_pat(self._var_assign_name_equal_pat)

    if m_name is None:
        # Not a variable assignment
        return

    var_name = m_name.group(1)

    # The name must be neither reserved nor the name of a known label
    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), start_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), start_loc)

    # An expression must follow
    self._skip_ws()
    m_expr = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(m_expr.group(0), start_loc)

    # The name is now a known variable name
    self._var_names.add(var_name)
    return _VarAssign(var_name, expr_str, expr, start_loc)
949
2adf4336 950 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning a byte
# order setting item on success.
def _try_parse_set_bo(self):
    start_loc = self._text_loc
    m_bo = self._try_parse_pat(self._bo_pat)

    if m_bo is None:
        # Not a byte order name
        return

    # The pattern only matches `be` or `le`, which are exactly the
    # values of the `ByteOrder` enumerators.
    return _SetBo(ByteOrder(m_bo.group(0)), start_loc)
71aaa3f7
PP
971
972 # Patterns for _try_parse_val_or_bo()
320644e2
PP
_val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
_val_var_assign_set_bo_suffix_pat = re.compile(r"\}")

# Tries to parse what's between `{` and `}`: a variable assignment, a
# number with an attribute, or a byte order setting (tried in this
# order), returning the item on success.
def _try_parse_val_or_var_assign_or_set_bo(self):
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        # No opening `{`
        return

    self._skip_ws()

    # The first alternative to produce an item wins
    alternatives = (
        self._try_parse_var_assign,
        self._try_parse_num_and_attr,
        self._try_parse_set_bo,
    )

    for try_parse in alternatives:
        item = try_parse()

        if item is not None:
            break
    else:
        # No alternative worked: invalid
        self._raise_error(
            "Expecting a fixed-length number, a variable assignment, or a byte order setting"
        )

    # Closing `}`
    self._skip_ws()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
1007
fc21bb27
PP
1008 # Returns a normalized version (so as to be parseable by int()) of
1009 # the constant integer string `s`, possibly negative, dealing with
1010 # any radix suffix.
@staticmethod
def _norm_const_int(s: str):
    # Returns a version of the constant integer string `s` which
    # `int(..., 0)` accepts.
    #
    # `s` may start with `-` and uses either a radix prefix (`0x`, `0o`,
    # `0b`), an assembly-style radix suffix (`h`, `q`/`o`, `b`), or
    # plain decimal digits. Strings without any radix suffix are
    # returned as is.
    neg = ""
    pos = s

    if s.startswith("-"):
        neg = "-"
        pos = s[1:]

    if not pos:
        # Nothing to normalize
        return s

    last = pos[-1]

    # A radix prefix only counts as such when at least one digit
    # follows it: `0b` and `0o` alone are zero with a radix suffix.
    # Also, nothing with a radix prefix may end with `h`, so `0bh` is
    # the hexadecimal constant 0x0b.
    if len(pos) > 2 and pos[0] == "0" and pos[1] in "xXoObB" and last not in "hH":
        # Already correct
        return s

    # Try suffix
    asm_suf_base = {
        "h": "x",
        "H": "x",
        "q": "o",
        "Q": "o",
        "o": "o",
        "O": "o",
        "b": "b",
        "B": "B",
    }

    base = asm_suf_base.get(last)

    if base is None:
        # No radix suffix: plain decimal
        return s

    # Move the radix from the end to the beginning
    return "{}0{}{}".format(neg, base, pos[:-1])
1042
320644e2 1043 # Common constant integer patterns
fc21bb27
PP
_pos_const_int_pat = re.compile(
    r"0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+"
)
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))

# Tries to parse an offset setting value, which is a positive constant
# integer (after the initial `<`), returning an offset setting item on
# success.
def _try_parse_set_offset_val(self):
    start_loc = self._text_loc
    m_val = self._try_parse_pat(self._pos_const_int_pat)

    if m_val is None:
        # Not a positive constant integer
        return

    # Normalize any radix suffix so that int() can infer the base
    norm_val_str = self._norm_const_int(m_val.group(0))
    return _SetOffset(int(norm_val_str, 0), start_loc)
71aaa3f7
PP
1063
1064 # Tries to parse a label name (after the initial `<`), returning a
1065 # label item on success.
def _try_parse_label_name(self):
    start_loc = self._text_loc
    m_name = self._try_parse_pat(_py_name_pat)

    if m_name is None:
        # Not a name
        return

    label_name = m_name.group(0)

    # The name must be neither reserved, nor the name of a known label,
    # nor the name of a known variable.
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), start_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), start_loc)

    if label_name in self._var_names:
        _raise_error("Existing variable named `{}`".format(label_name), start_loc)

    # The name is now a known label name
    self._label_names.add(label_name)
    return _Label(label_name, start_loc)
71aaa3f7 1095
2adf4336 1096 # Patterns for _try_parse_label_or_set_offset()
320644e2
PP
_label_set_offset_prefix_pat = re.compile(r"<")
_label_set_offset_suffix_pat = re.compile(r">")

# Tries to parse what's between `<` and `>`: an offset setting value
# or a label name (tried in this order), returning the item on success.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # No opening `<`
        return

    self._skip_ws()

    # The first alternative to produce an item wins
    for try_parse in (self._try_parse_set_offset_val, self._try_parse_label_name):
        item = try_parse()

        if item is not None:
            break
    else:
        # No alternative worked: invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # Closing `>`
    self._skip_ws()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1124
25ca454b
PP
1125 # Pattern for _parse_pad_val()
1126 _pad_val_prefix_pat = re.compile(r"~")
1127
1128 # Tries to parse a padding value, returning the padding value, or 0
1129 # if none.
def _parse_pad_val(self):
    # Parses an optional `~` followed by a padding byte value.
    #
    # Returns the padding byte value, or 0 when there's no `~`. Raises a
    # parse error if the value is missing or greater than 255.
    self._skip_ws()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value: default
        return 0

    self._skip_ws()
    val_text_loc = self._text_loc
    val_match = self._expect_pat(
        self._pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )

    # Base 0 lets int() honour any base prefix left in the normalized
    # text.
    byte_val = int(self._norm_const_int(val_match.group(0)), 0)

    if byte_val > 255:
        # Doesn't fit a single byte
        _raise_error(
            "Invalid padding byte value {}".format(byte_val),
            val_text_loc,
        )

    return byte_val
1153
# Patterns for _try_parse_align_offset(): the `@` prefix and the decimal
# alignment value (in bits) which follows it.
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")
676f6189
PP
1157
1158 # Tries to parse an offset alignment, returning an offset alignment
1159 # item on success.
def _try_parse_align_offset(self):
    # Tries to parse `@`, an alignment in bits, and an optional padding
    # value, returning an offset alignment item, or `None` if the `@`
    # prefix isn't found.
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No `@`: not an offset alignment
        return

    # The alignment value is mandatory
    self._skip_ws()
    val_text_loc = self._text_loc
    val_match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align_bits = int(val_match.group(0))

    # Only whole bytes can be aligned: 8, 16, 24, ...
    is_valid = align_bits > 0 and align_bits % 8 == 0

    if not is_valid:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align_bits
            ),
            val_text_loc,
        )

    # Optional `~` padding byte value (0 if missing)
    pad_byte = self._parse_pad_val()
    return _AlignOffset(align_bits, pad_byte, item_text_loc)
676f6189 1192
25ca454b
PP
# Patterns for _try_parse_fill_until(): the `+` prefix of a filling item.
#
# NOTE(review): _try_parse_fill_until() delegates the padding value to
# _parse_pad_val(), which matches `_pad_val_prefix_pat`; confirm whether
# `_fill_until_pad_val_prefix_pat` is still used anywhere.
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")
676f6189 1196
25ca454b
PP
1197 # Tries to parse a filling, returning a filling item on success.
def _try_parse_fill_until(self):
    # Tries to parse `+`, a target offset expression, and an optional
    # padding value, returning a filling item, or `None` if the `+`
    # prefix isn't found.
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # No `+`: not a filling
        return

    # Target offset: constant integer, name, or `{` expression `}`
    self._skip_ws()
    target_expr_str, target_expr = self._expect_const_int_name_expr(True)

    # Optional `~` padding byte value (0 if missing)
    pad_byte = self._parse_pad_val()
    return _FillUntil(target_expr_str, target_expr, pad_byte, item_text_loc)
676f6189 1215
# Patterns for _expect_const_int_name_expr(): the `{` which opens an
# inner expression, the expression text itself (everything up to the
# next `}`), and the closing `}`.
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")
1220
1221 # Parses a constant integer if `accept_const_int` is `True`
1222 # (possibly negative if `allow_neg` is `True`), a name, or an
1223 # expression within `{` and `}`.
def _expect_const_int_name_expr(
    self, accept_const_int: bool, allow_neg: bool = False
):
    # Parses, in this order of preference:
    #
    # 1. A constant integer (only if `accept_const_int` is `True`; a
    #    negative one is an error unless `allow_neg` is `True`).
    # 2. A name.
    # 3. An expression between `{` and `}`.
    #
    # Returns what self._ast_expr_from_str() returns for the parsed text
    # and its location. Raises a parse error if nothing matches.
    start_text_loc = self._text_loc

    # 1. Constant integer
    if accept_const_int:
        int_match = self._try_parse_pat(self._const_int_pat)

        if int_match is not None:
            if int_match.group("neg") == "-" and not allow_neg:
                _raise_error("Expecting a positive constant integer", start_text_loc)

            return self._ast_expr_from_str(
                self._norm_const_int(int_match.group(0)), start_text_loc
            )

    # 2. Name
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is not None:
        return self._ast_expr_from_str(name_match.group(0), start_text_loc)

    # 3. `{` expression `}`
    if self._try_parse_pat(self._inner_expr_prefix_pat) is None:
        # Nothing matched: tailor the message to what was acceptable
        if accept_const_int:
            expected = "a {}constant integer, a name, or `{{`".format(
                "" if allow_neg else "positive "
            )
        else:
            expected = "a name or `{`"

        self._raise_error("Expecting {}".format(expected))

    # The reported location of an inner expression is where its text
    # begins, not where the `{` is.
    self._skip_ws()
    inner_text_loc = self._text_loc
    inner_str = self._expect_pat(
        self._inner_expr_pat, "Expecting an expression"
    ).group(0)

    # The closing `}` is mandatory
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_str, inner_text_loc)
1272
27d52a19
PP
1273 # Parses the multiplier expression of a repetition (block or
1274 # post-item) and returns the expression string and AST node.
def _expect_rep_mul_expr(self):
    # A multiplier may be a constant integer (`True`), a name, or an
    # expression within `{` and `}`; negative constants are rejected
    # because `allow_neg` keeps its default value.
    return self._expect_const_int_name_expr(True)
27d52a19
PP
1277
# Common block end pattern (`!end`), shared by the repetition block,
# conditional block, and macro definition parsers.
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block(): `!r` or `!repeat`
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
e57a18e1
PP
1283
1284 # Tries to parse a repetition block, returning a repetition item on
1285 # success.
def _try_parse_rep_block(self):
    # Tries to parse a repetition block (prefix, multiplier expression,
    # items, `!end`), returning a repetition item which wraps a group of
    # the block's items, or `None` if the prefix isn't found.
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return

    # Multiplier
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Repeated items
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    body_items = self._parse_items()

    # Whatever stopped the item parsing must be `!end`
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    body = _Group(body_items, body_text_loc)
    return _Rep(body, mul_expr_str, mul_expr, block_text_loc)
1311
# Patterns for _try_parse_cond_block(): the `!if` prefix and the
# optional `!else` separator.
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")
27d52a19
PP
1315
1316 # Tries to parse a conditional block, returning a conditional item
1317 # on success.
def _try_parse_cond_block(self):
    # Tries to parse a conditional block (`!if`, condition expression,
    # "true" items, optional `!else` and "false" items, `!end`),
    # returning a conditional item, or `None` if `!if` isn't found.
    cond_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return

    # Condition: a name or `{` expression `}` (no constant integer)
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_const_int_name_expr(False)

    # "True" branch
    self._skip_ws_and_comments()
    true_text_loc = self._text_loc
    true_items = self._parse_items()

    # "False" branch: empty, located at the beginning of the block,
    # unless there's an `!else`.
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._cond_block_else_pat) is None:
        false_text_loc = cond_text_loc
        false_items = []  # type: List[_Item]
    else:
        self._skip_ws_and_comments()
        false_text_loc = self._text_loc
        false_items = self._parse_items()

    # Whatever stopped the item parsing must be `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    return _Cond(
        _Group(true_items, true_text_loc),
        _Group(false_items, false_text_loc),
        cond_expr_str,
        cond_expr,
        cond_text_loc,
    )
27d52a19 1360
320644e2
PP
# Common left parenthesis pattern (opens the parameter list of both a
# macro definition and a macro expansion)
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp(): the
# comma which separates parameters
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def(): `!m` or `!macro`
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1369
1370 # Tries to parse a macro definition, adding it to `self._macro_defs`
1371 # and returning `True` on success.
def _try_parse_macro_def(self):
    # Tries to parse a macro definition (prefix, name, parenthesized
    # parameter names, items, `!end`).
    #
    # Returns `False` if the prefix isn't found; otherwise registers the
    # definition in `self._macro_defs` and returns `True`. Raises a
    # parse error on a duplicate macro or parameter name.
    macro_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name, which must be new
    self._skip_ws()
    name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_text_loc)

    # Parameter list
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")
    param_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # `)`: end of the parameter list
            break

        # A comma is required before every parameter except the first
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_text_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)

    # Parse the items of the macro with empty sets of known variable
    # and label names, then put the outer sets back.
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    outer_var_names = self._var_names.copy()
    outer_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = outer_var_names
    self._label_names = outer_label_names

    # Whatever stopped the item parsing must be `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, _Group(body_items, body_text_loc), macro_text_loc
    )

    return True
1447
# Patterns for _try_parse_macro_exp(): the `m` prefix and the `:` which
# must follow it.
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")
1451
1452 # Tries to parse a macro expansion, returning a macro expansion item
1453 # on success.
def _try_parse_macro_exp(self):
    # Tries to parse a macro expansion (`m`, `:`, macro name,
    # parenthesized parameter values), returning a macro expansion item,
    # or `None` if the `m` prefix isn't found.
    #
    # Raises a parse error if the macro is unknown or if the number of
    # values differs from the number of parameters of its definition.
    exp_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return

    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Name of a macro which is already defined
    self._skip_ws()
    name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), name_text_loc)

    # Parameter values
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")
    params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # `)`: end of the parameter values
            break

        # A comma is required before every value except the first
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # Value: constant integer (possibly negative), name, or
        # `{` expression `}`
        self._skip_ws()
        param_text_loc = self._text_loc
        param_expr_str, param_expr = self._expect_const_int_name_expr(True, True)
        params.append(_MacroExpParam(param_expr_str, param_expr, param_text_loc))

    # The expansion must pass exactly what the definition expects
    expected_count = len(macro_def.param_names)

    if len(params) != expected_count:
        plural_suffix = "" if len(params) == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                len(params), plural_suffix, expected_count
            ),
            params_text_loc,
        )

    return _MacroExp(macro_name, params, exp_text_loc)
1520
71aaa3f7
PP
1521 # Tries to parse a base item (anything except a repetition),
1522 # returning it on success.
def _try_parse_base_item(self):
    # Tries each base item parser in turn, returning the first item
    # which one of them produces, or `None` if none matches.
    #
    # The order below is the order of the attempts.
    item_parsers = (
        self._try_parse_byte,
        self._try_parse_str,
        self._try_parse_val_or_var_assign_or_set_bo,
        self._try_parse_label_or_set_offset,
        self._try_parse_align_offset,
        self._try_parse_fill_until,
        self._try_parse_group,
        self._try_parse_rep_block,
        self._try_parse_cond_block,
        self._try_parse_macro_exp,
    )

    for try_parse in item_parsers:
        item = try_parse()

        if item is not None:
            return item
1583
e57a18e1
PP
# Pattern for _try_parse_rep_post(): the `*` which introduces a
# post-item repetition.
_rep_post_prefix_pat = re.compile(r"\*")
1586
1587 # Tries to parse a post-item repetition, returning the expression
1588 # string and AST expression node on success.
def _try_parse_rep_post(self):
    # Tries to parse `*` followed by a multiplier expression, returning
    # what _expect_rep_mul_expr() returns, or `None` if there's no `*`.
    if self._try_parse_pat(self._rep_post_prefix_pat) is not None:
        # `*` found: the multiplier is now mandatory
        self._skip_ws_and_comments()
        return self._expect_rep_mul_expr()

    return None
71aaa3f7 1598
1ca7b5e1
PP
1599 # Tries to parse an item, possibly followed by a repetition,
1600 # returning `True` on success.
1601 #
1602 # Appends any parsed item to `items`.
def _try_append_item(self, items: List[_Item]):
    # Tries to parse one base item, possibly followed by a post-item
    # repetition, and appends the result to `items`.
    #
    # Returns `True` if an item was appended, or `None` otherwise.
    self._skip_ws_and_comments()
    parsed_item = self._try_parse_base_item()

    if parsed_item is None:
        # Nothing recognized here
        return

    if isinstance(parsed_item, _RepableItem):
        # A repeatable item may be followed by `*` and a multiplier
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_ret = self._try_parse_rep_post()

        if rep_ret is not None:
            # Wrap the base item within a repetition item
            mul_expr_str, mul_expr = rep_ret
            parsed_item = _Rep(parsed_item, mul_expr_str, mul_expr, rep_text_loc)

    items.append(parsed_item)
    return True
71aaa3f7
PP
1623
1624 # Parses and returns items, skipping whitespaces, insignificant
1625 # symbols, and comments when allowed, and stopping at the first
1626 # unknown character.
320644e2
PP
1627 #
1628 # Accepts and registers macro definitions if `accept_macro_defs`
1629 # is `True`.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    # Parses and returns as many items as possible, stopping at the end
    # of the input or at the first thing which is neither an item nor,
    # when `accept_macro_defs` is `True`, a macro definition.
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got an item: keep going
            continue

        # A macro definition adds nothing to the list, but parsing may
        # continue after it.
        if not (accept_macro_defs and self._try_parse_macro_def()):
            # Unknown at this point
            break

    return parsed_items
1643
1644 # Parses the whole Normand input, setting `self._res` to the main
1645 # group item on success.
def _parse(self):
    # Parses the whole Normand input, setting `self._res` to the main
    # group item. Raises a parse error if anything remains after the
    # last recognized item.
    if not self._normand.strip():
        # Nothing but whitespace: the result is an empty group
        self._res = _Group([], self._text_loc)
        return

    # Only the first level accepts macro definitions
    top_items = self._parse_items(True)

    # Anything left over (other than whitespace and comments) is an
    # error.
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    self._res = _Group(top_items, self._text_loc)
1665
1666
1667# The return type of parse().
class ParseResult:
    # Immutable-looking container of what a parsing/generation produced:
    # the data, the variables, the labels, the offset, and the byte
    # order.
    #
    # Instances are built with _create(); calling the class directly
    # raises `NotImplementedError`.

    # Builds and returns an instance without going through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self = cls.__new__(cls)
        self._init(data, variables, labels, offset, bo)
        return self

    # Direct instantiation isn't supported: use _create().
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Real initializer: stores the parameters as they are (no copy).
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order (may be `None`).
    @property
    def byte_order(self):
        return self._bo
1724
1725
1726# Raises a parse error for the item `item`, creating it using the
1727# message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The error is reported at the text location of `item`
    _raise_error(msg, item.text_loc)
1730
1731
# The `ICITTE` reserved name: it can't be used as a label name, and
# expressions see it as the current offset.
_icitte_name = "ICITTE"
1734
1735
2adf4336
PP
1736# Base node visitor.
1737#
1738# Calls the _visit_name() method for each name node which isn't the name
1739# of a call.
1740class _NodeVisitor(ast.NodeVisitor):
1741 def __init__(self):
71aaa3f7
PP
1742 self._parent_is_call = False
1743
1744 def generic_visit(self, node: ast.AST):
1745 if type(node) is ast.Call:
1746 self._parent_is_call = True
1747 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1748 self._visit_name(node.id)
71aaa3f7
PP
1749
1750 super().generic_visit(node)
1751 self._parent_is_call = False
1752
2adf4336
PP
1753 @abc.abstractmethod
1754 def _visit_name(self, name: str):
1755 ...
1756
71aaa3f7 1757
2adf4336
PP
1758# Expression validator: validates that all the names within the
1759# expression are allowed.
class _ExprValidator(_NodeVisitor):
    # Expression validator: raises a parse error, located at `text_loc`,
    # for the first reported name of the visited expression which is
    # neither `ICITTE` nor one of `allowed_names`.
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable name
            return

        error_msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # Help the user with the sorted list of legal names, which
        # always contains at least `ICITTE`.
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)
        quoted_names = sorted(
            ["`{}`".format(legal_name) for legal_name in legal_names]
        )
        error_msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))
        _raise_error(
            error_msg,
            self._text_loc,
        )
1788
1789
2adf4336
PP
1790# Generator state.
class _GenState:
    # Generation state: current variables, labels, offset, and byte
    # order.
    #
    # The constructor keeps its own copies (`.copy()`) of `variables`
    # and `labels`, so that updating the state doesn't modify the
    # containers of the caller.
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.variables = variables.copy()
        self.labels = labels.copy()
        self.offset = offset
        self.bo = bo

    # Debugging representation showing the four attributes.
    def __repr__(self):
        return "_GenState({}, {}, {}, {})".format(
            repr(self.variables), repr(self.labels), repr(self.offset), repr(self.bo)
        )
1808
1809
1810# Fixed-length number item instance.
class _FlNumItemInst:
    # Fixed-length number item instance: what's needed to generate the
    # bytes of a fixed-length number item later, that is, the item
    # itself, the offset of its reserved bytes within the generated
    # data, and the generation state to evaluate its expression with.
    def __init__(self, item: _FlNum, offset_in_data: int, state: _GenState):
        self._item = item
        self._offset_in_data = offset_in_data
        self._state = state

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the reserved bytes within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state to use for the evaluation.
    @property
    def state(self):
        return self._state
1828
71aaa3f7 1829
2adf4336 1830# Generator of data and final state from a group item.
71aaa3f7
PP
1831#
1832# Generation happens in memory at construction time. After building, use
1833# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1834# get the resulting context.
2adf4336
PP
1835#
1836# The steps of generation are:
1837#
320644e2
PP
1838# 1. Handle each item in prefix order.
1839#
1840# The handlers append bytes to `self._data` and update some current
1841# state object (`_GenState` instance).
1842#
1843# When handling a fixed-length number item, try to evaluate its
1844# expression using the current state. If this fails, then it might be
1845# because the expression refers to a "future" label: save the current
1846# offset in `self._data` (generated data) and a snapshot of the
1847# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
1848# object). _gen_fl_num_item_insts() will deal with this later.
2adf4336 1849#
320644e2
PP
1850# When handling the items of a group, keep a map of immediate label
1851# names to their offset. Then, after having processed all the items,
1852# update the relevant saved state snapshots in
1853# `self._fl_num_item_insts` with those immediate label values.
1854# _gen_fl_num_item_insts() will deal with this later.
2adf4336 1855#
320644e2
PP
1856# 2. Handle all the fixed-length number item instances of which the
1857# expression evaluation failed before.
2adf4336 1858#
320644e2
PP
1859# At this point, `self._fl_num_item_insts` contains everything that's
1860# needed to evaluate the expressions, including the values of
1861# "future" labels from the point of view of some fixed-length number
1862# item instance.
2adf4336 1863#
320644e2 1864# If an evaluation fails at this point, then it's a user error.
71aaa3f7
PP
1865class _Gen:
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Macro definitions, looked up by name when handling a macro
    # expansion item
    self._macro_defs = macro_defs

    # Fixed-length number item instances of which the immediate
    # evaluation failed (see _handle_fl_num_item())
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]

    # Generate now, starting from a fresh state built from the initial
    # variables, labels, offset, and byte order
    self._gen(group, _GenState(variables, labels, offset, bo))
71aaa3f7
PP
1878
1879 # Generated bytes.
@property
def data(self):
    # The buffer to which the item handlers append bytes
    return self._data
1883
1884 # Updated variables.
@property
def variables(self):
    # Variables of the final generation state
    return self._final_state.variables
71aaa3f7
PP
1888
1889 # Updated main group labels.
@property
def labels(self):
    # Labels of the final generation state
    return self._final_state.labels
71aaa3f7
PP
1893
1894 # Updated offset.
@property
def offset(self):
    # Offset of the final generation state
    return self._final_state.offset
71aaa3f7
PP
1898
1899 # Updated byte order.
@property
def bo(self):
    # Byte order of the final generation state (may be `None`)
    return self._final_state.bo
1903
320644e2
PP
1904 # Evaluates the expression `expr` of which the original string is
1905 # `expr_str` at the location `text_loc` considering the current
2adf4336
PP
1906 # generation state `state`.
1907 #
269f6eb3
PP
1908 # If `allow_float` is `True`, then the type of the result may be
1909 # `float` too.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    # Evaluates the expression `expr` (original text: `expr_str`,
    # located at `text_loc`) with the labels, current offset, and
    # variables of `state`, and returns the result.
    #
    # A `bool` result becomes an `int`. Raises a parse error if the
    # expression uses an illegal name, fails to evaluate, or has a
    # result which isn't an `int` (or a `float` when `allow_float` is
    # `True`).

    # Symbols, from lowest to highest precedence: labels, `ICITTE`
    # (current offset), variables.
    symbols = dict(state.labels)  # type: VariablesT
    symbols[_icitte_name] = state.offset
    symbols.update(state.variables)

    # Validate the names used by the expression
    _ExprValidator(expr_str, text_loc, set(symbols)).visit(expr)

    # NOTE(security): this runs eval() on code which comes from the
    # Normand input. The validator above only checks names which aren't
    # the name of a call, so only feed trusted input to this.
    try:
        result = eval(compile(expr, "", "eval"), None, symbols)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize `bool` to `int`
    if type(result) is bool:
        result = int(result)

    # Validate the exact type of the result
    accepted_types = [int]  # type: List[type]
    accepted_types_msg = "`int`"

    if allow_float:
        accepted_types.append(float)
        accepted_types_msg += " or `float`"

    if type(result) not in accepted_types:
        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, accepted_types_msg, type(result).__name__
            ),
            text_loc,
        )

    return result
1960
320644e2
PP
1961 # Evaluates the expression of `item` considering the current
1962 # generation state `state`.
1963 #
1964 # If `allow_float` is `True`, then the type of the result may be
1965 # `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    # Convenience wrapper of _eval_expr() which takes the expression
    # string, the expression node, and the text location from `item`.
    return _Gen._eval_expr(
        item.expr_str, item.expr, item.text_loc, state, allow_float
    )
1975
1976 # Handles the byte item `item`.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    # Append the byte value and advance the offset by the item size
    self._data.append(item.val)
    state.offset += item.size
1980
1981 # Handles the string item `item`.
def _handle_str_item(self, item: _Str, state: _GenState):
    # Append the data of the string and advance the offset by the item
    # size
    self._data += item.data
    state.offset += item.size
1985
1986 # Handles the byte order setting item `item`.
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    # Update current byte order (generates no data)
    state.bo = item.bo
1990
1991 # Handles the variable assignment item `item`.
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    # Update variable with the result of the expression, which may be a
    # `float` (`allow_float` is `True`); generates no data
    state.variables[item.name] = self._eval_item_expr(item, state, True)
1995
1996 # Handles the fixed-length number item `item`.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    # Handles the fixed-length number item `item`: appends its bytes, or
    # placeholder bytes if they can't be generated yet, and advances the
    # offset.

    # More than 8 bits can't be encoded without a byte order
    needs_bo = item.len > 8

    if needs_bo and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        # Immediate generation
        encoded = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Any failure is deliberately deferred, not reported: keep the
        # item, where its bytes go, and a snapshot of the current state
        # to try again later.
        inst = _FlNumItemInst(item, len(self._data), copy.deepcopy(state))
        self._fl_num_item_insts.append(inst)

        # Zero bytes reserve the space of this instance in the meantime
        encoded = bytes([0] * (item.len // 8))

    self._data += encoded
    state.offset += len(encoded)
2024
05f81895
PP
2025 # Returns the size, in bytes, required to encode the value `val`
2026 # with LEB128 (signed version if `is_signed` is `True`).
2027 @staticmethod
2028 def _leb128_size_for_val(val: int, is_signed: bool):
2029 if val < 0:
2030 # Equivalent upper bound.
2031 #
2032 # For example, if `val` is -128, then the full integer for
2033 # this number of bits would be [-128, 127].
2034 val = -val - 1
2035
2036 # Number of bits (add one for the sign if needed)
2037 bits = val.bit_length() + int(is_signed)
2038
2039 if bits == 0:
2040 bits = 1
2041
2042 # Seven bits per byte
2043 return math.ceil(bits / 7)
2044
320644e2
PP
2045 # Handles the LEB128 integer item `item`.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    # Handles the LEB128 integer item `item`: evaluates its expression,
    # appends the encoded bytes, and advances the offset.
    #
    # Raises a parse error if the result is negative while the item
    # isn't a signed LEB128 integer.

    # Compute value (`int` only)
    val = self._eval_item_expr(item, state, False)
    is_signed = type(item) is _SLeb128Int

    # An unsigned LEB128 integer can't represent a negative value:
    # encoding one anyway would silently produce the bytes of some
    # unrelated non-negative number (-1 would become 0x7f).
    if val < 0 and not is_signed:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                item.expr_str, val
            ),
            item,
        )

    # Size in bytes
    size = self._leb128_size_for_val(val, is_signed)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= 0x7F

    # Update offset
    state.offset += size
27d52a19 2064
320644e2
PP
2065 # Handles the group item `item`, removing the immediate labels from
2066 # `state` at the end if `remove_immediate_labels` is `True`.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Handles each item of the group `item` in order.
    #
    # The labels which are direct children of the group (immediate
    # labels) are removed from `state` at the end if
    # `remove_immediate_labels` is `True`. In all cases they're added to
    # the state snapshot of every fixed-length number item instance
    # which was deferred while handling this group.
    deferred_begin = len(self._fl_num_item_insts)
    group_labels = {}  # type: LabelsT

    for child in item.items:
        if type(child) is _Label:
            # Offset of the label: the current one, before handling it
            group_labels[child.name] = state.offset

        self._handle_item(child, state)

    if remove_immediate_labels:
        for label_name in group_labels:
            del state.labels[label_name]

    # Deferred instances get to know the labels of this group, including
    # the ones which come after them.
    for deferred_inst in self._fl_num_item_insts[deferred_begin:]:
        deferred_inst.state.labels.update(group_labels)
2adf4336 2090
320644e2
PP
2091 # Handles the repetition item `item`.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    # Handles the repetition item `item`: evaluates its multiplier and
    # handles the repeated item that many times. Raises a parse error if
    # the multiplier is negative.
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    repeated_item = item.item

    for _ in range(count):
        self._handle_item(repeated_item, state)
2adf4336 2108
320644e2 2109 # Handles the conditional item `item`.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    # Handles the conditional item `item`: evaluates its condition, then
    # handles the "true" item if the result is truthy, or the "false"
    # item otherwise.
    cond_val = _Gen._eval_item_expr(item, state)
    selected_item = item.true_item if cond_val else item.false_item
    self._handle_item(selected_item, state)
2adf4336 2119
320644e2
PP
2120 # Evaluates the parameters of the macro expansion item `item`
2121 # considering the initial state `init_state` and returns a new state
2122 # to handle the items of the macro.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    # Returns a new state to handle the items of the macro which `item`
    # expands: no labels, the offset and byte order of `init_state`, and
    # one variable per macro parameter, of which the value is the
    # corresponding expression of `item` evaluated with `init_state`.
    macro_state = _GenState({}, {}, init_state.offset, init_state.bo)
    param_names = self._macro_defs[item.name].param_names

    for param_name, param in zip(param_names, item.params):
        # A parameter value may be a `float` (`allow_float` is `True`)
        param_val = _Gen._eval_expr(
            param.expr_str, param.expr, param.text_loc, init_state, True
        )
        macro_state.variables[param_name] = param_val

    return macro_state
2adf4336 2136
320644e2
PP
2137 # Handles the macro expansion item `item`.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    # Handles the macro expansion item `item`: handles the group of the
    # macro with its own state, then advances the offset of `state` by
    # the number of bytes which this appended to the data.
    macro_state = self._eval_macro_exp_params(item, state)
    size_before = len(self._data)
    macro_def = self._macro_defs[item.name]
    self._handle_item(macro_def.group, macro_state)

    # Only the offset of the outer state changes
    state.offset += len(self._data) - size_before
676f6189 2148
320644e2
PP
# Handles the offset setting item `item`: the offset of `state` becomes
# `item.val` (no data is generated).
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    new_offset = item.val
    state.offset = new_offset
2adf4336 2152
# Handles the offset alignment item `item` (adds padding).
#
# Rounds the offset of `state` up to the next multiple of `item.val`
# bits (converted to bytes) and appends as many `item.pad_val` bytes to
# `self._data` as the offset advanced.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    # Alignment is expressed in bits; work in bytes
    align_bytes = item.val // 8
    start_offset = state.offset

    # Round up to the next multiple of `align_bytes`
    aligned_offset = (start_offset + align_bytes - 1) // align_bytes * align_bytes
    state.offset = aligned_offset

    # Pad the gap
    pad_len = aligned_offset - start_offset
    self._data += bytes([item.pad_val] * pad_len)
2adf4336 2159
25ca454b
PP
# Handles the filling item `item` (adds padding).
#
# Evaluates the expression of `item` with `state` to get the offset to
# reach, reports an error for `item` if it's less than the current
# offset, and otherwise appends `item.pad_val` bytes to `self._data`
# until the offset of `state` is that target offset.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    # Target offset
    target_offset = _Gen._eval_item_expr(item, state)
    cur_offset = state.offset

    # Can't go backwards
    if target_offset < cur_offset:
        msg = "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
            item.expr_str, target_offset, cur_offset
        )
        _raise_error_for_item(msg, item)

    # Append the padding bytes
    pad_len = target_offset - cur_offset
    self._data += bytes([item.pad_val] * pad_len)

    # The target offset is now the current offset
    state.offset = target_offset
2179
320644e2
PP
# Handles the label item `item`: records the current offset of `state`
# as the value of the label named `item.name` in `state.labels`.
def _handle_label_item(self, item: _Label, state: _GenState):
    cur_offset = state.offset
    state.labels[item.name] = cur_offset
2adf4336 2183
320644e2
PP
# Handles the item `item` with the state `state`.
#
# Dispatches to the handler registered in `self._item_handlers` for the
# exact type of `item`, and returns what this handler returns.
def _handle_item(self, item: _Item, state: _GenState):
    handler = self._item_handlers[type(item)]
    return handler(item, state)
71aaa3f7 2188
320644e2
PP
# Generates the data for a fixed-length integer item instance having
# the value `val` and returns it.
#
# `item.len` is the length, in bits, of the data to generate. The byte
# order is the one of `state`, big endian being used when `state.bo`
# is `None`.
#
# Reports an error for `item` if `val` fits neither the signed nor the
# unsigned `item.len`-bit range.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Accept both the signed and the unsigned ranges
    min_val = -(2 ** (item.len - 1))
    max_val = 2**item.len - 1

    if not (min_val <= val <= max_val):
        msg = "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
            val, item.len, item.expr_str
        )
        _raise_error_for_item(msg, item)

    # Always encode on 64 bits first so that the sign bit is extended
    # whatever the value of `item.len`
    is_be = state.bo in (None, ByteOrder.BE)
    fmt = "{}{}".format(">" if is_be else "<", "Q" if val >= 0 else "q")
    encoded = struct.pack(fmt, val)

    # Only keep the least significant `item.len` bits
    len_bytes = item.len // 8

    if is_be:
        # Big endian: least significant bytes are the last ones
        return encoded[-len_bytes:]

    # Little endian: least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return encoded[:len_bytes]
269f6eb3 2224
320644e2
PP
# Generates the data for a fixed-length floating point number item
# instance having the value `val` and returns it.
#
# `item.len` must be 32 or 64 (bits), otherwise this method reports an
# error for `item`. The byte order is the one of `state`, big endian
# being used when `state.bo` is `None`.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # Only single and double precision are supported
    if item.len not in (32, 64):
        msg = "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
            item.len, val
        )
        _raise_error_for_item(msg, item)

    # Build the `struct` format: byte order, then type
    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"

    # Encode
    return struct.pack("{}{}".format(bo_char, type_char), val)
2245
320644e2
PP
# Generates the data for a fixed-length number item instance and
# returns it.
#
# Evaluates the expression of `item` with `state` and encodes the
# result as an integer or as a floating point number depending on its
# exact type.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    # Value to encode
    val = self._eval_item_expr(item, state, True)

    # Integer
    if type(val) is int:
        return self._gen_fl_int_item_inst_data(val, item, state)

    # Anything else must be a floating point number
    assert type(val) is float
    return self._gen_fl_float_item_inst_data(val, item, state)
05f81895 2258
320644e2
PP
# Generates the data for all the fixed-length number item instances
# (`self._fl_num_item_insts`) and writes it at the correct offset
# within `self._data`.
def _gen_fl_num_item_insts(self):
    for inst in self._fl_num_item_insts:
        # Encode with the state recorded for this instance
        encoded = self._gen_fl_num_item_inst_data(inst.item, inst.state)

        # Overwrite the corresponding region of `self._data`
        begin = inst.offset_in_data
        end = begin + len(encoded)
        self._data[begin:end] = encoded
2adf4336
PP
2268
# Generates the data (`self._data`) and the final state
# (`self._final_state`) from the group `group`, starting with the
# initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Start without any data
    self._data = bytearray()

    # Item type to handling method
    handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]
    self._item_handlers = handlers

    # Handle the root group, _keeping_ its immediate labels (last
    # argument) because the `labels` property offers them.
    self._handle_group_item(group, state, False)

    # At this point, `state` is the final state
    self._final_state = state

    # The full state of each fixed-length number item instance is
    # known now: generate their bytes
    self._gen_fl_num_item_insts()
2304
71aaa3f7
PP
2305
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the bytes it encodes.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values; `None` means no initial variables.
# A variable name must not be the reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values; `None` means no initial labels. A label
# name must not be the reserved name `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Missing dictionaries mean empty ones
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data from the resulting
    # items and macro definitions
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    # Wrap the outcome of the generation
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2347
2348
# Parses the command-line arguments of the process and returns the
# resulting `argparse` namespace.
#
# Accepted options are `--offset`, `-b`/`--byte-order`, `--var`,
# `-l`/`--label`, and `--version`; the only positional argument is the
# optional input path.
def _parse_cli_args():
    import argparse

    arg_parser = argparse.ArgumentParser()

    # Initial offset
    arg_parser.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )

    # Initial byte order
    arg_parser.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Initial variables
    arg_parser.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )

    # Initial labels
    arg_parser.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    arg_parser.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )

    # Input path
    arg_parser.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return arg_parser.parse_args()
2397
2398
2399# Raises a command-line error with the message `msg`.
2400def _raise_cli_error(msg: str) -> NoReturn:
2401 raise RuntimeError("Command-line error: {}".format(msg))
2402
2403
# Returns a dictionary of strings to integers built from the list of
# strings `args`, each one being a `NAME=VAL` entry where `NAME`
# matches `_py_name_pat` and `VAL` is a sequence of decimal digits.
#
# Returns an empty dictionary if `args` is `None`.
#
# Raises a command-line error if an entry is malformed.
def _dict_from_arg(args: Optional[List[str]]):
    result = {}  # type: LabelsT

    if args is not None:
        # Same pattern for all the entries
        assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

        for arg in args:
            m = assign_pat.match(arg)

            if m is None:
                _raise_cli_error("Invalid assignment {}".format(arg))

            # A later entry overrides an earlier one having the same name
            result[m.group(1)] = int(m.group(2))

    return result
2421
2422
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input (from the
# file of which the path is given or, without a path, from the standard
# input), converts it with parse(), and writes the resulting bytes to
# the standard output.
#
# Raises a command-line error (see _raise_cli_error()) on an invalid
# `NAME=VAL` entry or on a negative initial offset. A `ParseError` is
# reported with _fail(), prefixed with the absolute input path (if any)
# and the line and column numbers of the error.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order (`None` means no initial byte order)
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print (raw bytes, hence the underlying binary buffer)
    sys.stdout.buffer.write(res.data)
2472
2473
2474# Prints the exception message `msg` and exits with status 1.
2475def _fail(msg: str) -> NoReturn:
2476 if not msg.endswith("."):
2477 msg += "."
2478
2479 print(msg, file=sys.stderr)
2480 sys.exit(1)
2481
2482
# CLI entry point.
#
# Runs _try_run_cli() and reports the message of any exception it
# raises with _fail().
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        # Show a plain error message instead of a traceback
        err_msg = str(exc)
        _fail(err_msg)
2489
2490
# Run the command-line tool when this module is executed as a script
if __name__ == "__main__":
    _run_cli()
This page took 0.130249 seconds and 4 git commands to generate.