Add "fill until" support
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.12.0"

# Names which `from normand import *` exports.
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
45
46import re
47import abc
48import ast
49import sys
320644e2 50import copy
71aaa3f7 51import enum
05f81895 52import math
71aaa3f7 53import struct
e57a18e1
PP
54import typing
55from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
56
57
# Text location: a line number and a column number.
#
# Calling the class directly raises `NotImplementedError`; instances
# are built through the private _create() class method.
class TextLocation:
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a text location without going through
    # __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    # Actual initialization, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    def __repr__(self):
        coords = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*coords)

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no
2adf4336 85
71aaa3f7
PP
86
# Base of all items: remembers the source text location it's built
# with.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Location of this item within the source text.
    @property
    def text_loc(self):
        return self._text_loc
96
2adf4336
PP
97
# Scalar item: an item having a size.
class _ScalarItem(_Item):
    # Size of this item (bytes); concrete scalar items override this
    # property.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
105
106
# Marker base class (no members) for repeatable items.
class _RepableItem:
    pass
110
111
# Single byte item.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    def __repr__(self):
        fields = (hex(self._val), repr(self._text_loc))
        return "_Byte({}, {})".format(*fields)

    # Value of this byte.
    @property
    def val(self):
        return self._val

    # A byte item always occupies a single byte.
    @property
    def size(self):
        return 1
71aaa3f7
PP
129
130
# String item (already encoded).
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    def __repr__(self):
        fields = (repr(self._data), repr(self._text_loc))
        return "_Str({}, {})".format(*fields)

    # Encoded bytes of this string.
    @property
    def data(self):
        return self._data

    # Size (bytes): the length of the encoded data.
    @property
    def size(self):
        return len(self._data)
71aaa3f7
PP
148
149
# Byte order (endianness).
#
# The member values are unique (enforced by `enum.unique`).
@enum.unique
class ByteOrder(enum.Enum):
    BE = "be"  # Big endian
    LE = "le"  # Little endian
158
159
2adf4336
PP
# Byte order setting item.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    def __repr__(self):
        fields = (repr(self._bo), repr(self._text_loc))
        return "_SetBo({}, {})".format(*fields)

    # Byte order to set.
    @property
    def bo(self):
        return self._bo
71aaa3f7
PP
172
173
# Label item.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    def __repr__(self):
        fields = (repr(self._name), repr(self._text_loc))
        return "_Label({}, {})".format(*fields)

    # Name of this label.
    @property
    def name(self):
        return self._name
71aaa3f7
PP
187
188
2adf4336
PP
# Offset setting item.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    def __repr__(self):
        fields = (repr(self._val), repr(self._text_loc))
        return "_SetOffset({}, {})".format(*fields)

    # Offset value to set (bytes).
    @property
    def val(self):
        return self._val
202
203
# Offset alignment item.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val
        self._pad_val = pad_val

    def __repr__(self):
        fields = (repr(self._val), repr(self._pad_val), repr(self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*fields)

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Value of the padding bytes.
    @property
    def pad_val(self):
        return self._pad_val
71aaa3f7
PP
225
226
# Mixin for items containing an AST expression along with the string
# it comes from.
class _ExprMixin:
    def __init__(self, expr_str: str, expr: ast.Expression):
        self._expr_str, self._expr = expr_str, expr

    # Expression node to evaluate.
    @property
    def expr(self):
        return self._expr

    # String of the expression.
    @property
    def expr_str(self):
        return self._expr_str
242
243
25ca454b
PP
# "Fill until" item: holds the expression (target offset) and the
# padding byte value.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    def __repr__(self):
        fields = (
            repr(self._expr_str),
            repr(self._expr),
            repr(self._pad_val),
            repr(self._text_loc),
        )
        return "_FillUntil({}, {}, {}, {})".format(*fields)

    # Value of the padding bytes.
    @property
    def pad_val(self):
        return self._pad_val
265
266
2adf4336
PP
# Variable assignment item: a variable name and the expression to
# assign to it.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    def __repr__(self):
        fields = (
            repr(self._name),
            repr(self._expr_str),
            repr(self._expr),
            repr(self._text_loc),
        )
        return "_VarAssign({}, {}, {}, {})".format(*fields)

    # Name of the variable.
    @property
    def name(self):
        return self._name
288
289
269f6eb3
PP
# Fixed-length number item (may need more than one byte).
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    def __repr__(self):
        fields = (
            repr(self._expr_str),
            repr(self._expr),
            repr(self._len),
            repr(self._text_loc),
        )
        return "_FlNum({}, {}, {}, {})".format(*fields)

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight, rounded down.
    @property
    def size(self):
        bits = self._len
        return bits // 8
315
316
05f81895
PP
# LEB128 integer item (base of the unsigned and signed variants).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the actual class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        fields = (
            self.__class__.__name__,
            repr(self._expr_str),
            repr(self._expr),
            repr(self._text_loc),
        )
        return "{}({}, {}, {})".format(*fields)
330
331
# Unsigned LEB128 integer item (adds nothing to its bases).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
335
336
# Signed LEB128 integer item (adds nothing to its bases).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
340
341
# Group item: a list of contained items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    def __repr__(self):
        fields = (repr(self._items), repr(self._text_loc))
        return "_Group({}, {})".format(*fields)

    # Items which this group contains.
    @property
    def items(self):
        return self._items
71aaa3f7
PP
355
356
# Repetition item: an item to repeat and an expression.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    def __repr__(self):
        fields = (
            repr(self._item),
            repr(self._expr_str),
            repr(self._expr),
            repr(self._text_loc),
        )
        return "_Rep({}, {}, {}, {})".format(*fields)

    # Item to repeat.
    @property
    def item(self):
        return self._item
378
379
27d52a19
PP
# Conditional item: an item and an expression.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    def __repr__(self):
        fields = (
            repr(self._item),
            repr(self._expr_str),
            repr(self._expr),
            repr(self._text_loc),
        )
        return "_Cond({}, {}, {}, {})".format(*fields)

    # Conditional item.
    @property
    def item(self):
        return self._item
401
402
320644e2
PP
# Macro definition item: a name, parameter names, and a group.
class _MacroDef(_Item):
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name = name
        self._param_names = param_names
        self._group = group

    def __repr__(self):
        fields = (
            repr(self._name),
            repr(self._param_names),
            repr(self._group),
            repr(self._text_loc),
        )
        return "_MacroDef({}, {}, {}, {})".format(*fields)

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Names of the parameters.
    @property
    def param_names(self):
        return self._param_names

    # Group holding the contained items.
    @property
    def group(self):
        return self._group
435
436
# Macro expansion parameter: an expression, its string, and its source
# text location.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr = expr_str, expr
        self._text_loc = text_loc

    def __repr__(self):
        fields = (repr(self._expr_str), repr(self._expr), repr(self._text_loc))
        return "_MacroExpParam({}, {}, {})".format(*fields)

    # String of the expression.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression node.
    @property
    def expr(self):
        return self._expr

    # Location of this parameter within the source text.
    @property
    def text_loc(self):
        return self._text_loc
463
464
# Macro expansion item: the name of the macro and the expansion
# parameters.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name = name
        self._params = params

    def __repr__(self):
        fields = (repr(self._name), repr(self._params), repr(self._text_loc))
        return "_MacroExp({}, {}, {})".format(*fields)

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Expansion parameters.
    @property
    def params(self):
        return self._params
2adf4336
PP
493
494
71aaa3f7
PP
# Parsing error: a message (the exception argument) and a source text
# location.
#
# Calling the class directly raises `NotImplementedError`; instances
# are built through the private _create() class method.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error without going through
    # __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    # Actual initialization, called by _create().
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._text_loc = text_loc

    # Location of the error within the source text.
    @property
    def text_loc(self):
        return self._text_loc
514
515
# Raises a parsing error built from the message `msg` and the text
# location `text_loc`; never returns.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
519
520
e57a18e1
PP
# Type of a variables dictionary, name to integer or floating point
# number value (for type hints).
VariablesT = Dict[str, Union[int, float]]


# Type of a labels dictionary, name to integer value (for type hints).
LabelsT = Dict[str, int]


# Pattern of a Python name.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")


# Type of a macro definition dictionary, name to macro definition item.
_MacroDefsT = Dict[str, _MacroDef]
535
536
71aaa3f7
PP
537# Normand parser.
538#
539# The constructor accepts a Normand input. After building, use the `res`
540# property to get the resulting main group.
541class _Parser:
# Builds a parser for the Normand input `normand` and parses it
# immediately.
#
# The keys of `variables` and `labels` seed the sets of known variable
# and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position within it
    self._normand = normand
    self._at = 0

    # Current text location
    self._line_no, self._col_no = 1, 1

    # Names known so far
    self._label_names = set(labels.keys())
    self._var_names = set(variables.keys())

    # Macro definitions found so far
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse now
    self._parse()
553
# Parsing result (the main group), read from `self._res`.
@property
def res(self):
    return self._res
558
320644e2
PP
# Macro definitions dictionary of this parser.
@property
def macro_defs(self):
    return self._macro_defs
563
71aaa3f7
PP
# Current text location: a new `TextLocation` object built from the
# current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
570
# Returns `True` if this parser is done parsing, that is, if the
# current position is the length of the input.
def _is_done(self):
    return len(self._normand) == self._at
574
# Returns `True` if this parser isn't done parsing (logical negation
# of _is_done()).
def _isnt_done(self):
    done = self._is_done()
    return not done
578
# Raises a parse error having the message `msg` and the current text
# location of this parser; never returns.
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
583
# Tries to make the pattern `pat` match the input at the current
# position.
#
# On success, advances `self._at` past the matched text, updates
# `self._line_no` and `self._col_no` accordingly, and returns the match
# object.
#
# Returns `None`, leaving the state of this parser untouched, if `pat`
# doesn't match.
def _try_parse_pat(self, pat: Pattern[str]):
    m = pat.match(self._normand, self._at)

    if m is None:
        return

    matched_str = m.group(0)

    # Skip matched string
    self._at += len(matched_str)

    # Update line number
    self._line_no += matched_str.count("\n")

    # Update column number: distance from the last newline located
    # before the new position.
    #
    # str.rfind() returns -1 when there's no such newline, which
    # naturally makes the column number `self._at + 1`. This also
    # handles a newline which is the very first character of the input:
    # it must count as a line break, not as the absence of one.
    self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

    # Return match object
    return m
611
# Same as _try_parse_pat(), but raises a parse error with the message
# `error_msg` (through self._raise_error()) when `pat` doesn't match
# instead of returning `None`.
def _expect_pat(self, pat: Pattern[str], error_msg: str):
    match = self._try_parse_pat(pat)

    if match is not None:
        # Success
        return match

    # No match: error
    self._raise_error(error_msg)
626
# Pattern for _skip_ws_and_comments(): any run of whitespaces, of the
# characters `/\?&:;.,[]_=|-`, and of comments (from `#` to the next
# `#` or newline).
_ws_or_syms_or_comments_pat = re.compile(
    r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
)

# Skips as many whitespaces, insignificant symbol characters, and
# comments as possible from the current position.
def _skip_ws_and_comments(self):
    skip_pat = self._ws_or_syms_or_comments_pat
    self._try_parse_pat(skip_pat)
636
320644e2
PP
# Pattern for _skip_ws(): any run of whitespaces.
_ws_pat = re.compile(r"\s*")

# Skips as many whitespaces as possible from the current position.
def _skip_ws(self):
    skip_pat = self._ws_pat
    self._try_parse_pat(skip_pat)
643
71aaa3f7
PP
# Pattern for _try_parse_hex_byte(): a single hexadecimal digit.
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated
# by whitespaces, insignificant symbols, and comments), returning a
# byte item on success.
#
# Returns `None` if there's no first nibble; raises a parse error if
# the second one is missing.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc

    # High nibble
    high_m = self._try_parse_pat(self._nibble_pat)

    if high_m is None:
        # Not a hexadecimal byte
        return

    # Low nibble (mandatory at this point)
    self._skip_ws_and_comments()
    low_m = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Combine both nibbles
    high = int(high_m.group(0), 16)
    low = int(low_m.group(0), 16)
    return _Byte((high << 4) | low, begin_text_loc)
71aaa3f7
PP
667
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

# Tries to parse a binary byte (`%` followed with eight bits, possibly
# separated by whitespaces, insignificant symbols, and comments),
# returning a byte item on success.
#
# Returns `None` if there's no `%` prefix; raises a parse error if any
# bit is missing.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # Not a binary byte
        return

    # Accumulate eight bits, most significant first
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        bit_m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
        val = (val << 1) | int(bit_m.group(0))

    return _Byte(val, begin_text_loc)
71aaa3f7
PP
691
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a possibly negative
# decimal constant), returning a byte item on success.
#
# Returns `None` if there's no `$` prefix; raises a parse error if the
# constant is missing or outside the range [-128, 255].
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # Not a decimal byte
        return

    # The value is mandatory at this point
    self._skip_ws()
    val_m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Signed value
    val = int(val_m.group("val"))

    if val_m.group("neg") == "-":
        val = -val

    # Validate
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Two's complement for a negative value
    return _Byte(val & 0xFF, begin_text_loc)
71aaa3f7
PP
721
# Tries to parse a byte in its hexadecimal, binary, and decimal forms
# (in this order), returning the first resulting byte item, or `None`
# if no form matches.
def _try_parse_byte(self):
    try_parse_funcs = (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            return item
741
# Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# Candidate escape sequence: a backslash followed with any character
# except a newline
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a literal string (optional `u16be`-like encoding
# prefix, then the text between double quotes), returning a string
# item holding the encoded bytes on success.
#
# Without encoding prefix, the encoding is UTF-8.
#
# Returns `None` if the string prefix doesn't match; raises a parse
# error if the end of the literal string is missing.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Replacing one escape sequence kind at a time over the whole
    # string is wrong: the second backslash of an escaped backslash
    # could pair with the following character (`\\n` would become a
    # backslash followed with a newline instead of a backslash followed
    # with `n`).
    #
    # An unknown escape sequence remains as is.
    escape_seq_strs = self._str_escape_seq_strs
    val = self._str_escape_seq_pat.sub(
        lambda esc_m: escape_seq_strs.get(esc_m.group(1), esc_m.group(0)),
        m.group(0),
    )

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
71aaa3f7 796
320644e2
PP
# Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Pattern for _try_parse_group(): `(`, `!g`, or `!group`
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group, returning a group item on success.
#
# A group opened with `(` must end with `)`; a group opened with `!g`
# or `!group` must end with what `self._block_end_pat` matches.
#
# Returns `None` if the group prefix doesn't match; raises a parse
# error if the end of the group is missing.
def _try_parse_group(self):
    begin_text_loc = self._text_loc

    # Opening
    open_m = self._try_parse_pat(self._group_prefix_pat)

    if open_m is None:
        # Not a group
        return

    # Contained items
    items = self._parse_items()

    # The closing depends on the opening
    self._skip_ws_and_comments()
    is_paren = open_m.group(0) == "("
    end_pat = self._right_paren_pat if is_paren else self._block_end_pat
    end_str = ")" if is_paren else "!end"
    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )

    return _Group(items, begin_text_loc)
71aaa3f7
PP
831
# Returns a tuple of the cleaned expression string (stripped, newlines
# replaced with spaces) and the corresponding AST expression node for
# the expression string `expr_str`.
#
# Raises a parse error at `text_loc` if the cleaned string isn't a
# syntactically valid Python expression.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    clean_expr_str = " ".join(expr_str.strip().split("\n"))

    try:
        expr = ast.parse(clean_expr_str, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(clean_expr_str),
            text_loc,
        )

    return clean_expr_str, expr
847
# Patterns for _try_parse_num_and_attr()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse an expression followed with `:` and an attribute (a
# fixed length in bits, `uleb128`, or `sleb128`), returning the
# corresponding fixed-length number or LEB128 integer item on success.
#
# Returns `None` if there's no `EXPR:` part; raises a parse error if
# the expression or the attribute is invalid.
def _try_parse_num_and_attr(self):
    begin_text_loc = self._text_loc

    # Expression part
    expr_m = self._try_parse_pat(self._val_expr_pat)

    if expr_m is None:
        # Not a number
        return

    expr_str, expr = self._ast_expr_from_str(expr_m.group(1), begin_text_loc)

    # Fixed length?
    len_m = self._try_parse_pat(self._fl_num_len_attr_pat)

    if len_m is not None:
        # Fixed-length number item
        return _FlNum(expr_str, expr, int(len_m.group(0)), begin_text_loc)

    # LEB128?
    leb128_m = self._try_parse_pat(self._leb128_int_attr_pat)

    if leb128_m is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    # LEB128 integer item
    if leb128_m.group(1) == "u":
        item_cls = _ULeb128Int
    else:
        item_cls = _SLeb128Int

    return item_cls(expr_str, expr, begin_text_loc)
71aaa3f7 892
320644e2
PP
# Patterns for _try_parse_var_assign()
_var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
_var_assign_expr_pat = re.compile(r"[^}]+")

# Tries to parse a variable assignment (`NAME = EXPR`), returning a
# variable assignment item on success and recording the name as a known
# variable name.
#
# Returns `None` if there's no `NAME =` part; raises a parse error if
# the name is reserved, if a label with this name exists, or if the
# expression is missing or invalid.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc

    # `NAME =` part
    name_m = self._try_parse_pat(self._var_assign_name_equal_pat)

    if name_m is None:
        # Not a variable assignment
        return

    # Validate the name
    var_name = name_m.group(1)

    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), begin_text_loc)

    # The expression is mandatory at this point
    self._skip_ws()
    expr_m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(expr_m.group(0), begin_text_loc)

    # The name is now a known variable name
    self._var_names.add(var_name)

    return _VarAssign(var_name, expr_str, expr, begin_text_loc)
937
# Pattern for _try_parse_set_bo(): `be` or `le`
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning the
# corresponding byte order setting item on success, or `None` if
# there's no match.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    bo_m = self._try_parse_pat(self._bo_pat)

    if bo_m is None:
        # Not a byte order name
        return

    bo_name = bo_m.group(0)

    if bo_name == "be":
        bo = ByteOrder.BE
    else:
        # The pattern only accepts `be` and `le`
        assert bo_name == "le"
        bo = ByteOrder.LE

    return _SetBo(bo, begin_text_loc)
71aaa3f7
PP
959
# Patterns for _try_parse_val_or_var_assign_or_set_bo()
_val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
_val_var_assign_set_bo_suffix_pat = re.compile(r"\}")

# Tries to parse, between `{` and `}`, a variable assignment, a number
# with its attribute, or a byte order setting (tried in this order),
# returning the resulting item on success.
#
# Returns `None` if there's no `{` prefix; raises a parse error if none
# of the three forms matches or if the closing `}` is missing.
def _try_parse_val_or_var_assign_or_set_bo(self):
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        # No `{`: not for us
        return

    self._skip_ws()

    # Try each form in order; keep the first resulting item
    try_parse_funcs = (
        self._try_parse_var_assign,
        self._try_parse_num_and_attr,
        self._try_parse_set_bo,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            break
    else:
        # At this point it's invalid
        self._raise_error(
            "Expecting a fixed-length number, a variable assignment, or a byte order setting"
        )

    # The closing `}` is mandatory
    self._skip_ws()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
995
# Common constant integer patterns
_pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))

# Tries to parse an offset setting value (after the initial `<`): a
# positive constant integer, either hexadecimal (`0x`/`0X` prefix) or
# decimal.
#
# Returns an offset setting item on success, or `None` if there's no
# match.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Select the base explicitly: int() with base 0 raises `ValueError`
    # for a decimal string with leading zeros (for example `010`),
    # which the pattern above accepts.
    val_str = m.group(0)
    base = 16 if val_str[:2] in ("0x", "0X") else 10

    # Return item
    return _SetOffset(int(val_str, base), begin_text_loc)
71aaa3f7
PP
1014
# Tries to parse a label name (after the initial `<`), returning a
# label item on success and recording the name as a known label name.
#
# Returns `None` if there's no name; raises a parse error if the name
# is reserved, is already a label name, or is a known variable name.
def _try_parse_label_name(self):
    begin_text_loc = self._text_loc
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is None:
        # Not a label name
        return

    # Validate the name
    label_name = name_m.group(0)

    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), begin_text_loc)

    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), begin_text_loc
        )

    # The name is now a known label name
    self._label_names.add(label_name)

    return _Label(label_name, begin_text_loc)
71aaa3f7 1046
# Patterns for _try_parse_label_or_set_offset()
_label_set_offset_prefix_pat = re.compile(r"<")
_label_set_offset_suffix_pat = re.compile(r">")

# Tries to parse, between `<` and `>`, an offset setting value or a
# label name (tried in this order), returning the resulting item on
# success.
#
# Returns `None` if there's no `<` prefix; raises a parse error if
# neither form matches or if the closing `>` is missing.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # No `<`: not for us
        return

    # Offset setting item first
    self._skip_ws()
    item = self._try_parse_set_offset_val()

    if item is None:
        # Label item then
        item = self._try_parse_label_name()

    if item is None:
        # At this point it's invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # The closing `>` is mandatory
    self._skip_ws()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1075
25ca454b
PP
# Pattern for _parse_pad_val()
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value (`~` followed with a positive
# constant integer, either hexadecimal with a `0x`/`0X` prefix or
# decimal), returning the padding byte value, or 0 if there's no `~`.
#
# Raises a parse error if the constant integer is missing after `~` or
# if it's greater than 255.
def _parse_pad_val(self):
    # Padding value?
    self._skip_ws()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No padding value: default
        return 0

    self._skip_ws()
    pad_val_text_loc = self._text_loc
    m = self._expect_pat(
        self._pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )

    # Select the base explicitly: int() with base 0 raises `ValueError`
    # for a decimal string with leading zeros (for example `007`),
    # which the constant integer pattern accepts.
    val_str = m.group(0)
    base = 16 if val_str[:2] in ("0x", "0X") else 10
    pad_val = int(val_str, base)

    # Validate
    if pad_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(pad_val),
            pad_val_text_loc,
        )

    return pad_val
1104
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Tries to parse an offset alignment (`@`, an alignment in bits, and
# an optional padding value), returning an offset alignment item on
# success.
#
# Returns `None` if there's no `@` prefix; raises a parse error if the
# alignment is missing or isn't a positive multiple of eight.
def _try_parse_align_offset(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # Not an offset alignment
        return

    # The alignment is mandatory at this point
    self._skip_ws()
    align_text_loc = self._text_loc
    align_m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )

    # Validate the alignment
    align = int(align_m.group(0))
    is_valid = align > 0 and align % 8 == 0

    if not is_valid:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            align_text_loc,
        )

    # Optional padding value, then the item
    return _AlignOffset(align, self._parse_pad_val(), begin_text_loc)
676f6189 1143
25ca454b
PP
# Patterns for _try_parse_fill_until()
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Tries to parse a filling (`+`, the expression of the offset to reach,
# and an optional padding value), returning a filling item on success,
# or `None` if there's no `+` prefix.
def _try_parse_fill_until(self):
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # Not a filling
        return

    # Offset expression: constant integer, name, or `{...}`
    self._skip_ws()
    offset_expr_str, offset_expr = self._expect_const_int_name_expr(True)

    # Optional padding value, then the item itself
    pad_val = self._parse_pad_val()
    return _FillUntil(offset_expr_str, offset_expr, pad_val, item_text_loc)
676f6189 1166
# Patterns for _expect_const_int_name_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses a constant integer if `accept_const_int` is `True` (possibly
# negative if `allow_neg` is `True`), a name, or an expression within
# `{` and `}`.
#
# Returns what self._ast_expr_from_str() returns for the parsed
# expression string and its text location.
def _expect_const_int_name_expr(
    self, accept_const_int: bool, allow_neg: bool = False
):
    begin_text_loc = self._text_loc

    # Constant integer?
    if accept_const_int:
        int_m = self._try_parse_pat(self._const_int_pat)

        if int_m is not None:
            if int_m.group("neg") == "-" and not allow_neg:
                _raise_error("Expecting a positive constant integer", begin_text_loc)

            return self._ast_expr_from_str(int_m.group(0), begin_text_loc)

    # Name?
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is not None:
        return self._ast_expr_from_str(name_m.group(0), begin_text_loc)

    # Expression?
    if self._try_parse_pat(self._inner_expr_prefix_pat) is None:
        # At this point it's invalid: tailor the message to what was
        # acceptable
        if accept_const_int:
            expected = "a {}constant integer, a name, or `{{`".format(
                "" if allow_neg else "positive "
            )
        else:
            expected = "a name or `{`"

        self._raise_error("Expecting {}".format(expected))

    # Expect an expression (its location is after the `{`)
    self._skip_ws()
    inner_text_loc = self._text_loc
    inner_m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # Expect `}`
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_m.group(0), inner_text_loc)
1223
27d52a19
PP
1224 # Parses the multiplier expression of a repetition (block or
1225 # post-item) and returns the expression string and AST node.
1226 def _expect_rep_mul_expr(self):
320644e2 1227 return self._expect_const_int_name_expr(True)
27d52a19
PP
1228
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Tries to parse a repetition block (`!r`/`!repeat`, the multiplier
# expression, the items, and `!end`), returning a repetition item on
# success, or `None` if there's no prefix.
def _try_parse_rep_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Contained items
    self._skip_ws_and_comments()
    group_text_loc = self._text_loc
    subitems = self._parse_items()

    # End of block
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    group = _Group(subitems, group_text_loc)
    return _Rep(group, mul_expr_str, mul_expr, block_text_loc)
1262
# Pattern for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b")

# Tries to parse a conditional block (`!if`, the condition expression,
# the items, and `!end`), returning a conditional item on success, or
# `None` if there's no prefix.
def _try_parse_cond_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return

    # Condition: a name or a `{...}` expression (no constant integer)
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_const_int_name_expr(False)

    # Contained items
    self._skip_ws_and_comments()
    group_text_loc = self._text_loc
    subitems = self._parse_items()

    # End of block
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of conditional block)",
    )

    group = _Group(subitems, group_text_loc)
    return _Cond(group, cond_expr_str, cond_expr, block_text_loc)
1294
320644e2
PP
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition (`!m`/`!macro`, a name, the
# parameter names between parentheses, the items, and `!end`).
#
# On success, registers the definition in `self._macro_defs` and
# returns `True`; returns `False` if there's no prefix.
def _try_parse_macro_def(self):
    macro_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name, which must be unique
    self._skip_ws()
    name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_text_loc)

    # Expect `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names until `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter names
            break

        # Every parameter name but the first follows a comma
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_text_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)

    # Parse the items of the macro with fresh sets of known variable
    # and label names, restoring the outer ones afterwards
    self._skip_ws_and_comments()
    group_text_loc = self._text_loc
    outer_var_names = self._var_names.copy()
    outer_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    subitems = self._parse_items()
    self._var_names = outer_var_names
    self._label_names = outer_label_names

    # End of block
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register macro
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, _Group(subitems, group_text_loc), macro_text_loc
    )
    return True
1381
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion (`m`, `:`, the name of a known
# macro, and the parameter values between parentheses), returning a
# macro expansion item on success, or `None` if there's no prefix.
#
# Raises a parse error if the macro is unknown or if the number of
# parameter values doesn't match the macro definition.
def _try_parse_macro_exp(self):
    begin_text_loc = self._text_loc

    # Match prefix
    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # No match
        return

    # Expect `:`
    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Expect a macro name
    self._skip_ws()
    name_text_loc = self._text_loc
    m = self._expect_pat(_py_name_pat, "Expecting a valid macro name")

    # Validate name
    name = m.group(0)
    macro_def = self._macro_defs.get(name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(name), name_text_loc)

    # Expect `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Try to parse comma-separated parameter values
    params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        # End?
        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End
            break

        # Every parameter value but the first follows a comma
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # Expect a value (constant integer, possibly negative, a name,
        # or `{...}`).
        #
        # Unpack explicitly instead of using `f(*seq, arg)`: a
        # positional argument after `*seq` is only valid syntax from
        # Python 3.5 (PEP 448), whereas this module targets 3.4 too.
        self._skip_ws()
        param_text_loc = self._text_loc
        expr_str, expr = self._expect_const_int_name_expr(True, True)
        params.append(_MacroExpParam(expr_str, expr, param_text_loc))

    # Validate parameter values
    if len(params) != len(macro_def.param_names):
        sing_plur = "" if len(params) == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                len(params), sing_plur, len(macro_def.param_names)
            ),
            params_text_loc,
        )

    # Return item
    return _MacroExp(name, params, begin_text_loc)
1454
71aaa3f7
PP
1455 # Tries to parse a base item (anything except a repetition),
1456 # returning it on success.
1457 def _try_parse_base_item(self):
1458 # Byte item?
1459 item = self._try_parse_byte()
1460
1461 if item is not None:
1462 return item
1463
1464 # String item?
1465 item = self._try_parse_str()
1466
1467 if item is not None:
1468 return item
1469
2adf4336
PP
1470 # Value, variable assignment, or byte order setting item?
1471 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
1472
1473 if item is not None:
1474 return item
1475
2adf4336
PP
1476 # Label or offset setting item?
1477 item = self._try_parse_label_or_set_offset()
71aaa3f7 1478
676f6189
PP
1479 if item is not None:
1480 return item
1481
1482 # Offset alignment item?
1483 item = self._try_parse_align_offset()
1484
25ca454b
PP
1485 if item is not None:
1486 return item
1487
1488 # Filling item?
1489 item = self._try_parse_fill_until()
1490
71aaa3f7
PP
1491 if item is not None:
1492 return item
1493
1494 # Group item?
1495 item = self._try_parse_group()
1496
1497 if item is not None:
1498 return item
1499
320644e2 1500 # Repetition block item?
e57a18e1 1501 item = self._try_parse_rep_block()
71aaa3f7 1502
e57a18e1
PP
1503 if item is not None:
1504 return item
1505
27d52a19
PP
1506 # Conditional block item?
1507 item = self._try_parse_cond_block()
1508
1509 if item is not None:
1510 return item
1511
320644e2
PP
1512 # Macro expansion?
1513 item = self._try_parse_macro_exp()
1514
1515 if item is not None:
1516 return item
1517
e57a18e1
PP
1518 # Pattern for _try_parse_rep_post()
1519 _rep_post_prefix_pat = re.compile(r"\*")
1520
1521 # Tries to parse a post-item repetition, returning the expression
1522 # string and AST expression node on success.
1523 def _try_parse_rep_post(self):
71aaa3f7 1524 # Match prefix
e57a18e1 1525 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
71aaa3f7 1526 # No match
2adf4336 1527 return
71aaa3f7 1528
e57a18e1 1529 # Return expression string and AST expression
71aaa3f7 1530 self._skip_ws_and_comments()
e57a18e1 1531 return self._expect_rep_mul_expr()
71aaa3f7 1532
1ca7b5e1
PP
# Tries to parse an item, possibly followed by a repetition, returning
# `True` on success and `False` if there's no item at the current
# position.
#
# Appends any parsed item to `items`.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()

    # Base item
    item = self._try_parse_base_item()

    if item is None:
        # Explicit `False` to honour the documented boolean contract
        return False

    # Parse repetition if the base item is repeatable
    if isinstance(item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_ret = self._try_parse_rep_post()

        if rep_ret is not None:
            # Unpack explicitly instead of using `f(x, *seq, y)`: a
            # positional argument after `*seq` is only valid syntax
            # from Python 3.5 (PEP 448), whereas this module targets
            # 3.4 too.
            expr_str, expr = rep_ret
            item = _Rep(item, expr_str, expr, rep_text_loc)

    items.append(item)
    return True
71aaa3f7
PP
1557
# Parses and returns items, skipping whitespaces, insignificant
# symbols, and comments when allowed, and stopping at the first unknown
# character.
#
# Accepts and registers macro definitions if `accept_macro_defs` is
# `True`.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        # A regular item?
        if self._try_append_item(parsed_items):
            continue

        # A macro definition (nothing to append)?
        if accept_macro_defs and self._try_parse_macro_def():
            continue

        # Unknown at this point
        break

    return parsed_items
1577
# Parses the whole Normand input, setting `self._res` to the main group
# item on success.
#
# Raises a parse error if anything remains after the last valid item.
def _parse(self):
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items (macro definitions are accepted here)
    top_items = self._parse_items(True)

    # Nothing but whitespaces and comments may remain
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    # Set main group item
    self._res = _Group(top_items, self._text_loc)
1599
1600
# The return type of parse().
#
# Direct instantiation raises `NotImplementedError`: instances are
# built with the _create() class method.
class ParseResult:
    # Creates and returns an initialized instance without calling
    # __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Stores the result parts.
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1658
1659
# Raises a parse error having the message `msg` at the text location of
# the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1664
1665
1666# The `ICITTE` reserved name.
1667_icitte_name = "ICITTE"
1668
1669
2adf4336
PP
1670# Base node visitor.
1671#
1672# Calls the _visit_name() method for each name node which isn't the name
1673# of a call.
1674class _NodeVisitor(ast.NodeVisitor):
1675 def __init__(self):
71aaa3f7
PP
1676 self._parent_is_call = False
1677
1678 def generic_visit(self, node: ast.AST):
1679 if type(node) is ast.Call:
1680 self._parent_is_call = True
1681 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1682 self._visit_name(node.id)
71aaa3f7
PP
1683
1684 super().generic_visit(node)
1685 self._parent_is_call = False
1686
2adf4336
PP
1687 @abc.abstractmethod
1688 def _visit_name(self, name: str):
1689 ...
1690
71aaa3f7 1691
2adf4336
PP
# Expression validator: validates that all the names within the
# expression are allowed.
#
# `expr_str` and `text_loc` are only used to build the parse error
# raised for an illegal name; `allowed_names` is the set of legal names
# (the `ICITTE` reserved name is always legal).
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # Make sure the name refers to a known and reachable
        # variable/label name.
        if name == _icitte_name or name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # The legal names always include `ICITTE`, so the list is never
        # empty: no need to guard the suffix
        legal_names = set(self._allowed_names)
        legal_names.add(_icitte_name)
        legal_names_str = ", ".join(
            sorted(["`{}`".format(legal_name) for legal_name in legal_names])
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)

        _raise_error(
            msg,
            self._text_loc,
        )
1722
1723
2adf4336
PP
# Generator state: current variables, labels, offset, and byte order.
#
# The variables and labels given at construction time are copied.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.variables = variables.copy()
        self.labels = labels.copy()
        self.offset = offset
        self.bo = bo

    def __repr__(self):
        return "_GenState({!r}, {!r}, {!r}, {!r})".format(
            self.variables, self.labels, self.offset, self.bo
        )
1742
1743
# Fixed-length number item instance: a fixed-length number item, the
# offset of its bytes within the generated data, and a generation state
# to evaluate its expression with.
class _FlNumItemInst:
    def __init__(self, item: _FlNum, offset_in_data: int, state: _GenState):
        self._item = item
        self._offset_in_data = offset_in_data
        self._state = state

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the bytes of this instance within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state of this instance.
    @property
    def state(self):
        return self._state
1762
71aaa3f7 1763
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Handle each item in prefix order.
#
#    The handlers append bytes to `self._data` and update some current
#    state object (`_GenState` instance).
#
#    When handling a fixed-length number item, try to evaluate its
#    expression using the current state. If this fails, then it might be
#    because the expression refers to a "future" label: save the current
#    offset in `self._data` (generated data) and a snapshot of the
#    current state within `self._fl_num_item_insts` (`_FlNumItemInst`
#    object). _gen_fl_num_item_insts() will deal with this later.
#
#    When handling the items of a group, keep a map of immediate label
#    names to their offset. Then, after having processed all the items,
#    update the relevant saved state snapshots in
#    `self._fl_num_item_insts` with those immediate label values.
#    _gen_fl_num_item_insts() will deal with this later.
#
# 2. Handle all the fixed-length number item instances of which the
#    expression evaluation failed before.
#
#    At this point, `self._fl_num_item_insts` contains everything that's
#    needed to evaluate the expressions, including the values of
#    "future" labels from the point of view of some fixed-length number
#    item instance.
#
#    If an evaluation fails at this point, then it's a user error.
71aaa3f7
PP
1799class _Gen:
# Builds a generator and immediately generates from the group `group`.
#
# `macro_defs` holds the known macro definitions; `variables`,
# `labels`, `offset`, and `bo` make the initial generation state.
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    self._macro_defs = macro_defs

    # Fixed-length number item instances to generate later
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]

    # Generate from the initial state
    init_state = _GenState(variables, labels, offset, bo)
    self._gen(group, init_state)
71aaa3f7
PP
1812
# Generated bytes (`self._data`).
@property
def data(self):
    generated = self._data
    return generated
1817
# Updated variables (from the final generation state).
@property
def variables(self):
    final_state = self._final_state
    return final_state.variables
71aaa3f7
PP
1822
# Updated main group labels (from the final generation state).
@property
def labels(self):
    final_state = self._final_state
    return final_state.labels
71aaa3f7
PP
1827
# Updated offset (from the final generation state).
@property
def offset(self):
    final_state = self._final_state
    return final_state.offset
71aaa3f7
PP
1832
# Updated byte order (from the final generation state).
@property
def bo(self):
    final_state = self._final_state
    return final_state.bo
1837
320644e2
PP
# Evaluates the expression `expr` of which the original string is
# `expr_str` at the location `text_loc` considering the current
# generation state `state`.
#
# If `allow_float` is `True`, then the type of the result may be
# `float` too.
#
# Raises a parse error if the expression uses an illegal name, fails to
# evaluate, or has an unexpected result type.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    # Symbols available to the expression: the labels, then `ICITTE`
    # (current offset), then the variables, a later source replacing
    # any same-named symbol of an earlier one
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # Compile and evaluate expression node.
    #
    # NOTE(review): eval() executes Python code coming from the Normand
    # input; only names are validated above. Confirm that inputs are
    # trusted.
    try:
        val = eval(compile(expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Convert `bool` result type to `int` to normalize
    if type(val) is bool:
        val = int(val)

    # Validate result type (exact types, not subclasses)
    if allow_float:
        valid_types = (int, float)
        type_msg = "`int` or `float`"
    else:
        valid_types = (int,)
        type_msg = "`int`"

    if type(val) not in valid_types:
        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, type_msg, type(val).__name__
            ),
            text_loc,
        )

    return val
1894
320644e2
PP
# Evaluates the expression of `item` (its `expr_str`, `expr`, and
# `text_loc` properties) considering the current generation state
# `state`.
#
# If `allow_float` is `True`, then the type of the result may be
# `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    val = _Gen._eval_expr(
        item.expr_str, item.expr, item.text_loc, state, allow_float
    )
    return val
1909
# Handles the byte item `item`: appends its value to the generated data
# and advances the offset by its size.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    byte_val = item.val
    self._data.append(byte_val)
    state.offset += item.size
1914
# Handles the string item `item`: appends its data to the generated
# data and advances the offset by its size.
def _handle_str_item(self, item: _Str, state: _GenState):
    str_data = item.data
    self._data += str_data
    state.offset += item.size
1919
# Handles the byte order setting item `item`: makes its byte order the
# current one.
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    new_bo = item.bo
    state.bo = new_bo
1924
# Handles the variable assignment item `item`: evaluates its expression
# (`int` or `float` result) and updates the variable.
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    new_val = self._eval_item_expr(item, state, True)
    state.variables[item.name] = new_val
1929
# Handles the fixed-length number item `item`.
#
# If the data can't be generated right now, zero bytes are appended as
# a placeholder and an instance is recorded in
# `self._fl_num_item_insts` for a later attempt.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    # A byte order is required to encode on more than one byte
    if item.len > 8 and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # Try an immediate evaluation. If it fails, then keep everything
    # needed to (try to) generate the bytes of this item later.
    try:
        item_data = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Record where the bytes belong along with a state snapshot
        inst = _FlNumItemInst(item, len(self._data), copy.deepcopy(state))
        self._fl_num_item_insts.append(inst)

        # Reserve space in `self._data` for this instance
        item_data = b"\x00" * (item.len // 8)

    # Append bytes and update offset
    self._data += item_data
    state.offset += len(item_data)
1958
05f81895
PP
1959 # Returns the size, in bytes, required to encode the value `val`
1960 # with LEB128 (signed version if `is_signed` is `True`).
1961 @staticmethod
1962 def _leb128_size_for_val(val: int, is_signed: bool):
1963 if val < 0:
1964 # Equivalent upper bound.
1965 #
1966 # For example, if `val` is -128, then the full integer for
1967 # this number of bits would be [-128, 127].
1968 val = -val - 1
1969
1970 # Number of bits (add one for the sign if needed)
1971 bits = val.bit_length() + int(is_signed)
1972
1973 if bits == 0:
1974 bits = 1
1975
1976 # Seven bits per byte
1977 return math.ceil(bits / 7)
1978
320644e2
PP
# Handles the LEB128 integer item `item`: evaluates its expression,
# appends the encoded bytes to the generated data, and advances the
# offset.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    # Value to encode (`int` only)
    remaining = self._eval_item_expr(item, state, False)

    # Size in bytes (signed encoding for a signed LEB128 item)
    size = self._leb128_size_for_val(remaining, type(item) is _SLeb128Int)

    # Each byte holds the next seven LSBs with its MSB set (continue)
    encoded = bytearray()

    for _ in range(size):
        encoded.append((remaining & 0x7F) | 0x80)
        remaining >>= 7

    # Clear MSB of last byte (stop)
    encoded[-1] &= ~0x80

    # Append bytes and update offset
    self._data += encoded
    state.offset += size
27d52a19 1998
320644e2
PP
# Handles the group item `item`, removing the immediate labels from
# `state` at the end if `remove_immediate_labels` is `True`.
#
# The immediate labels are also added to the state snapshot of each
# fixed-length number item instance recorded while handling this group.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Instances recorded from this index on belong to this group
    first_inst_index = len(self._fl_num_item_insts)

    # Names of the labels directly within this group to their offset
    group_labels = {}  # type: LabelsT

    for subitem in item.items:
        if type(subitem) is _Label:
            # Offset before handling the label item itself
            group_labels[subitem.name] = state.offset

        self._handle_item(subitem, state)

    # Remove immediate labels from current state if needed
    if remove_immediate_labels:
        for label_name in group_labels:
            del state.labels[label_name]

    # Make the immediate labels visible to the pending instances of
    # this group
    for pending_inst in self._fl_num_item_insts[first_inst_index:]:
        pending_inst.state.labels.update(group_labels)
2adf4336 2024
320644e2
PP
# Handles the repetition item `item`: evaluates its multiplier
# expression and handles the repeated item that many times.
#
# Raises a parse error if the multiplier is negative.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    # Repetition count
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    # Generate item data `count` times
    repeated_item = item.item

    for _ in range(count):
        self._handle_item(repeated_item, state)
2adf4336 2042
320644e2
PP
# Handles the conditional item `item`: evaluates its expression and,
# if the result is true (non-zero), handles the contained item.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    # Compute the conditional value
    val = _Gen._eval_item_expr(item, state)

    # Generate item data if needed
    if val:
        self._handle_item(item.item, state)
2adf4336 2051
320644e2
PP
# Evaluates the parameters of the macro expansion item `item`
# considering the initial state `init_state` and returns a new state to
# handle the items of the macro.
#
# The new state has no labels, only the parameters as variables, and
# the offset and byte order of `init_state`.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    macro_state = _GenState({}, {}, init_state.offset, init_state.bo)
    param_names = self._macro_defs[item.name].param_names

    # Each parameter value (`int` or `float`) becomes a variable named
    # after the corresponding parameter of the definition
    for param_name, param in zip(param_names, item.params):
        param_val = _Gen._eval_expr(
            param.expr_str, param.expr, param.text_loc, init_state, True
        )
        macro_state.variables[param_name] = param_val

    return macro_state
2adf4336 2068
320644e2
PP
# Handles the macro expansion item `item`: handles the group of the
# macro with a dedicated state, then advances the offset of `state` by
# the number of bytes which were generated.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    # Dedicated state holding the parameter values
    macro_state = self._eval_macro_exp_params(item, state)

    # Process the contained group
    size_before = len(self._data)
    macro_group = self._macro_defs[item.name].group
    self._handle_item(macro_group, macro_state)

    # Update state offset
    generated_size = len(self._data) - size_before
    state.offset += generated_size
676f6189 2080
320644e2
PP
# Handles the offset setting item `item`: makes its value the current
# offset.
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    new_offset = item.val
    state.offset = new_offset
2adf4336 2084
# Handles the offset alignment item `item` (adds padding): rounds the
# current offset up to the next multiple of the alignment, appending
# padding bytes for the difference.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    offset_before = state.offset

    # The alignment of the item is in bits
    align_bytes = item.val // 8

    # Round up to the next multiple of `align_bytes`
    aligned_offset = (offset_before + align_bytes - 1) // align_bytes * align_bytes
    state.offset = aligned_offset
    self._data += bytes([item.pad_val] * (aligned_offset - offset_before))
2adf4336 2091
25ca454b
PP
# Handles the filling item `item` (adds padding): appends padding bytes
# until the current offset reaches the value of its expression.
#
# Raises a parse error if the offset to reach is less than the current
# one.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    # Offset to reach
    target_offset = _Gen._eval_item_expr(item, state)

    # Filling can't move the offset backwards
    if target_offset < state.offset:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target_offset, state.offset
            ),
            item,
        )

    # Fill and update offset
    fill_size = target_offset - state.offset
    self._data += bytes([item.pad_val] * fill_size)
    state.offset = target_offset
2111
320644e2
PP
# Handles the label item `item`: sets the label to the current offset.
def _handle_label_item(self, item: _Label, state: _GenState):
    current_offset = state.offset
    state.labels[item.name] = current_offset
2adf4336 2115
320644e2
PP
# Handles the item `item` with the handler registered in
# `self._item_handlers` for its exact type, returning what this handler
# returns.
def _handle_item(self, item: _Item, state: _GenState):
    handler = self._item_handlers[type(item)]
    return handler(item, state)
71aaa3f7 2120
320644e2
PP
2121 # Generates the data for a fixed-length integer item instance having
2122 # the value `val` and returns it.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    """Returns the bytes encoding the integer `val` for the
    fixed-length number item `item`.

    `val` must fit in `item.len` bits, either as a signed or as an
    unsigned integer; otherwise an error is reported through
    `_raise_error_for_item()`.

    The byte order is `state.bo`, a missing byte order (`None`) being
    handled like big endian.
    """
    # Accepted range: from the smallest signed value to the largest
    # unsigned value which fit in `item.len` bits
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not lowest <= val <= highest:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    big_endian = state.bo in (None, ByteOrder.BE)

    # Always encode on 64 bits so that the sign bit is extended
    # whatever the value of `item.len`
    fmt = (">" if big_endian else "<") + ("Q" if val >= 0 else "q")
    encoded = struct.pack(fmt, val)

    # Only keep the least significant bytes: the last ones in big
    # endian, the first ones in little endian
    byte_count = item.len // 8

    if big_endian:
        return encoded[-byte_count:]

    assert state.bo == ByteOrder.LE
    return encoded[:byte_count]
269f6eb3 2156
320644e2
PP
2157 # Generates the data for a fixed-length floating point number item
2158 # instance having the value `val` and returns it.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    """Returns the bytes encoding the floating point number `val` for
    the fixed-length number item `item`.

    Only lengths (`item.len`) of 32 and 64 bits are supported; an
    error is reported through `_raise_error_for_item()` for any other
    length.

    The byte order is `state.bo`, a missing byte order (`None`) being
    handled like big endian.
    """
    supported_len = item.len == 32 or item.len == 64

    if not supported_len:
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # `struct` format: byte order character followed by the type
    # character (`f` for 32 bits, `d` for 64 bits)
    order_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"
    return struct.pack(order_char + type_char, val)
2177
320644e2
PP
2178 # Generates the data for a fixed-length number item instance and
2179 # returns it.
2180 def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
269f6eb3 2181 # Compute value
e57a18e1 2182 val = self._eval_item_expr(item, state, True)
269f6eb3 2183
269f6eb3
PP
2184 # Handle depending on type
2185 if type(val) is int:
320644e2 2186 return self._gen_fl_int_item_inst_data(val, item, state)
269f6eb3
PP
2187 else:
2188 assert type(val) is float
320644e2 2189 return self._gen_fl_float_item_inst_data(val, item, state)
05f81895 2190
320644e2
PP
2191 # Generates the data for all the fixed-length number item instances
2192 # and writes it at the correct offset within `self._data`.
2193 def _gen_fl_num_item_insts(self):
2194 for inst in self._fl_num_item_insts:
2195 # Generate bytes
2196 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
05f81895 2197
320644e2
PP
2198 # Insert bytes into `self._data`
2199 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2adf4336
PP
2200
2201 # Generates the data (`self._data`) and final state
2202 # (`self._final_state`) from `group` and the initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    """Generates the data (`self._data`) and the final state
    (`self._final_state`) from `group` and the initial state `state`.

    `state` is updated in place while handling `group` and is then
    kept as the final state. The bytes of the fixed-length number
    items are generated last, once the whole group is handled.
    """
    # No data yet
    self._data = bytearray()

    # Table of item types to handling methods, used by
    # `_handle_item()`
    handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]
    self._item_handlers = handlers

    # Handle the root group _without_ removing its immediate labels
    # (`False`) because the `labels` property offers them
    self._handle_group_item(group, state, False)

    # `state` is now the final state
    self._final_state = state

    # The full state of the fixed-length number items is known at
    # this point: generate their bytes
    self._gen_fl_num_item_insts()
2236
71aaa3f7
PP
2237
2238# Returns a `ParseResult` instance containing the bytes encoded by the
2239# input string `normand`.
2240#
2241# `init_variables` is a dictionary of initial variable names (valid
2242# Python names) to integral values. A variable name must not be the
2243# reserved name `ICITTE`.
2244#
2245# `init_labels` is a dictionary of initial label names (valid Python
2246# names) to integral values. A label name must not be the reserved name
2247# `ICITTE`.
2248#
2249# `init_offset` is the initial offset.
2250#
2251# `init_byte_order` is the initial byte order.
2252#
2253# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    """Returns a `ParseResult` instance containing the bytes encoded
    by the input string `normand`.

    `init_variables` and `init_labels` are the initial variables and
    labels (a new empty dictionary is used for each one which is
    `None`), `init_offset` is the initial offset, and
    `init_byte_order` is the initial byte order.

    The result is built from the data, variables, labels, offset, and
    byte order of the generator once the generation is done.

    Raises `ParseError` on any parsing error.
    """
    # Missing initial variables/labels mean none
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse, and then generate from what the parser offers
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2279
2280
2281# Parses the command-line arguments.
def _parse_cli_args():
    """Parses the command-line arguments and returns the resulting
    `argparse` namespace.

    Supported: `--offset`, `-b`/`--byte-order`, `--var` and
    `-l`/`--label` (both may be repeated), `--version`, and an
    optional positional input path.
    """
    import argparse

    # Names/flags and options of each argument, in registration order
    arg_specs = [
        (
            ("--offset",),
            dict(
                metavar="OFFSET",
                action="store",
                type=int,
                default=0,
                help="initial offset (positive)",
            ),
        ),
        (
            ("-b", "--byte-order"),
            dict(
                metavar="BO",
                choices=["be", "le"],
                type=str,
                help="initial byte order (`be` or `le`)",
            ),
        ),
        (
            ("--var",),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial variable (may be repeated)",
            ),
        ),
        (
            ("-l", "--label"),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial label (may be repeated)",
            ),
        ),
        (
            ("--version",),
            dict(action="version", version="Normand {}".format(__version__)),
        ),
        (
            ("path",),
            dict(
                metavar="PATH",
                action="store",
                nargs="?",
                help="input path (none means standard input)",
            ),
        ),
    ]

    # Build parser
    arg_parser = argparse.ArgumentParser()

    for names, options in arg_specs:
        arg_parser.add_argument(*names, **options)

    # Parse
    return arg_parser.parse_args()
2329
2330
2331# Raises a command-line error with the message `msg`.
2332def _raise_cli_error(msg: str) -> NoReturn:
2333 raise RuntimeError("Command-line error: {}".format(msg))
2334
2335
2336# Returns a dictionary of string to integers from the list of strings
2337# `args` containing `NAME=VAL` entries.
2338def _dict_from_arg(args: Optional[List[str]]):
e57a18e1 2339 d = {} # type: LabelsT
71aaa3f7
PP
2340
2341 if args is None:
2342 return d
2343
2344 for arg in args:
2345 m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), arg)
2346
2347 if m is None:
2348 _raise_cli_error("Invalid assignment {}".format(arg))
2349
2e1c1acd
PP
2350 d[m.group(1)] = int(m.group(2))
2351
71aaa3f7
PP
2352 return d
2353
2354
2355# CLI entry point without exception handling.
def _try_run_cli():
    """CLI entry point without exception handling.

    Parses the command-line arguments, reads the Normand input (from
    the given path, or from the standard input when there's none),
    builds the initial variables, labels, offset, and byte order,
    parses the input, and writes the resulting bytes to the standard
    output.

    A negative initial offset is reported with `_raise_cli_error()`.
    A `ParseError` is reported with `_fail()`, the message being
    prefixed with the absolute input path (if any), the line number,
    and the column number of the error.
    """
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # `argparse` only accepts `be` and `le` for this option
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        # `_fail()` doesn't return: `res` is always bound below
        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print (binary data: use the underlying binary buffer)
    sys.stdout.buffer.write(res.data)
2404
2405
2406# Prints the exception message `msg` and exits with status 1.
2407def _fail(msg: str) -> NoReturn:
2408 if not msg.endswith("."):
2409 msg += "."
2410
2411 print(msg, file=sys.stderr)
2412 sys.exit(1)
2413
2414
2415# CLI entry point.
def _run_cli():
    """CLI entry point.

    Runs `_try_run_cli()` and reports any `Exception` it raises with
    `_fail()`, using the message of the exception.
    """
    try:
        _try_run_cli()
    except Exception as error:
        # Top-level boundary: turn any error into a message on the
        # standard error and a non-zero exit status
        message = str(error)
        _fail(message)
2421
2422
# Only run the command-line tool when this module is executed as a
# script, not when it's imported.
if __name__ == "__main__":
    _run_cli()
This page took 0.122932 seconds and 4 git commands to generate.