Accept many more prefixes and suffixes for a constant integer
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
fc21bb27 33__version__ = "0.13.0"
71aaa3f7 34__all__ = [
320644e2
PP
35 "__author__",
36 "__version__",
71aaa3f7 37 "ByteOrder",
320644e2 38 "LabelsT",
71aaa3f7
PP
39 "parse",
40 "ParseError",
41 "ParseResult",
e57a18e1 42 "TextLocation",
e57a18e1 43 "VariablesT",
71aaa3f7
PP
44]
45
46import re
47import abc
48import ast
49import sys
320644e2 50import copy
71aaa3f7 51import enum
05f81895 52import math
71aaa3f7 53import struct
e57a18e1
PP
54import typing
55from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
56
57
# Location within some text: a line number and a column number.
#
# Instances are built internally with _create(); calling the
# constructor directly always raises `NotImplementedError`.
class TextLocation:
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a text location bypassing __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    # Real initializer (called by _create()).
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    def __repr__(self):
        fields = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*fields)

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no
2adf4336 85
71aaa3f7
PP
86
# Base of all items: only keeps the source text location.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Text location of this item within the source.
    @property
    def text_loc(self) -> TextLocation:
        return self._text_loc
96
2adf4336
PP
97
# Scalar item: an item which has a size.
class _ScalarItem(_Item):
    # Size of this item (bytes); concrete scalar items provide it.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
105
106
107# A repeatable item.
2adf4336 108class _RepableItem:
71aaa3f7
PP
109 pass
110
111
# Item holding a single byte value.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        self._val = val
        super().__init__(text_loc)

    def __repr__(self):
        fields = (hex(self._val), repr(self._text_loc))
        return "_Byte({}, {})".format(*fields)

    # Size (bytes): always one.
    @property
    def size(self) -> int:
        return 1

    # Value of the byte.
    @property
    def val(self) -> int:
        return self._val
71aaa3f7
PP
129
130
# Item holding an already encoded string.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        self._data = data
        super().__init__(text_loc)

    def __repr__(self):
        return "_Str({}, {})".format(*map(repr, (self._data, self._text_loc)))

    # Size (bytes): length of the encoded data.
    @property
    def size(self) -> int:
        return len(self._data)

    # Encoded bytes of the string.
    @property
    def data(self) -> bytes:
        return self._data
71aaa3f7
PP
148
149
# Supported byte orders (each member has a unique value).
@enum.unique
class ByteOrder(enum.Enum):
    BE = "be"  # Big endian
    LE = "le"  # Little endian
158
159
2adf4336
PP
# Item which sets the current byte order.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        self._bo = bo
        super().__init__(text_loc)

    def __repr__(self):
        return "_SetBo({}, {})".format(*map(repr, (self._bo, self._text_loc)))

    # Byte order to set.
    @property
    def bo(self) -> ByteOrder:
        return self._bo
71aaa3f7
PP
172
173
# Label item: a name attached to a text location.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        self._name = name
        super().__init__(text_loc)

    def __repr__(self):
        return "_Label({}, {})".format(*map(repr, (self._name, self._text_loc)))

    # Name of the label.
    @property
    def name(self) -> str:
        return self._name
71aaa3f7
PP
187
188
2adf4336
PP
# Item which sets the current offset.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        self._val = val
        super().__init__(text_loc)

    def __repr__(self):
        return "_SetOffset({}, {})".format(*map(repr, (self._val, self._text_loc)))

    # New offset value (bytes).
    @property
    def val(self) -> int:
        return self._val
202
203
# Item which aligns the current offset, padding with some byte value.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        self._val = val
        self._pad_val = pad_val
        super().__init__(text_loc)

    def __repr__(self):
        fields = map(repr, (self._val, self._pad_val, self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*fields)

    # Value of the padding byte.
    @property
    def pad_val(self) -> int:
        return self._pad_val

    # Alignment value (bits).
    @property
    def val(self) -> int:
        return self._val
71aaa3f7
PP
225
226
227# Mixin of containing an AST expression and its string.
228class _ExprMixin:
229 def __init__(self, expr_str: str, expr: ast.Expression):
230 self._expr_str = expr_str
231 self._expr = expr
232
233 # Expression string.
234 @property
235 def expr_str(self):
236 return self._expr_str
237
238 # Expression node to evaluate.
239 @property
240 def expr(self):
241 return self._expr
242
243
25ca454b
PP
# Item which fills, with some padding byte value, until the offset
# given by an expression.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    def __repr__(self):
        fields = map(
            repr, (self._expr_str, self._expr, self._pad_val, self._text_loc)
        )
        return "_FillUntil({}, {}, {}, {})".format(*fields)

    # Value of the padding byte.
    @property
    def pad_val(self) -> int:
        return self._pad_val
265
266
2adf4336
PP
# Item which assigns the result of an expression to a named variable.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    def __repr__(self):
        fields = map(repr, (self._name, self._expr_str, self._expr, self._text_loc))
        return "_VarAssign({}, {}, {}, {})".format(*fields)

    # Name of the variable.
    @property
    def name(self) -> str:
        return self._name
288
289
269f6eb3
PP
# Number of a fixed length (bits) which may need more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    def __repr__(self):
        fields = map(repr, (self._expr_str, self._expr, self._len, self._text_loc))
        return "_FlNum({}, {}, {}, {})".format(*fields)

    # Size (bytes): the length in bits divided by eight.
    @property
    def size(self):
        bit_len = self._len
        return bit_len // 8

    # Length (bits).
    @property
    def len(self):
        return self._len
315
316
05f81895
PP
# Common base of the LEB128 integer items.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the concrete class so that subclasses don't
    # need their own __repr__().
    def __repr__(self):
        cls_name = self.__class__.__name__
        fields = map(repr, (self._expr_str, self._expr, self._text_loc))
        return "{}({}, {}, {})".format(cls_name, *fields)
330
331
# LEB128 integer item, unsigned flavor (behavior inherited as is).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
335
336
# LEB128 integer item, signed flavor (behavior inherited as is).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
340
341
# Item containing a list of other items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        self._items = items
        super().__init__(text_loc)

    def __repr__(self):
        return "_Group({}, {})".format(*map(repr, (self._items, self._text_loc)))

    # Items contained in this group.
    @property
    def items(self) -> List[_Item]:
        return self._items
71aaa3f7
PP
355
356
# Item which repeats another item; the expression is kept alongside
# the item to repeat.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    def __repr__(self):
        fields = map(repr, (self._item, self._expr_str, self._expr, self._text_loc))
        return "_Rep({}, {}, {}, {})".format(*fields)

    # The item to repeat.
    @property
    def item(self) -> _Item:
        return self._item
378
379
27d52a19
PP
# Item which is conditional to an expression.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    def __repr__(self):
        fields = map(repr, (self._item, self._expr_str, self._expr, self._text_loc))
        return "_Cond({}, {}, {}, {})".format(*fields)

    # The conditional item.
    @property
    def item(self) -> _Item:
        return self._item
401
402
320644e2
PP
# Definition of a macro: a name, parameter names, and a group of items.
class _MacroDef(_Item):
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._param_names, self._group = name, param_names, group

    def __repr__(self):
        fields = map(
            repr, (self._name, self._param_names, self._group, self._text_loc)
        )
        return "_MacroDef({}, {}, {}, {})".format(*fields)

    # Group holding the contained items.
    @property
    def group(self) -> _Group:
        return self._group

    # Names of the parameters.
    @property
    def param_names(self) -> List[str]:
        return self._param_names

    # Name of the macro.
    @property
    def name(self) -> str:
        return self._name
435
436
# Single parameter of a macro expansion: an expression, its string,
# and the text location.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    def __repr__(self):
        fields = map(repr, (self._expr_str, self._expr, self._text_loc))
        return "_MacroExpParam({}, {}, {})".format(*fields)

    # Text location within the source.
    @property
    def text_loc(self) -> TextLocation:
        return self._text_loc

    # Expression node.
    @property
    def expr(self) -> ast.Expression:
        return self._expr

    # String of the expression.
    @property
    def expr_str(self) -> str:
        return self._expr_str
463
464
# Expansion of a named macro with a list of parameters.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    def __repr__(self):
        fields = map(repr, (self._name, self._params, self._text_loc))
        return "_MacroExp({}, {}, {})".format(*fields)

    # Parameters of the expansion.
    @property
    def params(self) -> List[_MacroExpParam]:
        return self._params

    # Name of the macro to expand.
    @property
    def name(self) -> str:
        return self._name
2adf4336
PP
493
494
71aaa3f7
PP
# Parsing error: a message (the exception argument) and the text
# location where the error occurred.
#
# Instances are built internally with _create(); calling the
# constructor directly always raises `NotImplementedError`.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error bypassing __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    # Real initializer (called by _create()).
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._text_loc = text_loc

    # Text location within the source.
    @property
    def text_loc(self) -> TextLocation:
        return self._text_loc
514
515
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
519
520
e57a18e1
PP
# Type of a variables dictionary: name to numeric value (type hints only).
VariablesT = typing.Dict[str, typing.Union[int, float]]


# Type of a labels dictionary: name to integer (type hints only).
LabelsT = typing.Dict[str, int]


# Pattern of a Python name.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
531
532
320644e2
PP
# Type of a macro definition dictionary: name to macro definition.
_MacroDefsT = typing.Dict[str, _MacroDef]
535
536
71aaa3f7
PP
537# Normand parser.
538#
539# The constructor accepts a Normand input. After building, use the `res`
540# property to get the resulting main group.
541class _Parser:
# Builds a parser for the Normand input `normand` and parses it right
# away.
#
# The keys of `variables` and `labels` seed the sets of already known
# variable and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position within it
    self._normand = normand
    self._at = 0
    self._line_no, self._col_no = 1, 1

    # Known names
    self._var_names = set(variables.keys())
    self._label_names = set(labels.keys())
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse immediately
    self._parse()
553
554 # Result (main group).
555 @property
556 def res(self):
557 return self._res
558
320644e2
PP
559 # Macro definitions.
560 @property
561 def macro_defs(self):
562 return self._macro_defs
563
71aaa3f7
PP
# Text location built from the current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
570
571 # Returns `True` if this parser is done parsing.
572 def _is_done(self):
573 return self._at == len(self._normand)
574
575 # Returns `True` if this parser isn't done parsing.
576 def _isnt_done(self):
577 return not self._is_done()
578
# Raises a parse error having the message `msg` and the current text
# location of this parser.
def _raise_error(self, msg: str) -> NoReturn:
    here = self._text_loc
    _raise_error(msg, here)
583
584 # Tries to make the pattern `pat` match the current substring,
585 # returning the match object and updating `self._at`,
586 # `self._line_no`, and `self._col_no` on success.
587 def _try_parse_pat(self, pat: Pattern[str]):
588 m = pat.match(self._normand, self._at)
589
590 if m is None:
591 return
592
593 # Skip matched string
594 self._at += len(m.group(0))
595
596 # Update line number
597 self._line_no += m.group(0).count("\n")
598
599 # Update column number
600 for i in reversed(range(self._at)):
601 if self._normand[i] == "\n" or i == 0:
602 if i == 0:
603 self._col_no = self._at + 1
604 else:
605 self._col_no = self._at - i
606
607 break
608
609 # Return match object
610 return m
611
612 # Expects the pattern `pat` to match the current substring,
613 # returning the match object and updating `self._at`,
614 # `self._line_no`, and `self._col_no` on success, or raising a parse
615 # error with the message `error_msg` on error.
616 def _expect_pat(self, pat: Pattern[str], error_msg: str):
617 # Match
618 m = self._try_parse_pat(pat)
619
620 if m is None:
621 # No match: error
622 self._raise_error(error_msg)
623
624 # Return match object
625 return m
626
627 # Pattern for _skip_ws_and_comments()
628 _ws_or_syms_or_comments_pat = re.compile(
25ca454b 629 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
71aaa3f7
PP
630 )
631
632 # Skips as many whitespaces, insignificant symbol characters, and
633 # comments as possible.
634 def _skip_ws_and_comments(self):
635 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
636
320644e2
PP
637 # Pattern for _skip_ws()
638 _ws_pat = re.compile(r"\s*")
639
640 # Skips as many whitespaces as possible.
641 def _skip_ws(self):
642 self._try_parse_pat(self._ws_pat)
643
71aaa3f7
PP
# Pattern for _try_parse_hex_byte(): a single hexadecimal digit
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, symbols, and comments), returning a byte item on
# success, or `None` if there's no initial nibble.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc
    m_high = self._try_parse_pat(self._nibble_pat)

    if m_high is None:
        # No initial nibble
        return

    # The second nibble is mandatory
    self._skip_ws_and_comments()
    m_low = self._expect_pat(
        self._nibble_pat, "Expecting another hexadecimal nibble"
    )

    # Combine both nibbles
    high = int(m_high.group(0), 16)
    low = int(m_low.group(0), 16)
    return _Byte((high << 4) | low, begin_text_loc)
71aaa3f7
PP
667
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

# Tries to parse a binary byte (`%` followed with eight bits, possibly
# separated by whitespaces, symbols, and comments), returning a byte
# item on success, or `None` if there's no `%` prefix.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # No prefix
        return

    # Accumulate the eight expected bits, most significant first
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        m_bit = self._expect_pat(
            self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
        )
        val = (val << 1) | int(m_bit.group(0))

    return _Byte(val, begin_text_loc)
71aaa3f7
PP
691
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a possibly negative
# decimal constant), returning a byte item on success, or `None` if
# there's no `$` prefix.
#
# Raises a parse error if the value is outside [-128, 255].
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No prefix
        return

    # The value is mandatory
    self._skip_ws()
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    magnitude = int(m.group("val"))
    val = -magnitude if m.group("neg") == "-" else magnitude

    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Modulo 256 gives the two's complement of a negative value
    return _Byte(val % 256, begin_text_loc)
71aaa3f7
PP
721
# Tries to parse a byte, trying the hexadecimal, binary, and decimal
# forms in this order, returning the first resulting byte item, or
# `None` if none matches.
def _try_parse_byte(self):
    try_parse_funcs = (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            return item
741
# Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# A backslash followed with any single character (except a newline)
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a literal string, with an optional `u16be`, `u16le`,
# `u32be`, or `u32le` encoding prefix (UTF-8 without any), returning a
# string item on success, or `None` if the prefix doesn't match.
#
# Raises a parse error if the closing `"` is missing.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Replacing each sequence with its own str.replace() call would
    # decode an escaped backslash followed with a letter (`\\n`) as a
    # backslash and a newline instead of a backslash and `n`, because
    # the `\n` replacement would see the second backslash.
    #
    # An unknown escape sequence is left as is.
    escape_seq_strs = self._str_escape_seq_strs
    val = self._str_escape_seq_pat.sub(
        lambda m_esc: escape_seq_strs.get(m_esc.group(1), m_esc.group(0)),
        m.group(0),
    )

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
71aaa3f7 796
320644e2
PP
# Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Pattern for _try_parse_group(): `(`, `!g`, or `!group`
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group, returning a group item on success, or `None`
# if the opening doesn't match.
#
# A group opened with `(` must end with `)`; any other opening must end
# with the block end pattern.
def _try_parse_group(self):
    begin_text_loc = self._text_loc
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        # Not a group
        return

    # Contained items
    items = self._parse_items()

    # The expected ending depends on the opening
    self._skip_ws_and_comments()

    if m_open.group(0) == "(":
        end_pat, end_str = self._right_paren_pat, ")"
    else:
        end_pat, end_str = self._block_end_pat, "!end"

    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )
    return _Group(items, begin_text_loc)
71aaa3f7
PP
831
# Returns a tuple of the normalized expression string (stripped, with
# newlines replaced with spaces) and the corresponding AST expression
# node for the expression string `expr_str`.
#
# Raises a parse error at the text location `text_loc` if `expr_str`
# isn't a valid Python expression.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    stripped = expr_str.strip()
    expr_str = stripped.replace("\n", " ")

    try:
        expr = ast.parse(expr_str, mode="eval")
    except SyntaxError:
        msg = "Invalid expression `{}`: invalid syntax".format(expr_str)
        _raise_error(msg, text_loc)

    return expr_str, expr
847
# Patterns for _try_parse_num_and_attr()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse an expression followed with `:` and an attribute
# (fixed length in bits, `uleb128`, or `sleb128`), returning the
# corresponding item on success, or `None` if there's no expression
# followed with `:`.
#
# Raises a parse error if the attribute is missing or unknown.
def _try_parse_num_and_attr(self):
    begin_text_loc = self._text_loc
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # No expression
        return

    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

    # Fixed length?
    m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

    if m_len is not None:
        return _FlNum(expr_str, expr, int(m_len.group(0)), begin_text_loc)

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        # Neither one: invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    item_cls = _ULeb128Int if m_leb128.group(1) == "u" else _SLeb128Int
    return item_cls(expr_str, expr, begin_text_loc)
71aaa3f7 892
320644e2
PP
# Patterns for _try_parse_var_assign()
_var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
_var_assign_expr_pat = re.compile(r"[^}]+")

# Tries to parse a variable assignment (name, `=`, expression),
# returning a variable assignment item on success, or `None` if there's
# no name followed with `=`.
#
# Raises a parse error if the name is reserved or is the name of an
# existing label, or if the expression is missing or invalid.
#
# On success, the name becomes a known variable name.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc
    m_name = self._try_parse_pat(self._var_assign_name_equal_pat)

    if m_name is None:
        # Not a variable assignment
        return

    var_name = m_name.group(1)

    # The name must be neither reserved nor a label name
    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), begin_text_loc)

    # The expression is mandatory
    self._skip_ws()
    m_expr = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(m_expr.group(0), begin_text_loc)

    # Now a known variable name
    self._var_names.add(var_name)
    return _VarAssign(var_name, expr_str, expr, begin_text_loc)
937
# Pattern for _try_parse_set_bo(): `be` or `le`
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning a byte
# order setting item on success, or `None` if there's no such name.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._bo_pat)

    if m is None:
        # Not a byte order name
        return

    bo_name = m.group(0)

    if bo_name != "be":
        # The pattern only matches `be` and `le`
        assert bo_name == "le"
        return _SetBo(ByteOrder.LE, begin_text_loc)

    return _SetBo(ByteOrder.BE, begin_text_loc)
71aaa3f7
PP
959
# Patterns for _try_parse_val_or_var_assign_or_set_bo()
_val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
_val_var_assign_set_bo_suffix_pat = re.compile(r"\}")

# Tries to parse what's between `{` and `}`: a variable assignment, a
# number with its attribute, or a byte order setting (tried in this
# order), returning the item on success, or `None` if there's no `{`.
#
# Raises a parse error if nothing matches after `{` or if the closing
# `}` is missing.
def _try_parse_val_or_var_assign_or_set_bo(self):
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        # No opening brace
        return

    self._skip_ws()

    # First alternative which returns an item wins
    try_parse_funcs = (
        self._try_parse_var_assign,
        self._try_parse_num_and_attr,
        self._try_parse_set_bo,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            break
    else:
        # No alternative matched: invalid
        self._raise_error(
            "Expecting a fixed-length number, a variable assignment, or a byte order setting"
        )

    # The closing brace is mandatory
    self._skip_ws()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
995
fc21bb27
PP
996 # Returns a normalized version (so as to be parseable by int()) of
997 # the constant integer string `s`, possibly negative, dealing with
998 # any radix suffix.
999 @staticmethod
1000 def _norm_const_int(s: str):
1001 neg = ""
1002 pos = s
1003
1004 if s.startswith("-"):
1005 neg = "-"
1006 pos = s[1:]
1007
1008 for r in "xXoObB":
1009 if pos.startswith("0" + r):
1010 # Already correct
1011 return s
1012
1013 # Try suffix
1014 asm_suf_base = {
1015 "h": "x",
1016 "H": "x",
1017 "q": "o",
1018 "Q": "o",
1019 "o": "o",
1020 "O": "o",
1021 "b": "b",
1022 "B": "B",
1023 }
1024
1025 for suf in asm_suf_base:
1026 if pos[-1] == suf:
1027 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
1028
1029 return s
1030
# Common constant integer patterns: positive only, and with an
# optional leading `-`
_pos_const_int_pat = re.compile(
    r"0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+"
)
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))

# Tries to parse an offset setting value (what follows the initial
# `<`), which is a positive constant integer, returning an offset
# setting item on success, or `None` if there's no such constant.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # Not a constant integer
        return

    # Base 0: the radix comes from the normalized prefix
    offset = int(self._norm_const_int(m.group(0)), 0)
    return _SetOffset(offset, begin_text_loc)
71aaa3f7
PP
1051
# Tries to parse a label name (what follows the initial `<`),
# returning a label item on success, or `None` if there's no name.
#
# Raises a parse error if the name is reserved, is the name of an
# existing label, or is the name of an existing variable.
#
# On success, the name becomes a known label name.
def _try_parse_label_name(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        # Not a name
        return

    label_name = m.group(0)

    # The name must be neither reserved nor already used
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), begin_text_loc)

    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), begin_text_loc
        )

    # Now a known label name
    self._label_names.add(label_name)
    return _Label(label_name, begin_text_loc)
71aaa3f7 1083
# Patterns for _try_parse_label_or_set_offset()
_label_set_offset_prefix_pat = re.compile(r"<")
_label_set_offset_suffix_pat = re.compile(r">")

# Tries to parse what's between `<` and `>`: an offset setting value
# or a label name (tried in this order), returning the item on success,
# or `None` if there's no `<`.
#
# Raises a parse error if neither one matches after `<` or if the
# closing `>` is missing.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # No opening `<`
        return

    self._skip_ws()

    # Offset setting first, then label
    item = self._try_parse_set_offset_val()

    if item is None:
        item = self._try_parse_label_name()

    if item is None:
        # Neither one: invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # The closing `>` is mandatory
    self._skip_ws()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1112
25ca454b
PP
# Pattern for _parse_pad_val()
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value (`~` followed with a positive
# constant integer), returning the padding value, or 0 if there's no
# `~`.
#
# Raises a parse error if the constant integer is missing after `~` or
# if the value is greater than 255.
def _parse_pad_val(self):
    self._skip_ws()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No padding value: default
        return 0

    # The value is mandatory after the prefix
    self._skip_ws()
    pad_val_text_loc = self._text_loc
    m = self._expect_pat(
        self._pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    pad_val = int(self._norm_const_int(m.group(0)), 0)

    # Must fit a single byte
    if pad_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(pad_val),
            pad_val_text_loc,
        )

    return pad_val
1141
676f6189 1142 # Patterns for _try_parse_align_offset()
320644e2
PP
1143 _align_offset_prefix_pat = re.compile(r"@")
1144 _align_offset_val_pat = re.compile(r"\d+")
676f6189
PP
1145
1146 # Tries to parse an offset alignment, returning an offset alignment
1147 # item on success.
1148 def _try_parse_align_offset(self):
1149 begin_text_loc = self._text_loc
1150
1151 # Match prefix
1152 if self._try_parse_pat(self._align_offset_prefix_pat) is None:
1153 # No match
1154 return
1155
320644e2 1156 # Expect an alignment
25ca454b 1157 self._skip_ws()
676f6189
PP
1158 align_text_loc = self._text_loc
1159 m = self._expect_pat(
1160 self._align_offset_val_pat,
1161 "Expecting an alignment (positive multiple of eight bits)",
1162 )
1163
1164 # Validate alignment
320644e2 1165 val = int(m.group(0))
676f6189
PP
1166
1167 if val <= 0 or (val % 8) != 0:
1168 _raise_error(
1169 "Invalid alignment value {} (not a positive multiple of eight)".format(
1170 val
1171 ),
1172 align_text_loc,
1173 )
1174
25ca454b
PP
1175 # Padding value
1176 pad_val = self._parse_pad_val()
676f6189 1177
25ca454b
PP
1178 # Return item
1179 return _AlignOffset(val, pad_val, begin_text_loc)
676f6189 1180
25ca454b
PP
1181 # Patterns for _try_parse_fill_until()
1182 _fill_until_prefix_pat = re.compile(r"\+")
1183 _fill_until_pad_val_prefix_pat = re.compile(r"~")
676f6189 1184
25ca454b
PP
1185 # Tries to parse a filling, returning a filling item on success.
1186 def _try_parse_fill_until(self):
1187 begin_text_loc = self._text_loc
1188
1189 # Match prefix
1190 if self._try_parse_pat(self._fill_until_prefix_pat) is None:
1191 # No match
1192 return
1193
1194 # Expect expression
1195 self._skip_ws()
1196 expr_str, expr = self._expect_const_int_name_expr(True)
1197
1198 # Padding value
1199 pad_val = self._parse_pad_val()
676f6189
PP
1200
1201 # Return item
25ca454b 1202 return _FillUntil(expr_str, expr, pad_val, begin_text_loc)
676f6189 1203
    # Patterns for _expect_const_int_name_expr()
320644e2
PP
1205 _inner_expr_prefix_pat = re.compile(r"\{")
1206 _inner_expr_pat = re.compile(r"[^}]+")
1207 _inner_expr_suffix_pat = re.compile(r"\}")
1208
1209 # Parses a constant integer if `accept_const_int` is `True`
1210 # (possibly negative if `allow_neg` is `True`), a name, or an
1211 # expression within `{` and `}`.
1212 def _expect_const_int_name_expr(
1213 self, accept_const_int: bool, allow_neg: bool = False
1214 ):
e57a18e1
PP
1215 expr_text_loc = self._text_loc
1216
1217 # Constant integer?
27d52a19
PP
1218 m = None
1219
320644e2
PP
1220 if accept_const_int:
1221 m = self._try_parse_pat(self._const_int_pat)
e57a18e1
PP
1222
1223 if m is None:
1224 # Name?
1225 m = self._try_parse_pat(_py_name_pat)
1226
1227 if m is None:
1228 # Expression?
320644e2
PP
1229 if self._try_parse_pat(self._inner_expr_prefix_pat) is None:
1230 pos_msg = "" if allow_neg else "positive "
1231
1232 if accept_const_int:
1233 mid_msg = "a {}constant integer, a name, or `{{`".format(
1234 pos_msg
1235 )
27d52a19
PP
1236 else:
1237 mid_msg = "a name or `{`"
1238
e57a18e1 1239 # At this point it's invalid
27d52a19 1240 self._raise_error("Expecting {}".format(mid_msg))
e57a18e1
PP
1241
1242 # Expect an expression
320644e2 1243 self._skip_ws()
e57a18e1 1244 expr_text_loc = self._text_loc
320644e2 1245 m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")
e57a18e1
PP
1246 expr_str = m.group(0)
1247
1248 # Expect `}`
320644e2
PP
1249 self._skip_ws()
1250 self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
e57a18e1
PP
1251 else:
1252 expr_str = m.group(0)
1253 else:
320644e2
PP
1254 if m.group("neg") == "-" and not allow_neg:
1255 _raise_error("Expecting a positive constant integer", expr_text_loc)
1256
fc21bb27 1257 expr_str = self._norm_const_int(m.group(0))
e57a18e1
PP
1258
1259 return self._ast_expr_from_str(expr_str, expr_text_loc)
1260
27d52a19
PP
1261 # Parses the multiplier expression of a repetition (block or
1262 # post-item) and returns the expression string and AST node.
1263 def _expect_rep_mul_expr(self):
320644e2 1264 return self._expect_const_int_name_expr(True)
27d52a19
PP
1265
1266 # Common block end pattern
320644e2 1267 _block_end_pat = re.compile(r"!end\b")
27d52a19 1268
e57a18e1 1269 # Pattern for _try_parse_rep_block()
320644e2 1270 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
e57a18e1
PP
1271
1272 # Tries to parse a repetition block, returning a repetition item on
1273 # success.
1274 def _try_parse_rep_block(self):
1275 begin_text_loc = self._text_loc
1276
1277 # Match prefix
1278 if self._try_parse_pat(self._rep_block_prefix_pat) is None:
1279 # No match
1280 return
1281
1282 # Expect expression
1283 self._skip_ws_and_comments()
1284 expr_str, expr = self._expect_rep_mul_expr()
1285
1286 # Parse items
1287 self._skip_ws_and_comments()
1288 items_text_loc = self._text_loc
1289 items = self._parse_items()
1290
1291 # Expect end of block
1292 self._skip_ws_and_comments()
1293 self._expect_pat(
27d52a19 1294 self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
e57a18e1
PP
1295 )
1296
1297 # Return item
1298 return _Rep(_Group(items, items_text_loc), expr_str, expr, begin_text_loc)
1299
27d52a19 1300 # Pattern for _try_parse_cond_block()
320644e2 1301 _cond_block_prefix_pat = re.compile(r"!if\b")
27d52a19
PP
1302
1303 # Tries to parse a conditional block, returning a conditional item
1304 # on success.
1305 def _try_parse_cond_block(self):
1306 begin_text_loc = self._text_loc
1307
1308 # Match prefix
1309 if self._try_parse_pat(self._cond_block_prefix_pat) is None:
1310 # No match
1311 return
1312
1313 # Expect expression
1314 self._skip_ws_and_comments()
320644e2 1315 expr_str, expr = self._expect_const_int_name_expr(False)
27d52a19
PP
1316
1317 # Parse items
1318 self._skip_ws_and_comments()
1319 items_text_loc = self._text_loc
1320 items = self._parse_items()
1321
1322 # Expect end of block
1323 self._skip_ws_and_comments()
1324 self._expect_pat(
1325 self._block_end_pat,
1326 "Expecting an item or `!end` (end of conditional block)",
1327 )
1328
1329 # Return item
1330 return _Cond(_Group(items, items_text_loc), expr_str, expr, begin_text_loc)
1331
320644e2
PP
1332 # Common left parenthesis pattern
1333 _left_paren_pat = re.compile(r"\(")
1334
1335 # Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
1336 _macro_params_comma_pat = re.compile(",")
1337
1338 # Patterns for _try_parse_macro_def()
1339 _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1340
1341 # Tries to parse a macro definition, adding it to `self._macro_defs`
1342 # and returning `True` on success.
1343 def _try_parse_macro_def(self):
1344 begin_text_loc = self._text_loc
1345
1346 # Match prefix
1347 if self._try_parse_pat(self._macro_def_prefix_pat) is None:
1348 # No match
1349 return False
1350
1351 # Expect a name
1352 self._skip_ws()
1353 name_text_loc = self._text_loc
1354 m = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
1355
1356 # Validate name
1357 name = m.group(0)
1358
1359 if name in self._macro_defs:
1360 _raise_error("Duplicate macro named `{}`".format(name), name_text_loc)
1361
1362 # Expect `(`
1363 self._skip_ws()
1364 self._expect_pat(self._left_paren_pat, "Expecting `(`")
1365
1366 # Try to parse comma-separated parameter names
1367 param_names = [] # type: List[str]
1368 expect_comma = False
1369
1370 while True:
1371 self._skip_ws()
1372
1373 # End?
1374 if self._try_parse_pat(self._right_paren_pat) is not None:
1375 # End
1376 break
1377
1378 # Comma?
1379 if expect_comma:
1380 self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")
1381
1382 # Expect parameter name
1383 self._skip_ws()
1384 param_text_loc = self._text_loc
1385 m = self._expect_pat(_py_name_pat, "Expecting valid parameter name")
1386
1387 if m.group(0) in param_names:
1388 _raise_error(
1389 "Duplicate macro parameter named `{}`".format(m.group(0)),
1390 param_text_loc,
1391 )
1392
1393 param_names.append(m.group(0))
1394 expect_comma = True
1395
1396 # Expect items
1397 self._skip_ws_and_comments()
1398 items_text_loc = self._text_loc
1399 old_var_names = self._var_names.copy()
1400 old_label_names = self._label_names.copy()
1401 self._var_names = set() # type: Set[str]
1402 self._label_names = set() # type: Set[str]
1403 items = self._parse_items()
1404 self._var_names = old_var_names
1405 self._label_names = old_label_names
1406
1407 # Expect suffix
1408 self._expect_pat(
1409 self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
1410 )
1411
1412 # Register macro
1413 self._macro_defs[name] = _MacroDef(
1414 name, param_names, _Group(items, items_text_loc), begin_text_loc
1415 )
1416
1417 return True
1418
1419 # Patterns for _try_parse_macro_exp()
1420 _macro_exp_prefix_pat = re.compile(r"m\b")
1421 _macro_exp_colon_pat = re.compile(r":")
1422
1423 # Tries to parse a macro expansion, returning a macro expansion item
1424 # on success.
1425 def _try_parse_macro_exp(self):
1426 begin_text_loc = self._text_loc
1427
1428 # Match prefix
1429 if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
1430 # No match
1431 return
1432
1433 # Expect `:`
1434 self._skip_ws()
1435 self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")
1436
1437 # Expect a macro name
1438 self._skip_ws()
1439 name_text_loc = self._text_loc
1440 m = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
1441
1442 # Validate name
1443 name = m.group(0)
1444 macro_def = self._macro_defs.get(name)
1445
1446 if macro_def is None:
1447 _raise_error("Unknown macro name `{}`".format(name), name_text_loc)
1448
1449 # Expect `(`
1450 self._skip_ws()
1451 self._expect_pat(self._left_paren_pat, "Expecting `(`")
1452
1453 # Try to parse comma-separated parameter values
1454 params_text_loc = self._text_loc
1455 params = [] # type: List[_MacroExpParam]
1456 expect_comma = False
1457
1458 while True:
1459 self._skip_ws()
1460
1461 # End?
1462 if self._try_parse_pat(self._right_paren_pat) is not None:
1463 # End
1464 break
1465
1466 # Expect a Value
1467 if expect_comma:
1468 self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")
1469
1470 self._skip_ws()
1471 param_text_loc = self._text_loc
1472 params.append(
1473 _MacroExpParam(
1474 *self._expect_const_int_name_expr(True, True), param_text_loc
1475 )
1476 )
1477 expect_comma = True
1478
1479 # Validate parameter values
1480 if len(params) != len(macro_def.param_names):
1481 sing_plur = "" if len(params) == 1 else "s"
1482 _raise_error(
1483 "Macro expansion passes {} parameter{} while the definition expects {}".format(
1484 len(params), sing_plur, len(macro_def.param_names)
1485 ),
1486 params_text_loc,
1487 )
1488
1489 # Return item
1490 return _MacroExp(name, params, begin_text_loc)
1491
71aaa3f7
PP
1492 # Tries to parse a base item (anything except a repetition),
1493 # returning it on success.
1494 def _try_parse_base_item(self):
1495 # Byte item?
1496 item = self._try_parse_byte()
1497
1498 if item is not None:
1499 return item
1500
1501 # String item?
1502 item = self._try_parse_str()
1503
1504 if item is not None:
1505 return item
1506
2adf4336
PP
1507 # Value, variable assignment, or byte order setting item?
1508 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
1509
1510 if item is not None:
1511 return item
1512
2adf4336
PP
1513 # Label or offset setting item?
1514 item = self._try_parse_label_or_set_offset()
71aaa3f7 1515
676f6189
PP
1516 if item is not None:
1517 return item
1518
1519 # Offset alignment item?
1520 item = self._try_parse_align_offset()
1521
25ca454b
PP
1522 if item is not None:
1523 return item
1524
1525 # Filling item?
1526 item = self._try_parse_fill_until()
1527
71aaa3f7
PP
1528 if item is not None:
1529 return item
1530
1531 # Group item?
1532 item = self._try_parse_group()
1533
1534 if item is not None:
1535 return item
1536
320644e2 1537 # Repetition block item?
e57a18e1 1538 item = self._try_parse_rep_block()
71aaa3f7 1539
e57a18e1
PP
1540 if item is not None:
1541 return item
1542
27d52a19
PP
1543 # Conditional block item?
1544 item = self._try_parse_cond_block()
1545
1546 if item is not None:
1547 return item
1548
320644e2
PP
1549 # Macro expansion?
1550 item = self._try_parse_macro_exp()
1551
1552 if item is not None:
1553 return item
1554
e57a18e1
PP
1555 # Pattern for _try_parse_rep_post()
1556 _rep_post_prefix_pat = re.compile(r"\*")
1557
1558 # Tries to parse a post-item repetition, returning the expression
1559 # string and AST expression node on success.
1560 def _try_parse_rep_post(self):
71aaa3f7 1561 # Match prefix
e57a18e1 1562 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
71aaa3f7 1563 # No match
2adf4336 1564 return
71aaa3f7 1565
e57a18e1 1566 # Return expression string and AST expression
71aaa3f7 1567 self._skip_ws_and_comments()
e57a18e1 1568 return self._expect_rep_mul_expr()
71aaa3f7 1569
1ca7b5e1
PP
1570 # Tries to parse an item, possibly followed by a repetition,
1571 # returning `True` on success.
1572 #
1573 # Appends any parsed item to `items`.
1574 def _try_append_item(self, items: List[_Item]):
71aaa3f7
PP
1575 self._skip_ws_and_comments()
1576
320644e2 1577 # Base item
71aaa3f7
PP
1578 item = self._try_parse_base_item()
1579
1580 if item is None:
320644e2 1581 return
71aaa3f7
PP
1582
1583 # Parse repetition if the base item is repeatable
1584 if isinstance(item, _RepableItem):
0e8e3169
PP
1585 self._skip_ws_and_comments()
1586 rep_text_loc = self._text_loc
e57a18e1 1587 rep_ret = self._try_parse_rep_post()
71aaa3f7 1588
2adf4336 1589 if rep_ret is not None:
320644e2 1590 item = _Rep(item, *rep_ret, rep_text_loc)
71aaa3f7 1591
1ca7b5e1
PP
1592 items.append(item)
1593 return True
71aaa3f7
PP
1594
1595 # Parses and returns items, skipping whitespaces, insignificant
1596 # symbols, and comments when allowed, and stopping at the first
1597 # unknown character.
320644e2
PP
1598 #
1599 # Accepts and registers macro definitions if `accept_macro_defs`
1600 # is `True`.
1601 def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
71aaa3f7
PP
1602 items = [] # type: List[_Item]
1603
1604 while self._isnt_done():
1ca7b5e1
PP
1605 # Try to append item
1606 if not self._try_append_item(items):
320644e2
PP
1607 if accept_macro_defs and self._try_parse_macro_def():
1608 continue
1609
1ca7b5e1
PP
1610 # Unknown at this point
1611 break
71aaa3f7
PP
1612
1613 return items
1614
1615 # Parses the whole Normand input, setting `self._res` to the main
1616 # group item on success.
1617 def _parse(self):
1618 if len(self._normand.strip()) == 0:
1619 # Special case to make sure there's something to consume
1620 self._res = _Group([], self._text_loc)
1621 return
1622
1623 # Parse first level items
320644e2 1624 items = self._parse_items(True)
71aaa3f7
PP
1625
1626 # Make sure there's nothing left
1627 self._skip_ws_and_comments()
1628
1629 if self._isnt_done():
1630 self._raise_error(
1631 "Unexpected character `{}`".format(self._normand[self._at])
1632 )
1633
1634 # Set main group item
1635 self._res = _Group(items, self._text_loc)
1636
1637
# The return type of parse(): generated data and final state.
#
# Not to be instantiated directly: __init__() always raises
# `NotImplementedError`; the _create() class method builds instances.
class ParseResult:
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Go through __new__() to bypass __init__(), which raises
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed
    # value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order (may be `None`).
    @property
    def byte_order(self):
        return self._bo
1695
1696
# Raises a parse error having the message `msg` at the text location of
# the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1701
1702
1703# The `ICITTE` reserved name.
1704_icitte_name = "ICITTE"
1705
1706
2adf4336
PP
1707# Base node visitor.
1708#
1709# Calls the _visit_name() method for each name node which isn't the name
1710# of a call.
1711class _NodeVisitor(ast.NodeVisitor):
1712 def __init__(self):
71aaa3f7
PP
1713 self._parent_is_call = False
1714
1715 def generic_visit(self, node: ast.AST):
1716 if type(node) is ast.Call:
1717 self._parent_is_call = True
1718 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1719 self._visit_name(node.id)
71aaa3f7
PP
1720
1721 super().generic_visit(node)
1722 self._parent_is_call = False
1723
2adf4336
PP
1724 @abc.abstractmethod
1725 def _visit_name(self, name: str):
1726 ...
1727
71aaa3f7 1728
2adf4336
PP
# Expression validator: validates that all the visited names within the
# expression are allowed.
#
# A name is allowed if it's the reserved `ICITTE` name or if it's part
# of the `allowed_names` set given at construction time. The first
# illegal name makes the validator report an error at `text_loc` which
# quotes `expr_str` and lists the legal names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        # The reserved name is always legal, so this set is never empty
        legal_names = self._allowed_names | {_icitte_name}
        legal_names_str = ", ".join(
            sorted("`{}`".format(legal_name) for legal_name in legal_names)
        )
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
1759
1760
2adf4336
PP
# Generator state: current variables, labels, offset, and byte order.
#
# The constructor keeps shallow copies of `variables` and `labels`, so
# that updating the state doesn't modify the mappings passed by the
# caller.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.offset = offset
        self.bo = bo
        self.variables = variables.copy()
        self.labels = labels.copy()

    def __repr__(self):
        parts = (self.variables, self.labels, self.offset, self.bo)
        return "_GenState({}, {}, {}, {})".format(*map(repr, parts))
1779
1780
# Fixed-length number item instance: a fixed-length number item
# together with the offset within the generated data and the generation
# state given at construction time.
class _FlNumItemInst:
    def __init__(self, item: _FlNum, offset_in_data: int, state: _GenState):
        self._state = state
        self._offset_in_data = offset_in_data
        self._item = item

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset given at construction time.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state given at construction time.
    @property
    def state(self):
        return self._state
1799
71aaa3f7 1800
2adf4336 1801# Generator of data and final state from a group item.
71aaa3f7
PP
1802#
1803# Generation happens in memory at construction time. After building, use
1804# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1805# get the resulting context.
2adf4336
PP
1806#
1807# The steps of generation are:
1808#
320644e2
PP
1809# 1. Handle each item in prefix order.
1810#
1811# The handlers append bytes to `self._data` and update some current
1812# state object (`_GenState` instance).
1813#
1814# When handling a fixed-length number item, try to evaluate its
1815# expression using the current state. If this fails, then it might be
1816# because the expression refers to a "future" label: save the current
1817# offset in `self._data` (generated data) and a snapshot of the
1818# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
1819# object). _gen_fl_num_item_insts() will deal with this later.
2adf4336 1820#
320644e2
PP
1821# When handling the items of a group, keep a map of immediate label
1822# names to their offset. Then, after having processed all the items,
1823# update the relevant saved state snapshots in
1824# `self._fl_num_item_insts` with those immediate label values.
1825# _gen_fl_num_item_insts() will deal with this later.
2adf4336 1826#
320644e2
PP
1827# 2. Handle all the fixed-length number item instances of which the
1828# expression evaluation failed before.
2adf4336 1829#
320644e2
PP
1830# At this point, `self._fl_num_item_insts` contains everything that's
1831# needed to evaluate the expressions, including the values of
1832# "future" labels from the point of view of some fixed-length number
1833# item instance.
2adf4336 1834#
320644e2 1835# If an evaluation fails at this point, then it's a user error.
71aaa3f7
PP
1836class _Gen:
1837 def __init__(
1838 self,
1839 group: _Group,
320644e2 1840 macro_defs: _MacroDefsT,
e57a18e1
PP
1841 variables: VariablesT,
1842 labels: LabelsT,
71aaa3f7
PP
1843 offset: int,
1844 bo: Optional[ByteOrder],
1845 ):
320644e2
PP
1846 self._macro_defs = macro_defs
1847 self._fl_num_item_insts = [] # type: List[_FlNumItemInst]
2adf4336 1848 self._gen(group, _GenState(variables, labels, offset, bo))
71aaa3f7
PP
1849
1850 # Generated bytes.
1851 @property
1852 def data(self):
1853 return self._data
1854
1855 # Updated variables.
1856 @property
1857 def variables(self):
2adf4336 1858 return self._final_state.variables
71aaa3f7
PP
1859
1860 # Updated main group labels.
1861 @property
1862 def labels(self):
2adf4336 1863 return self._final_state.labels
71aaa3f7
PP
1864
1865 # Updated offset.
1866 @property
1867 def offset(self):
2adf4336 1868 return self._final_state.offset
71aaa3f7
PP
1869
1870 # Updated byte order.
1871 @property
1872 def bo(self):
2adf4336
PP
1873 return self._final_state.bo
1874
320644e2
PP
1875 # Evaluates the expression `expr` of which the original string is
1876 # `expr_str` at the location `text_loc` considering the current
2adf4336
PP
1877 # generation state `state`.
1878 #
269f6eb3
PP
1879 # If `allow_float` is `True`, then the type of the result may be
1880 # `float` too.
2adf4336 1881 @staticmethod
320644e2
PP
1882 def _eval_expr(
1883 expr_str: str,
1884 expr: ast.Expression,
1885 text_loc: TextLocation,
269f6eb3 1886 state: _GenState,
269f6eb3
PP
1887 allow_float: bool = False,
1888 ):
e57a18e1
PP
1889 syms = {} # type: VariablesT
1890 syms.update(state.labels)
71aaa3f7 1891
e57a18e1
PP
1892 # Set the `ICITTE` name to the current offset
1893 syms[_icitte_name] = state.offset
71aaa3f7
PP
1894
1895 # Add the current variables
2adf4336 1896 syms.update(state.variables)
71aaa3f7
PP
1897
1898 # Validate the node and its children
320644e2 1899 _ExprValidator(expr_str, text_loc, set(syms.keys())).visit(expr)
71aaa3f7
PP
1900
1901 # Compile and evaluate expression node
1902 try:
320644e2 1903 val = eval(compile(expr, "", "eval"), None, syms)
71aaa3f7 1904 except Exception as exc:
320644e2
PP
1905 _raise_error(
1906 "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
1907 text_loc,
71aaa3f7
PP
1908 )
1909
27d52a19
PP
1910 # Convert `bool` result type to `int` to normalize
1911 if type(val) is bool:
1912 val = int(val)
1913
269f6eb3
PP
1914 # Validate result type
1915 expected_types = {int} # type: Set[type]
1916 type_msg = "`int`"
1917
1918 if allow_float:
1919 expected_types.add(float)
1920 type_msg += " or `float`"
1921
1922 if type(val) not in expected_types:
320644e2 1923 _raise_error(
269f6eb3 1924 "Invalid expression `{}`: expecting result type {}, not `{}`".format(
320644e2 1925 expr_str, type_msg, type(val).__name__
71aaa3f7 1926 ),
320644e2 1927 text_loc,
71aaa3f7
PP
1928 )
1929
1930 return val
1931
320644e2
PP
1932 # Evaluates the expression of `item` considering the current
1933 # generation state `state`.
1934 #
1935 # If `allow_float` is `True`, then the type of the result may be
1936 # `float` too.
1937 @staticmethod
1938 def _eval_item_expr(
25ca454b 1939 item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
320644e2
PP
1940 state: _GenState,
1941 allow_float: bool = False,
1942 ):
1943 return _Gen._eval_expr(
1944 item.expr_str, item.expr, item.text_loc, state, allow_float
1945 )
1946
1947 # Handles the byte item `item`.
1948 def _handle_byte_item(self, item: _Byte, state: _GenState):
1949 self._data.append(item.val)
1950 state.offset += item.size
1951
1952 # Handles the string item `item`.
1953 def _handle_str_item(self, item: _Str, state: _GenState):
1954 self._data += item.data
1955 state.offset += item.size
1956
1957 # Handles the byte order setting item `item`.
1958 def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
1959 # Update current byte order
1960 state.bo = item.bo
1961
1962 # Handles the variable assignment item `item`.
1963 def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
1964 # Update variable
1965 state.variables[item.name] = self._eval_item_expr(item, state, True)
1966
1967 # Handles the fixed-length number item `item`.
1968 def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
1969 # Validate current byte order
1970 if state.bo is None and item.len > 8:
1971 _raise_error_for_item(
1972 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
1973 item.expr_str
1974 ),
1975 item,
1976 )
1977
1978 # Try an immediate evaluation. If it fails, then keep everything
1979 # needed to (try to) generate the bytes of this item later.
1980 try:
1981 data = self._gen_fl_num_item_inst_data(item, state)
1982 except Exception:
1983 self._fl_num_item_insts.append(
1984 _FlNumItemInst(item, len(self._data), copy.deepcopy(state))
1985 )
1986
1987 # Reserve space in `self._data` for this instance
1988 data = bytes([0] * (item.len // 8))
1989
1990 # Append bytes
1991 self._data += data
1992
1993 # Update offset
1994 state.offset += len(data)
1995
05f81895
PP
1996 # Returns the size, in bytes, required to encode the value `val`
1997 # with LEB128 (signed version if `is_signed` is `True`).
1998 @staticmethod
1999 def _leb128_size_for_val(val: int, is_signed: bool):
2000 if val < 0:
2001 # Equivalent upper bound.
2002 #
2003 # For example, if `val` is -128, then the full integer for
2004 # this number of bits would be [-128, 127].
2005 val = -val - 1
2006
2007 # Number of bits (add one for the sign if needed)
2008 bits = val.bit_length() + int(is_signed)
2009
2010 if bits == 0:
2011 bits = 1
2012
2013 # Seven bits per byte
2014 return math.ceil(bits / 7)
2015
320644e2
PP
2016 # Handles the LEB128 integer item `item`.
2017 def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
2018 # Compute value
2019 val = self._eval_item_expr(item, state, False)
676f6189 2020
320644e2
PP
2021 # Size in bytes
2022 size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)
05f81895 2023
320644e2
PP
2024 # For each byte
2025 for _ in range(size):
2026 # Seven LSBs, MSB of the byte set (continue)
2027 self._data.append((val & 0x7F) | 0x80)
2028 val >>= 7
2adf4336 2029
320644e2
PP
2030 # Clear MSB of last byte (stop)
2031 self._data[-1] &= ~0x80
2adf4336 2032
320644e2
PP
2033 # Update offset
2034 state.offset += size
27d52a19 2035
320644e2
PP
2036 # Handles the group item `item`, removing the immediate labels from
2037 # `state` at the end if `remove_immediate_labels` is `True`.
2038 def _handle_group_item(
2039 self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
2040 ):
2041 first_fl_num_item_inst_index = len(self._fl_num_item_insts)
2042 immediate_labels = {} # type: LabelsT
27d52a19 2043
320644e2
PP
2044 # Handle each item
2045 for subitem in item.items:
2046 if type(subitem) is _Label:
2047 # Add to local immediate labels
2048 immediate_labels[subitem.name] = state.offset
2adf4336 2049
320644e2 2050 self._handle_item(subitem, state)
2adf4336 2051
320644e2
PP
2052 # Remove immediate labels from current state if needed
2053 if remove_immediate_labels:
2054 for name in immediate_labels:
2055 del state.labels[name]
2adf4336 2056
320644e2
PP
2057 # Add all immediate labels to all state snapshots since
2058 # `first_fl_num_item_inst_index`.
2059 for inst in self._fl_num_item_insts[first_fl_num_item_inst_index:]:
2060 inst.state.labels.update(immediate_labels)
2adf4336 2061
320644e2
PP
2062 # Handles the repetition item `item`.
2063 def _handle_rep_item(self, item: _Rep, state: _GenState):
2064 # Compute the repetition count
2065 mul = _Gen._eval_item_expr(item, state)
05f81895 2066
320644e2
PP
2067 # Validate result
2068 if mul < 0:
2069 _raise_error_for_item(
2070 "Invalid expression `{}`: unexpected negative result {:,}".format(
2071 item.expr_str, mul
2072 ),
2073 item,
2074 )
2adf4336 2075
320644e2
PP
2076 # Generate item data `mul` times
2077 for _ in range(mul):
2078 self._handle_item(item.item, state)
2adf4336 2079
320644e2
PP
2080 # Handles the conditional item `item`.
2081 def _handle_cond_item(self, item: _Rep, state: _GenState):
2082 # Compute the conditional value
2083 val = _Gen._eval_item_expr(item, state)
2adf4336 2084
320644e2
PP
2085 # Generate item data if needed
2086 if val:
2087 self._handle_item(item.item, state)
2adf4336 2088
320644e2
PP
2089 # Evaluates the parameters of the macro expansion item `item`
2090 # considering the initial state `init_state` and returns a new state
2091 # to handle the items of the macro.
2092 def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
2093 # New state
2094 exp_state = _GenState({}, {}, init_state.offset, init_state.bo)
2adf4336 2095
320644e2
PP
2096 # Evaluate the parameter expressions
2097 macro_def = self._macro_defs[item.name]
2adf4336 2098
320644e2
PP
2099 for param_name, param in zip(macro_def.param_names, item.params):
2100 exp_state.variables[param_name] = _Gen._eval_expr(
2101 param.expr_str, param.expr, param.text_loc, init_state, True
2102 )
2adf4336 2103
320644e2 2104 return exp_state
2adf4336 2105
320644e2
PP
2106 # Handles the macro expansion item `item`.
2107 def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
2108 # New state
2109 exp_state = self._eval_macro_exp_params(item, state)
27d52a19 2110
320644e2
PP
2111 # Process the contained group
2112 init_data_size = len(self._data)
2113 self._handle_item(self._macro_defs[item.name].group, exp_state)
27d52a19 2114
320644e2
PP
2115 # Update state offset and return
2116 state.offset += len(self._data) - init_data_size
676f6189 2117
320644e2
PP
2118 # Handles the offset setting item `item`.
2119 def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
676f6189 2120 state.offset = item.val
2adf4336 2121
25ca454b 2122 # Handles the offset alignment item `item` (adds padding).
320644e2
PP
2123 def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
2124 init_offset = state.offset
2125 align_bytes = item.val // 8
2126 state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
2127 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2adf4336 2128
25ca454b
PP
# Handles the filling item `item`: appends padding bytes
# (`item.pad_val`) to `self._data` until the current offset of `state`
# reaches the offset which the expression of the item yields.
#
# Reports an error through _raise_error_for_item() if that target
# offset is less than the current one.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    target_offset = _Gen._eval_item_expr(item, state)

    # Filling can only move forward
    if target_offset < state.offset:
        msg = "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
            item.expr_str, target_offset, state.offset
        )
        _raise_error_for_item(msg, item)

    # Pad up to the target offset, and then move there
    pad_size = target_offset - state.offset
    self._data += bytes([item.pad_val] * pad_size)
    state.offset = target_offset
2148
320644e2
PP
2149 # Handles the label item `item`.
2150 def _handle_label_item(self, item: _Label, state: _GenState):
2151 state.labels[item.name] = state.offset
2adf4336 2152
320644e2
PP
2153 # Handles the item `item`, returning the updated next repetition
2154 # instance.
2155 def _handle_item(self, item: _Item, state: _GenState):
2156 return self._item_handlers[type(item)](item, state)
71aaa3f7 2157
320644e2
PP
# Generates and returns the bytes of a fixed-length integer item
# instance having the value `val`.
#
# `val` may be anything from the lowest signed value to the highest
# unsigned value which fit in `item.len` bits; otherwise this method
# reports an error through _raise_error_for_item().
#
# The byte order is the one of `state`, big endian when it's `None`.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    bit_len = item.len

    # Accept both the signed and the unsigned ranges
    min_val = -(2 ** (bit_len - 1))
    max_val = 2**bit_len - 1

    if not min_val <= val <= max_val:
        msg = "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
            val, bit_len, item.expr_str
        )
        _raise_error_for_item(msg, item)

    # Always encode on 64 bits first so that the sign bit is extended
    # whatever the requested length
    is_be = state.bo in (None, ByteOrder.BE)
    fmt = (">" if is_be else "<") + ("Q" if val >= 0 else "q")
    encoded = struct.pack(fmt, val)

    # Then only keep the least significant bytes: the last ones in big
    # endian, the first ones in little endian
    byte_len = bit_len // 8

    if is_be:
        return encoded[-byte_len:]

    assert state.bo == ByteOrder.LE
    return encoded[:byte_len]
269f6eb3 2193
320644e2
PP
# Generates and returns the bytes of a fixed-length floating point
# number item instance having the value `val`.
#
# Only the 32-bit and 64-bit lengths are valid; this method reports an
# error through _raise_error_for_item() for any other `item.len`.
#
# The byte order is the one of `state`, big endian when it's `None`.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # `struct` type code for each supported length (bits)
    type_codes = {32: "f", 64: "d"}

    if item.len not in type_codes:
        msg = "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
            item.len, val
        )
        _raise_error_for_item(msg, item)

    if state.bo in (None, ByteOrder.BE):
        bo_code = ">"
    else:
        bo_code = "<"

    return struct.pack(bo_code + type_codes[item.len], val)
2214
320644e2
PP
# Generates and returns the bytes of a fixed-length number item
# instance: evaluates the expression of `item` within `state`, and then
# encodes the result as an integer or as a floating point number
# depending on its exact type.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    num = self._eval_item_expr(item, state, True)

    # Exact type checks on purpose (not isinstance())
    if type(num) is int:
        return self._gen_fl_int_item_inst_data(num, item, state)

    assert type(num) is float
    return self._gen_fl_float_item_inst_data(num, item, state)
05f81895 2227
320644e2
PP
# Generates the bytes of each recorded fixed-length number item
# instance (`self._fl_num_item_insts`) and writes them to `self._data`
# at the offset which the instance indicates.
def _gen_fl_num_item_insts(self):
    for fl_num_inst in self._fl_num_item_insts:
        encoded = self._gen_fl_num_item_inst_data(
            fl_num_inst.item, fl_num_inst.state
        )

        # Write over the slice of `self._data` which starts at the
        # offset of the instance
        begin = fl_num_inst.offset_in_data
        self._data[begin : begin + len(encoded)] = encoded
2adf4336
PP
2237
# Generates the data (`self._data`) and the final state
# (`self._final_state`) from the group item `group`, starting with the
# state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Map each item type to its handler
    self._item_handlers = {
        # Data
        _Byte: self._handle_byte_item,
        _Str: self._handle_str_item,
        _FlNum: self._handle_fl_num_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _SLeb128Int: self._handle_leb128_int_item,
        # State
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _AlignOffset: self._handle_align_offset_item,
        _FillUntil: self._handle_fill_until_item,
        _Label: self._handle_label_item,
        _VarAssign: self._handle_var_assign_item,
        # Structure
        _Group: self._handle_group_item,
        _Rep: self._handle_rep_item,
        _Cond: self._handle_cond_item,
        _MacroExp: self._handle_macro_exp_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # Start without any data
    self._data = bytearray()

    # Handle the root group, keeping its immediate labels (`False`)
    # because the `labels` property offers them
    self._handle_group_item(group, state, False)

    # `state` is now the final one
    self._final_state = state

    # The full state of each fixed-length number item instance is
    # known at this point: generate their bytes
    self._gen_fl_num_item_insts()
2273
71aaa3f7
PP
2274
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables` maps the names of the initial variables (valid
# Python names) to their values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` maps the names of the initial labels (valid Python
# names) to their integral values. A label name must not be the
# reserved name `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Missing mappings mean empty ones
    init_variables = {} if init_variables is None else init_variables
    init_labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data from the result
    parser = _Parser(normand, init_variables, init_labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        init_variables,
        init_labels,
        init_offset,
        init_byte_order,
    )

    # Wrap what the generator offers
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2316
2317
# Parses the command-line arguments of the process and returns the
# resulting `argparse` namespace.
#
# The supported arguments are the initial offset, byte order,
# variables, and labels, as well as `--version` and an optional input
# path.
def _parse_cli_args():
    import argparse

    arg_parser = argparse.ArgumentParser()
    add_arg = arg_parser.add_argument

    # Initial state options
    add_arg(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    add_arg(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    add_arg(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    add_arg(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    add_arg("--version", action="version", version="Normand {}".format(__version__))

    # Optional input path
    add_arg(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return arg_parser.parse_args()
2366
2367
2368# Raises a command-line error with the message `msg`.
2369def _raise_cli_error(msg: str) -> NoReturn:
2370 raise RuntimeError("Command-line error: {}".format(msg))
2371
2372
# Converts the list of `NAME=VAL` strings `args` to a dictionary of
# names to integers and returns it.
#
# `NAME` must match `_py_name_pat` and `VAL` must only contain decimal
# digits; any other entry is a command-line error. `None` yields an
# empty dictionary.
def _dict_from_arg(args: Optional[List[str]]):
    result = {}  # type: LabelsT

    if args is not None:
        # Same pattern for all the entries
        assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

        for entry in args:
            match = assign_pat.match(entry)

            if match is None:
                _raise_cli_error("Invalid assignment {}".format(entry))

            result[match.group(1)] = int(match.group(2))

    return result
2390
2391
# CLI entry point without exception handling.
#
# Reads the Normand input from the path given on the command line (or
# from the standard input when there's none), parses it with the
# initial variables, labels, offset, and byte order which the
# command-line arguments provide, and writes the resulting bytes to the
# standard output.
#
# A parsing error makes the process exit through _fail() with a
# `[PATH:]LINE:COL - MESSAGE` message; an invalid command-line argument
# raises through _raise_cli_error().
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (report the offending value to the user)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order (`None` when not specified)
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # `argparse` only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print (raw bytes, hence the underlying binary buffer)
    sys.stdout.buffer.write(res.data)
2441
2442
2443# Prints the exception message `msg` and exits with status 1.
2444def _fail(msg: str) -> NoReturn:
2445 if not msg.endswith("."):
2446 msg += "."
2447
2448 print(msg, file=sys.stderr)
2449 sys.exit(1)
2450
2451
# CLI entry point: runs the command-line tool, turning any exception
# into an error message and a failure exit through _fail().
def _run_cli():
    try:
        _try_run_cli()
    except Exception as error:
        # Top-level boundary: only show the message to the user
        error_msg = str(error)
        _fail(error_msg)
2458
2459
# Run the command-line tool when this module is executed as a script
# (nothing happens on import).
if __name__ == "__main__":
    _run_cli()
This page took 0.149209 seconds and 4 git commands to generate.