Make `normand.ParseError` contain a list of messages
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
f5dcb24c 33__version__ = "0.15.0"
71aaa3f7 34__all__ = [
320644e2
PP
35 "__author__",
36 "__version__",
71aaa3f7 37 "ByteOrder",
320644e2 38 "LabelsT",
71aaa3f7
PP
39 "parse",
40 "ParseError",
f5dcb24c 41 "ParseErrorMessage",
71aaa3f7 42 "ParseResult",
e57a18e1 43 "TextLocation",
e57a18e1 44 "VariablesT",
71aaa3f7
PP
45]
46
47import re
48import abc
49import ast
50import sys
320644e2 51import copy
71aaa3f7 52import enum
05f81895 53import math
71aaa3f7 54import struct
e57a18e1
PP
55import typing
56from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
57
58
# Text location (line and column numbers).
#
# Instances are built by this module through `_create()`; calling the
# constructor directly raises `NotImplementedError`.
class TextLocation:
    # Creates a text location at line `line_no` and column `col_no`
    # without going through `__init__()`.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Public construction is disabled. The signature takes an explicit
    # `self`, like `ParseErrorMessage.__init__()` and
    # `ParseError.__init__()` do.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by `_create()`.
    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({}, {})".format(self._line_no, self._col_no)
2adf4336 86
71aaa3f7
PP
87
# Base of all items: only remembers the source text location given at
# construction time.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    @property
    def text_loc(self):
        # Source text location.
        return self._text_loc
97
2adf4336
PP
98
# Scalar item: an item which reports a size.
#
# Note that neither this class nor `_Item` uses `abc.ABCMeta`, so
# `abc.abstractmethod` below only marks the property: it doesn't prevent
# instantiation.
class _ScalarItem(_Item):
    @property
    @abc.abstractmethod
    def size(self) -> int:
        # Size, in bytes, of this item (provided by subclasses).
        pass
106
107
108# A repeatable item.
2adf4336 109class _RepableItem:
71aaa3f7
PP
110 pass
111
112
# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        # Byte value.
        return self._val

    @property
    def size(self):
        # A byte item always occupies a single byte
        return 1

    def __repr__(self):
        # The value is shown in hexadecimal
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
71aaa3f7
PP
130
131
# String (already encoded as bytes).
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    @property
    def data(self):
        # Encoded bytes.
        return self._data

    @property
    def size(self):
        # Number of encoded bytes
        return len(self._data)

    def __repr__(self):
        return "_Str({!r}, {!r})".format(self._data, self._text_loc)
71aaa3f7
PP
149
150
# Byte order.
#
# The value of each member is its lowercase name.
@enum.unique
class ByteOrder(enum.Enum):
    BE = "be"  # Big endian
    LE = "le"  # Little endian
159
160
2adf4336
PP
# Byte order setting.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    @property
    def bo(self):
        # Byte order to set.
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
71aaa3f7
PP
173
174
# Label.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    @property
    def name(self):
        # Label name.
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
71aaa3f7
PP
188
189
2adf4336
PP
# Offset setting.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        # Offset value (bytes).
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
203
204
# Offset alignment.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val
        self._pad_val = pad_val

    @property
    def val(self):
        # Alignment value (bits).
        return self._val

    @property
    def pad_val(self):
        # Padding byte value.
        return self._pad_val

    def __repr__(self):
        return "_AlignOffset({!r}, {!r}, {!r})".format(
            self._val, self._pad_val, self._text_loc
        )
71aaa3f7
PP
226
227
228# Mixin of containing an AST expression and its string.
229class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
25ca454b
PP
# Fill until some offset (given by an expression) with a padding byte
# value.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    @property
    def pad_val(self):
        # Padding byte value.
        return self._pad_val

    def __repr__(self):
        return "_FillUntil({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._pad_val, self._text_loc
        )
266
267
2adf4336
PP
# Variable assignment: a variable name and the expression to assign.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    @property
    def name(self):
        # Name.
        return self._name

    def __repr__(self):
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
289
290
269f6eb3
PP
# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    @property
    def len(self):
        # Length (bits).
        return self._len

    @property
    def size(self):
        # Length in whole bytes
        return self._len // 8

    def __repr__(self):
        return "_FlNum({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
316
317
05f81895
PP
# LEB128 integer (common base of the unsigned and signed variants).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the name of the concrete class so that subclasses don't
        # need their own __repr__()
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
331
332
# Unsigned LEB128 integer (everything is inherited from `_Leb128Int`).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
336
337
# Signed LEB128 integer (everything is inherited from `_Leb128Int`).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
341
342
# Group of items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    @property
    def items(self):
        # Contained items.
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
71aaa3f7
PP
356
357
# Repetition item: an item to repeat and the expression attached to
# the repetition.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    @property
    def item(self):
        # Item to repeat.
        return self._item

    def __repr__(self):
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(
            self._item, self._expr_str, self._expr, self._text_loc
        )
379
380
27d52a19
PP
# Conditional item: a condition expression and one item for each
# outcome.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    @property
    def true_item(self):
        # Item when condition is true.
        return self._true_item

    @property
    def false_item(self):
        # Item when condition is false.
        return self._false_item

    def __repr__(self):
        return "_Cond({!r}, {!r}, {!r}, {!r}, {!r})".format(
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
414
415
320644e2
PP
# Macro definition item: a name, parameter names, and a group of
# items.
class _MacroDef(_Item):
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name = name
        self._param_names = param_names
        self._group = group

    @property
    def name(self):
        # Name.
        return self._name

    @property
    def param_names(self):
        # Parameters.
        return self._param_names

    @property
    def group(self):
        # Contained items.
        return self._group

    def __repr__(self):
        return "_MacroDef({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._param_names, self._group, self._text_loc
        )
448
449
# Macro expansion parameter: an expression, its string, and where it
# is in the source text.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr = expr_str, expr
        self._text_loc = text_loc

    @property
    def expr_str(self):
        # Expression string.
        return self._expr_str

    @property
    def expr(self):
        # Expression.
        return self._expr

    @property
    def text_loc(self):
        # Source text location.
        return self._text_loc

    def __repr__(self):
        return "_MacroExpParam({!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._text_loc
        )
476
477
# Macro expansion item: the name of the macro and the expansion
# parameters.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    @property
    def name(self):
        # Name.
        return self._name

    @property
    def params(self):
        # Parameters.
        return self._params

    def __repr__(self):
        return "_MacroExp({!r}, {!r}, {!r})".format(
            self._name, self._params, self._text_loc
        )
2adf4336
PP
506
507
f5dcb24c
PP
# A parsing error message: a string and a text location.
#
# Instances are built by this module through `_create()`; calling the
# constructor directly raises `NotImplementedError`.
class ParseErrorMessage:
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by `_create()`.
    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    @property
    def text(self):
        # Message text.
        return self._text

    @property
    def text_location(self):
        # Source text location.
        return self._text_loc
532
533
# A parsing error containing one or more messages (`ParseErrorMessage`).
#
# Instances are built by this module through `_create()`; calling the
# constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization: `msg` becomes both the exception argument
    # and the first message of the list.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a message built from `msg` and `text_loc`.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    @property
    def messages(self):
        # Parsing error messages.
        #
        # The first message is the most specific one.
        return self._msgs
71aaa3f7
PP
563
564
# Builds a parsing error having the single message `msg` located at
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
568
569
f5dcb24c
PP
# Appends the message `msg`, located at `text_loc`, to the messages of
# the parsing error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
573
574
# Adds the message `msg`, located at `text_loc`, to the parsing error
# `exc`, and then raises `exc` again.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
579
580
e57a18e1
PP
581# Variables dictionary type (for type hints).
582VariablesT = Dict[str, Union[int, float]]
583
584
585# Labels dictionary type (for type hints).
586LabelsT = Dict[str, int]
71aaa3f7
PP
587
588
589# Python name pattern.
590_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
591
592
320644e2
PP
# Macro definition dictionary type: macro name to macro definition
# item.
_MacroDefsT = typing.Dict[str, _MacroDef]
595
596
71aaa3f7
PP
597# Normand parser.
598#
599# The constructor accepts a Normand input. After building, use the `res`
600# property to get the resulting main group.
601class _Parser:
# Builds a parser for the Normand input `normand` and parses it right
# away.
#
# The keys of `variables` and `labels` are the initially known variable
# and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position within it (offset, line, column)
    self._normand = normand
    self._at = 0
    self._line_no, self._col_no = 1, 1

    # Known names
    self._label_names = set(labels)
    self._var_names = set(variables)
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse immediately
    self._parse()
613
# Result (main group).
#
# `self._res` isn't assigned in `__init__()` itself: presumably
# `_parse()` sets it (not visible here).
@property
def res(self):
    result = self._res
    return result
618
320644e2
PP
# Macro definitions (dictionary created empty by `__init__()`).
@property
def macro_defs(self):
    defs = self._macro_defs
    return defs
623
71aaa3f7
PP
# Current text location: a new `TextLocation` object made from the
# current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(  # pyright: ignore[reportPrivateUsage]
        line_no, col_no
    )
630
631 # Returns `True` if this parser is done parsing.
632 def _is_done(self):
633 return self._at == len(self._normand)
634
635 # Returns `True` if this parser isn't done parsing.
636 def _isnt_done(self):
637 return not self._is_done()
638
# Raises a parse error having the message `msg` and located at the
# current text location.
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
643
644 # Tries to make the pattern `pat` match the current substring,
645 # returning the match object and updating `self._at`,
646 # `self._line_no`, and `self._col_no` on success.
647 def _try_parse_pat(self, pat: Pattern[str]):
648 m = pat.match(self._normand, self._at)
649
650 if m is None:
651 return
652
653 # Skip matched string
654 self._at += len(m.group(0))
655
656 # Update line number
657 self._line_no += m.group(0).count("\n")
658
659 # Update column number
660 for i in reversed(range(self._at)):
661 if self._normand[i] == "\n" or i == 0:
662 if i == 0:
663 self._col_no = self._at + 1
664 else:
665 self._col_no = self._at - i
666
667 break
668
669 # Return match object
670 return m
671
672 # Expects the pattern `pat` to match the current substring,
673 # returning the match object and updating `self._at`,
674 # `self._line_no`, and `self._col_no` on success, or raising a parse
675 # error with the message `error_msg` on error.
676 def _expect_pat(self, pat: Pattern[str], error_msg: str):
677 # Match
678 m = self._try_parse_pat(pat)
679
680 if m is None:
681 # No match: error
682 self._raise_error(error_msg)
683
684 # Return match object
685 return m
686
687 # Pattern for _skip_ws_and_comments()
688 _ws_or_syms_or_comments_pat = re.compile(
25ca454b 689 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
71aaa3f7
PP
690 )
691
692 # Skips as many whitespaces, insignificant symbol characters, and
693 # comments as possible.
694 def _skip_ws_and_comments(self):
695 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
696
320644e2
PP
697 # Pattern for _skip_ws()
698 _ws_pat = re.compile(r"\s*")
699
700 # Skips as many whitespaces as possible.
701 def _skip_ws(self):
702 self._try_parse_pat(self._ws_pat)
703
71aaa3f7
PP
# Pattern for _try_parse_hex_byte(): a single hexadecimal digit.
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, insignificant symbols, and comments), returning a byte
# item on success.
#
# Returns `None` if there's no initial nibble; raises a parse error if
# the second nibble is missing.
def _try_parse_hex_byte(self):
    start_loc = self._text_loc

    # High nibble
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        # No match
        return

    # Low nibble (mandatory at this point)
    self._skip_ws_and_comments()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Combine both nibbles
    val = (int(high.group(0), 16) << 4) | int(low.group(0), 16)
    return _Byte(val, start_loc)
71aaa3f7
PP
727
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

# Tries to parse a binary byte (`%` followed with eight bits, most
# significant first), returning a byte item on success.
#
# Returns `None` if there's no `%` prefix; raises a parse error if any
# of the eight bits is missing.
def _try_parse_bin_byte(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # No prefix: not a binary byte
        return

    # Accumulate the eight expected bits
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        bit = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
        val = (val << 1) | int(bit.group(0))

    return _Byte(val, start_loc)
71aaa3f7
PP
751
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a possibly negative
# decimal constant), returning a byte item on success.
#
# Returns `None` if there's no `$` prefix; raises a parse error if the
# constant is missing or if it's outside the -128 to 255 range.
def _try_parse_dec_byte(self):
    start_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No prefix: not a decimal byte
        return

    # Expect the value
    self._skip_ws()
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(m.group("val"))

    if m.group("neg") == "-":
        val = -val

    # Validate
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), start_loc)

    # Negative values become their two's complement
    return _Byte(val % 256, start_loc)
71aaa3f7
PP
781
782 # Tries to parse a byte, returning a byte item on success.
783 def _try_parse_byte(self):
784 # Hexadecimal
785 item = self._try_parse_hex_byte()
786
787 if item is not None:
788 return item
789
790 # Binary
791 item = self._try_parse_bin_byte()
792
793 if item is not None:
794 return item
795
796 # Decimal
797 item = self._try_parse_dec_byte()
798
799 if item is not None:
800 return item
801
# Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# A backslash followed with any character (except a newline): a
# candidate escape sequence within a literal string
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a string, returning a string item on success.
#
# A string is an optional encoding prefix (`u16be`, `u16le`, `u32be`,
# or `u32le`; UTF-8 without it) followed with a double-quoted literal
# string which may contain the escape sequences of
# `_str_escape_seq_strs`. Any other backslash sequence is kept as is.
#
# Returns `None` if there's no string prefix; raises a parse error if
# the closing `"` is missing.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string (this pattern also matches an empty string)
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Replacing one escape character at a time over the whole string
    # would decode an escaped backslash followed with `n` (`\\n`) as a
    # backslash and a newline instead of as a backslash and `n`.
    escape_seq_strs = self._str_escape_seq_strs
    val = self._str_escape_seq_pat.sub(
        lambda esc_m: escape_seq_strs.get(esc_m.group(1), esc_m.group(0)),
        m.group(0),
    )

    # Encode and return item
    return _Str(val.encode(encoding), begin_text_loc)
71aaa3f7 856
320644e2
PP
# Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Pattern for _try_parse_group(): `(`, `!g`, or `!group`
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group, returning a group item on success.
#
# A group opened with `(` must end with `)`; a group opened with `!g`
# or `!group` must end with what `self._block_end_pat` (defined
# elsewhere in this class) matches.
#
# Returns `None` if there's no group prefix; raises a parse error if
# the end of the group is missing.
def _try_parse_group(self):
    start_loc = self._text_loc
    opening = self._try_parse_pat(self._group_prefix_pat)

    if opening is None:
        # No match
        return

    # Contained items
    items = self._parse_items()

    # Select what must close the group depending on how it was opened
    self._skip_ws_and_comments()

    if opening.group(0) == "(":
        closing_pat, closing_str = self._right_paren_pat, ")"
    else:
        closing_pat, closing_str = self._block_end_pat, "!end"

    self._expect_pat(
        closing_pat, "Expecting an item or `{}` (end of group)".format(closing_str)
    )
    return _Group(items, start_loc)
71aaa3f7
PP
891
# Returns a normalized expression string (stripped, with newlines
# replaced with spaces) and the AST expression node which
# ast.parse() builds from it.
#
# Raises a parse error located at `text_loc` if the expression string
# has an invalid syntax.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    norm_expr_str = expr_str.strip().replace("\n", " ")

    try:
        return norm_expr_str, ast.parse(norm_expr_str, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(norm_expr_str),
            text_loc,
        )
907
# Patterns for _try_parse_num_and_attr()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse a value and attribute (fixed length in bits or
# `uleb128`/`sleb128`), returning a value item on success.
#
# Returns `None` if the input doesn't have an expression followed with
# `:` at the current position; raises a parse error if the expression
# or the attribute is invalid.
def _try_parse_num_and_attr(self):
    start_loc = self._text_loc

    # Expression part (everything before `:`)
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # No match
        return

    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), start_loc)

    # Length?
    m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

    if m_len is not None:
        # Fixed-length number item
        return _FlNum(expr_str, expr, int(m_len.group(0)), start_loc)

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    # LEB128 integer item
    if m_leb128.group(1) == "u":
        return _ULeb128Int(expr_str, expr, start_loc)

    return _SLeb128Int(expr_str, expr, start_loc)
71aaa3f7 952
320644e2
PP
# Patterns for _try_parse_var_assign()
#
# The `=` of an assignment must not be followed with another `=`:
# `NAME == ...` is the beginning of a comparison expression, not a
# variable assignment.
_var_assign_name_equal_pat = re.compile(
    r"({})\s*=(?!=)".format(_py_name_pat.pattern)
)
_var_assign_expr_pat = re.compile(r"[^}]+")

# Tries to parse a variable assignment (`NAME = EXPR`), returning a
# variable assignment item on success.
#
# Returns `None` if the input doesn't have a name followed with `=` at
# the current position.
#
# Raises a parse error if the name is reserved, if a label has the same
# name, or if the expression is missing or invalid.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._var_assign_name_equal_pat)

    if m is None:
        # No match
        return

    # Validate name
    name = m.group(1)

    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), begin_text_loc)

    # Expect an expression
    self._skip_ws()
    m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m.group(0), begin_text_loc)

    # Add to known variable names (only once the whole assignment is
    # valid)
    self._var_names.add(name)

    # Return item
    return _VarAssign(
        name,
        expr_str,
        expr,
        begin_text_loc,
    )
997
# Pattern for _try_parse_set_bo(): `be` or `le`
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning a byte
# order setting item on success, or `None` if there's no match.
def _try_parse_set_bo(self):
    start_loc = self._text_loc
    m = self._try_parse_pat(self._bo_pat)

    if m is None:
        # No match
        return

    bo_name = m.group(0)

    if bo_name != "be":
        # The pattern only matches `be` and `le`
        assert bo_name == "le"
        return _SetBo(ByteOrder.LE, start_loc)

    return _SetBo(ByteOrder.BE, start_loc)
71aaa3f7
PP
1019
1020 # Patterns for _try_parse_val_or_bo()
320644e2
PP
1021 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1022 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
71aaa3f7 1023
2adf4336
PP
1024 # Tries to parse a value, a variable assignment, or a byte order
1025 # setting, returning an item on success.
1026 def _try_parse_val_or_var_assign_or_set_bo(self):
71aaa3f7 1027 # Match prefix
2adf4336 1028 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
71aaa3f7
PP
1029 # No match
1030 return
1031
320644e2
PP
1032 self._skip_ws()
1033
2adf4336
PP
1034 # Variable assignment item?
1035 item = self._try_parse_var_assign()
71aaa3f7
PP
1036
1037 if item is None:
269f6eb3
PP
1038 # Number item?
1039 item = self._try_parse_num_and_attr()
71aaa3f7
PP
1040
1041 if item is None:
2adf4336
PP
1042 # Byte order setting item?
1043 item = self._try_parse_set_bo()
71aaa3f7
PP
1044
1045 if item is None:
1046 # At this point it's invalid
2adf4336 1047 self._raise_error(
269f6eb3 1048 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
2adf4336 1049 )
71aaa3f7
PP
1050
1051 # Expect suffix
320644e2 1052 self._skip_ws()
2adf4336 1053 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
71aaa3f7
PP
1054 return item
1055
fc21bb27
PP
1056 # Returns a normalized version (so as to be parseable by int()) of
1057 # the constant integer string `s`, possibly negative, dealing with
1058 # any radix suffix.
1059 @staticmethod
1060 def _norm_const_int(s: str):
1061 neg = ""
1062 pos = s
1063
1064 if s.startswith("-"):
1065 neg = "-"
1066 pos = s[1:]
1067
1068 for r in "xXoObB":
1069 if pos.startswith("0" + r):
1070 # Already correct
1071 return s
1072
1073 # Try suffix
1074 asm_suf_base = {
1075 "h": "x",
1076 "H": "x",
1077 "q": "o",
1078 "Q": "o",
1079 "o": "o",
1080 "O": "o",
1081 "b": "b",
1082 "B": "B",
1083 }
1084
1085 for suf in asm_suf_base:
1086 if pos[-1] == suf:
1087 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
1088
1089 return s
1090
320644e2 1091 # Common constant integer patterns
fc21bb27
PP
1092 _pos_const_int_pat = re.compile(
1093 r"0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+"
1094 )
320644e2 1095 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
71aaa3f7 1096
2adf4336
PP
# Tries to parse an offset setting value (after the initial `<`): a
# positive constant integer, returning an offset setting item on
# success, or `None` if there's no match.
def _try_parse_set_offset_val(self):
    start_loc = self._text_loc
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Normalize any radix suffix so that int() can convert the string
    norm_str = self._norm_const_int(m.group(0))
    return _SetOffset(int(norm_str, 0), start_loc)
71aaa3f7
PP
1111
# Tries to parse a label name (after the initial `<`), returning a
# label item on success, or `None` if there's no name at the current
# position.
#
# Raises a parse error if the name is reserved, if a label already has
# this name, or if a variable has this name.
#
# On success, the name is added to the known label names.
def _try_parse_label_name(self):
    start_loc = self._text_loc
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        # No match
        return

    # Validate: find the first applicable error message, if any
    name = m.group(0)

    if name == _icitte_name:
        error_msg = "`{}` is a reserved label name".format(_icitte_name)
    elif name in self._label_names:
        error_msg = "Duplicate label name `{}`".format(name)
    elif name in self._var_names:
        error_msg = "Existing variable named `{}`".format(name)
    else:
        error_msg = None

    if error_msg is not None:
        _raise_error(error_msg, start_loc)

    # Add to known label names
    self._label_names.add(name)
    return _Label(name, start_loc)
71aaa3f7 1143
2adf4336 1144 # Patterns for _try_parse_label_or_set_offset()
320644e2
PP
1145 _label_set_offset_prefix_pat = re.compile(r"<")
1146 _label_set_offset_suffix_pat = re.compile(r">")
71aaa3f7 1147
2adf4336
PP
1148 # Tries to parse a label or an offset setting, returning an item on
1149 # success.
1150 def _try_parse_label_or_set_offset(self):
71aaa3f7 1151 # Match prefix
2adf4336 1152 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
71aaa3f7
PP
1153 # No match
1154 return
1155
2adf4336 1156 # Offset setting item?
320644e2 1157 self._skip_ws()
2adf4336 1158 item = self._try_parse_set_offset_val()
71aaa3f7
PP
1159
1160 if item is None:
1161 # Label item?
1162 item = self._try_parse_label_name()
1163
1164 if item is None:
1165 # At this point it's invalid
2adf4336 1166 self._raise_error("Expecting a label name or an offset setting value")
71aaa3f7
PP
1167
1168 # Expect suffix
320644e2 1169 self._skip_ws()
2adf4336 1170 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
71aaa3f7
PP
1171 return item
1172
25ca454b
PP
# Pattern for _parse_pad_val()
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value (`~` followed by a positive
# constant integer), returning the padding byte value, or 0 if
# there's no `~` at the current position (after skipping
# whitespaces).
#
# Raises a parse error if `~` isn't followed by a positive constant
# integer or if the value is greater than 255.
def _parse_pad_val(self):
    self._skip_ws()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value: default
        return 0

    self._skip_ws()
    val_text_loc = self._text_loc
    val_match = self._expect_pat(
        self._pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    byte_val = int(self._norm_const_int(val_match.group(0)), 0)

    # Must fit in a single byte
    if byte_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(byte_val),
            val_text_loc,
        )

    return byte_val
1201
# Patterns for _try_parse_align_offset()
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Tries to parse an offset alignment (`@`, an alignment value in
# bits, and an optional padding value), returning an offset
# alignment item (`_AlignOffset`) on success, or `None` if there's
# no `@` at the current position.
#
# Raises a parse error if the alignment is missing or isn't a
# positive multiple of eight.
def _try_parse_align_offset(self):
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No match
        return None

    # Alignment value
    self._skip_ws()
    val_text_loc = self._text_loc
    val_match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(val_match.group(0))
    is_valid = align > 0 and align % 8 == 0

    if not is_valid:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            val_text_loc,
        )

    # Optional padding value, then the item itself
    padding = self._parse_pad_val()
    return _AlignOffset(align, padding, item_text_loc)
676f6189 1240
25ca454b
PP
# Patterns for _try_parse_fill_until()
#
# NOTE(review): nothing in the visible code uses
# `_fill_until_pad_val_prefix_pat` (_parse_pad_val() has its own
# prefix pattern); confirm before removing it.
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Tries to parse a filling (`+`, a constant integer, name, or
# expression, and an optional padding value), returning a filling
# item (`_FillUntil`) on success, or `None` if there's no `+` at the
# current position.
def _try_parse_fill_until(self):
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # No match
        return None

    # Expression, constant integers accepted
    self._skip_ws()
    until_expr_str, until_expr = self._expect_const_int_name_expr(True)

    # Optional padding value, then the item itself
    padding = self._parse_pad_val()
    return _FillUntil(until_expr_str, until_expr, padding, item_text_loc)
676f6189 1263
# Patterns for _expect_const_int_name_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses, in this order of preference:
#
# * A constant integer, only if `accept_const_int` is `True`
#   (a negative one raises a parse error unless `allow_neg` is
#   `True`).
#
# * A name.
#
# * An expression within `{` and `}`.
#
# Returns what self._ast_expr_from_str() returns for the resulting
# expression string and its text location.
#
# Raises a parse error if none of the above is found.
def _expect_const_int_name_expr(
    self, accept_const_int: bool, allow_neg: bool = False
):
    begin_text_loc = self._text_loc

    # Constant integer?
    if accept_const_int:
        int_match = self._try_parse_pat(self._const_int_pat)

        if int_match is not None:
            if int_match.group("neg") == "-" and not allow_neg:
                _raise_error("Expecting a positive constant integer", begin_text_loc)

            return self._ast_expr_from_str(
                self._norm_const_int(int_match.group(0)), begin_text_loc
            )

    # Name?
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is not None:
        return self._ast_expr_from_str(name_match.group(0), begin_text_loc)

    # Expression?
    if self._try_parse_pat(self._inner_expr_prefix_pat) is None:
        # At this point it's invalid: tailor the message to what
        # was acceptable.
        if accept_const_int:
            mid_msg = "a {}constant integer, a name, or `{{`".format(
                "" if allow_neg else "positive "
            )
        else:
            mid_msg = "a name or `{`"

        self._raise_error("Expecting {}".format(mid_msg))

    # The reported location is the beginning of the inner
    # expression, not the `{`.
    self._skip_ws()
    inner_text_loc = self._text_loc
    inner_match = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # Expect `}`
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_match.group(0), inner_text_loc)
1320
27d52a19
PP
1321 # Parses the multiplier expression of a repetition (block or
1322 # post-item) and returns the expression string and AST node.
1323 def _expect_rep_mul_expr(self):
320644e2 1324 return self._expect_const_int_name_expr(True)
27d52a19
PP
1325
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Tries to parse a repetition block (`!repeat` or `!r`, a
# multiplier expression, items, and `!end`), returning a repetition
# item (`_Rep` wrapping a `_Group` of the parsed items) on success,
# or `None` if there's no block prefix at the current position.
def _try_parse_rep_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # No match
        return None

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Items to repeat
    self._skip_ws_and_comments()
    group_text_loc = self._text_loc
    group_items = self._parse_items()

    # End of block
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    group = _Group(group_items, group_text_loc)
    return _Rep(group, mul_expr_str, mul_expr, block_text_loc)
1359
# Patterns for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Tries to parse a conditional block (`!if`, a name or an expression,
# "true" items, an optional `!else` followed by "false" items, and
# `!end`), returning a conditional item (`_Cond`) on success, or
# `None` if there's no `!if` at the current position.
#
# Without `!else`, the "false" group is empty and gets the text
# location of the beginning of the block.
def _try_parse_cond_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # No match
        return None

    # Condition: constant integers aren't accepted here
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_const_int_name_expr(False)

    # "True" items
    self._skip_ws_and_comments()
    true_text_loc = self._text_loc
    true_items = self._parse_items()

    # "False" items: defaults when there's no `!else`
    false_text_loc = block_text_loc
    false_items = []  # type: List[_Item]
    self._skip_ws_and_comments()
    has_else = self._try_parse_pat(self._cond_block_else_pat) is not None

    if has_else:
        self._skip_ws_and_comments()
        false_text_loc = self._text_loc
        false_items = self._parse_items()

    # End of block
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_text_loc)
    false_group = _Group(false_items, false_text_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, block_text_loc)
27d52a19 1408
320644e2
PP
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition (`!macro` or `!m`, a name,
# comma-separated parameter names within `(` and `)`, items, and
# `!end`), adding it to `self._macro_defs` and returning `True` on
# success, or returning `False` if there's no prefix at the current
# position.
#
# Raises a parse error for a duplicate macro name or a duplicate
# parameter name.
#
# The items of the macro are parsed with empty sets of known
# variable and label names; the current sets are put back afterwards.
def _try_parse_macro_def(self):
    def_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # No match
        return False

    # Macro name
    self._skip_ws()
    name_text_loc = self._text_loc
    name_match = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
    macro_name = name_match.group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_text_loc)

    # Opening `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names until `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter names
            break

        # A comma is required before any parameter name except the
        # first one.
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_text_loc = self._text_loc
        param_match = self._expect_pat(_py_name_pat, "Expecting valid parameter name")
        param_name = param_match.group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)

    # Items, parsed with their own sets of known names
    self._skip_ws_and_comments()
    group_text_loc = self._text_loc
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    group_items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # End of block
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register macro
    group = _Group(group_items, group_text_loc)
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, group, def_text_loc
    )
    return True
1495
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion (`m`, `:`, a macro name, and
# comma-separated parameter values within `(` and `)`), returning a
# macro expansion item (`_MacroExp`) on success, or `None` if
# there's no `m` prefix at the current position.
#
# Raises a parse error if the macro name is unknown or if the number
# of parameter values doesn't match the macro definition.
def _try_parse_macro_exp(self):
    exp_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # No match
        return None

    # `:`
    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Name of a known macro
    self._skip_ws()
    name_text_loc = self._text_loc
    name_match = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
    macro_name = name_match.group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), name_text_loc)

    # Opening `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values until `)`
    params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter values
            break

        # A comma is required before any value except the first one
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # Value: constant integers accepted, including negative ones
        self._skip_ws()
        param_text_loc = self._text_loc
        param_expr_str, param_expr = self._expect_const_int_name_expr(True, True)
        params.append(_MacroExpParam(param_expr_str, param_expr, param_text_loc))

    # The expansion must pass exactly what the definition expects
    param_count = len(params)
    expected_count = len(macro_def.param_names)

    if param_count != expected_count:
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                param_count, "" if param_count == 1 else "s", expected_count
            ),
            params_text_loc,
        )

    return _MacroExp(macro_name, params, exp_text_loc)
1568
71aaa3f7
PP
1569 # Tries to parse a base item (anything except a repetition),
1570 # returning it on success.
1571 def _try_parse_base_item(self):
1572 # Byte item?
1573 item = self._try_parse_byte()
1574
1575 if item is not None:
1576 return item
1577
1578 # String item?
1579 item = self._try_parse_str()
1580
1581 if item is not None:
1582 return item
1583
2adf4336
PP
1584 # Value, variable assignment, or byte order setting item?
1585 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
1586
1587 if item is not None:
1588 return item
1589
2adf4336
PP
1590 # Label or offset setting item?
1591 item = self._try_parse_label_or_set_offset()
71aaa3f7 1592
676f6189
PP
1593 if item is not None:
1594 return item
1595
1596 # Offset alignment item?
1597 item = self._try_parse_align_offset()
1598
25ca454b
PP
1599 if item is not None:
1600 return item
1601
1602 # Filling item?
1603 item = self._try_parse_fill_until()
1604
71aaa3f7
PP
1605 if item is not None:
1606 return item
1607
1608 # Group item?
1609 item = self._try_parse_group()
1610
1611 if item is not None:
1612 return item
1613
320644e2 1614 # Repetition block item?
e57a18e1 1615 item = self._try_parse_rep_block()
71aaa3f7 1616
e57a18e1
PP
1617 if item is not None:
1618 return item
1619
27d52a19
PP
1620 # Conditional block item?
1621 item = self._try_parse_cond_block()
1622
1623 if item is not None:
1624 return item
1625
320644e2
PP
1626 # Macro expansion?
1627 item = self._try_parse_macro_exp()
1628
1629 if item is not None:
1630 return item
1631
e57a18e1
PP
1632 # Pattern for _try_parse_rep_post()
1633 _rep_post_prefix_pat = re.compile(r"\*")
1634
1635 # Tries to parse a post-item repetition, returning the expression
1636 # string and AST expression node on success.
1637 def _try_parse_rep_post(self):
71aaa3f7 1638 # Match prefix
e57a18e1 1639 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
71aaa3f7 1640 # No match
2adf4336 1641 return
71aaa3f7 1642
e57a18e1 1643 # Return expression string and AST expression
71aaa3f7 1644 self._skip_ws_and_comments()
e57a18e1 1645 return self._expect_rep_mul_expr()
71aaa3f7 1646
1ca7b5e1
PP
# Tries to parse an item, possibly followed by a post-item
# repetition, returning `True` on success and `None` if there's no
# item at the current position (after skipping whitespaces and
# comments).
#
# Appends any parsed item to `items`: the base item itself, or a
# repetition item (`_Rep`) wrapping it when a post-item repetition
# follows a repeatable base item.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    base_item = self._try_parse_base_item()

    if base_item is None:
        # No item
        return None

    item = base_item

    # Only a repeatable item may be followed by a repetition
    if isinstance(base_item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_ret = self._try_parse_rep_post()

        if rep_ret is not None:
            mul_expr_str, mul_expr = rep_ret
            item = _Rep(base_item, mul_expr_str, mul_expr, rep_text_loc)

    items.append(item)
    return True
71aaa3f7
PP
1671
# Parses and returns items until the input is done or until nothing
# known can be parsed at the current position.
#
# If `accept_macro_defs` is `True`, then macro definitions are also
# accepted (and registered) between items; they're not part of the
# returned list.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got an item
            continue

        if not (accept_macro_defs and self._try_parse_macro_def()):
            # Unknown at this point
            break

    return parsed_items
1691
# Parses the whole Normand input, setting `self._res` to the main
# group item on success.
#
# Raises a parse error if anything remains after the first level
# items (and any trailing whitespaces and comments).
def _parse(self):
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items: macro definitions are accepted here
    main_items = self._parse_items(True)

    # Make sure there's nothing left
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    # Set main group item
    self._res = _Group(main_items, self._text_loc)
1713
1714
# The return type of parse().
#
# Instances can't be built directly (the constructor raises
# `NotImplementedError`): the module builds them with _create().
class ParseResult:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns an instance without calling __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Real initialization, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1772
1773
# Raises a parse error with the message `msg`, located where the
# item `item` is (its `text_loc` property).
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1778
1779
# The `ICITTE` reserved name: it can't be used as a label name and,
# when evaluating an expression, it's bound to the current offset.
_icitte_name = "ICITTE"
1782
1783
2adf4336
PP
1784# Base node visitor.
1785#
1786# Calls the _visit_name() method for each name node which isn't the name
1787# of a call.
1788class _NodeVisitor(ast.NodeVisitor):
1789 def __init__(self):
71aaa3f7
PP
1790 self._parent_is_call = False
1791
1792 def generic_visit(self, node: ast.AST):
1793 if type(node) is ast.Call:
1794 self._parent_is_call = True
1795 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1796 self._visit_name(node.id)
71aaa3f7
PP
1797
1798 super().generic_visit(node)
1799 self._parent_is_call = False
1800
2adf4336
PP
1801 @abc.abstractmethod
1802 def _visit_name(self, name: str):
1803 ...
1804
71aaa3f7 1805
2adf4336
PP
# Expression validator: validates that all the names within the
# expression are allowed.
#
# A name is allowed if it's `ICITTE` or a member of the set of
# allowed names received at construction time; any other visited
# name raises a parse error located at the received text location.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # Help the user with the names which would be legal here
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        if legal_names:
            # The _quoted_ names are sorted, not the bare ones
            legal_names_str = ", ".join(
                sorted("`{}`".format(legal_name) for legal_name in legal_names)
            )
            msg += "; the legal names are {{{}}}".format(legal_names_str)

        _raise_error(msg, self._text_loc)
1836
1837
2adf4336
PP
# Generator state: current variables, labels, offset, and byte order.
#
# The constructor keeps (shallow) copies of `variables` and `labels`,
# not the received objects themselves.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.variables, self.labels = variables.copy(), labels.copy()
        self.offset, self.bo = offset, bo

    def __repr__(self):
        return "_GenState({!r}, {!r}, {!r}, {!r})".format(
            self.variables, self.labels, self.offset, self.bo
        )
1856
1857
# Fixed-length number item instance: read-only record of a
# fixed-length number item, an offset within the generated data, a
# generation state, and a list of parsing error messages.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._item, self._offset_in_data = item, offset_in_data
        self._state, self._parse_error_msgs = state, parse_error_msgs

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state.
    @property
    def state(self):
        return self._state

    # Parsing error messages.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
1887
71aaa3f7 1888
2adf4336 1889# Generator of data and final state from a group item.
71aaa3f7
PP
1890#
1891# Generation happens in memory at construction time. After building, use
1892# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1893# get the resulting context.
2adf4336
PP
1894#
1895# The steps of generation are:
1896#
320644e2
PP
1897# 1. Handle each item in prefix order.
1898#
1899# The handlers append bytes to `self._data` and update some current
1900# state object (`_GenState` instance).
1901#
1902# When handling a fixed-length number item, try to evaluate its
1903# expression using the current state. If this fails, then it might be
1904# because the expression refers to a "future" label: save the current
1905# offset in `self._data` (generated data) and a snapshot of the
1906# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
f5dcb24c
PP
1907# object). _gen_fl_num_item_insts() will deal with this later. A
1908# `_FlNumItemInst` instance also contains a snapshot of the current
1909# parsing error messages (`self._parse_error_msgs`) which need to be
1910# taken into account when handling the instance later.
2adf4336 1911#
320644e2
PP
1912# When handling the items of a group, keep a map of immediate label
1913# names to their offset. Then, after having processed all the items,
1914# update the relevant saved state snapshots in
1915# `self._fl_num_item_insts` with those immediate label values.
1916# _gen_fl_num_item_insts() will deal with this later.
2adf4336 1917#
320644e2
PP
1918# 2. Handle all the fixed-length number item instances of which the
1919# expression evaluation failed before.
2adf4336 1920#
320644e2
PP
1921# At this point, `self._fl_num_item_insts` contains everything that's
1922# needed to evaluate the expressions, including the values of
1923# "future" labels from the point of view of some fixed-length number
1924# item instance.
2adf4336 1925#
f5dcb24c
PP
1926# If an evaluation fails at this point, then it's a user error. Add
1927# to the parsing error all the saved parsing error messages of the
1928# instance. Those additional messages add precious context to the
1929# error.
71aaa3f7
PP
1930class _Gen:
# Builds the generator and immediately generates from `group`
# (self._gen()), starting with a state made of `variables`,
# `labels`, `offset`, and `bo`.
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    self._macro_defs = macro_defs

    # Fixed-length number item instances to handle later
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]

    # Current parsing error messages
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    init_state = _GenState(variables, labels, offset, bo)
    self._gen(group, init_state)
71aaa3f7
PP
1944
# Generated bytes (`self._data`, the buffer to which the item
# handlers append).
@property
def data(self):
    return self._data
1949
# Updated variables (those of the final generation state).
@property
def variables(self):
    return self._final_state.variables
71aaa3f7
PP
1954
# Updated main group labels (those of the final generation state).
@property
def labels(self):
    return self._final_state.labels
71aaa3f7
PP
1959
# Updated offset (the one of the final generation state).
@property
def offset(self):
    return self._final_state.offset
71aaa3f7
PP
1964
# Updated byte order (the one of the final generation state).
@property
def bo(self):
    return self._final_state.bo
1969
320644e2
PP
# Evaluates the expression `expr` of which the original string is
# `expr_str` at the location `text_loc` considering the current
# generation state `state`, and returns the result.
#
# The names available to the expression are the labels of `state`,
# `ICITTE` (current offset of `state`), and the variables of
# `state`; a later one overrides an earlier one having the same
# name.
#
# If `allow_float` is `True`, then the type of the result may be
# `float` too. A `bool` result is converted to `int`.
#
# Raises a parse error if the expression uses an illegal name, if
# its evaluation fails, or if the type of its result is unexpected.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    # Available names: labels first, then `ICITTE`, then variables
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # Compile and evaluate expression node.
    #
    # NOTE(review): this is eval() on user-provided expression code;
    # only the names which aren't called are validated above.
    try:
        val = eval(compile(expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Convert `bool` result type to `int` to normalize
    if type(val) is bool:
        val = int(val)

    # Validate result type
    if allow_float:
        expected_types = {int, float}  # type: Set[type]
        type_msg = "`int` or `float`"
    else:
        expected_types = {int}
        type_msg = "`int`"

    if type(val) not in expected_types:
        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, type_msg, type(val).__name__
            ),
            text_loc,
        )

    return val
2026
320644e2
PP
# Evaluates the expression of `item` (its `expr_str`, `expr`, and
# `text_loc` properties) considering the current generation state
# `state`, and returns the result of _eval_expr().
#
# If `allow_float` is `True`, then the type of the result may be
# `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    expr_str, expr, text_loc = item.expr_str, item.expr, item.text_loc
    return _Gen._eval_expr(expr_str, expr, text_loc, state, allow_float)
2041
# Handles the byte item `item`: appends its value to the generated
# data and advances the offset of `state` by its size.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    self._data.append(item.val)
    state.offset += item.size
2046
# Handles the string item `item`: appends its data to the generated
# data and advances the offset of `state` by its size.
def _handle_str_item(self, item: _Str, state: _GenState):
    self._data += item.data
    state.offset += item.size
2051
# Handles the byte order setting item `item`: only changes the byte
# order of `state` (no data is generated).
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    # Update current byte order
    state.bo = item.bo
2056
# Handles the variable assignment item `item`: evaluates its
# expression (an `int` or `float` result is accepted) and binds the
# result to the variable name within `state` (no data is generated).
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    # Update variable
    state.variables[item.name] = self._eval_item_expr(item, state, True)
2061
# Handles the fixed-length number item `item`.
#
# Raises a parse error if the item is to be encoded on more than
# 8 bits while the byte order of `state` isn't defined.
#
# If the data of the item can't be generated immediately (any
# exception), then records a `_FlNumItemInst` instance within
# `self._fl_num_item_insts` to (try to) generate it later and
# appends zeros as a placeholder meanwhile.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    # Validate current byte order
    if state.bo is None and item.len > 8:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        # Immediate evaluation
        data = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Keep everything needed to generate the bytes of this item
        # later: where they go, and snapshots of the current state
        # and of the current parsing error messages.
        inst = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(inst)

        # Reserve space in `self._data` for this instance
        data = b"\x00" * (item.len // 8)

    # Append bytes and update offset
    self._data += data
    state.offset += len(data)
2095
05f81895
PP
2096 # Returns the size, in bytes, required to encode the value `val`
2097 # with LEB128 (signed version if `is_signed` is `True`).
2098 @staticmethod
2099 def _leb128_size_for_val(val: int, is_signed: bool):
2100 if val < 0:
2101 # Equivalent upper bound.
2102 #
2103 # For example, if `val` is -128, then the full integer for
2104 # this number of bits would be [-128, 127].
2105 val = -val - 1
2106
2107 # Number of bits (add one for the sign if needed)
2108 bits = val.bit_length() + int(is_signed)
2109
2110 if bits == 0:
2111 bits = 1
2112
2113 # Seven bits per byte
2114 return math.ceil(bits / 7)
2115
320644e2
PP
# Handles the LEB128 integer item `item`: evaluates its expression
# (`int` result only), appends the encoded bytes to the generated
# data, and advances the offset of `state` by the encoded size.
#
# The encoding is signed if `item` is exactly an `_SLeb128Int`
# instance, unsigned otherwise.
#
# Raises a parse error if the value is negative while the encoding
# is unsigned: the masking below would otherwise silently encode
# some unrelated positive value (0x7f for -1, for example).
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    # Compute value
    val = self._eval_item_expr(item, state, False)
    is_signed = type(item) is _SLeb128Int

    # An unsigned encoding can't represent a negative value
    if val < 0 and not is_signed:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,} for an unsigned LEB128 encoding".format(
                item.expr_str, val
            ),
            item,
        )

    # Size in bytes
    size = self._leb128_size_for_val(val, is_signed)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= ~0x80

    # Update offset
    state.offset += size
27d52a19 2135
320644e2
PP
# Handles the group item `item`, removing the immediate labels from
# `state` at the end if `remove_immediate_labels` is `True`.
#
# The immediate labels are the label items which are direct
# children of the group. Whether or not they're removed from
# `state`, they're added to the state snapshot of each fixed-length
# number item instance recorded while handling this group.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Instances recorded from this index on belong to this group
    first_inst_index = len(self._fl_num_item_insts)
    immediate_labels = {}  # type: LabelsT

    for subitem in item.items:
        if type(subitem) is _Label:
            # Immediate label: its value is the current offset
            immediate_labels[subitem.name] = state.offset

        self._handle_item(subitem, state)

    if remove_immediate_labels:
        for label_name in immediate_labels:
            del state.labels[label_name]

    # Make the immediate labels visible to the instances recorded
    # while handling this group.
    group_insts = self._fl_num_item_insts[first_inst_index:]

    for inst in group_insts:
        inst.state.labels.update(immediate_labels)
320644e2
PP
# Handles the repetition item `item`: evaluates its expression and
# handles the repeated item that many times.
#
# Reports an error for `item` if the evaluated count is negative.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    # How many times to repeat
    count = _Gen._eval_item_expr(item, state)

    # A negative count makes no sense
    if count < 0:
        error_text = "Invalid expression `{}`: unexpected negative result {:,}".format(
            item.expr_str, count
        )
        _raise_error_for_item(error_text, item)

    # Handle the repeated item `count` times
    for _ in range(count):
        self._handle_item(item.item, state)
2adf4336 2179
# Handles the conditional item `item`: evaluates its expression, and
# then handles `item.true_item` if the result is truthy or
# `item.false_item` otherwise.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    # Select the branch from the evaluated condition
    if _Gen._eval_item_expr(item, state):
        selected_item = item.true_item
    else:
        selected_item = item.false_item

    # Generate the data of the selected branch
    self._handle_item(selected_item, state)
2adf4336 2190
320644e2
PP
# Creates and returns the state with which to handle the items of the
# macro which the macro expansion item `item` expands.
#
# The returned state starts with no labels, the offset and byte order
# of `init_state`, and one variable per macro parameter, each one
# being the value of the corresponding expansion parameter expression
# evaluated with `init_state`.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    # Fresh state sharing only the offset and byte order
    exp_state = _GenState({}, {}, init_state.offset, init_state.bo)

    # Parameter names come from the macro definition
    param_names = self._macro_defs[item.name].param_names

    # Bind each parameter name to the value of its expression
    for name, param in zip(param_names, item.params):
        value = _Gen._eval_expr(
            param.expr_str, param.expr, param.text_loc, init_state, True
        )
        exp_state.variables[name] = value

    return exp_state
2adf4336 2207
320644e2
PP
# Handles the macro expansion item `item`: handles the group of the
# expanded macro with a dedicated state, and then advances
# `state.offset` by the number of bytes which the expansion added to
# `self._data`.
#
# Any `ParseError` occurring meanwhile is passed to _augment_error()
# with a message indicating which macro was being expanded.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    context_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # State dedicated to this expansion
        exp_state = self._eval_macro_exp_params(item, state)

        # Data size before the expansion, to compute what it adds
        init_data_size = len(self._data)

        # Keep the expansion context on the message stack while
        # handling the group of the macro.
        self._parse_error_msgs.append(
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                context_text, item.text_loc
            )
        )
        self._handle_item(self._macro_defs[item.name].group, exp_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        _augment_error(exc, context_text, item.text_loc)

    # The current state only moves by what the expansion generated
    state.offset += len(self._data) - init_data_size
676f6189 2231
320644e2
PP
# Handles the offset setting item `item`: the current offset becomes
# `item.val` (no data is generated).
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    new_offset = item.val
    state.offset = new_offset
2adf4336 2235
# Handles the offset alignment item `item`: rounds `state.offset` up to
# the next multiple of `item.val // 8` and appends one `item.pad_val`
# byte to `self._data` for each skipped offset.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    start_offset = state.offset

    # `item.val // 8` is used as a count of bytes
    boundary = item.val // 8

    # Round up to the next multiple of `boundary`
    state.offset = (start_offset + boundary - 1) // boundary * boundary

    # Pad up to the aligned offset
    pad_len = state.offset - start_offset
    self._data += bytes([item.pad_val] * pad_len)
2adf4336 2242
25ca454b
PP
# Handles the filling item `item`: evaluates its expression to get the
# target offset, appends `item.pad_val` bytes to `self._data` until
# reaching it, and makes it the current offset.
#
# Reports an error for `item` if the target offset is less than the
# current one.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    # Offset to reach
    target_offset = _Gen._eval_item_expr(item, state)

    # Going backwards isn't possible
    if target_offset < state.offset:
        error_text = "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
            item.expr_str, target_offset, state.offset
        )
        _raise_error_for_item(error_text, item)

    # Pad up to the target offset
    pad_len = target_offset - state.offset
    self._data += bytes([item.pad_val] * pad_len)

    # The target offset is now the current one
    state.offset = target_offset
2262
320644e2
PP
# Handles the label item `item`: records the current offset as the
# value of the label named `item.name` in `state.labels`.
def _handle_label_item(self, item: _Label, state: _GenState):
    current_offset = state.offset
    state.labels[item.name] = current_offset
2adf4336 2266
320644e2
PP
# Handles the item `item` by calling the handler registered in
# `self._item_handlers` for its exact type, returning what this
# handler returns.
def _handle_item(self, item: _Item, state: _GenState):
    handler = self._item_handlers[type(item)]
    return handler(item, state)
71aaa3f7 2271
320644e2
PP
# Encodes the integer `val` as the bytes of an instance of the
# fixed-length number item `item` (`item.len` bits) and returns them.
#
# The byte order is `state.bo`, or big endian if it's `None`.
#
# Reports an error for `item` if `val` is outside the range
# [-2^(len - 1), 2^len - 1].
#
# NOTE(review): packing on 64 bits assumes `item.len` is at most 64;
# confirm that this is validated elsewhere.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Validate range
    if not (-(2 ** (item.len - 1)) <= val <= 2**item.len - 1):
        error_text = "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
            val, item.len, item.expr_str
        )
        _raise_error_for_item(error_text, item)

    # No byte order means big endian
    is_big_endian = state.bo in (None, ByteOrder.BE)

    # Always encode on 64 bits first so that the sign bit gets
    # extended whatever the value of `item.len`.
    bo_char = ">" if is_big_endian else "<"
    type_char = "Q" if val >= 0 else "q"
    full_data = struct.pack(bo_char + type_char, val)

    # Then only keep the requested number of bytes
    len_bytes = item.len // 8

    if is_big_endian:
        # Least significant bytes are the last ones
        return full_data[-len_bytes:]

    # Least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return full_data[:len_bytes]
269f6eb3 2307
320644e2
PP
# Encodes the floating point number `val` as the bytes of an instance
# of the fixed-length number item `item` and returns them.
#
# `item.len` must be 32 or 64, otherwise this method reports an error
# for `item`.
#
# The byte order is `state.bo`, or big endian if it's `None`.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # Only single and double precision are possible
    if item.len not in (32, 64):
        error_text = "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
            item.len, val
        )
        _raise_error_for_item(error_text, item)

    # Build the `struct` format: byte order, then precision
    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"

    # Encode
    return struct.pack(bo_char + type_char, val)
2328
320644e2
PP
# Evaluates the expression of the fixed-length number item `item` with
# `state` and returns the encoded bytes of the result, using the
# integer or the floating point number encoder depending on the exact
# type of the result.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    # Value to encode
    val = self._eval_item_expr(item, state, True)

    # Integer
    if type(val) is int:
        return self._gen_fl_int_item_inst_data(val, item, state)

    # Anything else is expected to be a floating point number
    assert type(val) is float
    return self._gen_fl_float_item_inst_data(val, item, state)
05f81895 2341
320644e2
PP
# Generates the bytes of each recorded fixed-length number item
# instance and writes them into `self._data`, starting at the offset
# recorded for the instance.
#
# If generating the bytes of an instance raises `ParseError`, then this
# method adds the parse error messages saved for this instance to the
# exception before raising it again.
def _gen_fl_num_item_insts(self):
    for fl_num_inst in self._fl_num_item_insts:
        try:
            encoded = self._gen_fl_num_item_inst_data(
                fl_num_inst.item, fl_num_inst.state
            )
        except ParseError as exc:
            # Add the saved messages, last saved first
            for saved_msg in reversed(fl_num_inst.parse_error_msgs):
                _add_error_msg(exc, saved_msg.text, saved_msg.text_location)

            raise

        # Overwrite the bytes at the recorded location
        begin = fl_num_inst.offset_in_data
        self._data[begin : begin + len(encoded)] = encoded
2adf4336
PP
2359
# Generates the whole data (`self._data`) from the group item `group`,
# starting with the state `state`, which also becomes the final state
# (`self._final_state`).
def _gen(self, group: _Group, state: _GenState):
    # Start without any data
    self._data = bytearray()

    # Dispatch table for _handle_item(): exact item type to handler
    self._item_handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # Handle the root group, keeping its immediate labels because the
    # `labels` property offers them.
    self._handle_group_item(group, state, False)

    # `state` was updated in place: it's the final state
    self._final_state = state

    # The fixed-length number item instances now have their full
    # state: generate their bytes.
    self._gen_fl_num_item_insts()
2395
71aaa3f7
PP
2396
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Replace missing dictionaries with empty ones
    init_variables = {} if init_variables is None else init_variables
    init_labels = {} if init_labels is None else init_labels

    # Parse the text first, and then generate the data from the result
    parser = _Parser(normand, init_variables, init_labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        init_variables,
        init_labels,
        init_offset,
        init_byte_order,
    )

    # Wrap the data and the final state
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2438
2439
f5dcb24c
PP
# Raises `RuntimeError` to report the command-line error described by
# `msg`.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2443
2444
# Converts the list of `NAME=VAL` strings `args` to a dictionary
# mapping each name to its integer value and returns it.
#
# `VAL` must be a sequence of decimal digits. `args` may be `None`, in
# which case the returned dictionary is empty.
#
# Raises a command-line error for any entry which doesn't have the
# expected form.
def _dict_from_arg(args: Optional[List[str]]):
    result = {}  # type: LabelsT

    # Nothing to convert
    if args is None:
        return result

    for entry in args:
        match = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), entry)

        if match is None:
            _raise_cli_error("Invalid assignment {}".format(entry))

        # First group is the name, second one is the value
        name, val_str = match.group(1, 2)
        result[name] = int(val_str)

    return result
2462
2463
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The Normand input text is the contents of the input file, or what's
# read from the standard input when there's no input file path.
#
# Raises a command-line error if a `--var` or `--label` argument is
# malformed, or if the initial offset is negative.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order (`None` when not specified)
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The argument parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, labels
71aaa3f7 2547
71aaa3f7 2548
f5dcb24c
PP
# Parses the Normand input text `normand` with the initial offset
# `offset`, byte order `bo`, variables `variables`, and labels
# `labels`, and then writes the resulting bytes to the standard output.
#
# This function doesn't handle any exception.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    res = parse(normand, variables, labels, offset, bo)

    # Raw bytes: write to the underlying binary stream
    sys.stdout.buffer.write(res.data)
71aaa3f7
PP
2558
2559
# Prints the message `msg` to the standard error, without any
# surrounding whitespace and ending with a period, and then exits with
# status 1.
def _fail(msg: str) -> NoReturn:
    # Strip first so that trailing whitespace (a final newline, for
    # example) can't hide an existing final period and lead to a stray
    # one on its own.
    msg = msg.strip()

    if not msg.endswith("."):
        msg += "."

    print(msg, file=sys.stderr)
    sys.exit(1)
2567
2568
# CLI entry point: parses the command-line arguments, runs the tool,
# and reports any error through _fail().
#
# A `ParseError` is reported as one line per message, the last message
# of the exception first, each line having the form
# `[PATH:]LINE:COL - TEXT` where `PATH` is the absolute input file
# path, if any.
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        # The first element is the input path: the rest is what
        # _run_cli_with_args() expects.
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        input_path = args[0]
        prefix = "" if input_path is None else "{}:".format(os.path.abspath(input_path))
        lines = []  # type: List[str]

        for msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                msg.text_location.line_no,
                msg.text_location.col_no,
                msg.text,
            )

            # End the line with a period unless it already ends with
            # some punctuation.
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2600
2601
# Run the command-line tool when this module is executed as a script
# (not when it's imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.133689 seconds and 4 git commands to generate.