Allow constant floating point numbers as macro expansion parameter
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.17.0"

# Names which make up the public API of this module.
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseErrorMessage",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
46
47import re
48import abc
49import ast
50import sys
320644e2 51import copy
71aaa3f7 52import enum
05f81895 53import math
71aaa3f7 54import struct
e57a18e1
PP
55import typing
56from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
57
58
59# Text location (line and column numbers).
class TextLocation:
    # Builds a text location from the line number `line_no` and the
    # column number `col_no`.
    #
    # This is the only way to get an instance: the regular constructor
    # always raises.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    # Direct instantiation isn't supported.
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({0._line_no}, {0._col_no})".format(self)
2adf4336 86
71aaa3f7
PP
87
88# Any item.
class _Item:
    # Builds an item found at the source text location `text_loc`.
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Where this item was found in the source text.
    @property
    def text_loc(self):
        return self._text_loc
97
2adf4336
PP
98
99# Scalar item.
class _ScalarItem(_Item):
    # Size of this item (bytes).
    #
    # Concrete scalar items are expected to override this property.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
106
107
108# A repeatable item.
2adf4336 109class _RepableItem:
71aaa3f7
PP
110 pass
111
112
113# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # Builds a byte item having the value `val` and found at
    # `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Value of this byte.
    @property
    def val(self):
        return self._val

    # A byte item always occupies a single byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
71aaa3f7
PP
130
131
132# String.
class _Str(_ScalarItem, _RepableItem):
    # Builds a string item from the already encoded bytes `data`,
    # found at `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes of this string.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({!r}, {!r})".format(self._data, self._text_loc)
71aaa3f7
PP
149
150
151# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    BE = "be"  # Big endian
    LE = "le"  # Little endian
159
160
2adf4336
PP
161# Byte order setting.
class _SetBo(_Item):
    # Builds a byte order setting item which selects `bo`, found at
    # `text_loc`.
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
71aaa3f7
PP
173
174
175# Label.
class _Label(_Item):
    # Builds a label item named `name` and found at `text_loc`.
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Name of this label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
71aaa3f7
PP
188
189
2adf4336
PP
190# Offset setting.
class _SetOffset(_Item):
    # Builds an offset setting item which sets the offset to `val`,
    # found at `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset to set (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
203
204
205# Offset alignment.
class _AlignOffset(_Item):
    # Builds an offset alignment item with the alignment `val` and the
    # padding byte value `pad_val`, found at `text_loc`.
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val
        self._pad_val = pad_val

    # Alignment (bits).
    @property
    def val(self):
        return self._val

    # Value of the padding bytes.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        return "_AlignOffset({!r}, {!r}, {!r})".format(
            self._val, self._pad_val, self._text_loc
        )
71aaa3f7
PP
226
227
228# Mixin of containing an AST expression and its string.
229class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
25ca454b
PP
245# Fill until some offset.
class _FillUntil(_Item, _ExprMixin):
    # Builds a "fill until" item: the target offset is the expression
    # `expr` (string `expr_str`) and the filling byte value is
    # `pad_val`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Value of the padding bytes.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        return "_FillUntil({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._pad_val, self._text_loc
        )
266
267
2adf4336
PP
268# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Builds a variable assignment item: the variable named `name`
    # gets the value of the expression `expr` (string `expr_str`).
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the assigned variable.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
289
290
269f6eb3
PP
291# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    # Builds a fixed-length number item: the value is the expression
    # `expr` (string `expr_str`) and the length is `len` bits.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight, rounded down.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        return "_FlNum({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
316
317
05f81895
PP
318# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # Builds an LEB128 integer item of which the value is the
    # expression `expr` (string `expr_str`).
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the concrete class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
331
332
333# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Only the type differs from the base LEB128 integer item.
    pass
336
337
338# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Only the type differs from the base LEB128 integer item.
    pass
341
342
71aaa3f7 343# Group of items.
class _Group(_Item, _RepableItem):
    # Builds a group item containing `items`, found at `text_loc`.
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Items of this group.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
71aaa3f7
PP
356
357
358# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    # Builds a repetition item: `item` is what gets repeated and the
    # expression `expr` (string `expr_str`) accompanies it.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Repeated item.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(
            self._item, self._expr_str, self._expr, self._text_loc
        )
379
380
27d52a19
PP
381# Conditional item.
class _Cond(_Item, _ExprMixin):
    # Builds a conditional item: the condition is the expression `expr`
    # (string `expr_str`); `true_item` and `false_item` are the two
    # possible outcomes.
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item to use when the condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item to use when the condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        return "_Cond({!r}, {!r}, {!r}, {!r}, {!r})".format(
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
414
415
320644e2
PP
416# Macro definition item.
class _MacroDef(_Item):
    # Builds a macro definition item named `name`, having the
    # parameter names `param_names` and the body `group`.
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name = name
        self._param_names = param_names
        self._group = group

    # Macro name.
    @property
    def name(self):
        return self._name

    # Names of the parameters.
    @property
    def param_names(self):
        return self._param_names

    # Group holding the items of this macro.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        return "_MacroDef({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._param_names, self._group, self._text_loc
        )
448
449
450# Macro expansion parameter.
class _MacroExpParam:
    # Builds a macro expansion parameter from the expression `expr`
    # (string `expr_str`) found at `text_loc`.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr = expr_str, expr
        self._text_loc = text_loc

    # Expression as a string.
    @property
    def expr_str(self):
        return self._expr_str

    # AST expression node.
    @property
    def expr(self):
        return self._expr

    # Where this parameter was found in the source text.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        return "_MacroExpParam({!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._text_loc
        )
476
477
478# Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    # Builds a macro expansion item: expands the macro named `name`
    # with the parameters `params`.
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name of the macro to expand.
    @property
    def name(self):
        return self._name

    # Expansion parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        return "_MacroExp({!r}, {!r}, {!r})".format(
            self._name, self._params, self._text_loc
        )
2adf4336
PP
506
507
f5dcb24c
PP
508# A parsing error message: a string and a text location.
class ParseErrorMessage:
    # Builds a message having the text `text` and related to the
    # source text location `text_loc`.
    #
    # This is the only way to get an instance: the regular constructor
    # always raises.
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    # Direct instantiation isn't supported.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by _create().
    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Text of this message.
    @property
    def text(self):
        return self._text

    # Source text location which this message is about.
    @property
    def text_location(self):
        return self._text_loc
532
533
534# A parsing error containing one or more messages (`ParseErrorMessage`).
71aaa3f7
PP
class ParseError(RuntimeError):
    # Builds a parsing error having the single initial message `msg`
    # related to the source text location `text_loc`.
    #
    # This is the only way to get an instance: the regular constructor
    # always raises.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    # Direct instantiation isn't supported.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by _create(): `msg` also becomes
    # the argument of the underlying `RuntimeError`.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a new message (`msg` at `text_loc`) to this error.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Messages of this parsing error, in the order they were added.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
71aaa3f7
PP
563
564
565# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # `ParseError` can't be instantiated directly: go through its
    # private factory.
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
568
569
f5dcb24c
PP
570# Adds a message to the parsing error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    # Module-level access point to the private ParseError._add_msg().
    exc._add_msg(msg=msg, text_loc=text_loc)  # pyright: ignore[reportPrivateUsage]
573
574
575# Appends a message to the parsing error `exc` and reraises it.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    # Record the extra message first, then let the very same exception
    # object continue its way up.
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
579
580
e57a18e1
PP
581# Variables dictionary type (for type hints).
# Maps a variable name to its numeric value (for type hints).
VariablesT = Dict[str, Union[int, float]]


# Maps a label name to its integer value (for type hints).
LabelsT = Dict[str, int]


# Matches a Python name.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")


# Maps a macro name to its definition item.
_MacroDefsT = Dict[str, _MacroDef]
595
596
71aaa3f7
PP
597# Normand parser.
598#
599# The constructor accepts a Normand input. After building, use the `res`
600# property to get the resulting main group.
601class _Parser:
602 # Builds a parser to parse the Normand input `normand`, parsing
603 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input text and index of the next character to parse
    self._normand = normand
    self._at = 0

    # Text location of the next character to parse
    self._line_no = 1
    self._col_no = 1

    # Names which are already taken: only the keys of `labels` and
    # `variables` are needed here
    self._label_names = set(labels)
    self._var_names = set(variables)

    # Macro definitions found so far
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse immediately
    self._parse()
613
614 # Result (main group).
615 @property
616 def res(self):
617 return self._res
618
320644e2
PP
619 # Macro definitions.
620 @property
621 def macro_defs(self):
622 return self._macro_defs
623
71aaa3f7
PP
624 # Current text location.
@property
def _text_loc(self):
    # A new object on each access: a snapshot of the current line and
    # column numbers
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
630
631 # Returns `True` if this parser is done parsing.
632 def _is_done(self):
633 return self._at == len(self._normand)
634
635 # Returns `True` if this parser isn't done parsing.
636 def _isnt_done(self):
637 return not self._is_done()
638
639 # Raises a parse error, creating it using the message `msg` and the
640 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Delegate to the module-level function of the same name, using
    # the current text location
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
643
644 # Tries to make the pattern `pat` match the current substring,
645 # returning the match object and updating `self._at`,
646 # `self._line_no`, and `self._col_no` on success.
647 def _try_parse_pat(self, pat: Pattern[str]):
648 m = pat.match(self._normand, self._at)
649
650 if m is None:
651 return
652
653 # Skip matched string
654 self._at += len(m.group(0))
655
656 # Update line number
657 self._line_no += m.group(0).count("\n")
658
659 # Update column number
660 for i in reversed(range(self._at)):
661 if self._normand[i] == "\n" or i == 0:
662 if i == 0:
663 self._col_no = self._at + 1
664 else:
665 self._col_no = self._at - i
666
667 break
668
669 # Return match object
670 return m
671
672 # Expects the pattern `pat` to match the current substring,
673 # returning the match object and updating `self._at`,
674 # `self._line_no`, and `self._col_no` on success, or raising a parse
675 # error with the message `error_msg` on error.
676 def _expect_pat(self, pat: Pattern[str], error_msg: str):
677 # Match
678 m = self._try_parse_pat(pat)
679
680 if m is None:
681 # No match: error
682 self._raise_error(error_msg)
683
684 # Return match object
685 return m
686
687 # Pattern for _skip_ws_and_comments()
688 _ws_or_syms_or_comments_pat = re.compile(
25ca454b 689 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
71aaa3f7
PP
690 )
691
692 # Skips as many whitespaces, insignificant symbol characters, and
693 # comments as possible.
694 def _skip_ws_and_comments(self):
695 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
696
320644e2
PP
697 # Pattern for _skip_ws()
698 _ws_pat = re.compile(r"\s*")
699
700 # Skips as many whitespaces as possible.
701 def _skip_ws(self):
702 self._try_parse_pat(self._ws_pat)
703
71aaa3f7
PP
704 # Pattern for _try_parse_hex_byte()
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated
# by whitespaces, insignificant symbols, and comments), returning a
# byte item on success.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc
    high_m = self._try_parse_pat(self._nibble_pat)

    if high_m is None:
        # Not a hexadecimal byte
        return

    # With a first nibble, the second one is mandatory
    self._skip_ws_and_comments()
    low_m = self._expect_pat(
        self._nibble_pat, "Expecting another hexadecimal nibble"
    )

    # Combine both nibbles
    val = (int(high_m.group(0), 16) << 4) | int(low_m.group(0), 16)
    return _Byte(val, begin_text_loc)
71aaa3f7
PP
727
728 # Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%+")

# Tries to parse one or more binary bytes, returning a single byte
# item, or a group of byte items when there's more than one `%`
# prefix, on success.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc
    prefix_m = self._try_parse_pat(self._bin_byte_prefix_pat)

    if prefix_m is None:
        # Not a binary byte
        return

    # One byte of eight bits per `%` prefix character
    byte_items = []  # type: List[_Item]

    for _ in range(len(prefix_m.group(0))):
        self._skip_ws_and_comments()
        byte_text_loc = self._text_loc
        val = 0

        for _ in range(8):
            self._skip_ws_and_comments()
            bit_m = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )

            # Most significant bit first
            val = (val << 1) | int(bit_m.group(0))

        byte_items.append(_Byte(val, byte_text_loc))

    if len(byte_items) == 1:
        # Single byte: no need for a group
        return byte_items[0]

    return _Group(byte_items, begin_text_loc)
71aaa3f7
PP
767
768 # Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a decimal constant
# from -128 to 255), returning a byte item on success.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # Not a decimal byte
        return

    # With the prefix, the value is mandatory
    self._skip_ws()
    val_m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(val_m.group("val"))

    if val_m.group("neg") == "-":
        val = -val

    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # The modulo maps a negative value to its two's complement
    return _Byte(val % 256, begin_text_loc)
71aaa3f7
PP
797
798 # Tries to parse a byte, returning a byte item on success.
def _try_parse_byte(self):
    # Try each kind in turn, keeping the original order: hexadecimal,
    # binary, and then decimal
    try_parse_funcs = (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            return item
817
818 # Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# A single escape sequence: a backslash followed with any character
# (except a newline)
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a literal string, returning a string item on success.
#
# Without a `u16be`, `u16le`, `u32be`, or `u32le` prefix, the string
# is encoded as UTF-8.
#
# Raises a parsing error when the closing `"` is missing.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Successive whole-string replacements, one per escape character,
    # would be wrong: with `\\n` (escaped backslash followed with
    # `n`), the `\n` replacement would consume the second backslash
    # and produce a newline. Unknown escape sequences are kept as is.
    escape_seq_strs = self._str_escape_seq_strs
    val = self._str_escape_seq_pat.sub(
        lambda esc_m: escape_seq_strs.get(esc_m.group(1), esc_m.group(0)),
        m.group(0),
    )

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
71aaa3f7 872
320644e2
PP
873 # Common right parenthesis pattern
# Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Pattern for _try_parse_group()
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group (either `(` ... `)` or `!group` ... `!end`),
# returning a group item on success.
def _try_parse_group(self):
    begin_text_loc = self._text_loc
    open_m = self._try_parse_pat(self._group_prefix_pat)

    if open_m is None:
        # Not a group
        return

    # Contained items
    items = self._parse_items()

    # The expected end of group depends on how the group started
    self._skip_ws_and_comments()

    if open_m.group(0) == "(":
        end_pat, end_str = self._right_paren_pat, ")"
    else:
        end_pat, end_str = self._block_end_pat, "!end"

    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )
    return _Group(items, begin_text_loc)
71aaa3f7
PP
907
908 # Returns a stripped expression string and an AST expression node
909 # from the expression string `expr_str` at text location `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    # Normalize: no surrounding whitespaces and a single line
    norm_expr_str = expr_str.strip().replace("\n", " ")

    try:
        # Parse as a single Python expression
        return norm_expr_str, ast.parse(norm_expr_str, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(norm_expr_str),
            text_loc,
        )
923
269f6eb3 924 # Patterns for _try_parse_num_and_attr()
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse an expression followed with `:` and an attribute
# (a fixed length in bits, `uleb128`, or `sleb128`), returning a
# fixed-length number item or an LEB128 integer item on success.
def _try_parse_num_and_attr(self):
    begin_text_loc = self._text_loc
    expr_m = self._try_parse_pat(self._val_expr_pat)

    if expr_m is None:
        # Not an expression followed with `:`
        return

    expr_str, expr = self._ast_expr_from_str(expr_m.group(1), begin_text_loc)

    # Fixed length?
    len_m = self._try_parse_pat(self._fl_num_len_attr_pat)

    if len_m is not None:
        return _FlNum(expr_str, expr, int(len_m.group(0)), begin_text_loc)

    # LEB128?
    leb128_m = self._try_parse_pat(self._leb128_int_attr_pat)

    if leb128_m is None:
        # Neither one: invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    if leb128_m.group(1) == "u":
        item_cls = _ULeb128Int
    else:
        item_cls = _SLeb128Int

    return item_cls(expr_str, expr, begin_text_loc)
71aaa3f7 968
320644e2
PP
969 # Patterns for _try_parse_var_assign()
_var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
_var_assign_expr_pat = re.compile(r"[^}]+")

# Tries to parse a variable assignment (name, `=`, and expression),
# returning a variable assignment item on success.
#
# The name becomes a known variable name.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc
    name_m = self._try_parse_pat(self._var_assign_name_equal_pat)

    if name_m is None:
        # Not a variable assignment
        return

    name = name_m.group(1)

    # The name must be neither reserved nor the one of a known label
    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), begin_text_loc)

    # Everything up to the next `}` is the expression
    self._skip_ws()
    expr_m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(expr_m.group(0), begin_text_loc)

    # Only remember the name once the expression is known to be valid
    self._var_names.add(name)
    return _VarAssign(name, expr_str, expr, begin_text_loc)
1013
2adf4336 1014 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning a byte
# order setting item on success.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    bo_m = self._try_parse_pat(self._bo_pat)

    if bo_m is None:
        # Not a byte order name
        return

    # The pattern only matches `be` or `le`, which are exactly the
    # values of the `ByteOrder` enumerators
    return _SetBo(ByteOrder(bo_m.group(0)), begin_text_loc)
71aaa3f7
PP
1035
1036 # Patterns for _try_parse_val_or_bo()
320644e2
PP
_val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
_val_var_assign_set_bo_suffix_pat = re.compile(r"\}")

# Tries to parse what's between `{` and `}`: a variable assignment, a
# number with its attribute, or a byte order setting, returning the
# corresponding item on success.
def _try_parse_val_or_var_assign_or_set_bo(self):
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        # No `{`
        return

    self._skip_ws()

    # First kind to produce an item wins; the order matters
    try_parse_funcs = (
        self._try_parse_var_assign,
        self._try_parse_num_and_attr,
        self._try_parse_set_bo,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            break
    else:
        # Nothing matched: invalid
        self._raise_error(
            "Expecting a fixed-length number, a variable assignment, or a byte order setting"
        )

    # Expect `}`
    self._skip_ws()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
1071
fc21bb27
PP
1072 # Returns a normalized version (so as to be parseable by int()) of
1073 # the constant integer string `s`, possibly negative, dealing with
1074 # any radix suffix.
1075 @staticmethod
1076 def _norm_const_int(s: str):
1077 neg = ""
1078 pos = s
1079
1080 if s.startswith("-"):
1081 neg = "-"
1082 pos = s[1:]
1083
1084 for r in "xXoObB":
1085 if pos.startswith("0" + r):
1086 # Already correct
1087 return s
1088
1089 # Try suffix
1090 asm_suf_base = {
1091 "h": "x",
1092 "H": "x",
1093 "q": "o",
1094 "Q": "o",
1095 "o": "o",
1096 "O": "o",
1097 "b": "b",
1098 "B": "B",
1099 }
1100
1101 for suf in asm_suf_base:
1102 if pos[-1] == suf:
1103 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
1104
1105 return s
1106
320644e2 1107 # Common constant integer patterns
# Common constant integer patterns: positive only, and possibly
# negative
_pos_const_int_pat = re.compile(
    r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
)
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))

# Tries to parse an offset setting value, a positive constant integer
# (after the initial `<`), returning an offset setting item on
# success.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc
    val_m = self._try_parse_pat(self._pos_const_int_pat)

    if val_m is None:
        # Not a constant integer
        return

    # Base 0: int() relies on the radix prefix of the normalized string
    norm_val_str = self._norm_const_int(val_m.group(0))
    return _SetOffset(int(norm_val_str, 0), begin_text_loc)
71aaa3f7
PP
1127
# Tries to parse a label name (what follows the initial `<`).
#
# On success, registers the name in `self._label_names` and returns a
# `_Label` item; returns `None` if there's no name at the current
# position.
#
# Raises a parse error if the name is the reserved `ICITTE` name, an
# already known label name, or an already known variable name.
def _try_parse_label_name(self):
    start_text_loc = self._text_loc
    match = self._try_parse_pat(_py_name_pat)

    if match is None:
        # No match
        return None

    label_name = match.group(0)

    # Validate: the first failing check decides the error message
    error_msg = None

    if label_name == _icitte_name:
        error_msg = "`{}` is a reserved label name".format(_icitte_name)
    elif label_name in self._label_names:
        error_msg = "Duplicate label name `{}`".format(label_name)
    elif label_name in self._var_names:
        error_msg = "Existing variable named `{}`".format(label_name)

    if error_msg is not None:
        _raise_error(error_msg, start_text_loc)

    # Now a known label name
    self._label_names.add(label_name)
    return _Label(label_name, start_text_loc)
71aaa3f7 1159
2adf4336 1160 # Patterns for _try_parse_label_or_set_offset()
320644e2
PP
1161 _label_set_offset_prefix_pat = re.compile(r"<")
1162 _label_set_offset_suffix_pat = re.compile(r">")
71aaa3f7 1163
2adf4336
PP
1164 # Tries to parse a label or an offset setting, returning an item on
1165 # success.
1166 def _try_parse_label_or_set_offset(self):
71aaa3f7 1167 # Match prefix
2adf4336 1168 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
71aaa3f7
PP
1169 # No match
1170 return
1171
2adf4336 1172 # Offset setting item?
320644e2 1173 self._skip_ws()
2adf4336 1174 item = self._try_parse_set_offset_val()
71aaa3f7
PP
1175
1176 if item is None:
1177 # Label item?
1178 item = self._try_parse_label_name()
1179
1180 if item is None:
1181 # At this point it's invalid
2adf4336 1182 self._raise_error("Expecting a label name or an offset setting value")
71aaa3f7
PP
1183
1184 # Expect suffix
320644e2 1185 self._skip_ws()
2adf4336 1186 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
71aaa3f7
PP
1187 return item
1188
25ca454b
PP
1189 # Pattern for _parse_pad_val()
1190 _pad_val_prefix_pat = re.compile(r"~")
1191
1192 # Tries to parse a padding value, returning the padding value, or 0
1193 # if none.
1194 def _parse_pad_val(self):
1195 # Padding value?
1196 self._skip_ws()
1197 pad_val = 0
1198
1199 if self._try_parse_pat(self._pad_val_prefix_pat) is not None:
1200 self._skip_ws()
1201 pad_val_text_loc = self._text_loc
1202 m = self._expect_pat(
1203 self._pos_const_int_pat,
1204 "Expecting a positive constant integer (byte value)",
1205 )
1206
1207 # Validate
fc21bb27 1208 pad_val = int(self._norm_const_int(m.group(0)), 0)
25ca454b
PP
1209
1210 if pad_val > 255:
1211 _raise_error(
1212 "Invalid padding byte value {}".format(pad_val),
1213 pad_val_text_loc,
1214 )
1215
1216 return pad_val
1217
# Patterns for _try_parse_align_offset(): the `@` prefix and the
# decimal alignment value
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Tries to parse an offset alignment (`@`, an alignment in bits, and an
# optional padding value).
#
# Returns an `_AlignOffset` item, or `None` if the current position
# doesn't start with `@`.
#
# Raises a parse error if the alignment is missing or isn't a positive
# multiple of eight.
def _try_parse_align_offset(self):
    start_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No `@`: not for us
        return None

    # Alignment value
    self._skip_ws()
    val_text_loc = self._text_loc
    match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    alignment = int(match.group(0))

    if not (alignment > 0 and alignment % 8 == 0):
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                alignment
            ),
            val_text_loc,
        )

    # Optional padding value, then the item itself
    return _AlignOffset(alignment, self._parse_pad_val(), start_text_loc)
676f6189 1256
dbd84e74
PP
# Patterns for _expect_expr(): `{`, the inner expression (anything but
# `}`), `}`, and a constant floating point number
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")
_const_float_pat = re.compile(
    r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.?))(?:[Ee][+-]?\d+)?\b"
)

# Parses an expression outside a `{`/`}` context.
#
# The accepted forms are tried in this order:
#
# 1. If `accept_const_float` is `True`: a constant floating point
#    number.
#
# 2. If `accept_const_int` is `True`: a constant integer; a negative
#    one is a parse error unless `allow_neg_int` is `True`.
#
# 3. A Python name.
#
# 4. A Python expression within `{` and `}`.
#
# Returns whatever self._ast_expr_from_str() returns for the expression
# string (callers unpack it as an expression string and an AST
# expression).
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
):
    start_text_loc = self._text_loc

    # Constant floating point number?
    if accept_const_float:
        float_m = self._try_parse_pat(self._const_float_pat)

        if float_m is not None:
            return self._ast_expr_from_str(float_m.group(0), start_text_loc)

    # Constant integer?
    if accept_const_int:
        int_m = self._try_parse_pat(self._const_int_pat)

        if int_m is not None:
            if not allow_neg_int and int_m.group("neg") == "-":
                _raise_error(
                    "Expecting a positive constant integer", start_text_loc
                )

            return self._ast_expr_from_str(
                self._norm_const_int(int_m.group(0)), start_text_loc
            )

    # Name?
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is not None:
        return self._ast_expr_from_str(name_m.group(0), start_text_loc)

    # Build the message listing everything which was acceptable here
    accepted_parts = []  # type: List[str]

    if accept_const_int:
        accepted_parts.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        accepted_parts.append("a constant floating point number")

    accepted_parts.append("a name")
    accepted_parts.append("or `{`")

    # Two parts read "a name or `{`"; more parts need commas
    separator = " " if len(accepted_parts) == 2 else ", "

    # Expect `{`
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(separator.join(accepted_parts)),
    )

    # Expect an expression
    self._skip_ws()
    inner_text_loc = self._text_loc
    inner_m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # Expect `}`
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")

    return self._ast_expr_from_str(inner_m.group(0), inner_text_loc)
1349
25ca454b
PP
# Patterns for _try_parse_fill_until()
_fill_until_prefix_pat = re.compile(r"\+")
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Tries to parse a filling (`+`, an expression, and an optional padding
# value).
#
# Returns a `_FillUntil` item, or `None` if the current position
# doesn't start with `+`.
def _try_parse_fill_until(self):
    start_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # No `+`: not for us
        return None

    # Expression (a constant integer is accepted too)
    self._skip_ws()
    parsed_expr = self._expect_expr(accept_const_int=True)
    expr_str, expr = parsed_expr

    # Optional padding value
    pad_val = self._parse_pad_val()

    return _FillUntil(expr_str, expr, pad_val, start_text_loc)
676f6189 1372
27d52a19
PP
1373 # Parses the multiplier expression of a repetition (block or
1374 # post-item) and returns the expression string and AST node.
1375 def _expect_rep_mul_expr(self):
dbd84e74 1376 return self._expect_expr(accept_const_int=True)
27d52a19
PP
1377
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block(): `!r` or `!repeat`
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Tries to parse a repetition block (`!repeat`, a multiplier
# expression, items, and `!end`).
#
# Returns a `_Rep` item wrapping a group of the parsed items, or `None`
# if the current position doesn't start a repetition block.
def _try_parse_rep_block(self):
    start_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Not a repetition block
        return None

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Items to repeat
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    body_items = self._parse_items()

    # End of block
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    body = _Group(body_items, body_text_loc)
    return _Rep(body, mul_expr_str, mul_expr, start_text_loc)
1411
# Patterns for _try_parse_cond_block(): `!if` and `!else`
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Tries to parse a conditional block (`!if`, an expression, items, an
# optional `!else` followed by items, and `!end`).
#
# Returns a `_Cond` item, or `None` if the current position doesn't
# start with `!if`.
#
# Without any `!else` part, the "false" group is empty and located at
# the beginning of the block.
def _try_parse_cond_block(self):
    start_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # Not a conditional block
        return None

    # Condition expression
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" items
    self._skip_ws_and_comments()
    then_text_loc = self._text_loc
    then_items = self._parse_items()

    # Defaults used when there's no `!else`
    else_items = []  # type: List[_Item]
    else_text_loc = start_text_loc

    # `!else`?
    self._skip_ws_and_comments()
    has_else = self._try_parse_pat(self._cond_block_else_pat) is not None

    if has_else:
        # "False" items
        self._skip_ws_and_comments()
        else_text_loc = self._text_loc
        else_items = self._parse_items()

    # End of block
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    then_group = _Group(then_items, then_text_loc)
    else_group = _Group(else_items, else_text_loc)
    return _Cond(then_group, else_group, cond_expr_str, cond_expr, start_text_loc)
27d52a19 1460
320644e2
PP
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Pattern for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Pattern for _try_parse_macro_def(): `!m` or `!macro`
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition (`!macro`, a name, parenthesized
# comma-separated parameter names, items, and `!end`).
#
# On success, adds a `_MacroDef` to `self._macro_defs` and returns
# `True`; returns `False` if the current position doesn't start a
# macro definition.
#
# Raises a parse error on a duplicate macro name or a duplicate
# parameter name.
#
# The items of the macro are parsed with empty sets of known variable
# and label names; the previous sets are put back afterwards.
def _try_parse_macro_def(self):
    start_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name
    self._skip_ws()
    macro_name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error(
            "Duplicate macro named `{}`".format(macro_name), macro_name_text_loc
        )

    # Expect `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names until `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter names
            break

        # A comma is required before every parameter but the first
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        cur_param_text_loc = self._text_loc
        cur_param = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if cur_param in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(cur_param),
                cur_param_text_loc,
            )

        param_names.append(cur_param)

    # Items, parsed with fresh sets of known names
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # End of block
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register macro
    body = _Group(body_items, body_text_loc)
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, body, start_text_loc
    )
    return True
1547
# Patterns for _try_parse_macro_exp(): the `m` prefix and the `:`
# which follows it
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion (`m`, `:`, a macro name, and
# parenthesized comma-separated parameter values).
#
# Returns a `_MacroExp` item, or `None` if the current position doesn't
# start with `m`.
#
# Each parameter value is an expression as accepted by _expect_expr(),
# including constant integers (possibly negative) and constant floating
# point numbers.
#
# Raises a parse error if the macro name is unknown or if the number of
# parameter values differs from the number of parameter names of the
# macro definition.
def _try_parse_macro_exp(self):
    start_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return None

    # Expect `:`
    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Macro name, which must be known
    self._skip_ws()
    macro_name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error(
            "Unknown macro name `{}`".format(macro_name), macro_name_text_loc
        )

    # Expect `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values until `)`
    all_params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter values
            break

        # A comma is required before every value but the first
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        cur_param_text_loc = self._text_loc
        cur_param_expr = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
        )
        params.append(_MacroExpParam(*cur_param_expr, text_loc=cur_param_text_loc))

    # Validate the number of parameter values
    passed_count = len(params)
    expected_count = len(macro_def.param_names)

    if passed_count != expected_count:
        plural_suffix = "" if passed_count == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                passed_count, plural_suffix, expected_count
            ),
            all_params_text_loc,
        )

    return _MacroExp(macro_name, params, start_text_loc)
1625
71aaa3f7
PP
1626 # Tries to parse a base item (anything except a repetition),
1627 # returning it on success.
1628 def _try_parse_base_item(self):
1629 # Byte item?
1630 item = self._try_parse_byte()
1631
1632 if item is not None:
1633 return item
1634
1635 # String item?
1636 item = self._try_parse_str()
1637
1638 if item is not None:
1639 return item
1640
2adf4336
PP
1641 # Value, variable assignment, or byte order setting item?
1642 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
1643
1644 if item is not None:
1645 return item
1646
2adf4336
PP
1647 # Label or offset setting item?
1648 item = self._try_parse_label_or_set_offset()
71aaa3f7 1649
676f6189
PP
1650 if item is not None:
1651 return item
1652
1653 # Offset alignment item?
1654 item = self._try_parse_align_offset()
1655
25ca454b
PP
1656 if item is not None:
1657 return item
1658
1659 # Filling item?
1660 item = self._try_parse_fill_until()
1661
71aaa3f7
PP
1662 if item is not None:
1663 return item
1664
1665 # Group item?
1666 item = self._try_parse_group()
1667
1668 if item is not None:
1669 return item
1670
320644e2 1671 # Repetition block item?
e57a18e1 1672 item = self._try_parse_rep_block()
71aaa3f7 1673
e57a18e1
PP
1674 if item is not None:
1675 return item
1676
27d52a19
PP
1677 # Conditional block item?
1678 item = self._try_parse_cond_block()
1679
1680 if item is not None:
1681 return item
1682
320644e2
PP
1683 # Macro expansion?
1684 item = self._try_parse_macro_exp()
1685
1686 if item is not None:
1687 return item
1688
e57a18e1
PP
1689 # Pattern for _try_parse_rep_post()
1690 _rep_post_prefix_pat = re.compile(r"\*")
1691
1692 # Tries to parse a post-item repetition, returning the expression
1693 # string and AST expression node on success.
1694 def _try_parse_rep_post(self):
71aaa3f7 1695 # Match prefix
e57a18e1 1696 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
71aaa3f7 1697 # No match
2adf4336 1698 return
71aaa3f7 1699
e57a18e1 1700 # Return expression string and AST expression
71aaa3f7 1701 self._skip_ws_and_comments()
e57a18e1 1702 return self._expect_rep_mul_expr()
71aaa3f7 1703
1ca7b5e1
PP
# Tries to parse an item, possibly followed by a post-item repetition.
#
# Appends the parsed item (wrapped in a `_Rep` item if a repetition
# follows a repeatable item) to `items` and returns `True` on success;
# returns `None` and leaves `items` alone otherwise.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()

    # Base item
    base_item = self._try_parse_base_item()

    if base_item is None:
        return None

    new_item = base_item

    # Only a repeatable item may be followed by `*`
    if isinstance(base_item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_expr = self._try_parse_rep_post()

        if rep_expr is not None:
            new_item = _Rep(base_item, *rep_expr, text_loc=rep_text_loc)

    items.append(new_item)
    return True
71aaa3f7
PP
1728
# Parses and returns items until the end of the input or until the
# first position where nothing known starts.
#
# If `accept_macro_defs` is `True`, then macro definitions are accepted
# between items; they're registered by _try_parse_macro_def() and
# aren't part of the returned list.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        # Item?
        if self._try_append_item(parsed_items):
            continue

        # Macro definition (when allowed)?
        if not (accept_macro_defs and self._try_parse_macro_def()):
            # Unknown at this point
            break

    return parsed_items
1748
# Parses the whole Normand input, setting `self._res` to the main
# group item on success.
#
# Macro definitions are accepted at this first level.
#
# Raises a parse error if anything remains once the first level items
# are parsed.
def _parse(self):
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items
    main_items = self._parse_items(True)

    # Nothing must be left
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    # Main group item
    self._res = _Group(main_items, self._text_loc)
1770
1771
# The return type of parse().
#
# An instance is built with the private _create() class method; calling
# the constructor directly raises `NotImplementedError`.
class ParseResult:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns an initialized instance without going
    # through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Stores the parts of the result.
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed
    # value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1829
1830
# Raises a parse error having the message `msg` and located where the
# item `item` is (its `text_loc` attribute).
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1835
1836
# The `ICITTE` reserved name.
#
# A label can't have this name, expression validation always accepts
# it, and expression evaluation binds it to the current offset.
_icitte_name = "ICITTE"
1839
1840
2adf4336
PP
1841# Base node visitor.
1842#
1843# Calls the _visit_name() method for each name node which isn't the name
1844# of a call.
1845class _NodeVisitor(ast.NodeVisitor):
1846 def __init__(self):
71aaa3f7
PP
1847 self._parent_is_call = False
1848
1849 def generic_visit(self, node: ast.AST):
1850 if type(node) is ast.Call:
1851 self._parent_is_call = True
1852 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1853 self._visit_name(node.id)
71aaa3f7
PP
1854
1855 super().generic_visit(node)
1856 self._parent_is_call = False
1857
2adf4336
PP
1858 @abc.abstractmethod
1859 def _visit_name(self, name: str):
1860 ...
1861
71aaa3f7 1862
2adf4336
PP
# Expression validator: makes sure that each name reported by the base
# visitor is either the reserved `ICITTE` name or one of the allowed
# names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._allowed_names = allowed_names
        self._text_loc = text_loc
        self._expr_str = expr_str

    # Raises a parse error, listing the legal names, if `name` isn't
    # legal.
    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # Work on a copy so as to leave the allowed names untouched
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        if len(legal_names) > 0:
            quoted_names = sorted("`{}`".format(legal) for legal in legal_names)
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._text_loc)
1893
1894
2adf4336
PP
# Generator state: variables, labels, current offset, and current byte
# order.
#
# The constructor keeps its own copies (`copy()` method) of `variables`
# and `labels`.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.offset = offset
        self.bo = bo
        self.labels = labels.copy()
        self.variables = variables.copy()

    def __repr__(self):
        parts = (self.variables, self.labels, self.offset, self.bo)
        return "_GenState({}, {}, {}, {})".format(*[repr(part) for part in parts])
1913
1914
# Fixed-length number item instance: what's needed to handle a
# fixed-length number item later.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._parse_error_msgs = parse_error_msgs
        self._state = state
        self._offset_in_data = offset_in_data
        self._item = item

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset given at construction time as `offset_in_data`.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state given at construction time.
    @property
    def state(self):
        return self._state

    # Parsing error messages given at construction time.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
1944
71aaa3f7 1945
2adf4336 1946# Generator of data and final state from a group item.
71aaa3f7
PP
1947#
1948# Generation happens in memory at construction time. After building, use
1949# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1950# get the resulting context.
2adf4336
PP
1951#
1952# The steps of generation are:
1953#
320644e2
PP
1954# 1. Handle each item in prefix order.
1955#
1956# The handlers append bytes to `self._data` and update some current
1957# state object (`_GenState` instance).
1958#
1959# When handling a fixed-length number item, try to evaluate its
1960# expression using the current state. If this fails, then it might be
1961# because the expression refers to a "future" label: save the current
1962# offset in `self._data` (generated data) and a snapshot of the
1963# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
f5dcb24c
PP
1964# object). _gen_fl_num_item_insts() will deal with this later. A
1965# `_FlNumItemInst` instance also contains a snapshot of the current
1966# parsing error messages (`self._parse_error_msgs`) which need to be
1967# taken into account when handling the instance later.
2adf4336 1968#
320644e2
PP
1969# When handling the items of a group, keep a map of immediate label
1970# names to their offset. Then, after having processed all the items,
1971# update the relevant saved state snapshots in
1972# `self._fl_num_item_insts` with those immediate label values.
1973# _gen_fl_num_item_insts() will deal with this later.
2adf4336 1974#
320644e2
PP
1975# 2. Handle all the fixed-length number item instances of which the
1976# expression evaluation failed before.
2adf4336 1977#
320644e2
PP
1978# At this point, `self._fl_num_item_insts` contains everything that's
1979# needed to evaluate the expressions, including the values of
1980# "future" labels from the point of view of some fixed-length number
1981# item instance.
2adf4336 1982#
f5dcb24c
PP
1983# If an evaluation fails at this point, then it's a user error. Add
1984# to the parsing error all the saved parsing error messages of the
1985# instance. Those additional messages add precious context to the
1986# error.
71aaa3f7
PP
1987class _Gen:
# Builds the generator and immediately runs the generation (_gen()
# method) for the group item `group`, starting from a state made of
# `variables`, `labels`, `offset`, and `bo`.
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    self._macro_defs = macro_defs

    # Both lists start empty
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    initial_state = _GenState(variables, labels, offset, bo)
    self._gen(group, initial_state)
71aaa3f7
PP
2001
# Generated bytes (`self._data`).
@property
def data(self):
    generated = self._data
    return generated
2006
# Updated variables (those of the final state).
@property
def variables(self):
    final_state = self._final_state
    return final_state.variables
71aaa3f7
PP
2011
# Updated main group labels (those of the final state).
@property
def labels(self):
    final_state = self._final_state
    return final_state.labels
71aaa3f7
PP
2016
# Updated offset (the one of the final state).
@property
def offset(self):
    final_state = self._final_state
    return final_state.offset
71aaa3f7
PP
2021
# Updated byte order (the one of the final state).
@property
def bo(self):
    final_state = self._final_state
    return final_state.bo
2026
320644e2
PP
# Evaluates the expression `expr`, of which the original string is
# `expr_str` at the location `text_loc`, considering the current
# generation state `state`.
#
# The names available to the expression are the labels of `state`,
# then `ICITTE` (the current offset), and then the variables of
# `state`, a later one replacing an earlier one having the same name.
#
# A `bool` result is converted to `int`. The result must be an `int`,
# or a `float` too if `allow_float` is `True`.
#
# Raises a parse error if the validation of the names, the evaluation
# itself, or the validation of the result type fails.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    # Labels first, then `ICITTE`, then the current variables
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # Compile and evaluate expression node
    try:
        code = compile(expr, "", "eval")
        val = eval(code, None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(val) is bool:
        val = int(val)

    # Validate result type
    type_msg = "`int`"
    expected_types = {int}  # type: Set[type]

    if allow_float:
        type_msg += " or `float`"
        expected_types.add(float)

    val_type = type(val)

    if val_type not in expected_types:
        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, type_msg, val_type.__name__
            ),
            text_loc,
        )

    return val
2083
320644e2
PP
# Evaluates the expression of `item` (its `expr_str`, `expr`, and
# `text_loc` attributes) considering the current generation state
# `state`; see _eval_expr().
#
# If `allow_float` is `True`, then the type of the result may be
# `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    expr_str = item.expr_str
    expr = item.expr
    text_loc = item.text_loc
    return _Gen._eval_expr(expr_str, expr, text_loc, state, allow_float)
2098
# Handles the byte item `item`: appends its value to the generated
# data and advances the offset of `state` by its size.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    byte_val = item.val
    self._data.append(byte_val)
    state.offset = state.offset + item.size
2103
# Handles the string item `item`: adds its data to the generated data
# and advances the offset of `state` by its size.
def _handle_str_item(self, item: _Str, state: _GenState):
    str_data = item.data
    self._data += str_data
    state.offset = state.offset + item.size
2108
# Handles the byte order setting item `item`: its byte order becomes
# the current one of `state`.
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    new_bo = item.bo
    state.bo = new_bo
2113
# Handles the variable assignment item `item`: evaluates its
# expression (the result may be a `float`) and stores the result in the
# variables of `state` under its name.
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    new_val = self._eval_item_expr(item, state, True)
    state.variables[item.name] = new_val
2118
# Handles the fixed-length number item `item`.
#
# Raises a parse error if the item is to be encoded on more than eight
# bits while the current byte order of `state` isn't defined.
#
# The bytes are generated immediately if possible. Otherwise, zeros
# are appended as a placeholder and an instance (item, offset in the
# generated data, and deep copies of `state` and of the current
# parsing error messages) is added to `self._fl_num_item_insts`.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    # Validate current byte order
    needs_bo = item.len > 8

    if needs_bo and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # Try an immediate evaluation
    try:
        encoded = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Keep everything needed to (try to) generate the bytes of
        # this item later
        pending_inst = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(pending_inst)

        # Placeholder bytes for this instance
        encoded = b"\x00" * (item.len // 8)

    # Append bytes and update offset
    self._data += encoded
    state.offset += len(encoded)
2152
05f81895
PP
2153 # Returns the size, in bytes, required to encode the value `val`
2154 # with LEB128 (signed version if `is_signed` is `True`).
2155 @staticmethod
2156 def _leb128_size_for_val(val: int, is_signed: bool):
2157 if val < 0:
2158 # Equivalent upper bound.
2159 #
2160 # For example, if `val` is -128, then the full integer for
2161 # this number of bits would be [-128, 127].
2162 val = -val - 1
2163
2164 # Number of bits (add one for the sign if needed)
2165 bits = val.bit_length() + int(is_signed)
2166
2167 if bits == 0:
2168 bits = 1
2169
2170 # Seven bits per byte
2171 return math.ceil(bits / 7)
2172
320644e2
PP
# Handles the LEB128 integer item `item`: evaluates its expression,
# appends the encoded bytes to `self._data`, and advances the offset of
# `state` by the number of encoded bytes.
#
# The signed size computation applies when `item` is exactly a
# `_SLeb128Int` instance.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    val = self._eval_item_expr(item, state, False)
    size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

    # One byte per group of seven bits, least significant group first,
    # each one with its MSB set (continue).
    groups = bytearray(((val >> (7 * i)) & 0x7F) | 0x80 for i in range(size))

    # The last byte has a cleared MSB (stop)
    groups[-1] &= 0x7F

    self._data += groups
    state.offset += size
27d52a19 2192
320644e2
PP
# Handles the group item `item` by handling each of its items in order.
#
# The labels which are direct items of the group (immediate labels) are
# removed from `state` at the end if `remove_immediate_labels` is
# `True`. In all cases, they're added to the state snapshot of each
# fixed-length number item instance recorded while handling this group.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Instances recorded from this index on belong to this group
    first_inst_index = len(self._fl_num_item_insts)
    local_labels = {}  # type: LabelsT

    for child in item.items:
        if type(child) is _Label:
            # Remember the offset of this immediate label
            local_labels[child.name] = state.offset

        self._handle_item(child, state)

    if remove_immediate_labels:
        # Immediate labels don't outlive the group
        for label_name in local_labels:
            state.labels.pop(label_name)

    # Deferred instances of this group may still refer to any immediate
    # label, including the ones defined after them.
    for pending_inst in self._fl_num_item_insts[first_inst_index:]:
        pending_inst.state.labels.update(local_labels)
2adf4336 2218
320644e2
PP
# Handles the repetition item `item`: evaluates its expression to get
# a repetition count, and then handles the repeated item that many
# times.
#
# A negative count is reported with `_raise_error_for_item()`.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        error_text = "Invalid expression `{}`: unexpected negative result {:,}".format(
            item.expr_str, count
        )
        _raise_error_for_item(error_text, item)

    # Handle the repeated item `count` times
    for _ in range(count):
        self._handle_item(item.item, state)
2adf4336 2236
# Handles the conditional item `item`: evaluates its expression and
# handles its "true" item if the result is truthy, or its "false" item
# otherwise.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    cond_val = _Gen._eval_item_expr(item, state)

    # Only the selected branch is accessed and handled
    selected_item = item.true_item if cond_val else item.false_item
    self._handle_item(selected_item, state)
2adf4336 2247
320644e2
PP
# Evaluates the parameters of the macro expansion item `item` using the
# state `init_state` and returns a new state to handle the items of the
# macro.
#
# The returned state is created with two empty dictionaries and with
# the offset and byte order of `init_state`; its variables are then the
# macro parameter names mapped to the evaluated parameter values.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    macro_def = self._macro_defs[item.name]
    exp_state = _GenState({}, {}, init_state.offset, init_state.bo)

    # Pair each parameter name of the definition with the corresponding
    # parameter of the expansion, evaluated in the context of the
    # expansion site (`init_state`).
    for name, param in zip(macro_def.param_names, item.params):
        param_val = _Gen._eval_expr(
            param.expr_str, param.expr, param.text_loc, init_state, True
        )
        exp_state.variables[name] = param_val

    return exp_state
2adf4336 2264
320644e2
PP
# Handles the macro expansion item `item`: evaluates its parameters,
# handles the group of the corresponding macro definition with a
# dedicated state, and advances the offset of `state` by the number of
# bytes added to `self._data`.
#
# While the group is handled, a "While expanding the macro" message is
# on top of `self._parse_error_msgs`. Any `ParseError` is passed to
# `_augment_error()` with the same message text.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    context_text = "While expanding the macro `{}`:".format(item.name)

    # Evaluating the parameters doesn't add any data, so the size
    # before the expansion may be taken right away.
    data_size_before = len(self._data)

    try:
        # State dedicated to this expansion
        exp_state = self._eval_macro_exp_params(item, state)

        # Handle the group of the macro with the context message pushed
        context_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            context_text, item.text_loc
        )
        self._parse_error_msgs.append(context_msg)
        self._handle_item(self._macro_defs[item.name].group, exp_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        _augment_error(exc, context_text, item.text_loc)

    # The expansion state is discarded: only the offset carries over
    state.offset += len(self._data) - data_size_before
676f6189 2288
320644e2
PP
2289 # Handles the offset setting item `item`.
2290 def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
676f6189 2291 state.offset = item.val
2adf4336 2292
25ca454b 2293 # Handles the offset alignment item `item` (adds padding).
320644e2
PP
2294 def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
2295 init_offset = state.offset
2296 align_bytes = item.val // 8
2297 state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
2298 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2adf4336 2299
25ca454b
PP
# Handles the filling item `item`: evaluates its expression to get a
# target offset, appends padding bytes (`item.pad_val`) to `self._data`
# until this offset, and makes it the current offset of `state`.
#
# A target offset which is less than the current offset is reported
# with `_raise_error_for_item()`.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    target_offset = _Gen._eval_item_expr(item, state)

    # Filling can only go forward
    if target_offset < state.offset:
        error_text = "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
            item.expr_str, target_offset, state.offset
        )
        _raise_error_for_item(error_text, item)

    # Pad, and then move to the target offset
    pad_len = target_offset - state.offset
    self._data += bytes([item.pad_val] * pad_len)
    state.offset = target_offset
2319
320644e2
PP
2320 # Handles the label item `item`.
2321 def _handle_label_item(self, item: _Label, state: _GenState):
2322 state.labels[item.name] = state.offset
2adf4336 2323
320644e2
PP
2324 # Handles the item `item`, returning the updated next repetition
2325 # instance.
2326 def _handle_item(self, item: _Item, state: _GenState):
2327 return self._item_handlers[type(item)](item, state)
71aaa3f7 2328
320644e2
PP
# Generates the data for a fixed-length integer item instance having
# the value `val` and returns it.
#
# `val` must fit the length of `item`, either as a signed or as an
# unsigned integer; a value outside this range is reported with
# `_raise_error_for_item()`.
#
# Big endian is used when the byte order of `state` is `None`.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Accepted range: from the signed minimum to the unsigned maximum
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not (lowest <= val <= highest):
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    is_big_endian = state.bo in (None, ByteOrder.BE)

    # Always encode on 64 bits (signed if negative) so that the sign
    # bit is extended whatever the value of `item.len`.
    bo_char = ">" if is_big_endian else "<"
    type_char = "Q" if val >= 0 else "q"
    full_data = struct.pack(bo_char + type_char, val)

    # Keep only the bytes of the requested length
    size = item.len // 8

    if is_big_endian:
        # Least significant bytes are the last ones
        return full_data[-size:]

    # Least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return full_data[:size]
269f6eb3 2364
320644e2
PP
# Generates the data for a fixed-length floating point number item
# instance having the value `val` and returns it.
#
# The length of `item` must be 32 or 64 bits; any other length is
# reported with `_raise_error_for_item()`.
#
# Big endian is used when the byte order of `state` is `None`.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    if item.len not in (32, 64):
        error_text = "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
            item.len, val
        )
        _raise_error_for_item(error_text, item)

    # `struct` format: byte order character, and then `f` (32 bits) or
    # `d` (64 bits).
    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"
    return struct.pack(bo_char + type_char, val)
2385
320644e2
PP
2386 # Generates the data for a fixed-length number item instance and
2387 # returns it.
2388 def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
269f6eb3 2389 # Compute value
e57a18e1 2390 val = self._eval_item_expr(item, state, True)
269f6eb3 2391
269f6eb3
PP
2392 # Handle depending on type
2393 if type(val) is int:
320644e2 2394 return self._gen_fl_int_item_inst_data(val, item, state)
269f6eb3
PP
2395 else:
2396 assert type(val) is float
320644e2 2397 return self._gen_fl_float_item_inst_data(val, item, state)
05f81895 2398
320644e2
PP
2399 # Generates the data for all the fixed-length number item instances
2400 # and writes it at the correct offset within `self._data`.
2401 def _gen_fl_num_item_insts(self):
2402 for inst in self._fl_num_item_insts:
2403 # Generate bytes
f5dcb24c
PP
2404 try:
2405 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
2406 except ParseError as exc:
2407 # Add all the saved parse error messages for this
2408 # instance.
2409 for msg in reversed(inst.parse_error_msgs):
2410 _add_error_msg(exc, msg.text, msg.text_location)
2411
2412 raise
05f81895 2413
320644e2
PP
2414 # Insert bytes into `self._data`
2415 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2adf4336
PP
2416
# Generates the data (`self._data`) and final state
# (`self._final_state`) from `group` and the initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Start with an empty output buffer
    self._data = bytearray()

    # Dispatch table: exact item type to the method which handles it
    self._item_handlers = {
        # Items producing data
        _Byte: self._handle_byte_item,
        _Str: self._handle_str_item,
        _FlNum: self._handle_fl_num_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _ULeb128Int: self._handle_leb128_int_item,
        # Items containing other items
        _Group: self._handle_group_item,
        _Rep: self._handle_rep_item,
        _Cond: self._handle_cond_item,
        _MacroExp: self._handle_macro_exp_item,
        # Items updating the state and/or adding padding
        _Label: self._handle_label_item,
        _VarAssign: self._handle_var_assign_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _AlignOffset: self._handle_align_offset_item,
        _FillUntil: self._handle_fill_until_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # Handle the root group while keeping its immediate labels because
    # the `labels` property offers them.
    self._handle_group_item(group, state, remove_immediate_labels=False)

    # `state` is now the final state
    self._final_state = state

    # The full state is known at this point: generate the bytes of the
    # fixed-length number item instances which had to wait.
    self._gen_fl_num_item_insts()
2452
71aaa3f7
PP
2453
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to values (see `VariablesT`). A variable name must not
# be the reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Fresh dictionaries when the caller doesn't provide any
    init_variables = {} if init_variables is None else init_variables
    init_labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data from the parsed items
    parser = _Parser(normand, init_variables, init_labels)
    generator = _Gen(
        parser.res,
        parser.macro_defs,
        init_variables,
        init_labels,
        init_offset,
        init_byte_order,
    )

    # Wrap the generated data and the final state
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        generator.data,
        generator.variables,
        generator.labels,
        generator.offset,
        generator.bo,
    )
2495
2496
f5dcb24c
PP
2497# Raises a command-line error with the message `msg`.
2498def _raise_cli_error(msg: str) -> NoReturn:
2499 raise RuntimeError("Command-line error: {}".format(msg))
2500
2501
2502# Returns a dictionary of string to integers from the list of strings
2503# `args` containing `NAME=VAL` entries.
2504def _dict_from_arg(args: Optional[List[str]]):
2505 d = {} # type: LabelsT
2506
2507 if args is None:
2508 return d
2509
2510 for arg in args:
2511 m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), arg)
2512
2513 if m is None:
2514 _raise_cli_error("Invalid assignment {}".format(arg))
2515
2516 d[m.group(1)] = int(m.group(2))
2517
2518 return d
2519
2520
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The Normand input text is read from the input file, or from the
# standard input without an input path.
#
# Raises a command-line error (see `_raise_cli_error()`) if the initial
# offset is negative or if a `--var`/`--label` value isn't a valid
# `NAME=VAL` assignment.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels (`_dict_from_arg()` returns integer values,
    # which the cast presents as variable values)
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset, reporting the rejected value (the placeholder of
    # this message must be filled in)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order (`None` without the option)
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # `argparse` only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, labels
71aaa3f7 2604
71aaa3f7 2605
f5dcb24c
PP
# CLI entry point without exception handling: parses the Normand text
# `normand` with the initial offset `offset`, byte order `bo`,
# variables `variables`, and labels `labels`, and writes the resulting
# bytes to the standard output.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    res = parse(normand, variables, labels, offset, bo)

    # Binary output: use the underlying buffer of the standard output
    sys.stdout.buffer.write(res.data)
71aaa3f7
PP
2615
2616
2617# Prints the exception message `msg` and exits with status 1.
2618def _fail(msg: str) -> NoReturn:
2619 if not msg.endswith("."):
2620 msg += "."
2621
f5dcb24c 2622 print(msg.strip(), file=sys.stderr)
71aaa3f7
PP
2623 sys.exit(1)
2624
2625
# CLI entry point: parses the command-line arguments, runs the
# processor, and reports any error with `_fail()`.
#
# A `ParseError` is reported with one line per message of the
# exception, in reverse order, each one having the format
# `PATH:LINE:COL - TEXT` (without `PATH:` when reading the standard
# input).
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        # Everything but the input path
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        in_path = args[0]
        prefix = "" if in_path is None else "{}:".format(os.path.abspath(in_path))
        lines = []

        for msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                msg.text_location.line_no,
                msg.text_location.col_no,
                msg.text,
            )

            # End each line with some punctuation
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2657
2658
# Run the command-line tool when this module is executed as a script
# (not when it's imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.137082 seconds and 4 git commands to generate.