CLI: --var/--label: accept the usual int./floating point number form
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
b2410769 33__version__ = "0.18.0"
71aaa3f7 34__all__ = [
320644e2
PP
35 "__author__",
36 "__version__",
71aaa3f7 37 "ByteOrder",
320644e2 38 "LabelsT",
71aaa3f7
PP
39 "parse",
40 "ParseError",
f5dcb24c 41 "ParseErrorMessage",
71aaa3f7 42 "ParseResult",
e57a18e1 43 "TextLocation",
e57a18e1 44 "VariablesT",
71aaa3f7
PP
45]
46
47import re
48import abc
49import ast
50import sys
320644e2 51import copy
71aaa3f7 52import enum
05f81895 53import math
71aaa3f7 54import struct
e57a18e1
PP
55import typing
56from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
57
58
59# Text location (line and column numbers).
class TextLocation:
    # Holds a line number and a column number within some source text.
    #
    # Direct instantiation is refused: instances only come from the
    # private _create() factory below.
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a text location for the line number `line_no`
    # and the column number `col_no`, bypassing the disabled
    # constructor.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    # Effective initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({0._line_no}, {0._col_no})".format(self)
2adf4336 86
71aaa3f7
PP
87
88# Any item.
89class _Item:
e57a18e1 90 def __init__(self, text_loc: TextLocation):
71aaa3f7
PP
91 self._text_loc = text_loc
92
93 # Source text location.
94 @property
95 def text_loc(self):
96 return self._text_loc
97
2adf4336
PP
98
99# Scalar item.
class _ScalarItem(_Item):
    # Size, in bytes, of this item.
    #
    # Declared abstract: the body does nothing, so concrete scalar
    # items provide their own `size` property.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        pass
106
107
108# A repeatable item.
2adf4336 109class _RepableItem:
71aaa3f7
PP
110 pass
111
112
113# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # Builds a single byte item having the value `val`, found at the
    # source text location `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # A byte item always takes exactly one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        # The value is shown in hexadecimal
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
71aaa3f7
PP
130
131
132# String.
class _Str(_ScalarItem, _RepableItem):
    # Builds a string item from its already encoded bytes `data`,
    # found at the source text location `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Size: the number of encoded bytes.
    @property
    def size(self):
        encoded = self._data
        return len(encoded)

    def __repr__(self):
        return "_Str({!r}, {!r})".format(self._data, self._text_loc)
71aaa3f7
PP
149
150
151# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # Each member's value is the lowercase name of the byte order;
    # `enum.unique` guarantees that no two members share a value.

    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
159
160
2adf4336
PP
161# Byte order setting.
class _SetBo(_Item):
    # Builds a byte order setting item selecting the byte order `bo`,
    # found at the source text location `text_loc`.
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
71aaa3f7
PP
173
174
175# Label.
class _Label(_Item):
    # Builds a label item named `name`, found at the source text
    # location `text_loc`.
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
71aaa3f7
PP
188
189
2adf4336
PP
190# Offset setting.
class _SetOffset(_Item):
    # Builds an offset setting item with the new offset `val`, found
    # at the source text location `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
203
204
205# Offset alignment.
class _AlignOffset(_Item):
    # Builds an offset alignment item with the alignment `val` and
    # the padding byte value `pad_val`, found at the source text
    # location `text_loc`.
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (self._val, self._pad_val, self._text_loc)
        return "_AlignOffset({!r}, {!r}, {!r})".format(*fields)
71aaa3f7
PP
226
227
228# Mixin of containing an AST expression and its string.
229class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
25ca454b
PP
245# Fill until some offset.
class _FillUntil(_Item, _ExprMixin):
    # Builds a "fill until" item from the expression string
    # `expr_str`, its AST node `expr`, the padding byte value
    # `pad_val`, and the source text location `text_loc`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # Both bases are initialized explicitly: the first one through
        # super(), the mixin by a direct call.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._pad_val, self._text_loc)
        return "_FillUntil({!r}, {!r}, {!r}, {!r})".format(*fields)
266
267
2adf4336
PP
268# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Builds a variable assignment item: the variable named `name`
    # receives the value of the expression `expr` (string form
    # `expr_str`); `text_loc` is the source text location.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases are initialized explicitly: the first one through
        # super(), the mixin by a direct call.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(*fields)
289
290
269f6eb3
PP
291# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    # Builds a fixed-length number item from the expression string
    # `expr_str`, its AST node `expr`, the length `len` (bits), and
    # the source text location `text_loc`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # The first base is initialized through super(), the mixin by a
        # direct call.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight, rounded down.
    @property
    def size(self):
        size_bytes, _ = divmod(self._len, 8)
        return size_bytes

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._len, self._text_loc)
        return "_FlNum({!r}, {!r}, {!r}, {!r})".format(*fields)
316
317
05f81895
PP
318# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # Builds a LEB128 integer item from the expression string
    # `expr_str`, its AST node `expr`, and the source text location
    # `text_loc`.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Uses the name of the actual class so that subclasses get a
        # faithful representation without overriding this method.
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
331
332
333# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself is what
    # identifies the unsigned variant.
    pass
336
337
338# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself is what
    # identifies the signed variant.
    pass
341
342
71aaa3f7 343# Group of items.
class _Group(_Item, _RepableItem):
    # Builds a group item containing the items `items`, found at the
    # source text location `text_loc`.
    #
    # The list is kept as is (not copied).
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
71aaa3f7
PP
356
357
358# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    # Builds a repetition item for the item `item`; the attached
    # expression is `expr` (string form `expr_str`) and `text_loc` is
    # the source text location.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases are initialized explicitly: the first one through
        # super(), the mixin by a direct call.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fields = (self._item, self._expr_str, self._expr, self._text_loc)
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(*fields)
379
380
27d52a19
PP
381# Conditional item.
class _Cond(_Item, _ExprMixin):
    # Builds a conditional item with the two branches `true_item` and
    # `false_item`; the attached expression is `expr` (string form
    # `expr_str`) and `text_loc` is the source text location.
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # Both bases are initialized explicitly: the first one through
        # super(), the mixin by a direct call.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        fields = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({!r}, {!r}, {!r}, {!r}, {!r})".format(*fields)
414
415
320644e2
PP
416# Macro definition item.
class _MacroDef(_Item):
    # Builds a macro definition item: the macro named `name` takes
    # the parameters named `param_names` and its contents is the
    # group `group`; `text_loc` is the source text location.
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._param_names, self._group = name, param_names, group

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def param_names(self):
        return self._param_names

    # Contained items.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        fields = (self._name, self._param_names, self._group, self._text_loc)
        return "_MacroDef({!r}, {!r}, {!r}, {!r})".format(*fields)
448
449
450# Macro expansion parameter.
451class _MacroExpParam:
452 def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
453 self._expr_str = expr_str
454 self._expr = expr
455 self._text_loc = text_loc
456
457 # Expression string.
458 @property
459 def expr_str(self):
460 return self._expr_str
461
462 # Expression.
463 @property
464 def expr(self):
465 return self._expr
466
467 # Source text location.
468 @property
469 def text_loc(self):
470 return self._text_loc
471
472 def __repr__(self):
473 return "_MacroExpParam({}, {}, {})".format(
474 repr(self._expr_str), repr(self._expr), repr(self._text_loc)
475 )
476
477
478# Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    # Builds a macro expansion item: the macro named `name` is
    # expanded with the parameters `params`; `text_loc` is the source
    # text location.
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        fields = (self._name, self._params, self._text_loc)
        return "_MacroExp({!r}, {!r}, {!r})".format(*fields)
2adf4336
PP
506
507
f5dcb24c
PP
508# A parsing error message: a string and a text location.
class ParseErrorMessage:
    # Direct instantiation is refused: instances only come from the
    # private _create() factory below.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a message having the text `text` and the
    # source text location `text_loc`, bypassing the disabled
    # constructor.
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        message = cls.__new__(cls)
        message._init(text, text_loc)
        return message

    # Effective initializer, called by _create().
    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Message text.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
532
533
534# A parsing error containing one or more messages (`ParseErrorMessage`).
71aaa3f7
PP
class ParseError(RuntimeError):
    # Direct instantiation is refused: instances only come from the
    # private _create() factory below.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error having the initial message
    # `msg` at the source text location `text_loc`, bypassing the
    # disabled constructor.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    # Effective initializer, called by _create(): `msg` becomes both
    # the exception's own message and the first parsing error message.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a message having the text `msg` and the source text
    # location `text_loc` to the messages of this error.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
71aaa3f7
PP
563
564
565# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # Build the parsing error first, then raise it: this function
    # never returns normally.
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
568
569
f5dcb24c
PP
570# Adds a message to the parsing error `exc`.
571def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
572 exc._add_msg(msg, text_loc) # pyright: ignore[reportPrivateUsage]
573
574
575# Appends a message to the parsing error `exc` and reraises it.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    # Record the extra message on `exc`, then let `exc` itself
    # continue to propagate (the very same exception object).
    _add_error_msg(exc, msg, text_loc)
    raise exc
579
580
b2410769
PP
581# Returns a normalized version (so as to be parseable by int()) of
582# the constant integer string `s`, possibly negative, dealing with
583# any radix suffix.
584def _norm_const_int(s: str):
585 neg = ""
586 pos = s
587
588 if s.startswith("-"):
589 neg = "-"
590 pos = s[1:]
591
592 for r in "xXoObB":
593 if pos.startswith("0" + r):
594 # Already correct
595 return s
596
597 # Try suffix
598 asm_suf_base = {
599 "h": "x",
600 "H": "x",
601 "q": "o",
602 "Q": "o",
603 "o": "o",
604 "O": "o",
605 "b": "b",
606 "B": "B",
607 }
608
609 for suf in asm_suf_base:
610 if pos[-1] == suf:
611 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
612
613 return s
614
615
e57a18e1
PP
616# Variables dictionary type (for type hints).
617VariablesT = Dict[str, Union[int, float]]
618
619
620# Labels dictionary type (for type hints).
621LabelsT = Dict[str, int]
71aaa3f7
PP
622
623
b2410769 624# Common patterns.
71aaa3f7 625_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
b2410769
PP
626_pos_const_int_pat = re.compile(
627 r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
628)
629_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
630_const_float_pat = re.compile(
631 r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
632)
71aaa3f7
PP
633
634
320644e2
PP
635# Macro definition dictionary.
636_MacroDefsT = Dict[str, _MacroDef]
637
638
71aaa3f7
PP
639# Normand parser.
640#
641# The constructor accepts a Normand input. After building, use the `res`
642# property to get the resulting main group.
643class _Parser:
644 # Builds a parser to parse the Normand input `normand`, parsing
645 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input text and cursor: parsing starts at offset 0, which is
    # line 1, column 1.
    self._normand = normand
    self._at, self._line_no, self._col_no = 0, 1, 1

    # Names known from the start: only the keys of `labels` and
    # `variables` are kept.
    self._label_names = set(labels)
    self._var_names = set(variables)

    # No macro definition yet
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse immediately
    self._parse()
655
656 # Result (main group).
@property
def res(self):
    # Whatever was stored as `self._res`; `__init__` does not set it,
    # so it is presumably assigned by `_parse()` — TODO confirm.
    main_group = self._res
    return main_group
660
320644e2
PP
661 # Macro definitions.
@property
def macro_defs(self):
    # The very dictionary this parser maintains (not a copy).
    defs = self._macro_defs
    return defs
665
71aaa3f7
PP
666 # Current text location.
@property
def _text_loc(self):
    # A new text location object is built on each access from the
    # current line and column numbers.
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
672
673 # Returns `True` if this parser is done parsing.
674 def _is_done(self):
675 return self._at == len(self._normand)
676
677 # Returns `True` if this parser isn't done parsing.
678 def _isnt_done(self):
679 return not self._is_done()
680
681 # Raises a parse error, creating it using the message `msg` and the
682 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Delegates to the module-level function of the same name, using
    # the current text location of this parser.
    here = self._text_loc
    _raise_error(msg, here)
685
686 # Tries to make the pattern `pat` match the current substring,
687 # returning the match object and updating `self._at`,
688 # `self._line_no`, and `self._col_no` on success.
689 def _try_parse_pat(self, pat: Pattern[str]):
690 m = pat.match(self._normand, self._at)
691
692 if m is None:
693 return
694
695 # Skip matched string
696 self._at += len(m.group(0))
697
698 # Update line number
699 self._line_no += m.group(0).count("\n")
700
701 # Update column number
702 for i in reversed(range(self._at)):
703 if self._normand[i] == "\n" or i == 0:
704 if i == 0:
705 self._col_no = self._at + 1
706 else:
707 self._col_no = self._at - i
708
709 break
710
711 # Return match object
712 return m
713
714 # Expects the pattern `pat` to match the current substring,
715 # returning the match object and updating `self._at`,
716 # `self._line_no`, and `self._col_no` on success, or raising a parse
717 # error with the message `error_msg` on error.
718 def _expect_pat(self, pat: Pattern[str], error_msg: str):
719 # Match
720 m = self._try_parse_pat(pat)
721
722 if m is None:
723 # No match: error
724 self._raise_error(error_msg)
725
726 # Return match object
727 return m
728
729 # Pattern for _skip_ws_and_comments()
730 _ws_or_syms_or_comments_pat = re.compile(
25ca454b 731 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
71aaa3f7
PP
732 )
733
734 # Skips as many whitespaces, insignificant symbol characters, and
735 # comments as possible.
736 def _skip_ws_and_comments(self):
737 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
738
320644e2
PP
739 # Pattern for _skip_ws()
740 _ws_pat = re.compile(r"\s*")
741
742 # Skips as many whitespaces as possible.
743 def _skip_ws(self):
744 self._try_parse_pat(self._ws_pat)
745
71aaa3f7
PP
746 # Pattern for _try_parse_hex_byte()
747 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
748
749 # Tries to parse a hexadecimal byte, returning a byte item on
750 # success.
def _try_parse_hex_byte(self):
    # Remember where the byte starts before consuming anything: this
    # is the location of the resulting item.
    start_loc = self._text_loc

    # High nibble: without it, this isn't a hexadecimal byte
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        return

    # Low nibble: mandatory, possibly after whitespaces, symbols, and
    # comments.
    self._skip_ws_and_comments()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Both nibbles together make the byte value
    hex_digits = "{}{}".format(high.group(0), low.group(0))
    return _Byte(int(hex_digits, 16), start_loc)
71aaa3f7
PP
769
770 # Patterns for _try_parse_bin_byte()
771 _bin_byte_bit_pat = re.compile(r"[01]")
6dd69a2a 772 _bin_byte_prefix_pat = re.compile(r"%+")
71aaa3f7
PP
773
774 # Tries to parse a binary byte, returning a byte item on success.
def _try_parse_bin_byte(self):
    # Location of the whole construct (used for a group of bytes)
    start_loc = self._text_loc

    # Prefix: without it, this isn't a binary byte
    prefix_m = self._try_parse_pat(self._bin_byte_prefix_pat)

    if prefix_m is None:
        return

    # One byte is expected per prefix character
    byte_count = len(prefix_m.group(0))
    byte_items = []  # type: List[_Item]

    for _ in range(byte_count):
        self._skip_ws_and_comments()
        byte_loc = self._text_loc
        val = 0

        # Eight bits, most significant first, accumulated directly
        # into the byte value.
        for _ in range(8):
            self._skip_ws_and_comments()
            bit_m = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            val = (val << 1) | int(bit_m.group(0))

        byte_items.append(_Byte(val, byte_loc))

    # A single byte is returned as is, many bytes as a group
    if len(byte_items) == 1:
        return byte_items[0]

    return _Group(byte_items, start_loc)
71aaa3f7
PP
809
810 # Patterns for _try_parse_dec_byte()
320644e2 811 _dec_byte_prefix_pat = re.compile(r"\$")
71aaa3f7
PP
812 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
813
814 # Tries to parse a decimal byte, returning a byte item on success.
def _try_parse_dec_byte(self):
    # Remember where the item starts: both the validation error and
    # the resulting item refer to this location.
    start_loc = self._text_loc

    # Prefix: without it, this isn't a decimal byte
    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        return

    # Value: mandatory, possibly after whitespaces
    self._skip_ws()
    val_m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Signed value: the `neg` group is either `-` or empty
    val = int(val_m.group("neg") + val_m.group("val"))

    # Accept both the signed and unsigned byte ranges
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), start_loc)

    # Two's complement: negative values map onto 128 to 255
    return _Byte(val & 0xFF, start_loc)
71aaa3f7
PP
839
840 # Tries to parse a byte, returning a byte item on success.
841 def _try_parse_byte(self):
842 # Hexadecimal
843 item = self._try_parse_hex_byte()
844
845 if item is not None:
846 return item
847
848 # Binary
849 item = self._try_parse_bin_byte()
850
851 if item is not None:
852 return item
853
854 # Decimal
855 item = self._try_parse_dec_byte()
856
857 if item is not None:
858 return item
859
860 # Patterns for _try_parse_str()
861 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
862 _str_suffix_pat = re.compile(r'"')
863 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
864
865 # Strings corresponding to escape sequence characters
866 _str_escape_seq_strs = {
867 "0": "\0",
868 "a": "\a",
869 "b": "\b",
870 "e": "\x1b",
871 "f": "\f",
872 "n": "\n",
873 "r": "\r",
874 "t": "\t",
875 "v": "\v",
876 "\\": "\\",
877 '"': '"',
878 }
879
880 # Tries to parse a string, returning a string item on success.
def _try_parse_str(self):
    # Tries to parse a literal string, with an optional UTF-16/UTF-32
    # encoding prefix, returning a string item holding the encoded
    # bytes on success, or `None` if there's no string here.
    #
    # Raises a parsing error if the string isn't terminated.
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Successive str.replace() calls, one per escape character, would
    # wrongly decode the tail of an escaped backslash together with
    # the next character: `\\n` (escaped backslash, then `n`) would
    # become a backslash followed by a newline.
    #
    # An unknown escape sequence is kept as is (backslash included).
    escape_seq_strs = self._str_escape_seq_strs

    def decode_escape_seq(esc_m):
        return escape_seq_strs.get(esc_m.group(1), esc_m.group(0))

    val = re.sub(r"\\(.)", decode_escape_seq, m.group(0))

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
71aaa3f7 914
320644e2
PP
915 # Common right parenthesis pattern
916 _right_paren_pat = re.compile(r"\)")
917
71aaa3f7 918 # Patterns for _try_parse_group()
320644e2 919 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
71aaa3f7
PP
920
921 # Tries to parse a group, returning a group item on success.
def _try_parse_group(self):
    # Location of the group: where its opening token starts
    start_loc = self._text_loc

    # Opening token: without it, this isn't a group
    open_m = self._try_parse_pat(self._group_prefix_pat)

    if open_m is None:
        return

    # Contents
    group_items = self._parse_items()

    # The closing token depends on how the group was opened: `(` is
    # closed by `)`, anything else by the block end pattern.
    self._skip_ws_and_comments()
    end_pat, end_str = (
        (self._right_paren_pat, ")")
        if open_m.group(0) == "("
        else (self._block_end_pat, "!end")
    )
    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )

    return _Group(group_items, start_loc)
71aaa3f7
PP
949
950 # Returns a stripped expression string and an AST expression node
951 # from the expression string `expr_str` at text location `text_loc`.
e57a18e1 952 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
71aaa3f7
PP
953 # Create an expression node from the expression string
954 expr_str = expr_str.strip().replace("\n", " ")
955
956 try:
957 expr = ast.parse(expr_str, mode="eval")
958 except SyntaxError:
959 _raise_error(
960 "Invalid expression `{}`: invalid syntax".format(expr_str),
961 text_loc,
962 )
963
964 return expr_str, expr
965
269f6eb3 966 # Patterns for _try_parse_num_and_attr()
05f81895 967 _val_expr_pat = re.compile(r"([^}:]+):\s*")
269f6eb3 968 _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
05f81895 969 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
71aaa3f7 970
05f81895
PP
971 # Tries to parse a value and attribute (fixed length in bits or
972 # `leb128`), returning a value item on success.
def _try_parse_num_and_attr(self):
    start_loc = self._text_loc

    # Value expression followed by `:`: without it, this isn't a
    # number item.
    expr_m = self._try_parse_pat(self._val_expr_pat)

    if expr_m is None:
        return

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(expr_m.group(1), start_loc)

    # Fixed length (bits)?
    len_m = self._try_parse_pat(self._fl_num_len_attr_pat)

    if len_m is not None:
        return _FlNum(expr_str, expr, int(len_m.group(0)), start_loc)

    # LEB128?
    leb_m = self._try_parse_pat(self._leb128_int_attr_pat)

    if leb_m is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    # The first captured character selects the unsigned (`u`) or
    # signed variant.
    if leb_m.group(1) == "u":
        return _ULeb128Int(expr_str, expr, start_loc)

    return _SLeb128Int(expr_str, expr, start_loc)
71aaa3f7 1010
320644e2
PP
1011 # Patterns for _try_parse_var_assign()
1012 _var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
1013 _var_assign_expr_pat = re.compile(r"[^}]+")
71aaa3f7 1014
2adf4336
PP
1015 # Tries to parse a variable assignment, returning a variable
1016 # assignment item on success.
def _try_parse_var_assign(self):
    # Location of the assignment: where the variable name starts
    start_loc = self._text_loc

    # Variable name followed by `=`: without it, this isn't a
    # variable assignment.
    name_m = self._try_parse_pat(self._var_assign_name_equal_pat)

    if name_m is None:
        return

    var_name = name_m.group(1)

    # The name must be neither reserved nor the name of a known label
    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), start_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), start_loc)

    # Expression: mandatory, possibly after whitespaces
    self._skip_ws()
    expr_m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(expr_m.group(0), start_loc)

    # The variable name is known from now on
    self._var_names.add(var_name)

    return _VarAssign(var_name, expr_str, expr, start_loc)
1055
2adf4336 1056 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
1057 _bo_pat = re.compile(r"[bl]e")
1058
2adf4336
PP
1059 # Tries to parse a byte order name, returning a byte order setting
1060 # item on success.
def _try_parse_set_bo(self):
    start_loc = self._text_loc

    # Byte order name: without it, this isn't a byte order setting
    bo_m = self._try_parse_pat(self._bo_pat)

    if bo_m is None:
        return

    bo_name = bo_m.group(0)

    if bo_name != "be":
        # The pattern only accepts two names
        assert bo_name == "le"
        return _SetBo(ByteOrder.LE, start_loc)

    return _SetBo(ByteOrder.BE, start_loc)
71aaa3f7
PP
1077
1078 # Patterns for _try_parse_val_or_bo()
320644e2
PP
1079 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1080 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
71aaa3f7 1081
2adf4336
PP
1082 # Tries to parse a value, a variable assignment, or a byte order
1083 # setting, returning an item on success.
1084 def _try_parse_val_or_var_assign_or_set_bo(self):
71aaa3f7 1085 # Match prefix
2adf4336 1086 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
71aaa3f7
PP
1087 # No match
1088 return
1089
320644e2
PP
1090 self._skip_ws()
1091
2adf4336
PP
1092 # Variable assignment item?
1093 item = self._try_parse_var_assign()
71aaa3f7
PP
1094
1095 if item is None:
269f6eb3
PP
1096 # Number item?
1097 item = self._try_parse_num_and_attr()
71aaa3f7
PP
1098
1099 if item is None:
2adf4336
PP
1100 # Byte order setting item?
1101 item = self._try_parse_set_bo()
71aaa3f7
PP
1102
1103 if item is None:
1104 # At this point it's invalid
2adf4336 1105 self._raise_error(
269f6eb3 1106 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
2adf4336 1107 )
71aaa3f7
PP
1108
1109 # Expect suffix
320644e2 1110 self._skip_ws()
2adf4336 1111 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
71aaa3f7
PP
1112 return item
1113
2adf4336
PP
1114 # Tries to parse an offset setting value (after the initial `<`),
1115 # returning an offset item on success.
def _try_parse_set_offset_val(self):
    start_loc = self._text_loc

    # Positive constant integer: without it, this isn't an offset
    # setting value.
    val_m = self._try_parse_pat(_pos_const_int_pat)

    if val_m is None:
        return

    # Normalize any radix suffix first, then let int() infer the base
    # from the prefix (base 0).
    normalized = _norm_const_int(val_m.group(0))
    return _SetOffset(int(normalized, 0), start_loc)
71aaa3f7
PP
1128
# Tries to parse a label name (what follows the initial `<`).
#
# Returns a label item on success, or `None` if there's no Python name
# at the current position.
#
# Raises a parse error if the name is the reserved `ICITTE` name, an
# already known label name, or an already known variable name. On
# success, the name becomes a known label name.
def _try_parse_label_name(self):
    name_text_loc = self._text_loc
    match = self._try_parse_pat(_py_name_pat)

    if match is None:
        # Not a name
        return

    label_name = match.group(0)

    # Find the first validation failure, if any
    error_msg = None

    if label_name == _icitte_name:
        error_msg = "`{}` is a reserved label name".format(_icitte_name)
    elif label_name in self._label_names:
        error_msg = "Duplicate label name `{}`".format(label_name)
    elif label_name in self._var_names:
        error_msg = "Existing variable named `{}`".format(label_name)

    if error_msg is not None:
        _raise_error(error_msg, name_text_loc)

    # Remember this label name
    self._label_names.add(label_name)
    return _Label(label_name, name_text_loc)
71aaa3f7 1160
2adf4336 1161 # Patterns for _try_parse_label_or_set_offset()
320644e2
PP
1162 _label_set_offset_prefix_pat = re.compile(r"<")
1163 _label_set_offset_suffix_pat = re.compile(r">")
71aaa3f7 1164
2adf4336
PP
1165 # Tries to parse a label or an offset setting, returning an item on
1166 # success.
1167 def _try_parse_label_or_set_offset(self):
71aaa3f7 1168 # Match prefix
2adf4336 1169 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
71aaa3f7
PP
1170 # No match
1171 return
1172
2adf4336 1173 # Offset setting item?
320644e2 1174 self._skip_ws()
2adf4336 1175 item = self._try_parse_set_offset_val()
71aaa3f7
PP
1176
1177 if item is None:
1178 # Label item?
1179 item = self._try_parse_label_name()
1180
1181 if item is None:
1182 # At this point it's invalid
2adf4336 1183 self._raise_error("Expecting a label name or an offset setting value")
71aaa3f7
PP
1184
1185 # Expect suffix
320644e2 1186 self._skip_ws()
2adf4336 1187 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
71aaa3f7
PP
1188 return item
1189
25ca454b
PP
1190 # Pattern for _parse_pad_val()
1191 _pad_val_prefix_pat = re.compile(r"~")
1192
1193 # Tries to parse a padding value, returning the padding value, or 0
1194 # if none.
1195 def _parse_pad_val(self):
1196 # Padding value?
1197 self._skip_ws()
1198 pad_val = 0
1199
1200 if self._try_parse_pat(self._pad_val_prefix_pat) is not None:
1201 self._skip_ws()
1202 pad_val_text_loc = self._text_loc
1203 m = self._expect_pat(
b2410769 1204 _pos_const_int_pat,
25ca454b
PP
1205 "Expecting a positive constant integer (byte value)",
1206 )
1207
1208 # Validate
b2410769 1209 pad_val = int(_norm_const_int(m.group(0)), 0)
25ca454b
PP
1210
1211 if pad_val > 255:
1212 _raise_error(
1213 "Invalid padding byte value {}".format(pad_val),
1214 pad_val_text_loc,
1215 )
1216
1217 return pad_val
1218
676f6189 1219 # Patterns for _try_parse_align_offset()
320644e2
PP
1220 _align_offset_prefix_pat = re.compile(r"@")
1221 _align_offset_val_pat = re.compile(r"\d+")
676f6189
PP
1222
1223 # Tries to parse an offset alignment, returning an offset alignment
1224 # item on success.
1225 def _try_parse_align_offset(self):
1226 begin_text_loc = self._text_loc
1227
1228 # Match prefix
1229 if self._try_parse_pat(self._align_offset_prefix_pat) is None:
1230 # No match
1231 return
1232
320644e2 1233 # Expect an alignment
25ca454b 1234 self._skip_ws()
676f6189
PP
1235 align_text_loc = self._text_loc
1236 m = self._expect_pat(
1237 self._align_offset_val_pat,
1238 "Expecting an alignment (positive multiple of eight bits)",
1239 )
1240
1241 # Validate alignment
320644e2 1242 val = int(m.group(0))
676f6189
PP
1243
1244 if val <= 0 or (val % 8) != 0:
1245 _raise_error(
1246 "Invalid alignment value {} (not a positive multiple of eight)".format(
1247 val
1248 ),
1249 align_text_loc,
1250 )
1251
25ca454b
PP
1252 # Padding value
1253 pad_val = self._parse_pad_val()
676f6189 1254
25ca454b
PP
1255 # Return item
1256 return _AlignOffset(val, pad_val, begin_text_loc)
676f6189 1257
dbd84e74
PP
# Patterns for _expect_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses an expression outside a `{`/`}` context.
#
# The accepted forms are tried in this order:
#
# 1. If `accept_const_float` is `True`: a constant floating point
#    number.
#
# 2. If `accept_const_int` is `True`: a constant integer. A negative
#    one is a parse error unless `allow_neg_int` is `True`.
#
# 3. A Python name.
#
# 4. A Python expression within `{` and `}`.
#
# Returns what self._ast_expr_from_str() returns, which callers unpack
# as the expression string and the AST expression.
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
):
    begin_text_loc = self._text_loc

    # Constant floating point number?
    if accept_const_float:
        float_m = self._try_parse_pat(_const_float_pat)

        if float_m is not None:
            return self._ast_expr_from_str(float_m.group(0), begin_text_loc)

    # Constant integer?
    if accept_const_int:
        int_m = self._try_parse_pat(_const_int_pat)

        if int_m is not None:
            if not allow_neg_int and int_m.group("neg") == "-":
                # Negative, but not allowed here
                _raise_error("Expecting a positive constant integer", begin_text_loc)

            return self._ast_expr_from_str(
                _norm_const_int(int_m.group(0)), begin_text_loc
            )

    # Name?
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is not None:
        return self._ast_expr_from_str(name_m.group(0), begin_text_loc)

    # Only `{` remains: build the list of everything that would have
    # been accepted for the error message.
    accepted = []

    if accept_const_int:
        accepted.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        accepted.append("a constant floating point number")

    # Two parts only ("a name or `{`") read better without a comma
    sep = ", " if accepted else " "
    accepted.extend(["a name", "or `{`"])
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(sep.join(accepted)),
    )

    # Inner expression (everything up to `}`)
    self._skip_ws()
    expr_text_loc = self._text_loc
    inner_m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # Closing `}`
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_m.group(0), expr_text_loc)
1343
25ca454b
PP
1344 # Patterns for _try_parse_fill_until()
1345 _fill_until_prefix_pat = re.compile(r"\+")
1346 _fill_until_pad_val_prefix_pat = re.compile(r"~")
676f6189 1347
25ca454b
PP
1348 # Tries to parse a filling, returning a filling item on success.
1349 def _try_parse_fill_until(self):
1350 begin_text_loc = self._text_loc
1351
1352 # Match prefix
1353 if self._try_parse_pat(self._fill_until_prefix_pat) is None:
1354 # No match
1355 return
1356
1357 # Expect expression
1358 self._skip_ws()
dbd84e74 1359 expr_str, expr = self._expect_expr(accept_const_int=True)
25ca454b
PP
1360
1361 # Padding value
1362 pad_val = self._parse_pad_val()
676f6189
PP
1363
1364 # Return item
25ca454b 1365 return _FillUntil(expr_str, expr, pad_val, begin_text_loc)
676f6189 1366
27d52a19
PP
1367 # Parses the multiplier expression of a repetition (block or
1368 # post-item) and returns the expression string and AST node.
1369 def _expect_rep_mul_expr(self):
dbd84e74 1370 return self._expect_expr(accept_const_int=True)
27d52a19
PP
1371
1372 # Common block end pattern
320644e2 1373 _block_end_pat = re.compile(r"!end\b")
27d52a19 1374
e57a18e1 1375 # Pattern for _try_parse_rep_block()
320644e2 1376 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
e57a18e1
PP
1377
1378 # Tries to parse a repetition block, returning a repetition item on
1379 # success.
1380 def _try_parse_rep_block(self):
1381 begin_text_loc = self._text_loc
1382
1383 # Match prefix
1384 if self._try_parse_pat(self._rep_block_prefix_pat) is None:
1385 # No match
1386 return
1387
1388 # Expect expression
1389 self._skip_ws_and_comments()
1390 expr_str, expr = self._expect_rep_mul_expr()
1391
1392 # Parse items
1393 self._skip_ws_and_comments()
1394 items_text_loc = self._text_loc
1395 items = self._parse_items()
1396
1397 # Expect end of block
1398 self._skip_ws_and_comments()
1399 self._expect_pat(
27d52a19 1400 self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
e57a18e1
PP
1401 )
1402
1403 # Return item
1404 return _Rep(_Group(items, items_text_loc), expr_str, expr, begin_text_loc)
1405
27d52a19 1406 # Pattern for _try_parse_cond_block()
320644e2 1407 _cond_block_prefix_pat = re.compile(r"!if\b")
12b5dbc0 1408 _cond_block_else_pat = re.compile(r"!else\b")
27d52a19
PP
1409
1410 # Tries to parse a conditional block, returning a conditional item
1411 # on success.
1412 def _try_parse_cond_block(self):
1413 begin_text_loc = self._text_loc
1414
1415 # Match prefix
1416 if self._try_parse_pat(self._cond_block_prefix_pat) is None:
1417 # No match
1418 return
1419
1420 # Expect expression
1421 self._skip_ws_and_comments()
dbd84e74 1422 expr_str, expr = self._expect_expr()
27d52a19 1423
12b5dbc0 1424 # Parse "true" items
27d52a19 1425 self._skip_ws_and_comments()
12b5dbc0
PP
1426 true_items_text_loc = self._text_loc
1427 true_items = self._parse_items()
1428 false_items = [] # type: List[_Item]
1429 false_items_text_loc = begin_text_loc
27d52a19 1430
12b5dbc0 1431 # `!else`?
27d52a19 1432 self._skip_ws_and_comments()
12b5dbc0
PP
1433
1434 if self._try_parse_pat(self._cond_block_else_pat) is not None:
1435 # Parse "false" items
1436 self._skip_ws_and_comments()
1437 false_items_text_loc = self._text_loc
1438 false_items = self._parse_items()
1439
1440 # Expect end of block
27d52a19
PP
1441 self._expect_pat(
1442 self._block_end_pat,
12b5dbc0 1443 "Expecting an item, `!else`, or `!end` (end of conditional block)",
27d52a19
PP
1444 )
1445
1446 # Return item
12b5dbc0
PP
1447 return _Cond(
1448 _Group(true_items, true_items_text_loc),
1449 _Group(false_items, false_items_text_loc),
1450 expr_str,
1451 expr,
1452 begin_text_loc,
1453 )
27d52a19 1454
320644e2
PP
1455 # Common left parenthesis pattern
1456 _left_paren_pat = re.compile(r"\(")
1457
1458 # Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
1459 _macro_params_comma_pat = re.compile(",")
1460
1461 # Patterns for _try_parse_macro_def()
1462 _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1463
1464 # Tries to parse a macro definition, adding it to `self._macro_defs`
1465 # and returning `True` on success.
1466 def _try_parse_macro_def(self):
1467 begin_text_loc = self._text_loc
1468
1469 # Match prefix
1470 if self._try_parse_pat(self._macro_def_prefix_pat) is None:
1471 # No match
1472 return False
1473
1474 # Expect a name
1475 self._skip_ws()
1476 name_text_loc = self._text_loc
1477 m = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
1478
1479 # Validate name
1480 name = m.group(0)
1481
1482 if name in self._macro_defs:
1483 _raise_error("Duplicate macro named `{}`".format(name), name_text_loc)
1484
1485 # Expect `(`
1486 self._skip_ws()
1487 self._expect_pat(self._left_paren_pat, "Expecting `(`")
1488
1489 # Try to parse comma-separated parameter names
1490 param_names = [] # type: List[str]
1491 expect_comma = False
1492
1493 while True:
1494 self._skip_ws()
1495
1496 # End?
1497 if self._try_parse_pat(self._right_paren_pat) is not None:
1498 # End
1499 break
1500
1501 # Comma?
1502 if expect_comma:
1503 self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")
1504
1505 # Expect parameter name
1506 self._skip_ws()
1507 param_text_loc = self._text_loc
1508 m = self._expect_pat(_py_name_pat, "Expecting valid parameter name")
1509
1510 if m.group(0) in param_names:
1511 _raise_error(
1512 "Duplicate macro parameter named `{}`".format(m.group(0)),
1513 param_text_loc,
1514 )
1515
1516 param_names.append(m.group(0))
1517 expect_comma = True
1518
1519 # Expect items
1520 self._skip_ws_and_comments()
1521 items_text_loc = self._text_loc
1522 old_var_names = self._var_names.copy()
1523 old_label_names = self._label_names.copy()
1524 self._var_names = set() # type: Set[str]
1525 self._label_names = set() # type: Set[str]
1526 items = self._parse_items()
1527 self._var_names = old_var_names
1528 self._label_names = old_label_names
1529
1530 # Expect suffix
1531 self._expect_pat(
1532 self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
1533 )
1534
1535 # Register macro
1536 self._macro_defs[name] = _MacroDef(
1537 name, param_names, _Group(items, items_text_loc), begin_text_loc
1538 )
1539
1540 return True
1541
1542 # Patterns for _try_parse_macro_exp()
1543 _macro_exp_prefix_pat = re.compile(r"m\b")
1544 _macro_exp_colon_pat = re.compile(r":")
1545
1546 # Tries to parse a macro expansion, returning a macro expansion item
1547 # on success.
1548 def _try_parse_macro_exp(self):
1549 begin_text_loc = self._text_loc
1550
1551 # Match prefix
1552 if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
1553 # No match
1554 return
1555
1556 # Expect `:`
1557 self._skip_ws()
1558 self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")
1559
1560 # Expect a macro name
1561 self._skip_ws()
1562 name_text_loc = self._text_loc
1563 m = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
1564
1565 # Validate name
1566 name = m.group(0)
1567 macro_def = self._macro_defs.get(name)
1568
1569 if macro_def is None:
1570 _raise_error("Unknown macro name `{}`".format(name), name_text_loc)
1571
1572 # Expect `(`
1573 self._skip_ws()
1574 self._expect_pat(self._left_paren_pat, "Expecting `(`")
1575
1576 # Try to parse comma-separated parameter values
1577 params_text_loc = self._text_loc
1578 params = [] # type: List[_MacroExpParam]
1579 expect_comma = False
1580
1581 while True:
1582 self._skip_ws()
1583
1584 # End?
1585 if self._try_parse_pat(self._right_paren_pat) is not None:
1586 # End
1587 break
1588
1589 # Expect a Value
1590 if expect_comma:
1591 self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")
1592
1593 self._skip_ws()
1594 param_text_loc = self._text_loc
1595 params.append(
1596 _MacroExpParam(
dbd84e74
PP
1597 *self._expect_expr(
1598 accept_const_int=True,
1599 allow_neg_int=True,
1600 accept_const_float=True,
1601 ),
6dd69a2a 1602 text_loc=param_text_loc
320644e2
PP
1603 )
1604 )
1605 expect_comma = True
1606
1607 # Validate parameter values
1608 if len(params) != len(macro_def.param_names):
1609 sing_plur = "" if len(params) == 1 else "s"
1610 _raise_error(
1611 "Macro expansion passes {} parameter{} while the definition expects {}".format(
1612 len(params), sing_plur, len(macro_def.param_names)
1613 ),
1614 params_text_loc,
1615 )
1616
1617 # Return item
1618 return _MacroExp(name, params, begin_text_loc)
1619
71aaa3f7
PP
1620 # Tries to parse a base item (anything except a repetition),
1621 # returning it on success.
1622 def _try_parse_base_item(self):
1623 # Byte item?
1624 item = self._try_parse_byte()
1625
1626 if item is not None:
1627 return item
1628
1629 # String item?
1630 item = self._try_parse_str()
1631
1632 if item is not None:
1633 return item
1634
2adf4336
PP
1635 # Value, variable assignment, or byte order setting item?
1636 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
1637
1638 if item is not None:
1639 return item
1640
2adf4336
PP
1641 # Label or offset setting item?
1642 item = self._try_parse_label_or_set_offset()
71aaa3f7 1643
676f6189
PP
1644 if item is not None:
1645 return item
1646
1647 # Offset alignment item?
1648 item = self._try_parse_align_offset()
1649
25ca454b
PP
1650 if item is not None:
1651 return item
1652
1653 # Filling item?
1654 item = self._try_parse_fill_until()
1655
71aaa3f7
PP
1656 if item is not None:
1657 return item
1658
1659 # Group item?
1660 item = self._try_parse_group()
1661
1662 if item is not None:
1663 return item
1664
320644e2 1665 # Repetition block item?
e57a18e1 1666 item = self._try_parse_rep_block()
71aaa3f7 1667
e57a18e1
PP
1668 if item is not None:
1669 return item
1670
27d52a19
PP
1671 # Conditional block item?
1672 item = self._try_parse_cond_block()
1673
1674 if item is not None:
1675 return item
1676
320644e2
PP
1677 # Macro expansion?
1678 item = self._try_parse_macro_exp()
1679
1680 if item is not None:
1681 return item
1682
e57a18e1
PP
1683 # Pattern for _try_parse_rep_post()
1684 _rep_post_prefix_pat = re.compile(r"\*")
1685
1686 # Tries to parse a post-item repetition, returning the expression
1687 # string and AST expression node on success.
1688 def _try_parse_rep_post(self):
71aaa3f7 1689 # Match prefix
e57a18e1 1690 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
71aaa3f7 1691 # No match
2adf4336 1692 return
71aaa3f7 1693
e57a18e1 1694 # Return expression string and AST expression
71aaa3f7 1695 self._skip_ws_and_comments()
e57a18e1 1696 return self._expect_rep_mul_expr()
71aaa3f7 1697
1ca7b5e1
PP
1698 # Tries to parse an item, possibly followed by a repetition,
1699 # returning `True` on success.
1700 #
1701 # Appends any parsed item to `items`.
1702 def _try_append_item(self, items: List[_Item]):
71aaa3f7
PP
1703 self._skip_ws_and_comments()
1704
320644e2 1705 # Base item
71aaa3f7
PP
1706 item = self._try_parse_base_item()
1707
1708 if item is None:
320644e2 1709 return
71aaa3f7
PP
1710
1711 # Parse repetition if the base item is repeatable
1712 if isinstance(item, _RepableItem):
0e8e3169
PP
1713 self._skip_ws_and_comments()
1714 rep_text_loc = self._text_loc
e57a18e1 1715 rep_ret = self._try_parse_rep_post()
71aaa3f7 1716
2adf4336 1717 if rep_ret is not None:
6dd69a2a 1718 item = _Rep(item, *rep_ret, text_loc=rep_text_loc)
71aaa3f7 1719
1ca7b5e1
PP
1720 items.append(item)
1721 return True
71aaa3f7
PP
1722
1723 # Parses and returns items, skipping whitespaces, insignificant
1724 # symbols, and comments when allowed, and stopping at the first
1725 # unknown character.
320644e2
PP
1726 #
1727 # Accepts and registers macro definitions if `accept_macro_defs`
1728 # is `True`.
1729 def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
71aaa3f7
PP
1730 items = [] # type: List[_Item]
1731
1732 while self._isnt_done():
1ca7b5e1
PP
1733 # Try to append item
1734 if not self._try_append_item(items):
320644e2
PP
1735 if accept_macro_defs and self._try_parse_macro_def():
1736 continue
1737
1ca7b5e1
PP
1738 # Unknown at this point
1739 break
71aaa3f7
PP
1740
1741 return items
1742
# Parses the whole Normand input, setting `self._res` to the main
# group item on success.
#
# Macro definitions are accepted at this first level. Reports an error
# with self._raise_error() if anything remains after the last item.
def _parse(self):
    if not self._normand.strip():
        # Nothing but whitespaces: empty main group
        self._res = _Group([], self._text_loc)
        return

    # First level items
    main_items = self._parse_items(accept_macro_defs=True)

    # Anything left is unexpected
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    # Main group item
    self._res = _Group(main_items, self._text_loc)
1764
1765
# The return type of parse().
#
# This class isn't meant to be instantiated by the user: calling it
# raises `NotImplementedError`. Instances are built with the
# _create() class method.
class ParseResult:
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which always raises
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization (see _create()).
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1823
1824
# Raises a parse error with the message `msg` at the text location of
# the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1829
1830
1831# The `ICITTE` reserved name.
1832_icitte_name = "ICITTE"
1833
1834
2adf4336
PP
1835# Base node visitor.
1836#
1837# Calls the _visit_name() method for each name node which isn't the name
1838# of a call.
1839class _NodeVisitor(ast.NodeVisitor):
1840 def __init__(self):
71aaa3f7
PP
1841 self._parent_is_call = False
1842
1843 def generic_visit(self, node: ast.AST):
1844 if type(node) is ast.Call:
1845 self._parent_is_call = True
1846 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1847 self._visit_name(node.id)
71aaa3f7
PP
1848
1849 super().generic_visit(node)
1850 self._parent_is_call = False
1851
2adf4336
PP
1852 @abc.abstractmethod
1853 def _visit_name(self, name: str):
1854 ...
1855
71aaa3f7 1856
2adf4336
PP
# Expression validator: validates that all the names visited within
# the expression (see `_NodeVisitor`) are allowed.
#
# `expr_str` is the original expression string and `text_loc` its
# location: both are only used to build the parse error. The `ICITTE`
# name is always allowed, whatever `allowed_names` contains.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # List the legal names (there's always `ICITTE` at least) to
        # help the user
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)
        quoted_names = sorted("`{}`".format(legal_name) for legal_name in legal_names)
        msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))
        _raise_error(msg, self._text_loc)
1887
1888
2adf4336
PP
1889# Generator state.
1890class _GenState:
1891 def __init__(
1b8aa84a 1892 self,
e57a18e1
PP
1893 variables: VariablesT,
1894 labels: LabelsT,
1b8aa84a
PP
1895 offset: int,
1896 bo: Optional[ByteOrder],
2adf4336
PP
1897 ):
1898 self.variables = variables.copy()
1899 self.labels = labels.copy()
1900 self.offset = offset
1901 self.bo = bo
71aaa3f7 1902
320644e2
PP
1903 def __repr__(self):
1904 return "_GenState({}, {}, {}, {})".format(
1905 repr(self.variables), repr(self.labels), repr(self.offset), repr(self.bo)
1906 )
1907
1908
1909# Fixed-length number item instance.
1910class _FlNumItemInst:
f5dcb24c
PP
1911 def __init__(
1912 self,
1913 item: _FlNum,
1914 offset_in_data: int,
1915 state: _GenState,
1916 parse_error_msgs: List[ParseErrorMessage],
1917 ):
320644e2
PP
1918 self._item = item
1919 self._offset_in_data = offset_in_data
1920 self._state = state
f5dcb24c 1921 self._parse_error_msgs = parse_error_msgs
320644e2
PP
1922
1923 @property
1924 def item(self):
1925 return self._item
1926
1927 @property
1928 def offset_in_data(self):
1929 return self._offset_in_data
1930
1931 @property
1932 def state(self):
1933 return self._state
1934
f5dcb24c
PP
1935 @property
1936 def parse_error_msgs(self):
1937 return self._parse_error_msgs
1938
71aaa3f7 1939
2adf4336 1940# Generator of data and final state from a group item.
71aaa3f7
PP
1941#
1942# Generation happens in memory at construction time. After building, use
1943# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1944# get the resulting context.
2adf4336
PP
1945#
1946# The steps of generation are:
1947#
320644e2
PP
1948# 1. Handle each item in prefix order.
1949#
1950# The handlers append bytes to `self._data` and update some current
1951# state object (`_GenState` instance).
1952#
1953# When handling a fixed-length number item, try to evaluate its
1954# expression using the current state. If this fails, then it might be
1955# because the expression refers to a "future" label: save the current
1956# offset in `self._data` (generated data) and a snapshot of the
1957# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
f5dcb24c
PP
1958# object). _gen_fl_num_item_insts() will deal with this later. A
1959# `_FlNumItemInst` instance also contains a snapshot of the current
1960# parsing error messages (`self._parse_error_msgs`) which need to be
1961# taken into account when handling the instance later.
2adf4336 1962#
320644e2
PP
1963# When handling the items of a group, keep a map of immediate label
1964# names to their offset. Then, after having processed all the items,
1965# update the relevant saved state snapshots in
1966# `self._fl_num_item_insts` with those immediate label values.
1967# _gen_fl_num_item_insts() will deal with this later.
2adf4336 1968#
320644e2
PP
1969# 2. Handle all the fixed-length number item instances of which the
1970# expression evaluation failed before.
2adf4336 1971#
320644e2
PP
1972# At this point, `self._fl_num_item_insts` contains everything that's
1973# needed to evaluate the expressions, including the values of
1974# "future" labels from the point of view of some fixed-length number
1975# item instance.
2adf4336 1976#
f5dcb24c
PP
1977# If an evaluation fails at this point, then it's a user error. Add
1978# to the parsing error all the saved parsing error messages of the
1979# instance. Those additional messages add precious context to the
1980# error.
71aaa3f7
PP
1981class _Gen:
# Generates from the group item `group` with the macro definitions
# `macro_defs`, starting with an initial state made of `variables`,
# `labels`, `offset`, and `bo`.
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    self._macro_defs = macro_defs

    # Fixed-length number item instances to handle later
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]

    # Current parsing error messages
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    # Generate now
    initial_state = _GenState(variables, labels, offset, bo)
    self._gen(group, initial_state)
71aaa3f7
PP
1995
# Generated bytes (`self._data`).
@property
def data(self):
    generated = self._data
    return generated
2000
# Updated variables (the ones of the final state).
@property
def variables(self):
    final_state = self._final_state
    return final_state.variables
71aaa3f7
PP
2005
# Updated main group labels (the ones of the final state).
@property
def labels(self):
    final_state = self._final_state
    return final_state.labels
71aaa3f7
PP
2010
# Updated offset (the one of the final state).
@property
def offset(self):
    final_state = self._final_state
    return final_state.offset
71aaa3f7
PP
2015
# Updated byte order (the one of the final state).
@property
def bo(self):
    final_state = self._final_state
    return final_state.bo
2020
320644e2
PP
# Evaluates the expression `expr` of which the original string is
# `expr_str` at the location `text_loc` considering the current
# generation state `state`, and returns the result.
#
# The names available to the expression are the labels of `state`,
# `ICITTE` (current offset of `state`), and the variables of `state`;
# when names clash, the later of those three wins.
#
# A `bool` result is converted to `int`. The result must be an `int`,
# or, if `allow_float` is `True`, an `int` or a `float`.
#
# Raises a parse error if the expression uses an illegal name, fails
# to evaluate, or has an unexpected result type.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    # Labels first, then `ICITTE`, then variables
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # Compile and evaluate expression node.
    #
    # NOTE: eval() runs the expression as Python code: the Normand
    # input must be trusted.
    try:
        val = eval(compile(expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(val) is bool:
        val = int(val)

    # Validate result type
    expected_types = {int, float} if allow_float else {int}  # type: Set[type]
    type_msg = "`int`" + (" or `float`" if allow_float else "")

    if type(val) not in expected_types:
        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, type_msg, type(val).__name__
            ),
            text_loc,
        )

    return val
2077
320644e2
PP
# Evaluates the expression of `item` (its `expr_str`, `expr`, and
# `text_loc` properties) considering the current generation state
# `state`, and returns the result.
#
# If `allow_float` is `True`, then the type of the result may be
# `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    expr_str, expr, text_loc = item.expr_str, item.expr, item.text_loc
    return _Gen._eval_expr(expr_str, expr, text_loc, state, allow_float)
2092
2093 # Handles the byte item `item`.
2094 def _handle_byte_item(self, item: _Byte, state: _GenState):
2095 self._data.append(item.val)
2096 state.offset += item.size
2097
2098 # Handles the string item `item`.
2099 def _handle_str_item(self, item: _Str, state: _GenState):
2100 self._data += item.data
2101 state.offset += item.size
2102
2103 # Handles the byte order setting item `item`.
2104 def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
2105 # Update current byte order
2106 state.bo = item.bo
2107
2108 # Handles the variable assignment item `item`.
2109 def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
2110 # Update variable
2111 state.variables[item.name] = self._eval_item_expr(item, state, True)
2112
2113 # Handles the fixed-length number item `item`.
2114 def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
2115 # Validate current byte order
2116 if state.bo is None and item.len > 8:
2117 _raise_error_for_item(
2118 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
2119 item.expr_str
2120 ),
2121 item,
2122 )
2123
2124 # Try an immediate evaluation. If it fails, then keep everything
2125 # needed to (try to) generate the bytes of this item later.
2126 try:
2127 data = self._gen_fl_num_item_inst_data(item, state)
2128 except Exception:
2129 self._fl_num_item_insts.append(
f5dcb24c
PP
2130 _FlNumItemInst(
2131 item,
2132 len(self._data),
2133 copy.deepcopy(state),
2134 copy.deepcopy(self._parse_error_msgs),
2135 )
320644e2
PP
2136 )
2137
2138 # Reserve space in `self._data` for this instance
2139 data = bytes([0] * (item.len // 8))
2140
2141 # Append bytes
2142 self._data += data
2143
2144 # Update offset
2145 state.offset += len(data)
2146
05f81895
PP
2147 # Returns the size, in bytes, required to encode the value `val`
2148 # with LEB128 (signed version if `is_signed` is `True`).
2149 @staticmethod
2150 def _leb128_size_for_val(val: int, is_signed: bool):
2151 if val < 0:
2152 # Equivalent upper bound.
2153 #
2154 # For example, if `val` is -128, then the full integer for
2155 # this number of bits would be [-128, 127].
2156 val = -val - 1
2157
2158 # Number of bits (add one for the sign if needed)
2159 bits = val.bit_length() + int(is_signed)
2160
2161 if bits == 0:
2162 bits = 1
2163
2164 # Seven bits per byte
2165 return math.ceil(bits / 7)
2166
320644e2
PP
# Handles the LEB128 integer item `item`: evaluates its expression,
# appends the LEB128 bytes of the result to `self._data`, and advances
# `state.offset` by the number of appended bytes.
#
# The item is encoded as signed when its type is exactly `_SLeb128Int`.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    remaining = self._eval_item_expr(item, state, False)
    is_signed = type(item) is _SLeb128Int
    size = self._leb128_size_for_val(remaining, is_signed)
    encoded = bytearray()

    for _ in range(size):
        # Seven LSBs with the continuation bit (MSB) set
        encoded.append((remaining & 0x7F) | 0x80)
        remaining >>= 7

    # The last byte has no continuation bit
    encoded[-1] &= 0x7F

    self._data += encoded
    state.offset += size
27d52a19 2186
320644e2
PP
2187 # Handles the group item `item`, removing the immediate labels from
2188 # `state` at the end if `remove_immediate_labels` is `True`.
2189 def _handle_group_item(
2190 self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
2191 ):
2192 first_fl_num_item_inst_index = len(self._fl_num_item_insts)
2193 immediate_labels = {} # type: LabelsT
27d52a19 2194
320644e2
PP
2195 # Handle each item
2196 for subitem in item.items:
2197 if type(subitem) is _Label:
2198 # Add to local immediate labels
2199 immediate_labels[subitem.name] = state.offset
2adf4336 2200
320644e2 2201 self._handle_item(subitem, state)
2adf4336 2202
320644e2
PP
2203 # Remove immediate labels from current state if needed
2204 if remove_immediate_labels:
2205 for name in immediate_labels:
2206 del state.labels[name]
2adf4336 2207
320644e2
PP
2208 # Add all immediate labels to all state snapshots since
2209 # `first_fl_num_item_inst_index`.
2210 for inst in self._fl_num_item_insts[first_fl_num_item_inst_index:]:
2211 inst.state.labels.update(immediate_labels)
2adf4336 2212
320644e2
PP
# Handles the repetition item `item`: evaluates its expression to get a
# repetition count, and then handles the repeated item (`item.item`)
# that many times.
#
# Reports an error through `_raise_error_for_item()` when the count is
# negative.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    for _ in range(count):
        self._handle_item(item.item, state)
2adf4336 2230
# Handles the conditional item `item`: evaluates its expression and then
# handles `item.true_item` if the result is truthy, or `item.false_item`
# otherwise.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    if _Gen._eval_item_expr(item, state):
        selected = item.true_item
    else:
        selected = item.false_item

    self._handle_item(selected, state)
2adf4336 2241
320644e2
PP
# Evaluates the parameters of the macro expansion item `item` against
# the state `init_state` and returns a new state with which to handle
# the items of the macro.
#
# The new state is created with two empty dictionaries and with the
# offset and byte order of `init_state`; its variables are then the
# evaluated parameters, named after the parameter names of the macro
# definition.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    new_state = _GenState({}, {}, init_state.offset, init_state.bo)
    param_names = self._macro_defs[item.name].param_names

    # Parameter expressions are evaluated within the _caller_ state
    for name, arg in zip(param_names, item.params):
        value = _Gen._eval_expr(
            arg.expr_str, arg.expr, arg.text_loc, init_state, True
        )
        new_state.variables[name] = value

    return new_state
2adf4336 2258
320644e2
PP
# Handles the macro expansion item `item`: handles the group of the
# corresponding macro definition with a dedicated state (see
# `_eval_macro_exp_params()`), and then advances `state.offset` by the
# number of bytes appended to `self._data` meanwhile.
#
# While the group is being handled, a "While expanding the macro"
# message is kept on top of `self._parse_error_msgs`. A `ParseError`
# raised during the expansion is passed to `_augment_error()` with the
# same message text.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    context_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # Dedicated state for the body of the macro
        macro_state = self._eval_macro_exp_params(item, state)

        # Size of the data before the expansion
        size_before = len(self._data)

        # Handle the group with the contextual message in place
        context_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            context_text, item.text_loc
        )
        self._parse_error_msgs.append(context_msg)
        self._handle_item(self._macro_defs[item.name].group, macro_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        # NOTE(review): presumably never returns (raises again) - confirm
        _augment_error(exc, context_text, item.text_loc)

    # Account for the bytes generated by the expansion
    state.offset += len(self._data) - size_before
676f6189 2282
320644e2
PP
2283 # Handles the offset setting item `item`.
2284 def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
676f6189 2285 state.offset = item.val
2adf4336 2286
25ca454b 2287 # Handles the offset alignment item `item` (adds padding).
320644e2
PP
2288 def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
2289 init_offset = state.offset
2290 align_bytes = item.val // 8
2291 state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
2292 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2adf4336 2293
25ca454b
PP
# Handles the filling item `item`: evaluates its expression to get a
# target offset, appends padding bytes (`item.pad_val`) to `self._data`
# until reaching it, and makes it the current offset of `state`.
#
# Reports an error through `_raise_error_for_item()` when the target
# offset is less than the current one.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    target = _Gen._eval_item_expr(item, state)

    if target < state.offset:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target, state.offset
            ),
            item,
        )

    # Pad up to the target offset, and then move there
    pad_count = target - state.offset
    self._data += bytes([item.pad_val] * pad_count)
    state.offset = target
2313
320644e2
PP
2314 # Handles the label item `item`.
2315 def _handle_label_item(self, item: _Label, state: _GenState):
2316 state.labels[item.name] = state.offset
2adf4336 2317
320644e2
PP
2318 # Handles the item `item`, returning the updated next repetition
2319 # instance.
2320 def _handle_item(self, item: _Item, state: _GenState):
2321 return self._item_handlers[type(item)](item, state)
71aaa3f7 2322
320644e2
PP
# Generates and returns the bytes of a fixed-length integer item
# instance having the value `val`.
#
# `val` must fit in `item.len` bits as either a signed or an unsigned
# integer; otherwise this method reports an error through
# `_raise_error_for_item()`.
#
# An unset byte order (`None`) is treated as big endian.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Validate range: lowest signed value to highest unsigned value
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not lowest <= val <= highest:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    # Encode on 64 bits first so that the sign bit is extended whatever
    # the value of `item.len`.
    is_big_endian = state.bo in (None, ByteOrder.BE)
    fmt = (">" if is_big_endian else "<") + ("Q" if val >= 0 else "q")
    packed = struct.pack(fmt, val)

    # Keep only the requested length
    len_bytes = item.len // 8

    if is_big_endian:
        # Least significant bytes are the last ones
        return packed[-len_bytes:]

    # Least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return packed[:len_bytes]
269f6eb3 2358
320644e2
PP
# Generates and returns the bytes of a fixed-length floating point
# number item instance having the value `val`.
#
# Only the 32-bit and 64-bit lengths are supported: this method reports
# an error through `_raise_error_for_item()` for any other `item.len`.
#
# An unset byte order (`None`) is treated as big endian.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    if item.len not in (32, 64):
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # `struct` format: byte order character, and then type character
    order_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"
    return struct.pack(order_char + type_char, val)
2379
320644e2
PP
2380 # Generates the data for a fixed-length number item instance and
2381 # returns it.
2382 def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
269f6eb3 2383 # Compute value
e57a18e1 2384 val = self._eval_item_expr(item, state, True)
269f6eb3 2385
269f6eb3
PP
2386 # Handle depending on type
2387 if type(val) is int:
320644e2 2388 return self._gen_fl_int_item_inst_data(val, item, state)
269f6eb3
PP
2389 else:
2390 assert type(val) is float
320644e2 2391 return self._gen_fl_float_item_inst_data(val, item, state)
05f81895 2392
320644e2
PP
2393 # Generates the data for all the fixed-length number item instances
2394 # and writes it at the correct offset within `self._data`.
2395 def _gen_fl_num_item_insts(self):
2396 for inst in self._fl_num_item_insts:
2397 # Generate bytes
f5dcb24c
PP
2398 try:
2399 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
2400 except ParseError as exc:
2401 # Add all the saved parse error messages for this
2402 # instance.
2403 for msg in reversed(inst.parse_error_msgs):
2404 _add_error_msg(exc, msg.text, msg.text_location)
2405
2406 raise
05f81895 2407
320644e2
PP
2408 # Insert bytes into `self._data`
2409 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2adf4336
PP
2410
# Generates the data (`self._data`) and the final state
# (`self._final_state`) from the group `group` and the initial state
# `state`.
#
# `state` is updated in place while handling `group`, and is itself
# what becomes the final state.
def _gen(self, group: _Group, state: _GenState):
    # Start without any data
    self._data = bytearray()

    # Handler to call for each exact item type
    self._item_handlers = {
        # Data
        _Byte: self._handle_byte_item,
        _Str: self._handle_str_item,
        _FlNum: self._handle_fl_num_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _ULeb128Int: self._handle_leb128_int_item,
        # Structure
        _Group: self._handle_group_item,
        _Rep: self._handle_rep_item,
        _Cond: self._handle_cond_item,
        _MacroExp: self._handle_macro_exp_item,
        # State
        _SetBo: self._handle_set_bo_item,
        _VarAssign: self._handle_var_assign_item,
        _Label: self._handle_label_item,
        # Offset
        _SetOffset: self._handle_set_offset_item,
        _AlignOffset: self._handle_align_offset_item,
        _FillUntil: self._handle_fill_until_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # Handle the root group, _keeping_ its immediate labels because the
    # `labels` property offers them.
    self._handle_group_item(group, state, False)

    # `state` is now the final state
    self._final_state = state

    # Now that the full state is known, generate the bytes of the
    # fixed-length number items which were postponed.
    self._gen_fl_num_item_insts()
2446
71aaa3f7
PP
2447
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes as well as the final variables,
# labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to numeric values (the command-line tool of this module
# passes both integers and floating point numbers). A variable name
# must not be the reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Not passing `init_variables` or `init_labels` (or passing `None`) is
# the same as passing an empty dictionary.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Text to items
    parser = _Parser(normand, variables, labels)

    # Items to data and final state
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2489
2490
f5dcb24c
PP
2491# Raises a command-line error with the message `msg`.
2492def _raise_cli_error(msg: str) -> NoReturn:
2493 raise RuntimeError("Command-line error: {}".format(msg))
2494
2495
b2410769
PP
# Returns the `int` or `float` value out of the command-line assignment
# value `s` (surrounding whitespace is ignored).
#
# A label (`is_label` is `True`) only accepts a constant integer,
# whereas a variable also accepts a constant floating point number,
# which is tried first.
#
# Raises a command-line error if `s` has none of the accepted forms.
def _val_from_assign_val_str(s: str, is_label: bool):
    s = s.strip()

    # Both patterns must match the _whole_ string, so a successful match
    # is `s` itself.
    if not is_label and _const_float_pat.fullmatch(s) is not None:
        return float(s)

    if _const_int_pat.fullmatch(s) is not None:
        return int(_norm_const_int(s), 0)

    if is_label:
        exp = "an integer"
    else:
        exp = "a number"

    _raise_cli_error("Invalid assignment value `{}`: expecting {}".format(s, exp))
2515
2516
# Returns a dictionary of names to numbers from the list of strings
# `args` containing `NAME=VAL` entries (empty dictionary if `args` is
# `None`).
#
# `is_label` indicates whether the entries are labels or variables,
# which decides the accepted forms of `VAL` (see
# `_val_from_assign_val_str()`).
#
# When a name is repeated, its last value wins.
#
# Raises a command-line error on any invalid entry.
def _dict_from_arg(args: Optional[List[str]], is_label: bool):
    result = {}  # type: VariablesT

    if args is None:
        return result

    # `NAME=VAL`, with optional whitespace around `=`
    assign_pat = re.compile(r"({})\s*=\s*(.+)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment `{}`".format(arg))

        name, val_str = m.group(1), m.group(2)
        result[name] = _val_from_assign_val_str(val_str, is_label)

    return result
2534
2535
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The Normand input text is read from the input file, or from the
# standard input without an input file path.
#
# Raises a command-line error (see `_raise_cli_error()`) on an invalid
# `--var`/`--label` assignment or on a negative initial offset.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var, False)
    labels = _dict_from_arg(args.label, True)

    # Validate offset (the message must show the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # `choices` above only leaves this possibility
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state.
    #
    # The cast is for the type checker only: `_dict_from_arg()` is
    # annotated as building a `VariablesT` dictionary, but with
    # `is_label` set to `True`, it only accepts integers.
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
71aaa3f7 2620
71aaa3f7 2621
f5dcb24c
PP
# CLI entry point without exception handling: parses the Normand input
# text `normand` with the initial offset `offset`, byte order `bo`,
# variables `variables`, and labels `labels`, and writes the resulting
# bytes to the standard output (binary).
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    res = parse(normand, variables, labels, offset, bo)
    sys.stdout.buffer.write(res.data)
71aaa3f7
PP
2631
2632
2633# Prints the exception message `msg` and exits with status 1.
2634def _fail(msg: str) -> NoReturn:
2635 if not msg.endswith("."):
2636 msg += "."
2637
f5dcb24c 2638 print(msg.strip(), file=sys.stderr)
71aaa3f7
PP
2639 sys.exit(1)
2640
2641
2642# CLI entry point.
2643def _run_cli():
2644 try:
f5dcb24c
PP
2645 args = _parse_cli_args()
2646 except Exception as exc:
2647 _fail(str(exc))
2648
2649 try:
2650 _run_cli_with_args(*args[1:])
2651 except ParseError as exc:
2652 import os.path
2653
2654 prefix = "" if args[0] is None else "{}:".format(os.path.abspath(args[0]))
2655 fail_msg = ""
2656
2657 for msg in reversed(exc.messages):
2658 fail_msg += "{}{}:{} - {}".format(
2659 prefix,
2660 msg.text_location.line_no,
2661 msg.text_location.col_no,
2662 msg.text,
2663 )
2664
2665 if fail_msg[-1] not in ".:;":
2666 fail_msg += "."
2667
2668 fail_msg += "\n"
2669
2670 _fail(fail_msg.strip())
71aaa3f7
PP
2671 except Exception as exc:
2672 _fail(str(exc))
2673
2674
# Run the command-line tool when this module is executed as a script
# (as opposed to being imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.15042 seconds and 4 git commands to generate.