Add transformation block support
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.21.0"

# Public names of this module.
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseErrorMessage",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
46
47import re
48import abc
49import ast
cd33dfe6 50import bz2
71aaa3f7 51import sys
320644e2 52import copy
71aaa3f7 53import enum
cd33dfe6 54import gzip
05f81895 55import math
cd33dfe6
PP
56import base64
57import quopri
71aaa3f7 58import struct
e57a18e1 59import typing
cd33dfe6 60import functools
e57a18e1 61from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
62
63
# Text location (line and column numbers).
#
# Calling the constructor raises `NotImplementedError`: instances are
# built with the private _create() class method instead.
class TextLocation:
    # Creates and returns a text location from the line number
    # `line_no` and the column number `col_no`, bypassing __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        coords = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*coords)
2adf4336 91
71aaa3f7
PP
92
# Any item: only keeps the source text location it's built with.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Source text location (as given at construction time).
    @property
    def text_loc(self):
        return self._text_loc
102
2adf4336
PP
103
# Scalar item: an item which reports its size through the abstract
# `size` property.
class _ScalarItem(_Item):
    # Size of this item (bytes).
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
111
112
# Marker base class for a repeatable item (no members of its own).
class _RepableItem:
    pass
116
117
# Single byte: a scalar, repeatable item having a size of one.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # Size: always one.
    @property
    def size(self):
        return 1

    def __repr__(self):
        # The value is shown in hexadecimal
        fields = (hex(self._val), repr(self._text_loc))
        return "_Byte({}, {})".format(*fields)
71aaa3f7
PP
135
136
7a7b31e8
PP
# Literal string: a scalar, repeatable item wrapping already encoded
# bytes.
class _LitStr(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Size: length of the encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        fields = map(repr, (self._data, self._text_loc))
        return "_LitStr({}, {})".format(*fields)
71aaa3f7
PP
154
155
# Byte order (unique values).
@enum.unique
class ByteOrder(enum.Enum):
    BE = "be"  # Big endian
    LE = "le"  # Little endian
164
165
2adf4336
PP
# Byte order setting item.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        fields = map(repr, (self._bo, self._text_loc))
        return "_SetBo({}, {})".format(*fields)
71aaa3f7
PP
178
179
# Label item.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = map(repr, (self._name, self._text_loc))
        return "_Label({}, {})".format(*fields)
71aaa3f7
PP
193
194
2adf4336
PP
# Offset setting item.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        fields = map(repr, (self._val, self._text_loc))
        return "_SetOffset({}, {})".format(*fields)
208
209
# Offset alignment item.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val
        self._pad_val = pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = map(repr, (self._val, self._pad_val, self._text_loc))
        return "_AlignOffset({}, {}, {})".format(*fields)
71aaa3f7
PP
231
232
# Mixin which holds an AST expression together with the string it
# comes from.
class _ExprMixin:
    def __init__(self, expr_str: str, expr: ast.Expression):
        self._expr_str, self._expr = expr_str, expr

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression node to evaluate.
    @property
    def expr(self):
        return self._expr
248
249
25ca454b
PP
# Fill until some offset (given by an expression) with a padding byte
# value.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._pad_val, self._text_loc)
        return "_FillUntil({}, {}, {}, {})".format(*map(repr, fields))
271
272
2adf4336
PP
# Variable assignment item: a name and the expression to assign.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({}, {}, {}, {})".format(*map(repr, fields))
294
295
269f6eb3
PP
# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight (floor).
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._len, self._text_loc)
        return "_FlNum({}, {}, {}, {})".format(*map(repr, fields))
321
322
05f81895
PP
# LEB128 integer (base of the unsigned and signed variants).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the name of the concrete class so that subclasses don't
        # need their own __repr__()
        fields = map(repr, (self._expr_str, self._expr, self._text_loc))
        return "{}({}, {}, {})".format(type(self).__name__, *fields)
336
337
# Unsigned LEB128 integer (adds nothing to `_Leb128Int`).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
341
342
# Signed LEB128 integer (adds nothing to `_Leb128Int`).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
346
347
7a7b31e8
PP
# String item: an expression and the name of the codec to use.
class _Str(_Item, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._codec = codec

    # Codec name.
    @property
    def codec(self):
        return self._codec

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._codec, self._text_loc)
        return "_Str({}, {}, {}, {})".format(*map(repr, fields))
369
370
# Group of items (repeatable).
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        fields = map(repr, (self._items, self._text_loc))
        return "_Group({}, {})".format(*fields)
71aaa3f7
PP
384
385
# Repetition item: a group of items with an expression.
class _Rep(_Group, _ExprMixin):
    def __init__(
        self,
        items: List[_Item],
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        fields = (self._items, self._expr_str, self._expr, self._text_loc)
        return "_Rep({}, {}, {}, {})".format(*map(repr, fields))
405
406
27d52a19
PP
# Conditional item: a condition expression with one group for each
# outcome.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Group,
        false_item: _Group,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        fields = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({}, {}, {}, {}, {})".format(*map(repr, fields))
440
441
cd33dfe6
PP
# Transformation: a group of items with a named function which
# transforms bytes.
class _Trans(_Group, _RepableItem):
    def __init__(
        self,
        items: List[_Item],
        name: str,
        func: Callable[[Union[bytes, bytearray]], bytes],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._name, self._func = name, func

    # Name.
    @property
    def name(self):
        return self._name

    # Returns the result of calling the transformation function with
    # the data `data`.
    def trans(self, data: Union[bytes, bytearray]):
        transform = self._func
        return transform(data)

    def __repr__(self):
        fields = (self._items, self._name, self._func, self._text_loc)
        return "_Trans({}, {}, {}, {})".format(*map(repr, fields))
470
471
# Macro definition item: a named group of items with parameter names.
class _MacroDef(_Group):
    def __init__(
        self,
        name: str,
        param_names: List[str],
        items: List[_Item],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._name, self._param_names = name, param_names

    # Name.
    @property
    def name(self):
        return self._name

    # Parameter names.
    @property
    def param_names(self):
        return self._param_names

    def __repr__(self):
        fields = (self._name, self._param_names, self._items, self._text_loc)
        return "_MacroDef({}, {}, {}, {})".format(*map(repr, fields))
502
503
# Macro expansion parameter: an expression, its string, and its source
# text location.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression.
    @property
    def expr(self):
        return self._expr

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        fields = map(repr, (self._expr_str, self._expr, self._text_loc))
        return "_MacroExpParam({}, {}, {})".format(*fields)
530
531
# Macro expansion item: the name of the macro with the expansion
# parameters.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        fields = map(repr, (self._name, self._params, self._text_loc))
        return "_MacroExp({}, {}, {})".format(*fields)
2adf4336
PP
560
561
f5dcb24c
PP
# A parsing error message: a string and a text location.
#
# Calling the constructor raises `NotImplementedError`: instances are
# built with the private _create() class method instead.
class ParseErrorMessage:
    # Creates and returns a message from the text `text` and the text
    # location `text_loc`, bypassing __init__().
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create().
    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Message text.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
586
587
# A parsing error containing one or more messages (`ParseErrorMessage`).
#
# Calling the constructor raises `NotImplementedError`: instances are
# built with the private _create() class method instead.
class ParseError(RuntimeError):
    # Creates and returns a parsing error having a single message made
    # of `msg` and `text_loc`, bypassing __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create(): `msg` is also the
    # argument of the `RuntimeError` initializer.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a new message made of `msg` and `text_loc`.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
71aaa3f7
PP
617
618
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
622
623
f5dcb24c
PP
# Adds a message (text `msg`, text location `text_loc`) to the parsing
# error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    add_msg = exc._add_msg  # pyright: ignore[reportPrivateUsage]
    add_msg(msg, text_loc)
627
628
# Adds a message (text `msg`, text location `text_loc`) to the parsing
# error `exc`, and then raises `exc` again.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    _add_error_msg(exc, msg=msg, text_loc=text_loc)
    raise exc
633
634
b2410769
PP
# Returns a normalized version (so as to be parseable by int()) of
# the constant integer string `s`, possibly negative, dealing with
# any radix suffix.
#
# A string which is a radix prefix (`0x`, `0o`, or `0b`, any case)
# followed with valid digits only is returned as is. Otherwise, a
# trailing assembly-style radix suffix (`h`, `q`, `o`, or `b`, any
# case) is removed and the equivalent radix prefix is inserted after
# the optional `-`. Any other string, including an empty one, is
# returned as is.
def _norm_const_int(s: str):
    neg = ""
    pos = s

    if s.startswith("-"):
        neg = "-"
        pos = s[1:]

    if not pos:
        # Nothing to normalize
        return s

    # Only checking that `pos` starts with a radix prefix isn't enough:
    # `0b1h` is the hexadecimal number B1 and `0b` is the binary number
    # zero, both needing a suffix conversion.
    if re.match(r"0(?:[Xx][A-Fa-f0-9]+|[Oo][0-7]+|[Bb][01]+)\Z", pos):
        # Already correct
        return s

    # Try suffix
    asm_suf_base = {
        "h": "x",
        "H": "x",
        "q": "o",
        "Q": "o",
        "o": "o",
        "O": "o",
        "b": "b",
        "B": "B",
    }
    base = asm_suf_base.get(pos[-1])

    if base is None:
        # No radix suffix
        return s

    # Remove exactly one suffix character
    return "{}0{}{}".format(neg, base, pos[:-1])
668
669
7a7b31e8
PP
# Returns the string `s` encoded with the codec `codec`, raising a
# parsing error (through _raise_error()) with `text_loc` when
# `UnicodeEncodeError` occurs.
def _encode_str(s: str, codec: str, text_loc: TextLocation):
    try:
        encoded = s.encode(codec)
    except UnicodeEncodeError:
        msg = "Cannot encode `{}` with the `{}` encoding".format(s, codec)
        _raise_error(msg, text_loc)

    return encoded
679
680
# Variables dictionary type (for type hints): string keys with integer,
# floating point number, or string values.
VariablesT = Dict[str, Union[int, float, str]]


# Labels dictionary type (for type hints): string keys with integer
# values.
LabelsT = Dict[str, int]


# Common patterns.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

# Positive constant integer: radix prefix, radix suffix, or plain
# decimal form.
_pos_const_int_pat = re.compile(
    r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+"
    r"|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
)

# Same, with an optional `-` captured as the `neg` group.
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))

# NOTE(review): the final `(?=\W|)` lookahead has an empty alternative,
# so it always succeeds; confirm whether `(?=\W|$)` was intended.
_const_float_pat = re.compile(
    r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
)
71aaa3f7
PP
698
699
320644e2
PP
# Macro definition dictionary type: string keys with macro definition
# (`_MacroDef`) values.
_MacroDefsT = Dict[str, _MacroDef]
702
703
71aaa3f7
PP
704# Normand parser.
705#
706# The constructor accepts a Normand input. After building, use the `res`
707# property to get the resulting main group.
708class _Parser:
# Builds a parser to parse the Normand input `normand`, parsing
# immediately.
#
# The initial label and variable names are the keys of `labels` and
# `variables`.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position (offset, line number, column number)
    self._normand = normand
    self._at = 0
    self._line_no = self._col_no = 1

    # Known names
    self._label_names = set(labels)
    self._var_names = set(variables)
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse now
    self._parse()
720
# Parsing result (main group), as stored in `self._res`.
@property
def res(self):
    return self._res
725
320644e2
PP
# Macro definitions dictionary, as stored in `self._macro_defs`.
@property
def macro_defs(self):
    return self._macro_defs
730
71aaa3f7
PP
# Current text location: a new `TextLocation` object built from the
# current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(  # pyright: ignore[reportPrivateUsage]
        line_no, col_no
    )
737
# Returns `True` if this parser is done parsing, that is, if the
# current offset is the length of the input.
def _is_done(self):
    return len(self._normand) == self._at
741
# Returns `True` if this parser isn't done parsing (opposite of
# _is_done()).
def _isnt_done(self):
    done = self._is_done()
    return not done
745
# Raises a parse error, creating it using the message `msg` and the
# current text location.
#
# This is a `_Parser` method: the call below targets the module-level
# _raise_error() function.
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
750
# Tries to make the pattern `pat` match the current substring (the
# input from the offset `self._at`).
#
# On success, updates `self._at`, `self._line_no`, and `self._col_no`
# so that they correspond to the end of the match, and returns the
# match object.
#
# Returns `None` without changing anything if there's no match.
def _try_parse_pat(self, pat: Pattern[str]):
    m = pat.match(self._normand, self._at)

    if m is None:
        return

    # Skip matched string
    self._at = m.end()

    # Update line number
    self._line_no += m.group(0).count("\n")

    # Update column number: distance between the new offset and the
    # last newline before it. str.rfind() returns -1 when there's no
    # such newline, which makes the column number `self._at + 1` as
    # needed for the first line. A newline at offset 0 is a real
    # newline, not the "no newline" case.
    last_nl_at = self._normand.rfind("\n", 0, self._at)
    self._col_no = self._at - last_nl_at

    # Return match object
    return m
778
# Same as _try_parse_pat(), but raises a parse error (through
# `self._raise_error()`) with the message `error_msg` when the pattern
# `pat` doesn't match the current substring.
def _expect_pat(self, pat: Pattern[str], error_msg: str):
    m = self._try_parse_pat(pat)

    if m is not None:
        # Match: return match object
        return m

    # No match: error
    self._raise_error(error_msg)
793
ba11fb1d
PP
# Patterns for _skip_*(): a comment starts with `#` and ends at the
# next `#` or at the end of the line.
_comment_pat = re.compile(r"#[^#]*?(?:$|#)", re.MULTILINE)
_ws_or_comments_pat = re.compile(
    r"(?:\s|{})*".format(_comment_pat.pattern), re.MULTILINE
)
_ws_or_syms_or_comments_pat = re.compile(
    r"(?:[\s/\\?&:;.,[\]_=|-]|{})*".format(_comment_pat.pattern), re.MULTILINE
)

# Skips as many whitespaces and comments as possible, but not
# insignificant symbol characters.
def _skip_ws_and_comments(self):
    pat = self._ws_or_comments_pat
    self._try_parse_pat(pat)
805
71aaa3f7
PP
# Skips as many whitespaces, insignificant symbol characters, and
# comments as possible.
def _skip_ws_and_comments_and_syms(self):
    pat = self._ws_or_syms_or_comments_pat
    self._try_parse_pat(pat)
810
# Pattern for _try_parse_hex_byte(): a single hexadecimal nibble
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated
# by whitespaces, insignificant symbols, and comments), returning a
# byte item on success.
#
# Raises a parse error if the second nibble is missing.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc

    # Match initial nibble
    m_high = self._try_parse_pat(self._nibble_pat)

    if m_high is None:
        # No match
        return

    # Expect another nibble
    self._skip_ws_and_comments_and_syms()
    m_low = self._expect_pat(
        self._nibble_pat, "Expecting another hexadecimal nibble"
    )

    # Return item
    hex_digits = m_high.group(0) + m_low.group(0)
    return _Byte(int(hex_digits, 16), begin_text_loc)
71aaa3f7
PP
834
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%+")

# Tries to parse binary bytes: one or more `%` followed with eight
# bits for each `%`.
#
# Returns a byte item if there's a single `%`, or a group of byte
# items otherwise. Raises a parse error if a bit is missing.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m_prefix = self._try_parse_pat(self._bin_byte_prefix_pat)

    if m_prefix is None:
        # No match
        return

    # Expect as many bytes as there are `%` prefixes
    byte_count = len(m_prefix.group(0))
    items = []  # type: List[_Item]

    for _ in range(byte_count):
        self._skip_ws_and_comments_and_syms()
        byte_text_loc = self._text_loc
        bit_chars = []  # type: List[str]

        # Expect eight bits
        for _ in range(8):
            self._skip_ws_and_comments_and_syms()
            m_bit = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            bit_chars.append(m_bit.group(0))

        items.append(_Byte(int("".join(bit_chars), 2), byte_text_loc))

    # Return a single byte item as is, or many as a group
    if len(items) == 1:
        return items[0]

    return _Group(items, begin_text_loc)
71aaa3f7
PP
874
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a decimal constant
# from -128 to 255), returning a byte item on success.
#
# Raises a parse error if the value is missing or out of range.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    # Match prefix
    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No match
        return

    # Expect the value
    self._skip_ws_and_comments()
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Compute value
    val = int(m.group("val"))

    if m.group("neg") == "-":
        val = -val

    # Validate
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Return item (two's complement for a negative value)
    return _Byte(val % 256, begin_text_loc)
71aaa3f7
PP
904
# Tries to parse a byte (hexadecimal, then binary, then decimal form),
# returning the item of the first form which parses, or `None` if
# none does.
def _try_parse_byte(self):
    try_parse_funcs = (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            return item
924
# Strings corresponding to escape sequence characters
_lit_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Patterns for _try_parse_lit_str()
_lit_str_prefix_suffix_pat = re.compile(r'"')
_lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
_lit_str_escape_seq_pat = re.compile(r"\\(.)")

# Parses a literal string between double quotes (without an encoding
# prefix) and returns the resulting string, escape sequences replaced.
#
# If `with_prefix` is `True`, then the opening `"` is part of what this
# method parses, and this method returns `None` if it's missing.
# Otherwise the opening `"` is considered already parsed.
#
# Raises a parse error if the closing `"` is missing.
def _try_parse_lit_str(self, with_prefix: bool):
    # Match prefix if needed
    if with_prefix:
        if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None:
            # No match
            return

    # Expect literal string
    m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(
        self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)'
    )

    # Replace escape sequences in a single left-to-right pass so that
    # the result of a replacement is never part of another escape
    # sequence: `\\n` is a backslash followed with `n`, not a
    # backslash followed with a newline. An unknown escape sequence
    # remains as is.
    escape_seq_strs = self._lit_str_escape_seq_strs

    def repl(m_esc: Any):
        return escape_seq_strs.get(m_esc.group(1), m_esc.group(0))

    # Return string
    return self._lit_str_escape_seq_pat.sub(repl, m.group(0))
969
# Patterns for _try_parse_utf_str_encoding()
_str_encoding_utf_prefix_pat = re.compile(r"u")
_str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")

# Tries to parse a UTF encoding specification (`u` followed with `8`,
# `16be`, `16le`, `32be`, or `32le`), returning the Python codec name
# on success.
#
# Raises a parse error if what follows `u` is unexpected.
def _try_parse_utf_str_encoding(self):
    # Match prefix
    if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None:
        # No match
        return

    # Expect UTF specification
    m = self._expect_pat(
        self._str_encoding_utf_pat,
        "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
    )

    # Convert to codec name
    codecs_by_spec = {
        "8": "utf_8",
        "16be": "utf_16_be",
        "16le": "utf_16_le",
        "32be": "utf_32_be",
        "32le": "utf_32_le",
    }
    return codecs_by_spec[m.group(0)]
996
# Patterns for _try_parse_str_encoding()
_str_encoding_gen_prefix_pat = re.compile(r"s")
_str_encoding_colon_pat = re.compile(r":")
_str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b")

# Tries to parse a string encoding specification, returning the
# Python codec name on success.
#
# Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
#
# With the general prefix, a UTF specification or `latin1` to
# `latin10` is expected (parse error otherwise). Without it, only a
# UTF specification is tried, and only if `req_gen_prefix` is `False`.
def _try_parse_str_encoding(self, req_gen_prefix: bool = False):
    if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is None:
        # No general prefix
        if req_gen_prefix:
            return

        # UTF?
        return self._try_parse_utf_str_encoding()

    # General prefix: expect `:`
    self._skip_ws_and_comments()
    self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`")

    # Expect encoding specification
    self._skip_ws_and_comments()

    # UTF?
    codec = self._try_parse_utf_str_encoding()

    if codec is not None:
        return codec

    # Expect Latin
    m = self._expect_pat(
        self._str_encoding_non_utf_pat,
        "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
    )
    return m.group(0)
71aaa3f7 1032
7a7b31e8
PP
# Patterns for _try_parse_str()
_lit_str_prefix_pat = re.compile(r'"')
_str_prefix_pat = re.compile(r'"|\{')
_str_expr_pat = re.compile(r"[^}]+")
_str_expr_suffix_pat = re.compile(r"\}")

# Tries to parse a string, returning a literal string or string item
# on success.
#
# Without an encoding specification, only a literal string (encoded
# as UTF-8) is legal. With one, either a literal string or an
# expression between `{` and `}` is expected.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Encoding
    codec = self._try_parse_str_encoding()

    # Match prefix (expect if there's an encoding specification)
    self._skip_ws_and_comments()

    if codec is not None:
        # Encoding present: expect a string prefix
        m_prefix = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`')
    else:
        # No encoding: only a literal string (UTF-8) is legal
        m_prefix = self._try_parse_pat(self._lit_str_prefix_pat)

        if m_prefix is None:
            return

    if m_prefix.group(0) != '"':
        # Expression: expect it
        self._skip_ws_and_comments()
        expr_text_loc = self._text_loc
        m_expr = self._expect_pat(self._str_expr_pat, "Expecting an expression")

        # Expect `}`
        self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(0), expr_text_loc)

        # Return item
        assert codec is not None
        return _Str(expr_str, expr, codec, begin_text_loc)

    # Literal string: expect it
    str_text_loc = self._text_loc
    val = self._try_parse_lit_str(False)

    if val is None:
        self._raise_error("Expecting a literal string")

    # Encode string
    effective_codec = "utf_8" if codec is None else codec
    data = _encode_str(val, effective_codec, str_text_loc)

    # Return item
    return _LitStr(data, begin_text_loc)
71aaa3f7 1091
320644e2
PP
# Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group, returning a group item on success.
#
# A group opened with `(` must end with `)`; any other opening must
# end with what `self._block_end_pat` matches.
def _try_parse_group(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        # No match
        return

    # Parse items
    items = self._parse_items()

    # Expect end of group
    self._skip_ws_and_comments_and_syms()

    if m_open.group(0) == "(":
        pat, exp = self._right_paren_pat, ")"
    else:
        pat, exp = self._block_end_pat, "!end"

    self._expect_pat(pat, "Expecting an item or `{}` (end of group)".format(exp))

    # Return item
    return _Group(items, begin_text_loc)
71aaa3f7
PP
1126
1127 # Returns a stripped expression string and an AST expression node
1128 # from the expression string `expr_str` at text location `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    # Normalizes `expr_str` (surrounding whitespace stripped, newlines
    # turned into spaces) and compiles it as a Python expression.
    #
    # Returns the normalized string and its AST expression node; a
    # syntax error is reported as a parse error at `text_loc`.
    normalized = expr_str.strip().replace("\n", " ")

    try:
        node = ast.parse(normalized, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(normalized),
            text_loc,
        )

    return normalized, node
1142
7a7b31e8 1143 # Patterns for _try_parse_val()
05f81895 1144 _val_expr_pat = re.compile(r"([^}:]+):\s*")
7a7b31e8
PP
1145 _fl_num_len_fmt_pat = re.compile(r"8|16|24|32|40|48|56|64")
1146 _leb128_int_fmt_pat = re.compile(r"(u|s)leb128")
71aaa3f7 1147
7a7b31e8
PP
1148 # Tries to parse a value (number or string) and format (fixed length
1149 # in bits, `uleb128`, `sleb128`, or `s:` followed with an encoding
1150 # name), returning an item on success.
def _try_parse_val(self):
    # Tries to parse `EXPR:` followed by a format, returning the
    # matching item, or `None` if there's no `EXPR:` part here.
    #
    # The format is tried in this order: fixed length in bits, then
    # `uleb128`/`sleb128`, then a string encoding. Anything else is a
    # parse error.
    start_loc = self._text_loc
    expr_match = self._try_parse_pat(self._val_expr_pat)

    if expr_match is None:
        return None

    expr_str, expr = self._ast_expr_from_str(expr_match.group(1), start_loc)
    self._skip_ws_and_comments()

    # Fixed-length number?
    len_match = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if len_match is not None:
        return _FlNum(expr_str, expr, int(len_match.group(0)), start_loc)

    # LEB128 integer?
    leb128_match = self._try_parse_pat(self._leb128_int_fmt_pat)

    if leb128_match is not None:
        # Group 1 is the `u` or `s` signedness prefix
        item_cls = _ULeb128Int if leb128_match.group(1) == "u" else _SLeb128Int
        return item_cls(expr_str, expr, start_loc)

    # String with an explicit encoding?
    codec = self._try_parse_str_encoding(True)

    if codec is not None:
        return _Str(expr_str, expr, codec, start_loc)

    # No known format
    self._raise_error(
        "Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
    )
71aaa3f7 1196
320644e2 1197 # Patterns for _try_parse_var_assign()
7a7b31e8
PP
1198 _var_assign_name_equal_pat = re.compile(
1199 r"({})\s*=(?!=)".format(_py_name_pat.pattern)
1200 )
320644e2 1201 _var_assign_expr_pat = re.compile(r"[^}]+")
71aaa3f7 1202
2adf4336
PP
1203 # Tries to parse a variable assignment, returning a variable
1204 # assignment item on success.
def _try_parse_var_assign(self):
    # Tries to parse `NAME = EXPR`, returning a `_VarAssign` item, or
    # `None` if the current position doesn't start with `NAME =`.
    #
    # The name must not be the reserved `ICITTE` name nor an existing
    # label name. On success the name is recorded in `self._var_names`.
    start_loc = self._text_loc
    name_match = self._try_parse_pat(self._var_assign_name_equal_pat)

    if name_match is None:
        return None

    var_name = name_match.group(1)

    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), start_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), start_loc)

    # Right-hand side: everything up to the closing `}`
    self._skip_ws_and_comments()
    rhs_match = self._expect_pat(
        self._var_assign_expr_pat, "Expecting an expression"
    )
    expr_str, expr = self._ast_expr_from_str(rhs_match.group(0), start_loc)

    # Registered only once the expression is known to be valid
    self._var_names.add(var_name)
    return _VarAssign(var_name, expr_str, expr, start_loc)
1243
2adf4336 1244 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
1245 _bo_pat = re.compile(r"[bl]e")
1246
2adf4336
PP
1247 # Tries to parse a byte order name, returning a byte order setting
1248 # item on success.
def _try_parse_set_bo(self):
    # Tries to parse a byte order name (`be` or `le`), returning the
    # corresponding `_SetBo` item, or `None` if there's no match.
    start_loc = self._text_loc
    bo_match = self._try_parse_pat(self._bo_pat)

    if bo_match is None:
        return None

    bo_name = bo_match.group(0)

    if bo_name == "be":
        bo = ByteOrder.BE
    else:
        # The pattern only accepts `be` and `le`
        assert bo_name == "le"
        bo = ByteOrder.LE

    return _SetBo(bo, start_loc)
71aaa3f7
PP
1265
1266 # Patterns for _try_parse_val_or_var_assign_or_set_bo()
320644e2
PP
1267 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1268 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
71aaa3f7 1269
2adf4336
PP
1270 # Tries to parse a value, a variable assignment, or a byte order
1271 # setting, returning an item on success.
def _try_parse_val_or_var_assign_or_set_bo(self):
    # Tries to parse a `{`...`}` item: a variable assignment, a value,
    # or a byte order setting, tried in that order.
    #
    # Returns the item, or `None` if the current position doesn't
    # start with `{`. After `{`, failing all three alternatives or
    # missing the closing `}` is a parse error.
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        return None

    self._skip_ws_and_comments()

    # First alternative which matches wins
    for try_parse in (
        self._try_parse_var_assign,
        self._try_parse_val,
        self._try_parse_set_bo,
    ):
        item = try_parse()

        if item is not None:
            break
    else:
        self._raise_error(
            "Expecting a fixed-length number, a string, a variable assignment, or a byte order setting"
        )

    self._skip_ws_and_comments()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
1301
2adf4336
PP
1302 # Tries to parse an offset setting value (after the initial `<`),
1303 # returning an offset item on success.
def _try_parse_set_offset_val(self):
    # Tries to parse a positive constant integer (the part after the
    # initial `<`), returning a `_SetOffset` item holding its value,
    # or `None` if there's no match.
    start_loc = self._text_loc
    int_match = self._try_parse_pat(_pos_const_int_pat)

    if int_match is None:
        return None

    # Base 0 lets int() honour the radix prefix of the normalized text
    offset = int(_norm_const_int(int_match.group(0)), 0)
    return _SetOffset(offset, start_loc)
71aaa3f7
PP
1316
1317 # Tries to parse a label name (after the initial `<`), returning a
1318 # label item on success.
def _try_parse_label_name(self):
    # Tries to parse a label name (the part after the initial `<`),
    # returning a `_Label` item, or `None` if there's no name here.
    #
    # The name must not be the reserved `ICITTE` name, an already
    # defined label, or a known variable name. On success the name is
    # recorded in `self._label_names`.
    start_loc = self._text_loc
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is None:
        return None

    label_name = name_match.group(0)

    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), start_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), start_loc)

    if label_name in self._var_names:
        _raise_error("Existing variable named `{}`".format(label_name), start_loc)

    self._label_names.add(label_name)
    return _Label(label_name, start_loc)
71aaa3f7 1348
2adf4336 1349 # Patterns for _try_parse_label_or_set_offset()
320644e2
PP
1350 _label_set_offset_prefix_pat = re.compile(r"<")
1351 _label_set_offset_suffix_pat = re.compile(r">")
71aaa3f7 1352
2adf4336
PP
1353 # Tries to parse a label or an offset setting, returning an item on
1354 # success.
def _try_parse_label_or_set_offset(self):
    # Tries to parse a `<`...`>` item: an offset setting value first,
    # then a label name.
    #
    # Returns the item, or `None` if the current position doesn't
    # start with `<`. After `<`, failing both alternatives or missing
    # the closing `>` is a parse error.
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        return None

    self._skip_ws_and_comments()

    for try_parse in (self._try_parse_set_offset_val, self._try_parse_label_name):
        item = try_parse()

        if item is not None:
            break
    else:
        self._raise_error("Expecting a label name or an offset setting value")

    self._skip_ws_and_comments()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1377
25ca454b
PP
1378 # Pattern for _parse_pad_val()
1379 _pad_val_prefix_pat = re.compile(r"~")
1380
1381 # Tries to parse a padding value, returning the padding value, or 0
1382 # if none.
def _parse_pad_val(self):
    # Parses an optional `~VALUE` padding specification.
    #
    # Returns the padding byte value, or 0 when there's no `~`. A
    # value greater than 255 is a parse error.
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value
        return 0

    self._skip_ws_and_comments()
    val_loc = self._text_loc
    val_match = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    byte_val = int(_norm_const_int(val_match.group(0)), 0)

    # Must fit in a single byte
    if byte_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(byte_val),
            val_loc,
        )

    return byte_val
1406
676f6189 1407 # Patterns for _try_parse_align_offset()
320644e2
PP
1408 _align_offset_prefix_pat = re.compile(r"@")
1409 _align_offset_val_pat = re.compile(r"\d+")
676f6189
PP
1410
1411 # Tries to parse an offset alignment, returning an offset alignment
1412 # item on success.
def _try_parse_align_offset(self):
    # Tries to parse `@ALIGNMENT` with an optional padding value,
    # returning an `_AlignOffset` item, or `None` if the current
    # position doesn't start with `@`.
    #
    # The alignment must be a positive multiple of eight.
    start_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        return None

    self._skip_ws_and_comments()
    align_loc = self._text_loc
    align_match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    alignment = int(align_match.group(0))

    if alignment <= 0 or (alignment % 8) != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                alignment
            ),
            align_loc,
        )

    padding = self._parse_pad_val()
    return _AlignOffset(alignment, padding, start_loc)
676f6189 1445
dbd84e74
PP
1446 # Patterns for _expect_expr()
1447 _inner_expr_prefix_pat = re.compile(r"\{")
1448 _inner_expr_pat = re.compile(r"[^}]+")
1449 _inner_expr_suffix_pat = re.compile(r"\}")
dbd84e74
PP
1450
1451 # Parses an expression outside a `{`/`}` context.
1452 #
1453 # This function accepts:
1454 #
1455 # • A Python expression within `{` and `}`.
1456 #
1457 # • A Python name.
1458 #
1459 # • If `accept_const_int` is `True`: a constant integer, which may
1460 # be negative if `allow_neg_int` is `True`.
1461 #
1462 # • If `accept_const_float` is `True`: a constant floating point number.
1463 #
1464 # Returns the stripped expression string and AST expression.
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    # Parses an expression and returns its stripped string and AST
    # expression node.
    #
    # The alternatives are tried in this order:
    #
    # 1. Constant floating point number (if `accept_const_float`).
    # 2. Constant integer (if `accept_const_int`); a negative one is a
    #    parse error unless `allow_neg_int` is set.
    # 3. Python name.
    # 4. Literal string (if `accept_lit_str`).
    # 5. Python expression between `{` and `}`.
    start_loc = self._text_loc

    if accept_const_float:
        float_match = self._try_parse_pat(_const_float_pat)

        if float_match is not None:
            return self._ast_expr_from_str(float_match.group(0), start_loc)

    if accept_const_int:
        int_match = self._try_parse_pat(_const_int_pat)

        if int_match is not None:
            if not allow_neg_int and int_match.group("neg") == "-":
                _raise_error("Expecting a positive constant integer", start_loc)

            return self._ast_expr_from_str(
                _norm_const_int(int_match.group(0)), start_loc
            )

    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is not None:
        return self._ast_expr_from_str(name_match.group(0), start_loc)

    if accept_lit_str:
        lit_str = self._try_parse_lit_str(True)

        if lit_str is not None:
            # repr() turns the string into a Python literal expression
            return self._ast_expr_from_str(repr(lit_str), start_loc)

    # Only the `{`...`}` form remains: list everything which would
    # have been accepted in case `{` is missing.
    accepted = []  # type: List[str]

    if accept_const_int:
        accepted.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        accepted.append("a constant floating point number")

    if accept_lit_str:
        accepted.append("a literal string")

    accepted.extend(["a name", "or `{`"])

    # Two parts read as "a name or `{`"; more parts are comma-separated
    separator = " " if len(accepted) == 2 else ", "
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(separator.join(accepted)),
    )

    # Inner expression
    self._skip_ws_and_comments()
    inner_loc = self._text_loc
    inner_match = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # Closing `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_match.group(0), inner_loc)
1542
25ca454b
PP
1543 # Patterns for _try_parse_fill_until()
1544 _fill_until_prefix_pat = re.compile(r"\+")
1545 _fill_until_pad_val_prefix_pat = re.compile(r"~")
676f6189 1546
25ca454b
PP
1547 # Tries to parse a filling, returning a filling item on success.
def _try_parse_fill_until(self):
    # Tries to parse `+EXPR` with an optional padding value, returning
    # a `_FillUntil` item, or `None` if the current position doesn't
    # start with `+`.
    start_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        return None

    self._skip_ws_and_comments()
    target_expr_str, target_expr = self._expect_expr(accept_const_int=True)
    padding = self._parse_pad_val()
    return _FillUntil(target_expr_str, target_expr, padding, start_loc)
676f6189 1565
27d52a19
PP
1566 # Parses the multiplier expression of a repetition (block or
1567 # post-item) and returns the expression string and AST node.
def _expect_rep_mul_expr(self):
    # A repetition multiplier is any expression accepted by
    # _expect_expr(), constant integers included.
    mul_expr_str_and_node = self._expect_expr(accept_const_int=True)
    return mul_expr_str_and_node
27d52a19
PP
1570
1571 # Common block end pattern
320644e2 1572 _block_end_pat = re.compile(r"!end\b")
27d52a19 1573
e57a18e1 1574 # Pattern for _try_parse_rep_block()
320644e2 1575 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
e57a18e1
PP
1576
1577 # Tries to parse a repetition block, returning a repetition item on
1578 # success.
def _try_parse_rep_block(self):
    # Tries to parse `!r`/`!repeat` MULTIPLIER ITEMS `!end`, returning
    # a `_Rep` item, or `None` if the current position doesn't start
    # with the repetition block keyword.
    start_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        return None

    # Multiplier
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Repeated items
    self._skip_ws_and_comments_and_syms()
    body_items = self._parse_items()

    # Terminator
    self._skip_ws_and_comments_and_syms()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )
    return _Rep(body_items, mul_expr_str, mul_expr, start_loc)
e57a18e1 1603
27d52a19 1604 # Pattern for _try_parse_cond_block()
320644e2 1605 _cond_block_prefix_pat = re.compile(r"!if\b")
12b5dbc0 1606 _cond_block_else_pat = re.compile(r"!else\b")
27d52a19
PP
1607
1608 # Tries to parse a conditional block, returning a conditional item
1609 # on success.
def _try_parse_cond_block(self):
    # Tries to parse `!if` EXPR ITEMS [`!else` ITEMS] `!end`, returning
    # a `_Cond` item, or `None` if the current position doesn't start
    # with `!if`.
    #
    # Both branches are wrapped in `_Group` items; without `!else`,
    # the "false" group is empty and located at the block's beginning.
    start_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        return None

    # Condition
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" branch
    self._skip_ws_and_comments_and_syms()
    true_loc = self._text_loc
    true_branch = self._parse_items()

    # Defaults for a block without `!else`
    false_branch = []  # type: List[_Item]
    false_loc = start_loc

    self._skip_ws_and_comments_and_syms()

    if self._try_parse_pat(self._cond_block_else_pat) is not None:
        # "False" branch
        self._skip_ws_and_comments_and_syms()
        false_loc = self._text_loc
        false_branch = self._parse_items()

    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_branch, true_loc)
    false_group = _Group(false_branch, false_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, start_loc)
27d52a19 1652
cd33dfe6
PP
1653 # Pattern for _try_parse_trans_block()
1654 _trans_block_prefix_pat = re.compile(r"!t(?:ransform)?\b")
1655 _trans_block_type_pat = re.compile(
1656 r"(?:(?:base|b)64(?:u)?|(?:base|b)(?:16|32)|(?:ascii|a|base|b)85(?:p)?|(?:quopri|qp)(?:t)?|gzip|gz|bzip2|bz2)\b"
1657 )
1658
1659 # Tries to parse a transformation block, returning a transformation
1660 # block item on success.
def _try_parse_trans_block(self):
    # Tries to parse `!t`/`!transform` TYPE ITEMS `!end`, returning a
    # `_Trans` item, or `None` if the current position doesn't start
    # with the transformation block keyword.
    #
    # The item carries the contained items, a human-readable name of
    # the transformation, and the function which performs it.
    start_loc = self._text_loc

    if self._try_parse_pat(self._trans_block_prefix_pat) is None:
        return None

    # Transformation type
    self._skip_ws_and_comments()
    type_match = self._expect_pat(
        self._trans_block_type_pat, "Expecting a known transformation type"
    )

    # Transformed items
    self._skip_ws_and_comments_and_syms()
    inner_items = self._parse_items()

    # Terminator
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of transformation block)",
    )

    # (accepted spellings, function, descriptive name) for every type
    # except bzip2, which is the fallback below.
    trans_type = type_match.group(0)
    known_transforms = (
        (("base64", "b64"), base64.standard_b64encode, "standard Base64"),
        (("base64u", "b64u"), base64.urlsafe_b64encode, "URL-safe Base64"),
        (("base32", "b32"), base64.b32encode, "Base32"),
        (("base16", "b16"), base64.b16encode, "Base16"),
        (("ascii85", "a85"), base64.a85encode, "Ascii85"),
        (
            ("ascii85p", "a85p"),
            functools.partial(base64.a85encode, pad=True),
            "padded Ascii85",
        ),
        (("base85", "b85"), base64.b85encode, "Base85"),
        (
            ("base85p", "b85p"),
            functools.partial(base64.b85encode, pad=True),
            "padded Base85",
        ),
        (("quopri", "qp"), quopri.encodestring, "MIME quoted-printable"),
        (
            ("quoprit", "qpt"),
            functools.partial(quopri.encodestring, quotetabs=True),
            "MIME quoted-printable (with quoted tabs)",
        ),
        (("gzip", "gz"), gzip.compress, "gzip"),
    )

    for spellings, func, name in known_transforms:
        if trans_type in spellings:
            break
    else:
        # Only remaining type accepted by the pattern
        assert trans_type in ("bzip2", "bz2")
        func = bz2.compress
        name = "bzip2"

    return _Trans(inner_items, name, func, start_loc)
1733
320644e2
PP
1734 # Common left parenthesis pattern
1735 _left_paren_pat = re.compile(r"\(")
1736
1737 # Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
1738 _macro_params_comma_pat = re.compile(",")
1739
1740 # Patterns for _try_parse_macro_def()
1741 _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1742
1743 # Tries to parse a macro definition, adding it to `self._macro_defs`
1744 # and returning `True` on success.
def _try_parse_macro_def(self):
    # Tries to parse `!m`/`!macro` NAME `(` PARAMS `)` ITEMS `!end`.
    #
    # Returns `False` if the current position doesn't start with the
    # macro definition keyword; otherwise registers the definition in
    # `self._macro_defs` and returns `True`.
    #
    # Duplicate macro names and duplicate parameter names are parse
    # errors.
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        return False

    # Macro name
    self._skip_ws_and_comments()
    macro_name_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), macro_name_loc)

    # Opening parenthesis
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names, up to `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            break

        # A comma is required before every parameter but the first
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        cur_param_loc = self._text_loc
        cur_param = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if cur_param in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(cur_param),
                cur_param_loc,
            )

        param_names.append(cur_param)

    # Parse the body with empty sets of known variable and label
    # names, then put back the previous ones.
    self._skip_ws_and_comments_and_syms()
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # Terminator
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, body_items, start_loc
    )
    return True
1817
1818 # Patterns for _try_parse_macro_exp()
1819 _macro_exp_prefix_pat = re.compile(r"m\b")
1820 _macro_exp_colon_pat = re.compile(r":")
1821
1822 # Tries to parse a macro expansion, returning a macro expansion item
1823 # on success.
def _try_parse_macro_exp(self):
    # Tries to parse `m:NAME(PARAMS)`, returning a `_MacroExp` item,
    # or `None` if the current position doesn't start with `m`.
    #
    # The macro must already be defined and the number of parameter
    # values must equal the number of parameters of its definition.
    start_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        return None

    # `:`
    self._skip_ws_and_comments()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Macro name
    self._skip_ws_and_comments()
    macro_name_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), macro_name_loc)

    # Opening parenthesis
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values, up to `)`
    all_params_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            break

        # A comma is required before every value but the first
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        cur_param_loc = self._text_loc
        param_expr_str, param_expr = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        params.append(
            _MacroExpParam(param_expr_str, param_expr, text_loc=cur_param_loc)
        )

    # Parameter count must match the definition
    expected_count = len(macro_def.param_names)

    if len(params) != expected_count:
        plural_suffix = "" if len(params) == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                len(params), plural_suffix, expected_count
            ),
            all_params_loc,
        )

    return _MacroExp(macro_name, params, start_loc)
1896
71aaa3f7
PP
1897 # Tries to parse a base item (anything except a repetition),
1898 # returning it on success.
def _try_parse_base_item(self):
    # Tries each base item parser in a fixed order and returns the
    # first item obtained, or `None` if none of them matches.
    #
    # The order is significant: the first parser which returns an
    # item wins.
    candidate_parsers = (
        self._try_parse_byte,  # byte
        self._try_parse_str,  # string
        self._try_parse_val_or_var_assign_or_set_bo,  # `{`...`}`
        self._try_parse_label_or_set_offset,  # `<`...`>`
        self._try_parse_align_offset,  # offset alignment
        self._try_parse_fill_until,  # filling
        self._try_parse_group,  # group
        self._try_parse_rep_block,  # repetition block
        self._try_parse_cond_block,  # conditional block
        self._try_parse_macro_exp,  # macro expansion
        self._try_parse_trans_block,  # transformation block
    )

    for try_parse in candidate_parsers:
        item = try_parse()

        if item is not None:
            return item

    return None
1965
e57a18e1
PP
1966 # Pattern for _try_parse_rep_post()
1967 _rep_post_prefix_pat = re.compile(r"\*")
1968
1969 # Tries to parse a post-item repetition, returning the expression
1970 # string and AST expression node on success.
def _try_parse_rep_post(self):
    # Tries to parse `*` followed by a multiplier expression,
    # returning what _expect_rep_mul_expr() returns, or `None` if the
    # current position doesn't start with `*`.
    star_match = self._try_parse_pat(self._rep_post_prefix_pat)

    if star_match is None:
        return None

    self._skip_ws_and_comments()
    return self._expect_rep_mul_expr()
71aaa3f7 1980
1ca7b5e1
PP
1981 # Tries to parse an item, possibly followed by a repetition,
1982 # returning `True` on success.
1983 #
1984 # Appends any parsed item to `items`.
def _try_append_item(self, items: List[_Item]):
    # Tries to parse one item, possibly followed by a post-item
    # repetition (`* MULTIPLIER`), and appends it to `items`.
    #
    # Returns `True` if an item was appended, or `None` if there's no
    # item at the current position.
    self._skip_ws_and_comments_and_syms()
    parsed = self._try_parse_base_item()

    if parsed is None:
        return None

    # Only repeatable items may be followed by `*`
    if isinstance(parsed, _RepableItem):
        self._skip_ws_and_comments()
        rep_loc = self._text_loc
        rep_mul = self._try_parse_rep_post()

        if rep_mul is not None:
            # Wrap the single item into a repetition item
            parsed = _Rep([parsed], *rep_mul, text_loc=rep_loc)

    items.append(parsed)
    return True
71aaa3f7
PP
2005
2006 # Parses and returns items, skipping whitespaces, insignificant
2007 # symbols, and comments when allowed, and stopping at the first
2008 # unknown character.
320644e2
PP
2009 #
2010 # Accepts and registers macro definitions if `accept_macro_defs`
2011 # is `True`.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    # Parses as many consecutive items as possible and returns them,
    # stopping at the end of the input or at the first thing which
    # isn't an item.
    #
    # When `accept_macro_defs` is `True`, macro definitions are also
    # accepted between items; they get registered, not returned.
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            continue

        # Not an item: the only other thing allowed here is a macro
        # definition, and only when requested.
        if not (accept_macro_defs and self._try_parse_macro_def()):
            break

    return parsed_items
2025
2026 # Parses the whole Normand input, setting `self._res` to the main
2027 # group item on success.
def _parse(self):
    # Parses the whole input text and sets `self._res` to the
    # resulting main `_Group` item.
    #
    # Anything left after the first-level items (macro definitions
    # accepted) is reported as an unexpected character.
    if not self._normand.strip():
        # Nothing but whitespace: empty main group
        self._res = _Group([], self._text_loc)
        return

    top_level_items = self._parse_items(True)
    self._skip_ws_and_comments_and_syms()

    if self._isnt_done():
        unexpected = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected))

    self._res = _Group(top_level_items, self._text_loc)
2047
2048
2049# The return type of parse().
class ParseResult:
    # Read-only view of what a parse produced: the generated data,
    # the variables, the labels, the offset, and the byte order.
    #
    # Instances are created with the private _create() class method;
    # calling the constructor directly raises `NotImplementedError`.

    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which always raises
        result = cls.__new__(cls)
        result._init(data, variables, labels, offset, bo)
        return result

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    @property
    def data(self):
        # Generated data
        return self._data

    @property
    def variables(self):
        # Dictionary of updated variable names to their last computed
        # value
        return self._vars

    @property
    def labels(self):
        # Dictionary of updated main group label names to their
        # computed value
        return self._labels

    @property
    def offset(self):
        # Updated offset
        return self._offset

    @property
    def byte_order(self):
        # Updated byte order
        return self._bo
2106
2107
2108# Raises a parse error for the item `item`, creating it using the
2109# message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The error is reported at the item's own text location
    item_loc = item.text_loc
    _raise_error(msg, item_loc)
2112
2113
2114# The `ICITTE` reserved name.
2115_icitte_name = "ICITTE"
2116
2117
2adf4336
PP
2118# Base node visitor.
2119#
2120# Calls the _visit_name() method for each name node which isn't the name
2121# of a call.
class _NodeVisitor(ast.NodeVisitor):
    # AST visitor which reports names to _visit_name(), skipping a
    # name visited while the "parent is a call" flag is set.
    #
    # The flag is set when entering a call node and cleared after any
    # node has been completely visited.

    def __init__(self):
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            self._parent_is_call = True
        elif node_type is ast.Name:
            if not self._parent_is_call:
                self._visit_name(node.id)

        # Visit the children first, then clear the flag
        super().generic_visit(node)
        self._parent_is_call = False

    @abc.abstractmethod
    def _visit_name(self, name: str):
        # Called with the identifier of each reported name node
        ...
2138
71aaa3f7 2139
2adf4336
PP
2140# Expression validator: validates that all the names within the
2141# expression are allowed.
class _ExprValidator(_NodeVisitor):
    # Expression validator: raises a parse error at the given text
    # location for any visited name which is neither `ICITTE` nor one
    # of the allowed names.

    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # Legal names pass silently
        if name == _icitte_name or name in self._allowed_names:
            return

        # The legal names always include `ICITTE`, so the list shown
        # in the message is never empty.
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)
        quoted_legal_names = sorted(
            ["`{}`".format(legal_name) for legal_name in legal_names]
        )

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )
        msg += "; the legal names are {{{}}}".format(", ".join(quoted_legal_names))
        _raise_error(msg, self._text_loc)
2170
2171
2adf4336
PP
# Generator state: current variables, labels, offset, and byte order.
#
# Building a state copies (shallow copy) the `variables` and `labels`
# dictionaries so that updating the state doesn't alter the objects
# received from the caller.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.offset = offset
        self.bo = bo
        self.variables = variables.copy()
        self.labels = labels.copy()

    def __repr__(self):
        return "_GenState({!r}, {!r}, {!r}, {!r})".format(
            self.variables, self.labels, self.offset, self.bo
        )
2190
2191
# Fixed-length number item instance: everything needed to generate,
# later, the bytes of a fixed-length number item.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._parse_error_msgs = parse_error_msgs
        self._state = state
        self._offset_in_data = offset_in_data
        self._item = item

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset, within the generated data, of the bytes of this instance.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state with which to evaluate the expression of the
    # item.
    @property
    def state(self):
        return self._state

    # Parsing error messages to add to an error concerning this
    # instance.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
2221
71aaa3f7 2222
2adf4336 2223# Generator of data and final state from a group item.
71aaa3f7
PP
2224#
2225# Generation happens in memory at construction time. After building, use
2226# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2227# get the resulting context.
2adf4336
PP
2228#
2229# The steps of generation are:
2230#
320644e2
PP
2231# 1. Handle each item in prefix order.
2232#
2233# The handlers append bytes to `self._data` and update some current
2234# state object (`_GenState` instance).
2235#
2236# When handling a fixed-length number item, try to evaluate its
2237# expression using the current state. If this fails, then it might be
2238# because the expression refers to a "future" label: save the current
2239# offset in `self._data` (generated data) and a snapshot of the
2240# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
f5dcb24c
PP
2241# object). _gen_fl_num_item_insts() will deal with this later. A
2242# `_FlNumItemInst` instance also contains a snapshot of the current
2243# parsing error messages (`self._parse_error_msgs`) which need to be
2244# taken into account when handling the instance later.
2adf4336 2245#
320644e2
PP
2246# When handling the items of a group, keep a map of immediate label
2247# names to their offset. Then, after having processed all the items,
2248# update the relevant saved state snapshots in
2249# `self._fl_num_item_insts` with those immediate label values.
2250# _gen_fl_num_item_insts() will deal with this later.
2adf4336 2251#
320644e2
PP
2252# 2. Handle all the fixed-length number item instances of which the
2253# expression evaluation failed before.
2adf4336 2254#
320644e2
PP
2255# At this point, `self._fl_num_item_insts` contains everything that's
2256# needed to evaluate the expressions, including the values of
2257# "future" labels from the point of view of some fixed-length number
2258# item instance.
2adf4336 2259#
f5dcb24c
PP
2260# If an evaluation fails at this point, then it's a user error. Add
2261# to the parsing error all the saved parsing error messages of the
2262# instance. Those additional messages add precious context to the
2263# error.
71aaa3f7
PP
2264class _Gen:
2265 def __init__(
2266 self,
2267 group: _Group,
320644e2 2268 macro_defs: _MacroDefsT,
e57a18e1
PP
2269 variables: VariablesT,
2270 labels: LabelsT,
71aaa3f7
PP
2271 offset: int,
2272 bo: Optional[ByteOrder],
2273 ):
320644e2
PP
2274 self._macro_defs = macro_defs
2275 self._fl_num_item_insts = [] # type: List[_FlNumItemInst]
f5dcb24c 2276 self._parse_error_msgs = [] # type: List[ParseErrorMessage]
cd33dfe6 2277 self._in_trans = False
2adf4336 2278 self._gen(group, _GenState(variables, labels, offset, bo))
71aaa3f7
PP
2279
    # Generated bytes (`self._data`, which _gen() creates and the item
    # handlers fill).
    @property
    def data(self):
        return self._data
2284
    # Updated variables (those of the final state which _gen() sets).
    @property
    def variables(self):
        return self._final_state.variables
71aaa3f7
PP
2289
    # Updated main group labels (those of the final state which _gen()
    # sets).
    @property
    def labels(self):
        return self._final_state.labels
71aaa3f7
PP
2294
    # Updated offset (the one of the final state which _gen() sets).
    @property
    def offset(self):
        return self._final_state.offset
71aaa3f7
PP
2299
    # Updated byte order (the one of the final state which _gen() sets);
    # may be `None`.
    @property
    def bo(self):
        return self._final_state.bo
2304
320644e2
PP
2305 # Evaluates the expression `expr` of which the original string is
2306 # `expr_str` at the location `text_loc` considering the current
2adf4336
PP
2307 # generation state `state`.
2308 #
7a7b31e8 2309 # If `accept_float` is `True`, then the type of the result may be
269f6eb3 2310 # `float` too.
7a7b31e8
PP
2311 #
2312 # If `accept_str` is `True`, then the type of the result may be
2313 # `str` too.
2adf4336 2314 @staticmethod
320644e2
PP
2315 def _eval_expr(
2316 expr_str: str,
2317 expr: ast.Expression,
2318 text_loc: TextLocation,
269f6eb3 2319 state: _GenState,
7a7b31e8
PP
2320 accept_float: bool = False,
2321 accept_str: bool = False,
269f6eb3 2322 ):
e57a18e1
PP
2323 syms = {} # type: VariablesT
2324 syms.update(state.labels)
71aaa3f7 2325
e57a18e1
PP
2326 # Set the `ICITTE` name to the current offset
2327 syms[_icitte_name] = state.offset
71aaa3f7
PP
2328
2329 # Add the current variables
2adf4336 2330 syms.update(state.variables)
71aaa3f7
PP
2331
2332 # Validate the node and its children
320644e2 2333 _ExprValidator(expr_str, text_loc, set(syms.keys())).visit(expr)
71aaa3f7
PP
2334
2335 # Compile and evaluate expression node
2336 try:
320644e2 2337 val = eval(compile(expr, "", "eval"), None, syms)
71aaa3f7 2338 except Exception as exc:
320644e2
PP
2339 _raise_error(
2340 "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
2341 text_loc,
71aaa3f7
PP
2342 )
2343
27d52a19
PP
2344 # Convert `bool` result type to `int` to normalize
2345 if type(val) is bool:
2346 val = int(val)
2347
269f6eb3
PP
2348 # Validate result type
2349 expected_types = {int} # type: Set[type]
269f6eb3 2350
7a7b31e8 2351 if accept_float:
269f6eb3 2352 expected_types.add(float)
7a7b31e8
PP
2353
2354 if accept_str:
2355 expected_types.add(str)
269f6eb3
PP
2356
2357 if type(val) not in expected_types:
7a7b31e8
PP
2358 expected_types_str = sorted(
2359 ["`{}`".format(t.__name__) for t in expected_types]
2360 )
2361
2362 if len(expected_types_str) == 1:
2363 msg_expected = expected_types_str[0]
2364 elif len(expected_types_str) == 2:
2365 msg_expected = " or ".join(expected_types_str)
2366 else:
2367 expected_types_str[-1] = "or {}".format(expected_types_str[-1])
2368 msg_expected = ", ".join(expected_types_str)
2369
320644e2 2370 _raise_error(
269f6eb3 2371 "Invalid expression `{}`: expecting result type {}, not `{}`".format(
7a7b31e8 2372 expr_str, msg_expected, type(val).__name__
71aaa3f7 2373 ),
320644e2 2374 text_loc,
71aaa3f7
PP
2375 )
2376
2377 return val
2378
7a7b31e8
PP
2379 # Forwards to _eval_expr() with the expression and text location of
2380 # `item`.
320644e2
PP
2381 @staticmethod
2382 def _eval_item_expr(
7a7b31e8 2383 item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign],
320644e2 2384 state: _GenState,
7a7b31e8
PP
2385 accept_float: bool = False,
2386 accept_str: bool = False,
320644e2
PP
2387 ):
2388 return _Gen._eval_expr(
7a7b31e8 2389 item.expr_str, item.expr, item.text_loc, state, accept_float, accept_str
320644e2
PP
2390 )
2391
2392 # Handles the byte item `item`.
2393 def _handle_byte_item(self, item: _Byte, state: _GenState):
2394 self._data.append(item.val)
2395 state.offset += item.size
2396
7a7b31e8
PP
2397 # Handles the literal string item `item`.
2398 def _handle_lit_str_item(self, item: _LitStr, state: _GenState):
320644e2
PP
2399 self._data += item.data
2400 state.offset += item.size
2401
2402 # Handles the byte order setting item `item`.
2403 def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
2404 # Update current byte order
2405 state.bo = item.bo
2406
2407 # Handles the variable assignment item `item`.
2408 def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
2409 # Update variable
7a7b31e8
PP
2410 state.variables[item.name] = self._eval_item_expr(
2411 item, state, accept_float=True, accept_str=True
2412 )
320644e2
PP
2413
2414 # Handles the fixed-length number item `item`.
2415 def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
2416 # Validate current byte order
2417 if state.bo is None and item.len > 8:
2418 _raise_error_for_item(
2419 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
2420 item.expr_str
2421 ),
2422 item,
2423 )
2424
2425 # Try an immediate evaluation. If it fails, then keep everything
2426 # needed to (try to) generate the bytes of this item later.
2427 try:
2428 data = self._gen_fl_num_item_inst_data(item, state)
2429 except Exception:
cd33dfe6
PP
2430 if self._in_trans:
2431 _raise_error_for_item(
2432 "Invalid expression `{}`: failed to evaluate within a transformation block".format(
2433 item.expr_str
2434 ),
2435 item,
2436 )
2437
320644e2 2438 self._fl_num_item_insts.append(
f5dcb24c
PP
2439 _FlNumItemInst(
2440 item,
2441 len(self._data),
2442 copy.deepcopy(state),
2443 copy.deepcopy(self._parse_error_msgs),
2444 )
320644e2
PP
2445 )
2446
2447 # Reserve space in `self._data` for this instance
2448 data = bytes([0] * (item.len // 8))
2449
2450 # Append bytes
2451 self._data += data
2452
2453 # Update offset
2454 state.offset += len(data)
2455
05f81895
PP
2456 # Returns the size, in bytes, required to encode the value `val`
2457 # with LEB128 (signed version if `is_signed` is `True`).
2458 @staticmethod
2459 def _leb128_size_for_val(val: int, is_signed: bool):
2460 if val < 0:
2461 # Equivalent upper bound.
2462 #
2463 # For example, if `val` is -128, then the full integer for
2464 # this number of bits would be [-128, 127].
2465 val = -val - 1
2466
2467 # Number of bits (add one for the sign if needed)
2468 bits = val.bit_length() + int(is_signed)
2469
2470 if bits == 0:
2471 bits = 1
2472
2473 # Seven bits per byte
2474 return math.ceil(bits / 7)
2475
320644e2
PP
2476 # Handles the LEB128 integer item `item`.
2477 def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
2478 # Compute value
7a7b31e8 2479 val = self._eval_item_expr(item, state)
676f6189 2480
320644e2
PP
2481 # Size in bytes
2482 size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)
05f81895 2483
320644e2
PP
2484 # For each byte
2485 for _ in range(size):
2486 # Seven LSBs, MSB of the byte set (continue)
2487 self._data.append((val & 0x7F) | 0x80)
2488 val >>= 7
2adf4336 2489
320644e2
PP
2490 # Clear MSB of last byte (stop)
2491 self._data[-1] &= ~0x80
2adf4336 2492
320644e2
PP
2493 # Update offset
2494 state.offset += size
27d52a19 2495
7a7b31e8
PP
2496 # Handles the string item `item`.
2497 def _handle_str_item(self, item: _Str, state: _GenState):
2498 # Compute value
2499 val = str(self._eval_item_expr(item, state, accept_float=True, accept_str=True))
2500
2501 # Encode
2502 data = _encode_str(val, item.codec, item.text_loc)
2503
2504 # Add to data
2505 self._data += data
2506
2507 # Update offset
2508 state.offset += len(data)
2509
320644e2
PP
2510 # Handles the group item `item`, removing the immediate labels from
2511 # `state` at the end if `remove_immediate_labels` is `True`.
2512 def _handle_group_item(
2513 self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
2514 ):
2515 first_fl_num_item_inst_index = len(self._fl_num_item_insts)
2516 immediate_labels = {} # type: LabelsT
27d52a19 2517
320644e2
PP
2518 # Handle each item
2519 for subitem in item.items:
2520 if type(subitem) is _Label:
2521 # Add to local immediate labels
2522 immediate_labels[subitem.name] = state.offset
2adf4336 2523
320644e2 2524 self._handle_item(subitem, state)
2adf4336 2525
320644e2
PP
2526 # Remove immediate labels from current state if needed
2527 if remove_immediate_labels:
2528 for name in immediate_labels:
2529 del state.labels[name]
2adf4336 2530
320644e2
PP
2531 # Add all immediate labels to all state snapshots since
2532 # `first_fl_num_item_inst_index`.
2533 for inst in self._fl_num_item_insts[first_fl_num_item_inst_index:]:
2534 inst.state.labels.update(immediate_labels)
2adf4336 2535
320644e2
PP
2536 # Handles the repetition item `item`.
2537 def _handle_rep_item(self, item: _Rep, state: _GenState):
2538 # Compute the repetition count
2539 mul = _Gen._eval_item_expr(item, state)
05f81895 2540
320644e2
PP
2541 # Validate result
2542 if mul < 0:
2543 _raise_error_for_item(
2544 "Invalid expression `{}`: unexpected negative result {:,}".format(
2545 item.expr_str, mul
2546 ),
2547 item,
2548 )
2adf4336 2549
cd33dfe6 2550 # Generate group data `mul` times
320644e2 2551 for _ in range(mul):
cd33dfe6 2552 self._handle_group_item(item, state)
2adf4336 2553
320644e2 2554 # Handles the conditional item `item`.
12b5dbc0 2555 def _handle_cond_item(self, item: _Cond, state: _GenState):
320644e2
PP
2556 # Compute the conditional value
2557 val = _Gen._eval_item_expr(item, state)
2adf4336 2558
cd33dfe6 2559 # Generate selected group data
320644e2 2560 if val:
cd33dfe6 2561 self._handle_group_item(item.true_item, state)
12b5dbc0 2562 else:
cd33dfe6
PP
2563 self._handle_group_item(item.false_item, state)
2564
2565 # Handles the transformation item `item`.
2566 def _handle_trans_item(self, item: _Trans, state: _GenState):
2567 init_in_trans = self._in_trans
2568 self._in_trans = True
2569 init_data_len = len(self._data)
2570 init_offset = state.offset
2571
2572 # Generate group data
2573 self._handle_group_item(item, state)
2574
2575 # Remove and keep group data
2576 to_trans = self._data[init_data_len:]
2577 del self._data[init_data_len:]
2578
2579 # Encode group data and append to current data
2580 try:
2581 transformed = item.trans(to_trans)
2582 except Exception as exc:
2583 _raise_error_for_item(
2584 "Cannot apply the {} transformation to this data: {}".format(
2585 item.name, exc
2586 ),
2587 item,
2588 )
2589
2590 self._data += transformed
2591
2592 # Update offset and restore
2593 state.offset = init_offset + len(transformed)
2594 self._in_trans = init_in_trans
2adf4336 2595
320644e2
PP
2596 # Evaluates the parameters of the macro expansion item `item`
2597 # considering the initial state `init_state` and returns a new state
2598 # to handle the items of the macro.
2599 def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
2600 # New state
2601 exp_state = _GenState({}, {}, init_state.offset, init_state.bo)
2adf4336 2602
320644e2
PP
2603 # Evaluate the parameter expressions
2604 macro_def = self._macro_defs[item.name]
2adf4336 2605
320644e2
PP
2606 for param_name, param in zip(macro_def.param_names, item.params):
2607 exp_state.variables[param_name] = _Gen._eval_expr(
7a7b31e8
PP
2608 param.expr_str,
2609 param.expr,
2610 param.text_loc,
2611 init_state,
2612 accept_float=True,
2613 accept_str=True,
320644e2 2614 )
2adf4336 2615
320644e2 2616 return exp_state
2adf4336 2617
320644e2
PP
2618 # Handles the macro expansion item `item`.
2619 def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
f5dcb24c 2620 parse_error_msg_text = "While expanding the macro `{}`:".format(item.name)
27d52a19 2621
f5dcb24c
PP
2622 try:
2623 # New state
2624 exp_state = self._eval_macro_exp_params(item, state)
2625
2626 # Process the contained group
2627 init_data_size = len(self._data)
2628 parse_error_msg = (
2629 ParseErrorMessage._create( # pyright: ignore[reportPrivateUsage]
2630 parse_error_msg_text, item.text_loc
2631 )
2632 )
2633 self._parse_error_msgs.append(parse_error_msg)
cd33dfe6 2634 self._handle_group_item(self._macro_defs[item.name], exp_state)
f5dcb24c
PP
2635 self._parse_error_msgs.pop()
2636 except ParseError as exc:
2637 _augment_error(exc, parse_error_msg_text, item.text_loc)
27d52a19 2638
320644e2
PP
2639 # Update state offset and return
2640 state.offset += len(self._data) - init_data_size
676f6189 2641
320644e2
PP
2642 # Handles the offset setting item `item`.
2643 def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
676f6189 2644 state.offset = item.val
2adf4336 2645
25ca454b 2646 # Handles the offset alignment item `item` (adds padding).
320644e2
PP
2647 def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
2648 init_offset = state.offset
2649 align_bytes = item.val // 8
2650 state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
2651 self._data += bytes([item.pad_val] * (state.offset - init_offset))
2adf4336 2652
25ca454b
PP
2653 # Handles the filling item `item` (adds padding).
2654 def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
2655 # Compute the new offset
2656 new_offset = _Gen._eval_item_expr(item, state)
2657
2658 # Validate the new offset
2659 if new_offset < state.offset:
2660 _raise_error_for_item(
2661 "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
2662 item.expr_str, new_offset, state.offset
2663 ),
2664 item,
2665 )
2666
2667 # Fill
2668 self._data += bytes([item.pad_val] * (new_offset - state.offset))
2669
2670 # Update offset
2671 state.offset = new_offset
2672
320644e2
PP
2673 # Handles the label item `item`.
2674 def _handle_label_item(self, item: _Label, state: _GenState):
2675 state.labels[item.name] = state.offset
2adf4336 2676
320644e2
PP
2677 # Handles the item `item`, returning the updated next repetition
2678 # instance.
2679 def _handle_item(self, item: _Item, state: _GenState):
2680 return self._item_handlers[type(item)](item, state)
71aaa3f7 2681
320644e2
PP
2682 # Generates the data for a fixed-length integer item instance having
2683 # the value `val` and returns it.
2684 def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
71aaa3f7
PP
2685 # Validate range
2686 if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
2687 _raise_error_for_item(
320644e2
PP
2688 "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
2689 val, item.len, item.expr_str
71aaa3f7
PP
2690 ),
2691 item,
2692 )
2693
2694 # Encode result on 64 bits (to extend the sign bit whatever the
2695 # value of `item.len`).
71aaa3f7
PP
2696 data = struct.pack(
2697 "{}{}".format(
2adf4336 2698 ">" if state.bo in (None, ByteOrder.BE) else "<",
71aaa3f7
PP
2699 "Q" if val >= 0 else "q",
2700 ),
2701 val,
2702 )
2703
2704 # Keep only the requested length
2705 len_bytes = item.len // 8
2706
2adf4336 2707 if state.bo in (None, ByteOrder.BE):
71aaa3f7
PP
2708 # Big endian: keep last bytes
2709 data = data[-len_bytes:]
2710 else:
2711 # Little endian: keep first bytes
2adf4336 2712 assert state.bo == ByteOrder.LE
71aaa3f7
PP
2713 data = data[:len_bytes]
2714
320644e2
PP
2715 # Return data
2716 return data
269f6eb3 2717
320644e2
PP
2718 # Generates the data for a fixed-length floating point number item
2719 # instance having the value `val` and returns it.
2720 def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
269f6eb3
PP
2721 # Validate length
2722 if item.len not in (32, 64):
2723 _raise_error_for_item(
2724 "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
2725 item.len, val
2726 ),
2727 item,
2728 )
2729
320644e2
PP
2730 # Encode and return result
2731 return struct.pack(
269f6eb3
PP
2732 "{}{}".format(
2733 ">" if state.bo in (None, ByteOrder.BE) else "<",
2734 "f" if item.len == 32 else "d",
2735 ),
2736 val,
2737 )
2738
320644e2
PP
2739 # Generates the data for a fixed-length number item instance and
2740 # returns it.
2741 def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
269f6eb3 2742 # Compute value
e57a18e1 2743 val = self._eval_item_expr(item, state, True)
269f6eb3 2744
269f6eb3
PP
2745 # Handle depending on type
2746 if type(val) is int:
320644e2 2747 return self._gen_fl_int_item_inst_data(val, item, state)
269f6eb3
PP
2748 else:
2749 assert type(val) is float
320644e2 2750 return self._gen_fl_float_item_inst_data(val, item, state)
05f81895 2751
320644e2
PP
2752 # Generates the data for all the fixed-length number item instances
2753 # and writes it at the correct offset within `self._data`.
2754 def _gen_fl_num_item_insts(self):
2755 for inst in self._fl_num_item_insts:
2756 # Generate bytes
f5dcb24c
PP
2757 try:
2758 data = self._gen_fl_num_item_inst_data(inst.item, inst.state)
2759 except ParseError as exc:
2760 # Add all the saved parse error messages for this
2761 # instance.
2762 for msg in reversed(inst.parse_error_msgs):
2763 _add_error_msg(exc, msg.text, msg.text_location)
2764
2765 raise
05f81895 2766
320644e2
PP
2767 # Insert bytes into `self._data`
2768 self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data
2adf4336
PP
2769
2770 # Generates the data (`self._data`) and final state
2771 # (`self._final_state`) from `group` and the initial state `state`.
2772 def _gen(self, group: _Group, state: _GenState):
71aaa3f7
PP
2773 # Initial state
2774 self._data = bytearray()
71aaa3f7
PP
2775
2776 # Item handlers
2777 self._item_handlers = {
676f6189 2778 _AlignOffset: self._handle_align_offset_item,
71aaa3f7 2779 _Byte: self._handle_byte_item,
27d52a19 2780 _Cond: self._handle_cond_item,
25ca454b 2781 _FillUntil: self._handle_fill_until_item,
269f6eb3 2782 _FlNum: self._handle_fl_num_item,
71aaa3f7 2783 _Group: self._handle_group_item,
2adf4336 2784 _Label: self._handle_label_item,
7a7b31e8 2785 _LitStr: self._handle_lit_str_item,
320644e2 2786 _MacroExp: self._handle_macro_exp_item,
71aaa3f7 2787 _Rep: self._handle_rep_item,
2adf4336
PP
2788 _SetBo: self._handle_set_bo_item,
2789 _SetOffset: self._handle_set_offset_item,
05f81895 2790 _SLeb128Int: self._handle_leb128_int_item,
2adf4336 2791 _Str: self._handle_str_item,
cd33dfe6 2792 _Trans: self._handle_trans_item,
05f81895 2793 _ULeb128Int: self._handle_leb128_int_item,
2adf4336 2794 _VarAssign: self._handle_var_assign_item,
320644e2 2795 } # type: Dict[type, Callable[[Any, _GenState], None]]
2adf4336
PP
2796
2797 # Handle the group item, _not_ removing the immediate labels
2798 # because the `labels` property offers them.
320644e2 2799 self._handle_group_item(group, state, False)
2adf4336
PP
2800
2801 # This is actually the final state
2802 self._final_state = state
71aaa3f7 2803
320644e2
PP
2804 # Generate all the fixed-length number bytes now that we know
2805 # their full state
2806 self._gen_fl_num_item_insts()
2807
71aaa3f7
PP
2808
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the generated bytes as well as the updated
# variables, labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to their values; `None` means no initial variables. A
# variable name must not be the reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values; `None` means no initial labels. A label
# name must not be the reserved name `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order (`None` means none).
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2850
2851
f5dcb24c
PP
2852# Raises a command-line error with the message `msg`.
2853def _raise_cli_error(msg: str) -> NoReturn:
2854 raise RuntimeError("Command-line error: {}".format(msg))
2855
2856
b2410769
PP
# Returns the `int` or `float` value which the CLI assignment value
# string `s` (stripped first) represents.
#
# A floating point number is only a candidate if `is_label` is `False`.
#
# Raises a command-line error if `s` has an unexpected form.
def _val_from_assign_val_str(s: str, is_label: bool):
    text = s.strip()

    # Floating point number? (not for a label)
    float_m = None if is_label else _const_float_pat.fullmatch(text)

    if float_m is not None:
        return float(float_m.group(0))

    # Integer?
    int_m = _const_int_pat.fullmatch(text)

    if int_m is not None:
        return int(_norm_const_int(int_m.group(0)), 0)

    # Neither
    _raise_cli_error(
        "Invalid assignment value `{}`: expecting {}".format(
            text, "an integer" if is_label else "a number"
        )
    )
2876
2877
# Returns a dictionary of names to values from the list of strings
# `args` containing `NAME=VAL` entries (empty dictionary if `args` is
# `None`).
#
# If `is_str_only` is `True`, then each value is the `VAL` string
# as is. Otherwise, each value is the number which
# _val_from_assign_val_str() returns for `VAL` and `is_label`.
#
# Raises a command-line error for an invalid entry.
def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
    d = {}  # type: VariablesT

    if args is None:
        return d

    # Same pattern for all the entries
    assign_pat = re.compile(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment `{}`".format(arg))

        val_str = m.group(2)
        d[m.group(1)] = (
            val_str if is_str_only else _val_from_assign_val_str(val_str, is_label)
        )

    return d
2900
2901
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The Normand input text comes from the standard input when there's no
# input file path.
#
# Raises a command-line error for an invalid assignment (variable or
# label) or for a negative initial offset.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels: string variables are added after the
    # numeric ones, so they win on a name clash
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset (the message must show the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
71aaa3f7 2994
71aaa3f7 2995
f5dcb24c
PP
# CLI entry point without exception handling: parses the Normand input
# text `normand` with the initial offset `offset`, byte order `bo`,
# variables `variables`, and labels `labels`, and writes the generated
# bytes to the standard output.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    res = parse(normand, variables, labels, offset, bo)
    sys.stdout.buffer.write(res.data)
71aaa3f7
PP
3005
3006
3007# Prints the exception message `msg` and exits with status 1.
3008def _fail(msg: str) -> NoReturn:
3009 if not msg.endswith("."):
3010 msg += "."
3011
f5dcb24c 3012 print(msg.strip(), file=sys.stderr)
71aaa3f7
PP
3013 sys.exit(1)
3014
3015
# CLI entry point: parses the command-line arguments, runs the
# processor, and, on error, prints a message and exits with status 1.
#
# For a parsing error, there's one printed line per message of the
# error, last message first, each having the form
# `[PATH:]LINE:COL - TEXT` where `PATH` is the absolute input path (if
# any).
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        path = args[0]
        prefix = "" if path is None else "{}:".format(os.path.abspath(path))
        lines = []

        for msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                msg.text_location.line_no,
                msg.text_location.col_no,
                msg.text,
            )

            # End each line with some punctuation
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
3047
3048
# Run the command-line tool when executing this module as a script.
if __name__ == "__main__":
    _run_cli()
This page took 0.15371 seconds and 4 git commands to generate.