Parse comments between tokens
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
ba11fb1d 33__version__ = "0.20.0"
71aaa3f7 34__all__ = [
320644e2
PP
35 "__author__",
36 "__version__",
71aaa3f7 37 "ByteOrder",
320644e2 38 "LabelsT",
71aaa3f7
PP
39 "parse",
40 "ParseError",
f5dcb24c 41 "ParseErrorMessage",
71aaa3f7 42 "ParseResult",
e57a18e1 43 "TextLocation",
e57a18e1 44 "VariablesT",
71aaa3f7
PP
45]
46
47import re
48import abc
49import ast
50import sys
320644e2 51import copy
71aaa3f7 52import enum
05f81895 53import math
71aaa3f7 54import struct
e57a18e1
PP
55import typing
56from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
57
58
59# Text location (line and column numbers).
class TextLocation:
    # A line number and a column number.
    #
    # Instances are built through the private _create() class method;
    # calling the constructor directly raises `NotImplementedError`.

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass __init__(), which is deliberately disabled
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Takes `self` explicitly, like the other private-constructor
    # classes of this module (`ParseErrorMessage`, `ParseError`).
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({}, {})".format(self._line_no, self._col_no)
2adf4336 86
71aaa3f7
PP
87
88# Any item.
class _Item:
    # Base class of the items: only keeps the source text location
    # passed at construction time.
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    @property
    def text_loc(self):
        # Source text location.
        return self._text_loc
97
2adf4336
PP
98
99# Scalar item.
class _ScalarItem(_Item):
    # Item having a `size` property.
    #
    # `size` is marked abstract, but `_Item` doesn't use an ABC
    # metaclass, so nothing enforces the override at instantiation
    # time.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        # Size, in bytes, of this item.
        ...
106
107
108# A repeatable item.
2adf4336 109class _RepableItem:
71aaa3f7
PP
110 pass
111
112
113# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # A single byte having the value `val`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        # Byte value.
        return self._val

    @property
    def size(self):
        # Always a single byte
        return 1

    def __repr__(self):
        # The value is shown in hexadecimal
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, repr(self._text_loc))
71aaa3f7
PP
130
131
7a7b31e8
PP
132# Literal string.
class _LitStr(_ScalarItem, _RepableItem):
    # Literal string, kept as its already encoded bytes `data`.
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    @property
    def data(self):
        # Encoded bytes.
        return self._data

    @property
    def size(self):
        # Number of encoded bytes
        return len(self._data)

    def __repr__(self):
        return "_LitStr({}, {})".format(*map(repr, (self._data, self._text_loc)))
71aaa3f7
PP
149
150
151# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # Byte order; the member values are the strings `be` and `le`.
    BE = "be"  # Big endian
    LE = "le"  # Little endian
159
160
2adf4336
PP
161# Byte order setting.
class _SetBo(_Item):
    # Item holding the byte order `bo` to set.
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    @property
    def bo(self):
        # Byte order.
        return self._bo

    def __repr__(self):
        return "_SetBo({}, {})".format(*map(repr, (self._bo, self._text_loc)))
71aaa3f7
PP
173
174
175# Label.
class _Label(_Item):
    # Item holding a label name.
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    @property
    def name(self):
        # Label name.
        return self._name

    def __repr__(self):
        return "_Label({}, {})".format(*map(repr, (self._name, self._text_loc)))
71aaa3f7
PP
188
189
2adf4336
PP
190# Offset setting.
class _SetOffset(_Item):
    # Item holding the offset value to set.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        # Offset value (bytes).
        return self._val

    def __repr__(self):
        return "_SetOffset({}, {})".format(*map(repr, (self._val, self._text_loc)))
203
204
205# Offset alignment.
class _AlignOffset(_Item):
    # Item holding an alignment value and a padding byte value.
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val
        self._pad_val = pad_val

    @property
    def val(self):
        # Alignment value (bits).
        return self._val

    @property
    def pad_val(self):
        # Padding byte value.
        return self._pad_val

    def __repr__(self):
        parts = (self._val, self._pad_val, self._text_loc)
        return "_AlignOffset({}, {}, {})".format(*map(repr, parts))
71aaa3f7
PP
226
227
228# Mixin containing an AST expression and its string.
229class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
25ca454b
PP
245# Fill until some offset.
class _FillUntil(_Item, _ExprMixin):
    # Item holding an expression and the padding byte value to fill
    # with.
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    @property
    def pad_val(self):
        # Padding byte value.
        return self._pad_val

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._pad_val, self._text_loc)
        return "_FillUntil({}, {}, {}, {})".format(*map(repr, parts))
266
267
2adf4336
PP
268# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Item holding a variable name and the expression to assign to it.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    @property
    def name(self):
        # Name.
        return self._name

    def __repr__(self):
        parts = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({}, {}, {}, {})".format(*map(repr, parts))
289
290
269f6eb3
PP
291# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    # Number expression having a fixed length of `len` bits.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    @property
    def len(self):
        # Length (bits).
        return self._len

    @property
    def size(self):
        # Bits to whole bytes
        n_bits = self._len
        return n_bits // 8

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._len, self._text_loc)
        return "_FlNum({}, {}, {}, {})".format(*map(repr, parts))
316
317
05f81895
PP
318# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # Common base of the LEB128 integer items: only holds the
    # expression.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Uses the name of the concrete class so that subclasses don't
        # need their own __repr__()
        cls_name = self.__class__.__name__
        parts = (self._expr_str, self._expr, self._text_loc)
        return "{}({}, {}, {})".format(cls_name, *map(repr, parts))
331
332
333# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself is the tag.
    pass
336
337
338# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself is the tag.
    pass
341
342
7a7b31e8
PP
343# String.
class _Str(_Item, _RepableItem, _ExprMixin):
    # String expression to encode with the codec named `codec`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._codec = codec

    # Codec name.
    @property
    def codec(self):
        return self._codec

    def __repr__(self):
        # Exactly one argument per placeholder: an extra leading class
        # name argument would shift everything and drop the text
        # location from the output.
        return "_Str({}, {}, {}, {})".format(
            repr(self._expr_str),
            repr(self._expr),
            repr(self._codec),
            repr(self._text_loc),
        )
365
366
71aaa3f7 367# Group of items.
class _Group(_Item, _RepableItem):
    # Item containing a list of items.
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    @property
    def items(self):
        # Contained items.
        return self._items

    def __repr__(self):
        return "_Group({}, {})".format(*map(repr, (self._items, self._text_loc)))
71aaa3f7
PP
380
381
382# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    # Item holding the item to repeat and an expression.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    @property
    def item(self):
        # Item to repeat.
        return self._item

    def __repr__(self):
        parts = (self._item, self._expr_str, self._expr, self._text_loc)
        return "_Rep({}, {}, {}, {})".format(*map(repr, parts))
403
404
27d52a19
PP
405# Conditional item.
class _Cond(_Item, _ExprMixin):
    # Item holding a condition expression and one item for each of its
    # two outcomes.
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    @property
    def true_item(self):
        # Item when condition is true.
        return self._true_item

    @property
    def false_item(self):
        # Item when condition is false.
        return self._false_item

    def __repr__(self):
        parts = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({}, {}, {}, {}, {})".format(*map(repr, parts))
438
439
320644e2
PP
440# Macro definition item.
class _MacroDef(_Item):
    # Item holding a macro name, its parameter names, and the group of
    # items making its body.
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name = name
        self._param_names = param_names
        self._group = group

    @property
    def name(self):
        # Name.
        return self._name

    @property
    def param_names(self):
        # Parameters.
        return self._param_names

    @property
    def group(self):
        # Contained items.
        return self._group

    def __repr__(self):
        parts = (self._name, self._param_names, self._group, self._text_loc)
        return "_MacroDef({}, {}, {}, {})".format(*map(repr, parts))
472
473
474# Macro expansion parameter.
class _MacroExpParam:
    # An expression (string and AST node) with its own source text
    # location; not an `_Item`.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._text_loc = text_loc
        self._expr_str = expr_str
        self._expr = expr

    @property
    def expr_str(self):
        # Expression string.
        return self._expr_str

    @property
    def expr(self):
        # Expression.
        return self._expr

    @property
    def text_loc(self):
        # Source text location.
        return self._text_loc

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._text_loc)
        return "_MacroExpParam({}, {}, {})".format(*map(repr, parts))
500
501
502# Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    # Item holding a macro name and the list of expansion parameters.
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    @property
    def name(self):
        # Name.
        return self._name

    @property
    def params(self):
        # Parameters.
        return self._params

    def __repr__(self):
        parts = (self._name, self._params, self._text_loc)
        return "_MacroExp({}, {}, {})".format(*map(repr, parts))
2adf4336
PP
530
531
f5dcb24c
PP
532# A parsing error message: a string and a text location.
class ParseErrorMessage:
    # A message text with a source text location.
    #
    # Instances are built through the private _create() class method;
    # calling the constructor directly raises `NotImplementedError`.

    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        # Bypass the disabled __init__()
        new_msg = cls.__new__(cls)
        new_msg._init(text, text_loc)
        return new_msg

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, text: str, text_loc: TextLocation):
        self._text_loc = text_loc
        self._text = text

    @property
    def text(self):
        # Message text.
        return self._text

    @property
    def text_location(self):
        # Source text location.
        return self._text_loc
556
557
558# A parsing error containing one or more messages (`ParseErrorMessage`).
71aaa3f7
PP
class ParseError(RuntimeError):
    # Exception carrying a list of `ParseErrorMessage` objects.
    #
    # Instances are built through the private _create() class method;
    # calling the constructor directly raises `NotImplementedError`.

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Bypass the disabled __init__()
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, msg: str, text_loc: TextLocation):
        # The initial message is also the message of the exception
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    def _add_msg(self, msg: str, text_loc: TextLocation):
        # Appends a new message to the list
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    @property
    def messages(self):
        # Parsing error messages.
        #
        # The first message is the most specific one.
        return self._msgs
71aaa3f7
PP
587
588
589# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # Builds a `ParseError` from `msg` and `text_loc`, and raises it.
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
592
593
f5dcb24c
PP
594# Adds a message to the parsing error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    # Module-level access to the private ParseError._add_msg().
    add_msg = exc._add_msg  # pyright: ignore[reportPrivateUsage]
    add_msg(msg, text_loc)
597
598
599# Appends a message to the parsing error `exc` and reraises it.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    # Adds the message first, then raises the very same exception
    # object.
    _add_error_msg(exc, msg, text_loc)
    raise exc
603
604
b2410769
PP
605# Returns a normalized version (so as to be parseable by int()) of
606# the constant integer string `s`, possibly negative, dealing with
607# any radix suffix.
608def _norm_const_int(s: str):
609 neg = ""
610 pos = s
611
612 if s.startswith("-"):
613 neg = "-"
614 pos = s[1:]
615
616 for r in "xXoObB":
617 if pos.startswith("0" + r):
618 # Already correct
619 return s
620
621 # Try suffix
622 asm_suf_base = {
623 "h": "x",
624 "H": "x",
625 "q": "o",
626 "Q": "o",
627 "o": "o",
628 "O": "o",
629 "b": "b",
630 "B": "B",
631 }
632
633 for suf in asm_suf_base:
634 if pos[-1] == suf:
635 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
636
637 return s
638
639
7a7b31e8
PP
640# Encodes the string `s` using the codec `codec`, raising `ParseError`
641# with `text_loc` on encoding error.
def _encode_str(s: str, codec: str, text_loc: TextLocation):
    # Returns the bytes of `s` encoded with `codec`.
    #
    # Only `UnicodeEncodeError` is converted to a parsing error (at
    # `text_loc`).
    try:
        encoded = s.encode(codec)
    except UnicodeEncodeError:
        msg = "Cannot encode `{}` with the `{}` encoding".format(s, codec)
        _raise_error(msg, text_loc)

    return encoded
649
650
e57a18e1 651# Variables dictionary type (for type hints).
# Maps a string to an integer, a float, or a string.
VariablesT = Dict[str, Union[int, float, str]]


# Labels dictionary type (for type hints): maps a string to an integer.
LabelsT = Dict[str, int]
71aaa3f7
PP
657
658
b2410769 659# Common patterns.
71aaa3f7 660_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
b2410769
PP
661_pos_const_int_pat = re.compile(
662 r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
663)
664_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
665_const_float_pat = re.compile(
666 r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
667)
71aaa3f7
PP
668
669
320644e2
PP
670# Macro definition dictionary.
# Maps a string to a macro definition item.
_MacroDefsT = Dict[str, _MacroDef]
672
673
71aaa3f7
PP
674# Normand parser.
675#
676# The constructor accepts a Normand input. After building, use the `res`
677# property to get the resulting main group.
678class _Parser:
679 # Builds a parser to parse the Normand input `normand`, parsing
680 # immediately.
e57a18e1 681 def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
71aaa3f7
PP
682 self._normand = normand
683 self._at = 0
684 self._line_no = 1
685 self._col_no = 1
686 self._label_names = set(labels.keys())
687 self._var_names = set(variables.keys())
320644e2 688 self._macro_defs = {} # type: _MacroDefsT
71aaa3f7
PP
689 self._parse()
690
691 # Result (main group).
692 @property
693 def res(self):
694 return self._res
695
320644e2
PP
696 # Macro definitions.
697 @property
698 def macro_defs(self):
699 return self._macro_defs
700
71aaa3f7
PP
701 # Current text location.
702 @property
703 def _text_loc(self):
e57a18e1 704 return TextLocation._create( # pyright: ignore[reportPrivateUsage]
71aaa3f7
PP
705 self._line_no, self._col_no
706 )
707
708 # Returns `True` if this parser is done parsing.
709 def _is_done(self):
710 return self._at == len(self._normand)
711
712 # Returns `True` if this parser isn't done parsing.
713 def _isnt_done(self):
714 return not self._is_done()
715
716 # Raises a parse error, creating it using the message `msg` and the
717 # current text location.
718 def _raise_error(self, msg: str) -> NoReturn:
719 _raise_error(msg, self._text_loc)
720
721 # Tries to make the pattern `pat` match the current substring,
722 # returning the match object and updating `self._at`,
723 # `self._line_no`, and `self._col_no` on success.
724 def _try_parse_pat(self, pat: Pattern[str]):
725 m = pat.match(self._normand, self._at)
726
727 if m is None:
728 return
729
730 # Skip matched string
731 self._at += len(m.group(0))
732
733 # Update line number
734 self._line_no += m.group(0).count("\n")
735
736 # Update column number
737 for i in reversed(range(self._at)):
738 if self._normand[i] == "\n" or i == 0:
739 if i == 0:
740 self._col_no = self._at + 1
741 else:
742 self._col_no = self._at - i
743
744 break
745
746 # Return match object
747 return m
748
749 # Expects the pattern `pat` to match the current substring,
750 # returning the match object and updating `self._at`,
751 # `self._line_no`, and `self._col_no` on success, or raising a parse
752 # error with the message `error_msg` on error.
753 def _expect_pat(self, pat: Pattern[str], error_msg: str):
754 # Match
755 m = self._try_parse_pat(pat)
756
757 if m is None:
758 # No match: error
759 self._raise_error(error_msg)
760
761 # Return match object
762 return m
763
ba11fb1d
PP
764 # Patterns for _skip_*()
765 _comment_pat = re.compile(r"#[^#]*?(?:$|#)", re.M)
766 _ws_or_comments_pat = re.compile(r"(?:\s|{})*".format(_comment_pat.pattern), re.M)
71aaa3f7 767 _ws_or_syms_or_comments_pat = re.compile(
ba11fb1d 768 r"(?:[\s/\\?&:;.,[\]_=|-]|{})*".format(_comment_pat.pattern), re.M
71aaa3f7
PP
769 )
770
ba11fb1d
PP
771 # Skips as many whitespaces and comments as possible, but not
772 # insignificant symbol characters.
773 def _skip_ws_and_comments(self):
774 self._try_parse_pat(self._ws_or_comments_pat)
775
71aaa3f7
PP
776 # Skips as many whitespaces, insignificant symbol characters, and
777 # comments as possible.
ba11fb1d 778 def _skip_ws_and_comments_and_syms(self):
71aaa3f7
PP
779 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
780
781 # Pattern for _try_parse_hex_byte()
782 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
783
784 # Tries to parse a hexadecimal byte, returning a byte item on
785 # success.
786 def _try_parse_hex_byte(self):
0e8e3169
PP
787 begin_text_loc = self._text_loc
788
71aaa3f7
PP
789 # Match initial nibble
790 m_high = self._try_parse_pat(self._nibble_pat)
791
792 if m_high is None:
793 # No match
794 return
795
796 # Expect another nibble
ba11fb1d 797 self._skip_ws_and_comments_and_syms()
71aaa3f7
PP
798 m_low = self._expect_pat(
799 self._nibble_pat, "Expecting another hexadecimal nibble"
800 )
801
802 # Return item
0e8e3169 803 return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)
71aaa3f7
PP
804
805 # Patterns for _try_parse_bin_byte()
806 _bin_byte_bit_pat = re.compile(r"[01]")
6dd69a2a 807 _bin_byte_prefix_pat = re.compile(r"%+")
71aaa3f7
PP
808
809 # Tries to parse a binary byte, returning a byte item on success.
810 def _try_parse_bin_byte(self):
0e8e3169
PP
811 begin_text_loc = self._text_loc
812
71aaa3f7 813 # Match prefix
6dd69a2a
PP
814 m = self._try_parse_pat(self._bin_byte_prefix_pat)
815
816 if m is None:
71aaa3f7
PP
817 # No match
818 return
819
6dd69a2a
PP
820 # Expect as many bytes as there are `%` prefixes
821 items = [] # type: List[_Item]
71aaa3f7 822
6dd69a2a 823 for _ in range(len(m.group(0))):
ba11fb1d 824 self._skip_ws_and_comments_and_syms()
6dd69a2a
PP
825 byte_text_loc = self._text_loc
826 bits = [] # type: List[str]
827
828 # Expect eight bits
829 for _ in range(8):
ba11fb1d 830 self._skip_ws_and_comments_and_syms()
6dd69a2a
PP
831 m = self._expect_pat(
832 self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
833 )
834 bits.append(m.group(0))
835
836 items.append(_Byte(int("".join(bits), 2), byte_text_loc))
71aaa3f7
PP
837
838 # Return item
6dd69a2a
PP
839 if len(items) == 1:
840 return items[0]
841
842 # As group
843 return _Group(items, begin_text_loc)
71aaa3f7
PP
844
845 # Patterns for _try_parse_dec_byte()
320644e2 846 _dec_byte_prefix_pat = re.compile(r"\$")
71aaa3f7
PP
847 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
848
849 # Tries to parse a decimal byte, returning a byte item on success.
850 def _try_parse_dec_byte(self):
0e8e3169
PP
851 begin_text_loc = self._text_loc
852
71aaa3f7
PP
853 # Match prefix
854 if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
855 # No match
856 return
857
858 # Expect the value
ba11fb1d 859 self._skip_ws_and_comments()
71aaa3f7
PP
860 m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
861
862 # Compute value
863 val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)
864
865 # Validate
866 if val < -128 or val > 255:
0e8e3169 867 _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)
71aaa3f7
PP
868
869 # Two's complement
05f81895 870 val %= 256
71aaa3f7
PP
871
872 # Return item
0e8e3169 873 return _Byte(val, begin_text_loc)
71aaa3f7
PP
874
875 # Tries to parse a byte, returning a byte item on success.
876 def _try_parse_byte(self):
877 # Hexadecimal
878 item = self._try_parse_hex_byte()
879
880 if item is not None:
881 return item
882
883 # Binary
884 item = self._try_parse_bin_byte()
885
886 if item is not None:
887 return item
888
889 # Decimal
890 item = self._try_parse_dec_byte()
891
892 if item is not None:
893 return item
894
71aaa3f7 895 # Strings corresponding to escape sequence characters
7a7b31e8 896 _lit_str_escape_seq_strs = {
71aaa3f7
PP
897 "0": "\0",
898 "a": "\a",
899 "b": "\b",
900 "e": "\x1b",
901 "f": "\f",
902 "n": "\n",
903 "r": "\r",
904 "t": "\t",
905 "v": "\v",
906 "\\": "\\",
907 '"': '"',
908 }
909
7a7b31e8
PP
910 # Patterns for _try_parse_lit_str()
911 _lit_str_prefix_suffix_pat = re.compile(r'"')
912 _lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
0e8e3169 913
7a7b31e8
PP
914 # Parses a literal string between double quotes (without an encoding
915 # prefix) and returns the resulting string.
916 def _try_parse_lit_str(self, with_prefix: bool):
917 # Match prefix if needed
918 if with_prefix:
919 if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None:
920 # No match
921 return
71aaa3f7 922
7a7b31e8
PP
923 # Expect literal string
924 m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string")
925
926 # Expect end of string
927 self._expect_pat(
928 self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)'
929 )
930
931 # Replace escape sequences
932 val = m.group(0)
933
934 for ec in '0abefnrtv"\\':
935 val = val.replace(r"\{}".format(ec), self._lit_str_escape_seq_strs[ec])
936
937 # Return string
938 return val
939
940 # Patterns for _try_parse_utf_str_encoding()
941 _str_encoding_utf_prefix_pat = re.compile(r"u")
942 _str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
943
944 # Tries to parse a UTF encoding specification, returning the Python
945 # codec name on success.
946 def _try_parse_utf_str_encoding(self):
947 # Match prefix
948 if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None:
71aaa3f7
PP
949 # No match
950 return
951
7a7b31e8
PP
952 # Expect UTF specification
953 m = self._expect_pat(
954 self._str_encoding_utf_pat,
955 "Expecting `8`, `16be`, `16le`, `32be` or `32le`",
956 )
71aaa3f7 957
7a7b31e8
PP
958 # Convert to codec name
959 return {
960 "8": "utf_8",
961 "16be": "utf_16_be",
962 "16le": "utf_16_le",
963 "32be": "utf_32_be",
964 "32le": "utf_32_le",
965 }[m.group(0)]
966
967 # Patterns for _try_parse_str_encoding()
968 _str_encoding_gen_prefix_pat = re.compile(r"s")
969 _str_encoding_colon_pat = re.compile(r":")
970 _str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b")
971
972 # Tries to parse a string encoding specification, returning the
973 # Python codec name on success.
974 #
975 # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`.
976 def _try_parse_str_encoding(self, req_gen_prefix: bool = False):
977 # General prefix?
978 if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None:
979 # Expect `:`
ba11fb1d 980 self._skip_ws_and_comments()
7a7b31e8 981 self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`")
71aaa3f7 982
7a7b31e8 983 # Expect encoding specification
ba11fb1d 984 self._skip_ws_and_comments()
71aaa3f7 985
7a7b31e8
PP
986 # UTF?
987 codec = self._try_parse_utf_str_encoding()
71aaa3f7 988
7a7b31e8
PP
989 if codec is not None:
990 return codec
71aaa3f7 991
7a7b31e8
PP
992 # Expect Latin
993 m = self._expect_pat(
994 self._str_encoding_non_utf_pat,
995 "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`",
996 )
997 return m.group(0)
71aaa3f7 998
7a7b31e8
PP
999 # UTF?
1000 if not req_gen_prefix:
1001 return self._try_parse_utf_str_encoding()
71aaa3f7 1002
7a7b31e8
PP
1003 # Patterns for _try_parse_str()
1004 _lit_str_prefix_pat = re.compile(r'"')
1005 _str_prefix_pat = re.compile(r'"|\{')
1006 _str_expr_pat = re.compile(r"[^}]+")
1007 _str_expr_suffix_pat = re.compile(r"\}")
1008
1009 # Tries to parse a string, returning a literal string or string item
1010 # on success.
1011 def _try_parse_str(self):
1012 begin_text_loc = self._text_loc
1013
1014 # Encoding
1015 codec = self._try_parse_str_encoding()
1016
1017 # Match prefix (expect if there's an encoding specification)
ba11fb1d 1018 self._skip_ws_and_comments()
7a7b31e8
PP
1019
1020 if codec is None:
1021 # No encoding: only a literal string (UTF-8) is legal
1022 m_prefix = self._try_parse_pat(self._lit_str_prefix_pat)
1023
1024 if m_prefix is None:
1025 return
1026 else:
1027 # Encoding present: expect a string prefix
1028 m_prefix = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`')
1029
1030 # Literal string or expression?
1031 prefix = m_prefix.group(0)
1032
1033 if prefix == '"':
1034 # Expect literal string
1035 str_text_loc = self._text_loc
1036 val = self._try_parse_lit_str(False)
1037
1038 if val is None:
1039 self._raise_error("Expecting a literal string")
1040
1041 # Encode string
1042 data = _encode_str(val, "utf_8" if codec is None else codec, str_text_loc)
1043
1044 # Return item
1045 return _LitStr(data, begin_text_loc)
1046 else:
1047 # Expect expression
ba11fb1d 1048 self._skip_ws_and_comments()
7a7b31e8
PP
1049 expr_text_loc = self._text_loc
1050 m = self._expect_pat(self._str_expr_pat, "Expecting an expression")
1051
1052 # Expect `}`
1053 self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")
1054
1055 # Create an expression node from the expression string
1056 expr_str, expr = self._ast_expr_from_str(m.group(0), expr_text_loc)
1057
1058 # Return item
1059 assert codec is not None
1060 return _Str(expr_str, expr, codec, begin_text_loc)
71aaa3f7 1061
320644e2
PP
1062 # Common right parenthesis pattern
1063 _right_paren_pat = re.compile(r"\)")
1064
71aaa3f7 1065 # Patterns for _try_parse_group()
320644e2 1066 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
71aaa3f7
PP
1067
1068 # Tries to parse a group, returning a group item on success.
1069 def _try_parse_group(self):
0e8e3169
PP
1070 begin_text_loc = self._text_loc
1071
71aaa3f7 1072 # Match prefix
261c5ecf
PP
1073 m_open = self._try_parse_pat(self._group_prefix_pat)
1074
1075 if m_open is None:
71aaa3f7
PP
1076 # No match
1077 return
1078
1079 # Parse items
1080 items = self._parse_items()
1081
1082 # Expect end of group
ba11fb1d 1083 self._skip_ws_and_comments_and_syms()
261c5ecf
PP
1084
1085 if m_open.group(0) == "(":
320644e2 1086 pat = self._right_paren_pat
261c5ecf
PP
1087 exp = ")"
1088 else:
1089 pat = self._block_end_pat
1090 exp = "!end"
1091
1092 self._expect_pat(pat, "Expecting an item or `{}` (end of group)".format(exp))
71aaa3f7
PP
1093
1094 # Return item
0e8e3169 1095 return _Group(items, begin_text_loc)
71aaa3f7
PP
1096
1097 # Returns a stripped expression string and an AST expression node
1098 # from the expression string `expr_str` at text location `text_loc`.
e57a18e1 1099 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
71aaa3f7
PP
1100 # Create an expression node from the expression string
1101 expr_str = expr_str.strip().replace("\n", " ")
1102
1103 try:
1104 expr = ast.parse(expr_str, mode="eval")
1105 except SyntaxError:
1106 _raise_error(
1107 "Invalid expression `{}`: invalid syntax".format(expr_str),
1108 text_loc,
1109 )
1110
1111 return expr_str, expr
1112
7a7b31e8 1113 # Patterns for _try_parse_val()
05f81895 1114 _val_expr_pat = re.compile(r"([^}:]+):\s*")
7a7b31e8
PP
1115 _fl_num_len_fmt_pat = re.compile(r"8|16|24|32|40|48|56|64")
1116 _leb128_int_fmt_pat = re.compile(r"(u|s)leb128")
71aaa3f7 1117
7a7b31e8
PP
1118 # Tries to parse a value (number or string) and format (fixed length
1119 # in bits, `uleb128`, `sleb128`, or `s:` followed with an encoding
1120 # name), returning an item on success.
def _try_parse_val(self):
    # Returns `None` when the text doesn't start with an expression
    # followed by `:`; otherwise returns a `_FlNum`, `_ULeb128Int`,
    # `_SLeb128Int`, or `_Str` item depending on the format that
    # follows the colon.
    val_text_loc = self._text_loc
    expr_m = self._try_parse_pat(self._val_expr_pat)

    if expr_m is None:
        return

    expr_str, expr = self._ast_expr_from_str(expr_m.group(1), val_text_loc)
    self._skip_ws_and_comments()

    # Fixed length (in bits)?
    len_m = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if len_m is not None:
        return _FlNum(expr_str, expr, int(len_m.group(0)), val_text_loc)

    # LEB128 (`uleb128` or `sleb128`)?
    leb128_m = self._try_parse_pat(self._leb128_int_fmt_pat)

    if leb128_m is not None:
        if leb128_m.group(1) == "u":
            return _ULeb128Int(expr_str, expr, val_text_loc)

        return _SLeb128Int(expr_str, expr, val_text_loc)

    # String encoding?
    codec = self._try_parse_str_encoding(True)

    if codec is not None:
        return _Str(expr_str, expr, codec, val_text_loc)

    # No known format follows the colon
    self._raise_error(
        "Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
    )
71aaa3f7 1166
320644e2 1167 # Patterns for _try_parse_var_assign()
7a7b31e8
PP
1168 _var_assign_name_equal_pat = re.compile(
1169 r"({})\s*=(?!=)".format(_py_name_pat.pattern)
1170 )
320644e2 1171 _var_assign_expr_pat = re.compile(r"[^}]+")
71aaa3f7 1172
2adf4336
PP
1173 # Tries to parse a variable assignment, returning a variable
1174 # assignment item on success.
def _try_parse_var_assign(self):
    # Returns `None` when the text doesn't start with `NAME =`.
    #
    # On success, adds the name to `self._var_names` and returns a
    # `_VarAssign` item. The reserved `ICITTE` name and any existing
    # label name are rejected through _raise_error().
    assign_text_loc = self._text_loc
    name_m = self._try_parse_pat(self._var_assign_name_equal_pat)

    if name_m is None:
        return

    var_name = name_m.group(1)

    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), assign_text_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), assign_text_loc)

    # The right-hand side is everything up to the closing `}`
    self._skip_ws_and_comments()
    expr_m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(expr_m.group(0), assign_text_loc)

    self._var_names.add(var_name)
    return _VarAssign(var_name, expr_str, expr, assign_text_loc)
1213
2adf4336 1214 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
1215 _bo_pat = re.compile(r"[bl]e")
1216
2adf4336
PP
1217 # Tries to parse a byte order name, returning a byte order setting
1218 # item on success.
def _try_parse_set_bo(self):
    # Returns a `_SetBo` item for `be` or `le`, or `None` when
    # `_bo_pat` doesn't match.
    bo_text_loc = self._text_loc
    bo_m = self._try_parse_pat(self._bo_pat)

    if bo_m is None:
        return

    bo_name = bo_m.group(0)

    if bo_name == "be":
        return _SetBo(ByteOrder.BE, bo_text_loc)

    # `_bo_pat` only matches `be` or `le`
    assert bo_name == "le"
    return _SetBo(ByteOrder.LE, bo_text_loc)
71aaa3f7
PP
1235
# Patterns for _try_parse_val_or_var_assign_or_set_bo()
320644e2
PP
1237 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1238 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
71aaa3f7 1239
2adf4336
PP
1240 # Tries to parse a value, a variable assignment, or a byte order
1241 # setting, returning an item on success.
def _try_parse_val_or_var_assign_or_set_bo(self):
    # Everything handled here lives between `{` and `}`. Returns the
    # first item which one of the sub-parsers produces, or `None` when
    # the text doesn't start with `{`.
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        return

    self._skip_ws_and_comments()

    # Try, in this order: variable assignment, value, byte order setting
    for try_parse in (
        self._try_parse_var_assign,
        self._try_parse_val,
        self._try_parse_set_bo,
    ):
        item = try_parse()

        if item is not None:
            break
    else:
        # Nothing matched within the braces
        self._raise_error(
            "Expecting a fixed-length number, a string, a variable assignment, or a byte order setting"
        )

    self._skip_ws_and_comments()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
1271
2adf4336
PP
1272 # Tries to parse an offset setting value (after the initial `<`),
1273 # returning an offset item on success.
def _try_parse_set_offset_val(self):
    # Returns a `_SetOffset` item built from a positive constant
    # integer, or `None` when `_pos_const_int_pat` doesn't match.
    offset_text_loc = self._text_loc
    offset_m = self._try_parse_pat(_pos_const_int_pat)

    if offset_m is None:
        return

    # Base 0 lets int() honour the prefix of the normalized literal
    offset = int(_norm_const_int(offset_m.group(0)), 0)
    return _SetOffset(offset, offset_text_loc)
71aaa3f7
PP
1286
1287 # Tries to parse a label name (after the initial `<`), returning a
1288 # label item on success.
def _try_parse_label_name(self):
    # Returns `None` when the text doesn't start with a Python name.
    #
    # On success, adds the name to `self._label_names` and returns a
    # `_Label` item. The reserved `ICITTE` name, a duplicate label
    # name, and an existing variable name are rejected through
    # _raise_error().
    label_text_loc = self._text_loc
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is None:
        return

    label_name = name_m.group(0)

    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), label_text_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), label_text_loc)

    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), label_text_loc
        )

    self._label_names.add(label_name)
    return _Label(label_name, label_text_loc)
71aaa3f7 1318
2adf4336 1319 # Patterns for _try_parse_label_or_set_offset()
320644e2
PP
1320 _label_set_offset_prefix_pat = re.compile(r"<")
1321 _label_set_offset_suffix_pat = re.compile(r">")
71aaa3f7 1322
2adf4336
PP
1323 # Tries to parse a label or an offset setting, returning an item on
1324 # success.
def _try_parse_label_or_set_offset(self):
    # Both forms live between `<` and `>`. Returns the parsed item, or
    # `None` when the text doesn't start with `<`.
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        return

    self._skip_ws_and_comments()

    # An offset setting value is tried before a label name
    item = self._try_parse_set_offset_val()

    if item is None:
        item = self._try_parse_label_name()

    if item is None:
        self._raise_error("Expecting a label name or an offset setting value")

    self._skip_ws_and_comments()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1347
25ca454b
PP
1348 # Pattern for _parse_pad_val()
1349 _pad_val_prefix_pat = re.compile(r"~")
1350
1351 # Tries to parse a padding value, returning the padding value, or 0
1352 # if none.
def _parse_pad_val(self):
    # An optional `~` introduces the padding byte value. Returns 0
    # when there's no `~`; a value greater than 255 is rejected
    # through _raise_error().
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No explicit padding value
        return 0

    self._skip_ws_and_comments()
    val_text_loc = self._text_loc
    val_m = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    pad_val = int(_norm_const_int(val_m.group(0)), 0)

    # Must fit in a single byte
    if pad_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(pad_val),
            val_text_loc,
        )

    return pad_val
1376
676f6189 1377 # Patterns for _try_parse_align_offset()
320644e2
PP
1378 _align_offset_prefix_pat = re.compile(r"@")
1379 _align_offset_val_pat = re.compile(r"\d+")
676f6189
PP
1380
1381 # Tries to parse an offset alignment, returning an offset alignment
1382 # item on success.
def _try_parse_align_offset(self):
    # Parses `@`, an alignment in bits, and an optional padding value.
    #
    # Returns an `_AlignOffset` item, or `None` when the text doesn't
    # start with `@`.
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        return

    self._skip_ws_and_comments()
    align_text_loc = self._text_loc
    align_m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(align_m.group(0))

    # Only whole bytes can be aligned
    if not (align > 0 and align % 8 == 0):
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            align_text_loc,
        )

    return _AlignOffset(align, self._parse_pad_val(), item_text_loc)
676f6189 1415
dbd84e74
PP
1416 # Patterns for _expect_expr()
1417 _inner_expr_prefix_pat = re.compile(r"\{")
1418 _inner_expr_pat = re.compile(r"[^}]+")
1419 _inner_expr_suffix_pat = re.compile(r"\}")
dbd84e74
PP
1420
# Parses an expression outside a `{`/`}` context.
#
# This function accepts:
#
# • A Python expression within `{` and `}`.
#
# • A Python name.
#
# • If `accept_const_int` is `True`: a constant integer, which may
#   be negative if `allow_neg_int` is `True`.
#
# • If `accept_const_float` is `True`: a constant floating point
#   number.
#
# • If `accept_lit_str` is `True`: a literal string.
#
# Returns the stripped expression string and AST expression.
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    # The alternatives are tried in this order: constant floating
    # point number, constant integer, name, literal string, and
    # finally a Python expression between `{` and `}`.
    #
    # Always returns what _ast_expr_from_str() returns: the stripped
    # expression string and its AST node.
    start_text_loc = self._text_loc

    # Constant floating point number?
    if accept_const_float:
        float_m = self._try_parse_pat(_const_float_pat)

        if float_m is not None:
            return self._ast_expr_from_str(float_m.group(0), start_text_loc)

    # Constant integer?
    if accept_const_int:
        int_m = self._try_parse_pat(_const_int_pat)

        if int_m is not None:
            if not allow_neg_int and int_m.group("neg") == "-":
                _raise_error(
                    "Expecting a positive constant integer", start_text_loc
                )

            return self._ast_expr_from_str(
                _norm_const_int(int_m.group(0)), start_text_loc
            )

    # Name?
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is not None:
        return self._ast_expr_from_str(name_m.group(0), start_text_loc)

    # Literal string?
    if accept_lit_str:
        lit_str = self._try_parse_lit_str(True)

        if lit_str is not None:
            return self._ast_expr_from_str(repr(lit_str), start_text_loc)

    # Only `{` remains: list what would have been accepted, in the
    # order in which the alternatives are documented to the user
    accepted = []

    if accept_const_int:
        accepted.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        accepted.append("a constant floating point number")

    if accept_lit_str:
        accepted.append("a literal string")

    accepted += ["a name", "or `{`"]

    # No comma with only two alternatives ("a name or `{`")
    sep = " " if len(accepted) == 2 else ", "
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(sep.join(accepted)),
    )

    # Expression between the braces
    self._skip_ws_and_comments()
    inner_text_loc = self._text_loc
    inner_m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")
    inner_str = inner_m.group(0)

    self._skip_ws_and_comments()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_str, inner_text_loc)
1512
25ca454b
PP
1513 # Patterns for _try_parse_fill_until()
1514 _fill_until_prefix_pat = re.compile(r"\+")
1515 _fill_until_pad_val_prefix_pat = re.compile(r"~")
676f6189 1516
25ca454b
PP
1517 # Tries to parse a filling, returning a filling item on success.
def _try_parse_fill_until(self):
    # Parses `+`, an expression (a constant integer is accepted), and
    # an optional padding value.
    #
    # Returns a `_FillUntil` item, or `None` when the text doesn't
    # start with `+`.
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        return

    self._skip_ws_and_comments()
    expr_str, expr = self._expect_expr(accept_const_int=True)
    return _FillUntil(expr_str, expr, self._parse_pad_val(), item_text_loc)
676f6189 1535
27d52a19
PP
1536 # Parses the multiplier expression of a repetition (block or
1537 # post-item) and returns the expression string and AST node.
def _expect_rep_mul_expr(self):
    # A multiplier is any expression _expect_expr() accepts, plus a
    # constant integer.
    mul_expr_str_and_node = self._expect_expr(accept_const_int=True)
    return mul_expr_str_and_node
27d52a19
PP
1540
1541 # Common block end pattern
320644e2 1542 _block_end_pat = re.compile(r"!end\b")
27d52a19 1543
e57a18e1 1544 # Pattern for _try_parse_rep_block()
320644e2 1545 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
e57a18e1
PP
1546
1547 # Tries to parse a repetition block, returning a repetition item on
1548 # success.
def _try_parse_rep_block(self):
    # Parses `!repeat`/`!r`, a multiplier expression, items, and
    # `!end`.
    #
    # Returns a `_Rep` item wrapping a `_Group` of the block's items,
    # or `None` when the prefix doesn't match.
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        return

    # Multiplier
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Repeated items
    self._skip_ws_and_comments_and_syms()
    body_text_loc = self._text_loc
    body_items = self._parse_items()

    # End of block
    self._skip_ws_and_comments_and_syms()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    body = _Group(body_items, body_text_loc)
    return _Rep(body, mul_expr_str, mul_expr, block_text_loc)
1574
# Patterns for _try_parse_cond_block()
320644e2 1576 _cond_block_prefix_pat = re.compile(r"!if\b")
12b5dbc0 1577 _cond_block_else_pat = re.compile(r"!else\b")
27d52a19
PP
1578
1579 # Tries to parse a conditional block, returning a conditional item
1580 # on success.
def _try_parse_cond_block(self):
    # Parses `!if`, a condition expression, the "true" items, an
    # optional `!else` followed by the "false" items, and `!end`.
    #
    # Returns a `_Cond` item, or `None` when the prefix doesn't match.
    cond_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        return

    # Condition
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_expr()

    # "True" branch
    self._skip_ws_and_comments_and_syms()
    true_text_loc = self._text_loc
    true_items = self._parse_items()

    # Without `!else`, the "false" branch is empty and located at the
    # beginning of the whole block
    false_items = []  # type: List[_Item]
    false_text_loc = cond_text_loc
    self._skip_ws_and_comments_and_syms()

    if self._try_parse_pat(self._cond_block_else_pat) is not None:
        # "False" branch
        self._skip_ws_and_comments_and_syms()
        false_text_loc = self._text_loc
        false_items = self._parse_items()

    # End of block
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    true_group = _Group(true_items, true_text_loc)
    false_group = _Group(false_items, false_text_loc)
    return _Cond(true_group, false_group, cond_expr_str, cond_expr, cond_text_loc)
27d52a19 1623
320644e2
PP
1624 # Common left parenthesis pattern
1625 _left_paren_pat = re.compile(r"\(")
1626
1627 # Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
1628 _macro_params_comma_pat = re.compile(",")
1629
1630 # Patterns for _try_parse_macro_def()
1631 _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1632
1633 # Tries to parse a macro definition, adding it to `self._macro_defs`
1634 # and returning `True` on success.
def _try_parse_macro_def(self):
    # Parses `!macro`/`!m`, a name, a parenthesized list of
    # comma-separated parameter names, items, and `!end`.
    #
    # Returns `False` when the prefix doesn't match; otherwise
    # registers a `_MacroDef` under the macro name in
    # `self._macro_defs` and returns `True`.
    macro_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        return False

    # Macro name
    self._skip_ws_and_comments()
    name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_text_loc)

    # Parameter names
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")
    param_names = []  # type: List[str]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter names
            break

        # Every parameter name but the first follows a comma
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_text_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)

    # Macro items: parsed with fresh sets of known variable and label
    # names, the outer ones being put back afterwards
    self._skip_ws_and_comments_and_syms()
    body_text_loc = self._text_loc
    outer_var_names = self._var_names.copy()
    outer_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = outer_var_names
    self._label_names = outer_label_names

    # End of block
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    body = _Group(body_items, body_text_loc)
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, body, macro_text_loc
    )
    return True
1710
1711 # Patterns for _try_parse_macro_exp()
1712 _macro_exp_prefix_pat = re.compile(r"m\b")
1713 _macro_exp_colon_pat = re.compile(r":")
1714
1715 # Tries to parse a macro expansion, returning a macro expansion item
1716 # on success.
def _try_parse_macro_exp(self):
    # Parses `m`, `:`, the name of a known macro, and a parenthesized
    # list of comma-separated parameter values.
    #
    # Returns a `_MacroExp` item, or `None` when the prefix doesn't
    # match. An unknown macro name or a parameter count which differs
    # from the definition is rejected through _raise_error().
    exp_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        return

    self._skip_ws_and_comments()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Macro name
    self._skip_ws_and_comments()
    name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), name_text_loc)

    # Parameter values
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")
    params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter values
            break

        # Every value but the first follows a comma
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws_and_comments()
        param_text_loc = self._text_loc
        param_expr_str, param_expr = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        params.append(
            _MacroExpParam(param_expr_str, param_expr, text_loc=param_text_loc)
        )

    # The expansion must pass as many values as the definition expects
    passed_count = len(params)
    expected_count = len(macro_def.param_names)

    if passed_count != expected_count:
        plural_suffix = "" if passed_count == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                passed_count, plural_suffix, expected_count
            ),
            params_text_loc,
        )

    return _MacroExp(macro_name, params, exp_text_loc)
1789
71aaa3f7
PP
# Tries to parse a base item (anything except a post-item
# repetition), returning it on success.
def _try_parse_base_item(self):
    # The sub-parsers are tried in this fixed order; the first one to
    # return an item wins. Returns `None` when none of them matches.
    sub_parsers = (
        self._try_parse_byte,
        self._try_parse_str,
        self._try_parse_val_or_var_assign_or_set_bo,
        self._try_parse_label_or_set_offset,
        self._try_parse_align_offset,
        self._try_parse_fill_until,
        self._try_parse_group,
        self._try_parse_rep_block,
        self._try_parse_cond_block,
        self._try_parse_macro_exp,
    )

    for try_parse in sub_parsers:
        item = try_parse()

        if item is not None:
            return item
1852
e57a18e1
PP
1853 # Pattern for _try_parse_rep_post()
1854 _rep_post_prefix_pat = re.compile(r"\*")
1855
1856 # Tries to parse a post-item repetition, returning the expression
1857 # string and AST expression node on success.
def _try_parse_rep_post(self):
    # Parses `*` followed by a multiplier expression.
    #
    # Returns what _expect_rep_mul_expr() returns, or `None` when the
    # text doesn't start with `*`.
    star_m = self._try_parse_pat(self._rep_post_prefix_pat)

    if star_m is None:
        return

    self._skip_ws_and_comments()
    return self._expect_rep_mul_expr()
71aaa3f7 1867
1ca7b5e1
PP
1868 # Tries to parse an item, possibly followed by a repetition,
1869 # returning `True` on success.
1870 #
1871 # Appends any parsed item to `items`.
def _try_append_item(self, items: List[_Item]):
    # Returns `True` after appending an item to `items`, or `None`
    # when no base item could be parsed.
    self._skip_ws_and_comments_and_syms()
    base_item = self._try_parse_base_item()

    if base_item is None:
        return

    new_item = base_item

    # Only a repeatable item may be followed by `*` and a multiplier
    if isinstance(base_item, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep = self._try_parse_rep_post()

        if rep is not None:
            # Wrap the base item within a repetition item
            rep_expr_str, rep_expr = rep
            new_item = _Rep(base_item, rep_expr_str, rep_expr, text_loc=rep_text_loc)

    items.append(new_item)
    return True
71aaa3f7
PP
1892
1893 # Parses and returns items, skipping whitespaces, insignificant
1894 # symbols, and comments when allowed, and stopping at the first
1895 # unknown character.
320644e2
PP
1896 #
1897 # Accepts and registers macro definitions if `accept_macro_defs`
1898 # is `True`.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    # Keeps parsing until the input is exhausted or until neither an
    # item nor (when `accept_macro_defs` is `True`) a macro definition
    # can be parsed at the current position.
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            continue

        # Not an item: a macro definition is the only other option
        if not (accept_macro_defs and self._try_parse_macro_def()):
            break

    return parsed_items
1912
1913 # Parses the whole Normand input, setting `self._res` to the main
1914 # group item on success.
def _parse(self):
    # Sets `self._res` to a `_Group` of the first level items. Macro
    # definitions are only accepted at this level.
    if not self._normand.strip():
        # Nothing but whitespace: the result is an empty group
        self._res = _Group([], self._text_loc)
        return

    top_items = self._parse_items(True)

    # Whatever remains at this point wasn't recognized
    self._skip_ws_and_comments_and_syms()

    if self._isnt_done():
        unexpected = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected))

    self._res = _Group(top_items, self._text_loc)
1934
1935
1936# The return type of parse().
class ParseResult:
    # Immutable-looking view of the outcome of a parse: instances are
    # only built through the private _create() class method; calling
    # the class directly raises `NotImplementedError`.

    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: "VariablesT",
        labels: "LabelsT",
        offset: int,
        bo: "Optional[ByteOrder]",
    ):
        # Bypass __init__(), which always raises
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: "VariablesT",
        labels: "LabelsT",
        offset: int,
        bo: "Optional[ByteOrder]",
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1993
1994
1995# Raises a parse error for the item `item`, creating it using the
1996# message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The error is reported at the text location of the item itself
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1999
2000
2001# The `ICITTE` reserved name.
2002_icitte_name = "ICITTE"
2003
2004
2adf4336
PP
2005# Base node visitor.
2006#
2007# Calls the _visit_name() method for each name node which isn't the name
2008# of a call.
2009class _NodeVisitor(ast.NodeVisitor):
2010 def __init__(self):
71aaa3f7
PP
2011 self._parent_is_call = False
2012
2013 def generic_visit(self, node: ast.AST):
2014 if type(node) is ast.Call:
2015 self._parent_is_call = True
2016 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 2017 self._visit_name(node.id)
71aaa3f7
PP
2018
2019 super().generic_visit(node)
2020 self._parent_is_call = False
2021
2adf4336
PP
2022 @abc.abstractmethod
2023 def _visit_name(self, name: str):
2024 ...
2025
71aaa3f7 2026
2adf4336
PP
2027# Expression validator: validates that all the names within the
2028# expression are allowed.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()

        # `expr_str` and `text_loc` are only used to build the error
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # `ICITTE` is always legal, as well as any allowed name
        if name == _icitte_name or name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # List the legal names (on a copy: the caller's set remains
        # untouched)
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        if legal_names:
            quoted_names = sorted("`{}`".format(legal) for legal in legal_names)
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._text_loc)
2057
2058
2adf4336
PP
2059# Generator state.
2060class _GenState:
2061 def __init__(
1b8aa84a 2062 self,
e57a18e1
PP
2063 variables: VariablesT,
2064 labels: LabelsT,
1b8aa84a
PP
2065 offset: int,
2066 bo: Optional[ByteOrder],
2adf4336
PP
2067 ):
2068 self.variables = variables.copy()
2069 self.labels = labels.copy()
2070 self.offset = offset
2071 self.bo = bo
71aaa3f7 2072
320644e2
PP
2073 def __repr__(self):
2074 return "_GenState({}, {}, {}, {})".format(
2075 repr(self.variables), repr(self.labels), repr(self.offset), repr(self.bo)
2076 )
2077
2078
2079# Fixed-length number item instance.
2080class _FlNumItemInst:
f5dcb24c
PP
2081 def __init__(
2082 self,
2083 item: _FlNum,
2084 offset_in_data: int,
2085 state: _GenState,
2086 parse_error_msgs: List[ParseErrorMessage],
2087 ):
320644e2
PP
2088 self._item = item
2089 self._offset_in_data = offset_in_data
2090 self._state = state
f5dcb24c 2091 self._parse_error_msgs = parse_error_msgs
320644e2
PP
2092
2093 @property
2094 def item(self):
2095 return self._item
2096
2097 @property
2098 def offset_in_data(self):
2099 return self._offset_in_data
2100
2101 @property
2102 def state(self):
2103 return self._state
2104
f5dcb24c
PP
2105 @property
2106 def parse_error_msgs(self):
2107 return self._parse_error_msgs
2108
71aaa3f7 2109
2adf4336 2110# Generator of data and final state from a group item.
71aaa3f7
PP
2111#
2112# Generation happens in memory at construction time. After building, use
2113# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
2114# get the resulting context.
2adf4336
PP
2115#
2116# The steps of generation are:
2117#
320644e2
PP
2118# 1. Handle each item in prefix order.
2119#
2120# The handlers append bytes to `self._data` and update some current
2121# state object (`_GenState` instance).
2122#
2123# When handling a fixed-length number item, try to evaluate its
2124# expression using the current state. If this fails, then it might be
2125# because the expression refers to a "future" label: save the current
2126# offset in `self._data` (generated data) and a snapshot of the
2127# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
f5dcb24c
PP
2128# object). _gen_fl_num_item_insts() will deal with this later. A
2129# `_FlNumItemInst` instance also contains a snapshot of the current
2130# parsing error messages (`self._parse_error_msgs`) which need to be
2131# taken into account when handling the instance later.
2adf4336 2132#
320644e2
PP
2133# When handling the items of a group, keep a map of immediate label
2134# names to their offset. Then, after having processed all the items,
2135# update the relevant saved state snapshots in
2136# `self._fl_num_item_insts` with those immediate label values.
2137# _gen_fl_num_item_insts() will deal with this later.
2adf4336 2138#
320644e2
PP
2139# 2. Handle all the fixed-length number item instances of which the
2140# expression evaluation failed before.
2adf4336 2141#
320644e2
PP
2142# At this point, `self._fl_num_item_insts` contains everything that's
2143# needed to evaluate the expressions, including the values of
2144# "future" labels from the point of view of some fixed-length number
2145# item instance.
2adf4336 2146#
f5dcb24c
PP
2147# If an evaluation fails at this point, then it's a user error. Add
2148# to the parsing error all the saved parsing error messages of the
2149# instance. Those additional messages add precious context to the
2150# error.
71aaa3f7
PP
2151class _Gen:
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Macro definitions, looked up by name when expanding a macro
    self._macro_defs = macro_defs

    # Fixed-length number item instances of which the immediate
    # evaluation failed; handled once all the items are processed
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]

    # Stack of contextual parsing error messages (one is pushed while
    # the group of a macro is being handled)
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    # Generation happens now, at construction time
    self._gen(group, _GenState(variables, labels, offset, bo))
71aaa3f7
PP
2165
# Generated bytes (the `bytearray` filled by the item handlers).
@property
def data(self) -> bytearray:
    return self._data
2170
# Updated variables (those of the final generation state).
@property
def variables(self) -> VariablesT:
    return self._final_state.variables
71aaa3f7
PP
2175
# Updated main group labels (those of the final generation state).
@property
def labels(self) -> LabelsT:
    return self._final_state.labels
71aaa3f7
PP
2180
# Updated offset (the one of the final generation state).
@property
def offset(self) -> int:
    return self._final_state.offset
71aaa3f7
PP
2185
# Updated byte order (the one of the final generation state), or
# `None` if it was never set.
@property
def bo(self) -> Optional[ByteOrder]:
    return self._final_state.bo
2190
320644e2
PP
# Evaluates the expression `expr`, of which the original string is
# `expr_str` and which is located at `text_loc`, considering the
# generation state `state`, and returns the resulting value.
#
# The names available to the expression are the labels of `state`, the
# current offset name, and the variables of `state`, in this order:
# a later name replaces an earlier one.
#
# A `bool` result is normalized to `int`. The type of the result must
# then be `int`, or also `float` if `accept_float` is `True`, or also
# `str` if `accept_str` is `True`; anything else is reported through
# _raise_error().
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    # Symbols: labels, then the current offset, then the variables
    names = dict(state.labels)  # type: VariablesT
    names[_icitte_name] = state.offset
    names.update(state.variables)

    # Validate the node and its children against the known names
    _ExprValidator(expr_str, text_loc, set(names)).visit(expr)

    # Compile and evaluate the expression node
    try:
        code = compile(expr, "", "eval")
        val = eval(code, None, names)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(val) is bool:
        val = int(val)

    # Accepted result types
    allowed_types = {int}  # type: Set[type]

    for is_accepted, extra_type in ((accept_float, float), (accept_str, str)):
        if is_accepted:
            allowed_types.add(extra_type)

    if type(val) not in allowed_types:
        # Build a human-readable list of the accepted type names
        *first_names, last_name = sorted(
            "`{}`".format(t.__name__) for t in allowed_types
        )

        if not first_names:
            msg_expected = last_name
        elif len(first_names) == 1:
            msg_expected = "{} or {}".format(first_names[0], last_name)
        else:
            msg_expected = "{}, or {}".format(", ".join(first_names), last_name)

        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, msg_expected, type(val).__name__
            ),
            text_loc,
        )

    return val
2264
7a7b31e8
PP
# Evaluates the expression of `item` with _eval_expr(), using the
# expression string, expression node, and text location of `item`, and
# returns the result.
@staticmethod
def _eval_item_expr(
    item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign],
    state: _GenState,
    accept_float: bool = False,
    accept_str: bool = False,
):
    return _Gen._eval_expr(
        item.expr_str,
        item.expr,
        item.text_loc,
        state,
        accept_float=accept_float,
        accept_str=accept_str,
    )
2277
# Handles the byte item `item`: appends its value to the generated
# data and advances the offset of `state` by its size.
def _handle_byte_item(self, item: _Byte, state: _GenState) -> None:
    self._data.append(item.val)
    state.offset += item.size
2282
7a7b31e8
PP
# Handles the literal string item `item`: appends its data to the
# generated data and advances the offset of `state` by its size.
def _handle_lit_str_item(self, item: _LitStr, state: _GenState) -> None:
    self._data += item.data
    state.offset += item.size
2287
# Handles the byte order setting item `item` (generates no data).
def _handle_set_bo_item(self, item: _SetBo, state: _GenState) -> None:
    # Update current byte order
    state.bo = item.bo
2292
# Handles the variable assignment item `item` (generates no data).
#
# The assigned value may be an `int`, a `float`, or a `str`.
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState) -> None:
    # Update variable
    state.variables[item.name] = self._eval_item_expr(
        item, state, accept_float=True, accept_str=True
    )
320644e2
PP
2299
# Handles the fixed-length number item `item`.
#
# If the bytes of `item` can't be generated immediately, then this
# method records a `_FlNumItemInst` (with deep copies of `state` and of
# the current parsing error messages) to try again later, and reserves
# zeroed bytes within the generated data in the meantime.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    # A byte order is required to encode more than 8 bits
    if item.len > 8 and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        # Immediate evaluation
        chunk = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Keep everything needed to (try to) generate the bytes of
        # this item later
        pending_inst = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(pending_inst)

        # Placeholder bytes (zeros) for this instance
        chunk = bytes(item.len // 8)

    # Append the bytes and update the offset accordingly
    self._data += chunk
    state.offset += len(chunk)
2333
05f81895
PP
# Returns the size, in bytes, required to encode the value `val`
# with LEB128 (signed version if `is_signed` is `True`).
@staticmethod
def _leb128_size_for_val(val: int, is_signed: bool):
    # For a negative value, use the equivalent upper bound: for
    # example, -128 needs as many bits as 127 because the full range
    # for this number of bits is [-128, 127].
    magnitude = ~val if val < 0 else val

    # Number of bits (one more for the sign if needed), at least one
    bit_count = max(1, magnitude.bit_length() + int(is_signed))

    # Seven bits per byte
    return math.ceil(bit_count / 7)
2353
320644e2
PP
# Handles the LEB128 integer item `item`: evaluates its expression,
# appends the encoded bytes to the generated data, and advances the
# offset of `state` by the number of encoded bytes.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    # Value to encode
    val = self._eval_item_expr(item, state)

    # Number of bytes to generate
    byte_count = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

    # Each byte holds the next seven LSBs with its MSB set (continue)
    encoded = bytearray(
        ((val >> (7 * index)) & 0x7F) | 0x80 for index in range(byte_count)
    )

    # Clear the MSB of the last byte (stop)
    encoded[-1] &= 0x7F

    # Append the bytes and update the offset
    self._data += encoded
    state.offset += byte_count
27d52a19 2373
7a7b31e8
PP
# Handles the string item `item`: evaluates its expression, converts
# the result to a string, encodes it with the codec of `item`, appends
# the encoded bytes to the generated data, and advances the offset of
# `state` by the number of encoded bytes.
def _handle_str_item(self, item: _Str, state: _GenState) -> None:
    # Compute value (an `int` or `float` result becomes its string
    # representation)
    val = str(self._eval_item_expr(item, state, accept_float=True, accept_str=True))

    # Encode
    data = _encode_str(val, item.codec, item.text_loc)

    # Add to data
    self._data += data

    # Update offset
    state.offset += len(data)
2387
320644e2
PP
# Handles the group item `item`, removing the immediate labels from
# `state` at the end if `remove_immediate_labels` is `True`.
#
# The immediate labels of the group are also added to the state
# snapshot of each fixed-length number item instance recorded while
# handling the group.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Instances recorded from this index on belong to this group
    first_inst_index = len(self._fl_num_item_insts)

    # Immediate label names to their offset
    group_labels = {}  # type: LabelsT

    for child in item.items:
        if type(child) is _Label:
            # Offset of the label at this point
            group_labels[child.name] = state.offset

        self._handle_item(child, state)

    # The immediate labels don't outlive the group, unless requested
    if remove_immediate_labels:
        for label_name in group_labels:
            del state.labels[label_name]

    # Make the immediate labels visible to the pending instances of
    # this group
    pending_insts = self._fl_num_item_insts[first_inst_index:]

    for pending_inst in pending_insts:
        pending_inst.state.labels.update(group_labels)
2adf4336 2413
320644e2
PP
# Handles the repetition item `item`: handles its contained item as
# many times as the value of its expression, which must not be
# negative.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    # Repetition count
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    # Generate the item data `count` times
    remaining = count

    while remaining > 0:
        self._handle_item(item.item, state)
        remaining -= 1
2adf4336 2431
# Handles the conditional item `item`: handles its "true" item if the
# value of its expression is true, or its "false" item otherwise.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    # Select the branch from the conditional value
    if _Gen._eval_item_expr(item, state):
        selected_item = item.true_item
    else:
        selected_item = item.false_item

    # Generate the data of the selected branch
    self._handle_item(selected_item, state)
2adf4336 2442
320644e2
PP
# Evaluates the parameters of the macro expansion item `item`
# considering the initial state `init_state` and returns a new state
# to handle the items of the macro.
#
# The new state has no labels, has the offset and byte order of
# `init_state`, and has one variable per evaluated parameter.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    # Fresh state sharing only the offset and byte order
    macro_state = _GenState({}, {}, init_state.offset, init_state.bo)

    # Parameter names come from the macro definition
    param_names = self._macro_defs[item.name].param_names

    # Each parameter expression is evaluated within the initial state
    for name, param_expr in zip(param_names, item.params):
        val = _Gen._eval_expr(
            param_expr.expr_str,
            param_expr.expr,
            param_expr.text_loc,
            init_state,
            accept_float=True,
            accept_str=True,
        )
        macro_state.variables[name] = val

    return macro_state
2adf4336 2464
320644e2
PP
# Handles the macro expansion item `item`.
#
# The group of the macro is handled with its own state (see
# _eval_macro_exp_params()); only the offset of `state` is updated, by
# the number of bytes generated during the expansion.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState) -> None:
    parse_error_msg_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # New state
        exp_state = self._eval_macro_exp_params(item, state)

        # Process the contained group
        init_data_size = len(self._data)
        parse_error_msg = (
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                parse_error_msg_text, item.text_loc
            )
        )

        # Make the expansion context available to the fixed-length
        # number item instances recorded while handling the group
        self._parse_error_msgs.append(parse_error_msg)
        self._handle_item(self._macro_defs[item.name].group, exp_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        # NOTE(review): presumably _augment_error() always raises;
        # otherwise `init_data_size` could be unbound below - confirm.
        _augment_error(exc, parse_error_msg_text, item.text_loc)

    # Update state offset and return
    state.offset += len(self._data) - init_data_size
676f6189 2488
320644e2
PP
# Handles the offset setting item `item`: replaces the offset of
# `state` with the value of `item` (generates no data).
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState) -> None:
    state.offset = item.val
2adf4336 2492
# Handles the offset alignment item `item` (adds padding).
#
# The value of `item` is an alignment in bits: the offset of `state`
# becomes the next multiple of the equivalent number of bytes, and the
# gap is filled with the padding byte of `item`.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    boundary = item.val // 8

    # Number of bytes missing to reach the next boundary (zero if the
    # offset is already aligned)
    pad_size = -state.offset % boundary

    state.offset += pad_size
    self._data += bytes([item.pad_val] * pad_size)
2adf4336 2499
25ca454b
PP
# Handles the filling item `item` (adds padding): fills the generated
# data with the padding byte of `item` until the offset of `state`
# reaches the value of the expression of `item`, which must not be
# less than the current offset.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    # Target offset
    target_offset = _Gen._eval_item_expr(item, state)

    # Going backwards isn't possible
    if target_offset < state.offset:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target_offset, state.offset
            ),
            item,
        )

    # Fill the gap, then move to the target offset
    gap_size = target_offset - state.offset
    self._data += bytes([item.pad_val] * gap_size)
    state.offset = target_offset
2519
320644e2
PP
# Handles the label item `item`: sets the label named like `item`
# within `state` to the current offset (generates no data).
def _handle_label_item(self, item: _Label, state: _GenState) -> None:
    state.labels[item.name] = state.offset
2adf4336 2523
320644e2
PP
# Handles the item `item` by calling the handler registered for its
# exact type in `self._item_handlers`, and returns what the handler
# returns (the handlers are typed as returning `None`).
#
# Raises `KeyError` if no handler is registered for the type of `item`.
def _handle_item(self, item: _Item, state: _GenState):
    return self._item_handlers[type(item)](item, state)
71aaa3f7 2528
320644e2
PP
# Generates the data for a fixed-length integer item instance having
# the value `val` and returns it.
#
# `val` must fit within the bit length of `item`, as either a signed
# or an unsigned integer. A missing byte order within `state` is
# treated as big endian.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Validate range (lowest signed value to highest unsigned value)
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not lowest <= val <= highest:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    is_big_endian = state.bo in (None, ByteOrder.BE)

    # Encode on 64 bits (to extend the sign bit whatever the value of
    # `item.len`)
    fmt = (">" if is_big_endian else "<") + ("Q" if val >= 0 else "q")
    encoded = struct.pack(fmt, val)

    # Keep only the requested length
    byte_count = item.len // 8

    if is_big_endian:
        # Big endian: the least significant bytes are the last ones
        return encoded[-byte_count:]

    # Little endian: the least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return encoded[:byte_count]
269f6eb3 2564
320644e2
PP
# Generates the data for a fixed-length floating point number item
# instance having the value `val` and returns it.
#
# The length of `item` must be 32 or 64 bits. A missing byte order
# within `state` is treated as big endian.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # Only single and double precision are supported
    if item.len != 32 and item.len != 64:
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # Format: byte order character, then type character
    order_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"

    # Encode and return result
    return struct.pack(order_char + type_char, val)
2585
320644e2
PP
# Generates the data for a fixed-length number item instance and
# returns it.
#
# The expression of `item` is evaluated within `state`; the type of
# the result (`int` or `float`) selects the encoding.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    # Compute value (may be a floating point number)
    val = self._eval_item_expr(item, state, accept_float=True)

    # Floating point number?
    if type(val) is not int:
        assert type(val) is float
        return self._gen_fl_float_item_inst_data(val, item, state)

    # Integer
    return self._gen_fl_int_item_inst_data(val, item, state)
05f81895 2598
320644e2
PP
# Generates the data for all the fixed-length number item instances
# and writes it at the correct offset within `self._data`.
#
# If generating the data of an instance raises a `ParseError`, then
# this method adds the parsing error messages saved with the instance
# to the error before raising it again.
def _gen_fl_num_item_insts(self):
    for pending_inst in self._fl_num_item_insts:
        try:
            # Bytes of this instance, using its own state snapshot
            chunk = self._gen_fl_num_item_inst_data(
                pending_inst.item, pending_inst.state
            )
        except ParseError as exc:
            # Add all the saved parse error messages for this
            # instance, the most recently saved one first.
            for saved_msg in reversed(pending_inst.parse_error_msgs):
                _add_error_msg(exc, saved_msg.text, saved_msg.text_location)

            raise

        # Overwrite the reserved bytes within `self._data`
        begin = pending_inst.offset_in_data
        end = begin + len(chunk)
        self._data[begin:end] = chunk
2adf4336
PP
2616
# Generates the data (`self._data`) and final state
# (`self._final_state`) from `group` and the initial state `state`.
#
# `state` is updated in place by the item handlers: the final state is
# this same object.
def _gen(self, group: _Group, state: _GenState) -> None:
    # Initial state
    self._data = bytearray()

    # Item handlers (exact item type to bound handler method);
    # _handle_item() looks up this table.
    self._item_handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _LitStr: self._handle_lit_str_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # Handle the group item, _not_ removing the immediate labels
    # because the `labels` property offers them.
    self._handle_group_item(group, state, False)

    # This is actually the final state
    self._final_state = state

    # Generate all the fixed-length number bytes now that we know
    # their full state
    self._gen_fl_num_item_insts()
2653
71aaa3f7
PP
2654
# Returns a `ParseResult` instance containing the bytes encoded by the
# input string `normand`.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to their values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Missing dictionaries are empty ones
    init_variables = {} if init_variables is None else init_variables
    init_labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data from the parsed items
    parser = _Parser(normand, init_variables, init_labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        init_variables,
        init_labels,
        init_offset,
        init_byte_order,
    )

    # Wrap the generated data and the final state
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2696
2697
f5dcb24c
PP
# Raises a command-line error with the message `msg`.
#
# The error is a `RuntimeError` of which the message is `msg` with a
# `Command-line error: ` prefix.
def _raise_cli_error(msg: str) -> NoReturn:
    raise RuntimeError("Command-line error: {}".format(msg))
2701
2702
b2410769
PP
# Returns the `int` or `float` value out of the CLI assignment value
# `s` (leading and trailing whitespace is ignored).
#
# A label (`is_label` is `True`) only accepts an integer.
#
# Raises a command-line error if `s` has an unexpected format.
def _val_from_assign_val_str(s: str, is_label: bool):
    s = s.strip()

    # Only a variable may be a floating point number
    float_m = None if is_label else _const_float_pat.fullmatch(s)

    if float_m is not None:
        return float(float_m.group(0))

    # Integer?
    int_m = _const_int_pat.fullmatch(s)

    if int_m is not None:
        return int(_norm_const_int(int_m.group(0)), 0)

    # Neither
    if is_label:
        exp = "an integer"
    else:
        exp = "a number"

    _raise_cli_error("Invalid assignment value `{}`: expecting {}".format(s, exp))
2722
2723
# Returns a dictionary of names to values from the list of strings
# `args` containing `NAME=VAL` entries (empty if `args` is `None`).
#
# If `is_str_only` is `True`, then each value is the raw `VAL` string.
# Otherwise, each value is the number which
# _val_from_assign_val_str() returns for `VAL` and `is_label`.
#
# Raises a command-line error if an entry isn't a valid assignment.
def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
    res = {}  # type: VariablesT

    if args is None:
        return res

    # `NAME=VAL`, with optional whitespace around `=`
    assign_pat = re.compile(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment `{}`".format(arg))

        name = m.group(1)
        val_str = m.group(2)
        res[name] = (
            val_str if is_str_only else _val_from_assign_val_str(val_str, is_label)
        )

    return res
2746
2747
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The input text is read from the input file, or from the standard
# input if there's no input file path.
#
# Raises a command-line error (see _raise_cli_error()) on an invalid
# assignment or on a negative initial offset.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels: the string variables are added after the
    # numeric ones, so a string variable replaces a numeric variable
    # having the same name.
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset (include the offending value in the message)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
71aaa3f7 2840
71aaa3f7 2841
f5dcb24c
PP
# CLI entry point without exception handling.
#
# Parses the Normand text `normand` with the initial offset `offset`,
# byte order `bo`, variables `variables`, and labels `labels`, and
# writes the generated bytes to the standard output (binary).
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
) -> None:
    sys.stdout.buffer.write(parse(normand, variables, labels, offset, bo).data)
71aaa3f7
PP
2851
2852
# Prints the exception message `msg` to the standard error and exits
# with status 1.
#
# The printed message has no leading or trailing whitespace and always
# ends with a period.
def _fail(msg: str) -> NoReturn:
    # Strip first so that the period directly follows the text, even
    # when `msg` ends with whitespace or a newline
    msg = msg.strip()

    if not msg.endswith("."):
        msg += "."

    print(msg, file=sys.stderr)
    sys.exit(1)
2860
2861
# CLI entry point.
#
# Any error is reported with _fail(). For a parsing error, there's one
# output line per message, the last message of the error first, each
# line having the absolute input path (if any), the line and column
# numbers, and the text of the message.
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        # Location prefix: absolute input path, if any
        if args[0] is None:
            prefix = ""
        else:
            prefix = "{}:".format(os.path.abspath(args[0]))

        lines = []  # type: List[str]

        for msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                msg.text_location.line_no,
                msg.text_location.col_no,
                msg.text,
            )

            # Terminate the line unless it's already punctuated
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2893
2894
# Run the command-line tool when this module is executed as a script
# (nothing happens on import).
if __name__ == "__main__":
    _run_cli()
This page took 0.146019 seconds and 4 git commands to generate.