Make it possible to specify more than one byte with `%`
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
6dd69a2a 33__version__ = "0.16.0"
71aaa3f7 34__all__ = [
320644e2
PP
35 "__author__",
36 "__version__",
71aaa3f7 37 "ByteOrder",
320644e2 38 "LabelsT",
71aaa3f7
PP
39 "parse",
40 "ParseError",
f5dcb24c 41 "ParseErrorMessage",
71aaa3f7 42 "ParseResult",
e57a18e1 43 "TextLocation",
e57a18e1 44 "VariablesT",
71aaa3f7
PP
45]
46
47import re
48import abc
49import ast
50import sys
320644e2 51import copy
71aaa3f7 52import enum
05f81895 53import math
71aaa3f7 54import struct
e57a18e1
PP
55import typing
56from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
71aaa3f7
PP
57
58
59# Text location (line and column numbers).
class TextLocation:
    # A (line number, column number) pair.
    #
    # Instances are meant to be built through _create() only: calling
    # the class directly always raises `NotImplementedError`.

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Allocate without going through __init__(), which is disabled
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        coords = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*coords)
2adf4336 86
71aaa3f7
PP
87
88# Any item.
class _Item:
    # Base of all the parsed items: only remembers where the item was
    # found within the source text.
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Source text location (as given at construction time).
    @property
    def text_loc(self):
        return self._text_loc
97
2adf4336
PP
98
99# Scalar item.
class _ScalarItem(_Item):
    # Size of this item (bytes).
    #
    # Concrete scalar items override this property. Note that the
    # `abc.abstractmethod` marker is informative here: `_Item` doesn't
    # use the `abc.ABCMeta` metaclass, so instantiation isn't blocked.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
106
107
108# A repeatable item.
2adf4336 109class _RepableItem:
71aaa3f7
PP
110 pass
111
112
113# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # Holds the value `val` of a single byte found at `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # A byte item always occupies exactly one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        # The value is shown in hexadecimal
        val_str = hex(self._val)
        loc_str = repr(self._text_loc)
        return "_Byte({}, {})".format(val_str, loc_str)
71aaa3f7
PP
130
131
132# String.
class _Str(_ScalarItem, _RepableItem):
    # Holds the already encoded bytes `data` of a literal string found
    # at `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Size (bytes) of the encoded data.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        data_str = repr(self._data)
        loc_str = repr(self._text_loc)
        return "_Str({}, {})".format(data_str, loc_str)
71aaa3f7
PP
149
150
151# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # The member values are the lowercase byte order names.

    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
159
160
2adf4336
PP
161# Byte order setting.
class _SetBo(_Item):
    # Holds the byte order `bo` requested at `text_loc`.
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        bo_str = repr(self._bo)
        loc_str = repr(self._text_loc)
        return "_SetBo({}, {})".format(bo_str, loc_str)
71aaa3f7
PP
173
174
175# Label.
class _Label(_Item):
    # Holds the name `name` of a label found at `text_loc`.
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        name_str = repr(self._name)
        loc_str = repr(self._text_loc)
        return "_Label({}, {})".format(name_str, loc_str)
71aaa3f7
PP
188
189
2adf4336
PP
190# Offset setting.
class _SetOffset(_Item):
    # Holds the offset value `val` requested at `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        val_str = repr(self._val)
        loc_str = repr(self._text_loc)
        return "_SetOffset({}, {})".format(val_str, loc_str)
203
204
205# Offset alignment.
class _AlignOffset(_Item):
    # Holds the alignment value `val` and the padding byte value
    # `pad_val` of an offset alignment found at `text_loc`.
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        parts = (self._val, self._pad_val, self._text_loc)
        return "_AlignOffset({}, {}, {})".format(*map(repr, parts))
71aaa3f7
PP
226
227
228# Mixin of containing an AST expression and its string.
229class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
25ca454b
PP
245# Fill until some offset.
class _FillUntil(_Item, _ExprMixin):
    # Holds the expression (string `expr_str`, node `expr`) and the
    # padding byte value `pad_val` of a "fill until" item found at
    # `text_loc`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # `_Item` part first, then the expression part explicitly
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._pad_val, self._text_loc)
        return "_FillUntil({}, {}, {}, {})".format(*map(repr, parts))
266
267
2adf4336
PP
268# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Holds the variable name `name` and the expression (string
    # `expr_str`, node `expr`) of an assignment found at `text_loc`.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # `_Item` part first, then the expression part explicitly
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        parts = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({}, {}, {}, {})".format(*map(repr, parts))
289
290
269f6eb3
PP
291# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    # Holds the expression (string `expr_str`, node `expr`) and the
    # length `len` (bits) of a fixed-length number found at `text_loc`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # `_Item` part first, then the expression part explicitly
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight, rounded down.
    @property
    def size(self):
        byte_count = self._len // 8
        return byte_count

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._len, self._text_loc)
        return "_FlNum({}, {}, {}, {})".format(*map(repr, parts))
316
317
05f81895
PP
318# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # Holds the expression (string `expr_str`, node `expr`) of a LEB128
    # integer found at `text_loc`.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        # `_Item` part first, then the expression part explicitly
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the runtime class name so that subclasses get a
        # representation bearing their own name
        cls_name = self.__class__.__name__
        parts = (self._expr_str, self._expr, self._text_loc)
        return "{}({}, {}, {})".format(cls_name, *map(repr, parts))
331
332
333# Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself is the "unsigned"
    # tag.
    pass
336
337
338# Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself is the "signed"
    # tag.
    pass
341
342
71aaa3f7 343# Group of items.
class _Group(_Item, _RepableItem):
    # Holds the items `items` of a group found at `text_loc`.
    #
    # The list is kept as is (not copied).
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        items_str = repr(self._items)
        loc_str = repr(self._text_loc)
        return "_Group({}, {})".format(items_str, loc_str)
71aaa3f7
PP
356
357
358# Repetition item.
2adf4336
PP
class _Rep(_Item, _ExprMixin):
    # Holds the item to repeat `item` and the expression (string
    # `expr_str`, node `expr`) of a repetition found at `text_loc`.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # `_Item` part first, then the expression part explicitly
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        parts = (self._item, self._expr_str, self._expr, self._text_loc)
        return "_Rep({}, {}, {}, {})".format(*map(repr, parts))
379
380
27d52a19
PP
381# Conditional item.
class _Cond(_Item, _ExprMixin):
    # Holds both alternatives (`true_item` and `false_item`) and the
    # expression (string `expr_str`, node `expr`) of a conditional found
    # at `text_loc`.
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # `_Item` part first, then the expression part explicitly
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        parts = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({}, {}, {}, {}, {})".format(*map(repr, parts))
414
415
320644e2
PP
416# Macro definition item.
class _MacroDef(_Item):
    # Holds the name `name`, the parameter names `param_names`, and the
    # body `group` of a macro definition found at `text_loc`.
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._param_names, self._group = name, param_names, group

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def param_names(self):
        return self._param_names

    # Contained items.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        parts = (self._name, self._param_names, self._group, self._text_loc)
        return "_MacroDef({}, {}, {}, {})".format(*map(repr, parts))
448
449
450# Macro expansion parameter.
class _MacroExpParam:
    # Holds the expression (string `expr_str`, node `expr`) of a macro
    # expansion parameter found at `text_loc`.
    #
    # This is a standalone class: it derives neither from `_Item` nor
    # from `_ExprMixin`.
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression.
    @property
    def expr(self):
        return self._expr

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        parts = (self._expr_str, self._expr, self._text_loc)
        return "_MacroExpParam({}, {}, {})".format(*map(repr, parts))
476
477
478# Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    # Holds the macro name `name` and the parameters `params` of a macro
    # expansion found at `text_loc`.
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        parts = (self._name, self._params, self._text_loc)
        return "_MacroExp({}, {}, {})".format(*map(repr, parts))
2adf4336
PP
506
507
f5dcb24c
PP
508# A parsing error message: a string and a text location.
class ParseErrorMessage:
    # A message text with the source text location it refers to.
    #
    # Instances are meant to be built through _create() only: calling
    # the class directly always raises `NotImplementedError`.

    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        # Allocate without going through __init__(), which is disabled
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create().
    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Message text.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
532
533
534# A parsing error containing one or more messages (`ParseErrorMessage`).
71aaa3f7
PP
class ParseError(RuntimeError):
    # Instances are meant to be built through _create() only: calling
    # the class directly always raises `NotImplementedError`.

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Allocate without going through __init__(), which is disabled
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create(): `msg` becomes both the
    # exception message and the first entry of the message list.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a new message (text `msg`, location `text_loc`) to the
    # message list.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
71aaa3f7
PP
563
564
565# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # Build the parsing error from its first message, then raise it
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
568
569
f5dcb24c
PP
570# Adds a message to the parsing error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    # Thin wrapper: the message is appended after the existing ones
    # (`exc` is modified in place; nothing is returned)
    add_msg = exc._add_msg  # pyright: ignore[reportPrivateUsage]
    add_msg(msg, text_loc)
573
574
575# Appends a message to the parsing error `exc` and reraises it.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    # Append the extra message to `exc` itself, then raise that same
    # exception object again
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
579
580
e57a18e1
PP
581# Variables dictionary type (for type hints).
582VariablesT = Dict[str, Union[int, float]]
583
584
585# Labels dictionary type (for type hints).
586LabelsT = Dict[str, int]
71aaa3f7
PP
587
588
589# Python name pattern.
590_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
591
592
320644e2
PP
593# Macro definition dictionary.
594_MacroDefsT = Dict[str, _MacroDef]
595
596
71aaa3f7
PP
597# Normand parser.
598#
599# The constructor accepts a Normand input. After building, use the `res`
600# property to get the resulting main group.
601class _Parser:
602 # Builds a parser to parse the Normand input `normand`, parsing
603 # immediately.
e57a18e1 604 def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
71aaa3f7
PP
605 self._normand = normand
606 self._at = 0
607 self._line_no = 1
608 self._col_no = 1
609 self._label_names = set(labels.keys())
610 self._var_names = set(variables.keys())
320644e2 611 self._macro_defs = {} # type: _MacroDefsT
71aaa3f7
PP
612 self._parse()
613
614 # Result (main group).
615 @property
616 def res(self):
617 return self._res
618
320644e2
PP
619 # Macro definitions.
620 @property
621 def macro_defs(self):
622 return self._macro_defs
623
71aaa3f7
PP
624 # Current text location.
625 @property
626 def _text_loc(self):
e57a18e1 627 return TextLocation._create( # pyright: ignore[reportPrivateUsage]
71aaa3f7
PP
628 self._line_no, self._col_no
629 )
630
631 # Returns `True` if this parser is done parsing.
632 def _is_done(self):
633 return self._at == len(self._normand)
634
635 # Returns `True` if this parser isn't done parsing.
636 def _isnt_done(self):
637 return not self._is_done()
638
639 # Raises a parse error, creating it using the message `msg` and the
640 # current text location.
641 def _raise_error(self, msg: str) -> NoReturn:
642 _raise_error(msg, self._text_loc)
643
644 # Tries to make the pattern `pat` match the current substring,
645 # returning the match object and updating `self._at`,
646 # `self._line_no`, and `self._col_no` on success.
647 def _try_parse_pat(self, pat: Pattern[str]):
648 m = pat.match(self._normand, self._at)
649
650 if m is None:
651 return
652
653 # Skip matched string
654 self._at += len(m.group(0))
655
656 # Update line number
657 self._line_no += m.group(0).count("\n")
658
659 # Update column number
660 for i in reversed(range(self._at)):
661 if self._normand[i] == "\n" or i == 0:
662 if i == 0:
663 self._col_no = self._at + 1
664 else:
665 self._col_no = self._at - i
666
667 break
668
669 # Return match object
670 return m
671
672 # Expects the pattern `pat` to match the current substring,
673 # returning the match object and updating `self._at`,
674 # `self._line_no`, and `self._col_no` on success, or raising a parse
675 # error with the message `error_msg` on error.
676 def _expect_pat(self, pat: Pattern[str], error_msg: str):
677 # Match
678 m = self._try_parse_pat(pat)
679
680 if m is None:
681 # No match: error
682 self._raise_error(error_msg)
683
684 # Return match object
685 return m
686
687 # Pattern for _skip_ws_and_comments()
688 _ws_or_syms_or_comments_pat = re.compile(
25ca454b 689 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
71aaa3f7
PP
690 )
691
692 # Skips as many whitespaces, insignificant symbol characters, and
693 # comments as possible.
694 def _skip_ws_and_comments(self):
695 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
696
320644e2
PP
697 # Pattern for _skip_ws()
698 _ws_pat = re.compile(r"\s*")
699
700 # Skips as many whitespaces as possible.
701 def _skip_ws(self):
702 self._try_parse_pat(self._ws_pat)
703
71aaa3f7
PP
704 # Pattern for _try_parse_hex_byte()
705 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
706
707 # Tries to parse a hexadecimal byte, returning a byte item on
708 # success.
709 def _try_parse_hex_byte(self):
0e8e3169
PP
710 begin_text_loc = self._text_loc
711
71aaa3f7
PP
712 # Match initial nibble
713 m_high = self._try_parse_pat(self._nibble_pat)
714
715 if m_high is None:
716 # No match
717 return
718
719 # Expect another nibble
720 self._skip_ws_and_comments()
721 m_low = self._expect_pat(
722 self._nibble_pat, "Expecting another hexadecimal nibble"
723 )
724
725 # Return item
0e8e3169 726 return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)
71aaa3f7
PP
727
728 # Patterns for _try_parse_bin_byte()
729 _bin_byte_bit_pat = re.compile(r"[01]")
6dd69a2a 730 _bin_byte_prefix_pat = re.compile(r"%+")
71aaa3f7
PP
731
732 # Tries to parse a binary byte, returning a byte item on success.
733 def _try_parse_bin_byte(self):
0e8e3169
PP
734 begin_text_loc = self._text_loc
735
71aaa3f7 736 # Match prefix
6dd69a2a
PP
737 m = self._try_parse_pat(self._bin_byte_prefix_pat)
738
739 if m is None:
71aaa3f7
PP
740 # No match
741 return
742
6dd69a2a
PP
743 # Expect as many bytes as there are `%` prefixes
744 items = [] # type: List[_Item]
71aaa3f7 745
6dd69a2a 746 for _ in range(len(m.group(0))):
71aaa3f7 747 self._skip_ws_and_comments()
6dd69a2a
PP
748 byte_text_loc = self._text_loc
749 bits = [] # type: List[str]
750
751 # Expect eight bits
752 for _ in range(8):
753 self._skip_ws_and_comments()
754 m = self._expect_pat(
755 self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
756 )
757 bits.append(m.group(0))
758
759 items.append(_Byte(int("".join(bits), 2), byte_text_loc))
71aaa3f7
PP
760
761 # Return item
6dd69a2a
PP
762 if len(items) == 1:
763 return items[0]
764
765 # As group
766 return _Group(items, begin_text_loc)
71aaa3f7
PP
767
768 # Patterns for _try_parse_dec_byte()
320644e2 769 _dec_byte_prefix_pat = re.compile(r"\$")
71aaa3f7
PP
770 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
771
772 # Tries to parse a decimal byte, returning a byte item on success.
773 def _try_parse_dec_byte(self):
0e8e3169
PP
774 begin_text_loc = self._text_loc
775
71aaa3f7
PP
776 # Match prefix
777 if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
778 # No match
779 return
780
781 # Expect the value
320644e2 782 self._skip_ws()
71aaa3f7
PP
783 m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
784
785 # Compute value
786 val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)
787
788 # Validate
789 if val < -128 or val > 255:
0e8e3169 790 _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)
71aaa3f7
PP
791
792 # Two's complement
05f81895 793 val %= 256
71aaa3f7
PP
794
795 # Return item
0e8e3169 796 return _Byte(val, begin_text_loc)
71aaa3f7
PP
797
798 # Tries to parse a byte, returning a byte item on success.
799 def _try_parse_byte(self):
800 # Hexadecimal
801 item = self._try_parse_hex_byte()
802
803 if item is not None:
804 return item
805
806 # Binary
807 item = self._try_parse_bin_byte()
808
809 if item is not None:
810 return item
811
812 # Decimal
813 item = self._try_parse_dec_byte()
814
815 if item is not None:
816 return item
817
818 # Patterns for _try_parse_str()
819 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
820 _str_suffix_pat = re.compile(r'"')
821 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
822
823 # Strings corresponding to escape sequence characters
824 _str_escape_seq_strs = {
825 "0": "\0",
826 "a": "\a",
827 "b": "\b",
828 "e": "\x1b",
829 "f": "\f",
830 "n": "\n",
831 "r": "\r",
832 "t": "\t",
833 "v": "\v",
834 "\\": "\\",
835 '"': '"',
836 }
837
838 # Tries to parse a string, returning a string item on success.
839 def _try_parse_str(self):
0e8e3169
PP
840 begin_text_loc = self._text_loc
841
71aaa3f7
PP
842 # Match prefix
843 m = self._try_parse_pat(self._str_prefix_pat)
844
845 if m is None:
846 # No match
847 return
848
849 # Get encoding
850 encoding = "utf8"
851
852 if m.group("len") is not None:
853 encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))
854
855 # Actual string
856 m = self._expect_pat(self._str_str_pat, "Expecting a literal string")
857
858 # Expect end of string
859 self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')
860
861 # Replace escape sequences
862 val = m.group(0)
863
864 for ec in '0abefnrtv"\\':
865 val = val.replace(r"\{}".format(ec), self._str_escape_seq_strs[ec])
866
867 # Encode
868 data = val.encode(encoding)
869
870 # Return item
0e8e3169 871 return _Str(data, begin_text_loc)
71aaa3f7 872
320644e2
PP
873 # Common right parenthesis pattern
874 _right_paren_pat = re.compile(r"\)")
875
71aaa3f7 876 # Patterns for _try_parse_group()
320644e2 877 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
71aaa3f7
PP
878
879 # Tries to parse a group, returning a group item on success.
880 def _try_parse_group(self):
0e8e3169
PP
881 begin_text_loc = self._text_loc
882
71aaa3f7 883 # Match prefix
261c5ecf
PP
884 m_open = self._try_parse_pat(self._group_prefix_pat)
885
886 if m_open is None:
71aaa3f7
PP
887 # No match
888 return
889
890 # Parse items
891 items = self._parse_items()
892
893 # Expect end of group
894 self._skip_ws_and_comments()
261c5ecf
PP
895
896 if m_open.group(0) == "(":
320644e2 897 pat = self._right_paren_pat
261c5ecf
PP
898 exp = ")"
899 else:
900 pat = self._block_end_pat
901 exp = "!end"
902
903 self._expect_pat(pat, "Expecting an item or `{}` (end of group)".format(exp))
71aaa3f7
PP
904
905 # Return item
0e8e3169 906 return _Group(items, begin_text_loc)
71aaa3f7
PP
907
908 # Returns a stripped expression string and an AST expression node
909 # from the expression string `expr_str` at text location `text_loc`.
e57a18e1 910 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
71aaa3f7
PP
911 # Create an expression node from the expression string
912 expr_str = expr_str.strip().replace("\n", " ")
913
914 try:
915 expr = ast.parse(expr_str, mode="eval")
916 except SyntaxError:
917 _raise_error(
918 "Invalid expression `{}`: invalid syntax".format(expr_str),
919 text_loc,
920 )
921
922 return expr_str, expr
923
269f6eb3 924 # Patterns for _try_parse_num_and_attr()
05f81895 925 _val_expr_pat = re.compile(r"([^}:]+):\s*")
269f6eb3 926 _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
05f81895 927 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
71aaa3f7 928
05f81895
PP
929 # Tries to parse a value and attribute (fixed length in bits or
930 # `leb128`), returning a value item on success.
269f6eb3 931 def _try_parse_num_and_attr(self):
71aaa3f7
PP
932 begin_text_loc = self._text_loc
933
934 # Match
935 m_expr = self._try_parse_pat(self._val_expr_pat)
936
937 if m_expr is None:
938 # No match
939 return
940
71aaa3f7
PP
941 # Create an expression node from the expression string
942 expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)
943
05f81895 944 # Length?
269f6eb3 945 m_attr = self._try_parse_pat(self._fl_num_len_attr_pat)
05f81895
PP
946
947 if m_attr is None:
948 # LEB128?
949 m_attr = self._try_parse_pat(self._leb128_int_attr_pat)
950
951 if m_attr is None:
952 # At this point it's invalid
953 self._raise_error(
954 "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
955 )
956
957 # Return LEB128 integer item
958 cls = _ULeb128Int if m_attr.group(1) == "u" else _SLeb128Int
959 return cls(expr_str, expr, begin_text_loc)
960 else:
269f6eb3
PP
961 # Return fixed-length number item
962 return _FlNum(
05f81895
PP
963 expr_str,
964 expr,
965 int(m_attr.group(0)),
966 begin_text_loc,
967 )
71aaa3f7 968
320644e2
PP
969 # Patterns for _try_parse_var_assign()
970 _var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
971 _var_assign_expr_pat = re.compile(r"[^}]+")
71aaa3f7 972
2adf4336
PP
973 # Tries to parse a variable assignment, returning a variable
974 # assignment item on success.
975 def _try_parse_var_assign(self):
71aaa3f7
PP
976 begin_text_loc = self._text_loc
977
978 # Match
320644e2 979 m = self._try_parse_pat(self._var_assign_name_equal_pat)
71aaa3f7
PP
980
981 if m is None:
982 # No match
983 return
984
985 # Validate name
320644e2 986 name = m.group(1)
71aaa3f7
PP
987
988 if name == _icitte_name:
0e8e3169
PP
989 _raise_error(
990 "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
991 )
71aaa3f7
PP
992
993 if name in self._label_names:
0e8e3169 994 _raise_error("Existing label named `{}`".format(name), begin_text_loc)
71aaa3f7 995
320644e2
PP
996 # Expect an expression
997 self._skip_ws()
998 m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
71aaa3f7
PP
999
1000 # Create an expression node from the expression string
320644e2
PP
1001 expr_str, expr = self._ast_expr_from_str(m.group(0), begin_text_loc)
1002
1003 # Add to known variable names
1004 self._var_names.add(name)
71aaa3f7
PP
1005
1006 # Return item
2adf4336 1007 return _VarAssign(
71aaa3f7
PP
1008 name,
1009 expr_str,
1010 expr,
0e8e3169 1011 begin_text_loc,
71aaa3f7
PP
1012 )
1013
2adf4336 1014 # Pattern for _try_parse_set_bo()
71aaa3f7
PP
1015 _bo_pat = re.compile(r"[bl]e")
1016
2adf4336
PP
1017 # Tries to parse a byte order name, returning a byte order setting
1018 # item on success.
1019 def _try_parse_set_bo(self):
0e8e3169
PP
1020 begin_text_loc = self._text_loc
1021
71aaa3f7
PP
1022 # Match
1023 m = self._try_parse_pat(self._bo_pat)
1024
1025 if m is None:
1026 # No match
1027 return
1028
1029 # Return corresponding item
1030 if m.group(0) == "be":
2adf4336 1031 return _SetBo(ByteOrder.BE, begin_text_loc)
71aaa3f7
PP
1032 else:
1033 assert m.group(0) == "le"
2adf4336 1034 return _SetBo(ByteOrder.LE, begin_text_loc)
71aaa3f7
PP
1035
1036 # Patterns for _try_parse_val_or_bo()
320644e2
PP
1037 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1038 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
71aaa3f7 1039
2adf4336
PP
1040 # Tries to parse a value, a variable assignment, or a byte order
1041 # setting, returning an item on success.
1042 def _try_parse_val_or_var_assign_or_set_bo(self):
71aaa3f7 1043 # Match prefix
2adf4336 1044 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
71aaa3f7
PP
1045 # No match
1046 return
1047
320644e2
PP
1048 self._skip_ws()
1049
2adf4336
PP
1050 # Variable assignment item?
1051 item = self._try_parse_var_assign()
71aaa3f7
PP
1052
1053 if item is None:
269f6eb3
PP
1054 # Number item?
1055 item = self._try_parse_num_and_attr()
71aaa3f7
PP
1056
1057 if item is None:
2adf4336
PP
1058 # Byte order setting item?
1059 item = self._try_parse_set_bo()
71aaa3f7
PP
1060
1061 if item is None:
1062 # At this point it's invalid
2adf4336 1063 self._raise_error(
269f6eb3 1064 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
2adf4336 1065 )
71aaa3f7
PP
1066
1067 # Expect suffix
320644e2 1068 self._skip_ws()
2adf4336 1069 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
71aaa3f7
PP
1070 return item
1071
fc21bb27
PP
1072 # Returns a normalized version (so as to be parseable by int()) of
1073 # the constant integer string `s`, possibly negative, dealing with
1074 # any radix suffix.
1075 @staticmethod
1076 def _norm_const_int(s: str):
1077 neg = ""
1078 pos = s
1079
1080 if s.startswith("-"):
1081 neg = "-"
1082 pos = s[1:]
1083
1084 for r in "xXoObB":
1085 if pos.startswith("0" + r):
1086 # Already correct
1087 return s
1088
1089 # Try suffix
1090 asm_suf_base = {
1091 "h": "x",
1092 "H": "x",
1093 "q": "o",
1094 "Q": "o",
1095 "o": "o",
1096 "O": "o",
1097 "b": "b",
1098 "B": "B",
1099 }
1100
1101 for suf in asm_suf_base:
1102 if pos[-1] == suf:
1103 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
1104
1105 return s
1106
320644e2 1107 # Common constant integer patterns
fc21bb27
PP
1108 _pos_const_int_pat = re.compile(
1109 r"0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+"
1110 )
320644e2 1111 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
71aaa3f7 1112
2adf4336
PP
1113 # Tries to parse an offset setting value (after the initial `<`),
1114 # returning an offset item on success.
1115 def _try_parse_set_offset_val(self):
0e8e3169
PP
1116 begin_text_loc = self._text_loc
1117
71aaa3f7
PP
1118 # Match
1119 m = self._try_parse_pat(self._pos_const_int_pat)
1120
1121 if m is None:
1122 # No match
1123 return
1124
1125 # Return item
fc21bb27 1126 return _SetOffset(int(self._norm_const_int(m.group(0)), 0), begin_text_loc)
71aaa3f7
PP
1127
1128 # Tries to parse a label name (after the initial `<`), returning a
1129 # label item on success.
def _try_parse_label_name(self):
    # Any error below is reported at the beginning of the name
    name_loc = self._text_loc
    match = self._try_parse_pat(_py_name_pat)

    if match is None:
        # Not a name
        return None

    label = match.group(0)

    # Reasons to reject the name, checked in this order
    checks = (
        (
            label == _icitte_name,
            "`{}` is a reserved label name".format(_icitte_name),
        ),
        (
            label in self._label_names,
            "Duplicate label name `{}`".format(label),
        ),
        (
            label in self._var_names,
            "Existing variable named `{}`".format(label),
        ),
    )

    for rejected, msg in checks:
        if rejected:
            _raise_error(msg, name_loc)

    # Remember the name so that a later duplicate is detected
    self._label_names.add(label)
    return _Label(label, name_loc)
71aaa3f7 1159
2adf4336 1160 # Patterns for _try_parse_label_or_set_offset()
320644e2
PP
1161 _label_set_offset_prefix_pat = re.compile(r"<")
1162 _label_set_offset_suffix_pat = re.compile(r">")
71aaa3f7 1163
2adf4336
PP
1164 # Tries to parse a label or an offset setting, returning an item on
1165 # success.
1166 def _try_parse_label_or_set_offset(self):
71aaa3f7 1167 # Match prefix
2adf4336 1168 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
71aaa3f7
PP
1169 # No match
1170 return
1171
2adf4336 1172 # Offset setting item?
320644e2 1173 self._skip_ws()
2adf4336 1174 item = self._try_parse_set_offset_val()
71aaa3f7
PP
1175
1176 if item is None:
1177 # Label item?
1178 item = self._try_parse_label_name()
1179
1180 if item is None:
1181 # At this point it's invalid
2adf4336 1182 self._raise_error("Expecting a label name or an offset setting value")
71aaa3f7
PP
1183
1184 # Expect suffix
320644e2 1185 self._skip_ws()
2adf4336 1186 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
71aaa3f7
PP
1187 return item
1188
25ca454b
PP
1189 # Pattern for _parse_pad_val()
1190 _pad_val_prefix_pat = re.compile(r"~")
1191
1192 # Tries to parse a padding value, returning the padding value, or 0
1193 # if none.
1194 def _parse_pad_val(self):
1195 # Padding value?
1196 self._skip_ws()
1197 pad_val = 0
1198
1199 if self._try_parse_pat(self._pad_val_prefix_pat) is not None:
1200 self._skip_ws()
1201 pad_val_text_loc = self._text_loc
1202 m = self._expect_pat(
1203 self._pos_const_int_pat,
1204 "Expecting a positive constant integer (byte value)",
1205 )
1206
1207 # Validate
fc21bb27 1208 pad_val = int(self._norm_const_int(m.group(0)), 0)
25ca454b
PP
1209
1210 if pad_val > 255:
1211 _raise_error(
1212 "Invalid padding byte value {}".format(pad_val),
1213 pad_val_text_loc,
1214 )
1215
1216 return pad_val
1217
676f6189 1218 # Patterns for _try_parse_align_offset()
320644e2
PP
1219 _align_offset_prefix_pat = re.compile(r"@")
1220 _align_offset_val_pat = re.compile(r"\d+")
676f6189
PP
1221
1222 # Tries to parse an offset alignment, returning an offset alignment
1223 # item on success.
def _try_parse_align_offset(self):
    # The item is located at its `@` prefix
    start_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        return None

    # Alignment value (in bits)
    self._skip_ws()
    align_loc = self._text_loc
    match = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(match.group(0))

    # Only whole bytes are supported
    if align <= 0 or align % 8 != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            align_loc,
        )

    # Optional padding byte value
    pad_byte = self._parse_pad_val()
    return _AlignOffset(align, pad_byte, start_loc)
676f6189 1256
25ca454b
PP
1257 # Patterns for _try_parse_fill_until()
1258 _fill_until_prefix_pat = re.compile(r"\+")
1259 _fill_until_pad_val_prefix_pat = re.compile(r"~")
676f6189 1260
25ca454b
PP
1261 # Tries to parse a filling, returning a filling item on success.
def _try_parse_fill_until(self):
    # The item is located at its `+` prefix
    start_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        return None

    # Constant integer, name, or `{` expression `}`
    self._skip_ws()
    target_str, target_expr = self._expect_const_int_name_expr(True)

    # Optional padding byte value
    pad_byte = self._parse_pad_val()
    return _FillUntil(target_str, target_expr, pad_byte, start_loc)
676f6189 1279
e57a18e1 1280 # Patterns for _expect_const_int_name_expr()
320644e2
PP
1281 _inner_expr_prefix_pat = re.compile(r"\{")
1282 _inner_expr_pat = re.compile(r"[^}]+")
1283 _inner_expr_suffix_pat = re.compile(r"\}")
1284
1285 # Parses a constant integer if `accept_const_int` is `True`
1286 # (possibly negative if `allow_neg` is `True`), a name, or an
1287 # expression within `{` and `}`.
1288 def _expect_const_int_name_expr(
1289 self, accept_const_int: bool, allow_neg: bool = False
1290 ):
e57a18e1
PP
1291 expr_text_loc = self._text_loc
1292
1293 # Constant integer?
27d52a19
PP
1294 m = None
1295
320644e2
PP
1296 if accept_const_int:
1297 m = self._try_parse_pat(self._const_int_pat)
e57a18e1
PP
1298
1299 if m is None:
1300 # Name?
1301 m = self._try_parse_pat(_py_name_pat)
1302
1303 if m is None:
1304 # Expression?
320644e2
PP
1305 if self._try_parse_pat(self._inner_expr_prefix_pat) is None:
1306 pos_msg = "" if allow_neg else "positive "
1307
1308 if accept_const_int:
1309 mid_msg = "a {}constant integer, a name, or `{{`".format(
1310 pos_msg
1311 )
27d52a19
PP
1312 else:
1313 mid_msg = "a name or `{`"
1314
e57a18e1 1315 # At this point it's invalid
27d52a19 1316 self._raise_error("Expecting {}".format(mid_msg))
e57a18e1
PP
1317
1318 # Expect an expression
320644e2 1319 self._skip_ws()
e57a18e1 1320 expr_text_loc = self._text_loc
320644e2 1321 m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")
e57a18e1
PP
1322 expr_str = m.group(0)
1323
1324 # Expect `}`
320644e2
PP
1325 self._skip_ws()
1326 self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
e57a18e1
PP
1327 else:
1328 expr_str = m.group(0)
1329 else:
320644e2
PP
1330 if m.group("neg") == "-" and not allow_neg:
1331 _raise_error("Expecting a positive constant integer", expr_text_loc)
1332
fc21bb27 1333 expr_str = self._norm_const_int(m.group(0))
e57a18e1
PP
1334
1335 return self._ast_expr_from_str(expr_str, expr_text_loc)
1336
27d52a19
PP
1337 # Parses the multiplier expression of a repetition (block or
1338 # post-item) and returns the expression string and AST node.
1339 def _expect_rep_mul_expr(self):
320644e2 1340 return self._expect_const_int_name_expr(True)
27d52a19
PP
1341
1342 # Common block end pattern
320644e2 1343 _block_end_pat = re.compile(r"!end\b")
27d52a19 1344
e57a18e1 1345 # Pattern for _try_parse_rep_block()
320644e2 1346 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
e57a18e1
PP
1347
1348 # Tries to parse a repetition block, returning a repetition item on
1349 # success.
def _try_parse_rep_block(self):
    # The repetition item is located at its `!repeat`/`!r` keyword
    start_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        return None

    # Multiplier
    self._skip_ws_and_comments()
    mul_str, mul_expr = self._expect_rep_mul_expr()

    # Repeated items
    self._skip_ws_and_comments()
    body_loc = self._text_loc
    body_items = self._parse_items()

    # `!end`
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    # The repeated items are wrapped in a group
    return _Rep(_Group(body_items, body_loc), mul_str, mul_expr, start_loc)
1375
27d52a19 1376 # Patterns for _try_parse_cond_block()
320644e2 1377 _cond_block_prefix_pat = re.compile(r"!if\b")
12b5dbc0 1378 _cond_block_else_pat = re.compile(r"!else\b")
27d52a19
PP
1379
1380 # Tries to parse a conditional block, returning a conditional item
1381 # on success.
def _try_parse_cond_block(self):
    # The conditional item is located at its `!if` keyword
    cond_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        return None

    # Condition: a name or an expression (no constant integer)
    self._skip_ws_and_comments()
    cond_str, cond_expr = self._expect_const_int_name_expr(False)

    # Items to use when the condition is true
    self._skip_ws_and_comments()
    then_loc = self._text_loc
    then_items = self._parse_items()

    # Without `!else`, the "false" group is empty and located at the
    # beginning of the whole block.
    else_items = []  # type: List[_Item]
    else_loc = cond_loc

    self._skip_ws_and_comments()

    if self._try_parse_pat(self._cond_block_else_pat) is not None:
        # Items to use when the condition is false
        self._skip_ws_and_comments()
        else_loc = self._text_loc
        else_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    return _Cond(
        _Group(then_items, then_loc),
        _Group(else_items, else_loc),
        cond_str,
        cond_expr,
        cond_loc,
    )
27d52a19 1424
320644e2
PP
1425 # Common left parenthesis pattern
1426 _left_paren_pat = re.compile(r"\(")
1427
1428 # Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
1429 _macro_params_comma_pat = re.compile(",")
1430
1431 # Patterns for _try_parse_macro_def()
1432 _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1433
1434 # Tries to parse a macro definition, adding it to `self._macro_defs`
1435 # and returning `True` on success.
def _try_parse_macro_def(self):
    # The definition is located at its `!macro`/`!m` keyword
    def_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        return False

    # Macro name: must not be already defined
    self._skip_ws()
    name_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_loc)

    # `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names until `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter names
            break

        # A comma is required before every name except the first
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_loc,
            )

        param_names.append(param_name)

    # Parse the macro items with empty sets of known variable and
    # label names, then put back copies of the sets as they were
    # before the macro.
    self._skip_ws_and_comments()
    body_loc = self._text_loc
    outer_var_names = self._var_names.copy()
    outer_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = outer_var_names
    self._label_names = outer_label_names

    # `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register the definition under its name
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, _Group(body_items, body_loc), def_loc
    )
    return True
1511
1512 # Patterns for _try_parse_macro_exp()
1513 _macro_exp_prefix_pat = re.compile(r"m\b")
1514 _macro_exp_colon_pat = re.compile(r":")
1515
1516 # Tries to parse a macro expansion, returning a macro expansion item
1517 # on success.
def _try_parse_macro_exp(self):
    # The expansion item is located at its `m` prefix
    exp_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        return None

    # `:`
    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Name of a macro which is already defined
    self._skip_ws()
    name_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), name_loc)

    # `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values until `)`
    params_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter values
            break

        # A comma is required before every value except the first
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # A value may be a constant integer (possibly negative), a
        # name, or an expression.
        self._skip_ws()
        param_loc = self._text_loc
        val_str, val_expr = self._expect_const_int_name_expr(True, True)
        params.append(_MacroExpParam(val_str, val_expr, text_loc=param_loc))

    # The definition dictates the number of parameter values
    expected_count = len(macro_def.param_names)

    if len(params) != expected_count:
        plural = "" if len(params) == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                len(params), plural, expected_count
            ),
            params_loc,
        )

    return _MacroExp(macro_name, params, exp_loc)
1585
71aaa3f7
PP
1586 # Tries to parse a base item (anything except a repetition),
1587 # returning it on success.
1588 def _try_parse_base_item(self):
1589 # Byte item?
1590 item = self._try_parse_byte()
1591
1592 if item is not None:
1593 return item
1594
1595 # String item?
1596 item = self._try_parse_str()
1597
1598 if item is not None:
1599 return item
1600
2adf4336
PP
1601 # Value, variable assignment, or byte order setting item?
1602 item = self._try_parse_val_or_var_assign_or_set_bo()
71aaa3f7
PP
1603
1604 if item is not None:
1605 return item
1606
2adf4336
PP
1607 # Label or offset setting item?
1608 item = self._try_parse_label_or_set_offset()
71aaa3f7 1609
676f6189
PP
1610 if item is not None:
1611 return item
1612
1613 # Offset alignment item?
1614 item = self._try_parse_align_offset()
1615
25ca454b
PP
1616 if item is not None:
1617 return item
1618
1619 # Filling item?
1620 item = self._try_parse_fill_until()
1621
71aaa3f7
PP
1622 if item is not None:
1623 return item
1624
1625 # Group item?
1626 item = self._try_parse_group()
1627
1628 if item is not None:
1629 return item
1630
320644e2 1631 # Repetition block item?
e57a18e1 1632 item = self._try_parse_rep_block()
71aaa3f7 1633
e57a18e1
PP
1634 if item is not None:
1635 return item
1636
27d52a19
PP
1637 # Conditional block item?
1638 item = self._try_parse_cond_block()
1639
1640 if item is not None:
1641 return item
1642
320644e2
PP
1643 # Macro expansion?
1644 item = self._try_parse_macro_exp()
1645
1646 if item is not None:
1647 return item
1648
e57a18e1
PP
1649 # Pattern for _try_parse_rep_post()
1650 _rep_post_prefix_pat = re.compile(r"\*")
1651
1652 # Tries to parse a post-item repetition, returning the expression
1653 # string and AST expression node on success.
1654 def _try_parse_rep_post(self):
71aaa3f7 1655 # Match prefix
e57a18e1 1656 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
71aaa3f7 1657 # No match
2adf4336 1658 return
71aaa3f7 1659
e57a18e1 1660 # Return expression string and AST expression
71aaa3f7 1661 self._skip_ws_and_comments()
e57a18e1 1662 return self._expect_rep_mul_expr()
71aaa3f7 1663
1ca7b5e1
PP
1664 # Tries to parse an item, possibly followed by a repetition,
1665 # returning `True` on success.
1666 #
1667 # Appends any parsed item to `items`.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    parsed = self._try_parse_base_item()

    if parsed is None:
        # No item here: nothing is appended
        return None

    # Only a repeatable item may be followed by `*` and a multiplier
    if isinstance(parsed, _RepableItem):
        self._skip_ws_and_comments()
        rep_loc = self._text_loc
        rep = self._try_parse_rep_post()

        if rep is not None:
            # Wrap the base item within a repetition item
            mul_str, mul_expr = rep
            parsed = _Rep(parsed, mul_str, mul_expr, text_loc=rep_loc)

    items.append(parsed)
    return True
71aaa3f7
PP
1688
1689 # Parses and returns items, skipping whitespaces, insignificant
1690 # symbols, and comments when allowed, and stopping at the first
1691 # unknown character.
320644e2
PP
1692 #
1693 # Accepts and registers macro definitions if `accept_macro_defs`
1694 # is `True`.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed):
            # Got an item: try the next one
            continue

        # Not an item: the only other acceptable thing is a macro
        # definition, and only when the caller allows it.
        if not (accept_macro_defs and self._try_parse_macro_def()):
            # Unknown at this point
            break

    return parsed
1708
1709 # Parses the whole Normand input, setting `self._res` to the main
1710 # group item on success.
def _parse(self):
    if not self._normand.strip():
        # Nothing but whitespace: the result is an empty group
        self._res = _Group([], self._text_loc)
        return

    # First level items; macro definitions are accepted here
    top_items = self._parse_items(True)

    # Anything left at this point is unexpected
    self._skip_ws_and_comments()

    if self._isnt_done():
        self._raise_error(
            "Unexpected character `{}`".format(self._normand[self._at])
        )

    # Main group item
    self._res = _Group(top_items, self._text_loc)
1730
1731
1732# The return type of parse().
class ParseResult:
    # Builds an instance without going through __init__(), which is
    # disabled below.
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Direct instantiation isn't supported: use _create().
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Stores the fields of the result.
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1789
1790
1791# Raises a parse error for the item `item`, creating it using the
1792# message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # The error is located where the item itself is located
    item_loc = item.text_loc
    _raise_error(msg, item_loc)
1795
1796
1797# The `ICITTE` reserved name.
1798_icitte_name = "ICITTE"
1799
1800
2adf4336
PP
1801# Base node visitor.
1802#
1803# Calls the _visit_name() method for each name node which isn't the name
1804# of a call.
1805class _NodeVisitor(ast.NodeVisitor):
1806 def __init__(self):
71aaa3f7
PP
1807 self._parent_is_call = False
1808
1809 def generic_visit(self, node: ast.AST):
1810 if type(node) is ast.Call:
1811 self._parent_is_call = True
1812 elif type(node) is ast.Name and not self._parent_is_call:
2adf4336 1813 self._visit_name(node.id)
71aaa3f7
PP
1814
1815 super().generic_visit(node)
1816 self._parent_is_call = False
1817
2adf4336
PP
1818 @abc.abstractmethod
1819 def _visit_name(self, name: str):
1820 ...
1821
71aaa3f7 1822
2adf4336
PP
1823# Expression validator: validates that all the names within the
1824# expression are allowed.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()

        # Original expression string and its location (for the error
        # message), and the names which the expression may use.
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # `ICITTE` is always legal
        if name == _icitte_name or name in self._allowed_names:
            return

        # Illegal name: the error message lists the legal ones, sorted
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)
        legal_names_str = ", ".join(
            sorted(["`{}`".format(legal) for legal in legal_names])
        )
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
1853
1854
2adf4336
PP
1855# Generator state.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Work on copies so that the dictionaries of the caller are
        # left untouched.
        self.variables = variables.copy()
        self.labels = labels.copy()
        self.offset = offset
        self.bo = bo

    def __repr__(self):
        fields = (self.variables, self.labels, self.offset, self.bo)
        return "_GenState({}, {}, {}, {})".format(*map(repr, fields))
1873
1874
1875# Fixed-length number item instance.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._item = item
        self._offset_in_data = offset_in_data
        self._state = state
        self._parse_error_msgs = parse_error_msgs

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the instance within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state given at construction time.
    @property
    def state(self):
        return self._state

    # Parsing error messages given at construction time.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
1904
71aaa3f7 1905
2adf4336 1906# Generator of data and final state from a group item.
71aaa3f7
PP
1907#
1908# Generation happens in memory at construction time. After building, use
1909# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1910# get the resulting context.
2adf4336
PP
1911#
1912# The steps of generation are:
1913#
320644e2
PP
1914# 1. Handle each item in prefix order.
1915#
1916# The handlers append bytes to `self._data` and update some current
1917# state object (`_GenState` instance).
1918#
1919# When handling a fixed-length number item, try to evaluate its
1920# expression using the current state. If this fails, then it might be
1921# because the expression refers to a "future" label: save the current
1922# offset in `self._data` (generated data) and a snapshot of the
1923# current state within `self._fl_num_item_insts` (`_FlNumItemInst`
f5dcb24c
PP
1924# object). _gen_fl_num_item_insts() will deal with this later. A
1925# `_FlNumItemInst` instance also contains a snapshot of the current
1926# parsing error messages (`self._parse_error_msgs`) which need to be
1927# taken into account when handling the instance later.
2adf4336 1928#
320644e2
PP
1929# When handling the items of a group, keep a map of immediate label
1930# names to their offset. Then, after having processed all the items,
1931# update the relevant saved state snapshots in
1932# `self._fl_num_item_insts` with those immediate label values.
1933# _gen_fl_num_item_insts() will deal with this later.
2adf4336 1934#
320644e2
PP
1935# 2. Handle all the fixed-length number item instances of which the
1936# expression evaluation failed before.
2adf4336 1937#
320644e2
PP
1938# At this point, `self._fl_num_item_insts` contains everything that's
1939# needed to evaluate the expressions, including the values of
1940# "future" labels from the point of view of some fixed-length number
1941# item instance.
2adf4336 1942#
f5dcb24c
PP
1943# If an evaluation fails at this point, then it's a user error. Add
1944# to the parsing error all the saved parsing error messages of the
1945# instance. Those additional messages add precious context to the
1946# error.
71aaa3f7
PP
1947class _Gen:
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    self._macro_defs = macro_defs

    # Fixed-length number item instances to handle later, and current
    # parsing error messages.
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    # Generate immediately from a fresh initial state
    initial_state = _GenState(variables, labels, offset, bo)
    self._gen(group, initial_state)

# Generated bytes.
@property
def data(self):
    return self._data

# Updated variables.
@property
def variables(self):
    return self._final_state.variables

# Updated main group labels.
@property
def labels(self):
    return self._final_state.labels

# Updated offset.
@property
def offset(self):
    return self._final_state.offset

# Updated byte order.
@property
def bo(self):
    return self._final_state.bo
1986
320644e2
PP
1987 # Evaluates the expression `expr` of which the original string is
1988 # `expr_str` at the location `text_loc` considering the current
2adf4336
PP
1989 # generation state `state`.
1990 #
269f6eb3
PP
1991 # If `allow_float` is `True`, then the type of the result may be
1992 # `float` too.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    # Symbols available to the expression: the labels, then `ICITTE`
    # (current offset), then the variables. A later entry replaces an
    # earlier one having the same name.
    syms = {}  # type: VariablesT
    syms.update(state.labels)
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the names used by the expression
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # NOTE(review): the globals argument of eval() is `None`; per the
    # Python documentation this presumably makes the globals of this
    # module reachable from the expression. Confirm that this is
    # intended.
    try:
        code = compile(expr, "", "eval")
        val = eval(code, None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(val) is bool:
        val = int(val)

    # Validate the result type
    if allow_float:
        accepted_types = (int, float)
        type_msg = "`int` or `float`"
    else:
        accepted_types = (int,)
        type_msg = "`int`"

    if type(val) not in accepted_types:
        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, type_msg, type(val).__name__
            ),
            text_loc,
        )

    return val
2043
320644e2
PP
2044 # Evaluates the expression of `item` considering the current
2045 # generation state `state`.
2046 #
2047 # If `allow_float` is `True`, then the type of the result may be
2048 # `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    # The item provides the expression string, the expression node,
    # and the location to use for an error.
    expr_str = item.expr_str
    expr = item.expr
    return _Gen._eval_expr(expr_str, expr, item.text_loc, state, allow_float)
2058
2059 # Handles the byte item `item`.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    # Append the byte value, then advance the offset by the item size
    byte_val = item.val
    self._data.append(byte_val)
    state.offset += item.size
2063
2064 # Handles the string item `item`.
def _handle_str_item(self, item: _Str, state: _GenState):
    # Append the string data, then advance the offset by the item size
    str_data = item.data
    self._data += str_data
    state.offset += item.size
2068
2069 # Handles the byte order setting item `item`.
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    # The byte order of the item becomes the current one; no data is
    # generated.
    new_bo = item.bo
    state.bo = new_bo
2073
2074 # Handles the variable assignment item `item`.
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    # Evaluate the expression (a `float` result is allowed) and set or
    # replace the variable.
    new_val = self._eval_item_expr(item, state, True)
    state.variables[item.name] = new_val
2078
2079 # Handles the fixed-length number item `item`.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    # A byte order is required to encode on more than 8 bits
    if item.len > 8 and state.bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # Try to generate the bytes now. On any failure, keep what's
    # needed to (try to) generate them later: the item, where its
    # bytes go within `self._data`, and deep copies of the current
    # state and parsing error messages.
    try:
        encoded = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        pending = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(pending)

        # Reserve space (zeros) in `self._data` for this instance
        encoded = bytes([0] * (item.len // 8))

    # Append the bytes and advance the offset accordingly
    self._data += encoded
    state.offset += len(encoded)
2112
05f81895
PP
2113 # Returns the size, in bytes, required to encode the value `val`
2114 # with LEB128 (signed version if `is_signed` is `True`).
2115 @staticmethod
2116 def _leb128_size_for_val(val: int, is_signed: bool):
2117 if val < 0:
2118 # Equivalent upper bound.
2119 #
2120 # For example, if `val` is -128, then the full integer for
2121 # this number of bits would be [-128, 127].
2122 val = -val - 1
2123
2124 # Number of bits (add one for the sign if needed)
2125 bits = val.bit_length() + int(is_signed)
2126
2127 if bits == 0:
2128 bits = 1
2129
2130 # Seven bits per byte
2131 return math.ceil(bits / 7)
2132
320644e2
PP
2133 # Handles the LEB128 integer item `item`.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    # Compute the value (must be an `int`)
    val = self._eval_item_expr(item, state, False)
    is_signed = type(item) is _SLeb128Int

    # The unsigned version cannot represent a negative value: the
    # loop below would otherwise emit bytes which decode to an
    # unrelated number (0x7f for -1, for example).
    if val < 0 and not is_signed:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,} for an unsigned LEB128 integer".format(
                item.expr_str, val
            ),
            item,
        )

    # Size in bytes
    size = self._leb128_size_for_val(val, is_signed)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= ~0x80

    # Update offset
    state.offset += size
27d52a19 2152
320644e2
PP
# Handles the group item `item` by handling each of its subitems in
# order.
#
# The labels found directly within `item` (its immediate labels) are
# deleted from `state` afterwards, unless `remove_immediate_labels` is
# `False`.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Fixed-length number item instances recorded from this index on
    # were recorded while handling this group.
    deferred_start = len(self._fl_num_item_insts)

    # Name to offset for the labels found directly within this group
    own_labels = {}  # type: LabelsT

    for child in item.items:
        if type(child) is _Label:
            # Remember the offset at which this label sits
            own_labels[child.name] = state.offset

        self._handle_item(child, state)

    if remove_immediate_labels:
        # Drop the labels of this group from the current state
        for label_name in own_labels:
            del state.labels[label_name]

    # A state snapshot taken while handling this group may predate some
    # of its labels: give all of them to each snapshot.
    for deferred in self._fl_num_item_insts[deferred_start:]:
        deferred.state.labels.update(own_labels)
2adf4336 2178
320644e2
PP
# Handles the repetition item `item`: handles the repeated item
# (`item.item`) as many times as the repetition expression evaluates
# to.
#
# Reports a negative count with _raise_error_for_item().
def _handle_rep_item(self, item: _Rep, state: _GenState):
    # How many times to repeat
    count = _Gen._eval_item_expr(item, state)

    # A negative count makes no sense
    if count < 0:
        error_text = "Invalid expression `{}`: unexpected negative result {:,}".format(
            item.expr_str, count
        )
        _raise_error_for_item(error_text, item)

    # Handle the repeated item `count` times
    repeated = item.item

    for _ in range(count):
        self._handle_item(repeated, state)
2adf4336 2196
# Handles the conditional item `item`: handles `item.true_item` when
# the condition expression evaluates to a true value, or
# `item.false_item` otherwise.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    # Select the branch from the truthiness of the condition
    if _Gen._eval_item_expr(item, state):
        selected = item.true_item
    else:
        selected = item.false_item

    # Generate the data of the selected branch
    self._handle_item(selected, state)
2adf4336 2207
320644e2
PP
# Builds and returns the state with which to handle the items of the
# macro which the macro expansion item `item` expands.
#
# The returned state starts at the offset and byte order of
# `init_state`, and its variables are the macro parameters, each one
# set to the value of the corresponding expression of `item` as
# evaluated within `init_state`.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    # Fresh state built from two empty dictionaries plus the current
    # offset and byte order.
    macro_state = _GenState({}, {}, init_state.offset, init_state.bo)

    # Names of the parameters which the macro definition declares
    declared_names = self._macro_defs[item.name].param_names

    # Pair each declared name with the expression given at the
    # expansion site.
    for declared_name, arg in zip(declared_names, item.params):
        arg_val = _Gen._eval_expr(
            arg.expr_str, arg.expr, arg.text_loc, init_state, True
        )
        macro_state.variables[declared_name] = arg_val

    return macro_state
2adf4336 2224
320644e2
PP
# Handles the macro expansion item `item`.
#
# The group of the expanded macro is handled with a dedicated state
# (see _eval_macro_exp_params()); the only change to `state` is its
# offset, which advances by the number of bytes which the expansion
# appends to `self._data`.
#
# Any `ParseError` raised during the expansion is passed to
# _augment_error() along with a "While expanding the macro" message
# located at `item`.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    parse_error_msg_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # New state
        exp_state = self._eval_macro_exp_params(item, state)

        # Process the contained group
        init_data_size = len(self._data)
        parse_error_msg = (
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                parse_error_msg_text, item.text_loc
            )
        )

        # Keep the message in `self._parse_error_msgs` only while the
        # group is handled: the fixed-length number item instances
        # deferred during that time copy this list to report a complete
        # error later.
        self._parse_error_msgs.append(parse_error_msg)
        self._handle_item(self._macro_defs[item.name].group, exp_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        # NOTE(review): presumably _augment_error() always raises;
        # otherwise `init_data_size` could be unbound below. Confirm.
        _augment_error(exc, parse_error_msg_text, item.text_loc)

    # Update state offset and return
    state.offset += len(self._data) - init_data_size
676f6189 2248
320644e2
PP
# Handles the offset setting item `item`: replaces the current offset
# of `state` with `item.val`.
#
# This doesn't add anything to `self._data`.
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    state.offset = item.val
2adf4336 2252
# Handles the offset alignment item `item`: moves the offset of `state`
# up to the next multiple of the alignment and appends one
# `item.pad_val` byte to `self._data` for each skipped position.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    # Alignment in bytes (`item.val` divided by eight)
    boundary = item.val // 8
    before = state.offset

    # Round up to a multiple of `boundary`; an offset which is already
    # a multiple stays where it is.
    state.offset = (before + boundary - 1) // boundary * boundary

    # Add padding
    pad_count = state.offset - before
    self._data += bytes([item.pad_val] * pad_count)
2adf4336 2259
25ca454b
PP
# Handles the filling item `item`: appends `item.pad_val` bytes to
# `self._data` until the offset of `state` reaches the value of the
# expression of `item`.
#
# Reports a target offset which is less than the current one with
# _raise_error_for_item().
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    # Offset to reach
    target = _Gen._eval_item_expr(item, state)

    # Filling can't move backwards
    if target < state.offset:
        error_text = "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
            item.expr_str, target, state.offset
        )
        _raise_error_for_item(error_text, item)

    # Add padding
    gap = target - state.offset
    self._data += bytes([item.pad_val] * gap)

    # The target is the new current offset
    state.offset = target
2279
320644e2
PP
# Handles the label item `item`: records the current offset of `state`
# as the value of the label named `item.name` in `state.labels`.
def _handle_label_item(self, item: _Label, state: _GenState):
    state.labels[item.name] = state.offset
2adf4336 2283
320644e2
PP
# Handles the item `item` by calling the handler which
# `self._item_handlers` registers for its exact type, and returns
# whatever this handler returns.
#
# The lookup uses `type(item)` as is: there's no fallback to a handler
# registered for a base class.
def _handle_item(self, item: _Item, state: _GenState):
    return self._item_handlers[type(item)](item, state)
71aaa3f7 2288
320644e2
PP
# Generates the data for a fixed-length integer item instance having
# the value `val` and returns it.
#
# `val` must fit either the signed or the unsigned range of `item.len`
# bits; otherwise this method reports the problem with
# _raise_error_for_item(). A negative value is encoded as two's
# complement.
#
# The byte order is the one of `state`, or big endian if it's `None`.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Validate range
    if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    # Select the byte order (big endian when none is set)
    if state.bo in (None, ByteOrder.BE):
        byte_order = "big"
    else:
        assert state.bo == ByteOrder.LE
        byte_order = "little"

    # Encode directly on the requested number of bytes.
    #
    # Packing on 64 bits and keeping a slice limits the length to 64
    # bits; int.to_bytes() has no such limit and produces the same
    # bytes for any value which passes the range check above.
    return val.to_bytes(item.len // 8, byte_order, signed=val < 0)
269f6eb3 2324
320644e2
PP
# Generates the data for a fixed-length floating point number item
# instance having the value `val` and returns it.
#
# Only the 32-bit and 64-bit lengths are valid; any other length is
# reported with _raise_error_for_item().
#
# The byte order is the one of `state`, or big endian if it's `None`.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # Validate length
    if item.len != 32 and item.len != 64:
        error_text = "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
            item.len, val
        )
        _raise_error_for_item(error_text, item)

    # `struct` byte order character
    order_char = "<"

    if state.bo in (None, ByteOrder.BE):
        order_char = ">"

    # `struct` type character: `f` is 4 bytes, `d` is 8 bytes
    type_char = "f" if item.len == 32 else "d"

    # Encode and return result
    return struct.pack("{}{}".format(order_char, type_char), val)
2345
320644e2
PP
# Generates the data for a fixed-length number item instance and
# returns it.
#
# The expression of `item` is evaluated within `state`; the type of the
# result (`int` or `float`) selects the encoding.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    # Compute value
    result = self._eval_item_expr(item, state, True)

    # Floating point number path
    if type(result) is not int:
        assert type(result) is float
        return self._gen_fl_float_item_inst_data(result, item, state)

    # Integer path
    return self._gen_fl_int_item_inst_data(result, item, state)
05f81895 2358
320644e2
PP
# Generates the data of each recorded fixed-length number item instance
# (`self._fl_num_item_insts`) from the state saved with it, and writes
# this data over the bytes at its recorded offset within `self._data`.
#
# If generating the data of an instance raises `ParseError`, then the
# parse error messages saved with this instance are added to the
# exception before it propagates.
def _gen_fl_num_item_insts(self):
    for deferred in self._fl_num_item_insts:
        try:
            encoded = self._gen_fl_num_item_inst_data(deferred.item, deferred.state)
        except ParseError as exc:
            # Add the saved messages, last saved first
            for saved_msg in reversed(deferred.parse_error_msgs):
                _add_error_msg(exc, saved_msg.text, saved_msg.text_location)

            raise

        # Overwrite the bytes of this instance in place
        begin = deferred.offset_in_data
        end = begin + len(encoded)
        self._data[begin:end] = encoded
2adf4336
PP
2376
# Generates the data (`self._data`) and final state
# (`self._final_state`) from `group` and the initial state `state`.
#
# This works in two passes: handling `group` appends the bytes of all
# the items to `self._data`, and then _gen_fl_num_item_insts() writes
# the bytes of the recorded fixed-length number item instances over
# their positions in `self._data`.
#
# `state` is modified in place and becomes the final state.
def _gen(self, group: _Group, state: _GenState):
    # Initial state
    self._data = bytearray()

    # Item handlers
    #
    # _handle_item() looks up this table with the exact type of the
    # item to handle. Both LEB128 integer types share a single handler.
    self._item_handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: self._handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # Handle the group item, _not_ removing the immediate labels
    # because the `labels` property offers them.
    self._handle_group_item(group, state, False)

    # This is actually the final state
    self._final_state = state

    # Generate all the fixed-length number bytes now that we know
    # their full state
    self._gen_fl_num_item_insts()
2412
71aaa3f7
PP
2413
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes as well as the final variables,
# labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values; `None` means no initial variables.
# A variable name must not be the reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values; `None` means no initial labels. A label
# name must not be the reserved name `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # A new dictionary replaces each missing initial one
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the bytes from the result
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    # Wrap what the generator offers
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2455
2456
f5dcb24c
PP
# Raises a command-line error with the message `msg`.
#
# The exception is a `RuntimeError` of which the text is `msg` prefixed
# with "Command-line error: ". This function never returns.
def _raise_cli_error(msg: str) -> NoReturn:
    raise RuntimeError("Command-line error: {}".format(msg))
2460
2461
# Returns a dictionary of names to integers built from the list of
# strings `args`, each one being a `NAME=VAL` entry where `NAME` matches
# `_py_name_pat` and `VAL` is made of decimal digits only.
#
# Returns an empty dictionary if `args` is `None`.
#
# Raises a command-line error on the first entry which doesn't have the
# expected form.
def _dict_from_arg(args: Optional[List[str]]):
    # Same pattern for all the entries
    assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))
    res = {}  # type: LabelsT

    for entry in args or []:
        m = assign_pat.match(entry)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(entry))

        name = m.group(1)
        res[name] = int(m.group(2))

    return res
2479
2480
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The Normand input text comes from the file at the input path, or from
# the standard input without an input path.
#
# Raises a command-line error (see _raise_cli_error()) if a variable or
# label assignment is malformed, or if the initial offset is negative.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, labels
71aaa3f7 2564
71aaa3f7 2565
f5dcb24c
PP
# CLI entry point without exception handling: parses the Normand input
# text `normand` from the initial offset `offset`, byte order `bo`,
# variables `variables`, and labels `labels`, and writes the resulting
# bytes to the standard output.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    # parse() takes the variables and labels before the offset
    res = parse(normand, variables, labels, offset, bo)

    # Raw bytes: bypass the text layer of the standard output
    sys.stdout.buffer.write(res.data)
71aaa3f7
PP
2575
2576
2577# Prints the exception message `msg` and exits with status 1.
2578def _fail(msg: str) -> NoReturn:
2579 if not msg.endswith("."):
2580 msg += "."
2581
f5dcb24c 2582 print(msg.strip(), file=sys.stderr)
71aaa3f7
PP
2583 sys.exit(1)
2584
2585
# CLI entry point.
#
# Any failure ends with _fail(). A `ParseError` becomes one line per
# message, in reverse order of `exc.messages`, each one having the form
# `LINE:COL - TEXT`, prefixed with the absolute input path and a colon
# when the input comes from a file.
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        # Location prefix: only with an input file
        prefix = ""

        if args[0] is not None:
            prefix = "{}:".format(os.path.abspath(args[0]))

        lines = []

        for msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                msg.text_location.line_no,
                msg.text_location.col_no,
                msg.text,
            )

            # End the line with a period unless it already ends with
            # some punctuation.
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2617
2618
# Run the command-line tool when this module is executed as a script
# (not when it's imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.186217 seconds and 4 git commands to generate.