Add variable repetition count support
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
71aaa3f7 32__author__ = "Philippe Proulx"
2adf4336 33__version__ = "0.3.0"
71aaa3f7
PP
34__all__ = [
35 "ByteOrder",
36 "parse",
37 "ParseError",
38 "ParseResult",
39 "TextLoc",
40 "VarsT",
41 "__author__",
42 "__version__",
43]
44
45import re
46import abc
47import ast
48import sys
49import enum
50import struct
2adf4336
PP
51from typing import (
52 Any,
53 Set,
54 Dict,
55 List,
56 Tuple,
57 Union,
58 Pattern,
59 Callable,
60 NoReturn,
61 Optional,
62)
71aaa3f7
PP
63
64
# Text location (line and column numbers).
#
# Instances are built with the private _create() class method; calling
# the constructor directly raises `NotImplementedError`.
class TextLoc:
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass __init__(), which is deliberately unusable
        text_loc = cls.__new__(cls)
        text_loc._init(line_no, col_no)
        return text_loc

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        fmt = "TextLoc({}, {})"
        return fmt.format(self._line_no, self._col_no)
92
71aaa3f7
PP
93
# Any item.
#
# This is the base of all the item classes: it only keeps the source
# text location of the item.
class _Item:
    def __init__(self, text_loc: TextLoc):
        # Where this item was found within the parsed text
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
103
2adf4336
PP
104
# Scalar item.
#
# A scalar item has a size (bytes) which its concrete class provides
# through the `size` property.
class _ScalarItem(_Item):
    # Returns the size, in bytes, of this item.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        pass
112
113
# A repeatable item.
#
# This is a pure marker class: it has no members of its own.
class _RepableItem(object):
    pass
117
118
# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)

        # Value of the byte
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # Size (bytes): always one.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
136
137
# String.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)

        # Already encoded string
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Size (bytes): length of the encoded data.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        data_repr = repr(self._data)
        return "_Str({}, {})".format(data_repr, self._text_loc)
155
156
# Byte order.
#
# The value of each member is its lowercase name.
@enum.unique
class ByteOrder(enum.Enum):
    BE = "be"  # Big endian
    LE = "le"  # Little endian
165
166
2adf4336
PP
# Byte order setting.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)

        # Byte order to set
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        bo_repr = repr(self._bo)
        return "_SetBo({}, {})".format(bo_repr, self._text_loc)
71aaa3f7
PP
179
180
# Label.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)

        # Name of the label
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        name_repr = repr(self._name)
        return "_Label({}, {})".format(name_repr, self._text_loc)
194
195
2adf4336
PP
# Offset setting.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)

        # Offset to set
        self._val = val

    # Offset value.
    @property
    def val(self):
        return self._val

    def __repr__(self):
        val_repr = repr(self._val)
        return "_SetOffset({}, {})".format(val_repr, self._text_loc)
71aaa3f7
PP
209
210
# Mixin of containing an AST expression and its string.
#
# Classes using this mixin call its __init__() explicitly.
class _ExprMixin:
    def __init__(self, expr_str: str, expr: ast.Expression):
        self._expr_str, self._expr = expr_str, expr

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression node to evaluate.
    @property
    def expr(self):
        return self._expr
226
227
2adf4336
PP
# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        # Initialize both bases explicitly: _Item.__init__() doesn't
        # chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

        # Name of the variable to assign
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        name_repr = repr(self._name)
        expr_str_repr = repr(self._expr_str)
        expr_repr = repr(self._expr)
        return "_VarAssign({}, {}, {}, {})".format(
            name_repr, expr_str_repr, expr_repr, self._text_loc
        )
246
247
# Value, possibly needing more than one byte.
class _Val(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        # Initialize both bases explicitly: _Item.__init__() doesn't
        # chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

        # Length of the value (bits)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length (bits) divided by eight.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        expr_str_repr = repr(self._expr_str)
        expr_repr = repr(self._expr)
        len_repr = repr(self._len)
        return "_Val({}, {}, {}, {})".format(
            expr_str_repr, expr_repr, len_repr, self._text_loc
        )
270
271
# Group of items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)

        # Items of this group (kept as is, not copied)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        items_repr = repr(self._items)
        return "_Group({}, {})".format(items_repr, self._text_loc)
285
286
# Repetition item.
#
# The contained expression is the repetition count expression.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        # Initialize both bases explicitly: _Item.__init__() doesn't
        # chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

        # Item to repeat
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        item_repr = repr(self._item)
        expr_str_repr = repr(self._expr_str)
        expr_repr = repr(self._expr)
        return "_Rep({}, {}, {}, {})".format(
            item_repr, expr_str_repr, expr_repr, self._text_loc
        )
305
306
2adf4336
PP
# Expression item type: any of the item classes which use `_ExprMixin`
# (value, variable assignment, and repetition).
_ExprItemT = Union[_Val, _VarAssign, _Rep]
309
310
71aaa3f7
PP
# A parsing error containing a message and a text location.
#
# Instances are built with the private _create() class method; calling
# the constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        # Bypass __init__(), which is deliberately unusable
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create(): forwards `msg` to the
    # base exception and keeps `text_loc`.
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
330
331
# Raises a parsing error built from the message `msg` and the text
# location `text_loc`.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
335
336
# Variable/label dictionary type: maps a variable or label name to an
# integral value.
VarsT = Dict[str, int]
339
340
# Python name pattern: an ASCII letter or underscore followed with any
# number of ASCII letters, digits, and underscores.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
343
344
# Normand parser.
#
# The constructor accepts a Normand input. After building, use the `res`
# property to get the resulting main group.
class _Parser:
    # Builds a parser to parse the Normand input `normand`, parsing
    # immediately.
    #
    # The keys of `variables` and `labels` seed the sets of known
    # variable and label names used to detect name conflicts.
    #
    # Raises `ParseError` on any parsing error.
    def __init__(self, normand: str, variables: VarsT, labels: VarsT):
        self._normand = normand
        self._at = 0
        self._line_no = 1
        self._col_no = 1
        self._label_names = set(labels.keys())
        self._var_names = set(variables.keys())
        self._parse()

    # Result (main group).
    @property
    def res(self):
        return self._res

    # Current text location.
    @property
    def _text_loc(self):
        return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
            self._line_no, self._col_no
        )

    # Returns `True` if this parser is done parsing.
    def _is_done(self):
        return self._at == len(self._normand)

    # Returns `True` if this parser isn't done parsing.
    def _isnt_done(self):
        return not self._is_done()

    # Raises a parse error, creating it using the message `msg` and the
    # current text location.
    def _raise_error(self, msg: str) -> NoReturn:
        _raise_error(msg, self._text_loc)

    # Tries to make the pattern `pat` match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success.
    #
    # Returns `None` without changing anything if `pat` doesn't match.
    def _try_parse_pat(self, pat: Pattern[str]):
        m = pat.match(self._normand, self._at)

        if m is None:
            return

        matched_str = m.group(0)

        # Skip matched string
        self._at += len(matched_str)

        # Update line number
        self._line_no += matched_str.count("\n")

        # Update column number: it's the distance between the current
        # position and the last newline before it. When there's no
        # such newline, str.rfind() returns -1, which makes the column
        # number `self._at + 1` (first line).
        self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

        # Return match object
        return m

    # Expects the pattern `pat` to match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success, or raising a parse
    # error with the message `error_msg` on error.
    def _expect_pat(self, pat: Pattern[str], error_msg: str):
        # Match
        m = self._try_parse_pat(pat)

        if m is None:
            # No match: error
            self._raise_error(error_msg)

        # Return match object
        return m

    # Pattern for _skip_ws_and_comments()
    _ws_or_syms_or_comments_pat = re.compile(
        r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
    )

    # Skips as many whitespaces, insignificant symbol characters, and
    # comments as possible.
    def _skip_ws_and_comments(self):
        self._try_parse_pat(self._ws_or_syms_or_comments_pat)

    # Pattern for _try_parse_hex_byte()
    _nibble_pat = re.compile(r"[A-Fa-f0-9]")

    # Tries to parse a hexadecimal byte, returning a byte item on
    # success.
    def _try_parse_hex_byte(self):
        begin_text_loc = self._text_loc

        # Match initial nibble
        m_high = self._try_parse_pat(self._nibble_pat)

        if m_high is None:
            # No match
            return

        # Expect another nibble
        self._skip_ws_and_comments()
        m_low = self._expect_pat(
            self._nibble_pat, "Expecting another hexadecimal nibble"
        )

        # Return item
        return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)

    # Patterns for _try_parse_bin_byte()
    _bin_byte_bit_pat = re.compile(r"[01]")
    _bin_byte_prefix_pat = re.compile(r"%")

    # Tries to parse a binary byte, returning a byte item on success.
    def _try_parse_bin_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
            # No match
            return

        # Expect eight bits
        bits = []  # type: List[str]

        for _ in range(8):
            self._skip_ws_and_comments()
            m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
            bits.append(m.group(0))

        # Return item
        return _Byte(int("".join(bits), 2), begin_text_loc)

    # Patterns for _try_parse_dec_byte()
    _dec_byte_prefix_pat = re.compile(r"\$\s*")
    _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

    # Tries to parse a decimal byte, returning a byte item on success.
    def _try_parse_dec_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
            # No match
            return

        # Expect the value
        m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

        # Compute value
        val = int(m.group("val"))

        if m.group("neg") == "-":
            val = -val

        # Validate
        if val < -128 or val > 255:
            _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

        # Two's complement
        val = val % 256

        # Return item
        return _Byte(val, begin_text_loc)

    # Tries to parse a byte, returning a byte item on success.
    def _try_parse_byte(self):
        # Try, in order: hexadecimal, binary, and decimal
        for try_parse in (
            self._try_parse_hex_byte,
            self._try_parse_bin_byte,
            self._try_parse_dec_byte,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Patterns for _try_parse_str()
    _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
    _str_suffix_pat = re.compile(r'"')
    _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
    _str_escape_seq_pat = re.compile(r"\\(.)")

    # Strings corresponding to escape sequence characters
    _str_escape_seq_strs = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "e": "\x1b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
    }

    # Tries to parse a string, returning a string item on success.
    def _try_parse_str(self):
        begin_text_loc = self._text_loc

        # Match prefix
        m = self._try_parse_pat(self._str_prefix_pat)

        if m is None:
            # No match
            return

        # Get encoding
        encoding = "utf8"

        if m.group("len") is not None:
            encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

        # Actual string
        m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

        # Expect end of string
        self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

        # Replace escape sequences in a single left-to-right pass so
        # that an escaped backslash never combines with the character
        # which follows it (for example, `\\n` is a backslash followed
        # with `n`, not a newline). Unknown escape sequences remain as
        # is.
        escape_seq_strs = self._str_escape_seq_strs

        def replace_escape_seq(esc_m):  # type: ignore
            return escape_seq_strs.get(esc_m.group(1), esc_m.group(0))

        val = self._str_escape_seq_pat.sub(replace_escape_seq, m.group(0))

        # Encode
        data = val.encode(encoding)

        # Return item
        return _Str(data, begin_text_loc)

    # Patterns for _try_parse_group()
    _group_prefix_pat = re.compile(r"\(")
    _group_suffix_pat = re.compile(r"\)")

    # Tries to parse a group, returning a group item on success.
    def _try_parse_group(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._group_prefix_pat) is None:
            # No match
            return

        # Parse items
        items = self._parse_items()

        # Expect end of group
        self._skip_ws_and_comments()
        self._expect_pat(
            self._group_suffix_pat, "Expecting an item or `)` (end of group)"
        )

        # Return item
        return _Group(items, begin_text_loc)

    # Returns a stripped expression string and an AST expression node
    # from the expression string `expr_str` at text location `text_loc`.
    #
    # Raises a parse error at `text_loc` if `expr_str` isn't a valid
    # Python expression.
    def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
        # Create an expression node from the expression string
        expr_str = expr_str.strip().replace("\n", " ")

        try:
            expr = ast.parse(expr_str, mode="eval")
        except SyntaxError:
            _raise_error(
                "Invalid expression `{}`: invalid syntax".format(expr_str),
                text_loc,
            )

        return expr_str, expr

    # Patterns for _try_parse_val_and_len()
    _val_expr_pat = re.compile(r"([^}:]+):")
    _val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)")

    # Tries to parse a value and length, returning a value item on
    # success.
    def _try_parse_val_and_len(self):
        begin_text_loc = self._text_loc

        # Match
        m_expr = self._try_parse_pat(self._val_expr_pat)

        if m_expr is None:
            # No match
            return

        # Expect a length
        m_len = self._expect_pat(
            self._val_len_pat, "Expecting a length (multiple of eight bits)"
        )

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

        # Return item
        return _Val(
            expr_str,
            expr,
            int(m_len.group(1)),
            begin_text_loc,
        )

    # Pattern for _try_parse_var_assign()
    _var_assign_pat = re.compile(
        r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
    )

    # Tries to parse a variable assignment, returning a variable
    # assignment item on success.
    def _try_parse_var_assign(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._var_assign_pat)

        if m is None:
            # No match
            return

        # Validate name
        name = m.group("name")

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Existing label named `{}`".format(name), begin_text_loc)

        # Add to known variable names
        self._var_names.add(name)

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

        # Return item
        return _VarAssign(
            name,
            expr_str,
            expr,
            begin_text_loc,
        )

    # Pattern for _try_parse_set_bo()
    _bo_pat = re.compile(r"[bl]e")

    # Tries to parse a byte order name, returning a byte order setting
    # item on success.
    def _try_parse_set_bo(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._bo_pat)

        if m is None:
            # No match
            return

        # Return corresponding item
        if m.group(0) == "be":
            return _SetBo(ByteOrder.BE, begin_text_loc)
        else:
            assert m.group(0) == "le"
            return _SetBo(ByteOrder.LE, begin_text_loc)

    # Patterns for _try_parse_val_or_var_assign_or_set_bo()
    _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
    _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")

    # Tries to parse a value, a variable assignment, or a byte order
    # setting, returning an item on success.
    def _try_parse_val_or_var_assign_or_set_bo(self):
        # Match prefix
        if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
            # No match
            return

        # Variable assignment item?
        item = self._try_parse_var_assign()

        if item is None:
            # Value item?
            item = self._try_parse_val_and_len()

        if item is None:
            # Byte order setting item?
            item = self._try_parse_set_bo()

        if item is None:
            # At this point it's invalid
            self._raise_error(
                "Expecting a value, a variable assignment, or a byte order setting"
            )

        # Expect suffix
        self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
        return item

    # Pattern for _try_parse_set_offset_val() and _try_parse_rep()
    _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

    # Tries to parse an offset setting value (after the initial `<`),
    # returning an offset item on success.
    def _try_parse_set_offset_val(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is None:
            # No match
            return

        # Return item
        return _SetOffset(int(m.group(0), 0), begin_text_loc)

    # Tries to parse a label name (after the initial `<`), returning a
    # label item on success.
    def _try_parse_label_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(_py_name_pat)

        if m is None:
            # No match
            return

        # Validate
        name = m.group(0)

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

        if name in self._var_names:
            _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

        # Add to known label names
        self._label_names.add(name)

        # Return item
        return _Label(name, begin_text_loc)

    # Patterns for _try_parse_label_or_set_offset()
    _label_set_offset_prefix_pat = re.compile(r"<\s*")
    _label_set_offset_suffix_pat = re.compile(r"\s*>")

    # Tries to parse a label or an offset setting, returning an item on
    # success.
    def _try_parse_label_or_set_offset(self):
        # Match prefix
        if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
            # No match
            return

        # Offset setting item?
        item = self._try_parse_set_offset_val()

        if item is None:
            # Label item?
            item = self._try_parse_label_name()

        if item is None:
            # At this point it's invalid
            self._raise_error("Expecting a label name or an offset setting value")

        # Expect suffix
        self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
        return item

    # Tries to parse a base item (anything except a repetition),
    # returning it on success.
    def _try_parse_base_item(self):
        # Try, in order: byte; string; value, variable assignment, or
        # byte order setting; label or offset setting; group.
        for try_parse in (
            self._try_parse_byte,
            self._try_parse_str,
            self._try_parse_val_or_var_assign_or_set_bo,
            self._try_parse_label_or_set_offset,
            self._try_parse_group,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Patterns for _try_parse_rep()
    _rep_prefix_pat = re.compile(r"\*\s*")
    _rep_expr_prefix_pat = re.compile(r"\{")
    _rep_expr_pat = re.compile(r"[^}]+")
    _rep_expr_suffix_pat = re.compile(r"\}")

    # Tries to parse a repetition, returning the expression string and
    # AST expression node on success.
    #
    # The repetition count is either a constant integer or an
    # expression between `{` and `}`.
    def _try_parse_rep(self):
        # Match prefix
        if self._try_parse_pat(self._rep_prefix_pat) is None:
            # No match
            return

        self._skip_ws_and_comments()

        # Integer?
        expr_str_loc = self._text_loc
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is not None:
            return self._ast_expr_from_str(m.group(0), expr_str_loc)

        # Expression?
        if self._try_parse_pat(self._rep_expr_prefix_pat) is None:
            # At this point it's invalid
            self._raise_error("Expecting a positive integral multiplier or `{`")

        # Expect an expression, validating it before expecting `}` so
        # that an invalid expression is reported first
        expr_str_loc = self._text_loc
        m = self._expect_pat(self._rep_expr_pat, "Expecting an expression")
        rep_ret = self._ast_expr_from_str(m.group(0), expr_str_loc)

        # Expect `}`
        self._expect_pat(self._rep_expr_suffix_pat, "Expecting `}`")
        return rep_ret

    # Tries to parse an item, possibly followed by a repetition,
    # returning `True` on success.
    #
    # Appends any parsed item to `items`.
    def _try_append_item(self, items: List[_Item]):
        self._skip_ws_and_comments()

        # Parse a base item
        item = self._try_parse_base_item()

        if item is None:
            # No item
            return False

        # Parse repetition if the base item is repeatable
        if isinstance(item, _RepableItem):
            self._skip_ws_and_comments()
            rep_text_loc = self._text_loc
            rep_ret = self._try_parse_rep()

            if rep_ret is not None:
                item = _Rep(item, rep_ret[0], rep_ret[1], rep_text_loc)

        items.append(item)
        return True

    # Parses and returns items, skipping whitespaces, insignificant
    # symbols, and comments when allowed, and stopping at the first
    # unknown character.
    def _parse_items(self) -> List[_Item]:
        items = []  # type: List[_Item]

        while self._isnt_done():
            # Try to append item
            if not self._try_append_item(items):
                # Unknown at this point
                break

        return items

    # Parses the whole Normand input, setting `self._res` to the main
    # group item on success.
    def _parse(self):
        if len(self._normand.strip()) == 0:
            # Special case to make sure there's something to consume
            self._res = _Group([], self._text_loc)
            return

        # Parse first level items
        items = self._parse_items()

        # Make sure there's nothing left
        self._skip_ws_and_comments()

        if self._isnt_done():
            self._raise_error(
                "Unexpected character `{}`".format(self._normand[self._at])
            )

        # Set main group item
        self._res = _Group(items, self._text_loc)
972
973
# The return type of parse().
#
# Instances are built with the private _create() class method; calling
# the constructor directly raises `NotImplementedError`.
class ParseResult:
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which is deliberately unusable
        parse_res = cls.__new__(cls)
        parse_res._init(data, variables, labels, offset, bo)
        return parse_res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create(): keeps the parameters as
    # is (no copy).
    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars, self._labels = variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1031
1032
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1037
1038
# The `ICITTE` reserved name: the parser rejects it as a variable name
# and as a label name.
_icitte_name = "ICITTE"
1041
1042
2adf4336
PP
# Base node visitor.
#
# Calls the _visit_name() method for each name node which isn't the name
# of a call.
class _NodeVisitor(ast.NodeVisitor):
    def __init__(self):
        # `True` while visiting the first descendants of a call node
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # What comes next is the called name: don't report it
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            self._visit_name(node.id)

        # Visit children, and then reset the flag so that the following
        # nodes (call arguments, for example) are reported.
        super().generic_visit(node)
        self._parent_is_call = False

    # Handles the non-called name `name`.
    @abc.abstractmethod
    def _visit_name(self, name: str):
        pass
1063
71aaa3f7 1064
2adf4336
PP
# Expression validator: validates that all the names within the
# expression are allowed.
#
# Raises a parse error, at the text location of the item, on the first
# illegal name.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str], icitte_allowed: bool):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names
        self._icitte_allowed = icitte_allowed

    def _visit_name(self, name: str):
        # Reserved name: only legal when explicitly allowed
        if name == _icitte_name:
            if not self._icitte_allowed:
                _raise_error(
                    "Illegal reserved name `{}` in expression `{}`".format(
                        _icitte_name, self._item.expr_str
                    ),
                    self._item.text_loc,
                )

            return

        # Known and reachable variable/label name: nothing to report
        if name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )

        if self._allowed_names:
            # Help the user with the sorted list of legal names
            legal_names = self._allowed_names.copy()

            if self._icitte_allowed:
                legal_names.add(_icitte_name)

            quoted_names = sorted(
                "`{}`".format(legal_name) for legal_name in legal_names
            )
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(msg, self._item.text_loc)
1104
1105
# Expression visitor getting all the contained names.
#
# After visiting, the `names` property is the set of visited non-called
# names.
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # Collected names
        self._names = set()  # type: Set[str]

        # State of the base visitor
        self._parent_is_call = False

    # Names collected so far.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
71aaa3f7 1118
71aaa3f7 1119
2adf4336
PP
# Generator state.
#
# Holds copies of the `variables` and `labels` dictionaries so that
# updating this state doesn't modify the dictionaries of the caller.
class _GenState:
    def __init__(
        self, variables: VarsT, labels: VarsT, offset: int, bo: Optional[ByteOrder]
    ):
        self.offset = offset
        self.bo = bo
        self.variables = variables.copy()
        self.labels = labels.copy()
71aaa3f7
PP
1129
1130
# Generator of data and final state from a group item.
#
# All the work happens in memory when building an instance. Once built,
# read the `data`, `variables`, `labels`, `offset`, and `bo` properties
# to get the resulting context.
#
# Generation is a three-pass process:
#
# 1. Validate that each repetition expression only uses reachable
#    names, and never `ICITTE`.
#
# 2. Compute and record the effective repetition count of each
#    repetition instance.
#
# 3. Generate the bytes, updating the initial state along the way so
#    that it becomes the final state once done.
#
#    During this pass, the effective repetition count of any `_Rep`
#    item instance is already known.
#
#    To handle a `_Group` item, first add all its immediate (not
#    nested) labels to the current labels, and then handle each
#    contained item: this is how a contained item gets access to
#    "future" outer labels. Then remove the immediate labels from the
#    state so that outer items can't reach inner labels.
class _Gen:
    def __init__(
        self,
        group: _Group,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Pass 1: validate the repetition expressions
        known_variable_names = set(variables)
        known_label_names = set(labels)
        self._validate_rep_exprs(group, known_variable_names, known_label_names)

        # Pass 2: compute the repetition counts with a throwaway state
        scratch_state = _GenState(variables, labels, offset, bo)
        self._rep_instance_vals = self._compute_rep_instance_vals(
            group, scratch_state
        )

        # Pass 3: generate the data with a fresh state
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Returns the set of used, non-called names within the AST
    # expression `expr`.
    @staticmethod
    def _names_of_expr(expr: ast.Expression):
        names_visitor = _ExprNamesVisitor()
        names_visitor.visit(expr)
        return names_visitor.names

    # Validates that no repetition expression within `item` refers,
    # directly or indirectly, to a subsequent label.
    #
    # `allowed_label_names` is the set of label names which are
    # reachable at this point (one set per group) and
    # `allowed_variable_names` is the set of reachable variable names
    # (shared by all the groups). This function updates both sets.
    #
    # What happens depends on the type of `item`:
    #
    # `_Label`:
    #     Its name becomes an allowed label name: a label occurring
    #     before a repetition, and not within a nested group, is
    #     always reachable.
    #
    # `_VarAssign`:
    #     Its name becomes an allowed variable name if all the names
    #     of its expression are allowed.
    #
    #     Otherwise, its name stops being an allowed variable name: a
    #     variable which refers to an unreachable name is unreachable
    #     itself.
    #
    # `_Rep`:
    #     All the names of its expression must be allowed; the
    #     repeated item is then validated too.
    #
    # `_Group`:
    #     Each contained item is validated with a _copy_ of the
    #     current allowed label names and with the same current
    #     allowed variable names.
    @staticmethod
    def _validate_rep_exprs(
        item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
    ):
        item_type = type(item)

        if item_type is _Label:
            allowed_label_names.add(item.name)
        elif item_type is _VarAssign:
            # `ICITTE` is always fine within a variable assignment
            reachable_names = (
                allowed_label_names | allowed_variable_names | {_icitte_name}
            )

            if all(
                name in reachable_names for name in _Gen._names_of_expr(item.expr)
            ):
                allowed_variable_names.add(item.name)
            else:
                # Refers to something unreachable: unreachable itself
                allowed_variable_names.discard(item.name)
        elif item_type is _Rep:
            # The expression first (`ICITTE` isn't allowed here)
            rep_allowed_names = allowed_label_names | allowed_variable_names
            _ExprValidator(item, rep_allowed_names, False).visit(item.expr)

            # Then the repeated item
            _Gen._validate_rep_exprs(
                item.item, allowed_variable_names, allowed_label_names
            )
        elif item_type is _Group:
            # Work on a copy of the label names so that the caller
            # never gets to see the label names of this group.
            inner_label_names = allowed_label_names.copy()

            for subitem in item.items:
                _Gen._validate_rep_exprs(
                    subitem, allowed_variable_names, inner_label_names
                )

    # Evaluates the expression of `item` with the labels and variables
    # of the generation state `state` and returns the integral result.
    #
    # The `ICITTE` name is only available to the expression (as the
    # current offset) if `allow_icitte` is `True`.
    @staticmethod
    def _eval_item_expr(item: _ExprItemT, state: _GenState, allow_icitte: bool):
        # Build the namespace: labels first, then `ICITTE`, and finally
        # the variables (which therefore win on a name clash).
        namespace = state.labels.copy()

        if allow_icitte:
            namespace[_icitte_name] = state.offset

        namespace.update(state.variables)

        # Make sure the expression only uses known names
        _ExprValidator(item, set(namespace), True).visit(item.expr)

        # NOTE(review): eval() runs expression text coming from the
        # Normand input; names are validated above, but confirm that
        # this is acceptable if the input may be untrusted.
        try:
            code = compile(item.expr, "", "eval")
            result = eval(code, None, namespace)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Only a plain `int` is a valid result
        result_type = type(result)

        if result_type is not int:
            _raise_error_for_item(
                "Invalid expression `{}`: expecting result type `int`, not `{}`".format(
                    item.expr_str, result_type.__name__
                ),
                item,
            )

        return result

    # Computes the effective value (multiplier) of each repetition
    # instance within `item`, appending them to `instance_vals` (a new
    # list if `None`) which this function also returns.
    #
    # The caller must have validated beforehand that the expression of
    # any repetition only contains reachable names.
    #
    # For a `_Rep` item, the effective multiplier is appended to
    # `instance_vals` _before_ the repeated item is processed.
    #
    # A `_VarAssign` item is only evaluated if all the names of its
    # expression are currently known.
    @staticmethod
    def _compute_rep_instance_vals(
        item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
    ):
        if instance_vals is None:
            instance_vals = []

        item_type = type(item)

        if isinstance(item, _ScalarItem):
            # Only the size matters during this pass
            state.offset += item.size
        elif item_type is _Label:
            state.labels[item.name] = state.offset
        elif item_type is _VarAssign:
            # Is there any name which isn't known at this point?
            has_unknown_name = any(
                name != _icitte_name
                and name not in state.variables
                and name not in state.labels
                for name in _Gen._names_of_expr(item.expr)
            )

            if not has_unknown_name:
                # Everything is known: evaluate and keep the result
                state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
        elif item_type is _SetOffset:
            state.offset = item.val
        elif item_type is _Rep:
            # Effective repetition count of this instance
            count = _Gen._eval_item_expr(item, state, False)

            if count < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,}".format(
                        item.expr_str, count
                    ),
                    item,
                )

            # Record it before dealing with the repeated item
            instance_vals.append(count)

            for _ in range(count):
                _Gen._compute_rep_instance_vals(item.item, state, instance_vals)
        elif item_type is _Group:
            # Labels are local to their group: restore them afterwards
            outer_labels = state.labels.copy()

            for subitem in item.items:
                _Gen._compute_rep_instance_vals(subitem, state, instance_vals)

            state.labels = outer_labels

        return instance_vals

    # Size of an item which doesn't generate any data.
    def _zero_item_size(self, item: _Item, next_rep_instance: int):
        return 0, next_rep_instance

    # Size of a scalar item.
    def _scalar_item_size(self, item: _ScalarItem, next_rep_instance: int):
        return item.size, next_rep_instance

    # Size of a group item: the sum of the sizes of its items.
    def _group_item_size(self, item: _Group, next_rep_instance: int):
        total_size = 0

        for subitem in item.items:
            sub_size, next_rep_instance = self._item_size(subitem, next_rep_instance)
            total_size += sub_size

        return total_size, next_rep_instance

    # Size of a repetition item: the sum of the sizes of all the
    # iterations of its repeated item.
    def _rep_item_size(self, item: _Rep, next_rep_instance: int):
        # Read the multiplier from `self._rep_instance_vals` _before_
        # incrementing `next_rep_instance`: this is the order in which
        # _compute_rep_instance_vals() recorded the values.
        count = self._rep_instance_vals[next_rep_instance]
        next_rep_instance += 1
        total_size = 0

        for _ in range(count):
            one_size, next_rep_instance = self._item_size(item.item, next_rep_instance)
            total_size += one_size

        return total_size, next_rep_instance

    # Returns the size of `item` and the new next repetition instance.
    def _item_size(self, item: _Item, next_rep_instance: int):
        size_func = self._item_size_funcs[type(item)]
        return size_func(item, next_rep_instance)

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState, next_rep_instance: int):
        self._data.append(item.val)
        state.offset += item.size
        return next_rep_instance

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState, next_rep_instance: int):
        self._data += item.data
        state.offset += item.size
        return next_rep_instance

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(
        self, item: _SetBo, state: _GenState, next_rep_instance: int
    ):
        # This becomes the current byte order
        state.bo = item.bo
        return next_rep_instance

    # Handles the variable assignment item `item`.
    def _handle_var_assign_item(
        self, item: _VarAssign, state: _GenState, next_rep_instance: int
    ):
        # Evaluate, and then create or update the variable
        new_val = self._eval_item_expr(item, state, True)
        state.variables[item.name] = new_val
        return next_rep_instance

    # Handles the value item `item`.
    def _handle_val_item(self, item: _Val, state: _GenState, next_rep_instance: int):
        # Compute the value
        val = self._eval_item_expr(item, state, True)

        # The value must fit the requested length, either as a signed
        # or as an unsigned integer.
        min_val = -(2 ** (item.len - 1))
        max_val = 2**item.len - 1

        if not min_val <= val <= max_val:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, state.offset
                ),
                item,
            )

        # Encoding on more than one byte requires a byte order
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Without a byte order (single byte), encode as big endian
        is_be = state.bo in (None, ByteOrder.BE)

        # Encode the value on 64 bits (to extend the sign bit whatever
        # the value of `item.len`).
        fmt = "{}{}".format(">" if is_be else "<", "Q" if val >= 0 else "q")
        encoded = struct.pack(fmt, val)

        # Only keep the requested length
        byte_count = item.len // 8

        if is_be:
            # Big endian: the last bytes are the ones to keep
            encoded = encoded[-byte_count:]
        else:
            # Little endian: the first bytes are the ones to keep
            assert state.bo == ByteOrder.LE
            encoded = encoded[:byte_count]

        # Append to the current bytes and update the offset
        self._data += encoded
        state.offset += len(encoded)
        return next_rep_instance

    # Handles the group item `item`, only removing the immediate labels
    # from `state.labels` if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self,
        item: _Group,
        state: _GenState,
        next_rep_instance: int,
        remove_immediate_labels: bool = True,
    ):
        # Dry run: compute the values of the immediate (not nested)
        # labels, which any expression within the group may reach,
        # using a local offset and a local repetition instance index so
        # that the real ones remain untouched.
        dry_offset = state.offset
        dry_next_rep_instance = next_rep_instance
        own_label_names = set()  # type: Set[str]

        for subitem in item.items:
            subitem_type = type(subitem)

            if subitem_type is _SetOffset:
                # New offset
                dry_offset = subitem.val
            elif subitem_type is _Label:
                # New immediate label
                state.labels[subitem.name] = dry_offset
                own_label_names.add(subitem.name)

            sub_size, dry_next_rep_instance = self._item_size(
                subitem, dry_next_rep_instance
            )
            dry_offset += sub_size

        # Real run: handle each item with the actual state
        for subitem in item.items:
            next_rep_instance = self._handle_item(subitem, state, next_rep_instance)

        # Outer items must not reach inner labels: remove the immediate
        # labels if required.
        if remove_immediate_labels:
            for label_name in own_label_names:
                del state.labels[label_name]

        return next_rep_instance

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState, next_rep_instance: int):
        # The multiplier of this instance was computed beforehand
        count = self._rep_instance_vals[next_rep_instance]
        next_rep_instance += 1

        for _ in range(count):
            next_rep_instance = self._handle_item(item.item, state, next_rep_instance)

        return next_rep_instance

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_rep_instance: int
    ):
        state.offset = item.val
        return next_rep_instance

    # Handles the label item `item`.
    #
    # There's nothing to do here: _handle_group_item() sets the labels.
    def _handle_label_item(
        self, item: _Label, state: _GenState, next_rep_instance: int
    ):
        return next_rep_instance

    # Handles the item `item`, returning the updated next repetition
    # instance.
    def _handle_item(self, item: _Item, state: _GenState, next_rep_instance: int):
        handler = self._item_handlers[type(item)]
        return handler(item, state, next_rep_instance)

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # No data yet
        self._data = bytearray()

        # Item type to item handler
        self._item_handlers = {
            _Byte: self._handle_byte_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _Str: self._handle_str_item,
            _Val: self._handle_val_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Item type to item size getter
        self._item_size_funcs = {
            _Byte: self._scalar_item_size,
            _Group: self._group_item_size,
            _Label: self._zero_item_size,
            _Rep: self._rep_item_size,
            _SetBo: self._zero_item_size,
            _SetOffset: self._zero_item_size,
            _Str: self._scalar_item_size,
            _Val: self._scalar_item_size,
            _VarAssign: self._zero_item_size,
        }  # type: Dict[type, Callable[[Any, int], Tuple[int, int]]]

        # Handle the main group, keeping its immediate labels because
        # the `labels` property offers them.
        self._handle_group_item(group, state, 0, False)

        # `state` is now the final state
        self._final_state = state
71aaa3f7
PP
1604
1605
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance which contains the encoded bytes.
#
# `init_variables` maps initial variable names (valid Python names) to
# integral values. A variable name must not be the reserved name
# `ICITTE`.
#
# `init_labels` maps initial label names (valid Python names) to
# integral values. A label name must not be the reserved name `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # No initial variables/labels means empty ones
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text to get the main group
    main_group = _Parser(normand, variables, labels).res

    # Generate the data and the final state from the main group
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1645
1646
# Parses the command-line arguments of the running process and returns
# the resulting `argparse` namespace.
def _parse_cli_args():
    import argparse

    parser = argparse.ArgumentParser()

    # Initial offset
    parser.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )

    # Initial byte order
    parser.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Initial variables
    parser.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )

    # Initial labels
    parser.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    version_str = "Normand {}".format(__version__)
    parser.add_argument("--version", action="version", version=version_str)

    # Optional input path
    parser.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return parser.parse_args()
1695
1696
1697# Raises a command-line error with the message `msg`.
1698def _raise_cli_error(msg: str) -> NoReturn:
1699 raise RuntimeError("Command-line error: {}".format(msg))
1700
1701
# Converts the list of `NAME=VAL` strings `args` to a dictionary
# mapping each name to its integral value.
#
# Returns an empty dictionary if `args` is `None`.
#
# Raises a command-line error if any entry isn't a valid Python name
# followed with `=` and decimal digits.
def _dict_from_arg(args: Optional[List[str]]):
    result = {}  # type: Dict[str, int]

    if args is None:
        return result

    # Same pattern for all the entries: build it once
    assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

    for arg in args:
        match = assign_pat.match(arg)

        if match is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        name, val_str = match.group(1), match.group(2)
        result[name] = int(val_str)

    return result
1719
1720
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input from the
# requested path (or from the standard input without a path), parses
# it, and writes the resulting bytes to the standard output.
#
# Raises a command-line error (see _raise_cli_error()) on an invalid
# initial variable/label assignment or on a negative initial offset.
#
# On a `ParseError`, calls _fail() with the location and message of
# the error instead of letting the exception propagate.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset: report the offending value (the placeholder of
    # the message needs to be formatted).
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The argument parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print the raw bytes
    sys.stdout.buffer.write(res.data)
1770
1771
1772# Prints the exception message `msg` and exits with status 1.
1773def _fail(msg: str) -> NoReturn:
1774 if not msg.endswith("."):
1775 msg += "."
1776
1777 print(msg, file=sys.stderr)
1778 sys.exit(1)
1779
1780
# CLI entry point: runs the command-line tool, reporting any exception
# through _fail() instead of letting it propagate.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        # The message of the exception is all the user gets
        reason = str(exc)
        _fail(reason)
1787
1788
# Run the command-line tool when this module is executed as a script
# (as opposed to being imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.089365 seconds and 4 git commands to generate.