normand.py: add comment about the portable module
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.2.0"
# Names exported by a wildcard import of this module: the public API
# plus the metadata above.
__all__ = [
    "ByteOrder",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLoc",
    "VarsT",
    "__author__",
    "__version__",
]
44
45 import re
46 import abc
47 import ast
48 import sys
49 import enum
50 import struct
51 from typing import Any, Dict, List, Union, Pattern, Callable, NoReturn, Optional
52
53
# Text location (line and column numbers).
#
# Instances are built with the private `_create()` class method; calling
# the constructor directly raises `NotImplementedError`.
class TextLoc:
    # Creates and returns a text location having the line number
    # `line_no` and the column number `col_no`, bypassing `__init__()`.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Always raises `NotImplementedError`: use `_create()` instead.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Real initializer (called by `_create()`).
    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    # Readable representation: the `__repr__()` methods of the item
    # classes format their text location, which would otherwise show
    # the default object representation (type name and address).
    def __repr__(self):
        return "TextLoc({}, {})".format(self._line_no, self._col_no)
78
79
# Any item.
#
# Base class of everything the parser produces; it only stores the
# source text location of the item.
#
# NOTE(review): this class doesn't use `abc.ABCMeta` as its metaclass,
# so the `abc.abstractmethod` decorator below documents the intent but
# isn't enforced at instantiation time.
class _Item:
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    # Returns the size, in bytes, of this item.
    #
    # Concrete item classes provide their own `size` property.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
95
96
# A repeatable item.
#
# Marker base class: it adds nothing to `_Item`; the parser checks for
# it with isinstance() to decide whether a repetition may follow.
class _RepableItem(_Item):
    pass
100
101
# Item holding the value of exactly one byte.
class _Byte(_RepableItem):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Value of this byte.
    @property
    def val(self):
        return self._val

    # A byte item always has a size of one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
119
120
# Item holding an already encoded string.
class _Str(_RepableItem):
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Bytes of the encoded string.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({!r}, {})".format(self._data, self._text_loc)
138
139
# Byte order of the values to encode.
#
# The member values are unique (enforced by `enum.unique`).
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian
    BE = "be"

    # Little endian
    LE = "le"
148
149
# Byte order.
#
# Item which changes the current byte order; it takes no space in the
# generated data.
class _Bo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    # Always zero: this item generates no data.
    @property
    def size(self):
        return 0
163
164
# Named label item; it takes no space in the generated data.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Name of this label.
    @property
    def name(self):
        return self._name

    # Always zero: a label generates no data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Label({!r}, {})".format(self._name, self._text_loc)
182
183
# Offset item (sets the current offset); it takes no space in the
# generated data.
class _Offset(_Item):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # New offset value.
    @property
    def val(self):
        return self._val

    # Always zero: an offset item generates no data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Offset({!r}, {})".format(self._val, self._text_loc)
201
202
203 # Mixin of containing an AST expression and its string.
204 class _ExprMixin:
205 def __init__(self, expr_str: str, expr: ast.Expression):
206 self._expr_str = expr_str
207 self._expr = expr
208
209 # Expression string.
210 @property
211 def expr_str(self):
212 return self._expr_str
213
214 # Expression node to evaluate.
215 @property
216 def expr(self):
217 return self._expr
218
219
# Variable assignment item: a name and the expression giving its value;
# it takes no space in the generated data.
class _Var(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    # Always zero: a variable assignment generates no data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Var({!r}, {!r}, {!r}, {})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
242
243
# Value item: an expression to evaluate and encode on a given number of
# bits (possibly more than one byte).
class _Val(_RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Encoding length, in bits.
    @property
    def len(self):
        return self._len

    # Encoding length, in bytes (bit length divided by eight, rounded
    # down).
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        return "_Val({!r}, {!r}, {!r}, {})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
266
267
# Expression item type: any item carrying an expression to evaluate
# (a value or a variable assignment).
_ExprItemT = Union[_Val, _Var]
270
271
# Group item: an ordered list of contained items.
class _Group(_RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

        # The total size is computed once, at construction time
        self._size = sum(item.size for item in items)

    # Items contained in this group.
    @property
    def items(self):
        return self._items

    # Sum of the sizes of the contained items (as computed when
    # building this group).
    @property
    def size(self):
        return self._size

    def __repr__(self):
        return "_Group({!r}, {})".format(self._items, self._text_loc)
290
291
# Repetition item: a repeatable item and how many times to repeat it.
class _Rep(_Item):
    def __init__(self, item: _RepableItem, mul: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._item = item
        self._mul = mul

    # Repeated item.
    @property
    def item(self):
        return self._item

    # Number of repetitions.
    @property
    def mul(self):
        return self._mul

    # Size of the repeated item times the multiplier.
    @property
    def size(self):
        return self._item.size * self._mul

    def __repr__(self):
        return "_Rep({!r}, {!r}, {})".format(self._item, self._mul, self._text_loc)
317
318
# Parsing error: a message and the source text location of the error.
#
# Instances are built with the private `_create()` class method; calling
# the constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    # Creates and returns a parsing error having the message `msg` and
    # the text location `text_loc`, bypassing `__init__()`.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Real initializer: forwards `msg` to the base exception class and
    # keeps `text_loc`.
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Location of the error within the source text.
    @property
    def text_loc(self):
        return self._text_loc
338
339
# Raises a parsing error, forwarding the parameters to the constructor.
#
# Never returns: always raises a `ParseError` having the message `msg`
# and the text location `text_loc`.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    raise ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
343
344
# Variable (and label) dictionary type: maps a name to an integral
# value.
VarsT = Dict[str, int]
347
348
# Python name pattern: an ASCII letter or `_` followed by any number of
# ASCII letters, digits, or `_`.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
351
352
# Normand parser.
#
# The constructor accepts a Normand input and parses it immediately.
# After building, use the `res` property to get the resulting main
# group.
#
# Parsing methods named `_try_parse_*()` return `None` (without
# consuming anything) when the current substring doesn't start the
# expected construct, and raise `ParseError` when the construct starts
# correctly but is malformed.
class _Parser:
    # Builds a parser to parse the Normand input `normand`, parsing
    # immediately.
    #
    # The names of the initial variables `variables` and labels `labels`
    # are remembered to detect name clashes while parsing.
    #
    # Raises `ParseError` on invalid input.
    def __init__(self, normand: str, variables: VarsT, labels: VarsT):
        self._normand = normand
        self._at = 0
        self._line_no = 1
        self._col_no = 1
        self._label_names = set(labels.keys())
        self._var_names = set(variables.keys())
        self._parse()

    # Result (main group).
    @property
    def res(self):
        return self._res

    # Current text location.
    @property
    def _text_loc(self):
        return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
            self._line_no, self._col_no
        )

    # Returns `True` if this parser is done parsing.
    def _is_done(self):
        return self._at == len(self._normand)

    # Returns `True` if this parser isn't done parsing.
    def _isnt_done(self):
        return not self._is_done()

    # Raises a parse error, creating it using the message `msg` and the
    # current text location.
    def _raise_error(self, msg: str) -> NoReturn:
        _raise_error(msg, self._text_loc)

    # Tries to make the pattern `pat` match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success.
    #
    # Returns `None` (leaving the state untouched) if there's no match.
    def _try_parse_pat(self, pat: Pattern[str]):
        m = pat.match(self._normand, self._at)

        if m is None:
            return

        # Skip matched string
        self._at += len(m.group(0))

        # Update line number
        self._line_no += m.group(0).count("\n")

        # Update column number: distance from the last newline located
        # before the current position. str.rfind() returns -1 when
        # there's no such newline, which makes the column number
        # `self._at + 1` on the first line. This also handles a newline
        # at index 0 correctly (the column number restarts at 1 right
        # after it).
        self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

        # Return match object
        return m

    # Expects the pattern `pat` to match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success, or raising a parse
    # error with the message `error_msg` on error.
    def _expect_pat(self, pat: Pattern[str], error_msg: str):
        # Match
        m = self._try_parse_pat(pat)

        if m is None:
            # No match: error
            self._raise_error(error_msg)

        # Return match object
        return m

    # Pattern for _skip_ws_and_comments()
    _ws_or_syms_or_comments_pat = re.compile(
        r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
    )

    # Skips as many whitespaces, insignificant symbol characters, and
    # comments as possible.
    def _skip_ws_and_comments(self):
        self._try_parse_pat(self._ws_or_syms_or_comments_pat)

    # Pattern for _try_parse_hex_byte()
    _nibble_pat = re.compile(r"[A-Fa-f0-9]")

    # Tries to parse a hexadecimal byte, returning a byte item on
    # success.
    def _try_parse_hex_byte(self):
        begin_text_loc = self._text_loc

        # Match initial nibble
        m_high = self._try_parse_pat(self._nibble_pat)

        if m_high is None:
            # No match
            return

        # Expect another nibble
        self._skip_ws_and_comments()
        m_low = self._expect_pat(
            self._nibble_pat, "Expecting another hexadecimal nibble"
        )

        # Return item
        return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)

    # Patterns for _try_parse_bin_byte()
    _bin_byte_bit_pat = re.compile(r"[01]")
    _bin_byte_prefix_pat = re.compile(r"%")

    # Tries to parse a binary byte, returning a byte item on success.
    def _try_parse_bin_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
            # No match
            return

        # Expect eight bits
        bits = []  # type: List[str]

        for _ in range(8):
            self._skip_ws_and_comments()
            m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
            bits.append(m.group(0))

        # Return item
        return _Byte(int("".join(bits), 2), begin_text_loc)

    # Patterns for _try_parse_dec_byte()
    _dec_byte_prefix_pat = re.compile(r"\$\s*")
    _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

    # Tries to parse a decimal byte, returning a byte item on success.
    #
    # The accepted range is -128 to 255; a negative value is stored as
    # its two's complement.
    def _try_parse_dec_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
            # No match
            return

        # Expect the value
        m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

        # Compute value
        val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)

        # Validate
        if val < -128 or val > 255:
            _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

        # Two's complement
        val = val % 256

        # Return item
        return _Byte(val, begin_text_loc)

    # Tries to parse a byte, returning a byte item on success.
    def _try_parse_byte(self):
        # Hexadecimal, then binary, then decimal
        for try_parse in (
            self._try_parse_hex_byte,
            self._try_parse_bin_byte,
            self._try_parse_dec_byte,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Patterns for _try_parse_str()
    _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
    _str_suffix_pat = re.compile(r'"')
    _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
    _str_escape_seq_pat = re.compile(r"\\(.)")

    # Strings corresponding to escape sequence characters
    _str_escape_seq_strs = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "e": "\x1b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
    }

    # Tries to parse a string, returning a string item on success.
    #
    # The string is encoded as UTF-8 unless a `u16`/`u32` prefix with a
    # byte order selects UTF-16 or UTF-32.
    def _try_parse_str(self):
        begin_text_loc = self._text_loc

        # Match prefix
        m = self._try_parse_pat(self._str_prefix_pat)

        if m is None:
            # No match
            return

        # Get encoding
        encoding = "utf8"

        if m.group("len") is not None:
            encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

        # Actual string
        m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

        # Expect end of string
        self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

        # Replace escape sequences in a single left-to-right pass so
        # that the character following an escaped backslash is never
        # mistaken for an escape sequence character (`\\n` is a
        # backslash followed by `n`, not a backslash followed by a
        # newline). Unknown escape sequences are kept as is.
        escape_seq_strs = self._str_escape_seq_strs
        val = self._str_escape_seq_pat.sub(
            lambda esc_m: escape_seq_strs.get(esc_m.group(1), esc_m.group(0)),
            m.group(0),
        )

        # Encode
        data = val.encode(encoding)

        # Return item
        return _Str(data, begin_text_loc)

    # Patterns for _try_parse_group()
    _group_prefix_pat = re.compile(r"\(")
    _group_suffix_pat = re.compile(r"\)")

    # Tries to parse a group, returning a group item on success.
    def _try_parse_group(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._group_prefix_pat) is None:
            # No match
            return

        # Parse items
        items = self._parse_items()

        # Expect end of group
        self._skip_ws_and_comments()
        self._expect_pat(
            self._group_suffix_pat, "Expecting an item or `)` (end of group)"
        )

        # Return item
        return _Group(items, begin_text_loc)

    # Returns a stripped expression string and an AST expression node
    # from the expression string `expr_str` at text location `text_loc`.
    #
    # Raises `ParseError` if `expr_str` isn't a valid Python expression.
    def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
        # Create an expression node from the expression string
        expr_str = expr_str.strip().replace("\n", " ")

        try:
            expr = ast.parse(expr_str, mode="eval")
        except SyntaxError:
            _raise_error(
                "Invalid expression `{}`: invalid syntax".format(expr_str),
                text_loc,
            )

        return expr_str, expr

    # Patterns for _try_parse_val_and_len()
    _val_expr_pat = re.compile(r"([^}:]+):")
    _val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)")

    # Tries to parse a value and length, returning a value item on
    # success.
    def _try_parse_val_and_len(self):
        begin_text_loc = self._text_loc

        # Match
        m_expr = self._try_parse_pat(self._val_expr_pat)

        if m_expr is None:
            # No match
            return

        # Expect a length
        m_len = self._expect_pat(
            self._val_len_pat, "Expecting a length (multiple of eight bits)"
        )

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

        # Return item
        return _Val(
            expr_str,
            expr,
            int(m_len.group(1)),
            begin_text_loc,
        )

    # Pattern for _try_parse_var()
    _var_pat = re.compile(
        r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
    )

    # Tries to parse a variable, returning a variable item on success.
    #
    # Raises `ParseError` if the name is reserved or is the name of an
    # existing label.
    def _try_parse_var(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._var_pat)

        if m is None:
            # No match
            return

        # Validate name
        name = m.group("name")

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Existing label named `{}`".format(name), begin_text_loc)

        # Add to known variable names
        self._var_names.add(name)

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

        # Return item
        return _Var(
            name,
            expr_str,
            expr,
            begin_text_loc,
        )

    # Pattern for _try_parse_bo_name()
    _bo_pat = re.compile(r"[bl]e")

    # Tries to parse a byte order name, returning a byte order item on
    # success.
    def _try_parse_bo_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._bo_pat)

        if m is None:
            # No match
            return

        # Return corresponding item
        if m.group(0) == "be":
            return _Bo(ByteOrder.BE, begin_text_loc)
        else:
            assert m.group(0) == "le"
            return _Bo(ByteOrder.LE, begin_text_loc)

    # Patterns for _try_parse_val_or_var_or_bo()
    _val_var_bo_prefix_pat = re.compile(r"\{\s*")
    _val_var_bo_suffix_pat = re.compile(r"\s*}")

    # Tries to parse a value, a variable, or a byte order, returning an
    # item on success.
    def _try_parse_val_or_var_or_bo(self):
        # Match prefix
        if self._try_parse_pat(self._val_var_bo_prefix_pat) is None:
            # No match
            return

        # Variable item?
        item = self._try_parse_var()

        if item is None:
            # Value item?
            item = self._try_parse_val_and_len()

            if item is None:
                # Byte order item?
                item = self._try_parse_bo_name()

                if item is None:
                    # At this point it's invalid
                    self._raise_error("Expecting a value, a variable, or a byte order")

        # Expect suffix
        self._expect_pat(self._val_var_bo_suffix_pat, "Expecting `}`")
        return item

    # Pattern for _try_parse_offset_val() and _try_parse_rep()
    _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

    # Tries to parse an offset value (after the initial `<`), returning
    # an offset item on success.
    def _try_parse_offset_val(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is None:
            # No match
            return

        # Return item (base 0: decimal or `0x`-prefixed hexadecimal)
        return _Offset(int(m.group(0), 0), begin_text_loc)

    # Tries to parse a label name (after the initial `<`), returning a
    # label item on success.
    #
    # Raises `ParseError` if the name is reserved, is a duplicate label
    # name, or is the name of an existing variable.
    def _try_parse_label_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(_py_name_pat)

        if m is None:
            # No match
            return

        # Validate
        name = m.group(0)

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

        if name in self._var_names:
            _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

        # Add to known label names
        self._label_names.add(name)

        # Return item
        return _Label(name, begin_text_loc)

    # Patterns for _try_parse_label_or_offset()
    _label_offset_prefix_pat = re.compile(r"<\s*")
    _label_offset_suffix_pat = re.compile(r"\s*>")

    # Tries to parse a label or an offset, returning an item on success.
    def _try_parse_label_or_offset(self):
        # Match prefix
        if self._try_parse_pat(self._label_offset_prefix_pat) is None:
            # No match
            return

        # Offset item?
        item = self._try_parse_offset_val()

        if item is None:
            # Label item?
            item = self._try_parse_label_name()

            if item is None:
                # At this point it's invalid
                self._raise_error("Expecting a label name or an offset value")

        # Expect suffix
        self._expect_pat(self._label_offset_suffix_pat, "Expecting `>`")
        return item

    # Tries to parse a base item (anything except a repetition),
    # returning it on success.
    def _try_parse_base_item(self):
        # In order: byte, string, value/variable/byte order,
        # label/offset, and group
        for try_parse in (
            self._try_parse_byte,
            self._try_parse_str,
            self._try_parse_val_or_var_or_bo,
            self._try_parse_label_or_offset,
            self._try_parse_group,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Pattern for _try_parse_rep()
    _rep_prefix_pat = re.compile(r"\*\s*")

    # Tries to parse a repetition, returning the multiplier on success,
    # or 1 otherwise.
    def _try_parse_rep(self):
        # Match prefix
        if self._try_parse_pat(self._rep_prefix_pat) is None:
            # No match
            return 1

        # Expect and return a constant integer multiplier (decimal or
        # `0x`-prefixed hexadecimal)
        self._skip_ws_and_comments()
        m = self._expect_pat(
            self._pos_const_int_pat, "Expecting a positive integral multiplier"
        )
        return int(m.group(0), 0)

    # Tries to parse an item, possibly followed by a repetition,
    # returning `True` on success.
    #
    # Appends any parsed item to `items`.
    def _try_append_item(self, items: List[_Item]):
        self._skip_ws_and_comments()

        # Parse a base item
        item = self._try_parse_base_item()

        if item is None:
            # No item
            return False

        # Parse repetition if the base item is repeatable
        if isinstance(item, _RepableItem):
            self._skip_ws_and_comments()
            rep_text_loc = self._text_loc
            rep = self._try_parse_rep()

            if rep == 0:
                # No item, but that's okay
                return True
            elif rep > 1:
                # Convert to repetition item
                item = _Rep(item, rep, rep_text_loc)

        items.append(item)
        return True

    # Parses and returns items, skipping whitespaces, insignificant
    # symbols, and comments when allowed, and stopping at the first
    # unknown character.
    def _parse_items(self) -> List[_Item]:
        items = []  # type: List[_Item]

        while self._isnt_done():
            # Try to append item
            if not self._try_append_item(items):
                # Unknown at this point
                break

        return items

    # Parses the whole Normand input, setting `self._res` to the main
    # group item on success.
    #
    # Raises `ParseError` if anything remains after the last item.
    def _parse(self):
        if not self._normand.strip():
            # Special case to make sure there's something to consume
            self._res = _Group([], self._text_loc)
            return

        # Parse first level items
        items = self._parse_items()

        # Make sure there's nothing left
        self._skip_ws_and_comments()

        if self._isnt_done():
            self._raise_error(
                "Unexpected character `{}`".format(self._normand[self._at])
            )

        # Set main group item
        self._res = _Group(items, self._text_loc)
958
959
# Result of parse(): the generated data and the updated context
# (variables, labels, offset, and byte order).
#
# Instances are built with the private `_create()` class method; calling
# the constructor directly raises `NotImplementedError`.
class ParseResult:
    # Creates and returns a parse result from its parts, bypassing
    # `__init__()`.
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Real initializer (called by `_create()`).
    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Data which was generated.
    @property
    def data(self):
        return self._data

    # Updated variables: dictionary of names to their last computed
    # value.
    @property
    def variables(self):
        return self._vars

    # Updated main group labels: dictionary of names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Offset after generating the data.
    @property
    def offset(self):
        return self._offset

    # Byte order after generating the data (possibly `None`).
    @property
    def byte_order(self):
        return self._bo
1017
1018
# Raises a parse error for the item `item`, creating it using the
# message `msg`.
#
# The text location of the error is the one of `item`. Never returns.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    _raise_error(msg, item.text_loc)
1023
1024
# The `ICITTE` reserved name.
#
# The parser rejects it as a variable or label name; when evaluating an
# expression, it's bound to the current offset.
_icitte_name = "ICITTE"
1027
1028
# Validator of the expression of an item.
#
# Visiting an expression node raises `ParseError` if the expression
# refers to a name which is neither `ICITTE` nor a known symbol of
# `syms`. A name node visited while the "parent is a call" flag is set
# isn't checked.
class _ExprValidator(ast.NodeVisitor):
    def __init__(self, item: _ExprItemT, syms: VarsT):
        self._item = item
        self._syms = syms
        self._parent_is_call = False

    # Raises a parse error if `name` isn't a known variable/label name.
    def _validate_name(self, name: str):
        if name == _icitte_name or name in self._syms:
            # Known
            return

        _raise_error(
            "Unknown variable/label name `{}` in expression `{}`".format(
                name, self._item.expr_str
            ),
            self._item.text_loc,
        )

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            # Make sure the name refers to a known variable/label name
            self._validate_name(node.id)

        # TODO: Restrict the set of allowed node types

        super().generic_visit(node)
        self._parent_is_call = False
1053
1054
# Keeper of the labels of each group instance.
#
# A group instance is one iteration of a given group; the instances of
# a given group are indexed from zero, in the order in which they were
# added.
class _GroupInstanceLabels:
    def __init__(self):
        self._instance_labels = {}  # type: Dict[_Group, Dict[int, VarsT]]

    # Registers a copy of the labels `labels` as the labels of a new
    # instance of `group`.
    def add(self, group: _Group, labels: VarsT):
        group_instances = self._instance_labels.setdefault(group, {})

        # The next instance index is the current number of instances
        group_instances[len(group_instances)] = labels.copy()

    # Returns the labels (not a copy) of the instance `instance_index`
    # of the group `group`.
    def labels(self, group: _Group, instance_index: int):
        group_instances = self._instance_labels[group]
        return group_instances[instance_index]
1074
1075
1076 # Generator of data and labels from a group item.
1077 #
1078 # Generation happens in memory at construction time. After building, use
1079 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1080 # get the resulting context.
1081 class _Gen:
# Builds a generator for the main group `group`, generating
# immediately.
#
# `variables`, `labels`, `offset`, and `bo` are the initial context;
# `variables` and `labels` are copied, not modified.
def __init__(
    self,
    group: _Group,
    variables: VarsT,
    labels: VarsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Resolve the labels of all the group instances before generating
    # anything
    self._group_instance_labels = _GroupInstanceLabels()
    self._resolve_labels(group, offset, labels.copy())
    self._vars = variables.copy()
    self._offset = offset
    self._bo = bo
    self._main_group = group
    self._gen()
1097
# Generated bytes.
@property
def data(self):
    return self._data

# Updated variables (names to their last computed value).
@property
def variables(self):
    return self._vars

# Updated main group labels: the labels of the first (and only)
# instance of the main group.
@property
def labels(self):
    return self._group_instance_labels.labels(self._main_group, 0)

# Updated offset.
@property
def offset(self):
    return self._offset

# Updated byte order.
@property
def bo(self):
    return self._bo
1122
1123 # Fills `self._group_instance_labels` with the labels for each group
1124 # instance in `item`, starting at current offset `offset` with the
1125 # current labels `labels`.
1126 #
1127 # Returns the new current offset.
def _resolve_labels(self, item: _Item, offset: int, labels: VarsT) -> int:
    item_type = type(item)

    if item_type is _Offset:
        # An offset item sets the current offset
        return item.val

    if item_type is _Rep:
        # Resolve each iteration of the repeated item
        for _ in range(item.mul):
            offset = self._resolve_labels(item.item, offset, labels)

        return offset

    if item_type is not _Group:
        # Any other item only advances the current offset
        return offset + item.size

    # Group, first pass: compute the immediate labels of this instance
    # (the inherited labels plus the ones of the direct children)
    instance_labels = labels.copy()
    label_offset = offset

    for subitem in item.items:
        subitem_type = type(subitem)

        if subitem_type is _Offset:
            label_offset = subitem.val
        elif subitem_type is _Label:
            assert subitem.name not in instance_labels
            instance_labels[subitem.name] = label_offset
        else:
            label_offset += subitem.size

    # Add to group instance labels
    self._group_instance_labels.add(item, instance_labels)

    # Group, second pass: handle each item
    for subitem in item.items:
        offset = self._resolve_labels(subitem, offset, instance_labels)

    return offset
1158
# Appends the value of the byte item `item` to the generated data and
# advances the current offset by its size.
def _handle_byte_item(self, item: _Byte):
    self._data.append(item.val)
    self._offset += item.size
1162
# Appends the encoded bytes of the string item `item` to the generated
# data and advances the current offset by its size.
def _handle_str_item(self, item: _Str):
    self._data += item.data
    self._offset += item.size
1166
# Sets the current byte order to the one of the byte order item `item`.
def _handle_bo_item(self, item: _Bo):
    self._bo = item.bo
1169
# Evaluates the expression of `item` and returns the resulting integer.
#
# The available symbols are the labels of the current group instance,
# `ICITTE` (current offset), and the current variables (added last).
#
# Raises `ParseError` if the expression refers to an unknown name, if
# its evaluation fails, or if the result isn't exactly an `int`.
#
# NOTE(review): the expression is evaluated with eval() after a
# validation which only checks names (see the TODO in
# `_ExprValidator`); don't evaluate Normand input coming from an
# untrusted source.
def _eval_expr(self, item: _ExprItemT):
    # Start from the labels of the current group instance (copied
    # because more symbols are added below)
    assert self._cur_group is not None
    instance_index = self._group_instance_indexes[self._cur_group]
    instance_labels = self._group_instance_labels.labels(
        self._cur_group, instance_index
    )
    syms = instance_labels.copy()

    # `ICITTE` is the current offset (before encoding)
    syms[_icitte_name] = self._offset

    # Current variables
    syms.update(self._vars)

    # Validate the node and its children
    validator = _ExprValidator(item, syms)
    validator.visit(item.expr)

    # Compile and evaluate expression node
    try:
        val = eval(compile(item.expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error_for_item(
            "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
            item,
        )

    # Only accept an exact `int` result
    val_type = type(val)

    if val_type is not int:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected result type `{}`".format(
                item.expr_str, val_type.__name__
            ),
            item,
        )

    return val
1206
# Evaluates the expression of the variable item `item` and stores the
# result as the current value of the variable.
def _handle_var_item(self, item: _Var):
    # Update variable
    self._vars[item.name] = self._eval_expr(item)
1210
# Evaluates the expression of the value item `item`, encodes the result
# on `item.len` bits using the current byte order, appends the encoded
# bytes to the generated data, and advances the current offset.
#
# Raises `ParseError` if the value doesn't fit `item.len` bits (as a
# signed or unsigned integer), or if there's no current byte order and
# `item.len` is greater than 8.
def _handle_val_item(self, item: _Val):
    # Compute value
    val = self._eval_expr(item)

    # Validate range (signed minimum to unsigned maximum)
    min_val = -(2 ** (item.len - 1))
    max_val = 2**item.len - 1

    if not (min_val <= val <= max_val):
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, item.len, item.expr_str, self._offset
            ),
            item,
        )

    # A byte order is required to encode more than one byte
    if self._bo is None and item.len > 8:
        _raise_error_for_item(
            "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # Encode result on 64 bits (to extend the sign bit whatever the
    # value of `item.len`); big endian is used when there's no current
    # byte order.
    is_be = self._bo in (None, ByteOrder.BE)
    bo_char = ">" if is_be else "<"
    type_char = "Q" if val >= 0 else "q"
    data = struct.pack("{}{}".format(bo_char, type_char), val)

    # Keep only the requested length
    len_bytes = item.len // 8

    if is_be:
        # Big endian: keep last bytes
        data = data[-len_bytes:]
    else:
        # Little endian: keep first bytes
        assert self._bo == ByteOrder.LE
        data = data[:len_bytes]

    # Append to current bytes and update offset
    self._data += data
    self._offset += len(data)
1256
# Handles the group item `item`: handles each of its items, in order,
# with `item` as the current group.
#
# The instance index of `item` is 0 the first time this method handles
# `item` and is incremented each following time.
def _handle_group_item(self, item: _Group):
    # First instance gets index 0, the next ones count up from there
    indexes = self._group_instance_indexes
    indexes[item] = indexes.get(item, -1) + 1

    # `item` is the current group while handling its items
    outer_group = self._cur_group
    self._cur_group = item

    for sub in item.items:
        self._handle_item(sub)

    # Back to the enclosing group
    self._cur_group = outer_group
1274
# Handles the repetition item `item`: handles its inner item
# `item.mul` times.
def _handle_rep_item(self, item: _Rep):
    remaining = item.mul

    while remaining > 0:
        self._handle_item(item.item)
        remaining -= 1
1278
# Handles the offset item `item`: its value becomes the current offset.
def _handle_offset_item(self, item: _Offset):
    new_offset = item.val
    self._offset = new_offset
1281
# Handles the item `item` by calling the handler registered for its
# exact type, if any (an item without a registered handler is
# ignored).
def _handle_item(self, item: _Item):
    handler = self._item_handlers.get(type(item))

    if handler is None:
        # Nothing to do for this type of item
        return

    handler(item)
1285
# Generates the data: resets the generation state, registers the item
# handlers, and then handles the main group.
def _gen(self):
    # Handler to call for each type of item
    self._item_handlers = {
        _Offset: self._handle_offset_item,
        _Rep: self._handle_rep_item,
        _Group: self._handle_group_item,
        _Var: self._handle_var_item,
        _Val: self._handle_val_item,
        _Bo: self._handle_bo_item,
        _Str: self._handle_str_item,
        _Byte: self._handle_byte_item,
    }  # type: Dict[type, Callable[[Any], None]]

    # Start without data, without any group instance, and outside any
    # group.
    self._cur_group = None
    self._group_instance_indexes = {}  # type: Dict[_Group, int]
    self._data = bytearray()

    # Everything starts with the main group
    self._handle_item(self._main_group)
1306
1307
# Parses the Normand input string `normand`, generates the
# corresponding bytes, and returns everything as a `ParseResult`
# instance.
#
# `init_variables` maps initial variable names (valid Python names) to
# integral values; `None` means no initial variables. The reserved name
# `ICITTE` must not be used as a variable name.
#
# `init_labels` maps initial label names (valid Python names) to
# integral values; `None` means no initial labels. The reserved name
# `ICITTE` must not be used as a label name.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Missing initial symbols mean empty dictionaries
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse first, and then generate from the result of the parser
    parsed = _Parser(normand, variables, labels).res
    gen = _Gen(parsed, variables, labels, init_offset, init_byte_order)

    # Wrap the final state of the generator
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1347
1348
# Builds the command-line argument parser, parses the command-line
# arguments with it, and returns the resulting namespace.
def _parse_cli_args():
    import argparse

    parser = argparse.ArgumentParser()

    # Initial state options
    parser.add_argument(
        "--offset",
        metavar="OFFSET",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    parser.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        help="initial byte order (`be` or `le`)",
    )
    parser.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    parser.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    version_str = "Normand {}".format(__version__)
    parser.add_argument("--version", action="version", version=version_str)

    # Optional input path
    parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="input path (none means standard input)",
    )

    return parser.parse_args()
1397
1398
1399 # Raises a command-line error with the message `msg`.
1400 def _raise_cli_error(msg: str) -> NoReturn:
1401 raise RuntimeError("Command-line error: {}".format(msg))
1402
1403
# Returns a dictionary of names to integers built from the list of
# strings `args`, each one having the `NAME=VAL` form, where `NAME` is
# a valid Python name and `VAL` is a non-negative decimal integer.
#
# Returns an empty dictionary if `args` is `None`.
#
# If the same name is assigned more than once, the last assignment
# wins.
#
# Raises a command-line error on the first malformed entry.
def _dict_from_arg(args: Optional[List[str]]):
    d = {}  # type: Dict[str, int]

    if args is None:
        return d

    # Build the pattern once: it's the same for all the entries.
    #
    # Named groups make the extraction below independent of any group
    # the name pattern itself could contain.
    assign_pat = re.compile(
        r"(?P<name>{})=(?P<val>\d+)$".format(_py_name_pat.pattern)
    )

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        # Actually record the assignment: without this, the returned
        # dictionary would always be empty.
        d[m.group("name")] = int(m.group("val"))

    return d
1419
1420
# CLI entry point without exception handling.
#
# Reads the Normand input (from the path given on the command line, or
# from the standard input without one), parses it with the initial
# variables, labels, offset, and byte order given on the command line,
# and writes the generated bytes to the standard output.
#
# On a parsing error, prints a message including the location of the
# error and exits through _fail().
#
# Raises a command-line error if the initial offset is negative.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print the raw bytes (binary stream of the standard output)
    sys.stdout.buffer.write(res.data)
1470
1471
1472 # Prints the exception message `msg` and exits with status 1.
1473 def _fail(msg: str) -> NoReturn:
1474 if not msg.endswith("."):
1475 msg += "."
1476
1477 print(msg, file=sys.stderr)
1478 sys.exit(1)
1479
1480
# CLI entry point: runs the command-line tool and reports any exception
# through _fail() using the message of the exception.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        reason = str(exc)
        _fail(reason)
1487
1488
# Run the command-line tool only when this module is executed directly,
# not when it's imported.
if __name__ == "__main__":
    _run_cli()
This page took 0.060089 seconds and 4 git commands to generate.