fd308eda4fe02ca3fb0f9ec8b3121ccc9539390e
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
# Module metadata and public API.
__author__ = "Philippe Proulx"
__version__ = "0.1.1"
__all__ = [
    "ByteOrder",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLoc",
    "VarsT",
    "__author__",
    "__version__",
]
36
37 import re
38 import abc
39 import ast
40 import sys
41 import enum
42 import struct
43 from typing import Any, Dict, List, Union, Pattern, Callable, NoReturn, Optional
44
45
# Text location (line and column numbers).
#
# Instances are built internally through `_create()`; calling the
# constructor directly raises `NotImplementedError`.
class TextLoc:
    # Creates and returns a text location for the line number `line_no`
    # and the column number `col_no`, bypassing the disabled
    # constructor.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Always raises `NotImplementedError`: this class isn't meant to be
    # instantiated directly.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by `_create()`.
    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    # Readable representation: item representations embed their text
    # location, so this keeps them meaningful.
    def __repr__(self):
        return "TextLoc({}, {})".format(repr(self._line_no), repr(self._col_no))
70
71
# Base of all the items which the parser creates.
class _Item:
    # Builds an item located at `text_loc` within the source text.
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Location of this item within the source text.
    @property
    def text_loc(self):
        return self._text_loc

    # Size of this item (bytes); each concrete item class provides its
    # own version.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
87
88
# Base of the items which a repetition may follow (the parser only
# tries to parse a repetition after an instance of this class).
class _RepableItem(_Item):
    pass
92
93
# One byte of data.
class _Byte(_RepableItem):
    # Builds a byte item having the value `val`, found at `text_loc`.
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Value of this byte.
    @property
    def val(self):
        return self._val

    # A byte item always occupies a single byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
111
112
# Literal string (already encoded).
class _Str(_RepableItem):
    # Builds a string item holding the encoded bytes `data`, found at
    # `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Bytes of the encoded string.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({!r}, {})".format(self._data, self._text_loc)
130
131
# Supported byte orders.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
140
141
# Byte order setting item.
class _Bo(_Item):
    # Builds a byte order item which sets the current byte order to
    # `bo`.
    #
    # `text_loc` is optional so that existing `_Bo(bo)` calls keep
    # working. The base initializer always runs so that the inherited
    # `text_loc` property returns `None` instead of raising
    # `AttributeError` when no location is given.
    def __init__(self, bo: ByteOrder, text_loc: Optional[TextLoc] = None):
        super().__init__(text_loc)  # type: ignore
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    # A byte order item doesn't generate any data.
    @property
    def size(self):
        return 0
154
155
# Named offset (label).
class _Label(_Item):
    # Builds a label item named `name`, found at `text_loc`.
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Name of this label.
    @property
    def name(self):
        return self._name

    # A label doesn't generate any data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Label({!r}, {})".format(self._name, self._text_loc)
173
174
# Offset setting item.
class _Offset(_Item):
    # Builds an offset item holding the offset value `val`, found at
    # `text_loc`.
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Value of the offset.
    @property
    def val(self):
        return self._val

    # An offset item doesn't generate any data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Offset({!r}, {})".format(self._val, self._text_loc)
192
193
194 # Mixin of containing an AST expression and its string.
195 class _ExprMixin:
196 def __init__(self, expr_str: str, expr: ast.Expression):
197 self._expr_str = expr_str
198 self._expr = expr
199
200 # Expression string.
201 @property
202 def expr_str(self):
203 return self._expr_str
204
205 # Expression node to evaluate.
206 @property
207 def expr(self):
208 return self._expr
209
210
# Variable assignment item.
class _Var(_Item, _ExprMixin):
    # Builds a variable item assigning the result of the expression
    # `expr` (string `expr_str`) to the variable named `name`, found at
    # `text_loc`.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    # A variable assignment doesn't generate any data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Var({!r}, {!r}, {!r}, {})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
233
234
# Value to encode, possibly on more than one byte.
class _Val(_RepableItem, _ExprMixin):
    # Builds a value item encoding the result of the expression `expr`
    # (string `expr_str`) on `len` bits, found at `text_loc`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length of the encoded value (bits).
    @property
    def len(self):
        return self._len

    # Length of the encoded value (bytes).
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        return "_Val({!r}, {!r}, {!r}, {})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
257
258
# Type of an item which holds an expression.
_ExprItemT = Union[_Val, _Var]
261
262
# Sequence of items forming a group.
class _Group(_RepableItem):
    # Builds a group item containing `items`, found at `text_loc`.
    #
    # The total size is computed once, here, from the size of each
    # contained item.
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items
        self._size = sum(subitem.size for subitem in items)

    # Items of this group.
    @property
    def items(self):
        return self._items

    # Sum of the sizes of the contained items (bytes).
    @property
    def size(self):
        return self._size

    def __repr__(self):
        return "_Group({!r}, {})".format(self._items, self._text_loc)
281
282
# Repetition of a repeatable item.
class _Rep(_Item):
    # Builds a repetition item repeating `item` `mul` times, found at
    # `text_loc`.
    def __init__(self, item: _RepableItem, mul: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._item = item
        self._mul = mul

    # Repeated item.
    @property
    def item(self):
        return self._item

    # Number of repetitions.
    @property
    def mul(self):
        return self._mul

    # Size of the repeated item times the multiplier (bytes).
    @property
    def size(self):
        return self._mul * self._item.size

    def __repr__(self):
        return "_Rep({!r}, {!r}, {})".format(self._item, self._mul, self._text_loc)
308
309
# Parsing error: a message along with the text location of the problem.
#
# Instances are built internally through `_create()`; calling the
# constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    # Creates and returns a parsing error having the message `msg` and
    # the text location `text_loc`, bypassing the disabled constructor.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by `_create()`.
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Location of the error within the source text.
    @property
    def text_loc(self):
        return self._text_loc
329
330
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
334
335
# Type of a dictionary mapping variable (or label) names to their
# integral values.
VarsT = Dict[str, int]
338
339
340 # Python name pattern.
341 _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
342
343
# Normand parser.
#
# The constructor accepts a Normand input. After building, use the `res`
# property to get the resulting main group.
#
# All the `_try_parse_*()` methods return `None` when the current
# substring doesn't start with what they look for, without consuming
# anything.
class _Parser:
    # Builds a parser to parse the Normand input `normand`, parsing
    # immediately.
    #
    # The names of `variables` and `labels` are the initially known
    # variable and label names.
    #
    # Raises `ParseError` on any parsing error.
    def __init__(self, normand: str, variables: VarsT, labels: VarsT):
        self._normand = normand
        self._at = 0
        self._line_no = 1
        self._col_no = 1
        self._label_names = set(labels.keys())
        self._var_names = set(variables.keys())
        self._parse()

    # Result (main group).
    @property
    def res(self):
        return self._res

    # Current text location.
    @property
    def _text_loc(self):
        return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
            self._line_no, self._col_no
        )

    # Returns `True` if this parser is done parsing.
    def _is_done(self):
        return self._at == len(self._normand)

    # Returns `True` if this parser isn't done parsing.
    def _isnt_done(self):
        return not self._is_done()

    # Raises a parse error, creating it using the message `msg` and the
    # current text location.
    def _raise_error(self, msg: str) -> NoReturn:
        _raise_error(msg, self._text_loc)

    # Tries to make the pattern `pat` match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success.
    def _try_parse_pat(self, pat: Pattern[str]):
        m = pat.match(self._normand, self._at)

        if m is None:
            return

        matched = m.group(0)

        # Skip matched string
        self._at += len(matched)

        # Update line number
        self._line_no += matched.count("\n")

        # Update column number: distance between the new position and
        # the last newline located before it. str.rfind() returns -1
        # without any such newline, which yields `self._at + 1`. This
        # is also correct when the only preceding newline is the very
        # first character of the input.
        self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

        # Return match object
        return m

    # Expects the pattern `pat` to match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success, or raising a parse
    # error with the message `error_msg` on error.
    def _expect_pat(self, pat: Pattern[str], error_msg: str):
        # Match
        m = self._try_parse_pat(pat)

        if m is None:
            # No match: error
            self._raise_error(error_msg)

        # Return match object
        return m

    # Pattern for _skip_ws_and_comments()
    _ws_or_syms_or_comments_pat = re.compile(
        r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
    )

    # Skips as many whitespaces, insignificant symbol characters, and
    # comments as possible.
    def _skip_ws_and_comments(self):
        self._try_parse_pat(self._ws_or_syms_or_comments_pat)

    # Pattern for _try_parse_hex_byte()
    _nibble_pat = re.compile(r"[A-Fa-f0-9]")

    # Tries to parse a hexadecimal byte, returning a byte item on
    # success.
    #
    # Raises a parse error if a first nibble isn't followed by a second
    # one.
    def _try_parse_hex_byte(self):
        # Match initial nibble
        m_high = self._try_parse_pat(self._nibble_pat)

        if m_high is None:
            # No match
            return

        # Expect another nibble
        self._skip_ws_and_comments()
        m_low = self._expect_pat(
            self._nibble_pat, "Expecting another hexadecimal nibble"
        )

        # Return item
        return _Byte(int(m_high.group(0) + m_low.group(0), 16), self._text_loc)

    # Patterns for _try_parse_bin_byte()
    _bin_byte_bit_pat = re.compile(r"[01]")
    _bin_byte_prefix_pat = re.compile(r"%")

    # Tries to parse a binary byte, returning a byte item on success.
    #
    # Raises a parse error if the `%` prefix isn't followed by exactly
    # eight bits.
    def _try_parse_bin_byte(self):
        # Match prefix
        if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
            # No match
            return

        # Expect eight bits
        bits = []  # type: List[str]

        for _ in range(8):
            self._skip_ws_and_comments()
            m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
            bits.append(m.group(0))

        # Return item
        return _Byte(int("".join(bits), 2), self._text_loc)

    # Patterns for _try_parse_dec_byte()
    _dec_byte_prefix_pat = re.compile(r"\$\s*")
    _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

    # Tries to parse a decimal byte, returning a byte item on success.
    #
    # Raises a parse error if the `$` prefix isn't followed by a
    # decimal constant within [-128, 255].
    def _try_parse_dec_byte(self):
        # Match prefix
        if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
            # No match
            return

        # Expect the value
        m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

        # Compute value
        val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)

        # Validate
        if val < -128 or val > 255:
            self._raise_error("Invalid decimal byte value {}".format(val))

        # Two's complement
        val = val % 256

        # Return item
        return _Byte(val, self._text_loc)

    # Tries to parse a byte (hexadecimal, binary, or decimal, in this
    # order), returning a byte item on success.
    def _try_parse_byte(self):
        for try_parse in (
            self._try_parse_hex_byte,
            self._try_parse_bin_byte,
            self._try_parse_dec_byte,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Patterns for _try_parse_str()
    _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
    _str_suffix_pat = re.compile(r'"')
    _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
    _str_escape_seq_pat = re.compile(r"\\(.)")

    # Strings corresponding to escape sequence characters
    _str_escape_seq_strs = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "e": "\x1b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
    }

    # Tries to parse a string, returning a string item on success.
    #
    # Raises a parse error if the literal string isn't terminated.
    def _try_parse_str(self):
        # Match prefix
        m = self._try_parse_pat(self._str_prefix_pat)

        if m is None:
            # No match
            return

        # Get encoding
        encoding = "utf8"

        if m.group("len") is not None:
            encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

        # Actual string
        m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

        # Expect end of string
        self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

        # Replace escape sequences in a single left-to-right pass so
        # that the result of one replacement is never reinterpreted as
        # part of another escape sequence (for example, an escaped
        # backslash followed by `n` must remain a backslash and `n`).
        # Unknown escape sequences are kept as is.
        val = self._str_escape_seq_pat.sub(
            lambda esc_m: self._str_escape_seq_strs.get(
                esc_m.group(1), esc_m.group(0)
            ),
            m.group(0),
        )

        # Encode
        data = val.encode(encoding)

        # Return item
        return _Str(data, self._text_loc)

    # Patterns for _try_parse_group()
    _group_prefix_pat = re.compile(r"\(")
    _group_suffix_pat = re.compile(r"\)")

    # Tries to parse a group, returning a group item on success.
    #
    # Raises a parse error if the group isn't terminated with `)`.
    def _try_parse_group(self):
        # Match prefix
        if self._try_parse_pat(self._group_prefix_pat) is None:
            # No match
            return

        # Parse items
        items = self._parse_items()

        # Expect end of group
        self._skip_ws_and_comments()
        self._expect_pat(
            self._group_suffix_pat, "Expecting an item or `)` (end of group)"
        )

        # Return item
        return _Group(items, self._text_loc)

    # Returns a stripped expression string and an AST expression node
    # from the expression string `expr_str` at text location `text_loc`.
    #
    # Raises a parse error at `text_loc` if `expr_str` isn't a valid
    # Python expression.
    def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
        # Create an expression node from the expression string
        expr_str = expr_str.strip().replace("\n", " ")

        try:
            expr = ast.parse(expr_str, mode="eval")
        except SyntaxError:
            _raise_error(
                "Invalid expression `{}`: invalid syntax".format(expr_str),
                text_loc,
            )

        return expr_str, expr

    # Patterns for _try_parse_val_and_len()
    _val_expr_pat = re.compile(r"([^}:]+):")
    _val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)")

    # Tries to parse a value and length, returning a value item on
    # success.
    #
    # Raises a parse error if the length is missing or if the
    # expression is invalid.
    def _try_parse_val_and_len(self):
        begin_text_loc = self._text_loc

        # Match
        m_expr = self._try_parse_pat(self._val_expr_pat)

        if m_expr is None:
            # No match
            return

        # Expect a length
        m_len = self._expect_pat(
            self._val_len_pat, "Expecting a length (multiple of eight bits)"
        )

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

        # Return item
        return _Val(
            expr_str,
            expr,
            int(m_len.group(1)),
            self._text_loc,
        )

    # Pattern for _try_parse_var()
    _var_pat = re.compile(
        r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
    )

    # Tries to parse a variable, returning a variable item on success.
    #
    # Raises a parse error if the name is reserved, if it's the name of
    # an existing label, or if the expression is invalid.
    def _try_parse_var(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._var_pat)

        if m is None:
            # No match
            return

        # Validate name
        name = m.group("name")

        if name == _icitte_name:
            self._raise_error("`{}` is a reserved variable name".format(_icitte_name))

        if name in self._label_names:
            self._raise_error("Existing label named `{}`".format(name))

        # Add to known variable names
        self._var_names.add(name)

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

        # Return item
        return _Var(
            name,
            expr_str,
            expr,
            self._text_loc,
        )

    # Pattern for _try_parse_bo_name()
    _bo_pat = re.compile(r"[bl]e")

    # Tries to parse a byte order name, returning a byte order item on
    # success.
    def _try_parse_bo_name(self):
        # Match
        m = self._try_parse_pat(self._bo_pat)

        if m is None:
            # No match
            return

        # Return corresponding item
        if m.group(0) == "be":
            return _Bo(ByteOrder.BE)
        else:
            assert m.group(0) == "le"
            return _Bo(ByteOrder.LE)

    # Patterns for _try_parse_val_or_var_or_bo()
    _val_var_bo_prefix_pat = re.compile(r"\{\s*")
    _val_var_bo_suffix_pat = re.compile(r"\s*}")

    # Tries to parse a value, a variable, or a byte order, returning an
    # item on success.
    #
    # Raises a parse error if what follows `{` is none of them or if
    # the closing `}` is missing.
    def _try_parse_val_or_var_or_bo(self):
        # Match prefix
        if self._try_parse_pat(self._val_var_bo_prefix_pat) is None:
            # No match
            return

        # Variable item?
        item = self._try_parse_var()

        if item is None:
            # Value item?
            item = self._try_parse_val_and_len()

        if item is None:
            # Byte order item?
            item = self._try_parse_bo_name()

        if item is None:
            # At this point it's invalid
            self._raise_error("Expecting a value, a variable, or a byte order")

        # Expect suffix
        self._expect_pat(self._val_var_bo_suffix_pat, "Expecting `}`")
        return item

    # Pattern for _try_parse_offset_val() and _try_parse_rep()
    _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

    # Tries to parse an offset value (after the initial `<`), returning
    # an offset item on success.
    def _try_parse_offset_val(self):
        # Match
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is None:
            # No match
            return

        # Return item (base 0: decimal or `0x`-prefixed hexadecimal)
        return _Offset(int(m.group(0), 0), self._text_loc)

    # Tries to parse a label name (after the initial `<`), returning a
    # label item on success.
    #
    # Raises a parse error if the name is reserved, if it's the name of
    # an existing label, or if it's the name of an existing variable.
    def _try_parse_label_name(self):
        # Match
        m = self._try_parse_pat(_py_name_pat)

        if m is None:
            # No match
            return

        # Validate
        name = m.group(0)

        if name == _icitte_name:
            self._raise_error("`{}` is a reserved label name".format(_icitte_name))

        if name in self._label_names:
            self._raise_error("Duplicate label name `{}`".format(name))

        if name in self._var_names:
            self._raise_error("Existing variable named `{}`".format(name))

        # Add to known label names
        self._label_names.add(name)

        # Return item
        return _Label(name, self._text_loc)

    # Patterns for _try_parse_label_or_offset()
    _label_offset_prefix_pat = re.compile(r"<\s*")
    _label_offset_suffix_pat = re.compile(r"\s*>")

    # Tries to parse a label or an offset, returning an item on success.
    #
    # Raises a parse error if what follows `<` is neither or if the
    # closing `>` is missing.
    def _try_parse_label_or_offset(self):
        # Match prefix
        if self._try_parse_pat(self._label_offset_prefix_pat) is None:
            # No match
            return

        # Offset item?
        item = self._try_parse_offset_val()

        if item is None:
            # Label item?
            item = self._try_parse_label_name()

        if item is None:
            # At this point it's invalid
            self._raise_error("Expecting a label name or an offset value")

        # Expect suffix
        self._expect_pat(self._label_offset_suffix_pat, "Expecting `>`")
        return item

    # Tries to parse a base item (anything except a repetition),
    # returning it on success.
    #
    # The order is: byte, string, value/variable/byte order,
    # label/offset, and group.
    def _try_parse_base_item(self):
        for try_parse in (
            self._try_parse_byte,
            self._try_parse_str,
            self._try_parse_val_or_var_or_bo,
            self._try_parse_label_or_offset,
            self._try_parse_group,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Pattern for _try_parse_rep()
    _rep_prefix_pat = re.compile(r"\*\s*")

    # Tries to parse a repetition, returning the multiplier on success,
    # or 1 otherwise.
    #
    # Raises a parse error if `*` isn't followed by a positive integral
    # constant.
    def _try_parse_rep(self):
        self._skip_ws_and_comments()

        # Match prefix
        if self._try_parse_pat(self._rep_prefix_pat) is None:
            # No match
            return 1

        # Expect and return a decimal or hexadecimal multiplier
        self._skip_ws_and_comments()
        m = self._expect_pat(
            self._pos_const_int_pat, "Expecting a positive integral multiplier"
        )
        return int(m.group(0), 0)

    # Tries to parse an item, possibly followed by a repetition,
    # returning `True` on success.
    #
    # Appends any parsed item to `items`.
    def _try_append_item(self, items: List[_Item]):
        self._skip_ws_and_comments()

        # Parse a base item
        item = self._try_parse_base_item()

        if item is None:
            # No item
            return False

        # Parse repetition if the base item is repeatable
        if isinstance(item, _RepableItem):
            rep = self._try_parse_rep()

            if rep == 0:
                # No item, but that's okay
                return True
            elif rep > 1:
                # Convert to repetition item
                item = _Rep(item, rep, self._text_loc)

        items.append(item)
        return True

    # Parses and returns items, skipping whitespaces, insignificant
    # symbols, and comments when allowed, and stopping at the first
    # unknown character.
    def _parse_items(self) -> List[_Item]:
        items = []  # type: List[_Item]

        while self._isnt_done():
            # Try to append item
            if not self._try_append_item(items):
                # Unknown at this point
                break

        return items

    # Parses the whole Normand input, setting `self._res` to the main
    # group item on success.
    #
    # Raises a parse error if anything remains after the last item.
    def _parse(self):
        if not self._normand.strip():
            # Special case to make sure there's something to consume
            self._res = _Group([], self._text_loc)
            return

        # Parse first level items
        items = self._parse_items()

        # Make sure there's nothing left
        self._skip_ws_and_comments()

        if self._isnt_done():
            self._raise_error(
                "Unexpected character `{}`".format(self._normand[self._at])
            )

        # Set main group item
        self._res = _Group(items, self._text_loc)
929
930
# What parse() returns.
#
# Instances are built internally through `_create()`; calling the
# constructor directly raises `NotImplementedError`.
class ParseResult:
    # Creates and returns a parse result from the generated data, the
    # updated variables, labels, and offset, and the updated byte order,
    # bypassing the disabled constructor.
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by `_create()`.
    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
988
989
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
994
995
996 # The `ICITTE` reserved name.
997 _icitte_name = "ICITTE"
998
999
# Validator of the expression of a value or variable item.
#
# Visiting an expression node raises a parse error if a name which
# isn't `ICITTE` and isn't a key of `syms` is found, except for the
# first node visited under a call node (the called function).
class _ExprValidator(ast.NodeVisitor):
    # Builds a validator for the expression item `item`, `syms` being
    # the known symbols (variables and labels).
    def __init__(self, item: _ExprItemT, syms: VarsT):
        self._item = item
        self._syms = syms
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # The next visited node won't be checked
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            # Make sure the name refers to a known variable/label name
            name_is_known = node.id == _icitte_name or node.id in self._syms

            if not name_is_known:
                _raise_error(
                    "Unknown variable/label name `{}` in expression `{}`".format(
                        node.id, self._item.expr_str
                    ),
                    self._item.text_loc,
                )

        # TODO: Restrict the set of allowed node types

        super().generic_visit(node)

        # Whatever was visited, the exemption is over
        self._parent_is_call = False
1024
1025
# Registry of the labels of each group instance.
#
# A group instance is one iteration of a given group; the instances of
# a given group are indexed from zero, in the order of the add() calls.
class _GroupInstanceLabels:
    def __init__(self):
        self._instance_labels = {}  # type: Dict[_Group, Dict[int, VarsT]]

    # Registers a copy of the labels `labels` as the labels of a new
    # instance of `group`.
    def add(self, group: _Group, labels: VarsT):
        labels_per_instance = self._instance_labels.setdefault(group, {})
        next_index = len(labels_per_instance)
        labels_per_instance[next_index] = labels.copy()

    # Returns the labels (not a copy) of the instance `instance_index`
    # of the group `group`.
    def labels(self, group: _Group, instance_index: int):
        labels_per_instance = self._instance_labels[group]
        return labels_per_instance[instance_index]
1045
1046
1047 # Generator of data and labels from a group item.
1048 #
1049 # Generation happens in memory at construction time. After building, use
1050 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1051 # get the resulting context.
1052 class _Gen:
1053 def __init__(
1054 self,
1055 group: _Group,
1056 variables: VarsT,
1057 labels: VarsT,
1058 offset: int,
1059 bo: Optional[ByteOrder],
1060 ):
1061 self._group_instance_labels = _GroupInstanceLabels()
1062 self._resolve_labels(group, offset, labels.copy())
1063 self._vars = variables.copy()
1064 self._offset = offset
1065 self._bo = bo
1066 self._main_group = group
1067 self._gen()
1068
1069 # Generated bytes.
1070 @property
1071 def data(self):
1072 return self._data
1073
1074 # Updated variables.
1075 @property
1076 def variables(self):
1077 return self._vars
1078
1079 # Updated main group labels.
1080 @property
1081 def labels(self):
1082 return self._group_instance_labels.labels(self._main_group, 0)
1083
1084 # Updated offset.
1085 @property
1086 def offset(self):
1087 return self._offset
1088
1089 # Updated byte order.
1090 @property
1091 def bo(self):
1092 return self._bo
1093
1094 # Fills `self._group_instance_labels` with the labels for each group
1095 # instance in `item`, starting at current offset `offset` with the
1096 # current labels `labels`.
1097 #
1098 # Returns the new current offset.
1099 def _resolve_labels(self, item: _Item, offset: int, labels: VarsT) -> int:
1100 if type(item) is _Group:
1101 # First pass: compute immediate labels of this instance
1102 group_labels = labels.copy()
1103 group_offset = offset
1104
1105 for subitem in item.items:
1106 if type(subitem) is _Offset:
1107 group_offset = subitem.val
1108 elif type(subitem) is _Label:
1109 assert subitem.name not in group_labels
1110 group_labels[subitem.name] = group_offset
1111 else:
1112 group_offset += subitem.size
1113
1114 # Add to group instance labels
1115 self._group_instance_labels.add(item, group_labels)
1116
1117 # Second pass: handle each item
1118 for subitem in item.items:
1119 offset = self._resolve_labels(subitem, offset, group_labels)
1120 elif type(item) is _Rep:
1121 for _ in range(item.mul):
1122 offset = self._resolve_labels(item.item, offset, labels)
1123 elif type(item) is _Offset:
1124 offset = item.val
1125 else:
1126 offset += item.size
1127
1128 return offset
1129
1130 def _handle_byte_item(self, item: _Byte):
1131 self._data.append(item.val)
1132 self._offset += item.size
1133
1134 def _handle_str_item(self, item: _Str):
1135 self._data += item.data
1136 self._offset += item.size
1137
1138 def _handle_bo_item(self, item: _Bo):
1139 self._bo = item.bo
1140
1141 def _eval_expr(self, item: _ExprItemT):
1142 # Get the labels of the current group instance as the initial
1143 # symbols (copied because we're adding stuff).
1144 assert self._cur_group is not None
1145 syms = self._group_instance_labels.labels(
1146 self._cur_group, self._group_instance_indexes[self._cur_group]
1147 ).copy()
1148
1149 # Set the `ICITTE` name to the current offset (before encoding)
1150 syms[_icitte_name] = self._offset
1151
1152 # Add the current variables
1153 syms.update(self._vars)
1154
1155 # Validate the node and its children
1156 _ExprValidator(item, syms).visit(item.expr)
1157
1158 # Compile and evaluate expression node
1159 try:
1160 val = eval(compile(item.expr, "", "eval"), None, syms)
1161 except Exception as exc:
1162 _raise_error_for_item(
1163 "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
1164 item,
1165 )
1166
1167 # Validate result
1168 if type(val) is not int:
1169 _raise_error_for_item(
1170 "Invalid expression `{}`: unexpected result type `{}`".format(
1171 item.expr_str, type(val).__name__
1172 ),
1173 item,
1174 )
1175
1176 return val
1177
1178 def _handle_var_item(self, item: _Var):
1179 # Update variable
1180 self._vars[item.name] = self._eval_expr(item)
1181
def _handle_val_item(self, item: _Val):
    # Handles the value item `item`: evaluates its expression, checks
    # that the result fits `item.len` bits, and appends the
    # `item.len // 8` encoded bytes to the generated data, also
    # advancing the current offset accordingly.
    #
    # Calls _raise_error_for_item() when the result is out of range or
    # when more than 8 bits are requested while no byte order is set.
    num = self._eval_expr(item)
    bit_len = item.len

    # Accepted range: from the lowest signed value to the highest
    # unsigned value which `bit_len` bits may hold.
    lowest = -(2 ** (bit_len - 1))
    highest = 2**bit_len - 1

    if not lowest <= num <= highest:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                num, bit_len, item.expr_str, self._offset
            ),
            item,
        )

    # A byte order is only needed to encode more than a single byte
    if bit_len > 8 and self._bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # No byte order is treated like big endian
    big_endian = self._bo in (None, ByteOrder.BE)

    # Always encode on 64 bits (unsigned when positive, signed when
    # negative) so that the sign bit is extended whatever `bit_len`.
    fmt = (">" if big_endian else "<") + ("Q" if num >= 0 else "q")
    packed = struct.pack(fmt, num)

    # Only keep the least significant `byte_count` bytes: they are the
    # last ones in big endian and the first ones in little endian.
    byte_count = bit_len // 8

    if big_endian:
        kept = packed[-byte_count:]
    else:
        assert self._bo == ByteOrder.LE
        kept = packed[:byte_count]

    # Append to the generated data and advance the offset
    self._data += kept
    self._offset += len(kept)
1227
def _handle_group_item(self, item: _Group):
    # Handles the group item `item`: bumps its instance index (0 the
    # first time it's handled), makes it the current group while each
    # of its subitems is handled in order, and then restores the
    # previous current group.
    indexes = self._group_instance_indexes
    indexes[item] = indexes.get(item, -1) + 1

    # Enter the group, remembering the enclosing one
    enclosing_group = self._cur_group
    self._cur_group = item

    for child in item.items:
        self._handle_item(child)

    # Leave the group
    self._cur_group = enclosing_group
1245
def _handle_rep_item(self, item: _Rep):
    # Handles the repetition item `item`: handles the repeated item
    # `item.item` exactly `item.mul` times (not at all when `item.mul`
    # is zero or negative).
    repeated = item.item
    remaining_passes = range(item.mul)

    for _ in remaining_passes:
        self._handle_item(repeated)
1249
def _handle_offset_item(self, item: _Offset):
    # Handles the offset item `item`: replaces the current offset with
    # `item.val` (the generated data itself is left untouched).
    new_offset = item.val
    self._offset = new_offset
1252
def _handle_item(self, item: _Item):
    # Dispatches `item` to the handler registered for its exact type
    # in `self._item_handlers`; an item of which the type has no
    # registered handler is silently ignored.
    handler = self._item_handlers.get(type(item))

    if handler is None:
        return

    handler(item)
1256
def _gen(self):
    # Generates the data: sets the initial generation state, installs
    # the table of item handlers, and then handles the main group.

    # Initial state: no data yet, no group instance seen, and no
    # current group.
    self._cur_group = None
    self._group_instance_indexes = {}  # type: Dict[_Group, int]
    self._data = bytearray()

    # Item type to handling method
    handlers = {}  # type: Dict[type, Callable[[Any], None]]
    handlers[_Byte] = self._handle_byte_item
    handlers[_Str] = self._handle_str_item
    handlers[_Bo] = self._handle_bo_item
    handlers[_Val] = self._handle_val_item
    handlers[_Var] = self._handle_var_item
    handlers[_Group] = self._handle_group_item
    handlers[_Rep] = self._handle_rep_item
    handlers[_Offset] = self._handle_offset_item
    self._item_handlers = handlers

    # Handling the main group handles all the items it contains
    self._handle_item(self._main_group)
1277
1278
# Parses the Normand input string `normand`, generates the
# corresponding bytes, and returns them within a `ParseResult`
# instance.
#
# `init_variables` maps initial variable names (valid Python names) to
# integral values; `init_labels` does the same for initial label names.
# None of those names may be the reserved name `ICITTE`. Leaving one of
# them to `None` is the same as passing an empty dictionary.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse first: the generator works from the parser's result
    main_group = _Parser(normand, variables, labels).res
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1318
1319
# Builds the command-line argument parser, parses the command-line
# arguments with it, and returns the resulting namespace.
def _parse_cli_args():
    import argparse

    parser = argparse.ArgumentParser()
    add_arg = parser.add_argument

    # Initial state options
    add_arg(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    add_arg(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Repeatable `NAME=VAL` options: each use appends to a list
    add_arg(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    add_arg(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    version_text = "Normand {}".format(__version__)
    add_arg("--version", action="version", version=version_text)

    # Optional input path
    add_arg(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return parser.parse_args()
1368
1369
# Reports a command-line error: always raises `RuntimeError` with the
# message `msg` prefixed with `Command-line error: `.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
1373
1374
# Returns a dictionary of names to integers built from the list of
# strings `args`, each one being a `NAME=VAL` entry where `NAME`
# matches `_py_name_pat` and `VAL` is a sequence of decimal digits.
#
# Returns an empty dictionary if `args` is `None`. If a name is given
# more than once, then its last value wins.
#
# Raises a command-line error (see _raise_cli_error()) on the first
# entry which doesn't have the expected form.
def _dict_from_arg(args: Optional[List[str]]):
    d = {}  # type: Dict[str, int]

    if args is None:
        return d

    # Build the pattern once instead of once per entry.
    #
    # NOTE(review): the group numbers used below assume that
    # `_py_name_pat` has no capturing group of its own; confirm.
    assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        # Keep the entry: without this the function validates the
        # entries but always returns an empty dictionary.
        d[m.group(1)] = int(m.group(2))

    return d
1390
1391
# CLI entry point without exception handling.
#
# Reads the Normand input (from the file at the `PATH` argument, or
# from the standard input without one), generates the data with
# parse() using the initial variables, labels, offset, and byte order
# given on the command line, and writes the resulting bytes to the
# standard output.
#
# A `ParseError` is reported with _fail(), prefixed with the absolute
# input path (if any) and the line and column numbers of the error.
# Any other exception, including the command-line errors raised with
# _raise_cli_error(), propagates to the caller.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message now includes the offending value
    # instead of a literal, unformatted `{}`)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The argument parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        # _fail() exits, so `res` is always set past this statement
        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print (raw bytes, hence the underlying binary buffer)
    sys.stdout.buffer.write(res.data)
1441
1442
# Prints the message `msg` to the standard error, making sure that it
# ends with a period, and then exits with status 1.
def _fail(msg: str) -> NoReturn:
    text = msg if msg.endswith(".") else msg + "."
    print(text, file=sys.stderr)
    sys.exit(1)
1450
1451
# CLI entry point: runs _try_run_cli() and turns any exception it
# raises into an error message followed by an exit with status 1 (see
# _fail()).
def _run_cli():
    try:
        _try_run_cli()
    except Exception as failure:
        reason = str(failure)
        _fail(reason)
1458
1459
# Only run the command-line tool when this file is executed as a
# script, not when it's imported as a module.
if __name__ == "__main__":
    _run_cli()
This page took 0.056783 seconds and 3 git commands to generate.