Make text locations of some items and errors more precise
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
# Module metadata, followed by the names which make up the public API
# (what `from normand import *` exports).
__author__ = "Philippe Proulx"
__version__ = "0.2.0"
__all__ = [
    "ByteOrder",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLoc",
    "VarsT",
    "__author__",
    "__version__",
]
36
37 import re
38 import abc
39 import ast
40 import sys
41 import enum
42 import struct
43 from typing import Any, Dict, List, Union, Pattern, Callable, NoReturn, Optional
44
45
# Text location (line and column numbers).
#
# Calling the class directly raises `NotImplementedError`: instances
# are built through the private _create() class method.
class TextLoc:
    # Builds and returns a text location without going through
    # __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    # The `__repr__()` methods of the items interpolate their text
    # location: without this, they'd only show an opaque object address.
    def __repr__(self):
        return "TextLoc({}, {})".format(self._line_no, self._col_no)
70
71
# Base of all the items: remembers where the item is located within
# the source text.
class _Item:
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Text location of this item within the source.
    @property
    def text_loc(self):
        return self._text_loc

    # Size of this item (bytes); each concrete item overrides this.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
87
88
# Marker base class of the items which may be followed by a repetition
# (the parser checks `isinstance(item, _RepableItem)`).
class _RepableItem(_Item):
    pass
92
93
# Item holding the value of a single byte.
class _Byte(_RepableItem):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Value of the byte.
    @property
    def val(self):
        return self._val

    # A byte item always occupies exactly one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
111
112
# Item holding the already encoded bytes of a literal string.
class _Str(_RepableItem):
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes of the string.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        data_repr = repr(self._data)
        return "_Str({}, {})".format(data_repr, self._text_loc)
130
131
# Byte order used to encode multi-byte values.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
140
141
# Item which sets the current byte order.
class _Bo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    # Doesn't produce any data.
    @property
    def size(self):
        return 0
155
156
# Item naming the offset at which it's located.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Name of the label.
    @property
    def name(self):
        return self._name

    # Doesn't produce any data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        name_repr = repr(self._name)
        return "_Label({}, {})".format(name_repr, self._text_loc)
174
175
# Item which sets the current offset.
class _Offset(_Item):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # New offset value.
    @property
    def val(self):
        return self._val

    # Doesn't produce any data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        val_repr = repr(self._val)
        return "_Offset({}, {})".format(val_repr, self._text_loc)
193
194
195 # Mixin of containing an AST expression and its string.
196 class _ExprMixin:
197 def __init__(self, expr_str: str, expr: ast.Expression):
198 self._expr_str = expr_str
199 self._expr = expr
200
201 # Expression string.
202 @property
203 def expr_str(self):
204 return self._expr_str
205
206 # Expression node to evaluate.
207 @property
208 def expr(self):
209 return self._expr
210
211
# Item which assigns the result of an expression to a named variable.
class _Var(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        # `_Item.__init__()` doesn't chain to the mixin: initialize both
        # bases explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    # Doesn't produce any data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        parts = (
            repr(self._name),
            repr(self._expr_str),
            repr(self._expr),
            self._text_loc,
        )
        return "_Var({}, {}, {}, {})".format(*parts)
234
235
# Item encoding the result of an expression on a given number of bits
# (possibly more than one byte).
class _Val(_RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        # `_Item.__init__()` doesn't chain to the mixin: initialize both
        # bases explicitly.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Length converted to bytes.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        parts = (
            repr(self._expr_str),
            repr(self._expr),
            repr(self._len),
            self._text_loc,
        )
        return "_Val({}, {}, {}, {})".format(*parts)
258
259
# Type of the items which carry an expression to evaluate.
_ExprItemT = Union[_Val, _Var]
262
263
# Item containing a sequence of other items.
class _Group(_RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

        # Computed once, at construction time
        self._size = sum(item.size for item in items)

    # Contained items.
    @property
    def items(self):
        return self._items

    # Total size of the contained items (bytes).
    @property
    def size(self):
        return self._size

    def __repr__(self):
        items_repr = repr(self._items)
        return "_Group({}, {})".format(items_repr, self._text_loc)
282
283
# Item repeating another item a given number of times.
class _Rep(_Item):
    def __init__(self, item: _RepableItem, mul: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._mul = mul
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    # Repetition multiplier.
    @property
    def mul(self):
        return self._mul

    # Size of the repeated item times the multiplier.
    @property
    def size(self):
        return self._item.size * self._mul

    def __repr__(self):
        parts = (repr(self._item), repr(self._mul), self._text_loc)
        return "_Rep({}, {}, {})".format(*parts)
309
310
# Parsing error: a message with the text location at which the problem
# was found.
#
# Calling the class directly raises `NotImplementedError`: instances
# are built through the private _create() class method.
class ParseError(RuntimeError):
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create().
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
330
331
# Builds a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
335
336
# Type of a dictionary mapping variable (or label) names to their
# integral values.
VarsT = Dict[str, int]
339
340
341 # Python name pattern.
342 _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
343
344
# Normand parser.
#
# The constructor accepts a Normand input and parses it immediately.
# After building, use the `res` property to get the resulting main
# group.
class _Parser:
    # Builds a parser to parse the Normand input `normand`, parsing
    # immediately.
    #
    # The keys of `variables` and `labels` are the variable and label
    # names which are known before parsing starts.
    #
    # Raises `ParseError` if `normand` is invalid.
    def __init__(self, normand: str, variables: VarsT, labels: VarsT):
        self._normand = normand
        self._at = 0
        self._line_no = 1
        self._col_no = 1
        self._label_names = set(labels.keys())
        self._var_names = set(variables.keys())
        self._parse()

    # Result (main group).
    @property
    def res(self):
        return self._res

    # Current text location.
    @property
    def _text_loc(self):
        return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
            self._line_no, self._col_no
        )

    # Returns `True` if this parser is done parsing.
    def _is_done(self):
        return self._at == len(self._normand)

    # Returns `True` if this parser isn't done parsing.
    def _isnt_done(self):
        return not self._is_done()

    # Raises a parse error, creating it using the message `msg` and the
    # current text location.
    def _raise_error(self, msg: str) -> NoReturn:
        _raise_error(msg, self._text_loc)

    # Tries to make the pattern `pat` match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success.
    #
    # Returns `None` without changing anything if `pat` doesn't match.
    def _try_parse_pat(self, pat: Pattern[str]):
        m = pat.match(self._normand, self._at)

        if m is None:
            return

        # Skip matched string
        self._at += len(m.group(0))

        # Update line number
        self._line_no += m.group(0).count("\n")

        # Update column number: one-based distance from the last newline
        # located before the current position. str.rfind() returns -1
        # without any newline, which naturally yields `self._at + 1`,
        # and this stays correct when the only newline is the very first
        # character of the input.
        self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

        # Return match object
        return m

    # Expects the pattern `pat` to match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success, or raising a parse
    # error with the message `error_msg` on error.
    def _expect_pat(self, pat: Pattern[str], error_msg: str):
        # Match
        m = self._try_parse_pat(pat)

        if m is None:
            # No match: error
            self._raise_error(error_msg)

        # Return match object
        return m

    # Pattern for _skip_ws_and_comments()
    _ws_or_syms_or_comments_pat = re.compile(
        r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
    )

    # Skips as many whitespaces, insignificant symbol characters, and
    # comments as possible.
    def _skip_ws_and_comments(self):
        self._try_parse_pat(self._ws_or_syms_or_comments_pat)

    # Pattern for _try_parse_hex_byte()
    _nibble_pat = re.compile(r"[A-Fa-f0-9]")

    # Tries to parse a hexadecimal byte, returning a byte item on
    # success.
    def _try_parse_hex_byte(self):
        begin_text_loc = self._text_loc

        # Match initial nibble
        m_high = self._try_parse_pat(self._nibble_pat)

        if m_high is None:
            # No match
            return

        # Expect another nibble
        self._skip_ws_and_comments()
        m_low = self._expect_pat(
            self._nibble_pat, "Expecting another hexadecimal nibble"
        )

        # Return item
        return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)

    # Patterns for _try_parse_bin_byte()
    _bin_byte_bit_pat = re.compile(r"[01]")
    _bin_byte_prefix_pat = re.compile(r"%")

    # Tries to parse a binary byte, returning a byte item on success.
    def _try_parse_bin_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
            # No match
            return

        # Expect eight bits
        bits = []  # type: List[str]

        for _ in range(8):
            self._skip_ws_and_comments()
            m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
            bits.append(m.group(0))

        # Return item
        return _Byte(int("".join(bits), 2), begin_text_loc)

    # Patterns for _try_parse_dec_byte()
    _dec_byte_prefix_pat = re.compile(r"\$\s*")
    _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

    # Tries to parse a decimal byte, returning a byte item on success.
    def _try_parse_dec_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
            # No match
            return

        # Expect the value
        m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

        # Compute value
        val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)

        # Validate
        if val < -128 or val > 255:
            _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

        # Two's complement
        val = val % 256

        # Return item
        return _Byte(val, begin_text_loc)

    # Tries to parse a byte, returning a byte item on success.
    def _try_parse_byte(self):
        # Hexadecimal, binary, and then decimal
        for parse_func in (
            self._try_parse_hex_byte,
            self._try_parse_bin_byte,
            self._try_parse_dec_byte,
        ):
            item = parse_func()

            if item is not None:
                return item

    # Patterns for _try_parse_str()
    _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
    _str_suffix_pat = re.compile(r'"')
    _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
    _str_escape_seq_pat = re.compile(r"\\(.)")

    # Strings corresponding to escape sequence characters
    _str_escape_seq_strs = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "e": "\x1b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
    }

    # Tries to parse a string, returning a string item on success.
    def _try_parse_str(self):
        begin_text_loc = self._text_loc

        # Match prefix
        m = self._try_parse_pat(self._str_prefix_pat)

        if m is None:
            # No match
            return

        # Get encoding
        encoding = "utf8"

        if m.group("len") is not None:
            encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

        # Actual string
        m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

        # Expect end of string
        self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

        # Replace escape sequences in a single left-to-right pass so
        # that the second backslash of an escaped backslash is never
        # taken for the beginning of another escape sequence (replacing
        # one sequence kind at a time would turn `\\n` into a backslash
        # followed by a newline). Unknown escape sequences remain as is.
        val = self._str_escape_seq_pat.sub(
            lambda esc_m: self._str_escape_seq_strs.get(
                esc_m.group(1), esc_m.group(0)
            ),
            m.group(0),
        )

        # Encode
        data = val.encode(encoding)

        # Return item
        return _Str(data, begin_text_loc)

    # Patterns for _try_parse_group()
    _group_prefix_pat = re.compile(r"\(")
    _group_suffix_pat = re.compile(r"\)")

    # Tries to parse a group, returning a group item on success.
    def _try_parse_group(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._group_prefix_pat) is None:
            # No match
            return

        # Parse items
        items = self._parse_items()

        # Expect end of group
        self._skip_ws_and_comments()
        self._expect_pat(
            self._group_suffix_pat, "Expecting an item or `)` (end of group)"
        )

        # Return item
        return _Group(items, begin_text_loc)

    # Returns a stripped expression string and an AST expression node
    # from the expression string `expr_str` at text location `text_loc`.
    #
    # Raises a parse error located at `text_loc` if `expr_str` isn't a
    # syntactically valid Python expression.
    def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
        # Create an expression node from the expression string
        expr_str = expr_str.strip().replace("\n", " ")

        try:
            expr = ast.parse(expr_str, mode="eval")
        except SyntaxError:
            _raise_error(
                "Invalid expression `{}`: invalid syntax".format(expr_str),
                text_loc,
            )

        return expr_str, expr

    # Patterns for _try_parse_val_and_len()
    _val_expr_pat = re.compile(r"([^}:]+):")
    _val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)")

    # Tries to parse a value and length, returning a value item on
    # success.
    def _try_parse_val_and_len(self):
        begin_text_loc = self._text_loc

        # Match
        m_expr = self._try_parse_pat(self._val_expr_pat)

        if m_expr is None:
            # No match
            return

        # Expect a length
        m_len = self._expect_pat(
            self._val_len_pat, "Expecting a length (multiple of eight bits)"
        )

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

        # Return item
        return _Val(
            expr_str,
            expr,
            int(m_len.group(1)),
            begin_text_loc,
        )

    # Pattern for _try_parse_var()
    _var_pat = re.compile(
        r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
    )

    # Tries to parse a variable, returning a variable item on success.
    def _try_parse_var(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._var_pat)

        if m is None:
            # No match
            return

        # Validate name
        name = m.group("name")

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Existing label named `{}`".format(name), begin_text_loc)

        # Add to known variable names
        self._var_names.add(name)

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

        # Return item
        return _Var(
            name,
            expr_str,
            expr,
            begin_text_loc,
        )

    # Pattern for _try_parse_bo_name()
    _bo_pat = re.compile(r"[bl]e")

    # Tries to parse a byte order name, returning a byte order item on
    # success.
    def _try_parse_bo_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._bo_pat)

        if m is None:
            # No match
            return

        # Return corresponding item
        if m.group(0) == "be":
            return _Bo(ByteOrder.BE, begin_text_loc)
        else:
            assert m.group(0) == "le"
            return _Bo(ByteOrder.LE, begin_text_loc)

    # Patterns for _try_parse_val_or_var_or_bo()
    _val_var_bo_prefix_pat = re.compile(r"\{\s*")
    _val_var_bo_suffix_pat = re.compile(r"\s*}")

    # Tries to parse a value, a variable, or a byte order, returning an
    # item on success.
    def _try_parse_val_or_var_or_bo(self):
        # Match prefix
        if self._try_parse_pat(self._val_var_bo_prefix_pat) is None:
            # No match
            return

        # Variable item?
        item = self._try_parse_var()

        if item is None:
            # Value item?
            item = self._try_parse_val_and_len()

            if item is None:
                # Byte order item?
                item = self._try_parse_bo_name()

                if item is None:
                    # At this point it's invalid
                    self._raise_error("Expecting a value, a variable, or a byte order")

        # Expect suffix
        self._expect_pat(self._val_var_bo_suffix_pat, "Expecting `}`")
        return item

    # Pattern for _try_parse_offset_val() and _try_parse_rep()
    _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

    # Tries to parse an offset value (after the initial `<`), returning
    # an offset item on success.
    def _try_parse_offset_val(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is None:
            # No match
            return

        # Return item (base 0: decimal or `0x`-prefixed hexadecimal)
        return _Offset(int(m.group(0), 0), begin_text_loc)

    # Tries to parse a label name (after the initial `<`), returning a
    # label item on success.
    def _try_parse_label_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(_py_name_pat)

        if m is None:
            # No match
            return

        # Validate
        name = m.group(0)

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

        if name in self._var_names:
            _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

        # Add to known label names
        self._label_names.add(name)

        # Return item
        return _Label(name, begin_text_loc)

    # Patterns for _try_parse_label_or_offset()
    _label_offset_prefix_pat = re.compile(r"<\s*")
    _label_offset_suffix_pat = re.compile(r"\s*>")

    # Tries to parse a label or an offset, returning an item on success.
    def _try_parse_label_or_offset(self):
        # Match prefix
        if self._try_parse_pat(self._label_offset_prefix_pat) is None:
            # No match
            return

        # Offset item?
        item = self._try_parse_offset_val()

        if item is None:
            # Label item?
            item = self._try_parse_label_name()

            if item is None:
                # At this point it's invalid
                self._raise_error("Expecting a label name or an offset value")

        # Expect suffix
        self._expect_pat(self._label_offset_suffix_pat, "Expecting `>`")
        return item

    # Tries to parse a base item (anything except a repetition),
    # returning it on success.
    def _try_parse_base_item(self):
        # In order: byte, string, value/variable/byte order,
        # label/offset, and group
        for parse_func in (
            self._try_parse_byte,
            self._try_parse_str,
            self._try_parse_val_or_var_or_bo,
            self._try_parse_label_or_offset,
            self._try_parse_group,
        ):
            item = parse_func()

            if item is not None:
                return item

    # Pattern for _try_parse_rep()
    _rep_prefix_pat = re.compile(r"\*\s*")

    # Tries to parse a repetition, returning the multiplier on success,
    # or 1 otherwise.
    def _try_parse_rep(self):
        # Match prefix
        if self._try_parse_pat(self._rep_prefix_pat) is None:
            # No match
            return 1

        # Expect and return a constant integer multiplier (decimal or
        # `0x`-prefixed hexadecimal)
        self._skip_ws_and_comments()
        m = self._expect_pat(
            self._pos_const_int_pat, "Expecting a positive integral multiplier"
        )
        return int(m.group(0), 0)

    # Tries to parse an item, possibly followed by a repetition,
    # returning `True` on success.
    #
    # Appends any parsed item to `items`.
    def _try_append_item(self, items: List[_Item]):
        self._skip_ws_and_comments()

        # Parse a base item
        item = self._try_parse_base_item()

        if item is None:
            # No item
            return False

        # Parse repetition if the base item is repeatable
        if isinstance(item, _RepableItem):
            self._skip_ws_and_comments()
            rep_text_loc = self._text_loc
            rep = self._try_parse_rep()

            if rep == 0:
                # No item, but that's okay
                return True
            elif rep > 1:
                # Convert to repetition item
                item = _Rep(item, rep, rep_text_loc)

        items.append(item)
        return True

    # Parses and returns items, skipping whitespaces, insignificant
    # symbols, and comments when allowed, and stopping at the first
    # unknown character.
    def _parse_items(self) -> List[_Item]:
        items = []  # type: List[_Item]

        while self._isnt_done():
            # Try to append item
            if not self._try_append_item(items):
                # Unknown at this point
                break

        return items

    # Parses the whole Normand input, setting `self._res` to the main
    # group item on success.
    def _parse(self):
        if len(self._normand.strip()) == 0:
            # Special case to make sure there's something to consume
            self._res = _Group([], self._text_loc)
            return

        # Parse first level items
        items = self._parse_items()

        # Make sure there's nothing left
        self._skip_ws_and_comments()

        if self._isnt_done():
            self._raise_error(
                "Unexpected character `{}`".format(self._normand[self._at])
            )

        # Set main group item
        self._res = _Group(items, self._text_loc)
950
951
# What parse() returns: the generated data and the updated context.
#
# Calling the class directly raises `NotImplementedError`: instances
# are built through the private _create() class method.
class ParseResult:
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Effective initializer, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1009
1010
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1015
1016
1017 # The `ICITTE` reserved name.
1018 _icitte_name = "ICITTE"
1019
1020
# Validator of the expression of an item.
#
# Visiting the expression node raises a parse error (at the text
# location of the item) for any name which is neither `ICITTE` nor a
# key of `syms`. The name visited immediately after a call node (the
# called function) isn't checked.
class _ExprValidator(ast.NodeVisitor):
    def __init__(self, item: _ExprItemT, syms: VarsT):
        self._item = item
        self._syms = syms
        self._parent_is_call = False

    # Raises a parse error if the name node `node` refers to an unknown
    # symbol.
    def _validate_name(self, node: ast.Name):
        if node.id not in self._syms and node.id != _icitte_name:
            _raise_error(
                "Unknown variable/label name `{}` in expression `{}`".format(
                    node.id, self._item.expr_str
                ),
                self._item.text_loc,
            )

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # The next visited name is the called function: skip it
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            # Make sure the name refers to a known symbol
            self._validate_name(node)

        # TODO: Restrict the set of allowed node types

        super().generic_visit(node)

        # Whatever was just visited, following nodes get checked again
        self._parent_is_call = False
1045
1046
# Keeper of the labels of each instance of each group.
#
# A group instance is one iteration of a given group; the instances of
# a given group are numbered from zero in the order they're added.
class _GroupInstanceLabels:
    def __init__(self):
        self._instance_labels = {}  # type: Dict[_Group, Dict[int, VarsT]]

    # Assigns a copy of the labels `labels` to a new instance of
    # `group`.
    def add(self, group: _Group, labels: VarsT):
        group_instances = self._instance_labels.setdefault(group, {})

        # The next instance index is the current number of instances
        group_instances[len(group_instances)] = labels.copy()

    # Returns the labels (not a copy) of the instance `instance_index`
    # of the group `group`.
    def labels(self, group: _Group, instance_index: int):
        group_instances = self._instance_labels[group]
        return group_instances[instance_index]
1066
1067
1068 # Generator of data and labels from a group item.
1069 #
1070 # Generation happens in memory at construction time. After building, use
1071 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1072 # get the resulting context.
1073 class _Gen:
def __init__(
    self,
    group: _Group,
    variables: VarsT,
    labels: VarsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Initial generation context (`variables` is copied so that the
    # caller's dictionary isn't modified)
    self._main_group = group
    self._vars = variables.copy()
    self._offset = offset
    self._bo = bo

    # Resolve the labels of all the group instances before generating
    # anything so that expressions may refer to labels located further
    self._group_instance_labels = _GroupInstanceLabels()
    self._resolve_labels(group, offset, labels.copy())

    # Generate
    self._gen()
1087 self._main_group = group
1088 self._gen()
1089
# Bytes generated so far.
@property
def data(self):
    generated = self._data
    return generated
1094
# Variables with their last computed values.
@property
def variables(self):
    cur_vars = self._vars
    return cur_vars
1099
# Labels of the main group (those of its first and only instance).
@property
def labels(self):
    instance_labels = self._group_instance_labels
    return instance_labels.labels(self._main_group, 0)
1104
# Current offset (after generating).
@property
def offset(self):
    cur_offset = self._offset
    return cur_offset
1109
# Current byte order (after generating); `None` if never set.
@property
def bo(self):
    cur_bo = self._bo
    return cur_bo
1114
# Records, in `self._group_instance_labels`, the labels of each group
# instance found within `item`, given the current offset `offset` and
# the current labels `labels`.
#
# Returns the offset following `item`.
def _resolve_labels(self, item: _Item, offset: int, labels: VarsT) -> int:
    item_type = type(item)

    if item_type is _Offset:
        # Explicit offset: replaces the current one
        return item.val

    if item_type is _Rep:
        # Each iteration is resolved as if the repeated item was
        # written that many times
        for _ in range(item.mul):
            offset = self._resolve_labels(item.item, offset, labels)

        return offset

    if item_type is not _Group:
        # Any other item only moves the offset forward by its size
        return offset + item.size

    # Group, first pass: compute the immediate labels of this instance
    # (on top of a copy of the inherited ones)
    instance_labels = labels.copy()
    running_offset = offset

    for subitem in item.items:
        subitem_type = type(subitem)

        if subitem_type is _Offset:
            running_offset = subitem.val
        elif subitem_type is _Label:
            assert subitem.name not in instance_labels
            instance_labels[subitem.name] = running_offset
        else:
            running_offset += subitem.size

    # Add to group instance labels
    self._group_instance_labels.add(item, instance_labels)

    # Group, second pass: resolve each contained item with the labels
    # of this instance
    for subitem in item.items:
        offset = self._resolve_labels(subitem, offset, instance_labels)

    return offset
1150
# Appends the value of the byte item `item` to the generated data and
# moves the current offset forward by its size.
def _handle_byte_item(self, item: _Byte):
    byte_val = item.val
    self._data.append(byte_val)
    self._offset += item.size
1154
# Appends the encoded bytes of the string item `item` to the generated
# data and moves the current offset forward by their count.
def _handle_str_item(self, item: _Str):
    self._data.extend(item.data)
    self._offset += item.size
1158
# Makes the byte order of `item` the current one.
def _handle_bo_item(self, item: _Bo):
    new_bo = item.bo
    self._bo = new_bo
1161
# Evaluates the expression of `item` and returns the resulting integer.
#
# The available symbols are the labels of the current group instance,
# `ICITTE` (current offset), and the current variables.
#
# Raises a parse error if the expression refers to an unknown name,
# fails to evaluate, or doesn't result in an `int` exactly.
def _eval_expr(self, item: _ExprItemT):
    assert self._cur_group is not None

    # Start with the labels of the current group instance (copied
    # because we're adding stuff)
    instance_index = self._group_instance_indexes[self._cur_group]
    syms = self._group_instance_labels.labels(
        self._cur_group, instance_index
    ).copy()

    # `ICITTE` is the current offset (before encoding)
    syms[_icitte_name] = self._offset

    # Current variables come last
    syms.update(self._vars)

    # Validate the node and its children
    _ExprValidator(item, syms).visit(item.expr)

    # Compile and evaluate expression node.
    #
    # NOTE(review): this is eval() on an expression coming from the
    # Normand input, with `None` as the globals: don't feed untrusted
    # input to this without further restricting what an expression may
    # contain.
    try:
        code = compile(item.expr, "", "eval")
        val = eval(code, None, syms)
    except Exception as exc:
        _raise_error_for_item(
            "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
            item,
        )

    # Only a genuine `int` is acceptable (not `bool`, not `float`)
    if type(val) is not int:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected result type `{}`".format(
                item.expr_str, type(val).__name__
            ),
            item,
        )

    return val
1198
# Sets the variable named by `item` to the result of its expression.
def _handle_var_item(self, item: _Var):
    new_val = self._eval_expr(item)
    self._vars[item.name] = new_val
1202
# Evaluates the expression of the value item `item`, encodes the result
# on `item.len` bits using the current byte order, appends the encoded
# bytes to the generated data, and moves the current offset forward.
#
# Raises a parse error if the result doesn't fit `item.len` bits
# (signed or unsigned), or if no byte order is defined while more than
# 8 bits are needed.
def _handle_val_item(self, item: _Val):
    # Compute value
    val = self._eval_expr(item)

    # Validate range: from the lowest signed value to the highest
    # unsigned value
    min_val = -(2 ** (item.len - 1))
    max_val = 2**item.len - 1

    if not (min_val <= val <= max_val):
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, item.len, item.expr_str, self._offset
            ),
            item,
        )

    # A single byte doesn't need any byte order
    if item.len > 8 and self._bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # Encode result on 64 bits (to extend the sign bit whatever the
    # value of `item.len`)
    is_be = self._bo in (None, ByteOrder.BE)
    bo_char = ">" if is_be else "<"
    type_char = "Q" if val >= 0 else "q"
    data = struct.pack("{}{}".format(bo_char, type_char), val)

    # Keep only the requested length
    len_bytes = item.len // 8

    if is_be:
        # Big endian: keep last bytes
        data = data[-len_bytes:]
    else:
        # Little endian: keep first bytes
        assert self._bo == ByteOrder.LE
        data = data[:len_bytes]

    # Append to current bytes and update offset
    self._data += data
    self._offset += len(data)
1248
def _handle_group_item(self, item: _Group):
    # The first time `item` is handled, its instance index is 0; each
    # further time increments it.
    indexes = self._group_instance_indexes
    indexes[item] = indexes[item] + 1 if item in indexes else 0

    # Make `item` the current group while handling its items, and
    # then go back to the previous current group.
    previous_group = self._cur_group
    self._cur_group = item

    for contained_item in item.items:
        self._handle_item(contained_item)

    self._cur_group = previous_group
1266
def _handle_rep_item(self, item: _Rep):
    # Handle the item to repeat `item.mul` times in a row
    times = item.mul

    for _unused in range(times):
        self._handle_item(item.item)
1270
def _handle_offset_item(self, item: _Offset):
    # An offset item produces no data: it only replaces the current
    # offset with the value it carries.
    new_offset = item.val
    self._offset = new_offset
1273
def _handle_item(self, item: _Item):
    # Dispatch on the exact type of `item`; an item of a type without
    # a registered handler is ignored.
    handler = self._item_handlers.get(type(item))

    if handler is not None:
        handler(item)
1277
def _gen(self):
    # Handler to call for each exact item type (see _handle_item())
    self._item_handlers = {
        _Byte: self._handle_byte_item,
        _Str: self._handle_str_item,
        _Bo: self._handle_bo_item,
        _Val: self._handle_val_item,
        _Var: self._handle_var_item,
        _Group: self._handle_group_item,
        _Rep: self._handle_rep_item,
        _Offset: self._handle_offset_item,
    }  # type: Dict[type, Callable[[Any], None]]

    # Start without any data, without any handled group instance, and
    # without a current group.
    self._data = bytearray()
    self._group_instance_indexes = {}  # type: Dict[_Group, int]
    self._cur_group = None

    # Everything else follows from handling the main group
    self._handle_item(self._main_group)
1298
1299
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables`: initial variable names (valid Python names) to
# integral values; the reserved name `ICITTE` isn't allowed. `None`
# means no initial variables.
#
# `init_labels`: initial label names (valid Python names) to integral
# values; the reserved name `ICITTE` isn't allowed. `None` means no
# initial labels.
#
# `init_offset`: initial offset.
#
# `init_byte_order`: initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Replace missing initial dictionaries with fresh empty ones
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text first, and then generate from the parsing result
    parsed = _Parser(normand, variables, labels).res
    gen = _Gen(parsed, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1339
1340
# Parses the command-line arguments and returns the resulting
# `argparse` namespace.
def _parse_cli_args():
    import argparse

    # Each entry holds the name(s) of one argument followed by its
    # settings; entries are registered in this order.
    arg_specs = [
        (
            ("--offset",),
            {
                "metavar": "OFFSET",
                "action": "store",
                "type": int,
                "default": 0,
                "help": "initial offset (positive)",
            },
        ),
        (
            ("-b", "--byte-order"),
            {
                "metavar": "BO",
                "choices": ["be", "le"],
                "type": str,
                "help": "initial byte order (`be` or `le`)",
            },
        ),
        (
            ("--var",),
            {
                "metavar": "NAME=VAL",
                "action": "append",
                "help": "add an initial variable (may be repeated)",
            },
        ),
        (
            ("-l", "--label"),
            {
                "metavar": "NAME=VAL",
                "action": "append",
                "help": "add an initial label (may be repeated)",
            },
        ),
        (
            ("--version",),
            {
                "action": "version",
                "version": "Normand {}".format(__version__),
            },
        ),
        (
            ("path",),
            {
                "metavar": "PATH",
                "action": "store",
                "nargs": "?",
                "help": "input path (none means standard input)",
            },
        ),
    ]

    # Build the parser
    parser = argparse.ArgumentParser()

    for names, settings in arg_specs:
        parser.add_argument(*names, **settings)

    # Parse the actual command line
    return parser.parse_args()
1389
1390
1391 # Raises a command-line error with the message `msg`.
1392 def _raise_cli_error(msg: str) -> NoReturn:
1393 raise RuntimeError("Command-line error: {}".format(msg))
1394
1395
# Returns a dictionary of string to integers from the list of strings
# `args` containing `NAME=VAL` entries.
#
# `NAME` must match `_py_name_pat` and `VAL` must be a sequence of
# decimal digits. If the same `NAME` is given more than once, the last
# entry wins.
#
# Returns an empty dictionary when `args` is `None`.
#
# Raises a command-line error (see _raise_cli_error()) on the first
# entry which doesn't have the expected form.
def _dict_from_arg(args: Optional[List[str]]):
    d = {}  # type: Dict[str, int]

    if args is None:
        return d

    # Named groups so that extraction doesn't depend on whether or not
    # `_py_name_pat` contains groups itself.
    assign_pat = re.compile(
        r"(?P<name>{})=(?P<val>\d+)$".format(_py_name_pat.pattern)
    )

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        # Keep the assignment: without this, every `NAME=VAL` entry
        # would be validated and then silently dropped.
        d[m.group("name")] = int(m.group("val"))

    return d
1411
1412
# CLI entry point without exception handling.
#
# Reads the Normand input (from the path given on the command line or,
# without one, from the standard input), parses it with the initial
# variables, labels, offset, and byte order given on the command line,
# and writes the resulting bytes to the standard output.
#
# Raises a command-line error (see _raise_cli_error()) on an invalid
# option value. On a parsing error, prints its location and message
# and exits (see _fail()).
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the text location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print (raw bytes, hence the underlying binary buffer)
    sys.stdout.buffer.write(res.data)
1462
1463
1464 # Prints the exception message `msg` and exits with status 1.
1465 def _fail(msg: str) -> NoReturn:
1466 if not msg.endswith("."):
1467 msg += "."
1468
1469 print(msg, file=sys.stderr)
1470 sys.exit(1)
1471
1472
# CLI entry point.
#
# Runs the CLI and turns any exception which reaches this level into a
# failure carrying the message of the exception (see _fail()).
def _run_cli():
    try:
        _try_run_cli()
    except Exception as error:
        error_msg = str(error)
        _fail(error_msg)
1479
1480
# Run the CLI only when this module is executed as a script
if __name__ == "__main__":
    _run_cli()
This page took 0.059486 seconds and 4 git commands to generate.