31c3aff5f134b285db979ebaa08a55b1279d69af
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 __author__ = "Philippe Proulx"
25 __version__ = "0.1.0"
26 __all__ = [
27 "ByteOrder",
28 "parse",
29 "ParseError",
30 "ParseResult",
31 "TextLoc",
32 "VarsT",
33 "__author__",
34 "__version__",
35 ]
36
37 import re
38 import abc
39 import ast
40 import sys
41 import enum
42 import struct
43 from typing import Any, Dict, List, Union, Pattern, Callable, NoReturn, Optional
44
45
46 # Text location (line and column numbers).
class TextLoc:
    """Text location made of a line number and a column number.

    Instances are created with the private `_create()` factory; calling
    the constructor directly raises `NotImplementedError`.
    """

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass `__init__()`, which is deliberately disabled.
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    @property
    def line_no(self):
        """Line number."""
        return self._line_no

    @property
    def col_no(self):
        """Column number."""
        return self._col_no

    def __repr__(self):
        # The item classes embed their text location in their own
        # `__repr__()` output: make it readable.
        return "TextLoc({}, {})".format(self._line_no, self._col_no)
70
71
72 # Any item.
class _Item(abc.ABC):
    """Abstract base class of all the items.

    Deriving from `abc.ABC` makes the `abc.abstractmethod` decorator of
    `size` effective: a subclass which doesn't implement `size` cannot
    be instantiated.
    """

    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    @property
    def text_loc(self):
        """Source text location."""
        return self._text_loc

    @property
    @abc.abstractmethod
    def size(self) -> int:
        """Size, in bytes, of this item."""
        ...
87
88
89 # A repeatable item.
class _RepableItem(_Item):
    """Marker base class for the items which a repetition may follow.

    The parser only tries to parse a repetition multiplier after an
    instance of this class.
    """

    pass
92
93
94 # Single byte.
class _Byte(_RepableItem):
    """Item holding the value of a single byte."""

    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        """Byte value."""
        return self._val

    @property
    def size(self):
        """Size in bytes: always 1."""
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)
111
112
113 # String.
class _Str(_RepableItem):
    """Item holding the already encoded bytes of a literal string."""

    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    @property
    def data(self):
        """Encoded bytes."""
        return self._data

    @property
    def size(self):
        """Size in bytes: the number of encoded bytes."""
        encoded = self._data
        return len(encoded)

    def __repr__(self):
        data_repr = repr(self._data)
        return "_Str({}, {})".format(data_repr, self._text_loc)
130
131
132 # Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    """Byte order; each member value is the byte order name."""

    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
140
141
142 # Byte order.
class _Bo(_Item):
    """Byte order item: carries a byte order and has a size of zero.

    `text_loc` is optional so that existing `_Bo(bo)` calls keep
    working.
    """

    def __init__(self, bo: ByteOrder, text_loc: Optional[TextLoc] = None):
        # Initialize the base class so that the inherited `text_loc`
        # property returns `None` instead of raising `AttributeError`
        # when no location is given.
        super().__init__(text_loc)  # type: ignore
        self._bo = bo

    @property
    def bo(self):
        """Byte order."""
        return self._bo

    @property
    def size(self):
        """Size in bytes: always 0."""
        return 0

    def __repr__(self):
        return "_Bo({}, {})".format(repr(self._bo), self._text_loc)
154
155
156 # Label.
class _Label(_Item):
    """Label item: a name; has a size of zero."""

    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    @property
    def name(self):
        """Label name."""
        return self._name

    @property
    def size(self):
        """Size in bytes: always 0."""
        return 0

    def __repr__(self):
        name_repr = repr(self._name)
        return "_Label({}, {})".format(name_repr, self._text_loc)
173
174
175 # Offset.
class _Offset(_Item):
    """Offset item: a new offset value; has a size of zero."""

    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        """Offset value."""
        return self._val

    @property
    def size(self):
        """Size in bytes: always 0."""
        return 0

    def __repr__(self):
        val_repr = repr(self._val)
        return "_Offset({}, {})".format(val_repr, self._text_loc)
192
193
194 # Mixin of containing an AST expression and its string.
195 class _ExprMixin:
196 def __init__(self, expr_str: str, expr: ast.Expression):
197 self._expr_str = expr_str
198 self._expr = expr
199
200 # Expression string.
201 @property
202 def expr_str(self):
203 return self._expr_str
204
205 # Expression node to evaluate.
206 @property
207 def expr(self):
208 return self._expr
209
210
211 # Variable.
class _Var(_Item, _ExprMixin):
    """Variable item: a name and an expression; has a size of zero."""

    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)

        # `_Item.__init__()` doesn't chain to the mixin: initialize it
        # explicitly.
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    @property
    def name(self):
        """Variable name."""
        return self._name

    @property
    def size(self):
        """Size in bytes: always 0."""
        return 0

    def __repr__(self):
        parts = (
            repr(self._name),
            repr(self._expr_str),
            repr(self._expr),
            self._text_loc,
        )
        return "_Var({}, {}, {}, {})".format(*parts)
233
234
235 # Value, possibly needing more than one byte.
class _Val(_RepableItem, _ExprMixin):
    """Value item: an expression and the length, in bits, to encode it on."""

    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)

        # `_Item.__init__()` doesn't chain to the mixin: initialize it
        # explicitly.
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    @property
    def len(self):
        """Length (bits)."""
        return self._len

    @property
    def size(self):
        """Size in bytes: the length in bits divided by eight."""
        bit_len = self._len
        return bit_len // 8

    def __repr__(self):
        parts = (
            repr(self._expr_str),
            repr(self._expr),
            repr(self._len),
            self._text_loc,
        )
        return "_Val({}, {}, {}, {})".format(*parts)
257
258
# Expression item type: any item holding an expression to evaluate
# (value or variable item).
_ExprItemT = Union[_Val, _Var]
261
262
263 # Group of items.
class _Group(_RepableItem):
    """Group item: an ordered list of contained items.

    The size is computed once, at construction time, as the sum of the
    sizes of the contained items.
    """

    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items
        self._size = sum(subitem.size for subitem in items)

    @property
    def items(self):
        """Contained items."""
        return self._items

    @property
    def size(self):
        """Size in bytes, as computed at construction time."""
        return self._size

    def __repr__(self):
        items_repr = repr(self._items)
        return "_Group({}, {})".format(items_repr, self._text_loc)
281
282
283 # Repetition item.
class _Rep(_Item):
    """Repetition item: an item to repeat and a multiplier."""

    def __init__(self, item: _RepableItem, mul: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._item, self._mul = item, mul

    @property
    def item(self):
        """Item to repeat."""
        return self._item

    @property
    def mul(self):
        """Repetition multiplier."""
        return self._mul

    @property
    def size(self):
        """Size in bytes: the size of the item times the multiplier."""
        unit_size = self._item.size
        return unit_size * self._mul

    def __repr__(self):
        parts = (repr(self._item), repr(self._mul), self._text_loc)
        return "_Rep({}, {}, {})".format(*parts)
308
309
310 # A parsing error containing a message and a text location.
class ParseError(RuntimeError):
    """Parsing error holding a message and a source text location.

    Instances are created with the private `_create()` factory; calling
    the constructor directly raises `NotImplementedError`.
    """

    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        # Bypass `__init__()`, which is deliberately disabled.
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, msg: str, text_loc: TextLoc):
        # Let `RuntimeError` record the message.
        super().__init__(msg)
        self._text_loc = text_loc

    @property
    def text_loc(self):
        """Source text location."""
        return self._text_loc
329
330
331 # Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    """Raise a `ParseError` built from `msg` and `text_loc`."""
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
334
335
# Variable (and label) dictionary type: maps a name to an integral
# value.
VarsT = Dict[str, int]
338
339
# Python name pattern (ASCII letters, digits, and underscores, not
# starting with a digit); used for variable and label names.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
342
343
344 # Normand parser.
345 #
346 # The constructor accepts a Normand input. After building, use the `res`
347 # property to get the resulting main group.
348 class _Parser:
349 # Builds a parser to parse the Normand input `normand`, parsing
350 # immediately.
def __init__(self, normand: str, variables: VarsT, labels: VarsT):
    """Build a parser for the Normand input `normand`, parsing immediately.

    The names of `variables` and `labels` seed the sets of known
    variable and label names.
    """
    # Input and cursor state
    self._normand = normand
    self._at = 0
    self._line_no, self._col_no = 1, 1

    # Names known so far
    self._var_names = set(variables)
    self._label_names = set(labels)

    # Parse now
    self._parse()
359
360 # Result (main group).
@property
def res(self):
    """Result (main group item), as set by `_parse()`."""
    return self._res

@property
def _text_loc(self):
    """Current text location, built from the current line and column numbers."""
    return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
        self._line_no, self._col_no
    )
371
372 # Returns `True` if this parser is done parsing.
373 def _is_done(self):
374 return self._at == len(self._normand)
375
376 # Returns `True` if this parser isn't done parsing.
377 def _isnt_done(self):
378 return not self._is_done()
379
380 # Raises a parse error, creating it using the message `msg` and the
381 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    """Raise a parse error with the message `msg` at the current text location."""
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
384
385 # Tries to make the pattern `pat` match the current substring,
386 # returning the match object and updating `self._at`,
387 # `self._line_no`, and `self._col_no` on success.
388 def _try_parse_pat(self, pat: Pattern[str]):
389 m = pat.match(self._normand, self._at)
390
391 if m is None:
392 return
393
394 # Skip matched string
395 self._at += len(m.group(0))
396
397 # Update line number
398 self._line_no += m.group(0).count("\n")
399
400 # Update column number
401 for i in reversed(range(self._at)):
402 if self._normand[i] == "\n" or i == 0:
403 if i == 0:
404 self._col_no = self._at + 1
405 else:
406 self._col_no = self._at - i
407
408 break
409
410 # Return match object
411 return m
412
413 # Expects the pattern `pat` to match the current substring,
414 # returning the match object and updating `self._at`,
415 # `self._line_no`, and `self._col_no` on success, or raising a parse
416 # error with the message `error_msg` on error.
417 def _expect_pat(self, pat: Pattern[str], error_msg: str):
418 # Match
419 m = self._try_parse_pat(pat)
420
421 if m is None:
422 # No match: error
423 self._raise_error(error_msg)
424
425 # Return match object
426 return m
427
428 # Pattern for _skip_ws_and_comments()
429 _ws_or_syms_or_comments_pat = re.compile(
430 r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
431 )
432
433 # Skips as many whitespaces, insignificant symbol characters, and
434 # comments as possible.
435 def _skip_ws_and_comments(self):
436 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
437
# Pattern for _try_parse_hex_byte(): a single hexadecimal digit
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

def _try_parse_hex_byte(self):
    """Try to parse a hexadecimal byte (two nibbles).

    Whitespaces, insignificant symbols, and comments may separate the
    two nibbles. Returns a byte item on success, or `None` if there's
    no initial nibble. Raises a parse error if the second nibble is
    missing.
    """
    high_m = self._try_parse_pat(self._nibble_pat)

    if high_m is None:
        # Not a hexadecimal byte
        return

    # The low nibble is mandatory from here
    self._skip_ws_and_comments()
    low_m = self._expect_pat(
        self._nibble_pat, "Expecting another hexadecimal nibble"
    )
    hex_digits = high_m.group(0) + low_m.group(0)
    return _Byte(int(hex_digits, 16), self._text_loc)
459
# Patterns for _try_parse_bin_byte()
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

def _try_parse_bin_byte(self):
    """Try to parse a binary byte (`%` followed by eight bits).

    Whitespaces, insignificant symbols, and comments may precede each
    bit. Returns a byte item on success, or `None` if there's no `%`
    prefix. Raises a parse error if any of the eight bits is missing.
    """
    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # Not a binary byte
        return

    # Accumulate exactly eight bits
    bit_str = ""

    for _ in range(8):
        self._skip_ws_and_comments()
        bit_m = self._expect_pat(
            self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
        )
        bit_str += bit_m.group(0)

    return _Byte(int(bit_str, 2), self._text_loc)
481
# Patterns for _try_parse_dec_byte()
_dec_byte_prefix_pat = re.compile(r"\$\s*")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

def _try_parse_dec_byte(self):
    """Try to parse a decimal byte (`$` followed by a decimal constant).

    The constant must be within -128 to 255; a negative value is
    converted to its 8-bit two's complement. Returns a byte item on
    success, or `None` if there's no `$` prefix. Raises a parse error
    if the constant is missing or out of range.
    """
    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # Not a decimal byte
        return

    val_m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # The `neg` group is either empty or `-`
    val = int(val_m.group("neg") + val_m.group("val"))

    if not -128 <= val <= 255:
        self._raise_error("Invalid decimal byte value {}".format(val))

    # Two's complement on eight bits
    return _Byte(val & 0xFF, self._text_loc)
508
509 # Tries to parse a byte, returning a byte item on success.
510 def _try_parse_byte(self):
511 # Hexadecimal
512 item = self._try_parse_hex_byte()
513
514 if item is not None:
515 return item
516
517 # Binary
518 item = self._try_parse_bin_byte()
519
520 if item is not None:
521 return item
522
523 # Decimal
524 item = self._try_parse_dec_byte()
525
526 if item is not None:
527 return item
528
# Patterns for _try_parse_str()
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# A backslash followed by any supported escape sequence character
_str_escape_seq_pat = re.compile(r'\\([0abefnrtv"\\])')

def _try_parse_str(self):
    """Try to parse a literal string, returning a string item on success.

    An optional `u16be`, `u16le`, `u32be`, or `u32le` prefix selects
    the corresponding UTF-16/UTF-32 encoding; the default is UTF-8.

    Returns `None` if there's no string prefix. Raises a parse error if
    the closing `"` is missing.
    """
    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Replacing one kind of sequence after the other would corrupt an
    # escaped backslash followed by an escape sequence character: the
    # four characters `\\n` would become a backslash and a newline
    # instead of a backslash and `n`.
    escape_seq_strs = self._str_escape_seq_strs
    val = self._str_escape_seq_pat.sub(
        lambda esc_m: escape_seq_strs[esc_m.group(1)], m.group(0)
    )

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, self._text_loc)
581
# Patterns for _try_parse_group()
_group_prefix_pat = re.compile(r"\(")
_group_suffix_pat = re.compile(r"\)")

def _try_parse_group(self):
    """Try to parse a group (items between `(` and `)`).

    Returns a group item on success, or `None` if there's no `(`
    prefix. Raises a parse error if the closing `)` is missing.
    """
    if self._try_parse_pat(self._group_prefix_pat) is None:
        # Not a group
        return

    # Contained items
    subitems = self._parse_items()

    # The group must end here
    self._skip_ws_and_comments()
    self._expect_pat(
        self._group_suffix_pat, "Expecting an item or `)` (end of group)"
    )
    return _Group(subitems, self._text_loc)
604
605 # Returns a stripped expression string and an AST expression node
606 # from the expression string `expr_str` at text location `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
    """Return the normalized expression string and its AST expression node.

    `expr_str` is stripped and its newlines are replaced with spaces
    before being parsed. Raises a parse error located at `text_loc` if
    the result isn't a valid Python expression.
    """
    normalized = expr_str.strip().replace("\n", " ")

    try:
        expr_node = ast.parse(normalized, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(normalized),
            text_loc,
        )
    else:
        return normalized, expr_node
620
# Patterns for _try_parse_val_and_len()
_val_expr_pat = re.compile(r"([^}:]+):")
_val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)")

def _try_parse_val_and_len(self):
    """Try to parse a value expression and its length (`EXPR:LEN`).

    Returns a value item on success, or `None` if there's no
    expression followed by `:`. Raises a parse error if the length is
    missing or if the expression is invalid.
    """
    # Location of the beginning of the expression, for error reporting
    expr_text_loc = self._text_loc
    expr_m = self._try_parse_pat(self._val_expr_pat)

    if expr_m is None:
        # Not a value
        return

    # The length is mandatory from here
    len_m = self._expect_pat(
        self._val_len_pat, "Expecting a length (multiple of eight bits)"
    )

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(expr_m.group(1), expr_text_loc)
    bit_len = int(len_m.group(1))
    return _Val(expr_str, expr, bit_len, self._text_loc)
652
# Pattern for _try_parse_var()
_var_pat = re.compile(
    r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
)

def _try_parse_var(self):
    """Try to parse a variable assignment (`NAME = EXPR`).

    Returns a variable item on success, or `None` if the pattern
    doesn't match. Raises a parse error if the name is reserved, if a
    label with the same name exists, or if the expression is invalid.
    """
    # Location of the beginning of the assignment, for error reporting
    expr_text_loc = self._text_loc
    var_m = self._try_parse_pat(self._var_pat)

    if var_m is None:
        # Not a variable
        return

    name, raw_expr = var_m.group("name", "expr")

    # Validate name
    if name == _icitte_name:
        self._raise_error("`{}` is a reserved variable name".format(_icitte_name))

    if name in self._label_names:
        self._raise_error("Existing label named `{}`".format(name))

    # Remember the name
    self._var_names.add(name)

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(raw_expr, expr_text_loc)
    return _Var(name, expr_str, expr, self._text_loc)
691
# Pattern for _try_parse_bo_name()
_bo_pat = re.compile(r"[bl]e")

def _try_parse_bo_name(self):
    """Try to parse a byte order name (`be` or `le`).

    Returns the corresponding byte order item on success, or `None` if
    there's no byte order name.
    """
    bo_m = self._try_parse_pat(self._bo_pat)

    if bo_m is None:
        # Not a byte order name
        return

    # The pattern only matches `be` or `le`, which are exactly the
    # values of the `ByteOrder` members.
    return _Bo(ByteOrder(bo_m.group(0)))
711
# Patterns for _try_parse_val_or_var_or_bo()
_val_var_bo_prefix_pat = re.compile(r"\{\s*")
_val_var_bo_suffix_pat = re.compile(r"\s*}")

def _try_parse_val_or_var_or_bo(self):
    """Try to parse a `{...}` block: a variable, a value, or a byte order.

    The three forms are tried in this order. Returns the item on
    success, or `None` if there's no `{` prefix. Raises a parse error
    if the contents match none of the forms or if the closing `}` is
    missing.
    """
    if self._try_parse_pat(self._val_var_bo_prefix_pat) is None:
        # Not a `{...}` block
        return

    content_parsers = (
        self._try_parse_var,
        self._try_parse_val_and_len,
        self._try_parse_bo_name,
    )

    for try_parse in content_parsers:
        item = try_parse()

        if item is not None:
            break
    else:
        # At this point it's invalid
        self._raise_error("Expecting a value, a variable, or a byte order")

    # Expect suffix
    self._expect_pat(self._val_var_bo_suffix_pat, "Expecting `}`")
    return item
742
# Pattern for _try_parse_offset_val() and _try_parse_rep(): a
# hexadecimal (`0x` prefix) or decimal positive constant integer
_pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

def _try_parse_offset_val(self):
    """Try to parse an offset value (after the initial `<`).

    Returns an offset item on success, or `None` if there's no
    constant integer.
    """
    # Match
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Convert with an explicit base: int(text, 0) raises `ValueError`
    # for a decimal constant with a leading zero (for example `010`),
    # which the pattern accepts.
    text = m.group(0)
    val = int(text, 16 if text[:2] in ("0x", "0X") else 10)

    # Return item
    return _Offset(val, self._text_loc)
758
759 # Tries to parse a label name (after the initial `<`), returning a
760 # label item on success.
def _try_parse_label_name(self):
    """Try to parse a label name (after the initial `<`).

    Returns a label item on success, or `None` if there's no name.
    Raises a parse error if the name is reserved, if it's already a
    label name, or if it's already a variable name.
    """
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is None:
        # Not a label name
        return

    label_name = name_m.group(0)

    # Validate
    if label_name == _icitte_name:
        self._raise_error("`{}` is a reserved label name".format(_icitte_name))

    if label_name in self._label_names:
        self._raise_error("Duplicate label name `{}`".format(label_name))

    if label_name in self._var_names:
        self._raise_error("Existing variable named `{}`".format(label_name))

    # Remember the name
    self._label_names.add(label_name)
    return _Label(label_name, self._text_loc)
786
# Patterns for _try_parse_label_or_offset()
_label_offset_prefix_pat = re.compile(r"<\s*")
_label_offset_suffix_pat = re.compile(r"\s*>")

def _try_parse_label_or_offset(self):
    """Try to parse a `<...>` block: an offset value or a label name.

    The offset form is tried first. Returns the item on success, or
    `None` if there's no `<` prefix. Raises a parse error if the
    contents match neither form or if the closing `>` is missing.
    """
    if self._try_parse_pat(self._label_offset_prefix_pat) is None:
        # Not a `<...>` block
        return

    for try_parse in (self._try_parse_offset_val, self._try_parse_label_name):
        item = try_parse()

        if item is not None:
            break
    else:
        # At this point it's invalid
        self._raise_error("Expecting a label name or an offset value")

    # Expect suffix
    self._expect_pat(self._label_offset_suffix_pat, "Expecting `>`")
    return item
812
813 # Tries to parse a base item (anything except a repetition),
814 # returning it on success.
815 def _try_parse_base_item(self):
816 # Byte item?
817 item = self._try_parse_byte()
818
819 if item is not None:
820 return item
821
822 # String item?
823 item = self._try_parse_str()
824
825 if item is not None:
826 return item
827
828 # Value, variable, or byte order item?
829 item = self._try_parse_val_or_var_or_bo()
830
831 if item is not None:
832 return item
833
834 # Label or offset item?
835 item = self._try_parse_label_or_offset()
836
837 if item is not None:
838 return item
839
840 # Group item?
841 item = self._try_parse_group()
842
843 if item is not None:
844 return item
845
846 # Pattern for _try_parse_rep()
847 _rep_prefix_pat = re.compile(r"\*\s*")
848
849 # Tries to parse a repetition, returning the multiplier on success,
850 # or 1 otherwise.
851 def _try_parse_rep(self):
852 self._skip_ws_and_comments()
853
854 # Match prefix
855 if self._try_parse_pat(self._rep_prefix_pat) is None:
856 # No match
857 return 1
858
859 # Expect and return a decimal multiplier
860 self._skip_ws_and_comments()
861 m = self._expect_pat(
862 self._pos_const_int_pat, "Expecting a positive integral multiplier"
863 )
864 return int(m.group(0), 0)
865
866 # Tries to parse a repeatable item followed or not by a repetition,
867 # returning an item on success.
def _try_parse_item(self):
    """Try to parse a base item, followed or not by a repetition.

    Returns the item on success, or `None` if there's no item. A
    repeatable item followed by a multiplier other than 1 is wrapped
    into a repetition item.
    """
    self._skip_ws_and_comments()

    # Parse a base item
    item = self._try_parse_base_item()

    if item is None:
        # No item
        return

    # Parse repetition if the base item is repeatable
    if isinstance(item, _RepableItem):
        mul = self._try_parse_rep()

        if mul != 1:
            # Convert to repetition item.
            #
            # This includes a multiplier of zero: returning `None`
            # here would tell _parse_items() that there's no more item
            # and stop the parsing of the current group. A repetition
            # item with a multiplier of zero has a size of zero and its
            # item is never processed.
            item = _Rep(item, mul, self._text_loc)

    return item
890
891 # Parses and returns items, skipping whitespaces, insignificant
892 # symbols, and comments when allowed, and stopping at the first
893 # unknown character.
def _parse_items(self) -> List[_Item]:
    """Parse and return as many consecutive items as possible.

    Stops at the end of the input or at the first position where no
    item can be parsed.
    """
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        next_item = self._try_parse_item()

        if next_item is None:
            # Unknown at this point
            break

        parsed_items.append(next_item)

    return parsed_items
910
911 # Parses the whole Normand input, setting `self._res` to the main
912 # group item on success.
def _parse(self):
    """Parse the whole Normand input, setting `self._res` to the main group item.

    Raises a parse error if something which isn't an item remains after
    the first level items.
    """
    if not self._normand.strip():
        # Special case to make sure there's something to consume
        self._res = _Group([], self._text_loc)
        return

    # First level items
    main_items = self._parse_items()

    # Nothing significant may remain
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected_char))

    # Main group item
    self._res = _Group(main_items, self._text_loc)
932
933
934 # The return type of parse().
class ParseResult:
    """Return type of parse(): generated data and updated context.

    Instances are created with the private `_create()` factory; calling
    the constructor directly raises `NotImplementedError`.
    """

    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass `__init__()`, which is deliberately disabled.
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._offset, self._bo = data, offset, bo
        self._vars, self._labels = variables, labels

    @property
    def data(self):
        """Generated data."""
        return self._data

    @property
    def variables(self):
        """Dictionary of updated variable names to their last computed value."""
        return self._vars

    @property
    def labels(self):
        """Dictionary of updated main group label names to their computed value."""
        return self._labels

    @property
    def offset(self):
        """Updated offset."""
        return self._offset

    @property
    def byte_order(self):
        """Updated byte order."""
        return self._bo
991
992
993 # Raises a parse error for the item `item`, creating it using the
994 # message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    """Raise a parse error with the message `msg` at the text location of `item`."""
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
997
998
# The `ICITTE` reserved name: cannot be used as a variable or label
# name; refers to the current offset when evaluating an expression.
_icitte_name = "ICITTE"
1001
1002
1003 # Value expression validator.
class _ExprValidator(ast.NodeVisitor):
    """Validator of the names used by the expression of an item.

    Raises a parse error, located at the item, for any name which is
    neither `ICITTE` nor a key of `syms`. A name node visited while the
    "parent is a call" flag is set isn't checked.
    """

    def __init__(self, item: _ExprItemT, syms: VarsT):
        self._item = item
        self._syms = syms
        self._parent_is_call = False

    def _check_name(self, name: str):
        # Make sure the name refers to a known variable/label name
        if name == _icitte_name or name in self._syms:
            return

        _raise_error(
            "Unknown variable/label name `{}` in expression `{}`".format(
                name, self._item.expr_str
            ),
            self._item.text_loc,
        )

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            self._check_name(node.id)

        # TODO: Restrict the set of allowed node types

        # Visit the children, then clear the flag
        super().generic_visit(node)
        self._parent_is_call = False
1027
1028
1029 # Keeper of labels for a given group instance.
1030 #
1031 # A group instance is one iteration of a given group.
class _GroupInstanceLabels:
    """Keeper of the labels of each instance of each group.

    Instances of a given group are indexed from zero, in the order in
    which they're added.
    """

    def __init__(self):
        self._instance_labels = {}  # type: Dict[_Group, Dict[int, VarsT]]

    def add(self, group: _Group, labels: VarsT):
        """Assign a copy of `labels` to a new instance of `group`."""
        group_instances = self._instance_labels.setdefault(group, {})
        next_index = len(group_instances)
        group_instances[next_index] = labels.copy()

    def labels(self, group: _Group, instance_index: int):
        """Return the labels (not a copy) of the instance `instance_index` of `group`."""
        group_instances = self._instance_labels[group]
        return group_instances[instance_index]
1048
1049
1050 # Generator of data and labels from a group item.
1051 #
1052 # Generation happens in memory at construction time. After building, use
1053 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1054 # get the resulting context.
1055 class _Gen:
def __init__(
    self,
    group: _Group,
    variables: VarsT,
    labels: VarsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    """Generate the data of `group` immediately.

    `variables` and `labels` are copied; `offset` and `bo` are the
    initial offset and byte order.
    """
    # Initial context
    self._main_group = group
    self._vars = variables.copy()
    self._offset = offset
    self._bo = bo

    # Resolve the labels of all the group instances before generating
    self._group_instance_labels = _GroupInstanceLabels()
    self._resolve_labels(group, offset, labels.copy())

    # Generate
    self._gen()
1071
1072 # Generated bytes.
@property
def data(self):
    """Generated bytes."""
    return self._data

@property
def variables(self):
    """Updated variables."""
    return self._vars

@property
def labels(self):
    """Updated main group labels (labels of the first instance of the main group)."""
    return self._group_instance_labels.labels(self._main_group, 0)

@property
def offset(self):
    """Updated offset."""
    return self._offset

@property
def bo(self):
    """Updated byte order."""
    return self._bo
1096
1097 # Fills `self._group_instance_labels` with the labels for each group
1098 # instance in `item`, starting at current offset `offset` with the
1099 # current labels `labels`.
1100 #
1101 # Returns the new current offset.
def _resolve_labels(self, item: _Item, offset: int, labels: VarsT) -> int:
    """Record the labels of each group instance found in `item`.

    `offset` is the current offset and `labels` are the current labels.
    Fills `self._group_instance_labels` and returns the new current
    offset.
    """
    item_type = type(item)

    if item_type is _Offset:
        # An offset item sets the current offset
        return item.val

    if item_type is _Rep:
        # One resolution per repetition
        for _ in range(item.mul):
            offset = self._resolve_labels(item.item, offset, labels)

        return offset

    if item_type is not _Group:
        # Any other item only moves the current offset
        return offset + item.size

    # First pass: compute immediate labels of this instance
    group_labels = labels.copy()
    label_offset = offset

    for subitem in item.items:
        subitem_type = type(subitem)

        if subitem_type is _Offset:
            label_offset = subitem.val
        elif subitem_type is _Label:
            assert subitem.name not in group_labels
            group_labels[subitem.name] = label_offset
        else:
            label_offset += subitem.size

    # Add to group instance labels
    self._group_instance_labels.add(item, group_labels)

    # Second pass: handle each item
    for subitem in item.items:
        offset = self._resolve_labels(subitem, offset, group_labels)

    return offset
1132
def _handle_byte_item(self, item: _Byte):
    """Append the byte of `item` to the data and update the current offset."""
    self._data.append(item.val)
    self._offset += item.size

def _handle_str_item(self, item: _Str):
    """Append the encoded bytes of `item` to the data and update the current offset."""
    self._data += item.data
    self._offset += item.size

def _handle_bo_item(self, item: _Bo):
    """Set the current byte order to the one of `item`."""
    self._bo = item.bo
1143
def _eval_expr(self, item: _ExprItemT):
    """Evaluate the expression of `item` and return its integral result.

    The available symbols are the labels of the current group instance,
    `ICITTE` (current offset), and the current variables.

    Raises a parse error if the expression uses an unknown name, fails
    to evaluate, or doesn't evaluate to an `int`.
    """
    assert self._cur_group is not None

    # Start from a copy of the labels of the current group instance
    # (copied because we're adding stuff).
    cur_instance_index = self._group_instance_indexes[self._cur_group]
    syms = dict(
        self._group_instance_labels.labels(self._cur_group, cur_instance_index)
    )

    # `ICITTE` is the current offset (before encoding)
    syms[_icitte_name] = self._offset

    # Add the current variables
    syms.update(self._vars)

    # Validate the node and its children
    _ExprValidator(item, syms).visit(item.expr)

    # Compile and evaluate expression node.
    #
    # NOTE(review): this evaluates an expression coming from the input
    # with eval(); confirm that the input is always trusted.
    try:
        code = compile(item.expr, "", "eval")
        val = eval(code, None, syms)
    except Exception as exc:
        _raise_error_for_item(
            "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
            item,
        )

    # Validate result
    if type(val) is int:
        return val

    _raise_error_for_item(
        "Invalid expression `{}`: unexpected result type `{}`".format(
            item.expr_str, type(val).__name__
        ),
        item,
    )
1180
def _handle_var_item(self, item: _Var):
    """Evaluate the expression of `item` and assign the result to its variable."""
    new_val = self._eval_expr(item)
    self._vars[item.name] = new_val
1184
def _handle_val_item(self, item: _Val):
    """Evaluate the expression of `item` and append its encoded bytes.

    The value is encoded on `item.len` bits with the current byte
    order. Raises a parse error if the value is outside the range of
    `item.len` bits (signed or unsigned), or if there's no current byte
    order while `item.len` is greater than 8.
    """
    val = self._eval_expr(item)
    bit_len = item.len

    # Validate range (signed lower bound, unsigned upper bound)
    if not -(2 ** (bit_len - 1)) <= val <= 2**bit_len - 1:
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, bit_len, item.expr_str, self._offset
            ),
            item,
        )

    # A byte order is required to encode on more than one byte
    if self._bo is None and bit_len > 8:
        _raise_error_for_item(
            "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # Encode result on 64 bits (to extend the sign bit whatever the
    # value of `item.len`).
    big_endian = self._bo in (None, ByteOrder.BE)
    fmt = (">" if big_endian else "<") + ("Q" if val >= 0 else "q")
    data = struct.pack(fmt, val)

    # Keep only the requested length
    len_bytes = bit_len // 8

    if big_endian:
        # Big endian: keep last bytes
        data = data[-len_bytes:]
    else:
        # Little endian: keep first bytes
        assert self._bo == ByteOrder.LE
        data = data[:len_bytes]

    # Append to current bytes and update offset
    self._data += data
    self._offset += len(data)
1230
1231 def _handle_group_item(self, item: _Group):
1232 # Update the instance index of `item`
1233 if item not in self._group_instance_indexes:
1234 self._group_instance_indexes[item] = 0
1235 else:
1236 self._group_instance_indexes[item] += 1
1237
1238 # Changed current group
1239 old_cur_group = self._cur_group
1240 self._cur_group = item
1241
1242 # Handle each item
1243 for subitem in item.items:
1244 self._handle_item(subitem)
1245
1246 # Restore current group
1247 self._cur_group = old_cur_group
1248
1249 def _handle_rep_item(self, item: _Rep):
1250 for _ in range(item.mul):
1251 self._handle_item(item.item)
1252
1253 def _handle_offset_item(self, item: _Offset):
1254 self._offset = item.val
1255
1256 def _handle_item(self, item: _Item):
1257 if type(item) in self._item_handlers:
1258 self._item_handlers[type(item)](item)
1259
def _gen(self):
    # Dispatch table: item type to the method which handles an item of
    # exactly this type (see `_handle_item()`)
    handlers = {
        _Byte: self._handle_byte_item,
        _Str: self._handle_str_item,
        _Bo: self._handle_bo_item,
        _Val: self._handle_val_item,
        _Var: self._handle_var_item,
        _Group: self._handle_group_item,
        _Rep: self._handle_rep_item,
        _Offset: self._handle_offset_item,
    }  # type: Dict[type, Callable[[Any], None]]
    self._item_handlers = handlers

    # Start from scratch: no generated data, no known group instance,
    # and no current group
    self._data = bytearray()
    self._group_instance_indexes = {}  # type: Dict[_Group, int]
    self._cur_group = None

    # Generate everything by handling the main group
    self._handle_item(self._main_group)
1280
1281
# Parses the input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# Parameters:
#
# `init_variables`:
#     Initial variables: dictionary of names (valid Python names, the
#     reserved name `ICITTE` excluded) to integral values. `None` means
#     no initial variables.
#
# `init_labels`:
#     Initial labels: dictionary of names (valid Python names, the
#     reserved name `ICITTE` excluded) to integral values. `None` means
#     no initial labels.
#
# `init_offset`:
#     Initial offset.
#
# `init_byte_order`:
#     Initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Missing initial variables/labels mean empty dictionaries
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text first, then generate from the resulting item
    main_group = _Parser(normand, variables, labels).res
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    # Wrap the generator outputs
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1321
1322
1323 # Parses the command-line arguments.
1324 def _parse_cli_args():
1325 import argparse
1326
1327 # Build parser
1328 ap = argparse.ArgumentParser()
1329 ap.add_argument(
1330 "--offset",
1331 metavar="OFFSET",
1332 action="store",
1333 type=int,
1334 default=0,
1335 help="initial offset (positive)",
1336 )
1337 ap.add_argument(
1338 "-b",
1339 "--byte-order",
1340 metavar="BO",
1341 choices=["be", "le"],
1342 type=str,
1343 help="initial byte order (`be` or `le`)",
1344 )
1345 ap.add_argument(
1346 "--var",
1347 metavar="NAME=VAL",
1348 action="append",
1349 help="add an initial variable (may be repeated)",
1350 )
1351 ap.add_argument(
1352 "-l",
1353 "--label",
1354 metavar="NAME=VAL",
1355 action="append",
1356 help="add an initial label (may be repeated)",
1357 )
1358 ap.add_argument(
1359 "--version", action="version", version="Normand {}".format(__version__)
1360 )
1361 ap.add_argument(
1362 "path",
1363 metavar="PATH",
1364 action="store",
1365 nargs="?",
1366 help="input path (none means standard input)",
1367 )
1368
1369 # Parse
1370 return ap.parse_args()
1371
1372
1373 # Raises a command-line error with the message `msg`.
1374 def _raise_cli_error(msg: str) -> NoReturn:
1375 raise RuntimeError("Command-line error: {}".format(msg))
1376
1377
1378 # Returns a dictionary of string to integers from the list of strings
1379 # `args` containing `NAME=VAL` entries.
1380 def _dict_from_arg(args: Optional[List[str]]):
1381 d = {} # type: Dict[str, int]
1382
1383 if args is None:
1384 return d
1385
1386 for arg in args:
1387 m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), arg)
1388
1389 if m is None:
1390 _raise_cli_error("Invalid assignment {}".format(arg))
1391
1392 return d
1393
1394
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the input text (from the
# given path, or from the standard input without a path), parses it
# with `parse()`, and writes the resulting bytes to the standard
# output.
#
# On a `ParseError`, calls `_fail()` with a message containing the
# absolute input path (if any), the line and column numbers of the
# error, and the error message. Command-line errors are raised through
# `_raise_cli_error()`; other exceptions are left to the caller.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message must include the offending value,
    # not a literal `{}` placeholder)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The argument parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the text location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print (raw bytes: use the underlying binary buffer)
    sys.stdout.buffer.write(res.data)
1444
1445
1446 # Prints the exception message `msg` and exits with status 1.
1447 def _fail(msg: str) -> NoReturn:
1448 if not msg.endswith("."):
1449 msg += "."
1450
1451 print(msg, file=sys.stderr)
1452 sys.exit(1)
1453
1454
# CLI entry point: runs `_try_run_cli()` and reports any exception
# through `_fail()` with the exception message.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as err:
        reason = str(err)
        _fail(reason)


# Only run the CLI when this module is executed as a script
if __name__ == "__main__":
    _run_cli()
This page took 0.074056 seconds and 3 git commands to generate.