normand.py: add comment about the portable module
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
5a993698
PP
24# This module is the portable Normand processor. It offers both the
25# parse() function and the command-line tool (run the module itself)
26# without external dependencies except a `typing` module for Python 3.4.
27#
28# Feel free to copy this module file to your own project to use Normand.
29#
30# Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.2.0"

# Names exported by this module.
__all__ = [
    "ByteOrder",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLoc",
    "VarsT",
    "__author__",
    "__version__",
]
44
45import re
46import abc
47import ast
48import sys
49import enum
50import struct
51from typing import Any, Dict, List, Union, Pattern, Callable, NoReturn, Optional
52
53
# Text location (line and column numbers).
#
# Instances are built with the private _create() class method; calling
# the constructor directly raises `NotImplementedError`.
class TextLoc:
    # Builds and returns a text location having the line number
    # `line_no` and the column number `col_no`, bypassing __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization (called by _create()).
    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    # Readable representation: the item classes embed the text location
    # in their own representation.
    def __repr__(self):
        return "TextLoc({}, {})".format(self._line_no, self._col_no)
78
79
# Any item.
#
# This is an abstract base class: a concrete item class must implement
# the `size` property. Deriving from `abc.ABC` is what makes
# `abc.abstractmethod` effective (it's ignored without the `ABCMeta`
# metaclass).
class _Item(abc.ABC):
    # Builds an item found at the source text location `text_loc`.
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    # Returns the size, in bytes, of this item.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
95
96
# A repeatable item.
#
# Marker base class: the parser only tries to parse a repetition after
# an item which is an instance of this class.
class _RepableItem(_Item):
    pass
100
101
# Single byte.
class _Byte(_RepableItem):
    # Builds a byte item having the value `val` found at `text_loc`.
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # Size (bytes): always one.
    @property
    def size(self):
        return 1

    # Representation showing the value in hexadecimal.
    def __repr__(self):
        return "_Byte({}, {})".format(hex(self._val), self._text_loc)
119
120
# String.
class _Str(_RepableItem):
    # Builds a string item having the already encoded bytes `data`
    # found at `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Size (bytes): length of the encoded data.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({}, {})".format(repr(self._data), self._text_loc)
138
139
# Byte order.
#
# The member values are the same strings as the byte order names which
# the parser recognizes (`be` and `le`).
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
148
149
# Byte order.
#
# Changes the current byte order without generating any data.
class _Bo(_Item):
    # Builds a byte order item setting the byte order `bo`, found at
    # `text_loc`.
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    # Size (bytes): always zero.
    @property
    def size(self):
        return 0

    # Same representation style as the other item classes.
    def __repr__(self):
        return "_Bo({}, {})".format(repr(self._bo), self._text_loc)
163
164
# Label.
class _Label(_Item):
    # Builds a label item named `name` found at `text_loc`.
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    # Size (bytes): always zero.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Label({}, {})".format(repr(self._name), self._text_loc)
182
183
# Offset.
class _Offset(_Item):
    # Builds an offset item setting the current offset to `val`, found
    # at `text_loc`.
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    # Offset value.
    @property
    def val(self):
        return self._val

    # Size (bytes): always zero.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Offset({}, {})".format(repr(self._val), self._text_loc)
201
202
# Mixin of containing an AST expression and its string.
class _ExprMixin:
    # Keeps the expression string `expr_str` and the corresponding
    # expression node `expr`.
    def __init__(self, expr_str: str, expr: ast.Expression):
        self._expr_str = expr_str
        self._expr = expr

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression node to evaluate.
    @property
    def expr(self):
        return self._expr
218
219
# Variable.
#
# Assigns the result of an expression to a name without generating any
# data.
class _Var(_Item, _ExprMixin):
    # Builds a variable item named `name`, having the expression string
    # `expr_str` and expression node `expr`, found at `text_loc`.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)

        # The mixin isn't part of the super().__init__() chain above
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    # Size (bytes): always zero.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Var({}, {}, {}, {})".format(
            repr(self._name), repr(self._expr_str), repr(self._expr), self._text_loc
        )
242
243
# Value, possibly needing more than one byte.
class _Val(_RepableItem, _ExprMixin):
    # Builds a value item having the expression string `expr_str` and
    # expression node `expr`, to encode on `len` bits, found at
    # `text_loc`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)

        # The mixin isn't part of the super().__init__() chain above
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): length in bits divided by eight.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        return "_Val({}, {}, {}, {})".format(
            repr(self._expr_str), repr(self._expr), repr(self._len), self._text_loc
        )
266
267
# Expression item type (any item containing an expression to evaluate).
_ExprItemT = Union[_Val, _Var]
270
271
# Group of items.
class _Group(_RepableItem):
    # Builds a group item containing the items `items`, found at
    # `text_loc`.
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

        # The total size is computed once, at construction time
        self._size = sum(contained.size for contained in items)

    # Contained items.
    @property
    def items(self):
        return self._items

    # Size (bytes): sum of the sizes of the contained items.
    @property
    def size(self):
        return self._size

    def __repr__(self):
        items_repr = repr(self._items)
        return "_Group({}, {})".format(items_repr, self._text_loc)
290
291
# Repetition item.
class _Rep(_Item):
    # Builds a repetition item repeating `item` `mul` times, found at
    # `text_loc`.
    def __init__(self, item: _RepableItem, mul: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._item = item
        self._mul = mul

    # Item to repeat.
    @property
    def item(self):
        return self._item

    # Repetition multiplier.
    @property
    def mul(self):
        return self._mul

    # Size (bytes): size of the repeated item times the multiplier.
    @property
    def size(self):
        return self._item.size * self._mul

    def __repr__(self):
        return "_Rep({}, {}, {})".format(
            repr(self._item), repr(self._mul), self._text_loc
        )
317
318
# A parsing error containing a message and a text location.
#
# Instances are built with the private _create() class method; calling
# the constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    # Builds and returns a parsing error having the message `msg` and
    # the source text location `text_loc`, bypassing __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        self = cls.__new__(cls)
        self._init(msg, text_loc)
        return self

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization (called by _create()): the message becomes
    # the argument of the base exception.
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
338
339
# Raises a parsing error, forwarding the parameters to the constructor.
#
# This function never returns.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    raise ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
343
344
# Variable (and label) dictionary type (name to integral value).
VarsT = Dict[str, int]


# Python name pattern (also used for label and variable names).
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
351
352
# Normand parser.
#
# The constructor accepts a Normand input. After building, use the `res`
# property to get the resulting main group.
class _Parser:
    # Builds a parser to parse the Normand input `normand`, parsing
    # immediately.
    #
    # The keys of `variables` and `labels` are the initially known
    # variable and label names: they're used to detect name clashes
    # while parsing.
    def __init__(self, normand: str, variables: VarsT, labels: VarsT):
        self._normand = normand
        self._at = 0
        self._line_no = 1
        self._col_no = 1
        self._label_names = set(labels.keys())
        self._var_names = set(variables.keys())
        self._parse()

    # Result (main group).
    @property
    def res(self):
        return self._res

    # Current text location.
    @property
    def _text_loc(self):
        return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
            self._line_no, self._col_no
        )

    # Returns `True` if this parser is done parsing.
    def _is_done(self):
        return self._at == len(self._normand)

    # Returns `True` if this parser isn't done parsing.
    def _isnt_done(self):
        return not self._is_done()

    # Raises a parse error, creating it using the message `msg` and the
    # current text location.
    def _raise_error(self, msg: str) -> NoReturn:
        _raise_error(msg, self._text_loc)

    # Tries to make the pattern `pat` match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success.
    #
    # Returns `None` if `pat` doesn't match.
    def _try_parse_pat(self, pat: Pattern[str]):
        m = pat.match(self._normand, self._at)

        if m is None:
            return

        matched = m.group(0)

        # Skip matched string
        self._at += len(matched)

        # Update line number
        self._line_no += matched.count("\n")

        # Update column number: distance between the new position and
        # the last newline before it.
        #
        # str.rfind() returns -1 when there's no such newline, which
        # makes the column number `self._at + 1` as expected for the
        # first line. This also works when the very first character of
        # the input is a newline (column 1 of line 2 just after it).
        self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

        # Return match object
        return m

    # Expects the pattern `pat` to match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success, or raising a parse
    # error with the message `error_msg` on error.
    def _expect_pat(self, pat: Pattern[str], error_msg: str):
        # Match
        m = self._try_parse_pat(pat)

        if m is None:
            # No match: error
            self._raise_error(error_msg)

        # Return match object
        return m

    # Pattern for _skip_ws_and_comments()
    _ws_or_syms_or_comments_pat = re.compile(
        r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
    )

    # Skips as many whitespaces, insignificant symbol characters, and
    # comments as possible.
    def _skip_ws_and_comments(self):
        self._try_parse_pat(self._ws_or_syms_or_comments_pat)

    # Pattern for _try_parse_hex_byte()
    _nibble_pat = re.compile(r"[A-Fa-f0-9]")

    # Tries to parse a hexadecimal byte, returning a byte item on
    # success.
    def _try_parse_hex_byte(self):
        begin_text_loc = self._text_loc

        # Match initial nibble
        m_high = self._try_parse_pat(self._nibble_pat)

        if m_high is None:
            # No match
            return

        # Expect another nibble
        self._skip_ws_and_comments()
        m_low = self._expect_pat(
            self._nibble_pat, "Expecting another hexadecimal nibble"
        )

        # Return item
        return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)

    # Patterns for _try_parse_bin_byte()
    _bin_byte_bit_pat = re.compile(r"[01]")
    _bin_byte_prefix_pat = re.compile(r"%")

    # Tries to parse a binary byte, returning a byte item on success.
    def _try_parse_bin_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
            # No match
            return

        # Expect eight bits
        bits = []  # type: List[str]

        for _ in range(8):
            self._skip_ws_and_comments()
            m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
            bits.append(m.group(0))

        # Return item
        return _Byte(int("".join(bits), 2), begin_text_loc)

    # Patterns for _try_parse_dec_byte()
    _dec_byte_prefix_pat = re.compile(r"\$\s*")
    _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

    # Tries to parse a decimal byte, returning a byte item on success.
    def _try_parse_dec_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
            # No match
            return

        # Expect the value
        m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

        # Compute value
        val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)

        # Validate
        if val < -128 or val > 255:
            _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

        # Two's complement
        val = val % 256

        # Return item
        return _Byte(val, begin_text_loc)

    # Tries to parse a byte, returning a byte item on success.
    def _try_parse_byte(self):
        # Hexadecimal, binary, and then decimal
        for try_parse in (
            self._try_parse_hex_byte,
            self._try_parse_bin_byte,
            self._try_parse_dec_byte,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Patterns for _try_parse_str()
    _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
    _str_suffix_pat = re.compile(r'"')
    _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
    _str_escape_seq_pat = re.compile(r"\\(.)")

    # Strings corresponding to escape sequence characters
    _str_escape_seq_strs = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "e": "\x1b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
    }

    # Tries to parse a string, returning a string item on success.
    def _try_parse_str(self):
        begin_text_loc = self._text_loc

        # Match prefix
        m = self._try_parse_pat(self._str_prefix_pat)

        if m is None:
            # No match
            return

        # Get encoding
        encoding = "utf8"

        if m.group("len") is not None:
            encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

        # Actual string
        m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

        # Expect end of string
        self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

        # Replace escape sequences in a single left-to-right pass so
        # that an escaped backslash never combines with the following
        # character (`\\n` is a backslash followed by `n`, not a
        # backslash followed by a newline). Unknown escape sequences
        # are kept as is.
        val = self._str_escape_seq_pat.sub(
            lambda esc_m: self._str_escape_seq_strs.get(
                esc_m.group(1), esc_m.group(0)
            ),
            m.group(0),
        )

        # Encode
        data = val.encode(encoding)

        # Return item
        return _Str(data, begin_text_loc)

    # Patterns for _try_parse_group()
    _group_prefix_pat = re.compile(r"\(")
    _group_suffix_pat = re.compile(r"\)")

    # Tries to parse a group, returning a group item on success.
    def _try_parse_group(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._group_prefix_pat) is None:
            # No match
            return

        # Parse items
        items = self._parse_items()

        # Expect end of group
        self._skip_ws_and_comments()
        self._expect_pat(
            self._group_suffix_pat, "Expecting an item or `)` (end of group)"
        )

        # Return item
        return _Group(items, begin_text_loc)

    # Returns a stripped expression string and an AST expression node
    # from the expression string `expr_str` at text location `text_loc`.
    #
    # Raises a parse error if `expr_str` isn't a valid Python
    # expression.
    def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
        # Create an expression node from the expression string
        expr_str = expr_str.strip().replace("\n", " ")

        try:
            expr = ast.parse(expr_str, mode="eval")
        except SyntaxError:
            _raise_error(
                "Invalid expression `{}`: invalid syntax".format(expr_str),
                text_loc,
            )

        return expr_str, expr

    # Patterns for _try_parse_val_and_len()
    _val_expr_pat = re.compile(r"([^}:]+):")
    _val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)")

    # Tries to parse a value and length, returning a value item on
    # success.
    def _try_parse_val_and_len(self):
        begin_text_loc = self._text_loc

        # Match
        m_expr = self._try_parse_pat(self._val_expr_pat)

        if m_expr is None:
            # No match
            return

        # Expect a length
        m_len = self._expect_pat(
            self._val_len_pat, "Expecting a length (multiple of eight bits)"
        )

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

        # Return item
        return _Val(
            expr_str,
            expr,
            int(m_len.group(1)),
            begin_text_loc,
        )

    # Pattern for _try_parse_var()
    _var_pat = re.compile(
        r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
    )

    # Tries to parse a variable, returning a variable item on success.
    def _try_parse_var(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._var_pat)

        if m is None:
            # No match
            return

        # Validate name
        name = m.group("name")

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Existing label named `{}`".format(name), begin_text_loc)

        # Add to known variable names
        self._var_names.add(name)

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

        # Return item
        return _Var(
            name,
            expr_str,
            expr,
            begin_text_loc,
        )

    # Pattern for _try_parse_bo_name()
    _bo_pat = re.compile(r"[bl]e")

    # Tries to parse a byte order name, returning a byte order item on
    # success.
    def _try_parse_bo_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._bo_pat)

        if m is None:
            # No match
            return

        # Return corresponding item
        if m.group(0) == "be":
            return _Bo(ByteOrder.BE, begin_text_loc)
        else:
            assert m.group(0) == "le"
            return _Bo(ByteOrder.LE, begin_text_loc)

    # Patterns for _try_parse_val_or_var_or_bo()
    _val_var_bo_prefix_pat = re.compile(r"\{\s*")
    _val_var_bo_suffix_pat = re.compile(r"\s*}")

    # Tries to parse a value, a variable, or a byte order, returning an
    # item on success.
    def _try_parse_val_or_var_or_bo(self):
        # Match prefix
        if self._try_parse_pat(self._val_var_bo_prefix_pat) is None:
            # No match
            return

        # Variable item?
        item = self._try_parse_var()

        if item is None:
            # Value item?
            item = self._try_parse_val_and_len()

        if item is None:
            # Byte order item?
            item = self._try_parse_bo_name()

        if item is None:
            # At this point it's invalid
            self._raise_error("Expecting a value, a variable, or a byte order")

        # Expect suffix
        self._expect_pat(self._val_var_bo_suffix_pat, "Expecting `}`")
        return item

    # Pattern for _try_parse_offset_val() and _try_parse_rep()
    _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

    # Returns the integral value of the string `s` which
    # `_pos_const_int_pat` matched.
    #
    # The base is selected explicitly because int() with base 0 raises
    # `ValueError` for a decimal constant having leading zeros (for
    # example `010`), which the pattern accepts.
    @staticmethod
    def _pos_const_int_from_str(s: str):
        if s[:2] in ("0x", "0X"):
            return int(s, 16)

        return int(s, 10)

    # Tries to parse an offset value (after the initial `<`), returning
    # an offset item on success.
    def _try_parse_offset_val(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is None:
            # No match
            return

        # Return item
        return _Offset(self._pos_const_int_from_str(m.group(0)), begin_text_loc)

    # Tries to parse a label name (after the initial `<`), returning a
    # label item on success.
    def _try_parse_label_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(_py_name_pat)

        if m is None:
            # No match
            return

        # Validate
        name = m.group(0)

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

        if name in self._var_names:
            _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

        # Add to known label names
        self._label_names.add(name)

        # Return item
        return _Label(name, begin_text_loc)

    # Patterns for _try_parse_label_or_offset()
    _label_offset_prefix_pat = re.compile(r"<\s*")
    _label_offset_suffix_pat = re.compile(r"\s*>")

    # Tries to parse a label or an offset, returning an item on success.
    def _try_parse_label_or_offset(self):
        # Match prefix
        if self._try_parse_pat(self._label_offset_prefix_pat) is None:
            # No match
            return

        # Offset item?
        item = self._try_parse_offset_val()

        if item is None:
            # Label item?
            item = self._try_parse_label_name()

        if item is None:
            # At this point it's invalid
            self._raise_error("Expecting a label name or an offset value")

        # Expect suffix
        self._expect_pat(self._label_offset_suffix_pat, "Expecting `>`")
        return item

    # Tries to parse a base item (anything except a repetition),
    # returning it on success.
    def _try_parse_base_item(self):
        # In order: byte, string, value/variable/byte order,
        # label/offset, and group
        for try_parse in (
            self._try_parse_byte,
            self._try_parse_str,
            self._try_parse_val_or_var_or_bo,
            self._try_parse_label_or_offset,
            self._try_parse_group,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Pattern for _try_parse_rep()
    _rep_prefix_pat = re.compile(r"\*\s*")

    # Tries to parse a repetition, returning the multiplier on success,
    # or 1 otherwise.
    def _try_parse_rep(self):
        # Match prefix
        if self._try_parse_pat(self._rep_prefix_pat) is None:
            # No match
            return 1

        # Expect and return a constant integral multiplier (decimal or
        # hexadecimal)
        self._skip_ws_and_comments()
        m = self._expect_pat(
            self._pos_const_int_pat, "Expecting a positive integral multiplier"
        )
        return self._pos_const_int_from_str(m.group(0))

    # Tries to parse an item, possibly followed by a repetition,
    # returning `True` on success.
    #
    # Appends any parsed item to `items`.
    def _try_append_item(self, items: List[_Item]):
        self._skip_ws_and_comments()

        # Parse a base item
        item = self._try_parse_base_item()

        if item is None:
            # No item
            return False

        # Parse repetition if the base item is repeatable
        if isinstance(item, _RepableItem):
            self._skip_ws_and_comments()
            rep_text_loc = self._text_loc
            rep = self._try_parse_rep()

            if rep == 0:
                # No item, but that's okay
                return True
            elif rep > 1:
                # Convert to repetition item
                item = _Rep(item, rep, rep_text_loc)

        items.append(item)
        return True

    # Parses and returns items, skipping whitespaces, insignificant
    # symbols, and comments when allowed, and stopping at the first
    # unknown character.
    def _parse_items(self) -> List[_Item]:
        items = []  # type: List[_Item]

        while self._isnt_done():
            # Try to append item
            if not self._try_append_item(items):
                # Unknown at this point
                break

        return items

    # Parses the whole Normand input, setting `self._res` to the main
    # group item on success.
    def _parse(self):
        if not self._normand.strip():
            # Special case to make sure there's something to consume
            self._res = _Group([], self._text_loc)
            return

        # Parse first level items
        items = self._parse_items()

        # Make sure there's nothing left
        self._skip_ws_and_comments()

        if self._isnt_done():
            self._raise_error(
                "Unexpected character `{}`".format(self._normand[self._at])
            )

        # Set main group item
        self._res = _Group(items, self._text_loc)
958
959
# The return type of parse().
#
# Instances are built with the private _create() class method; calling
# the constructor directly raises `NotImplementedError`.
class ParseResult:
    # Builds and returns a parse result from the generated data `data`,
    # the variables `variables`, the labels `labels`, the offset
    # `offset`, and the byte order `bo`, bypassing __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self = cls.__new__(cls)
        self._init(data, variables, labels, offset, bo)
        return self

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization (called by _create()).
    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order (`None` if no byte order is set).
    @property
    def byte_order(self):
        return self._bo
1017
1018
# Raises a parse error for the item `item`, creating it using the
# message `msg` and the source text location of `item`.
#
# This function never returns.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    _raise_error(msg, item.text_loc)
1023
1024
# The `ICITTE` reserved name: it can't be a label or variable name, and
# it's set to the current offset when evaluating an expression.
_icitte_name = "ICITTE"
1027
1028
# Value expression validator.
#
# Visits the nodes of an expression to make sure that the names it uses
# are known symbols.
class _ExprValidator(ast.NodeVisitor):
    # Builds a validator for the expression of the item `item`, `syms`
    # being the known symbols (names to values).
    def __init__(self, item: _ExprItemT, syms: VarsT):
        self._item = item
        self._syms = syms
        self._parent_is_call = False

    # Validates `node` and then visits its children.
    #
    # Raises a parse error if `node` is a checked name which is neither
    # `ICITTE` nor a known symbol.
    def generic_visit(self, node: ast.AST):
        if type(node) is ast.Call:
            # While this flag is set, names aren't checked; it's reset
            # below as soon as the children of any node are visited.
            # NOTE(review): presumably meant to skip the name of the
            # called function only; confirm for attribute calls.
            self._parent_is_call = True
        elif type(node) is ast.Name and not self._parent_is_call:
            # Make sure the name refers to a known variable/label name
            if node.id != _icitte_name and node.id not in self._syms:
                _raise_error(
                    "Unknown variable/label name `{}` in expression `{}`".format(
                        node.id, self._item.expr_str
                    ),
                    self._item.text_loc,
                )

        # TODO: Restrict the set of allowed node types

        super().generic_visit(node)
        self._parent_is_call = False
1053
1054
# Keeper of labels for a given group instance.
#
# A group instance is one iteration of a given group.
class _GroupInstanceLabels:
    def __init__(self):
        # Group to (instance index to labels)
        self._instance_labels = {}  # type: Dict[_Group, Dict[int, VarsT]]

    # Assigns the labels `labels` to a new instance of `group`.
    #
    # The new instance index is the current number of instances of
    # `group`; a copy of `labels` is kept.
    def add(self, group: _Group, labels: VarsT):
        labels_of_group = self._instance_labels.setdefault(group, {})
        next_instance_index = len(labels_of_group)
        labels_of_group[next_instance_index] = labels.copy()

    # Returns the labels (not a copy) of the instance `instance_index`
    # of the group `group`.
    def labels(self, group: _Group, instance_index: int):
        labels_of_group = self._instance_labels[group]
        return labels_of_group[instance_index]
1074
1075
1076# Generator of data and labels from a group item.
1077#
1078# Generation happens in memory at construction time. After building, use
1079# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1080# get the resulting context.
1081class _Gen:
1082 def __init__(
1083 self,
1084 group: _Group,
1085 variables: VarsT,
1086 labels: VarsT,
1087 offset: int,
1088 bo: Optional[ByteOrder],
1089 ):
1090 self._group_instance_labels = _GroupInstanceLabels()
1091 self._resolve_labels(group, offset, labels.copy())
1092 self._vars = variables.copy()
1093 self._offset = offset
1094 self._bo = bo
1095 self._main_group = group
1096 self._gen()
1097
    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._vars

    # Updated main group labels (labels of the first instance of the
    # main group).
    @property
    def labels(self):
        return self._group_instance_labels.labels(self._main_group, 0)

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order (`None` if not set).
    @property
    def bo(self):
        return self._bo
1122
1123 # Fills `self._group_instance_labels` with the labels for each group
1124 # instance in `item`, starting at current offset `offset` with the
1125 # current labels `labels`.
1126 #
1127 # Returns the new current offset.
1128 def _resolve_labels(self, item: _Item, offset: int, labels: VarsT) -> int:
1129 if type(item) is _Group:
1130 # First pass: compute immediate labels of this instance
1131 group_labels = labels.copy()
1132 group_offset = offset
1133
1134 for subitem in item.items:
1135 if type(subitem) is _Offset:
1136 group_offset = subitem.val
1137 elif type(subitem) is _Label:
1138 assert subitem.name not in group_labels
1139 group_labels[subitem.name] = group_offset
1140 else:
1141 group_offset += subitem.size
1142
1143 # Add to group instance labels
1144 self._group_instance_labels.add(item, group_labels)
1145
1146 # Second pass: handle each item
1147 for subitem in item.items:
1148 offset = self._resolve_labels(subitem, offset, group_labels)
1149 elif type(item) is _Rep:
1150 for _ in range(item.mul):
1151 offset = self._resolve_labels(item.item, offset, labels)
1152 elif type(item) is _Offset:
1153 offset = item.val
1154 else:
1155 offset += item.size
1156
1157 return offset
1158
    # Handles the byte item `item`: appends its value to the generated
    # data and updates the current offset.
    def _handle_byte_item(self, item: _Byte):
        self._data.append(item.val)
        self._offset += item.size

    # Handles the string item `item`: appends its encoded bytes to the
    # generated data and updates the current offset.
    def _handle_str_item(self, item: _Str):
        self._data += item.data
        self._offset += item.size

    # Handles the byte order item `item`: sets the current byte order.
    def _handle_bo_item(self, item: _Bo):
        self._bo = item.bo
1169
def _eval_expr(self, item: _ExprItemT):
    # Evaluates the expression of `item` and returns the result.
    #
    # The names available to the expression are, from lowest to highest
    # precedence: the labels of the current group instance, the
    # `ICITTE` name (current offset), and the current variables.
    #
    # Reports an error for `item` if the evaluation fails or if the
    # result isn't exactly an `int`.
    assert self._cur_group is not None
    cur_group = self._cur_group
    instance_index = self._group_instance_indexes[cur_group]

    # Work on a copy of the labels because more names are added below
    syms = self._group_instance_labels.labels(cur_group, instance_index).copy()

    # `ICITTE` is the current offset (before encoding anything)
    syms[_icitte_name] = self._offset

    # Current variables come last
    syms.update(self._vars)

    # Validate the expression node and its children before evaluating
    validator = _ExprValidator(item, syms)
    validator.visit(item.expr)

    # Compile the node, and then evaluate it with `syms` as the locals
    try:
        code = compile(item.expr, "", "eval")
        val = eval(code, None, syms)
    except Exception as exc:
        eval_msg = "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc)
        _raise_error_for_item(eval_msg, item)

    # Only an exact `int` result is accepted
    if type(val) is not int:
        type_msg = "Invalid expression `{}`: unexpected result type `{}`".format(
            item.expr_str, type(val).__name__
        )
        _raise_error_for_item(type_msg, item)

    return val
1206
def _handle_var_item(self, item: _Var):
    # Handles the variable item `item`: evaluates its expression and
    # binds the result to its name within the current variables.
    new_val = self._eval_expr(item)
    self._vars[item.name] = new_val
1210
def _handle_val_item(self, item: _Val):
    # Handles the value item `item`: evaluates its expression, checks
    # that the result fits `item.len` bits, encodes it with the current
    # byte order, appends the encoded bytes to the generated data, and
    # advances the current offset accordingly.

    # Compute the value to encode
    val = self._eval_expr(item)

    # The accepted range spans from the smallest signed value to the
    # largest unsigned value representable on `item.len` bits
    min_val = -(2 ** (item.len - 1))
    max_val = 2**item.len - 1

    if not (min_val <= val <= max_val):
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, item.len, item.expr_str, self._offset
            ),
            item,
        )

    # A byte order is only required to encode more than one byte
    if item.len > 8 and self._bo is None:
        _raise_error_for_item(
            "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # No byte order at this point means a single byte: treat as big
    # endian
    is_big_endian = self._bo in (None, ByteOrder.BE)

    # Encode on 64 bits (to extend the sign bit whatever the value of
    # `item.len`)
    fmt = (">" if is_big_endian else "<") + ("Q" if val >= 0 else "q")
    encoded = struct.pack(fmt, val)

    # Keep only the requested length
    byte_count = item.len // 8

    if is_big_endian:
        # Big endian: least significant bytes are the last ones
        kept = encoded[-byte_count:]
    else:
        # Little endian: least significant bytes are the first ones
        assert self._bo == ByteOrder.LE
        kept = encoded[:byte_count]

    # Append to the generated data and update the current offset
    self._data += kept
    self._offset += len(kept)
1256
def _handle_group_item(self, item: _Group):
    # Handles the group item `item`: makes it the current group while
    # handling each of its items, and then restores the previous
    # current group.

    # Move to the next instance of `item` (the first instance has the
    # index 0)
    indexes = self._group_instance_indexes
    indexes[item] = indexes.get(item, -1) + 1

    # `item` becomes the current group; remember the previous one
    prev_group = self._cur_group
    self._cur_group = item

    # Handle the contained items in order
    for child in item.items:
        self._handle_item(child)

    # Back to the previous current group
    self._cur_group = prev_group
1274
def _handle_rep_item(self, item: _Rep):
    # Handles the repetition item `item`: handles the repeated item
    # `item.mul` times in a row.
    for _iteration in range(item.mul):
        self._handle_item(item.item)
1278
def _handle_offset_item(self, item: _Offset):
    # Handles the offset item `item`: its value becomes the current
    # offset (no data is generated).
    new_offset = item.val
    self._offset = new_offset
1281
def _handle_item(self, item: _Item):
    # Dispatches `item` to the handler registered for its exact type.
    #
    # An item of which the type has no registered handler is ignored.
    handler = self._item_handlers.get(type(item))

    if handler is not None:
        handler(item)
1285
def _gen(self):
    # Generates the data of the main group: resets the generation
    # state, builds the item handler table, and handles the main group.

    # Handler of each supported item type (looked up by _handle_item())
    self._item_handlers = {
        _Byte: self._handle_byte_item,
        _Str: self._handle_str_item,
        _Bo: self._handle_bo_item,
        _Val: self._handle_val_item,
        _Var: self._handle_var_item,
        _Group: self._handle_group_item,
        _Rep: self._handle_rep_item,
        _Offset: self._handle_offset_item,
    }  # type: Dict[type, Callable[[Any], None]]

    # Initial state: no data, no handled group instance, and no current
    # group
    self._data = bytearray()
    self._group_instance_indexes = {}  # type: Dict[_Group, int]
    self._cur_group = None

    # Everything starts from the main group
    self._handle_item(self._main_group)
1306
1307
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables`: initial variables, as a dictionary of names (valid
# Python names) to integral values. The reserved name `ICITTE` must not
# be used as a variable name. `None` means no initial variables.
#
# `init_labels`: initial labels, as a dictionary of names (valid Python
# names) to integral values. The reserved name `ICITTE` must not be
# used as a label name. `None` means no initial labels.
#
# `init_offset`: initial offset.
#
# `init_byte_order`: initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Fall back to fresh empty dictionaries (never shared between calls)
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the input, and then generate the data from the result
    parsed = _Parser(normand, variables, labels).res
    gen = _Gen(parsed, variables, labels, init_offset, init_byte_order)

    # Wrap the final generator state
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1347
1348
# Builds the command-line argument parser, parses the arguments of the
# current process with it, and returns the resulting namespace.
def _parse_cli_args():
    import argparse

    parser = argparse.ArgumentParser()

    # Initial state options
    parser.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    parser.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Repeatable `NAME=VAL` options (each one accumulates into a list)
    parser.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    parser.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    version_str = "Normand {}".format(__version__)
    parser.add_argument("--version", action="version", version=version_str)

    # Optional input path
    parser.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return parser.parse_args()
1397
1398
# Raises a `RuntimeError` describing a command-line error of which the
# specific message is `msg`.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
1402
1403
# Returns a dictionary of names to integers built from the list of
# strings `args`, each one being a `NAME=VAL` entry where `NAME` is a
# valid Python name and `VAL` is a sequence of decimal digits.
#
# Returns an empty dictionary if `args` is `None` (option not used).
#
# If a name occurs more than once, then the last entry wins.
#
# Raises a command-line error if an entry is malformed.
def _dict_from_arg(args: Optional[List[str]]):
    d = {}  # type: Dict[str, int]

    if args is None:
        return d

    # Named groups so that the extraction below keeps working whatever
    # the groups the name pattern itself may contain
    entry_pat = re.compile(
        r"(?P<name>{})=(?P<val>\d+)$".format(_py_name_pat.pattern)
    )

    for arg in args:
        m = entry_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        # Store the validated entry (without this the result is always
        # empty and the `--var`/`--label` options have no effect)
        d[m.group("name")] = int(m.group("val"))

    return d
1419
1420
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input (from the
# provided path, or from the standard input without a path), parses it
# with the requested initial variables, labels, offset, and byte order,
# and writes the resulting bytes to the standard output.
#
# On a parsing error, prints its location and message, and exits with
# status 1. Raises a command-line error on invalid arguments.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message must include the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The argument parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print the raw bytes (binary standard output)
    sys.stdout.buffer.write(res.data)
1470
1471
# Prints the message `msg` to the standard error, adding a final period
# if it's missing, and then exits with status 1.
def _fail(msg: str) -> NoReturn:
    final_msg = msg if msg.endswith(".") else msg + "."
    print(final_msg, file=sys.stderr)
    sys.exit(1)
1479
1480
# CLI entry point: runs the command-line tool and turns any raised
# exception into an error message followed by an exit with status 1.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as exc:
        # Report the exception message instead of a traceback
        err_msg = str(exc)
        _fail(err_msg)
1487
1488
# Run the command-line tool when this module is executed as a script
if __name__ == "__main__":
    _run_cli()
This page took 0.07463 seconds and 4 git commands to generate.