Make text locations of some items and errors more precise
[normand.git] / normand / normand.py
CommitLineData
71aaa3f7
PP
1# The MIT License (MIT)
2#
3# Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24__author__ = "Philippe Proulx"
0e8e3169 25__version__ = "0.2.0"
71aaa3f7
PP
26__all__ = [
27 "ByteOrder",
28 "parse",
29 "ParseError",
30 "ParseResult",
31 "TextLoc",
32 "VarsT",
33 "__author__",
34 "__version__",
35]
36
37import re
38import abc
39import ast
40import sys
41import enum
42import struct
43from typing import Any, Dict, List, Union, Pattern, Callable, NoReturn, Optional
44
45
46# Text location (line and column numbers).
class TextLoc:
    # Text location (line and column numbers).
    #
    # Build an instance with the private _create() class method: calling
    # the constructor directly raises `NotImplementedError`.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    # Readable form so that formatting a text location (as the item
    # `__repr__()` methods do) shows the line and column numbers instead
    # of an opaque object address.
    def __repr__(self):
        return "TextLoc({}, {})".format(self._line_no, self._col_no)
70
71
72# Any item.
class _Item(abc.ABC):
    # Base of all items.
    #
    # Deriving from `abc.ABC` is what makes `@abc.abstractmethod` below
    # effective: a subclass which doesn't define the `size` property
    # cannot be instantiated.
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    # Returns the size, in bytes, of this item.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
87
88
89# A repeatable item.
class _RepableItem(_Item):
    # Marker base class: adds nothing to `_Item`. The parser checks
    # `isinstance(item, _RepableItem)` to decide whether or not an item
    # may be followed by a repetition.
    pass
92
93
94# Single byte.
class _Byte(_RepableItem):
    # Item holding the value of a single byte.
    def __init__(self, val: int, text_loc: TextLoc):
        self._val = val
        super().__init__(text_loc)

    # Byte value.
    @property
    def val(self):
        return self._val

    # Always one byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        return "_Byte({:#x}, {})".format(self._val, self._text_loc)
111
112
113# String.
class _Str(_RepableItem):
    # Item holding the already encoded bytes of a literal string.
    def __init__(self, data: bytes, text_loc: TextLoc):
        self._data = data
        super().__init__(text_loc)

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Number of encoded bytes.
    @property
    def size(self):
        encoded = self._data
        return len(encoded)

    def __repr__(self):
        return "_Str({!r}, {})".format(self._data, self._text_loc)
130
131
132# Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # Byte order of the values to encode; the member values are the
    # `be` and `le` names.

    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
140
141
142# Byte order.
class _Bo(_Item):
    # Item which changes the current byte order to `bo`.
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    # A byte order item doesn't produce any data.
    @property
    def size(self):
        return 0

    # Same form as the `__repr__()` methods of the other item classes.
    def __repr__(self):
        return "_Bo({}, {})".format(repr(self._bo), self._text_loc)
155
156
157# Label.
class _Label(_Item):
    # Item naming the offset at which it's located.
    def __init__(self, name: str, text_loc: TextLoc):
        self._name = name
        super().__init__(text_loc)

    # Label name.
    @property
    def name(self):
        return self._name

    # A label doesn't produce any data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Label({!r}, {})".format(self._name, self._text_loc)
174
175
176# Offset.
class _Offset(_Item):
    # Item setting the current offset to `val`.
    def __init__(self, val: int, text_loc: TextLoc):
        self._val = val
        super().__init__(text_loc)

    # Offset value.
    @property
    def val(self):
        return self._val

    # An offset doesn't produce any data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Offset({!r}, {})".format(self._val, self._text_loc)
193
194
195# Mixin of containing an AST expression and its string.
196class _ExprMixin:
197 def __init__(self, expr_str: str, expr: ast.Expression):
198 self._expr_str = expr_str
199 self._expr = expr
200
201 # Expression string.
202 @property
203 def expr_str(self):
204 return self._expr_str
205
206 # Expression node to evaluate.
207 @property
208 def expr(self):
209 return self._expr
210
211
212# Variable.
class _Var(_Item, _ExprMixin):
    # Item assigning the result of an expression to the variable named
    # `name`.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    # A variable assignment doesn't produce any data.
    @property
    def size(self):
        return 0

    def __repr__(self):
        return "_Var({!r}, {!r}, {!r}, {})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
234
235
236# Value, possibly needing more than one byte.
class _Val(_RepableItem, _ExprMixin):
    # Item encoding the result of an expression on `len` bits.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Length converted to bytes.
    @property
    def size(self):
        bits = self._len
        return bits // 8

    def __repr__(self):
        return "_Val({!r}, {!r}, {!r}, {})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
258
259
260# Expression item type.
261_ExprItemT = Union[_Val, _Var]
262
263
264# Group of items.
class _Group(_RepableItem):
    # Item containing other items.
    #
    # The total size is computed once, at construction time, from the
    # sizes of the contained items.
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items
        self._size = sum(subitem.size for subitem in items)

    # Contained items.
    @property
    def items(self):
        return self._items

    # Sum of the sizes of the contained items.
    @property
    def size(self):
        return self._size

    def __repr__(self):
        return "_Group({!r}, {})".format(self._items, self._text_loc)
282
283
284# Repetition item.
class _Rep(_Item):
    # Item repeating another item `mul` times.
    def __init__(self, item: _RepableItem, mul: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._item, self._mul = item, mul

    # Item to repeat.
    @property
    def item(self):
        return self._item

    # Repetition multiplier.
    @property
    def mul(self):
        return self._mul

    # Size of the repeated item times the multiplier.
    @property
    def size(self):
        unit_size = self._item.size
        return unit_size * self._mul

    def __repr__(self):
        return "_Rep({!r}, {!r}, {})".format(self._item, self._mul, self._text_loc)
309
310
311# A parsing error containing a message and a text location.
class ParseError(RuntimeError):
    # Parsing error made of a message and of the text location at which
    # the error occurred.
    #
    # Build an instance with the private _create() class method: calling
    # the constructor directly raises `NotImplementedError`.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, msg: str, text_loc: TextLoc):
        # The message becomes the exception argument
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
330
331
332# Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    # Never returns: always raises a `ParseError` built from the message
    # `msg` and the text location `text_loc`.
    raise ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
335
336
337# Variable (and label) dictionary type.
338VarsT = Dict[str, int]
339
340
341# Python name pattern.
342_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
343
344
345# Normand parser.
346#
347# The constructor accepts a Normand input. After building, use the `res`
348# property to get the resulting main group.
class _Parser:
    # Builds a parser to parse the Normand input `normand`, parsing
    # immediately.
    #
    # The keys of `variables` and `labels` are the initially known
    # variable and label names.
    def __init__(self, normand: str, variables: VarsT, labels: VarsT):
        self._normand = normand
        self._at = 0
        self._line_no = 1
        self._col_no = 1
        self._label_names = set(labels.keys())
        self._var_names = set(variables.keys())
        self._parse()

    # Result (main group).
    @property
    def res(self):
        return self._res

    # Current text location.
    @property
    def _text_loc(self):
        return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
            self._line_no, self._col_no
        )

    # Returns `True` if this parser is done parsing.
    def _is_done(self):
        return self._at == len(self._normand)

    # Returns `True` if this parser isn't done parsing.
    def _isnt_done(self):
        return not self._is_done()

    # Raises a parse error, creating it using the message `msg` and the
    # current text location.
    def _raise_error(self, msg: str) -> NoReturn:
        _raise_error(msg, self._text_loc)

    # Tries to make the pattern `pat` match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success.
    #
    # Returns `None` without changing anything if there's no match.
    def _try_parse_pat(self, pat: Pattern[str]):
        m = pat.match(self._normand, self._at)

        if m is None:
            return

        # Skip matched string (the match starts at `self._at`)
        self._at = m.end()

        # Update line number
        self._line_no += m.group(0).count("\n")

        # Update column number: one-based distance from the last newline
        # located before the current position. rfind() returns -1 when
        # there's no such newline, which makes the column `self._at + 1`
        # on the first line. This is also correct when the very first
        # character of the input is a newline.
        self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

        # Return match object
        return m

    # Expects the pattern `pat` to match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success, or raising a parse
    # error with the message `error_msg` on error.
    def _expect_pat(self, pat: Pattern[str], error_msg: str):
        # Match
        m = self._try_parse_pat(pat)

        if m is None:
            # No match: error
            self._raise_error(error_msg)

        # Return match object
        return m

    # Pattern for _skip_ws_and_comments()
    #
    # A comment starts with `#` and ends with the next `#`, with the end
    # of the line, or with the end of the input (`\Z`), so that a last
    # comment without a trailing newline is skipped too.
    _ws_or_syms_or_comments_pat = re.compile(
        r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#|\Z))*"
    )

    # Skips as many whitespaces, insignificant symbol characters, and
    # comments as possible.
    def _skip_ws_and_comments(self):
        self._try_parse_pat(self._ws_or_syms_or_comments_pat)

    # Pattern for _try_parse_hex_byte()
    _nibble_pat = re.compile(r"[A-Fa-f0-9]")

    # Tries to parse a hexadecimal byte, returning a byte item on
    # success.
    def _try_parse_hex_byte(self):
        begin_text_loc = self._text_loc

        # Match initial nibble
        m_high = self._try_parse_pat(self._nibble_pat)

        if m_high is None:
            # No match
            return

        # Expect another nibble
        self._skip_ws_and_comments()
        m_low = self._expect_pat(
            self._nibble_pat, "Expecting another hexadecimal nibble"
        )

        # Return item
        return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)

    # Patterns for _try_parse_bin_byte()
    _bin_byte_bit_pat = re.compile(r"[01]")
    _bin_byte_prefix_pat = re.compile(r"%")

    # Tries to parse a binary byte, returning a byte item on success.
    def _try_parse_bin_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
            # No match
            return

        # Expect eight bits
        bits = []  # type: List[str]

        for _ in range(8):
            self._skip_ws_and_comments()
            m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
            bits.append(m.group(0))

        # Return item
        return _Byte(int("".join(bits), 2), begin_text_loc)

    # Patterns for _try_parse_dec_byte()
    _dec_byte_prefix_pat = re.compile(r"\$\s*")
    _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

    # Tries to parse a decimal byte, returning a byte item on success.
    def _try_parse_dec_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
            # No match
            return

        # Expect the value
        m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

        # Compute value
        val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)

        # Validate
        if val < -128 or val > 255:
            _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

        # Two's complement
        val = val % 256

        # Return item
        return _Byte(val, begin_text_loc)

    # Tries to parse a byte, returning a byte item on success.
    def _try_parse_byte(self):
        # Hexadecimal, binary, and then decimal
        for try_parse in (
            self._try_parse_hex_byte,
            self._try_parse_bin_byte,
            self._try_parse_dec_byte,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Patterns for _try_parse_str()
    _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
    _str_suffix_pat = re.compile(r'"')
    _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
    _str_escape_seq_pat = re.compile(r'\\([0abefnrtv"\\])')

    # Strings corresponding to escape sequence characters
    _str_escape_seq_strs = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "e": "\x1b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
    }

    # Tries to parse a string, returning a string item on success.
    def _try_parse_str(self):
        begin_text_loc = self._text_loc

        # Match prefix
        m = self._try_parse_pat(self._str_prefix_pat)

        if m is None:
            # No match
            return

        # Get encoding
        encoding = "utf8"

        if m.group("len") is not None:
            encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

        # Actual string
        m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

        # Expect end of string
        self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

        # Replace escape sequences in a single left-to-right pass so
        # that the result of one replacement is never considered as the
        # beginning of another escape sequence (`\\n` is a backslash
        # followed with `n`, not a backslash followed with a newline).
        val = self._str_escape_seq_pat.sub(
            lambda esc_m: self._str_escape_seq_strs[esc_m.group(1)], m.group(0)
        )

        # Encode
        data = val.encode(encoding)

        # Return item
        return _Str(data, begin_text_loc)

    # Patterns for _try_parse_group()
    _group_prefix_pat = re.compile(r"\(")
    _group_suffix_pat = re.compile(r"\)")

    # Tries to parse a group, returning a group item on success.
    def _try_parse_group(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._group_prefix_pat) is None:
            # No match
            return

        # Parse items
        items = self._parse_items()

        # Expect end of group
        self._skip_ws_and_comments()
        self._expect_pat(
            self._group_suffix_pat, "Expecting an item or `)` (end of group)"
        )

        # Return item
        return _Group(items, begin_text_loc)

    # Returns a stripped expression string and an AST expression node
    # from the expression string `expr_str` at text location `text_loc`.
    def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
        # Create an expression node from the expression string
        expr_str = expr_str.strip().replace("\n", " ")

        try:
            expr = ast.parse(expr_str, mode="eval")
        except SyntaxError:
            _raise_error(
                "Invalid expression `{}`: invalid syntax".format(expr_str),
                text_loc,
            )

        return expr_str, expr

    # Patterns for _try_parse_val_and_len()
    _val_expr_pat = re.compile(r"([^}:]+):")
    _val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)")

    # Tries to parse a value and length, returning a value item on
    # success.
    def _try_parse_val_and_len(self):
        begin_text_loc = self._text_loc

        # Match
        m_expr = self._try_parse_pat(self._val_expr_pat)

        if m_expr is None:
            # No match
            return

        # Expect a length
        m_len = self._expect_pat(
            self._val_len_pat, "Expecting a length (multiple of eight bits)"
        )

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

        # Return item
        return _Val(
            expr_str,
            expr,
            int(m_len.group(1)),
            begin_text_loc,
        )

    # Pattern for _try_parse_var()
    _var_pat = re.compile(
        r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
    )

    # Tries to parse a variable, returning a variable item on success.
    def _try_parse_var(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._var_pat)

        if m is None:
            # No match
            return

        # Validate name
        name = m.group("name")

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Existing label named `{}`".format(name), begin_text_loc)

        # Add to known variable names
        self._var_names.add(name)

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

        # Return item
        return _Var(
            name,
            expr_str,
            expr,
            begin_text_loc,
        )

    # Pattern for _try_parse_bo_name()
    _bo_pat = re.compile(r"[bl]e")

    # Tries to parse a byte order name, returning a byte order item on
    # success.
    def _try_parse_bo_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._bo_pat)

        if m is None:
            # No match
            return

        # Return corresponding item
        if m.group(0) == "be":
            return _Bo(ByteOrder.BE, begin_text_loc)
        else:
            assert m.group(0) == "le"
            return _Bo(ByteOrder.LE, begin_text_loc)

    # Patterns for _try_parse_val_or_var_or_bo()
    _val_var_bo_prefix_pat = re.compile(r"\{\s*")
    _val_var_bo_suffix_pat = re.compile(r"\s*}")

    # Tries to parse a value, a variable, or a byte order, returning an
    # item on success.
    def _try_parse_val_or_var_or_bo(self):
        # Match prefix
        if self._try_parse_pat(self._val_var_bo_prefix_pat) is None:
            # No match
            return

        # Variable item?
        item = self._try_parse_var()

        if item is None:
            # Value item?
            item = self._try_parse_val_and_len()

            if item is None:
                # Byte order item?
                item = self._try_parse_bo_name()

                if item is None:
                    # At this point it's invalid
                    self._raise_error("Expecting a value, a variable, or a byte order")

        # Expect suffix
        self._expect_pat(self._val_var_bo_suffix_pat, "Expecting `}`")
        return item

    # Pattern for _try_parse_offset_val() and _try_parse_rep()
    _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

    # Tries to parse an offset value (after the initial `<`), returning
    # an offset item on success.
    def _try_parse_offset_val(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is None:
            # No match
            return

        # Return item (base 0: the prefix of the constant selects the
        # radix)
        return _Offset(int(m.group(0), 0), begin_text_loc)

    # Tries to parse a label name (after the initial `<`), returning a
    # label item on success.
    def _try_parse_label_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(_py_name_pat)

        if m is None:
            # No match
            return

        # Validate
        name = m.group(0)

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

        if name in self._var_names:
            _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

        # Add to known label names
        self._label_names.add(name)

        # Return item
        return _Label(name, begin_text_loc)

    # Patterns for _try_parse_label_or_offset()
    _label_offset_prefix_pat = re.compile(r"<\s*")
    _label_offset_suffix_pat = re.compile(r"\s*>")

    # Tries to parse a label or an offset, returning an item on success.
    def _try_parse_label_or_offset(self):
        # Match prefix
        if self._try_parse_pat(self._label_offset_prefix_pat) is None:
            # No match
            return

        # Offset item?
        item = self._try_parse_offset_val()

        if item is None:
            # Label item?
            item = self._try_parse_label_name()

            if item is None:
                # At this point it's invalid
                self._raise_error("Expecting a label name or an offset value")

        # Expect suffix
        self._expect_pat(self._label_offset_suffix_pat, "Expecting `>`")
        return item

    # Tries to parse a base item (anything except a repetition),
    # returning it on success.
    def _try_parse_base_item(self):
        # In order: byte, string, value/variable/byte order,
        # label/offset, and group
        for try_parse in (
            self._try_parse_byte,
            self._try_parse_str,
            self._try_parse_val_or_var_or_bo,
            self._try_parse_label_or_offset,
            self._try_parse_group,
        ):
            item = try_parse()

            if item is not None:
                return item

    # Pattern for _try_parse_rep()
    _rep_prefix_pat = re.compile(r"\*\s*")

    # Tries to parse a repetition, returning the multiplier on success,
    # or 1 otherwise.
    def _try_parse_rep(self):
        # Match prefix
        if self._try_parse_pat(self._rep_prefix_pat) is None:
            # No match
            return 1

        # Expect and return a multiplier (decimal or hexadecimal
        # constant)
        self._skip_ws_and_comments()
        m = self._expect_pat(
            self._pos_const_int_pat, "Expecting a positive integral multiplier"
        )
        return int(m.group(0), 0)

    # Tries to parse an item, possibly followed by a repetition,
    # returning `True` on success.
    #
    # Appends any parsed item to `items`.
    def _try_append_item(self, items: List[_Item]):
        self._skip_ws_and_comments()

        # Parse a base item
        item = self._try_parse_base_item()

        if item is None:
            # No item
            return False

        # Parse repetition if the base item is repeatable
        if isinstance(item, _RepableItem):
            self._skip_ws_and_comments()
            rep_text_loc = self._text_loc
            rep = self._try_parse_rep()

            if rep == 0:
                # No item, but that's okay
                return True
            elif rep > 1:
                # Convert to repetition item
                item = _Rep(item, rep, rep_text_loc)

        items.append(item)
        return True

    # Parses and returns items, skipping whitespaces, insignificant
    # symbols, and comments when allowed, and stopping at the first
    # unknown character.
    def _parse_items(self) -> List[_Item]:
        items = []  # type: List[_Item]

        while self._isnt_done():
            # Try to append item
            if not self._try_append_item(items):
                # Unknown at this point
                break

        return items

    # Parses the whole Normand input, setting `self._res` to the main
    # group item on success.
    def _parse(self):
        if len(self._normand.strip()) == 0:
            # Special case to make sure there's something to consume
            self._res = _Group([], self._text_loc)
            return

        # Parse first level items
        items = self._parse_items()

        # Make sure there's nothing left
        self._skip_ws_and_comments()

        if self._isnt_done():
            self._raise_error(
                "Unexpected character `{}`".format(self._normand[self._at])
            )

        # Set main group item
        self._res = _Group(items, self._text_loc)
950
951
952# The return type of parse().
class ParseResult:
    # Holds the generated data together with the updated variables,
    # labels, offset, and byte order.
    #
    # Build an instance with the private _create() class method: calling
    # the constructor directly raises `NotImplementedError`.
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._offset, self._bo = data, offset, bo
        self._vars, self._labels = variables, labels

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1009
1010
1011# Raises a parse error for the item `item`, creating it using the
1012# message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    # Never returns: forwards the message `msg` and the source text
    # location of `item` to _raise_error().
    _raise_error(msg, item.text_loc)
1015
1016
1017# The `ICITTE` reserved name.
1018_icitte_name = "ICITTE"
1019
1020
1021# Value expression validator.
class _ExprValidator(ast.NodeVisitor):
    # Visitor which makes sure that the names found within the
    # expression of `item` are known symbols (keys of `syms`) or the
    # reserved `ICITTE` name.
    def __init__(self, item: _ExprItemT, syms: VarsT):
        self._item = item
        self._syms = syms

        # `True` while the node being visited is the direct child of a
        # call node
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # The next visited child won't be checked as a symbol name
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            # Make sure the name refers to a known symbol
            name = node.id

            if name not in self._syms and name != _icitte_name:
                _raise_error(
                    "Unknown variable/label name `{}` in expression `{}`".format(
                        name, self._item.expr_str
                    ),
                    self._item.text_loc,
                )

        # TODO: Restrict the set of allowed node types

        super().generic_visit(node)
        self._parent_is_call = False
1045
1046
1047# Keeper of labels for a given group instance.
1048#
1049# A group instance is one iteration of a given group.
class _GroupInstanceLabels:
    # Maps each group to the labels of each one of its instances, the
    # instances being numbered from zero in insertion order.
    def __init__(self):
        self._instance_labels = {}  # type: Dict[_Group, Dict[int, VarsT]]

    # Assigns the labels `labels` to a new instance of `group`.
    def add(self, group: _Group, labels: VarsT):
        # Instances of `group` so far (none for a new group)
        instances = self._instance_labels.setdefault(group, {})

        # Keep a copy under the next instance index
        instances[len(instances)] = labels.copy()

    # Returns the labels (not a copy) of the instance `instance_index`
    # of the group `group`.
    def labels(self, group: _Group, instance_index: int):
        instances = self._instance_labels[group]
        return instances[instance_index]
1066
1067
1068# Generator of data and labels from a group item.
1069#
1070# Generation happens in memory at construction time. After building, use
1071# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1072# get the resulting context.
1073class _Gen:
1074 def __init__(
1075 self,
1076 group: _Group,
1077 variables: VarsT,
1078 labels: VarsT,
1079 offset: int,
1080 bo: Optional[ByteOrder],
1081 ):
1082 self._group_instance_labels = _GroupInstanceLabels()
1083 self._resolve_labels(group, offset, labels.copy())
1084 self._vars = variables.copy()
1085 self._offset = offset
1086 self._bo = bo
1087 self._main_group = group
1088 self._gen()
1089
1090 # Generated bytes.
    @property
    def data(self):
        # Generated bytes (`self._data` isn't assigned by the visible
        # part of this class; presumably by _gen() — TODO confirm).
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._vars

    # Updated main group labels.
    @property
    def labels(self):
        # Labels of the first (index 0) instance of the main group
        return self._group_instance_labels.labels(self._main_group, 0)

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def bo(self):
        return self._bo
1114
1115 # Fills `self._group_instance_labels` with the labels for each group
1116 # instance in `item`, starting at current offset `offset` with the
1117 # current labels `labels`.
1118 #
1119 # Returns the new current offset.
1120 def _resolve_labels(self, item: _Item, offset: int, labels: VarsT) -> int:
1121 if type(item) is _Group:
1122 # First pass: compute immediate labels of this instance
1123 group_labels = labels.copy()
1124 group_offset = offset
1125
1126 for subitem in item.items:
1127 if type(subitem) is _Offset:
1128 group_offset = subitem.val
1129 elif type(subitem) is _Label:
1130 assert subitem.name not in group_labels
1131 group_labels[subitem.name] = group_offset
1132 else:
1133 group_offset += subitem.size
1134
1135 # Add to group instance labels
1136 self._group_instance_labels.add(item, group_labels)
1137
1138 # Second pass: handle each item
1139 for subitem in item.items:
1140 offset = self._resolve_labels(subitem, offset, group_labels)
1141 elif type(item) is _Rep:
1142 for _ in range(item.mul):
1143 offset = self._resolve_labels(item.item, offset, labels)
1144 elif type(item) is _Offset:
1145 offset = item.val
1146 else:
1147 offset += item.size
1148
1149 return offset
1150
    def _handle_byte_item(self, item: _Byte):
        # Appends the byte value of `item` to the generated data and
        # advances the current offset by its size.
        self._data.append(item.val)
        self._offset += item.size

    def _handle_str_item(self, item: _Str):
        # Appends the encoded bytes of `item` to the generated data and
        # advances the current offset by their count.
        self._data += item.data
        self._offset += item.size

    def _handle_bo_item(self, item: _Bo):
        # Makes the byte order of `item` the current one; no data is
        # generated.
        self._bo = item.bo
1161
1162 def _eval_expr(self, item: _ExprItemT):
1163 # Get the labels of the current group instance as the initial
1164 # symbols (copied because we're adding stuff).
1165 assert self._cur_group is not None
1166 syms = self._group_instance_labels.labels(
1167 self._cur_group, self._group_instance_indexes[self._cur_group]
1168 ).copy()
1169
1170 # Set the `ICITTE` name to the current offset (before encoding)
1171 syms[_icitte_name] = self._offset
1172
1173 # Add the current variables
1174 syms.update(self._vars)
1175
1176 # Validate the node and its children
1177 _ExprValidator(item, syms).visit(item.expr)
1178
1179 # Compile and evaluate expression node
1180 try:
1181 val = eval(compile(item.expr, "", "eval"), None, syms)
1182 except Exception as exc:
1183 _raise_error_for_item(
1184 "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
1185 item,
1186 )
1187
1188 # Validate result
1189 if type(val) is not int:
1190 _raise_error_for_item(
1191 "Invalid expression `{}`: unexpected result type `{}`".format(
1192 item.expr_str, type(val).__name__
1193 ),
1194 item,
1195 )
1196
1197 return val
1198
# Handles the variable item `item`: evaluates its expression and stores
# the result as the current value of the variable named `item.name`.
def _handle_var_item(self, item: _Var):
    new_val = self._eval_expr(item)
    self._vars[item.name] = new_val
1202
# Handles the value item `item`: evaluates its expression, validates the
# range of the result against `item.len` bits, encodes it following the
# current byte order, appends the encoded bytes to the generated data,
# and advances the current offset.
#
# Reports an error through _raise_error_for_item() if the result is out
# of range, or if there's no current byte order while `item.len` is
# greater than 8.
def _handle_val_item(self, item: _Val):
    val = self._eval_expr(item)

    # Accepted range: from the smallest signed value to the largest
    # unsigned value which fit in `item.len` bits.
    min_val = -(2 ** (item.len - 1))
    max_val = 2**item.len - 1

    if not (min_val <= val <= max_val):
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                val, item.len, item.expr_str, self._offset
            ),
            item,
        )

    # A byte order is only required to encode more than a single byte
    if self._bo is None and item.len > 8:
        _raise_error_for_item(
            "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    # No current byte order is treated like big endian
    is_be = self._bo in (None, ByteOrder.BE)

    # Encode on 64 bits (to extend the sign bit whatever the value of
    # `item.len`): unsigned format for a positive value, signed format
    # for a negative one.
    bo_char = ">" if is_be else "<"
    type_char = "Q" if val >= 0 else "q"
    data = struct.pack(bo_char + type_char, val)

    # Keep only the requested length
    len_bytes = item.len // 8

    if is_be:
        # Big endian: least significant bytes are the last ones
        data = data[-len_bytes:]
    else:
        # Little endian: least significant bytes are the first ones
        assert self._bo == ByteOrder.LE
        data = data[:len_bytes]

    # Append to the generated data and update the current offset
    self._data += data
    self._offset += len(data)
1248
# Handles the group item `item`: selects its next instance index, makes
# it the current group while handling each of its items in order, and
# then restores the previous current group.
def _handle_group_item(self, item: _Group):
    # First instance of `item` gets index 0, next ones increment it
    indexes = self._group_instance_indexes
    indexes[item] = indexes.get(item, -1) + 1

    # Enter the group, remembering the enclosing one
    enclosing_group = self._cur_group
    self._cur_group = item

    for child in item.items:
        self._handle_item(child)

    # Leave the group
    self._cur_group = enclosing_group
1266
# Handles the repetition item `item`: handles its inner item `item.mul`
# times in a row.
def _handle_rep_item(self, item: _Rep):
    handle = self._handle_item

    for _ in range(item.mul):
        handle(item.item)
1270
# Handles the offset item `item`: its value becomes the current offset
# (no data is generated).
def _handle_offset_item(self, item: _Offset):
    self._offset = item.val
1273
# Handles the item `item` by calling the handler registered for its
# exact type, if any; an item of an unregistered type is ignored.
def _handle_item(self, item: _Item):
    handler = self._item_handlers.get(type(item))

    if handler is not None:
        handler(item)
1277
# Generates the data: resets the generation state, registers one handler
# per item type, and then handles the main group.
def _gen(self):
    # Initial state: no data, no group instance handled yet, and no
    # current group.
    self._data = bytearray()
    self._group_instance_indexes = {}  # type: Dict[_Group, int]
    self._cur_group = None

    # Item type to handler (looked up by exact type by _handle_item())
    handlers = {
        _Byte: self._handle_byte_item,
        _Str: self._handle_str_item,
        _Bo: self._handle_bo_item,
        _Val: self._handle_val_item,
        _Var: self._handle_var_item,
        _Group: self._handle_group_item,
        _Rep: self._handle_rep_item,
        _Offset: self._handle_offset_item,
    }  # type: Dict[type, Callable[[Any], None]]
    self._item_handlers = handlers

    # Everything starts from the main group
    self._handle_item(self._main_group)
1298
1299
# Parses the input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Missing initial variables/labels mean empty ones
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the input first, and then generate from the resulting items
    parsed = _Parser(normand, variables, labels).res
    gen = _Gen(parsed, variables, labels, init_offset, init_byte_order)

    # Wrap the final state of the generator
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1339
1340
# Builds the command-line argument parser, parses the command-line
# arguments with it, and returns the resulting namespace.
def _parse_cli_args():
    import argparse

    parser = argparse.ArgumentParser()
    add_arg = parser.add_argument

    # Initial offset and byte order
    add_arg(
        "--offset",
        type=int,
        default=0,
        action="store",
        metavar="OFFSET",
        help="initial offset (positive)",
    )
    add_arg(
        "-b",
        "--byte-order",
        type=str,
        choices=["be", "le"],
        metavar="BO",
        help="initial byte order (`be` or `le`)",
    )

    # Repeatable `NAME=VAL` options: each occurrence is appended to a
    # list of strings.
    add_arg(
        "--var",
        action="append",
        metavar="NAME=VAL",
        help="add an initial variable (may be repeated)",
    )
    add_arg(
        "-l",
        "--label",
        action="append",
        metavar="NAME=VAL",
        help="add an initial label (may be repeated)",
    )

    # Version
    add_arg("--version", action="version", version="Normand {}".format(__version__))

    # Optional input path
    add_arg(
        "path",
        nargs="?",
        action="store",
        metavar="PATH",
        help="input path (none means standard input)",
    )

    return parser.parse_args()
1389
1390
1391# Raises a command-line error with the message `msg`.
1392def _raise_cli_error(msg: str) -> NoReturn:
1393 raise RuntimeError("Command-line error: {}".format(msg))
1394
1395
# Returns a dictionary of string to integers from the list of strings
# `args` containing `NAME=VAL` entries.
#
# Returns an empty dictionary if `args` is `None`.
#
# Each entry must be a name matching `_py_name_pat`, followed with `=`,
# followed with decimal digits only; otherwise this function raises a
# command-line error with _raise_cli_error().
#
# If the same name occurs more than once, the last entry wins.
def _dict_from_arg(args: Optional[List[str]]):
    d = {}  # type: Dict[str, int]

    if args is None:
        return d

    # Build the pattern once instead of once per entry.
    #
    # NOTE(review): the group indexes used below assume `_py_name_pat`
    # has no capturing group of its own -- confirm.
    assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        # Store the parsed pair: without this, every valid entry would
        # be validated and then dropped, and the returned dictionary
        # would always be empty.
        d[m.group(1)] = int(m.group(2))

    return d
1411
1412
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input (from the
# given path, or from the standard input without one), parses it with
# the requested initial variables, labels, offset, and byte order, and
# writes the resulting bytes to the standard output.
#
# Raises a command-line error with _raise_cli_error() on an invalid
# argument; calls _fail() (which exits) on a `ParseError`.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The argument parser only accepts `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Report as `[ABS-PATH:]LINE:COL - MESSAGE`, the path prefix
        # being present only when the input comes from a file.
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print (raw bytes, hence the underlying binary buffer)
    sys.stdout.buffer.write(res.data)
1462
1463
1464# Prints the exception message `msg` and exits with status 1.
1465def _fail(msg: str) -> NoReturn:
1466 if not msg.endswith("."):
1467 msg += "."
1468
1469 print(msg, file=sys.stderr)
1470 sys.exit(1)
1471
1472
# CLI entry point: runs _try_run_cli() and turns any exception it raises
# into a message for _fail().
def _run_cli():
    try:
        _try_run_cli()
    except Exception as error:
        message = str(error)
        _fail(message)
1479
1480
# Run the CLI only when this module is executed as a script (not when
# it's imported).
if __name__ == "__main__":
    _run_cli()
This page took 0.074643 seconds and 4 git commands to generate.