Commit | Line | Data |
---|---|---|
71aaa3f7 PP |
1 | # The MIT License (MIT) |
2 | # | |
3 | # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com> | |
4 | # | |
5 | # Permission is hereby granted, free of charge, to any person obtaining | |
6 | # a copy of this software and associated documentation files (the | |
7 | # "Software"), to deal in the Software without restriction, including | |
8 | # without limitation the rights to use, copy, modify, merge, publish, | |
9 | # distribute, sublicense, and/or sell copies of the Software, and to | |
10 | # permit persons to whom the Software is furnished to do so, subject to | |
11 | # the following conditions: | |
12 | # | |
13 | # The above copyright notice and this permission notice shall be | |
14 | # included in all copies or substantial portions of the Software. | |
15 | # | |
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
20 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
21 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
23 | ||
5a993698 PP |
24 | # This module is the portable Normand processor. It offers both the |
25 | # parse() function and the command-line tool (run the module itself) | |
26 | # without external dependencies except a `typing` module for Python 3.4. | |
27 | # | |
28 | # Feel free to copy this module file to your own project to use Normand. | |
29 | # | |
30 | # Upstream repository: <https://github.com/efficios/normand>. | |
31 | ||
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.2.1"

# Public names of this module (what a star import exposes).
__all__ = [
    "ByteOrder",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLoc",
    "VarsT",
    "__author__",
    "__version__",
]
44 | ||
45 | import re | |
46 | import abc | |
47 | import ast | |
48 | import sys | |
49 | import enum | |
50 | import struct | |
51 | from typing import Any, Dict, List, Union, Pattern, Callable, NoReturn, Optional | |
52 | ||
53 | ||
# Text location (line and column numbers).
#
# Objects of this class are built through the private _create() class
# method: calling the constructor directly raises `NotImplementedError`.
class TextLoc:
    # Creates and returns a text location having the line number
    # `line_no` and the column number `col_no`, bypassing __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Always raises `NotImplementedError`: use _create() instead.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    # Readable representation, so that a text location formatted within
    # another representation string shows its line and column numbers
    # instead of the default `<... object at 0x...>` form.
    def __repr__(self):
        return "TextLoc({}, {})".format(self._line_no, self._col_no)
78 | ||
79 | ||
# Base of all the items: keeps the source text location of the item.
class _Item:
    # Builds an item located at `text_loc` within the source text.
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Size of this item, in bytes (each concrete item provides it).
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...

    # Location of this item within the source text.
    @property
    def text_loc(self):
        return self._text_loc
95 | ||
96 | ||
# An item which may be repeated (see `_Rep`); adds nothing to `_Item`
# and only serves as a marker type.
class _RepableItem(_Item):
    ...
100 | ||
101 | ||
# A single byte of data.
class _Byte(_RepableItem):
    # Builds a byte item having the value `val`, located at `text_loc`.
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    def __repr__(self):
        # `#x` formats the value like hex() does (`0x` prefix)
        return "_Byte({:#x}, {})".format(self._val, self._text_loc)

    # Always a single byte.
    @property
    def size(self):
        return 1

    # Value of the byte.
    @property
    def val(self):
        return self._val
119 | ||
120 | ||
# An encoded string.
class _Str(_RepableItem):
    # Builds a string item holding the already encoded bytes `data`,
    # located at `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    def __repr__(self):
        return "_Str({!r}, {})".format(self._data, self._text_loc)

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    # The encoded bytes.
    @property
    def data(self):
        return self._data
138 | ||
139 | ||
# Byte order: each member value is the lowercase name of the byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # Big-endian byte order.
    BE = "be"

    # Little-endian byte order.
    LE = "le"
148 | ||
149 | ||
# Byte order item: carries a byte order and produces no data itself.
class _Bo(_Item):
    # Builds a byte order item for the byte order `bo`, located at
    # `text_loc`.
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order.
    @property
    def bo(self):
        return self._bo

    # A byte order item doesn't occupy any byte.
    @property
    def size(self):
        return 0

    # Same representation format as the other items.
    def __repr__(self):
        return "_Bo({}, {})".format(repr(self._bo), self._text_loc)
163 | ||
164 | ||
# A named label: produces no data itself.
class _Label(_Item):
    # Builds a label item named `name`, located at `text_loc`.
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    def __repr__(self):
        return "_Label({!r}, {})".format(self._name, self._text_loc)

    # A label doesn't occupy any byte.
    @property
    def size(self):
        return 0

    # Name of the label.
    @property
    def name(self):
        return self._name
182 | ||
183 | ||
# An offset value: produces no data itself.
class _Offset(_Item):
    # Builds an offset item having the value `val`, located at
    # `text_loc`.
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    def __repr__(self):
        return "_Offset({!r}, {})".format(self._val, self._text_loc)

    # An offset item doesn't occupy any byte.
    @property
    def size(self):
        return 0

    # Value of the offset.
    @property
    def val(self):
        return self._val
201 | ||
202 | ||
203 | # Mixin of containing an AST expression and its string. | |
204 | class _ExprMixin: | |
205 | def __init__(self, expr_str: str, expr: ast.Expression): | |
206 | self._expr_str = expr_str | |
207 | self._expr = expr | |
208 | ||
209 | # Expression string. | |
210 | @property | |
211 | def expr_str(self): | |
212 | return self._expr_str | |
213 | ||
214 | # Expression node to evaluate. | |
215 | @property | |
216 | def expr(self): | |
217 | return self._expr | |
218 | ||
219 | ||
# A variable assignment: a name and the expression giving its value.
# Produces no data itself.
class _Var(_Item, _ExprMixin):
    # Builds a variable item named `name` of which the value is given
    # by the expression `expr` (string `expr_str`), located at
    # `text_loc`.
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        # Both bases are initialized explicitly because their
        # constructors accept different parameters.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    def __repr__(self):
        return "_Var({!r}, {!r}, {!r}, {})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )

    # A variable doesn't occupy any byte.
    @property
    def size(self):
        return 0

    # Name of the variable.
    @property
    def name(self):
        return self._name
242 | ||
243 | ||
# A value given by an expression, encoded on a given number of bits
# (therefore possibly needing more than one byte).
class _Val(_RepableItem, _ExprMixin):
    # Builds a value item of which the value is given by the expression
    # `expr` (string `expr_str`), to be encoded on `len` bits, located
    # at `text_loc`.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        # Both bases are initialized explicitly because their
        # constructors accept different parameters.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    def __repr__(self):
        return "_Val({!r}, {!r}, {!r}, {})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )

    # Size in bytes: the length in bits divided by eight.
    @property
    def size(self):
        return self._len // 8

    # Length (bits).
    @property
    def len(self):
        return self._len
266 | ||
267 | ||
# Expression item type: any item which holds an expression to evaluate
# (both `_Val` and `_Var` use `_ExprMixin`).
_ExprItemT = Union[_Val, _Var]
270 | ||
271 | ||
# A group of items.
class _Group(_RepableItem):
    # Builds a group item containing `items`, located at `text_loc`.
    #
    # The size of the group is computed once, here, as the sum of the
    # sizes of `items`.
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items
        self._size = sum(contained.size for contained in items)

    def __repr__(self):
        return "_Group({!r}, {})".format(self._items, self._text_loc)

    # Total size of the contained items, as computed at build time.
    @property
    def size(self):
        return self._size

    # Contained items.
    @property
    def items(self):
        return self._items
290 | ||
291 | ||
# A repetition: some repeatable item and how many times to repeat it.
class _Rep(_Item):
    # Builds a repetition item which repeats `item` `mul` times,
    # located at `text_loc`.
    def __init__(self, item: _RepableItem, mul: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._item = item
        self._mul = mul

    def __repr__(self):
        return "_Rep({!r}, {!r}, {})".format(self._item, self._mul, self._text_loc)

    # Size of the repeated item times the multiplier.
    @property
    def size(self):
        item_size = self._item.size
        return item_size * self._mul

    # Repetition multiplier.
    @property
    def mul(self):
        return self._mul

    # Item to repeat.
    @property
    def item(self):
        return self._item
317 | ||
318 | ||
# A parsing error: a message and the text location of the error.
#
# Objects of this class are built through the private _create() class
# method: calling the constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    # Always raises `NotImplementedError`: use _create() instead.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns a parsing error having the message `msg` and
    # the text location `text_loc`, bypassing __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    # Actual initialization, called by _create(): `msg` becomes the
    # message of the base exception.
    def _init(self, msg: str, text_loc: TextLoc):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
338 | ||
339 | ||
# Builds a parsing error from the message `msg` and the text location
# `text_loc`, and raises it (never returns).
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
343 | ||
344 | ||
# Variable (and label) dictionary type: maps a name to an integral
# value.
VarsT = Dict[str, int]
347 | ||
348 | ||
# Python name pattern: an ASCII letter or underscore followed by any
# number of ASCII letters, digits, and underscores.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
351 | ||
352 | ||
# Normand parser.
#
# The constructor accepts a Normand input and parses it immediately.
# After building, use the `res` property to get the resulting main
# group.
#
# Parsing methods named `_try_parse_*()` return `None` when the input at
# the current position doesn't start like what they look for; once the
# beginning of a construct is recognized, any further problem raises a
# `ParseError`.
class _Parser:
    # Builds a parser to parse the Normand input `normand`, parsing
    # immediately.
    #
    # The keys of `variables` and `labels` are the initially known
    # variable and label names (used to reject name clashes).
    #
    # Raises `ParseError` on parsing error.
    def __init__(self, normand: str, variables: VarsT, labels: VarsT):
        self._normand = normand
        self._at = 0
        self._line_no = 1
        self._col_no = 1
        self._label_names = set(labels.keys())
        self._var_names = set(variables.keys())
        self._parse()

    # Result (main group).
    @property
    def res(self):
        return self._res

    # Current text location.
    @property
    def _text_loc(self):
        return TextLoc._create(  # pyright: ignore[reportPrivateUsage]
            self._line_no, self._col_no
        )

    # Returns `True` if this parser is done parsing.
    def _is_done(self):
        return self._at == len(self._normand)

    # Returns `True` if this parser isn't done parsing.
    def _isnt_done(self):
        return not self._is_done()

    # Raises a parse error, creating it using the message `msg` and the
    # current text location.
    def _raise_error(self, msg: str) -> NoReturn:
        _raise_error(msg, self._text_loc)

    # Calls the functions of `try_parse_funcs` in order and returns the
    # first result which isn't `None`, or `None` if there's none.
    @staticmethod
    def _first_parsed(try_parse_funcs: List[Callable[[], Any]]):
        for try_parse in try_parse_funcs:
            item = try_parse()

            if item is not None:
                return item

        return None

    # Tries to make the pattern `pat` match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success.
    #
    # Returns `None`, without changing the state, if there's no match.
    def _try_parse_pat(self, pat: Pattern[str]):
        m = pat.match(self._normand, self._at)

        if m is None:
            return None

        # Skip matched string
        self._at = m.end()

        # Update line number
        self._line_no += m.group(0).count("\n")

        # Update column number: distance from the last newline found
        # before the new position. rfind() returns -1 when there's no
        # such newline, which gives `self._at + 1` (first line).
        self._col_no = self._at - self._normand.rfind("\n", 0, self._at)

        # Return match object
        return m

    # Expects the pattern `pat` to match the current substring,
    # returning the match object and updating `self._at`,
    # `self._line_no`, and `self._col_no` on success, or raising a parse
    # error with the message `error_msg` on error.
    def _expect_pat(self, pat: Pattern[str], error_msg: str):
        # Match
        m = self._try_parse_pat(pat)

        if m is None:
            # No match: error
            self._raise_error(error_msg)

        # Return match object
        return m

    # Pattern for _skip_ws_and_comments()
    _ws_or_syms_or_comments_pat = re.compile(
        r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
    )

    # Skips as many whitespaces, insignificant symbol characters, and
    # comments as possible.
    def _skip_ws_and_comments(self):
        self._try_parse_pat(self._ws_or_syms_or_comments_pat)

    # Pattern for _try_parse_hex_byte()
    _nibble_pat = re.compile(r"[A-Fa-f0-9]")

    # Tries to parse a hexadecimal byte (two nibbles, possibly separated
    # by whitespaces, insignificant symbols, and comments), returning a
    # byte item on success.
    def _try_parse_hex_byte(self):
        begin_text_loc = self._text_loc

        # Match initial nibble
        m_high = self._try_parse_pat(self._nibble_pat)

        if m_high is None:
            # No match
            return None

        # Expect another nibble
        self._skip_ws_and_comments()
        m_low = self._expect_pat(
            self._nibble_pat, "Expecting another hexadecimal nibble"
        )

        # Return item
        return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc)

    # Patterns for _try_parse_bin_byte()
    _bin_byte_bit_pat = re.compile(r"[01]")
    _bin_byte_prefix_pat = re.compile(r"%")

    # Tries to parse a binary byte (`%` followed with eight bits),
    # returning a byte item on success.
    def _try_parse_bin_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
            # No match
            return None

        # Expect eight bits
        bits = []  # type: List[str]

        for _ in range(8):
            self._skip_ws_and_comments()
            m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
            bits.append(m.group(0))

        # Return item
        return _Byte(int("".join(bits), 2), begin_text_loc)

    # Patterns for _try_parse_dec_byte()
    _dec_byte_prefix_pat = re.compile(r"\$\s*")
    _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

    # Tries to parse a decimal byte (`$` followed with a decimal
    # constant from -128 to 255), returning a byte item on success.
    def _try_parse_dec_byte(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
            # No match
            return None

        # Expect the value
        m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

        # Compute value
        val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1)

        # Validate
        if val < -128 or val > 255:
            _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

        # Two's complement
        val = val % 256

        # Return item
        return _Byte(val, begin_text_loc)

    # Tries to parse a byte (hexadecimal, binary, or decimal, in this
    # order), returning a byte item on success.
    def _try_parse_byte(self):
        return self._first_parsed(
            [
                self._try_parse_hex_byte,
                self._try_parse_bin_byte,
                self._try_parse_dec_byte,
            ]
        )

    # Patterns for _try_parse_str()
    _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
    _str_suffix_pat = re.compile(r'"')
    _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')

    # Pattern of a single escape sequence for _unescape_str()
    _str_escape_seq_pat = re.compile(r"\\(.)")

    # Strings corresponding to escape sequence characters
    _str_escape_seq_strs = {
        "0": "\0",
        "a": "\a",
        "b": "\b",
        "e": "\x1b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
        "v": "\v",
        "\\": "\\",
        '"': '"',
    }

    # Returns `text` with its known escape sequences replaced with the
    # corresponding characters; unknown escape sequences remain as is.
    #
    # This is done in a single left-to-right pass so that an escaped
    # backslash is never mistaken for the beginning of a following
    # escape sequence (for example, a backslash, a backslash, and `n`
    # become a backslash and `n`, not a backslash and a newline).
    def _unescape_str(self, text: str):
        def replacement(m):  # type: ignore
            return self._str_escape_seq_strs.get(m.group(1), m.group(0))

        return self._str_escape_seq_pat.sub(replacement, text)

    # Tries to parse a string, returning a string item on success.
    #
    # The string is encoded as UTF-8, or as UTF-16/UTF-32 with a
    # specific byte order if it has a `u16be`-like prefix.
    def _try_parse_str(self):
        begin_text_loc = self._text_loc

        # Match prefix
        m = self._try_parse_pat(self._str_prefix_pat)

        if m is None:
            # No match
            return None

        # Get encoding
        encoding = "utf8"

        if m.group("len") is not None:
            encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

        # Actual string
        m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

        # Expect end of string
        self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

        # Replace escape sequences and encode
        data = self._unescape_str(m.group(0)).encode(encoding)

        # Return item
        return _Str(data, begin_text_loc)

    # Patterns for _try_parse_group()
    _group_prefix_pat = re.compile(r"\(")
    _group_suffix_pat = re.compile(r"\)")

    # Tries to parse a group (items between `(` and `)`), returning a
    # group item on success.
    def _try_parse_group(self):
        begin_text_loc = self._text_loc

        # Match prefix
        if self._try_parse_pat(self._group_prefix_pat) is None:
            # No match
            return None

        # Parse items
        items = self._parse_items()

        # Expect end of group
        self._skip_ws_and_comments()
        self._expect_pat(
            self._group_suffix_pat, "Expecting an item or `)` (end of group)"
        )

        # Return item
        return _Group(items, begin_text_loc)

    # Returns a stripped expression string and an AST expression node
    # from the expression string `expr_str` at text location `text_loc`.
    #
    # Raises `ParseError` (located at `text_loc`) if the expression has
    # an invalid syntax.
    def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc):
        # Create an expression node from the expression string
        expr_str = expr_str.strip().replace("\n", " ")

        try:
            expr = ast.parse(expr_str, mode="eval")
        except SyntaxError:
            _raise_error(
                "Invalid expression `{}`: invalid syntax".format(expr_str),
                text_loc,
            )

        return expr_str, expr

    # Patterns for _try_parse_val_and_len()
    _val_expr_pat = re.compile(r"([^}:]+):")
    _val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)")

    # Tries to parse a value and length (`EXPR:LEN`), returning a value
    # item on success.
    def _try_parse_val_and_len(self):
        begin_text_loc = self._text_loc

        # Match
        m_expr = self._try_parse_pat(self._val_expr_pat)

        if m_expr is None:
            # No match
            return None

        # Expect a length
        m_len = self._expect_pat(
            self._val_len_pat, "Expecting a length (multiple of eight bits)"
        )

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

        # Return item
        return _Val(
            expr_str,
            expr,
            int(m_len.group(1)),
            begin_text_loc,
        )

    # Pattern for _try_parse_var()
    _var_pat = re.compile(
        r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
    )

    # Tries to parse a variable (`NAME = EXPR`), returning a variable
    # item on success.
    #
    # Raises `ParseError` if the name is reserved or is the name of an
    # existing label.
    def _try_parse_var(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._var_pat)

        if m is None:
            # No match
            return None

        # Validate name
        name = m.group("name")

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Existing label named `{}`".format(name), begin_text_loc)

        # Add to known variable names
        self._var_names.add(name)

        # Create an expression node from the expression string
        expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc)

        # Return item
        return _Var(
            name,
            expr_str,
            expr,
            begin_text_loc,
        )

    # Pattern for _try_parse_bo_name()
    _bo_pat = re.compile(r"[bl]e")

    # Tries to parse a byte order name, returning a byte order item on
    # success.
    def _try_parse_bo_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._bo_pat)

        if m is None:
            # No match
            return None

        # The matched text (`be` or `le`) is the value of the
        # corresponding `ByteOrder` member.
        return _Bo(ByteOrder(m.group(0)), begin_text_loc)

    # Patterns for _try_parse_val_or_var_or_bo()
    _val_var_bo_prefix_pat = re.compile(r"\{\s*")
    _val_var_bo_suffix_pat = re.compile(r"\s*}")

    # Tries to parse a value, a variable, or a byte order (between `{`
    # and `}`), returning an item on success.
    def _try_parse_val_or_var_or_bo(self):
        # Match prefix
        if self._try_parse_pat(self._val_var_bo_prefix_pat) is None:
            # No match
            return None

        # Variable item, value item, or byte order item?
        item = self._first_parsed(
            [
                self._try_parse_var,
                self._try_parse_val_and_len,
                self._try_parse_bo_name,
            ]
        )

        if item is None:
            # At this point it's invalid
            self._raise_error("Expecting a value, a variable, or a byte order")

        # Expect suffix
        self._expect_pat(self._val_var_bo_suffix_pat, "Expecting `}`")
        return item

    # Pattern for _try_parse_offset_val() and _try_parse_rep()
    _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")

    # Returns the integral value of `text`, a string which matched
    # `_pos_const_int_pat`.
    #
    # The base is explicit instead of relying on `int(text, 0)` because
    # the latter refuses a decimal constant having a leading zero (for
    # example `010`), which the pattern accepts.
    @staticmethod
    def _pos_const_int_from_str(text: str):
        if text[:2] in ("0x", "0X"):
            return int(text, 16)

        return int(text, 10)

    # Tries to parse an offset value (after the initial `<`), returning
    # an offset item on success.
    def _try_parse_offset_val(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(self._pos_const_int_pat)

        if m is None:
            # No match
            return None

        # Return item
        return _Offset(self._pos_const_int_from_str(m.group(0)), begin_text_loc)

    # Tries to parse a label name (after the initial `<`), returning a
    # label item on success.
    #
    # Raises `ParseError` if the name is reserved, is the name of an
    # existing label, or is the name of an existing variable.
    def _try_parse_label_name(self):
        begin_text_loc = self._text_loc

        # Match
        m = self._try_parse_pat(_py_name_pat)

        if m is None:
            # No match
            return None

        # Validate
        name = m.group(0)

        if name == _icitte_name:
            _raise_error(
                "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
            )

        if name in self._label_names:
            _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

        if name in self._var_names:
            _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

        # Add to known label names
        self._label_names.add(name)

        # Return item
        return _Label(name, begin_text_loc)

    # Patterns for _try_parse_label_or_offset()
    _label_offset_prefix_pat = re.compile(r"<\s*")
    _label_offset_suffix_pat = re.compile(r"\s*>")

    # Tries to parse a label or an offset (between `<` and `>`),
    # returning an item on success.
    def _try_parse_label_or_offset(self):
        # Match prefix
        if self._try_parse_pat(self._label_offset_prefix_pat) is None:
            # No match
            return None

        # Offset item or label item?
        item = self._first_parsed(
            [self._try_parse_offset_val, self._try_parse_label_name]
        )

        if item is None:
            # At this point it's invalid
            self._raise_error("Expecting a label name or an offset value")

        # Expect suffix
        self._expect_pat(self._label_offset_suffix_pat, "Expecting `>`")
        return item

    # Tries to parse a base item (anything except a repetition),
    # returning it on success.
    #
    # The order matters: a byte is tried first, then a string, then a
    # value/variable/byte order, then a label/offset, and finally a
    # group.
    def _try_parse_base_item(self):
        return self._first_parsed(
            [
                self._try_parse_byte,
                self._try_parse_str,
                self._try_parse_val_or_var_or_bo,
                self._try_parse_label_or_offset,
                self._try_parse_group,
            ]
        )

    # Pattern for _try_parse_rep()
    _rep_prefix_pat = re.compile(r"\*\s*")

    # Tries to parse a repetition (`*` followed with a multiplier),
    # returning the multiplier on success, or 1 otherwise.
    def _try_parse_rep(self):
        # Match prefix
        if self._try_parse_pat(self._rep_prefix_pat) is None:
            # No match
            return 1

        # Expect and return a positive integral multiplier
        self._skip_ws_and_comments()
        m = self._expect_pat(
            self._pos_const_int_pat, "Expecting a positive integral multiplier"
        )
        return self._pos_const_int_from_str(m.group(0))

    # Tries to parse an item, possibly followed by a repetition,
    # returning `True` on success.
    #
    # Appends any parsed item to `items`.
    def _try_append_item(self, items: List[_Item]):
        self._skip_ws_and_comments()

        # Parse a base item
        item = self._try_parse_base_item()

        if item is None:
            # No item
            return False

        # Parse repetition if the base item is repeatable
        if isinstance(item, _RepableItem):
            self._skip_ws_and_comments()
            rep_text_loc = self._text_loc
            rep = self._try_parse_rep()

            if rep == 0:
                # No item, but that's okay
                return True
            elif rep > 1:
                # Convert to repetition item
                item = _Rep(item, rep, rep_text_loc)

        items.append(item)
        return True

    # Parses and returns items, skipping whitespaces, insignificant
    # symbols, and comments when allowed, and stopping at the first
    # unknown character.
    def _parse_items(self) -> List[_Item]:
        items = []  # type: List[_Item]

        while self._isnt_done():
            # Try to append item
            if not self._try_append_item(items):
                # Unknown at this point
                break

        return items

    # Parses the whole Normand input, setting `self._res` to the main
    # group item on success.
    #
    # Raises `ParseError` if anything remains after the last item.
    def _parse(self):
        if not self._normand.strip():
            # Special case to make sure there's something to consume
            self._res = _Group([], self._text_loc)
            return

        # Parse first level items
        items = self._parse_items()

        # Make sure there's nothing left
        self._skip_ws_and_comments()

        if self._isnt_done():
            self._raise_error(
                "Unexpected character `{}`".format(self._normand[self._at])
            )

        # Set main group item
        self._res = _Group(items, self._text_loc)
958 | ||
959 | ||
# The return type of parse().
#
# Objects of this class are built through the private _create() class
# method: calling the constructor directly raises `NotImplementedError`.
class ParseResult:
    # Always raises `NotImplementedError`: use _create() instead.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns a parse result holding the given generated
    # data, variables, labels, offset, and byte order, bypassing
    # __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Actual initialization, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._vars = variables
        self._labels = labels
        self._offset = offset
        self._bo = bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order (may be `None`).
    @property
    def byte_order(self):
        return self._bo
1017 | ||
1018 | ||
# Raises an error with the message `msg` through _raise_error(), using
# the text location of the item `item` as the location of the error.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1023 | ||
1024 | ||
# The `ICITTE` reserved name.
#
# When evaluating an expression, this name is bound to the current
# offset; the expression validator always accepts it as a known name.
_icitte_name = "ICITTE"
1027 | ||
1028 | ||
# Value expression validator.
#
# Visits the nodes of an expression AST and reports an error (through
# _raise_error(), with the text location of the expression item) for
# any checked name which is neither `ICITTE` nor a key of the known
# symbols `syms`.
#
# A name node isn't checked while the "parent is a call" flag is set.
# This flag is set when visiting a call node and cleared as soon as any
# node finishes being visited.
class _ExprValidator(ast.NodeVisitor):
    def __init__(self, item: _ExprItemT, syms: VarsT):
        self._parent_is_call = False
        self._syms = syms
        self._item = item

    # Makes sure the name of the name node `node` refers to a known
    # variable/label name (or is `ICITTE`), reporting an error
    # otherwise.
    def _check_name(self, node: ast.Name):
        name = node.id

        if name == _icitte_name or name in self._syms:
            # Known name
            return

        msg = "Unknown variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )
        _raise_error(msg, self._item.text_loc)

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            self._check_name(node)

        # TODO: Restrict the set of allowed node types

        # Visit the children, and then reset the flag
        super().generic_visit(node)
        self._parent_is_call = False
1053 | ||
1054 | ||
# Keeper of labels for a given group instance.
#
# A group instance is one iteration of a given group.
#
# For each group, instances are numbered from 0 in the order in which
# they're added with add().
class _GroupInstanceLabels:
    def __init__(self):
        self._instance_labels = {}  # type: Dict[_Group, Dict[int, VarsT]]

    # Assigns a copy of the labels `labels` to a new instance of
    # `group`; the index of this new instance is the number of
    # instances of `group` added so far.
    def add(self, group: _Group, labels: VarsT):
        instances = self._instance_labels.setdefault(group, {})
        new_index = len(instances)
        instances[new_index] = labels.copy()

    # Returns the labels (not a copy) of the instance `instance_index`
    # of the group `group`.
    def labels(self, group: _Group, instance_index: int):
        instances = self._instance_labels[group]
        return instances[instance_index]
1074 | ||
1075 | ||
# Generator of data and labels from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# Building works in two steps:
#
# 1. _resolve_labels() computes the labels of each group instance.
#
# 2. _gen() handles the items to produce the data, evaluating the
#    expressions with the labels computed during the first step.
class _Gen:
    # Generates from the main group `group`, starting with copies of
    # the initial variables `variables` and labels `labels`, at the
    # initial offset `offset`, and with the initial byte order `bo`
    # (which may be `None`).
    def __init__(
        self,
        group: _Group,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._group_instance_labels = _GroupInstanceLabels()
        self._resolve_labels(group, offset, labels.copy())
        self._vars = variables.copy()
        self._offset = offset
        self._bo = bo
        self._main_group = group
        self._gen()

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._vars

    # Updated main group labels (labels of the first, and only, instance
    # of the main group).
    @property
    def labels(self):
        return self._group_instance_labels.labels(self._main_group, 0)

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def bo(self):
        return self._bo

    # Fills `self._group_instance_labels` with the labels for each group
    # instance in `item`, starting at current offset `offset` with the
    # current labels `labels`.
    #
    # Returns the new current offset.
    def _resolve_labels(self, item: _Item, offset: int, labels: VarsT) -> int:
        if type(item) is _Group:
            # First pass: compute immediate labels of this instance
            #
            # This starts from a copy of the inherited labels `labels`
            # and walks the direct subitems only, tracking the offset
            # locally so that `offset` stays untouched for the second
            # pass.
            group_labels = labels.copy()
            group_offset = offset

            for subitem in item.items:
                if type(subitem) is _Offset:
                    # Offset item: sets the current offset
                    group_offset = subitem.val
                elif type(subitem) is _Label:
                    # Label item: its value is the current offset
                    assert subitem.name not in group_labels
                    group_labels[subitem.name] = group_offset
                else:
                    # Any other item: advance by its size
                    group_offset += subitem.size

            # Add to group instance labels
            self._group_instance_labels.add(item, group_labels)

            # Second pass: handle each item
            #
            # This is what registers the instances of nested groups,
            # which inherit the complete labels of this instance.
            for subitem in item.items:
                offset = self._resolve_labels(subitem, offset, group_labels)
        elif type(item) is _Rep:
            # One resolution per repetition: a repeated group gets one
            # instance per iteration.
            for _ in range(item.mul):
                offset = self._resolve_labels(item.item, offset, labels)
        elif type(item) is _Offset:
            offset = item.val
        else:
            offset += item.size

        return offset

    # Appends the value of the byte item `item` to the data and updates
    # the current offset.
    def _handle_byte_item(self, item: _Byte):
        self._data.append(item.val)
        self._offset += item.size

    # Appends the data of the string item `item` to the data and
    # updates the current offset.
    def _handle_str_item(self, item: _Str):
        self._data += item.data
        self._offset += item.size

    # Sets the current byte order to the one of the byte order item
    # `item`.
    def _handle_bo_item(self, item: _Bo):
        self._bo = item.bo

    # Evaluates the expression of `item` and returns its integral
    # result.
    #
    # The available names are the labels of the current group instance,
    # `ICITTE` (current offset), and the current variables.
    #
    # Reports an error for `item` if the evaluation fails or if the
    # type of the result isn't exactly `int`.
    def _eval_expr(self, item: _ExprItemT):
        # Get the labels of the current group instance as the initial
        # symbols (copied because we're adding stuff).
        assert self._cur_group is not None
        syms = self._group_instance_labels.labels(
            self._cur_group, self._group_instance_indexes[self._cur_group]
        ).copy()

        # Set the `ICITTE` name to the current offset (before encoding)
        syms[_icitte_name] = self._offset

        # Add the current variables (added last: a variable takes
        # precedence over a symbol having the same name).
        syms.update(self._vars)

        # Validate the node and its children
        _ExprValidator(item, syms).visit(item.expr)

        # Compile and evaluate expression node
        #
        # NOTE(review): this evaluates an expression coming from the
        # Normand input with eval(); the validator above only checks
        # names (see its TODO). Confirm that the input is trusted.
        try:
            val = eval(compile(item.expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Validate result
        if type(val) is not int:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected result type `{}`".format(
                    item.expr_str, type(val).__name__
                ),
                item,
            )

        return val

    # Sets the variable named by the variable item `item` to the result
    # of its expression.
    def _handle_var_item(self, item: _Var):
        # Update variable
        self._vars[item.name] = self._eval_expr(item)

    # Evaluates the expression of the value item `item`, encodes the
    # result on `item.len` bits using the current byte order, appends
    # the encoded bytes to the data, and updates the current offset.
    def _handle_val_item(self, item: _Val):
        # Compute value
        val = self._eval_expr(item)

        # Validate range: accept anything from the smallest signed
        # value to the largest unsigned value for `item.len` bits.
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, self._offset
                ),
                item,
            )

        # A byte order is required to encode on more than 8 bits
        if self._bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`): unsigned for a positive value, signed
        # for a negative one. No byte order means big endian here.
        data = struct.pack(
            "{}{}".format(
                ">" if self._bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if self._bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert self._bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes and update offset
        self._data += data
        self._offset += len(data)

    # Handles the subitems of a new instance of the group item `item`.
    def _handle_group_item(self, item: _Group):
        # Update the instance index of `item` (the first instance has
        # the index 0, matching the numbering of
        # `self._group_instance_labels`).
        if item not in self._group_instance_indexes:
            self._group_instance_indexes[item] = 0
        else:
            self._group_instance_indexes[item] += 1

        # Changed current group
        old_cur_group = self._cur_group
        self._cur_group = item

        # Handle each item
        for subitem in item.items:
            self._handle_item(subitem)

        # Restore current group
        self._cur_group = old_cur_group

    # Handles the repeated item of the repetition item `item` once per
    # repetition.
    def _handle_rep_item(self, item: _Rep):
        for _ in range(item.mul):
            self._handle_item(item.item)

    # Sets the current offset to the value of the offset item `item`.
    def _handle_offset_item(self, item: _Offset):
        self._offset = item.val

    # Handles the item `item` with the handler registered for its exact
    # type; an item of a type without a handler is ignored.
    def _handle_item(self, item: _Item):
        if type(item) in self._item_handlers:
            self._item_handlers[type(item)](item)

    # Generates the data from the main group, updating the current
    # variables, offset, and byte order along the way.
    def _gen(self):
        # Initial state
        self._data = bytearray()
        self._group_instance_indexes = {}  # type: Dict[_Group, int]
        self._cur_group = None

        # Item handlers
        self._item_handlers = {
            _Byte: self._handle_byte_item,
            _Str: self._handle_str_item,
            _Bo: self._handle_bo_item,
            _Val: self._handle_val_item,
            _Var: self._handle_var_item,
            _Group: self._handle_group_item,
            _Rep: self._handle_rep_item,
            _Offset: self._handle_offset_item,
        }  # type: Dict[type, Callable[[Any], None]]

        # Handle the group item
        self._handle_item(self._main_group)
1306 | ||
1307 | ||
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes as well as the updated
# variables, labels, offset, and byte order.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order (`None` means no initial
# byte order).
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Missing initial variables/labels: use empty dictionaries
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the input to get the main group item
    main_group = _Parser(normand, variables, labels).res

    # Generate from the main group item
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    # Wrap the resulting context
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1347 | ||
1348 | ||
# Parses the command-line arguments and returns the resulting
# `argparse` namespace having the `offset`, `byte_order`, `var`,
# `label`, and `path` attributes.
def _parse_cli_args():
    import argparse

    # Argument specifications (names and options), in the order in
    # which to add them to the parser.
    arg_specs = [
        (
            ("--offset",),
            dict(
                metavar="OFFSET",
                action="store",
                type=int,
                default=0,
                help="initial offset (positive)",
            ),
        ),
        (
            ("-b", "--byte-order"),
            dict(
                metavar="BO",
                choices=["be", "le"],
                type=str,
                help="initial byte order (`be` or `le`)",
            ),
        ),
        (
            ("--var",),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial variable (may be repeated)",
            ),
        ),
        (
            ("-l", "--label"),
            dict(
                metavar="NAME=VAL",
                action="append",
                help="add an initial label (may be repeated)",
            ),
        ),
        (
            ("--version",),
            dict(action="version", version="Normand {}".format(__version__)),
        ),
        (
            ("path",),
            dict(
                metavar="PATH",
                action="store",
                nargs="?",
                help="input path (none means standard input)",
            ),
        ),
    ]

    # Build parser
    ap = argparse.ArgumentParser()

    for names, opts in arg_specs:
        ap.add_argument(*names, **opts)

    # Parse
    return ap.parse_args()
1397 | ||
1398 | ||
# Raises a command-line error (`RuntimeError`) of which the message is
# `msg` prefixed with `Command-line error: `.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
1402 | ||
1403 | ||
# Returns a dictionary of names to integral values built from the list
# of strings `args`, each one being a `NAME=VAL` entry where `VAL` is a
# sequence of decimal digits.
#
# If `args` is `None`, then the returned dictionary is empty. If a name
# is assigned more than once, then the last assignment wins.
#
# Raises a command-line error for any malformed entry.
def _dict_from_arg(args: Optional[List[str]]):
    res = {}  # type: Dict[str, int]

    for assignment in args or ():
        m = re.match(r"({})=(\d+)$".format(_py_name_pat.pattern), assignment)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(assignment))

        name = m.group(1)
        res[name] = int(m.group(2))

    return res
1421 | ||
1422 | ||
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input (from the
# file named by the `PATH` argument, or from the standard input without
# it), parses it, and writes the generated data to the standard output.
#
# A `ParseError` is reported with _fail() as `[PATH:]LINE:COL - MSG`,
# which makes the program exit. Any other exception propagates to the
# caller, including the command-line errors raised for an invalid
# `NAME=VAL` assignment or for a negative initial offset.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        # This doesn't return: `res` is always bound below
        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Write the generated data as is
    sys.stdout.buffer.write(res.data)
1472 | ||
1473 | ||
# Prints the message `msg` to the standard error, appending a period if
# it doesn't already end with one, and then exits with status 1.
def _fail(msg: str) -> NoReturn:
    full_msg = msg if msg.endswith(".") else msg + "."
    print(full_msg, file=sys.stderr)
    sys.exit(1)
1481 | ||
1482 | ||
# CLI entry point.
#
# Runs _try_run_cli() and reports any exception it raises with _fail(),
# using the string form of the exception as the message.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as err:
        # Top-level boundary: convert the exception to an error message
        _fail(str(err))
1489 | ||
1490 | ||
# Run the command-line tool when this module is executed as a script
# (as opposed to being imported).
if __name__ == "__main__":
    _run_cli()