Commit | Line | Data |
---|---|---|
71aaa3f7 PP |
1 | # The MIT License (MIT) |
2 | # | |
3 | # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com> | |
4 | # | |
5 | # Permission is hereby granted, free of charge, to any person obtaining | |
6 | # a copy of this software and associated documentation files (the | |
7 | # "Software"), to deal in the Software without restriction, including | |
8 | # without limitation the rights to use, copy, modify, merge, publish, | |
9 | # distribute, sublicense, and/or sell copies of the Software, and to | |
10 | # permit persons to whom the Software is furnished to do so, subject to | |
11 | # the following conditions: | |
12 | # | |
13 | # The above copyright notice and this permission notice shall be | |
14 | # included in all copies or substantial portions of the Software. | |
15 | # | |
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
20 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
21 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
23 | ||
24 | __author__ = "Philippe Proulx" | |
0e8e3169 | 25 | __version__ = "0.2.0" |
71aaa3f7 PP |
26 | __all__ = [ |
27 | "ByteOrder", | |
28 | "parse", | |
29 | "ParseError", | |
30 | "ParseResult", | |
31 | "TextLoc", | |
32 | "VarsT", | |
33 | "__author__", | |
34 | "__version__", | |
35 | ] | |
36 | ||
37 | import re | |
38 | import abc | |
39 | import ast | |
40 | import sys | |
41 | import enum | |
42 | import struct | |
43 | from typing import Any, Dict, List, Union, Pattern, Callable, NoReturn, Optional | |
44 | ||
45 | ||
# Text location (line and column numbers).
#
# Instances are built with the private `_create()` class method: calling
# the constructor directly raises `NotImplementedError`.
class TextLoc:
    # Creates and returns a text location having the line number
    # `line_no` and the column number `col_no`, bypassing `__init__()`.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Direct instantiation isn't supported (same signature as the
    # other `_create()`-built classes of this module).
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    # Readable representation: the `__repr__()` methods of the items
    # format their text location, so make it meaningful.
    def __repr__(self):
        return "TextLoc({}, {})".format(self._line_no, self._col_no)
70 | ||
71 | ||
# Base of any item.
#
# An item keeps the source text location it was created with and
# exposes a size (bytes) through its `size` property.
class _Item:
    def __init__(self, text_loc: TextLoc):
        self._text_loc = text_loc

    # Size of this item (bytes).
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...

    # Text location of this item within the source.
    @property
    def text_loc(self):
        return self._text_loc
87 | ||
88 | ||
# An item which may be repeated.
#
# This class adds nothing to `_Item`: it only serves to tell repeatable
# items apart (with `isinstance()`).
class _RepableItem(_Item):
    pass
92 | ||
93 | ||
# Item holding a single byte.
class _Byte(_RepableItem):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    def __repr__(self):
        hex_val = hex(self._val)
        return "_Byte({}, {})".format(hex_val, self._text_loc)

    # A byte item is always one byte long.
    @property
    def size(self):
        return 1

    # Value of the byte.
    @property
    def val(self):
        return self._val
111 | ||
112 | ||
# Item holding an encoded string.
class _Str(_RepableItem):
    def __init__(self, data: bytes, text_loc: TextLoc):
        super().__init__(text_loc)
        self._data = data

    def __repr__(self):
        data_repr = repr(self._data)
        return "_Str({}, {})".format(data_repr, self._text_loc)

    # Number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    # Encoded bytes.
    @property
    def data(self):
        return self._data
130 | ||
131 | ||
# Byte order (endianness).
#
# The value of each member is its lowercase name.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
140 | ||
141 | ||
# Byte order item: holds a byte order and occupies no bytes itself.
class _Bo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLoc):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order.
    @property
    def bo(self):
        return self._bo

    @property
    def size(self):
        return 0

    # Same representation style as the other item classes.
    def __repr__(self):
        return "_Bo({}, {})".format(repr(self._bo), self._text_loc)
155 | ||
156 | ||
# Label item: holds a name and occupies no bytes.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLoc):
        super().__init__(text_loc)
        self._name = name

    def __repr__(self):
        name_repr = repr(self._name)
        return "_Label({}, {})".format(name_repr, self._text_loc)

    @property
    def size(self):
        return 0

    # Name of the label.
    @property
    def name(self):
        return self._name
174 | ||
175 | ||
# Offset item: holds an offset value and occupies no bytes.
class _Offset(_Item):
    def __init__(self, val: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._val = val

    def __repr__(self):
        val_repr = repr(self._val)
        return "_Offset({}, {})".format(val_repr, self._text_loc)

    @property
    def size(self):
        return 0

    # Value of the offset.
    @property
    def val(self):
        return self._val
193 | ||
194 | ||
195 | # Mixin of containing an AST expression and its string. | |
196 | class _ExprMixin: | |
197 | def __init__(self, expr_str: str, expr: ast.Expression): | |
198 | self._expr_str = expr_str | |
199 | self._expr = expr | |
200 | ||
201 | # Expression string. | |
202 | @property | |
203 | def expr_str(self): | |
204 | return self._expr_str | |
205 | ||
206 | # Expression node to evaluate. | |
207 | @property | |
208 | def expr(self): | |
209 | return self._expr | |
210 | ||
211 | ||
# Variable item: a name with an expression; occupies no bytes.
class _Var(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLoc
    ):
        # Initialize both bases explicitly: `_Item` gets the text
        # location and `_ExprMixin` gets the expression.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    def __repr__(self):
        fields = (
            repr(self._name),
            repr(self._expr_str),
            repr(self._expr),
            self._text_loc,
        )
        return "_Var({}, {}, {}, {})".format(*fields)

    @property
    def size(self):
        return 0

    # Name of the variable.
    @property
    def name(self):
        return self._name
234 | ||
235 | ||
# Value item: an expression with a length (bits), possibly needing
# more than one byte.
class _Val(_RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLoc
    ):
        # Initialize both bases explicitly: the item base gets the text
        # location and `_ExprMixin` gets the expression.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    def __repr__(self):
        fields = (
            repr(self._expr_str),
            repr(self._expr),
            repr(self._len),
            self._text_loc,
        )
        return "_Val({}, {}, {}, {})".format(*fields)

    # Size (bytes): the length (bits) divided by eight.
    @property
    def size(self):
        return self._len // 8

    # Length (bits).
    @property
    def len(self):
        return self._len
258 | ||
259 | ||
260 | # Expression item type. | |
261 | _ExprItemT = Union[_Val, _Var] | |
262 | ||
263 | ||
# Group of items.
#
# The size of a group is the sum of the sizes of its items, computed
# once when building the group.
class _Group(_RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLoc):
        super().__init__(text_loc)
        self._items = items

        total_size = 0

        for item in items:
            total_size += item.size

        self._size = total_size

    def __repr__(self):
        items_repr = repr(self._items)
        return "_Group({}, {})".format(items_repr, self._text_loc)

    @property
    def size(self):
        return self._size

    # Contained items.
    @property
    def items(self):
        return self._items
282 | ||
283 | ||
# Repetition item: a repeatable item with a multiplier.
class _Rep(_Item):
    def __init__(self, item: _RepableItem, mul: int, text_loc: TextLoc):
        super().__init__(text_loc)
        self._mul = mul
        self._item = item

    def __repr__(self):
        fields = (repr(self._item), repr(self._mul), self._text_loc)
        return "_Rep({}, {}, {})".format(*fields)

    # Size of the repeated item times the multiplier.
    @property
    def size(self):
        return self._item.size * self._mul

    # Repetition multiplier.
    @property
    def mul(self):
        return self._mul

    # Item to repeat.
    @property
    def item(self):
        return self._item
309 | ||
310 | ||
# Parsing error: a message together with the source text location of
# the error.
#
# Instances are built with the private `_create()` class method: calling
# the constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns a parsing error having the message `msg` and
    # the text location `text_loc`, bypassing `__init__()`.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLoc):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def _init(self, msg: str, text_loc: TextLoc):
        # Let the base exception class keep the message
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
330 | ||
331 | ||
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLoc) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
335 | ||
336 | ||
337 | # Variable (and label) dictionary type. | |
338 | VarsT = Dict[str, int] | |
339 | ||
340 | ||
341 | # Python name pattern. | |
342 | _py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*") | |
343 | ||
344 | ||
345 | # Normand parser. | |
346 | # | |
347 | # The constructor accepts a Normand input. After building, use the `res` | |
348 | # property to get the resulting main group. | |
349 | class _Parser: | |
350 | # Builds a parser to parse the Normand input `normand`, parsing | |
351 | # immediately. | |
352 | def __init__(self, normand: str, variables: VarsT, labels: VarsT): | |
353 | self._normand = normand | |
354 | self._at = 0 | |
355 | self._line_no = 1 | |
356 | self._col_no = 1 | |
357 | self._label_names = set(labels.keys()) | |
358 | self._var_names = set(variables.keys()) | |
359 | self._parse() | |
360 | ||
361 | # Result (main group). | |
362 | @property | |
363 | def res(self): | |
364 | return self._res | |
365 | ||
366 | # Current text location. | |
367 | @property | |
368 | def _text_loc(self): | |
369 | return TextLoc._create( # pyright: ignore[reportPrivateUsage] | |
370 | self._line_no, self._col_no | |
371 | ) | |
372 | ||
373 | # Returns `True` if this parser is done parsing. | |
374 | def _is_done(self): | |
375 | return self._at == len(self._normand) | |
376 | ||
377 | # Returns `True` if this parser isn't done parsing. | |
378 | def _isnt_done(self): | |
379 | return not self._is_done() | |
380 | ||
381 | # Raises a parse error, creating it using the message `msg` and the | |
382 | # current text location. | |
383 | def _raise_error(self, msg: str) -> NoReturn: | |
384 | _raise_error(msg, self._text_loc) | |
385 | ||
386 | # Tries to make the pattern `pat` match the current substring, | |
387 | # returning the match object and updating `self._at`, | |
388 | # `self._line_no`, and `self._col_no` on success. | |
389 | def _try_parse_pat(self, pat: Pattern[str]): | |
390 | m = pat.match(self._normand, self._at) | |
391 | ||
392 | if m is None: | |
393 | return | |
394 | ||
395 | # Skip matched string | |
396 | self._at += len(m.group(0)) | |
397 | ||
398 | # Update line number | |
399 | self._line_no += m.group(0).count("\n") | |
400 | ||
401 | # Update column number | |
402 | for i in reversed(range(self._at)): | |
403 | if self._normand[i] == "\n" or i == 0: | |
404 | if i == 0: | |
405 | self._col_no = self._at + 1 | |
406 | else: | |
407 | self._col_no = self._at - i | |
408 | ||
409 | break | |
410 | ||
411 | # Return match object | |
412 | return m | |
413 | ||
414 | # Expects the pattern `pat` to match the current substring, | |
415 | # returning the match object and updating `self._at`, | |
416 | # `self._line_no`, and `self._col_no` on success, or raising a parse | |
417 | # error with the message `error_msg` on error. | |
418 | def _expect_pat(self, pat: Pattern[str], error_msg: str): | |
419 | # Match | |
420 | m = self._try_parse_pat(pat) | |
421 | ||
422 | if m is None: | |
423 | # No match: error | |
424 | self._raise_error(error_msg) | |
425 | ||
426 | # Return match object | |
427 | return m | |
428 | ||
429 | # Pattern for _skip_ws_and_comments() | |
430 | _ws_or_syms_or_comments_pat = re.compile( | |
431 | r"(?:[\s!@/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*" | |
432 | ) | |
433 | ||
434 | # Skips as many whitespaces, insignificant symbol characters, and | |
435 | # comments as possible. | |
436 | def _skip_ws_and_comments(self): | |
437 | self._try_parse_pat(self._ws_or_syms_or_comments_pat) | |
438 | ||
439 | # Pattern for _try_parse_hex_byte() | |
440 | _nibble_pat = re.compile(r"[A-Fa-f0-9]") | |
441 | ||
442 | # Tries to parse a hexadecimal byte, returning a byte item on | |
443 | # success. | |
444 | def _try_parse_hex_byte(self): | |
0e8e3169 PP |
445 | begin_text_loc = self._text_loc |
446 | ||
71aaa3f7 PP |
447 | # Match initial nibble |
448 | m_high = self._try_parse_pat(self._nibble_pat) | |
449 | ||
450 | if m_high is None: | |
451 | # No match | |
452 | return | |
453 | ||
454 | # Expect another nibble | |
455 | self._skip_ws_and_comments() | |
456 | m_low = self._expect_pat( | |
457 | self._nibble_pat, "Expecting another hexadecimal nibble" | |
458 | ) | |
459 | ||
460 | # Return item | |
0e8e3169 | 461 | return _Byte(int(m_high.group(0) + m_low.group(0), 16), begin_text_loc) |
71aaa3f7 PP |
462 | |
463 | # Patterns for _try_parse_bin_byte() | |
464 | _bin_byte_bit_pat = re.compile(r"[01]") | |
465 | _bin_byte_prefix_pat = re.compile(r"%") | |
466 | ||
467 | # Tries to parse a binary byte, returning a byte item on success. | |
468 | def _try_parse_bin_byte(self): | |
0e8e3169 PP |
469 | begin_text_loc = self._text_loc |
470 | ||
71aaa3f7 PP |
471 | # Match prefix |
472 | if self._try_parse_pat(self._bin_byte_prefix_pat) is None: | |
473 | # No match | |
474 | return | |
475 | ||
476 | # Expect eight bits | |
477 | bits = [] # type: List[str] | |
478 | ||
479 | for _ in range(8): | |
480 | self._skip_ws_and_comments() | |
481 | m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)") | |
482 | bits.append(m.group(0)) | |
483 | ||
484 | # Return item | |
0e8e3169 | 485 | return _Byte(int("".join(bits), 2), begin_text_loc) |
71aaa3f7 PP |
486 | |
487 | # Patterns for _try_parse_dec_byte() | |
488 | _dec_byte_prefix_pat = re.compile(r"\$\s*") | |
489 | _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)") | |
490 | ||
491 | # Tries to parse a decimal byte, returning a byte item on success. | |
492 | def _try_parse_dec_byte(self): | |
0e8e3169 PP |
493 | begin_text_loc = self._text_loc |
494 | ||
71aaa3f7 PP |
495 | # Match prefix |
496 | if self._try_parse_pat(self._dec_byte_prefix_pat) is None: | |
497 | # No match | |
498 | return | |
499 | ||
500 | # Expect the value | |
501 | m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant") | |
502 | ||
503 | # Compute value | |
504 | val = int(m.group("val")) * (-1 if m.group("neg") == "-" else 1) | |
505 | ||
506 | # Validate | |
507 | if val < -128 or val > 255: | |
0e8e3169 | 508 | _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc) |
71aaa3f7 PP |
509 | |
510 | # Two's complement | |
511 | val = val % 256 | |
512 | ||
513 | # Return item | |
0e8e3169 | 514 | return _Byte(val, begin_text_loc) |
71aaa3f7 PP |
515 | |
516 | # Tries to parse a byte, returning a byte item on success. | |
517 | def _try_parse_byte(self): | |
518 | # Hexadecimal | |
519 | item = self._try_parse_hex_byte() | |
520 | ||
521 | if item is not None: | |
522 | return item | |
523 | ||
524 | # Binary | |
525 | item = self._try_parse_bin_byte() | |
526 | ||
527 | if item is not None: | |
528 | return item | |
529 | ||
530 | # Decimal | |
531 | item = self._try_parse_dec_byte() | |
532 | ||
533 | if item is not None: | |
534 | return item | |
535 | ||
536 | # Patterns for _try_parse_str() | |
537 | _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"') | |
538 | _str_suffix_pat = re.compile(r'"') | |
539 | _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*') | |
540 | ||
541 | # Strings corresponding to escape sequence characters | |
542 | _str_escape_seq_strs = { | |
543 | "0": "\0", | |
544 | "a": "\a", | |
545 | "b": "\b", | |
546 | "e": "\x1b", | |
547 | "f": "\f", | |
548 | "n": "\n", | |
549 | "r": "\r", | |
550 | "t": "\t", | |
551 | "v": "\v", | |
552 | "\\": "\\", | |
553 | '"': '"', | |
554 | } | |
555 | ||
556 | # Tries to parse a string, returning a string item on success. | |
557 | def _try_parse_str(self): | |
0e8e3169 PP |
558 | begin_text_loc = self._text_loc |
559 | ||
71aaa3f7 PP |
560 | # Match prefix |
561 | m = self._try_parse_pat(self._str_prefix_pat) | |
562 | ||
563 | if m is None: | |
564 | # No match | |
565 | return | |
566 | ||
567 | # Get encoding | |
568 | encoding = "utf8" | |
569 | ||
570 | if m.group("len") is not None: | |
571 | encoding = "utf_{}_{}".format(m.group("len"), m.group("bo")) | |
572 | ||
573 | # Actual string | |
574 | m = self._expect_pat(self._str_str_pat, "Expecting a literal string") | |
575 | ||
576 | # Expect end of string | |
577 | self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)') | |
578 | ||
579 | # Replace escape sequences | |
580 | val = m.group(0) | |
581 | ||
582 | for ec in '0abefnrtv"\\': | |
583 | val = val.replace(r"\{}".format(ec), self._str_escape_seq_strs[ec]) | |
584 | ||
585 | # Encode | |
586 | data = val.encode(encoding) | |
587 | ||
588 | # Return item | |
0e8e3169 | 589 | return _Str(data, begin_text_loc) |
71aaa3f7 PP |
590 | |
591 | # Patterns for _try_parse_group() | |
592 | _group_prefix_pat = re.compile(r"\(") | |
593 | _group_suffix_pat = re.compile(r"\)") | |
594 | ||
595 | # Tries to parse a group, returning a group item on success. | |
596 | def _try_parse_group(self): | |
0e8e3169 PP |
597 | begin_text_loc = self._text_loc |
598 | ||
71aaa3f7 PP |
599 | # Match prefix |
600 | if self._try_parse_pat(self._group_prefix_pat) is None: | |
601 | # No match | |
602 | return | |
603 | ||
604 | # Parse items | |
605 | items = self._parse_items() | |
606 | ||
607 | # Expect end of group | |
608 | self._skip_ws_and_comments() | |
609 | self._expect_pat( | |
610 | self._group_suffix_pat, "Expecting an item or `)` (end of group)" | |
611 | ) | |
612 | ||
613 | # Return item | |
0e8e3169 | 614 | return _Group(items, begin_text_loc) |
71aaa3f7 PP |
615 | |
616 | # Returns a stripped expression string and an AST expression node | |
617 | # from the expression string `expr_str` at text location `text_loc`. | |
618 | def _ast_expr_from_str(self, expr_str: str, text_loc: TextLoc): | |
619 | # Create an expression node from the expression string | |
620 | expr_str = expr_str.strip().replace("\n", " ") | |
621 | ||
622 | try: | |
623 | expr = ast.parse(expr_str, mode="eval") | |
624 | except SyntaxError: | |
625 | _raise_error( | |
626 | "Invalid expression `{}`: invalid syntax".format(expr_str), | |
627 | text_loc, | |
628 | ) | |
629 | ||
630 | return expr_str, expr | |
631 | ||
632 | # Patterns for _try_parse_val_and_len() | |
633 | _val_expr_pat = re.compile(r"([^}:]+):") | |
634 | _val_len_pat = re.compile(r"\s*(8|16|24|32|40|48|56|64)") | |
635 | ||
636 | # Tries to parse a value and length, returning a value item on | |
637 | # success. | |
638 | def _try_parse_val_and_len(self): | |
639 | begin_text_loc = self._text_loc | |
640 | ||
641 | # Match | |
642 | m_expr = self._try_parse_pat(self._val_expr_pat) | |
643 | ||
644 | if m_expr is None: | |
645 | # No match | |
646 | return | |
647 | ||
648 | # Expect a length | |
649 | m_len = self._expect_pat( | |
650 | self._val_len_pat, "Expecting a length (multiple of eight bits)" | |
651 | ) | |
652 | ||
653 | # Create an expression node from the expression string | |
654 | expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc) | |
655 | ||
656 | # Return item | |
657 | return _Val( | |
658 | expr_str, | |
659 | expr, | |
660 | int(m_len.group(1)), | |
0e8e3169 | 661 | begin_text_loc, |
71aaa3f7 PP |
662 | ) |
663 | ||
664 | # Patterns for _try_parse_val_and_len() | |
665 | _var_pat = re.compile( | |
666 | r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern) | |
667 | ) | |
668 | ||
669 | # Tries to parse a variable, returning a variable item on success. | |
670 | def _try_parse_var(self): | |
671 | begin_text_loc = self._text_loc | |
672 | ||
673 | # Match | |
674 | m = self._try_parse_pat(self._var_pat) | |
675 | ||
676 | if m is None: | |
677 | # No match | |
678 | return | |
679 | ||
680 | # Validate name | |
681 | name = m.group("name") | |
682 | ||
683 | if name == _icitte_name: | |
0e8e3169 PP |
684 | _raise_error( |
685 | "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc | |
686 | ) | |
71aaa3f7 PP |
687 | |
688 | if name in self._label_names: | |
0e8e3169 | 689 | _raise_error("Existing label named `{}`".format(name), begin_text_loc) |
71aaa3f7 PP |
690 | |
691 | # Add to known variable names | |
692 | self._var_names.add(name) | |
693 | ||
694 | # Create an expression node from the expression string | |
695 | expr_str, expr = self._ast_expr_from_str(m.group("expr"), begin_text_loc) | |
696 | ||
697 | # Return item | |
698 | return _Var( | |
699 | name, | |
700 | expr_str, | |
701 | expr, | |
0e8e3169 | 702 | begin_text_loc, |
71aaa3f7 PP |
703 | ) |
704 | ||
705 | # Pattern for _try_parse_bo_name() | |
706 | _bo_pat = re.compile(r"[bl]e") | |
707 | ||
708 | # Tries to parse a byte order name, returning a byte order item on | |
709 | # success. | |
710 | def _try_parse_bo_name(self): | |
0e8e3169 PP |
711 | begin_text_loc = self._text_loc |
712 | ||
71aaa3f7 PP |
713 | # Match |
714 | m = self._try_parse_pat(self._bo_pat) | |
715 | ||
716 | if m is None: | |
717 | # No match | |
718 | return | |
719 | ||
720 | # Return corresponding item | |
721 | if m.group(0) == "be": | |
0e8e3169 | 722 | return _Bo(ByteOrder.BE, begin_text_loc) |
71aaa3f7 PP |
723 | else: |
724 | assert m.group(0) == "le" | |
0e8e3169 | 725 | return _Bo(ByteOrder.LE, begin_text_loc) |
71aaa3f7 PP |
726 | |
727 | # Patterns for _try_parse_val_or_bo() | |
728 | _val_var_bo_prefix_pat = re.compile(r"\{\s*") | |
729 | _val_var_bo_suffix_pat = re.compile(r"\s*}") | |
730 | ||
731 | # Tries to parse a value, a variable, or a byte order, returning an | |
732 | # item on success. | |
733 | def _try_parse_val_or_var_or_bo(self): | |
734 | # Match prefix | |
735 | if self._try_parse_pat(self._val_var_bo_prefix_pat) is None: | |
736 | # No match | |
737 | return | |
738 | ||
739 | # Variable item? | |
740 | item = self._try_parse_var() | |
741 | ||
742 | if item is None: | |
743 | # Value item? | |
744 | item = self._try_parse_val_and_len() | |
745 | ||
746 | if item is None: | |
747 | # Byte order item? | |
748 | item = self._try_parse_bo_name() | |
749 | ||
750 | if item is None: | |
751 | # At this point it's invalid | |
752 | self._raise_error("Expecting a value, a variable, or a byte order") | |
753 | ||
754 | # Expect suffix | |
755 | self._expect_pat(self._val_var_bo_suffix_pat, "Expecting `}`") | |
756 | return item | |
757 | ||
758 | # Pattern for _try_parse_offset_val() and _try_parse_rep() | |
759 | _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+") | |
760 | ||
761 | # Tries to parse an offset value (after the initial `<`), returning | |
762 | # an offset item on success. | |
763 | def _try_parse_offset_val(self): | |
0e8e3169 PP |
764 | begin_text_loc = self._text_loc |
765 | ||
71aaa3f7 PP |
766 | # Match |
767 | m = self._try_parse_pat(self._pos_const_int_pat) | |
768 | ||
769 | if m is None: | |
770 | # No match | |
771 | return | |
772 | ||
773 | # Return item | |
0e8e3169 | 774 | return _Offset(int(m.group(0), 0), begin_text_loc) |
71aaa3f7 PP |
775 | |
776 | # Tries to parse a label name (after the initial `<`), returning a | |
777 | # label item on success. | |
778 | def _try_parse_label_name(self): | |
0e8e3169 PP |
779 | begin_text_loc = self._text_loc |
780 | ||
71aaa3f7 PP |
781 | # Match |
782 | m = self._try_parse_pat(_py_name_pat) | |
783 | ||
784 | if m is None: | |
785 | # No match | |
786 | return | |
787 | ||
788 | # Validate | |
789 | name = m.group(0) | |
790 | ||
791 | if name == _icitte_name: | |
0e8e3169 PP |
792 | _raise_error( |
793 | "`{}` is a reserved label name".format(_icitte_name), begin_text_loc | |
794 | ) | |
71aaa3f7 PP |
795 | |
796 | if name in self._label_names: | |
0e8e3169 | 797 | _raise_error("Duplicate label name `{}`".format(name), begin_text_loc) |
71aaa3f7 PP |
798 | |
799 | if name in self._var_names: | |
0e8e3169 | 800 | _raise_error("Existing variable named `{}`".format(name), begin_text_loc) |
71aaa3f7 PP |
801 | |
802 | # Add to known label names | |
803 | self._label_names.add(name) | |
804 | ||
805 | # Return item | |
0e8e3169 | 806 | return _Label(name, begin_text_loc) |
71aaa3f7 PP |
807 | |
808 | # Patterns for _try_parse_label_or_offset() | |
809 | _label_offset_prefix_pat = re.compile(r"<\s*") | |
810 | _label_offset_suffix_pat = re.compile(r"\s*>") | |
811 | ||
812 | # Tries to parse a label or an offset, returning an item on success. | |
813 | def _try_parse_label_or_offset(self): | |
814 | # Match prefix | |
815 | if self._try_parse_pat(self._label_offset_prefix_pat) is None: | |
816 | # No match | |
817 | return | |
818 | ||
819 | # Offset item? | |
820 | item = self._try_parse_offset_val() | |
821 | ||
822 | if item is None: | |
823 | # Label item? | |
824 | item = self._try_parse_label_name() | |
825 | ||
826 | if item is None: | |
827 | # At this point it's invalid | |
828 | self._raise_error("Expecting a label name or an offset value") | |
829 | ||
830 | # Expect suffix | |
831 | self._expect_pat(self._label_offset_suffix_pat, "Expecting `>`") | |
832 | return item | |
833 | ||
834 | # Tries to parse a base item (anything except a repetition), | |
835 | # returning it on success. | |
836 | def _try_parse_base_item(self): | |
837 | # Byte item? | |
838 | item = self._try_parse_byte() | |
839 | ||
840 | if item is not None: | |
841 | return item | |
842 | ||
843 | # String item? | |
844 | item = self._try_parse_str() | |
845 | ||
846 | if item is not None: | |
847 | return item | |
848 | ||
849 | # Value, variable, or byte order item? | |
850 | item = self._try_parse_val_or_var_or_bo() | |
851 | ||
852 | if item is not None: | |
853 | return item | |
854 | ||
855 | # Label or offset item? | |
856 | item = self._try_parse_label_or_offset() | |
857 | ||
858 | if item is not None: | |
859 | return item | |
860 | ||
861 | # Group item? | |
862 | item = self._try_parse_group() | |
863 | ||
864 | if item is not None: | |
865 | return item | |
866 | ||
867 | # Pattern for _try_parse_rep() | |
868 | _rep_prefix_pat = re.compile(r"\*\s*") | |
869 | ||
870 | # Tries to parse a repetition, returning the multiplier on success, | |
871 | # or 1 otherwise. | |
872 | def _try_parse_rep(self): | |
71aaa3f7 PP |
873 | # Match prefix |
874 | if self._try_parse_pat(self._rep_prefix_pat) is None: | |
875 | # No match | |
876 | return 1 | |
877 | ||
878 | # Expect and return a decimal multiplier | |
879 | self._skip_ws_and_comments() | |
880 | m = self._expect_pat( | |
881 | self._pos_const_int_pat, "Expecting a positive integral multiplier" | |
882 | ) | |
883 | return int(m.group(0), 0) | |
884 | ||
1ca7b5e1 PP |
885 | # Tries to parse an item, possibly followed by a repetition, |
886 | # returning `True` on success. | |
887 | # | |
888 | # Appends any parsed item to `items`. | |
889 | def _try_append_item(self, items: List[_Item]): | |
71aaa3f7 PP |
890 | self._skip_ws_and_comments() |
891 | ||
892 | # Parse a base item | |
893 | item = self._try_parse_base_item() | |
894 | ||
895 | if item is None: | |
896 | # No item | |
1ca7b5e1 | 897 | return False |
71aaa3f7 PP |
898 | |
899 | # Parse repetition if the base item is repeatable | |
900 | if isinstance(item, _RepableItem): | |
0e8e3169 PP |
901 | self._skip_ws_and_comments() |
902 | rep_text_loc = self._text_loc | |
71aaa3f7 PP |
903 | rep = self._try_parse_rep() |
904 | ||
905 | if rep == 0: | |
1ca7b5e1 PP |
906 | # No item, but that's okay |
907 | return True | |
71aaa3f7 PP |
908 | elif rep > 1: |
909 | # Convert to repetition item | |
0e8e3169 | 910 | item = _Rep(item, rep, rep_text_loc) |
71aaa3f7 | 911 | |
1ca7b5e1 PP |
912 | items.append(item) |
913 | return True | |
71aaa3f7 PP |
914 | |
915 | # Parses and returns items, skipping whitespaces, insignificant | |
916 | # symbols, and comments when allowed, and stopping at the first | |
917 | # unknown character. | |
918 | def _parse_items(self) -> List[_Item]: | |
919 | items = [] # type: List[_Item] | |
920 | ||
921 | while self._isnt_done(): | |
1ca7b5e1 PP |
922 | # Try to append item |
923 | if not self._try_append_item(items): | |
924 | # Unknown at this point | |
925 | break | |
71aaa3f7 PP |
926 | |
927 | return items | |
928 | ||
929 | # Parses the whole Normand input, setting `self._res` to the main | |
930 | # group item on success. | |
931 | def _parse(self): | |
932 | if len(self._normand.strip()) == 0: | |
933 | # Special case to make sure there's something to consume | |
934 | self._res = _Group([], self._text_loc) | |
935 | return | |
936 | ||
937 | # Parse first level items | |
938 | items = self._parse_items() | |
939 | ||
940 | # Make sure there's nothing left | |
941 | self._skip_ws_and_comments() | |
942 | ||
943 | if self._isnt_done(): | |
944 | self._raise_error( | |
945 | "Unexpected character `{}`".format(self._normand[self._at]) | |
946 | ) | |
947 | ||
948 | # Set main group item | |
949 | self._res = _Group(items, self._text_loc) | |
950 | ||
951 | ||
# The return type of parse().
#
# Instances are built with the private `_create()` class method: calling
# the constructor directly raises `NotImplementedError`.
class ParseResult:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Creates and returns a parsing result from the given properties,
    # bypassing `__init__()`.
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def _init(
        self,
        data: bytearray,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data = data
        self._offset = offset
        self._bo = bo
        self._vars = variables
        self._labels = labels

    # Generated data.
    @property
    def data(self):
        return self._data

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels
1009 | ||
1010 | ||
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
#
# Never returns.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1015 | ||
1016 | ||
# The `ICITTE` reserved name.
#
# When evaluating an expression, this name is bound to the current
# offset (before encoding the item); it's always accepted by the
# expression validator, and initial variable/label names must not use
# it.
_icitte_name = "ICITTE"
1019 | ||
1020 | ||
# Value expression validator.
#
# Visits the AST nodes of the expression of an item and raises a parse
# error (at the text location of the item) for any name which is
# neither `ICITTE` nor a key of `syms`.
#
# A name node visited while the "parent is call" flag is set is NOT
# checked: the flag is set when entering a call node and cleared as
# soon as the visit of any node completes.
class _ExprValidator(ast.NodeVisitor):
    def __init__(self, item: _ExprItemT, syms: VarsT):
        self._syms = syms
        self._item = item
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # Skip the name check while this flag is set
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            # Make sure the name refers to a known variable/label name
            name = node.id
            is_known = name == _icitte_name or name in self._syms

            if not is_known:
                _raise_error(
                    "Unknown variable/label name `{}` in expression `{}`".format(
                        name, self._item.expr_str
                    ),
                    self._item.text_loc,
                )

        # TODO: Restrict the set of allowed node types

        # Visit the children, and then clear the flag whatever the type
        # of `node`.
        super().generic_visit(node)
        self._parent_is_call = False
1045 | ||
1046 | ||
# Keeper of labels for a given group instance.
#
# A group instance is one iteration of a given group.
#
# Instances of a given group are indexed from 0, in the order of the
# calls to add() for this group.
class _GroupInstanceLabels:
    def __init__(self):
        # Group to instance index to labels
        self._instance_labels = {}  # type: Dict[_Group, Dict[int, VarsT]]

    # Assigns a copy of the labels `labels` to a new instance of
    # `group`.
    def add(self, group: _Group, labels: VarsT):
        group_instances = self._instance_labels.setdefault(group, {})
        next_index = len(group_instances)
        group_instances[next_index] = labels.copy()

    # Returns the labels (not a copy) of the instance `instance_index`
    # of the group `group`.
    #
    # Raises `KeyError` if there's no such group or instance.
    def labels(self, group: _Group, instance_index: int):
        instances_of_group = self._instance_labels[group]
        return instances_of_group[instance_index]
1066 | ||
1067 | ||
# Generator of data and labels from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# Building works in two steps:
#
# 1. _resolve_labels() walks the items to compute the labels of each
#    group instance (so that an expression may refer to a label of its
#    group defined after it).
#
# 2. _gen() walks the items again to produce the bytes and update the
#    variables, offset, and byte order.
class _Gen:
    def __init__(
        self,
        group: _Group,
        variables: VarsT,
        labels: VarsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Resolve the labels of all the group instances first (working
        # on a copy of the initial labels)
        self._group_instance_labels = _GroupInstanceLabels()
        self._resolve_labels(group, offset, labels.copy())

        # Initial generation state (the initial variables are copied)
        self._vars = variables.copy()
        self._offset = offset
        self._bo = bo
        self._main_group = group

        # Generate
        self._gen()

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._vars

    # Updated main group labels.
    @property
    def labels(self):
        return self._group_instance_labels.labels(self._main_group, 0)

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def bo(self):
        return self._bo

    # Fills `self._group_instance_labels` with the labels for each group
    # instance in `item`, starting at current offset `offset` with the
    # current labels `labels`.
    #
    # Returns the new current offset.
    def _resolve_labels(self, item: _Item, offset: int, labels: VarsT) -> int:
        item_type = type(item)

        if item_type is _Offset:
            # Explicit offset: becomes the current one
            return item.val

        if item_type is _Rep:
            # Each repetition is handled on its own
            for _ in range(item.mul):
                offset = self._resolve_labels(item.item, offset, labels)

            return offset

        if item_type is not _Group:
            # Any other item: advance by its size
            return offset + item.size

        # First pass: compute immediate labels of this instance
        instance_labels = labels.copy()
        running_offset = offset

        for subitem in item.items:
            subitem_type = type(subitem)

            if subitem_type is _Offset:
                running_offset = subitem.val
            elif subitem_type is _Label:
                assert subitem.name not in instance_labels
                instance_labels[subitem.name] = running_offset
            else:
                running_offset += subitem.size

        # Add to group instance labels
        self._group_instance_labels.add(item, instance_labels)

        # Second pass: handle each item
        for subitem in item.items:
            offset = self._resolve_labels(subitem, offset, instance_labels)

        return offset

    # Appends the value of the byte item `item` to the data.
    def _handle_byte_item(self, item: _Byte):
        self._data.append(item.val)
        self._offset += item.size

    # Appends the data of the string item `item` to the data.
    def _handle_str_item(self, item: _Str):
        self._data += item.data
        self._offset += item.size

    # Sets the current byte order to the one of `item`.
    def _handle_bo_item(self, item: _Bo):
        self._bo = item.bo

    # Validates and evaluates the expression of `item`, returning its
    # integral result.
    #
    # Raises a parse error if the expression refers to an unknown name,
    # fails to evaluate, or if its result isn't exactly an `int`.
    def _eval_expr(self, item: _ExprItemT):
        # Get the labels of the current group instance as the initial
        # symbols (copied because we're adding stuff).
        cur_group = self._cur_group
        assert cur_group is not None
        instance_index = self._group_instance_indexes[cur_group]
        syms = self._group_instance_labels.labels(cur_group, instance_index).copy()

        # Set the `ICITTE` name to the current offset (before encoding)
        syms[_icitte_name] = self._offset

        # Add the current variables (a variable takes precedence over a
        # label having the same name)
        syms.update(self._vars)

        # Validate the node and its children
        _ExprValidator(item, syms).visit(item.expr)

        # Compile and evaluate expression node.
        #
        # NOTE(review): this runs a user-provided expression with
        # eval(); with `None` as the globals, this module's globals
        # appear to be reachable from the expression, and the validator
        # above only checks names. Confirm that inputs are trusted.
        try:
            code = compile(item.expr, "", "eval")
            val = eval(code, None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Validate result
        if type(val) is not int:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected result type `{}`".format(
                    item.expr_str, type(val).__name__
                ),
                item,
            )

        return val

    # Sets the variable named by `item` to the result of its expression.
    def _handle_var_item(self, item: _Var):
        self._vars[item.name] = self._eval_expr(item)

    # Evaluates the expression of `item` and appends its result,
    # encoded on `item.len` bits with the current byte order, to the
    # data.
    def _handle_val_item(self, item: _Val):
        # Compute value
        val = self._eval_expr(item)

        # Validate range: anything from the smallest signed value to
        # the largest unsigned value is accepted
        min_val = -(2 ** (item.len - 1))
        max_val = 2**item.len - 1

        if val < min_val or val > max_val:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, self._offset
                ),
                item,
            )

        # A byte order is only required to encode more than one byte
        if self._bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first value (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        is_be = self._bo in (None, ByteOrder.BE)
        bo_char = ">" if is_be else "<"
        type_char = "Q" if val >= 0 else "q"
        data = struct.pack("{}{}".format(bo_char, type_char), val)

        # Keep only the requested length
        len_bytes = item.len // 8

        if is_be:
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert self._bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes and update offset
        self._data += data
        self._offset += len(data)

    # Handles each item of a new instance of the group `item`.
    def _handle_group_item(self, item: _Group):
        # Update the instance index of `item` (first instance is 0)
        indexes = self._group_instance_indexes
        indexes[item] = indexes[item] + 1 if item in indexes else 0

        # Change current group, remembering the previous one
        prev_group = self._cur_group
        self._cur_group = item

        # Handle each item
        for subitem in item.items:
            self._handle_item(subitem)

        # Restore current group
        self._cur_group = prev_group

    # Handles the repeated item of `item` `item.mul` times.
    def _handle_rep_item(self, item: _Rep):
        for _ in range(item.mul):
            self._handle_item(item.item)

    # Sets the current offset to the value of `item`.
    def _handle_offset_item(self, item: _Offset):
        self._offset = item.val

    # Calls the handler registered for the exact type of `item`, doing
    # nothing if there's none.
    def _handle_item(self, item: _Item):
        handler = self._item_handlers.get(type(item))

        if handler is not None:
            handler(item)

    # Generates the data from the main group, updating the current
    # variables, offset, and byte order.
    def _gen(self):
        # Initial state
        self._data = bytearray()
        self._group_instance_indexes = {}  # type: Dict[_Group, int]
        self._cur_group = None

        # Item handlers (exact item type to bound method)
        self._item_handlers = {
            _Byte: self._handle_byte_item,
            _Str: self._handle_str_item,
            _Bo: self._handle_bo_item,
            _Val: self._handle_val_item,
            _Var: self._handle_var_item,
            _Group: self._handle_group_item,
            _Rep: self._handle_rep_item,
            _Offset: self._handle_offset_item,
        }  # type: Dict[type, Callable[[Any], None]]

        # Handle the group item
        self._handle_item(self._main_group)
1298 | ||
1299 | ||
# Parses the Normand input string `normand`, generates the
# corresponding bytes, and returns everything as a `ParseResult`
# instance.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order (`None` means none).
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VarsT] = None,
    init_labels: Optional[VarsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text to get the main group item
    main_group = _Parser(normand, variables, labels).res

    # Generate (happens when building the generator)
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
1339 | ||
1340 | ||
# Parses the command-line arguments of the current process and returns
# the resulting `argparse` namespace.
#
# The namespace has the attributes `offset`, `byte_order`, `var`,
# `label`, and `path`.
def _parse_cli_args():
    import argparse

    # Build parser
    parser = argparse.ArgumentParser()
    add_arg = parser.add_argument

    add_arg(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    add_arg(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    add_arg(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    add_arg(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    add_arg("--version", action="version", version="Normand {}".format(__version__))
    add_arg(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    return parser.parse_args()
1389 | ||
1390 | ||
# Raises a command-line error (`RuntimeError`) of which the message is
# `msg` prefixed with `Command-line error: `.
#
# Never returns.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
1394 | ||
1395 | ||
# Returns a dictionary of string to integers from the list of strings
# `args` containing `NAME=VAL` entries.
#
# `NAME` must be a valid Python name and `VAL` a sequence of decimal
# digits. If a name is given more than once, then its last value wins.
#
# Returns an empty dictionary if `args` is `None`.
#
# Raises a command-line error (see _raise_cli_error()) if any entry is
# malformed.
def _dict_from_arg(args: Optional[List[str]]):
    d = {}  # type: Dict[str, int]

    if args is None:
        return d

    # Named groups so that this keeps working whatever the groups within
    # the Python name pattern itself.
    assign_pat = re.compile(
        r"(?P<name>{})=(?P<val>\d+)$".format(_py_name_pat.pattern)
    )

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        # Actually keep the assignment: without this, the function only
        # validates the entries and always returns an empty dictionary.
        d[m.group("name")] = int(m.group("val"))

    return d
1411 | ||
1412 | ||
# CLI entry point without exception handling.
#
# Parses the command-line arguments, reads the Normand input (from the
# file at the given path, or from the standard input without a path),
# generates the data with parse(), and writes the resulting bytes to
# the standard output.
#
# On a `ParseError`, calls _fail() with a message containing the
# absolute input path (if any), the line and column numbers of the
# error, and the error message.
#
# Any other exception (including a command-line error) propagates to
# the caller.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = _dict_from_arg(args.var)
    labels = _dict_from_arg(args.label)

    # Validate offset (the message must show the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        # Never returns, so `res` is always bound below
        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Write the generated bytes as is (binary)
    sys.stdout.buffer.write(res.data)
1462 | ||
1463 | ||
# Prints the exception message `msg` to the standard error, adding a
# final period if there's none, and exits with status 1.
#
# Never returns.
def _fail(msg: str) -> NoReturn:
    final_msg = msg if msg.endswith(".") else msg + "."
    print(final_msg, file=sys.stderr)
    sys.exit(1)
1471 | ||
1472 | ||
# CLI entry point.
#
# Runs _try_run_cli() and converts any exception (deriving from
# `Exception`) into a call to _fail() with its message; `SystemExit`
# isn't an `Exception`, so an exit requested by _try_run_cli() goes
# through untouched.
def _run_cli():
    try:
        _try_run_cli()
    except Exception as err:
        err_msg = str(err)
        _fail(err_msg)
1479 | ||
1480 | ||
# Run the CLI only when this module is executed as a script (not when
# it's imported).
if __name__ == "__main__":
    _run_cli()