Commit | Line | Data |
---|---|---|
22a9c970 PP |
1 | # SPDX-FileCopyrightText: 2023 Philippe Proulx <eeppeliteloop@gmail.com> |
2 | # SPDX-License-Identifier: MIT | |
3 | # | |
e612fc1e PP |
4 | # The MIT License (MIT) |
5 | # | |
6 | # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com> | |
7 | # | |
8 | # Permission is hereby granted, free of charge, to any person obtaining | |
9 | # a copy of this software and associated documentation files (the | |
10 | # "Software"), to deal in the Software without restriction, including | |
11 | # without limitation the rights to use, copy, modify, merge, publish, | |
12 | # distribute, sublicense, and/or sell copies of the Software, and to | |
13 | # permit persons to whom the Software is furnished to do so, subject to | |
14 | # the following conditions: | |
15 | # | |
16 | # The above copyright notice and this permission notice shall be | |
17 | # included in all copies or substantial portions of the Software. | |
18 | # | |
19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
20 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
21 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
22 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
23 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
24 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
25 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
26 | ||
27 | # This module is the portable Normand processor. It offers both the | |
28 | # parse() function and the command-line tool (run the module itself) | |
29 | # without external dependencies except a `typing` module for Python 3.4. | |
30 | # | |
31 | # Feel free to copy this module file to your own project to use Normand. | |
32 | # | |
33 | # Upstream repository: <https://github.com/efficios/normand>. | |
34 | ||
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.23.0"

# Names which make up the public API of this module; this is also what
# a star import of the module brings into scope.
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseErrorMessage",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
49 | ||
50 | import re | |
51 | import abc | |
52 | import ast | |
53 | import bz2 | |
54 | import sys | |
55 | import copy | |
56 | import enum | |
57 | import gzip | |
58 | import math | |
59 | import base64 | |
60 | import quopri | |
61 | import struct | |
62 | import typing | |
63 | import functools | |
64 | from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional | |
65 | ||
66 | ||
67 | # Text location (line and column numbers). | |
class TextLocation:
    """Location within some text: a line number and a column number.

    Instances cannot be built through the regular constructor
    (`__init__()` always raises `NotImplementedError`); this module
    creates them through the private `_create()` class method.
    """

    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        """Return a new location for line `line_no` and column `col_no`."""
        # Go through __new__() so that the always-raising __init__()
        # above isn't called.
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def _init(self, line_no: int, col_no: int):
        """Real initializer, only called by `_create()`."""
        self._line_no, self._col_no = line_no, col_no

    @property
    def line_no(self):
        """Line number."""
        return self._line_no

    @property
    def col_no(self):
        """Column number."""
        return self._col_no

    def __repr__(self):
        return "TextLocation({}, {})".format(self._line_no, self._col_no)
94 | ||
95 | ||
96 | # Any item. | |
class _Item:
    """Base class of all items: holds the item's source text location."""

    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    @property
    def text_loc(self):
        """Source text location given at construction time."""
        return self._text_loc
105 | ||
106 | ||
107 | # Scalar item. | |
class _ScalarItem(_Item):
    """Scalar item: an item which reports its size in bytes.

    NOTE(review): `_Item` is a plain class (no `abc.ABCMeta`
    metaclass), so `@abc.abstractmethod` below only documents the
    intent: Python won't refuse to instantiate a subclass which
    doesn't define `size`.
    """

    @property
    @abc.abstractmethod
    def size(self) -> int:
        """Size of this item, in bytes (to be provided by subclasses)."""
114 | ||
115 | ||
116 | # A repeatable item. | |
class _RepableItem:
    """Marker base class (no state, no behaviour) tagging repeatable items."""
119 | ||
120 | ||
121 | # Single byte. | |
class _Byte(_ScalarItem, _RepableItem):
    """Item holding the value of a single byte."""

    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        """Byte value."""
        return self._val

    @property
    def size(self):
        """Size in bytes: always 1."""
        return 1

    def __repr__(self):
        # The value is shown in hexadecimal.
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
138 | ||
139 | ||
140 | # Literal string. | |
class _LitStr(_ScalarItem, _RepableItem):
    """Literal string item, holding its already encoded bytes."""

    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    @property
    def data(self):
        """Encoded bytes."""
        return self._data

    @property
    def size(self):
        """Size in bytes: the length of the encoded data."""
        return len(self._data)

    def __repr__(self):
        return "_LitStr({!r}, {!r})".format(self._data, self._text_loc)
157 | ||
158 | ||
159 | # Byte order. | |
@enum.unique
class ByteOrder(enum.Enum):
    """Byte order; `enum.unique` guarantees that the values are distinct."""

    BE = "be"  # Big endian
    LE = "le"  # Little endian
167 | ||
168 | ||
169 | # Byte order setting. | |
class _SetBo(_Item):
    """Byte order setting item."""

    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    @property
    def bo(self):
        """Byte order to set."""
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
181 | ||
182 | ||
183 | # Label. | |
class _Label(_Item):
    """Label item."""

    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    @property
    def name(self):
        """Label name."""
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
196 | ||
197 | ||
198 | # Offset setting. | |
class _SetOffset(_Item):
    """Offset setting item."""

    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    @property
    def val(self):
        """Offset value (bytes)."""
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
211 | ||
212 | ||
213 | # Offset alignment. | |
class _AlignOffset(_Item):
    """Offset alignment item."""

    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    @property
    def val(self):
        """Alignment value (bits)."""
        return self._val

    @property
    def pad_val(self):
        """Padding byte value."""
        return self._pad_val

    def __repr__(self):
        fields = (self._val, self._pad_val, self._text_loc)
        return "_AlignOffset({!r}, {!r}, {!r})".format(*fields)
234 | ||
235 | ||
236 | # Mixin of containing an AST expression and its string. | |
class _ExprMixin:
    """Mixin for items which carry an expression.

    Both the original expression string and its parsed AST node are
    kept. The initializer isn't cooperative: users call
    `_ExprMixin.__init__()` explicitly.
    """

    def __init__(self, expr_str: str, expr: ast.Expression):
        self._expr_str, self._expr = expr_str, expr

    @property
    def expr_str(self):
        """Expression string."""
        return self._expr_str

    @property
    def expr(self):
        """Expression node to evaluate."""
        return self._expr
251 | ||
252 | ||
253 | # Fill until some offset. | |
class _FillUntil(_Item, _ExprMixin):
    """"Fill until some offset" item: an expression and a padding byte value."""

    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # `_Item` and `_ExprMixin` are initialized separately because
        # their initializers take different parameters.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    @property
    def pad_val(self):
        """Padding byte value."""
        return self._pad_val

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._pad_val, self._text_loc)
        return "_FillUntil({!r}, {!r}, {!r}, {!r})".format(*fields)
274 | ||
275 | ||
276 | # Variable assignment. | |
class _VarAssign(_Item, _ExprMixin):
    """Variable assignment item: a name and the expression to assign."""

    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # `_Item` and `_ExprMixin` are initialized separately because
        # their initializers take different parameters.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    @property
    def name(self):
        """Name."""
        return self._name

    def __repr__(self):
        fields = (self._name, self._expr_str, self._expr, self._text_loc)
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(*fields)
297 | ||
298 | ||
299 | # Fixed-length number, possibly needing more than one byte. | |
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    """Fixed-length number item, possibly needing more than one byte.

    The value is given by an expression; the item also holds its
    length in bits and an optional byte order override.
    """

    def __init__(
        self,
        expr_str: str,
        expr: ast.Expression,
        len: int,
        bo: Optional[ByteOrder],
        text_loc: TextLocation,
    ):
        # `_Item` and `_ExprMixin` are initialized separately because
        # their initializers take different parameters.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len, self._bo = len, bo

    @property
    def len(self):
        """Length (bits)."""
        return self._len

    @property
    def bo(self):
        """Byte order override (`None` if there's none)."""
        return self._bo

    @property
    def size(self):
        """Size in bytes: the length in bits, integer-divided by 8."""
        return self._len // 8

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._len, self._bo, self._text_loc)
        return "_FlNum({!r}, {!r}, {!r}, {!r}, {!r})".format(*fields)
336 | ||
337 | ||
338 | # LEB128 integer. | |
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    """LEB128 integer item (common base of the unsigned and signed variants)."""

    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        # `_Item` and `_ExprMixin` are initialized separately because
        # their initializers take different parameters.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the name of the concrete class so that subclasses don't
        # need their own __repr__().
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
351 | ||
352 | ||
353 | # Unsigned LEB128 integer. | |
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    """Unsigned LEB128 integer item; adds nothing to `_Leb128Int` but its type."""
356 | ||
357 | ||
358 | # Signed LEB128 integer. | |
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    """Signed LEB128 integer item; adds nothing to `_Leb128Int` but its type."""
361 | ||
362 | ||
363 | # String. | |
class _Str(_Item, _RepableItem, _ExprMixin):
    """String item: an expression and the name of the codec to use."""

    def __init__(
        self, expr_str: str, expr: ast.Expression, codec: str, text_loc: TextLocation
    ):
        # `_Item` and `_ExprMixin` are initialized separately because
        # their initializers take different parameters.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._codec = codec

    @property
    def codec(self):
        """Codec name."""
        return self._codec

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._codec, self._text_loc)
        return "_Str({!r}, {!r}, {!r}, {!r})".format(*fields)
384 | ||
385 | ||
386 | # Group of items. | |
class _Group(_Item, _RepableItem):
    """Group item: holds a list of items."""

    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        # The list is kept as is (not copied).
        self._items = items

    @property
    def items(self):
        """Contained items."""
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
399 | ||
400 | ||
401 | # Repetition item. | |
class _Rep(_Group, _ExprMixin):
    """Repetition item: a group of items together with an expression."""

    def __init__(
        self,
        items: List[_Item],
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # `_Group` and `_ExprMixin` are initialized separately because
        # their initializers take different parameters.
        super().__init__(items, text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        fields = (self._items, self._expr_str, self._expr, self._text_loc)
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(*fields)
420 | ||
421 | ||
422 | # Conditional item. | |
class _Cond(_Item, _ExprMixin):
    """Conditional item: a condition expression and one group per outcome."""

    def __init__(
        self,
        true_item: _Group,
        false_item: _Group,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        # `_Item` and `_ExprMixin` are initialized separately because
        # their initializers take different parameters.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    @property
    def true_item(self):
        """Item when condition is true."""
        return self._true_item

    @property
    def false_item(self):
        """Item when condition is false."""
        return self._false_item

    def __repr__(self):
        fields = (
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
        return "_Cond({!r}, {!r}, {!r}, {!r}, {!r})".format(*fields)
455 | ||
456 | ||
457 | # Transformation. | |
class _Trans(_Group, _RepableItem):
    """Transformation item: a group of items, a name, and a function
    which transforms data."""

    def __init__(
        self,
        items: List[_Item],
        name: str,
        func: Callable[[Union[bytes, bytearray]], bytes],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._name, self._func = name, func

    @property
    def name(self):
        """Name given at construction time."""
        return self._name

    def trans(self, data: Union[bytes, bytearray]):
        """Return the result of applying the transformation function to `data`."""
        transform = self._func
        return transform(data)

    def __repr__(self):
        fields = (self._items, self._name, self._func, self._text_loc)
        return "_Trans({!r}, {!r}, {!r}, {!r})".format(*fields)
485 | ||
486 | ||
487 | # Macro definition item. | |
class _MacroDef(_Group):
    """Macro definition item: a name, parameter names, and a group of items."""

    def __init__(
        self,
        name: str,
        param_names: List[str],
        items: List[_Item],
        text_loc: TextLocation,
    ):
        super().__init__(items, text_loc)
        self._name, self._param_names = name, param_names

    @property
    def name(self):
        """Name."""
        return self._name

    @property
    def param_names(self):
        """Parameter names."""
        return self._param_names

    def __repr__(self):
        fields = (self._name, self._param_names, self._items, self._text_loc)
        return "_MacroDef({!r}, {!r}, {!r}, {!r})".format(*fields)
517 | ||
518 | ||
519 | # Macro expansion parameter. | |
class _MacroExpParam:
    """Macro expansion parameter: an expression (string and AST node)
    and its source text location.

    This is a standalone class: it derives from neither `_Item` nor
    `_ExprMixin`, but exposes the same `expr_str`, `expr`, and
    `text_loc` properties.
    """

    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    @property
    def expr_str(self):
        """Expression string."""
        return self._expr_str

    @property
    def expr(self):
        """Expression."""
        return self._expr

    @property
    def text_loc(self):
        """Source text location."""
        return self._text_loc

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._text_loc)
        return "_MacroExpParam({!r}, {!r}, {!r})".format(*fields)
545 | ||
546 | ||
547 | # Macro expansion item. | |
class _MacroExp(_Item, _RepableItem):
    """Macro expansion item: the macro name and the expansion parameters."""

    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    @property
    def name(self):
        """Name."""
        return self._name

    @property
    def params(self):
        """Parameters."""
        return self._params

    def __repr__(self):
        fields = (self._name, self._params, self._text_loc)
        return "_MacroExp({!r}, {!r}, {!r})".format(*fields)
575 | ||
576 | ||
577 | # A parsing error message: a string and a text location. | |
class ParseErrorMessage:
    """Parsing error message: a text and a source text location.

    Instances cannot be built through the regular constructor
    (`__init__()` always raises `NotImplementedError`); this module
    creates them through the private `_create()` class method.
    """

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        """Return a new message having the text `text` and the location `text_loc`."""
        # Go through __new__() so that the always-raising __init__()
        # above isn't called.
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    def _init(self, text: str, text_loc: TextLocation):
        """Real initializer, only called by `_create()`."""
        self._text, self._text_loc = text, text_loc

    @property
    def text(self):
        """Message text."""
        return self._text

    @property
    def text_location(self):
        """Source text location."""
        return self._text_loc
601 | ||
602 | ||
603 | # A parsing error containing one or more messages (`ParseErrorMessage`). | |
class ParseError(RuntimeError):
    """Parsing error containing one or more `ParseErrorMessage` objects.

    Instances cannot be built through the regular constructor
    (`__init__()` always raises `NotImplementedError`); this module
    creates them through the private `_create()` class method.
    """

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        """Return a new error having a single initial message."""
        # Go through __new__() so that the always-raising __init__()
        # above isn't called.
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    def _init(self, msg: str, text_loc: TextLocation):
        """Real initializer, only called by `_create()`."""
        # `msg` also becomes the argument of the base exception.
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    def _add_msg(self, msg: str, text_loc: TextLocation):
        """Append a new message (text `msg`, location `text_loc`)."""
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    @property
    def messages(self):
        """Parsing error messages (the internal list, not a copy).

        The first message is the most specific one.
        """
        return self._msgs
632 | ||
633 | ||
634 | # Raises a parsing error, forwarding the parameters to the constructor. | |
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    """Raise a new `ParseError` created from `msg` and `text_loc`; never returns."""
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
637 | ||
638 | ||
639 | # Adds a message to the parsing error `exc`. | |
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    """Append a message (text `msg`, location `text_loc`) to the parsing
    error `exc`, without raising anything."""
    add_msg = exc._add_msg  # pyright: ignore[reportPrivateUsage]
    add_msg(msg, text_loc)
642 | ||
643 | ||
644 | # Appends a message to the parsing error `exc` and reraises it. | |
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    """Append a message (text `msg`, location `text_loc`) to the parsing
    error `exc`, then raise `exc` again; never returns."""
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
648 | ||
649 | ||
650 | # Returns a normalized version (so as to be parseable by int()) of | |
651 | # the constant integer string `s`, possibly negative, dealing with | |
652 | # any radix suffix. | |
def _norm_const_int(s: str):
    """Return the constant integer string `s` normalized for `int()`.

    `s` may be negative (leading `-`) and may use a radix suffix
    instead of a radix prefix. The result uses a `0x`/`0o`/`0b` style
    prefix when a radix applies, so it's presumably meant to be given
    to `int()` with base 0 (the call site isn't part of this block).

    Supported forms, after the optional `-`:

    * Complete prefixed constant (`0x1F`, `0o17`, `0b101`, any case):
      `s` is returned unchanged.
    * Suffixed constant: `h`/`H` (hexadecimal), `q`/`Q`/`o`/`O`
      (octal), `b`/`B` (binary): the suffix is removed and the
      matching prefix is added (`FFh` becomes `0xFF`).
    * Anything else (plain decimal, empty string): `s` is returned
      unchanged.
    """
    neg = ""
    pos = s

    if s.startswith("-"):
        neg = "-"
        pos = s[1:]

    # Only a _complete_ prefixed constant is already correct. Checking
    # the first two characters alone isn't enough: `0Bh` and `0b1h` are
    # hexadecimal constants with an `h` suffix, and `0b` and `0o` are
    # the value zero with a binary/octal suffix.
    if re.fullmatch(r"0(?:[xX][0-9A-Fa-f]+|[oO][0-7]+|[bB][01]+)", pos):
        return s

    # Radix prefix letter for each assembly-style radix suffix
    asm_suf_base = {
        "h": "x",
        "H": "x",
        "q": "o",
        "Q": "o",
        "o": "o",
        "O": "o",
        "b": "b",
        "B": "B",
    }

    # `pos[-1:]` is an empty string (never a key) when `pos` is empty
    base = asm_suf_base.get(pos[-1:])

    if base is None:
        # No radix suffix
        return s

    # Replace the suffix (exactly one character) with the prefix
    return "{}0{}{}".format(neg, base, pos[:-1])
683 | ||
684 | ||
685 | # Encodes the string `s` using the codec `codec`, raising `ParseError` | |
686 | # with `text_loc` on encoding error. | |
def _encode_str(s: str, codec: str, text_loc: TextLocation):
    """Return the string `s` encoded with the codec `codec`.

    Raises `ParseError` (at `text_loc`) if `s` cannot be encoded. Only
    `UnicodeEncodeError` is translated: any other exception of
    `str.encode()` propagates as is.
    """
    try:
        encoded = s.encode(codec)
    except UnicodeEncodeError:
        error_msg = "Cannot encode `{}` with the `{}` encoding".format(s, codec)
        _raise_error(error_msg, text_loc)
    else:
        return encoded
694 | ||
695 | ||
696 | # Variables dictionary type (for type hints). | |
# Maps a variable name to its value (integer, float, or string).
VariablesT = Dict[str, Union[int, float, str]]


# Labels dictionary type (for type hints).
#
# Maps a label name to an integer.
LabelsT = Dict[str, int]


# Common patterns.

# Python-style name: a letter or `_`, then letters, digits, or `_`.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")

# Positive constant integer, ending at a word boundary: with a radix
# prefix (`0x`, `0o`, `0b`; any case), with a radix suffix (`h`/`H`,
# `q`/`Q`/`o`/`O`, `b`/`B`), or plain decimal.
_pos_const_int_pat = re.compile(
    r"(?:0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+)\b"
)

# Same as `_pos_const_int_pat`, with an optional leading `-` captured
# as the `neg` group.
_const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))

# Constant floating point number: optional sign, digits with a
# mandatory `.` (`1.5`, `.5`, or `1.`), and an optional exponent.
#
# NOTE(review): the final lookahead `(?=\W|)` has an empty alternative,
# so it always succeeds: it doesn't constrain what follows the number.
# Confirm whether `(?=\W|$)` was intended.
_const_float_pat = re.compile(
    r"[-+]?(?:(?:\d*\.\d+)|(?:\d+\.))(?:[Ee][+-]?\d+)?(?=\W|)"
)


# Macro definition dictionary.
#
# Maps a name to a macro definition item.
_MacroDefsT = Dict[str, _MacroDef]
717 | ||
718 | ||
719 | # Normand parser. | |
720 | # | |
721 | # The constructor accepts a Normand input. After building, use the `res` | |
722 | # property to get the resulting main group. | |
723 | class _Parser: | |
724 | # Builds a parser to parse the Normand input `normand`, parsing | |
725 | # immediately. | |
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    """Build a parser for the Normand input `normand` and parse it
    immediately.

    Only the names (keys) of `variables` and `labels` are recorded.
    """
    self._normand = normand

    # Parsing position: index within `normand`, plus the line and
    # column numbers (both starting at 1).
    self._at, self._line_no, self._col_no = 0, 1, 1

    # Initially known label and variable names
    self._label_names = set(labels)
    self._var_names = set(variables)

    self._macro_defs = {}  # type: _MacroDefsT

    # Base item parsing methods; their order in this list is kept
    # as is.
    self._base_item_parse_funcs = [
        self._try_parse_byte,
        self._try_parse_str,
        self._try_parse_val,
        self._try_parse_var_assign,
        self._try_parse_set_bo,
        self._try_parse_label_or_set_offset,
        self._try_parse_align_offset,
        self._try_parse_fill_until,
        self._try_parse_group,
        self._try_parse_rep_block,
        self._try_parse_cond_block,
        self._try_parse_macro_exp,
        self._try_parse_trans_block,
    ]

    # Parse now
    self._parse()
750 | ||
751 | # Result (main group). | |
752 | @property | |
753 | def res(self): | |
754 | return self._res | |
755 | ||
756 | # Macro definitions. | |
757 | @property | |
758 | def macro_defs(self): | |
759 | return self._macro_defs | |
760 | ||
761 | # Current text location. | |
@property
def _text_loc(self):
    """Current text location: a new `TextLocation` object built from
    the current line and column numbers on each access."""
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(  # pyright: ignore[reportPrivateUsage]
        line_no, col_no
    )
767 | ||
768 | # Returns `True` if this parser is done parsing. | |
769 | def _is_done(self): | |
770 | return self._at == len(self._normand) | |
771 | ||
772 | # Returns `True` if this parser isn't done parsing. | |
773 | def _isnt_done(self): | |
774 | return not self._is_done() | |
775 | ||
776 | # Raises a parse error, creating it using the message `msg` and the | |
777 | # current text location. | |
def _raise_error(self, msg: str) -> NoReturn:
    """Raise a parsing error having the message `msg` and the current
    text location; never returns."""
    # Within this body, `_raise_error` is the module-level function
    # (global name lookup), not this method.
    current_loc = self._text_loc
    _raise_error(msg, current_loc)
780 | ||
781 | # Tries to make the pattern `pat` match the current substring, | |
782 | # returning the match object and updating `self._at`, | |
783 | # `self._line_no`, and `self._col_no` on success. | |
784 | def _try_parse_pat(self, pat: Pattern[str]): | |
785 | m = pat.match(self._normand, self._at) | |
786 | ||
787 | if m is None: | |
788 | return | |
789 | ||
790 | # Skip matched string | |
791 | self._at += len(m.group(0)) | |
792 | ||
793 | # Update line number | |
794 | self._line_no += m.group(0).count("\n") | |
795 | ||
796 | # Update column number | |
797 | for i in reversed(range(self._at)): | |
798 | if self._normand[i] == "\n" or i == 0: | |
799 | if i == 0: | |
800 | self._col_no = self._at + 1 | |
801 | else: | |
802 | self._col_no = self._at - i | |
803 | ||
804 | break | |
805 | ||
806 | # Return match object | |
807 | return m | |
808 | ||
809 | # Expects the pattern `pat` to match the current substring, | |
810 | # returning the match object and updating `self._at`, | |
811 | # `self._line_no`, and `self._col_no` on success, or raising a parse | |
812 | # error with the message `error_msg` on error. | |
813 | def _expect_pat(self, pat: Pattern[str], error_msg: str): | |
814 | # Match | |
815 | m = self._try_parse_pat(pat) | |
816 | ||
817 | if m is None: | |
818 | # No match: error | |
819 | self._raise_error(error_msg) | |
820 | ||
821 | # Return match object | |
822 | return m | |
823 | ||
824 | # Patterns for _skip_*() | |
# Comment: `#`, then as few non-`#` characters as possible, up to the
# end of the line (`$` in multiline mode) or up to a closing `#`.
_comment_pat = re.compile(r"#[^#]*?(?:$|#)", re.M)

# Any number (possibly zero) of whitespaces and comments.
_ws_or_comments_pat = re.compile(r"(?:\s|{})*".format(_comment_pat.pattern), re.M)

# Any number (possibly zero) of whitespaces, comments, and the symbol
# characters `/`, `\`, `?`, `&`, `:`, `;`, `.`, `,`, `_`, `=`, `|`,
# and `-`.
_ws_or_syms_or_comments_pat = re.compile(
    r"(?:[\s/\\?&:;.,_=|-]|{})*".format(_comment_pat.pattern), re.M
)
830 | ||
831 | # Skips as many whitespaces and comments as possible, but not | |
832 | # insignificant symbol characters. | |
833 | def _skip_ws_and_comments(self): | |
834 | self._try_parse_pat(self._ws_or_comments_pat) | |
835 | ||
836 | # Skips as many whitespaces, insignificant symbol characters, and | |
837 | # comments as possible. | |
838 | def _skip_ws_and_comments_and_syms(self): | |
839 | self._try_parse_pat(self._ws_or_syms_or_comments_pat) | |
840 | ||
841 | # Pattern for _try_parse_hex_byte() | |
# Single hexadecimal digit (any case): one nibble of a byte.
_nibble_pat = re.compile(r"[A-Fa-f0-9]")
843 | ||
844 | # Tries to parse a hexadecimal byte, returning a byte item on | |
845 | # success. | |
def _try_parse_hex_byte(self):
    """Try to parse a hexadecimal byte (two nibbles).

    Returns a `_Byte` item on success, or `None` if the current
    position isn't a hexadecimal digit.

    Whitespaces, insignificant symbols, and comments are accepted
    between both nibbles. Raises a parsing error if the second nibble
    is missing.
    """
    start_loc = self._text_loc
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        # Not a hexadecimal byte
        return

    self._skip_ws_and_comments_and_syms()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # High nibble first
    hex_digits = high.group(0) + low.group(0)
    return _Byte(int(hex_digits, 16), start_loc)
864 | ||
865 | # Patterns for _try_parse_bin_byte() | |
# Single bit: `0` or `1`.
_bin_byte_bit_pat = re.compile(r"[01]")

# Binary byte prefix: one or more `%`.
_bin_byte_prefix_pat = re.compile(r"%+")
868 | ||
869 | # Tries to parse a binary byte, returning a byte item on success. | |
def _try_parse_bin_byte(self):
    """Try to parse one or more binary bytes.

    The prefix is one or more `%`: each `%` announces one byte of
    exactly eight bits (most significant bit first). Whitespaces,
    insignificant symbols, and comments are accepted before each bit.

    Returns `None` if there's no `%` prefix, a single `_Byte` item
    for a single `%`, or a `_Group` of `_Byte` items otherwise.

    Raises a parsing error if a bit is missing.
    """
    start_loc = self._text_loc
    prefix_m = self._try_parse_pat(self._bin_byte_prefix_pat)

    if prefix_m is None:
        # Not a binary byte
        return

    byte_items = []  # type: List[_Item]

    # One byte per `%` character of the prefix
    for _ in prefix_m.group(0):
        self._skip_ws_and_comments_and_syms()
        byte_loc = self._text_loc
        bit_strs = []  # type: List[str]

        while len(bit_strs) < 8:
            self._skip_ws_and_comments_and_syms()
            bit_m = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            bit_strs.append(bit_m.group(0))

        byte_items.append(_Byte(int("".join(bit_strs), 2), byte_loc))

    if len(byte_items) == 1:
        # Single byte: no need for a group
        return byte_items[0]

    return _Group(byte_items, start_loc)
904 | ||
905 | # Patterns for _try_parse_dec_byte() | |
# Decimal byte prefix: a literal `$`.
_dec_byte_prefix_pat = re.compile(r"\$")

# Decimal byte value: an optional `-` (group `neg`, empty when absent)
# followed by one or more digits (group `val`).
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
908 | ||
909 | # Tries to parse a decimal byte, returning a byte item on success. | |
def _try_parse_dec_byte(self):
    """Try to parse a decimal byte: `$`, then a decimal constant.

    Whitespaces and comments are accepted between the prefix and the
    value. The value must be within [-128, 255]; a negative value is
    stored as its two's complement.

    Returns a `_Byte` item on success, or `None` if there's no `$`
    prefix. Raises a parsing error if the value is missing or out of
    range (reported at the beginning of the item).
    """
    start_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # Not a decimal byte
        return

    self._skip_ws_and_comments()
    val_m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Magnitude, then sign
    val = int(val_m.group("val"))

    if val_m.group("neg") == "-":
        val = -val

    if not (-128 <= val <= 255):
        _raise_error("Invalid decimal byte value {}".format(val), start_loc)

    # Two's complement for negative values
    return _Byte(val % 256, start_loc)
934 | ||
935 | # Tries to parse a byte, returning a byte item on success. | |
def _try_parse_byte(self):
    """Try to parse a byte in any notation.

    Tries, in this order, the hexadecimal, binary, and decimal
    notations, returning the first resulting item, or `None` if none
    of them matches.
    """
    notation_parse_funcs = (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    )

    for parse_func in notation_parse_funcs:
        item = parse_func()

        if item is not None:
            return item
954 | ||
955 | # Strings corresponding to escape sequence characters | |
# Maps the character which follows the backslash of a supported escape
# sequence to the resulting character.
_lit_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Patterns for _try_parse_lit_str()

# Delimiter of a literal string (both beginning and end): `"`.
_lit_str_prefix_suffix_pat = re.compile(r'"')

# Contents of a literal string: any number of escape sequences
# (backslash followed by any character except a newline) and
# characters other than `"`. May match an empty string.
_lit_str_contents_pat = re.compile(r'(?:(?:\\.)|[^"])*')
973 | ||
974 | # Parses a literal string between double quotes (without an encoding | |
975 | # prefix) and returns the resulting string. | |
976 | def _try_parse_lit_str(self, with_prefix: bool): | |
977 | # Match prefix if needed | |
978 | if with_prefix: | |
979 | if self._try_parse_pat(self._lit_str_prefix_suffix_pat) is None: | |
980 | # No match | |
981 | return | |
982 | ||
983 | # Expect literal string | |
984 | m = self._expect_pat(self._lit_str_contents_pat, "Expecting a literal string") | |
985 | ||
986 | # Expect end of string | |
987 | self._expect_pat( | |
988 | self._lit_str_prefix_suffix_pat, 'Expecting `"` (end of literal string)' | |
989 | ) | |
990 | ||
991 | # Replace escape sequences | |
992 | val = m.group(0) | |
993 | ||
994 | for ec in '0abefnrtv"\\': | |
995 | val = val.replace(r"\{}".format(ec), self._lit_str_escape_seq_strs[ec]) | |
996 | ||
997 | # Return string | |
998 | return val | |
999 | ||
1000 | # Patterns for _try_parse_utf_str_encoding() | |
# UTF encoding prefix: a literal `u`.
_str_encoding_utf_prefix_pat = re.compile(r"u")

# UTF encoding specification (what follows `u`): `8`, `16be`, `16le`,
# `32be`, or `32le`, ending at a word boundary.
_str_encoding_utf_pat = re.compile(r"(?:8|(?:(?:16|32)(?:[bl]e)))\b")
1003 | ||
1004 | # Tries to parse a UTF encoding specification, returning the Python | |
1005 | # codec name on success. | |
1006 | def _try_parse_utf_str_encoding(self): | |
1007 | # Match prefix | |
1008 | if self._try_parse_pat(self._str_encoding_utf_prefix_pat) is None: | |
1009 | # No match | |
1010 | return | |
1011 | ||
1012 | # Expect UTF specification | |
1013 | m = self._expect_pat( | |
1014 | self._str_encoding_utf_pat, | |
1015 | "Expecting `8`, `16be`, `16le`, `32be` or `32le`", | |
1016 | ) | |
1017 | ||
1018 | # Convert to codec name | |
1019 | return { | |
1020 | "8": "utf_8", | |
1021 | "16be": "utf_16_be", | |
1022 | "16le": "utf_16_le", | |
1023 | "32be": "utf_32_be", | |
1024 | "32le": "utf_32_le", | |
1025 | }[m.group(0)] | |
1026 | ||
1027 | # Patterns for _try_parse_str_encoding() | |
1028 | _str_encoding_gen_prefix_pat = re.compile(r"s") | |
1029 | _str_encoding_colon_pat = re.compile(r":") | |
1030 | _str_encoding_non_utf_pat = re.compile(r"latin(?:[1-9]|10)\b") | |
1031 | ||
1032 | # Tries to parse a string encoding specification, returning the | |
1033 | # Python codec name on success. | |
1034 | # | |
1035 | # Requires the general prefix (`s:`) if `req_gen_prefix` is `True`. | |
1036 | def _try_parse_str_encoding(self, req_gen_prefix: bool = False): | |
1037 | # General prefix? | |
1038 | if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None: | |
1039 | # Expect `:` | |
1040 | self._skip_ws_and_comments() | |
1041 | self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`") | |
1042 | ||
1043 | # Expect encoding specification | |
1044 | self._skip_ws_and_comments() | |
1045 | ||
1046 | # UTF? | |
1047 | codec = self._try_parse_utf_str_encoding() | |
1048 | ||
1049 | if codec is not None: | |
1050 | return codec | |
1051 | ||
1052 | # Expect Latin | |
1053 | m = self._expect_pat( | |
1054 | self._str_encoding_non_utf_pat, | |
1055 | "Expecting `u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`", | |
1056 | ) | |
1057 | return m.group(0) | |
1058 | ||
1059 | # UTF? | |
1060 | if not req_gen_prefix: | |
1061 | return self._try_parse_utf_str_encoding() | |
1062 | ||
1063 | # Patterns for _try_parse_str() | |
1064 | _lit_str_prefix_pat = re.compile(r'"') | |
1065 | _str_prefix_pat = re.compile(r'"|\{') | |
1066 | _str_expr_pat = re.compile(r"[^}]+") | |
1067 | _str_expr_suffix_pat = re.compile(r"\}") | |
1068 | ||
1069 | # Tries to parse a string, returning a literal string or string item | |
1070 | # on success. | |
def _try_parse_str(self):
    item_text_loc = self._text_loc

    # Optional encoding specification
    codec = self._try_parse_str_encoding()
    self._skip_ws_and_comments()

    if codec is not None:
        # With an encoding specification, a literal string or an
        # expression must follow
        opening = self._expect_pat(self._str_prefix_pat, 'Expecting `"` or `{`').group(0)
    else:
        # Without an encoding specification, only a literal string
        # (UTF-8) is legal
        m_opening = self._try_parse_pat(self._lit_str_prefix_pat)

        if m_opening is None:
            return None

        opening = m_opening.group(0)

    if opening != '"':
        # Expression between `{` and `}`
        self._skip_ws_and_comments()
        inner_text_loc = self._text_loc
        m_inner = self._expect_pat(self._str_expr_pat, "Expecting an expression")
        self._expect_pat(self._str_expr_suffix_pat, "Expecting `}`")
        inner_str, inner_expr = self._ast_expr_from_str(m_inner.group(0), inner_text_loc)

        # Only reachable through the "encoding present" path above
        assert codec is not None
        return _Str(inner_str, inner_expr, codec, item_text_loc)

    # Literal string: the opening `"` is already consumed
    lit_text_loc = self._text_loc
    lit_val = self._try_parse_lit_str(False)

    if lit_val is None:
        self._raise_error("Expecting a literal string")

    # Encode now, falling back to UTF-8 without an explicit encoding
    effective_codec = codec if codec is not None else "utf_8"
    return _LitStr(_encode_str(lit_val, effective_codec, lit_text_loc), item_text_loc)
1121 | ||
1122 | # Common right parenthesis pattern | |
1123 | _right_paren_pat = re.compile(r"\)") | |
1124 | ||
1125 | # Patterns for _try_parse_group() | |
1126 | _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b") | |
1127 | ||
1128 | # Tries to parse a group, returning a group item on success. | |
def _try_parse_group(self):
    group_text_loc = self._text_loc
    m_prefix = self._try_parse_pat(self._group_prefix_pat)

    if m_prefix is None:
        # Not a group
        return None

    # Contained items
    group_items = self._parse_items()
    self._skip_ws_and_comments_and_syms()

    # The expected terminator depends on how the group was opened:
    # `)` for `(`, `!end` for the `!g`/`!group` form
    opened_with_paren = m_prefix.group(0) == "("
    end_pat = self._right_paren_pat if opened_with_paren else self._block_end_pat
    end_text = ")" if opened_with_paren else "!end"
    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_text)
    )
    return _Group(group_items, group_text_loc)
1156 | ||
1157 | # Returns a stripped expression string and an AST expression node | |
1158 | # from the expression string `expr_str` at text location `text_loc`. | |
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    # Normalizes `expr_str` (surrounding whitespace removed, newlines
    # replaced with spaces) and parses the result as a single Python
    # expression (`eval` mode).
    #
    # Returns the normalized expression string and the AST node which
    # ast.parse() returns.
    #
    # Reports an invalid expression through _raise_error() with the
    # text location `text_loc`.
    expr_str = expr_str.strip().replace("\n", " ")

    try:
        expr = ast.parse(expr_str, mode="eval")
    except (SyntaxError, ValueError):
        # Python versions before 3.12 raise `ValueError` instead of
        # `SyntaxError` when the source string contains a null byte:
        # report both the same way instead of letting `ValueError`
        # escape the parser.
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(expr_str),
            text_loc,
        )

    return expr_str, expr
1172 | ||
1173 | # Returns a `ByteOrder` value from the _valid_ byte order string | |
1174 | # `bo_str`. | |
@staticmethod
def _bo_from_str(bo_str: str):
    # `bo_str` must be `be` or `le`: anything else raises `KeyError`
    if bo_str == "be":
        return ByteOrder.BE

    if bo_str == "le":
        return ByteOrder.LE

    raise KeyError(bo_str)
1181 | ||
1182 | # Patterns for _try_parse_val() | |
1183 | _val_prefix_pat = re.compile(r"\[") | |
1184 | _val_expr_pat = re.compile(r"([^\]:]+):") | |
1185 | _fl_num_len_fmt_pat = re.compile(r"(?P<len>8|16|24|32|40|48|56|64)(?P<bo>[bl]e)?") | |
1186 | _leb128_int_fmt_pat = re.compile(r"(u|s)leb128") | |
1187 | _val_suffix_pat = re.compile(r"]") | |
1188 | ||
1189 | # Tries to parse a value (number or string) and format (fixed length | |
1190 | # in bits and optional byte order override, `uleb128`, `sleb128`, or | |
1191 | # `s:` followed with an encoding name), returning an item on | |
1192 | # success. | |
def _try_parse_val(self):
    if self._try_parse_pat(self._val_prefix_pat) is None:
        # No `[`: not a value item
        return None

    # Expression, up to and including the `:` separator
    self._skip_ws_and_comments()
    val_text_loc = self._text_loc
    m_expr = self._expect_pat(self._val_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), val_text_loc)

    # Format: try each kind in turn until one of them creates the item
    self._skip_ws_and_comments()
    item = None

    # Fixed length with an optional byte order override
    m_fl = self._try_parse_pat(self._fl_num_len_fmt_pat)

    if m_fl is not None:
        bo_str = m_fl.group("bo")
        bo = None if bo_str is None else self._bo_from_str(bo_str)
        item = _FlNum(expr_str, expr, int(m_fl.group("len")), bo, val_text_loc)

    # LEB128 integer
    if item is None:
        m_leb = self._try_parse_pat(self._leb128_int_fmt_pat)

        if m_leb is not None:
            leb_cls = _ULeb128Int if m_leb.group(1) == "u" else _SLeb128Int
            item = leb_cls(expr_str, expr, val_text_loc)

    # String with a mandatory `s:` encoding prefix
    if item is None:
        codec = self._try_parse_str_encoding(True)

        if codec is not None:
            item = _Str(expr_str, expr, codec, val_text_loc)

    # None of the known formats
    if item is None:
        self._raise_error(
            "Expecting a fixed length (multiple of eight bits and optional `be` or `le`), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
        )

    # Closing `]`
    self._skip_ws_and_comments()
    self._expect_pat(self._val_suffix_pat, "Expecting `]`")
    return item
1253 | ||
1254 | # Patterns for _try_parse_var_assign() | |
1255 | _var_assign_prefix_pat = re.compile(r"\{") | |
1256 | _var_assign_equal_pat = re.compile(r"=") | |
1257 | _var_assign_expr_pat = re.compile(r"[^}]+") | |
1258 | _var_assign_suffix_pat = re.compile(r"\}") | |
1259 | ||
1260 | # Tries to parse a variable assignment, returning a variable | |
1261 | # assignment item on success. | |
def _try_parse_var_assign(self):
    if self._try_parse_pat(self._var_assign_prefix_pat) is None:
        # No `{`: not a variable assignment
        return None

    # Variable name
    self._skip_ws_and_comments()
    var_text_loc = self._text_loc
    var_name = self._expect_pat(_py_name_pat, "Expecting a valid Python name").group(0)

    # `=`
    self._skip_ws_and_comments()
    self._expect_pat(self._var_assign_equal_pat, "Expecting `=`")

    # Raw expression text
    self._skip_ws_and_comments()
    rhs_text_loc = self._text_loc
    rhs_text = self._expect_pat(
        self._var_assign_expr_pat, "Expecting an expression"
    ).group(0)

    # `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._var_assign_suffix_pat, "Expecting `}`")

    # The name is only validated once the whole assignment is parsed:
    # it must be neither the reserved name nor a known label name
    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), var_text_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), var_text_loc)

    # Parse the expression, then remember the variable name
    rhs_str, rhs_expr = self._ast_expr_from_str(rhs_text, rhs_text_loc)
    self._var_names.add(var_name)
    return _VarAssign(var_name, rhs_str, rhs_expr, var_text_loc)
1309 | ||
1310 | # Pattern for _try_parse_set_bo() | |
1311 | _set_bo_pat = re.compile(r"!([bl]e)\b") | |
1312 | ||
1313 | # Tries to parse a byte order setting, returning a byte order | |
1314 | # setting item on success. | |
def _try_parse_set_bo(self):
    bo_text_loc = self._text_loc
    m_bo = self._try_parse_pat(self._set_bo_pat)

    if m_bo is None:
        # Neither `!be` nor `!le`
        return None

    bo_name = m_bo.group(1)

    if bo_name != "be":
        # The pattern only captures `be` or `le`
        assert bo_name == "le"
        return _SetBo(ByteOrder.LE, bo_text_loc)

    return _SetBo(ByteOrder.BE, bo_text_loc)
1333 | ||
1334 | # Tries to parse an offset setting value (after the initial `<`), | |
1335 | # returning an offset item on success. | |
def _try_parse_set_offset_val(self):
    offset_text_loc = self._text_loc
    m_offset = self._try_parse_pat(_pos_const_int_pat)

    if m_offset is None:
        # Not a constant integer
        return None

    # Base 0 makes int() deduce the base from the prefix, if any, of
    # the normalized constant
    offset = int(_norm_const_int(m_offset.group(0)), 0)
    return _SetOffset(offset, offset_text_loc)
1348 | ||
1349 | # Tries to parse a label name (after the initial `<`), returning a | |
1350 | # label item on success. | |
def _try_parse_label_name(self):
    label_text_loc = self._text_loc
    m_name = self._try_parse_pat(_py_name_pat)

    if m_name is None:
        # Not a name
        return None

    label_name = m_name.group(0)

    # The name must be neither reserved, nor an existing label name,
    # nor a known variable name (checked in this order)
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), label_text_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), label_text_loc)

    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), label_text_loc
        )

    # Remember the label name and create the item
    self._label_names.add(label_name)
    return _Label(label_name, label_text_loc)
1380 | ||
1381 | # Patterns for _try_parse_label_or_set_offset() | |
1382 | _label_set_offset_prefix_pat = re.compile(r"<") | |
1383 | _label_set_offset_suffix_pat = re.compile(r">") | |
1384 | ||
1385 | # Tries to parse a label or an offset setting, returning an item on | |
1386 | # success. | |
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # No `<`
        return None

    self._skip_ws_and_comments()

    # Try an offset setting value first, then a label name; the first
    # candidate which creates an item wins
    for try_parse in (self._try_parse_set_offset_val, self._try_parse_label_name):
        item = try_parse()

        if item is not None:
            break
    else:
        # Neither of them
        self._raise_error("Expecting a label name or an offset setting value")

    # Closing `>`
    self._skip_ws_and_comments()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1409 | ||
1410 | # Pattern for _parse_pad_val() | |
1411 | _pad_val_prefix_pat = re.compile(r"~") | |
1412 | ||
1413 | # Tries to parse a padding value, returning the padding value, or 0 | |
1414 | # if none. | |
def _parse_pad_val(self):
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No `~`: default padding byte value
        return 0

    # `~` found: a constant integer must follow
    self._skip_ws_and_comments()
    val_text_loc = self._text_loc
    m_val = self._expect_pat(
        _pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    byte_val = int(_norm_const_int(m_val.group(0)), 0)

    # Must fit in a single byte
    if byte_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(byte_val),
            val_text_loc,
        )

    return byte_val
1438 | ||
1439 | # Patterns for _try_parse_align_offset() | |
1440 | _align_offset_prefix_pat = re.compile(r"@") | |
1441 | _align_offset_val_pat = re.compile(r"\d+") | |
1442 | ||
1443 | # Tries to parse an offset alignment, returning an offset alignment | |
1444 | # item on success. | |
def _try_parse_align_offset(self):
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No `@`
        return None

    # Alignment value (decimal digits)
    self._skip_ws_and_comments()
    val_text_loc = self._text_loc
    m_val = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    alignment = int(m_val.group(0))

    # Only a strictly positive multiple of eight is acceptable
    if not (alignment > 0 and alignment % 8 == 0):
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                alignment
            ),
            val_text_loc,
        )

    # Optional padding value (0 if missing)
    return _AlignOffset(alignment, self._parse_pad_val(), item_text_loc)
1477 | ||
1478 | # Patterns for _expect_expr() | |
1479 | _inner_expr_prefix_pat = re.compile(r"\{") | |
1480 | _inner_expr_pat = re.compile(r"[^}]+") | |
1481 | _inner_expr_suffix_pat = re.compile(r"\}") | |
1482 | ||
1483 | # Parses an expression outside a `{`/`}` context. | |
1484 | # | |
1485 | # This function accepts: | |
1486 | # | |
1487 | # • A Python expression within `{` and `}`. | |
1488 | # | |
1489 | # • A Python name. | |
1490 | # | |
1491 | # • If `accept_const_int` is `True`: a constant integer, which may | |
1492 | # be negative if `allow_neg_int` is `True`. | |
1493 | # | |
# • If `accept_const_float` is `True`: a constant floating point
#   number.
#
# • If `accept_lit_str` is `True`: a literal string between double
#   quotes.
1495 | # | |
1496 | # Returns the stripped expression string and AST expression. | |
def _expect_expr(
    self,
    accept_const_int: bool = False,
    allow_neg_int: bool = False,
    accept_const_float: bool = False,
    accept_lit_str: bool = False,
):
    start_text_loc = self._text_loc

    # The candidates below are tried in this order: constant floating
    # point number, constant integer, name, literal string, and
    # finally an expression between `{` and `}`.
    if accept_const_float:
        m_float = self._try_parse_pat(_const_float_pat)

        if m_float is not None:
            return self._ast_expr_from_str(m_float.group(0), start_text_loc)

    if accept_const_int:
        m_int = self._try_parse_pat(_const_int_pat)

        if m_int is not None:
            # A negative constant is only valid when explicitly allowed
            if not allow_neg_int and m_int.group("neg") == "-":
                _raise_error("Expecting a positive constant integer", start_text_loc)

            return self._ast_expr_from_str(
                _norm_const_int(m_int.group(0)), start_text_loc
            )

    m_name = self._try_parse_pat(_py_name_pat)

    if m_name is not None:
        return self._ast_expr_from_str(m_name.group(0), start_text_loc)

    if accept_lit_str:
        lit_val = self._try_parse_lit_str(True)

        if lit_val is not None:
            # repr() makes a Python string literal out of the value
            return self._ast_expr_from_str(repr(lit_val), start_text_loc)

    # Build the list of what's acceptable here for the error message
    accepted = []

    if accept_const_int:
        accepted.append(
            "a {}constant integer".format("" if allow_neg_int else "positive ")
        )

    if accept_const_float:
        accepted.append("a constant floating point number")

    if accept_lit_str:
        accepted.append("a literal string")

    accepted += ["a name", "or `{`"]

    # No comma when there are only two alternatives
    sep = " " if len(accepted) == 2 else ", "

    # Expect `{`
    self._expect_pat(
        self._inner_expr_prefix_pat,
        "Expecting {}".format(sep.join(accepted)),
    )

    # Expect the expression text
    self._skip_ws_and_comments()
    inner_text_loc = self._text_loc
    inner_text = self._expect_pat(
        self._inner_expr_pat, "Expecting an expression"
    ).group(0)

    # Expect `}`
    self._skip_ws_and_comments()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_text, inner_text_loc)
1574 | ||
1575 | # Patterns for _try_parse_fill_until() | |
1576 | _fill_until_prefix_pat = re.compile(r"\+") | |
1577 | _fill_until_pad_val_prefix_pat = re.compile(r"~") | |
1578 | ||
1579 | # Tries to parse a filling, returning a filling item on success. | |
def _try_parse_fill_until(self):
    fill_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        # No `+`
        return None

    # Target expression (a positive constant integer is accepted)
    self._skip_ws_and_comments()
    target_str, target_expr = self._expect_expr(accept_const_int=True)

    # Optional padding value (0 if missing)
    fill_byte = self._parse_pad_val()
    return _FillUntil(target_str, target_expr, fill_byte, fill_text_loc)
1597 | ||
1598 | # Parses the multiplier expression of a repetition (block or | |
1599 | # post-item) and returns the expression string and AST node. | |
1600 | def _expect_rep_mul_expr(self): | |
1601 | return self._expect_expr(accept_const_int=True) | |
1602 | ||
1603 | # Common block end pattern | |
1604 | _block_end_pat = re.compile(r"!end\b") | |
1605 | ||
1606 | # Pattern for _try_parse_rep_block() | |
1607 | _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b") | |
1608 | ||
1609 | # Tries to parse a repetition block, returning a repetition item on | |
1610 | # success. | |
def _try_parse_rep_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        # Neither `!r` nor `!repeat`
        return None

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_str, mul_expr = self._expect_rep_mul_expr()

    # Repeated items
    self._skip_ws_and_comments_and_syms()
    rep_items = self._parse_items()

    # `!end`
    self._skip_ws_and_comments_and_syms()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )
    return _Rep(rep_items, mul_str, mul_expr, block_text_loc)
1635 | ||
1636 | # Pattern for _try_parse_cond_block() | |
1637 | _cond_block_prefix_pat = re.compile(r"!if\b") | |
1638 | _cond_block_else_pat = re.compile(r"!else\b") | |
1639 | ||
1640 | # Tries to parse a conditional block, returning a conditional item | |
1641 | # on success. | |
def _try_parse_cond_block(self):
    cond_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        # No `!if`
        return None

    # Condition expression
    self._skip_ws_and_comments()
    cond_str, cond_expr = self._expect_expr()

    # Items of the "true" branch
    self._skip_ws_and_comments_and_syms()
    then_text_loc = self._text_loc
    then_items = self._parse_items()

    # Without `!else`, the "false" branch is empty and located at the
    # beginning of the whole block
    else_items = []  # type: List[_Item]
    else_text_loc = cond_text_loc
    self._skip_ws_and_comments_and_syms()

    if self._try_parse_pat(self._cond_block_else_pat) is not None:
        # Items of the "false" branch
        self._skip_ws_and_comments_and_syms()
        else_text_loc = self._text_loc
        else_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    then_group = _Group(then_items, then_text_loc)
    else_group = _Group(else_items, else_text_loc)
    return _Cond(then_group, else_group, cond_str, cond_expr, cond_text_loc)
1684 | ||
1685 | # Pattern for _try_parse_trans_block() | |
1686 | _trans_block_prefix_pat = re.compile(r"!t(?:ransform)?\b") | |
1687 | _trans_block_type_pat = re.compile( | |
1688 | r"(?:(?:base|b)64(?:u)?|(?:base|b)(?:16|32)|(?:ascii|a|base|b)85(?:p)?|(?:quopri|qp)(?:t)?|gzip|gz|bzip2|bz2)\b" | |
1689 | ) | |
1690 | ||
1691 | # Tries to parse a transformation block, returning a transformation | |
1692 | # block item on success. | |
def _try_parse_trans_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._trans_block_prefix_pat) is None:
        # Neither `!t` nor `!transform`
        return None

    # Transformation type
    self._skip_ws_and_comments()
    trans_type = self._expect_pat(
        self._trans_block_type_pat, "Expecting a known transformation type"
    ).group(0)

    # Items to transform
    self._skip_ws_and_comments_and_syms()
    trans_items = self._parse_items()

    # `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of transformation block)",
    )

    # Long type name to (descriptive name, encoding function)
    encoders = {
        "base64": ("standard Base64", base64.standard_b64encode),
        "base64u": ("URL-safe Base64", base64.urlsafe_b64encode),
        "base32": ("Base32", base64.b32encode),
        "base16": ("Base16", base64.b16encode),
        "ascii85": ("Ascii85", base64.a85encode),
        "ascii85p": ("padded Ascii85", functools.partial(base64.a85encode, pad=True)),
        "base85": ("Base85", base64.b85encode),
        "base85p": ("padded Base85", functools.partial(base64.b85encode, pad=True)),
        "quopri": ("MIME quoted-printable", quopri.encodestring),
        "quoprit": (
            "MIME quoted-printable (with quoted tabs)",
            functools.partial(quopri.encodestring, quotetabs=True),
        ),
        "gzip": ("gzip", gzip.compress),
        "bzip2": ("bzip2", bz2.compress),
    }

    # Short type name to long type name
    long_types = {
        "b64": "base64",
        "b64u": "base64u",
        "b32": "base32",
        "b16": "base16",
        "a85": "ascii85",
        "a85p": "ascii85p",
        "b85": "base85",
        "b85p": "base85p",
        "qp": "quopri",
        "qpt": "quoprit",
        "gz": "gzip",
        "bz2": "bzip2",
    }

    # The type pattern only matches the names above
    trans_name, trans_func = encoders[long_types.get(trans_type, trans_type)]
    return _Trans(trans_items, trans_name, trans_func, block_text_loc)
1765 | ||
1766 | # Common left parenthesis pattern | |
1767 | _left_paren_pat = re.compile(r"\(") | |
1768 | ||
1769 | # Patterns for _try_parse_macro_def() and _try_parse_macro_exp() | |
1770 | _macro_params_comma_pat = re.compile(",") | |
1771 | ||
1772 | # Patterns for _try_parse_macro_def() | |
1773 | _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b") | |
1774 | ||
1775 | # Tries to parse a macro definition, adding it to `self._macro_defs` | |
1776 | # and returning `True` on success. | |
def _try_parse_macro_def(self):
    def_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Neither `!m` nor `!macro`
        return False

    # Macro name, which must be new
    self._skip_ws_and_comments()
    macro_text_loc = self._text_loc
    macro_name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), macro_text_loc)

    # `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names until `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter names
            break

        # A comma is required before any parameter name but the first
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # Parameter name, unique within this definition
        self._skip_ws_and_comments()
        param_text_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)

    # Parse the macro items with empty sets of known variable and
    # label names, restoring copies of the outer sets afterwards
    self._skip_ws_and_comments_and_syms()
    outer_var_names, outer_label_names = (
        self._var_names.copy(),
        self._label_names.copy(),
    )
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    macro_items = self._parse_items()
    self._var_names, self._label_names = outer_var_names, outer_label_names

    # `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register the definition only once it's complete
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, macro_items, def_text_loc
    )
    return True
1849 | ||
1850 | # Patterns for _try_parse_macro_exp() | |
1851 | _macro_exp_prefix_pat = re.compile(r"m\b") | |
1852 | _macro_exp_colon_pat = re.compile(r":") | |
1853 | ||
1854 | # Tries to parse a macro expansion, returning a macro expansion item | |
1855 | # on success. | |
def _try_parse_macro_exp(self):
    exp_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # No `m`
        return None

    # `:`
    self._skip_ws_and_comments()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Name of an already defined macro
    self._skip_ws_and_comments()
    macro_text_loc = self._text_loc
    macro_name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), macro_text_loc)

    # `(`
    self._skip_ws_and_comments()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values until `)`
    args_text_loc = self._text_loc
    args = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws_and_comments()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter values
            break

        # A comma is required before any value but the first
        if args:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # Value: any kind of expression which _expect_expr() supports
        self._skip_ws_and_comments()
        arg_text_loc = self._text_loc
        arg_expr_parts = self._expect_expr(
            accept_const_int=True,
            allow_neg_int=True,
            accept_const_float=True,
            accept_lit_str=True,
        )
        args.append(_MacroExpParam(*arg_expr_parts, text_loc=arg_text_loc))

    # The expansion must pass exactly what the definition expects
    arg_count = len(args)
    expected_count = len(macro_def.param_names)

    if arg_count != expected_count:
        plural_suffix = "" if arg_count == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                arg_count, plural_suffix, expected_count
            ),
            args_text_loc,
        )

    return _MacroExp(macro_name, args, exp_text_loc)
1928 | ||
1929 | # Tries to parse a base item (anything except a post-item | |
1930 | # repetition), returning it on success. | |
1931 | def _try_parse_base_item(self): | |
1932 | for func in self._base_item_parse_funcs: | |
1933 | item = func() | |
1934 | ||
1935 | if item is not None: | |
1936 | return item | |
1937 | ||
1938 | # Pattern for _try_parse_rep_post() | |
1939 | _rep_post_prefix_pat = re.compile(r"\*") | |
1940 | ||
1941 | # Tries to parse a post-item repetition, returning the expression | |
1942 | # string and AST expression node on success. | |
1943 | def _try_parse_rep_post(self): | |
1944 | # Match prefix | |
1945 | if self._try_parse_pat(self._rep_post_prefix_pat) is None: | |
1946 | # No match | |
1947 | return | |
1948 | ||
1949 | # Return expression string and AST expression | |
1950 | self._skip_ws_and_comments() | |
1951 | return self._expect_rep_mul_expr() | |
1952 | ||
1953 | # Tries to parse an item, possibly followed by a repetition, | |
1954 | # returning `True` on success. | |
1955 | # | |
1956 | # Appends any parsed item to `items`. | |
1957 | def _try_append_item(self, items: List[_Item]): | |
1958 | self._skip_ws_and_comments_and_syms() | |
1959 | ||
1960 | # Base item | |
1961 | item = self._try_parse_base_item() | |
1962 | ||
1963 | if item is None: | |
1964 | return | |
1965 | ||
1966 | # Parse repetition if the base item is repeatable | |
1967 | if isinstance(item, _RepableItem): | |
1968 | self._skip_ws_and_comments() | |
1969 | rep_text_loc = self._text_loc | |
1970 | rep_ret = self._try_parse_rep_post() | |
1971 | ||
1972 | if rep_ret is not None: | |
1973 | item = _Rep([item], *rep_ret, text_loc=rep_text_loc) | |
1974 | ||
1975 | items.append(item) | |
1976 | return True | |
1977 | ||
1978 | # Parses and returns items, skipping whitespaces, insignificant | |
1979 | # symbols, and comments when allowed, and stopping at the first | |
1980 | # unknown character. | |
1981 | # | |
1982 | # Accepts and registers macro definitions if `accept_macro_defs` | |
1983 | # is `True`. | |
1984 | def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]: | |
1985 | items = [] # type: List[_Item] | |
1986 | ||
1987 | while self._isnt_done(): | |
1988 | # Try to append item | |
1989 | if not self._try_append_item(items): | |
1990 | if accept_macro_defs and self._try_parse_macro_def(): | |
1991 | continue | |
1992 | ||
1993 | # Unknown at this point | |
1994 | break | |
1995 | ||
1996 | return items | |
1997 | ||
1998 | # Parses the whole Normand input, setting `self._res` to the main | |
1999 | # group item on success. | |
2000 | def _parse(self): | |
2001 | if len(self._normand.strip()) == 0: | |
2002 | # Special case to make sure there's something to consume | |
2003 | self._res = _Group([], self._text_loc) | |
2004 | return | |
2005 | ||
2006 | # Parse first level items | |
2007 | items = self._parse_items(True) | |
2008 | ||
2009 | # Make sure there's nothing left | |
2010 | self._skip_ws_and_comments_and_syms() | |
2011 | ||
2012 | if self._isnt_done(): | |
2013 | self._raise_error( | |
2014 | "Unexpected character `{}`".format(self._normand[self._at]) | |
2015 | ) | |
2016 | ||
2017 | # Set main group item | |
2018 | self._res = _Group(items, self._text_loc) | |
2019 | ||
2020 | ||
# What parse() returns: the generated data and the updated context.
#
# Instances are built through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class ParseResult:
    # Creates and returns an instance without going through
    # __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    # Direct construction isn't supported.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Stores the members of the result.
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._offset, self._bo = data, offset, bo
        self._vars, self._labels = variables, labels

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed
    # value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
2078 | ||
2079 | ||
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
2084 | ||
2085 | ||
2086 | # The `ICITTE` reserved name. | |
2087 | _icitte_name = "ICITTE" | |
2088 | ||
2089 | ||
# Base node visitor.
#
# Calls the _visit_name() method for each name node which isn't the name
# of a call.
#
# This works with a single flag: visiting a call node sets it, and
# finishing the visit of _any_ node clears it, so that only the first
# node visited under a call (its callee) sees the flag set.
class _NodeVisitor(ast.NodeVisitor):
    def __init__(self):
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            # The next visited name is the callee: don't report it
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            self._visit_name(node.id)

        # Visit the children, and then clear the flag whatever `node` is
        super().generic_visit(node)
        self._parent_is_call = False

    # Called with the identifier of each reported name node.
    @abc.abstractmethod
    def _visit_name(self, name: str):
        ...
2110 | ||
2111 | ||
# Expression validator: raises a parse error as soon as the expression
# uses a name (other than the name of a call) which is neither `ICITTE`
# nor one of the allowed names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._allowed_names = allowed_names
        self._text_loc = text_loc
        self._expr_str = expr_str

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        # The legal names always include `ICITTE`, so there's always at
        # least one to list in the message.
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)
        legal_names_str = ", ".join(
            sorted("`{}`".format(legal_name) for legal_name in legal_names)
        )

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
2142 | ||
2143 | ||
# Generator state: variables, labels, current offset, and current byte
# order.
#
# Building a state copies (shallow) the `variables` and `labels`
# dictionaries so that the state may be updated without touching the
# dictionaries of the caller.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.offset = offset
        self.bo = bo
        self.variables = variables.copy()
        self.labels = labels.copy()

    def __repr__(self):
        members = (self.variables, self.labels, self.offset, self.bo)
        return "_GenState({}, {}, {}, {})".format(*map(repr, members))
2162 | ||
2163 | ||
# Fixed-length number item instance: everything needed to generate the
# bytes of a fixed-length number item later.
#
# Contains the item itself, the offset within the generated data at
# which to write, the generation state to use for the evaluation, and
# the parsing error messages to add should the evaluation fail.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._item, self._offset_in_data = item, offset_in_data
        self._state, self._parse_error_msgs = state, parse_error_msgs

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the bytes of this instance within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state with which to evaluate the expression.
    @property
    def state(self):
        return self._state

    # Saved parsing error messages.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
2193 | ||
2194 | ||
2195 | # Generator of data and final state from a group item. | |
2196 | # | |
2197 | # Generation happens in memory at construction time. After building, use | |
2198 | # the `data`, `variables`, `labels`, `offset`, and `bo` properties to | |
2199 | # get the resulting context. | |
2200 | # | |
2201 | # The steps of generation are: | |
2202 | # | |
2203 | # 1. Handle each item in prefix order. | |
2204 | # | |
2205 | # The handlers append bytes to `self._data` and update some current | |
2206 | # state object (`_GenState` instance). | |
2207 | # | |
2208 | # When handling a fixed-length number item, try to evaluate its | |
2209 | # expression using the current state. If this fails, then it might be | |
2210 | # because the expression refers to a "future" label: save the current | |
2211 | # offset in `self._data` (generated data) and a snapshot of the | |
2212 | # current state within `self._fl_num_item_insts` (`_FlNumItemInst` | |
2213 | # object). _gen_fl_num_item_insts() will deal with this later. A | |
2214 | # `_FlNumItemInst` instance also contains a snapshot of the current | |
2215 | # parsing error messages (`self._parse_error_msgs`) which need to be | |
2216 | # taken into account when handling the instance later. | |
2217 | # | |
2218 | # When handling the items of a group, keep a map of immediate label | |
2219 | # names to their offset. Then, after having processed all the items, | |
2220 | # update the relevant saved state snapshots in | |
2221 | # `self._fl_num_item_insts` with those immediate label values. | |
2222 | # _gen_fl_num_item_insts() will deal with this later. | |
2223 | # | |
2224 | # 2. Handle all the fixed-length number item instances of which the | |
2225 | # expression evaluation failed before. | |
2226 | # | |
2227 | # At this point, `self._fl_num_item_insts` contains everything that's | |
2228 | # needed to evaluate the expressions, including the values of | |
2229 | # "future" labels from the point of view of some fixed-length number | |
2230 | # item instance. | |
2231 | # | |
2232 | # If an evaluation fails at this point, then it's a user error. Add | |
2233 | # to the parsing error all the saved parsing error messages of the | |
2234 | # instance. Those additional messages add precious context to the | |
2235 | # error. | |
2236 | class _Gen: | |
2237 | def __init__( | |
2238 | self, | |
2239 | group: _Group, | |
2240 | macro_defs: _MacroDefsT, | |
2241 | variables: VariablesT, | |
2242 | labels: LabelsT, | |
2243 | offset: int, | |
2244 | bo: Optional[ByteOrder], | |
2245 | ): | |
2246 | self._macro_defs = macro_defs | |
2247 | self._fl_num_item_insts = [] # type: List[_FlNumItemInst] | |
2248 | self._parse_error_msgs = [] # type: List[ParseErrorMessage] | |
2249 | self._in_trans = False | |
2250 | self._gen(group, _GenState(variables, labels, offset, bo)) | |
2251 | ||
2252 | # Generated bytes. | |
2253 | @property | |
2254 | def data(self): | |
2255 | return self._data | |
2256 | ||
2257 | # Updated variables. | |
2258 | @property | |
2259 | def variables(self): | |
2260 | return self._final_state.variables | |
2261 | ||
2262 | # Updated main group labels. | |
2263 | @property | |
2264 | def labels(self): | |
2265 | return self._final_state.labels | |
2266 | ||
2267 | # Updated offset. | |
2268 | @property | |
2269 | def offset(self): | |
2270 | return self._final_state.offset | |
2271 | ||
2272 | # Updated byte order. | |
2273 | @property | |
2274 | def bo(self): | |
2275 | return self._final_state.bo | |
2276 | ||
2277 | # Evaluates the expression `expr` of which the original string is | |
2278 | # `expr_str` at the location `text_loc` considering the current | |
2279 | # generation state `state`. | |
2280 | # | |
2281 | # If `accept_float` is `True`, then the type of the result may be | |
2282 | # `float` too. | |
2283 | # | |
2284 | # If `accept_str` is `True`, then the type of the result may be | |
2285 | # `str` too. | |
2286 | @staticmethod | |
2287 | def _eval_expr( | |
2288 | expr_str: str, | |
2289 | expr: ast.Expression, | |
2290 | text_loc: TextLocation, | |
2291 | state: _GenState, | |
2292 | accept_float: bool = False, | |
2293 | accept_str: bool = False, | |
2294 | ): | |
2295 | syms = {} # type: VariablesT | |
2296 | syms.update(state.labels) | |
2297 | ||
2298 | # Set the `ICITTE` name to the current offset | |
2299 | syms[_icitte_name] = state.offset | |
2300 | ||
2301 | # Add the current variables | |
2302 | syms.update(state.variables) | |
2303 | ||
2304 | # Validate the node and its children | |
2305 | _ExprValidator(expr_str, text_loc, set(syms.keys())).visit(expr) | |
2306 | ||
2307 | # Compile and evaluate expression node | |
2308 | try: | |
2309 | val = eval(compile(expr, "", "eval"), None, syms) | |
2310 | except Exception as exc: | |
2311 | _raise_error( | |
2312 | "Failed to evaluate expression `{}`: {}".format(expr_str, exc), | |
2313 | text_loc, | |
2314 | ) | |
2315 | ||
2316 | # Convert `bool` result type to `int` to normalize | |
2317 | if type(val) is bool: | |
2318 | val = int(val) | |
2319 | ||
2320 | # Validate result type | |
2321 | expected_types = {int} # type: Set[type] | |
2322 | ||
2323 | if accept_float: | |
2324 | expected_types.add(float) | |
2325 | ||
2326 | if accept_str: | |
2327 | expected_types.add(str) | |
2328 | ||
2329 | if type(val) not in expected_types: | |
2330 | expected_types_str = sorted( | |
2331 | ["`{}`".format(t.__name__) for t in expected_types] | |
2332 | ) | |
2333 | ||
2334 | if len(expected_types_str) == 1: | |
2335 | msg_expected = expected_types_str[0] | |
2336 | elif len(expected_types_str) == 2: | |
2337 | msg_expected = " or ".join(expected_types_str) | |
2338 | else: | |
2339 | expected_types_str[-1] = "or {}".format(expected_types_str[-1]) | |
2340 | msg_expected = ", ".join(expected_types_str) | |
2341 | ||
2342 | _raise_error( | |
2343 | "Invalid expression `{}`: expecting result type {}, not `{}`".format( | |
2344 | expr_str, msg_expected, type(val).__name__ | |
2345 | ), | |
2346 | text_loc, | |
2347 | ) | |
2348 | ||
2349 | return val | |
2350 | ||
2351 | # Forwards to _eval_expr() with the expression and text location of | |
2352 | # `item`. | |
2353 | @staticmethod | |
2354 | def _eval_item_expr( | |
2355 | item: Union[_Cond, _FillUntil, _FlNum, _Leb128Int, _Rep, _Str, _VarAssign], | |
2356 | state: _GenState, | |
2357 | accept_float: bool = False, | |
2358 | accept_str: bool = False, | |
2359 | ): | |
2360 | return _Gen._eval_expr( | |
2361 | item.expr_str, item.expr, item.text_loc, state, accept_float, accept_str | |
2362 | ) | |
2363 | ||
2364 | # Handles the byte item `item`. | |
2365 | def _handle_byte_item(self, item: _Byte, state: _GenState): | |
2366 | self._data.append(item.val) | |
2367 | state.offset += item.size | |
2368 | ||
2369 | # Handles the literal string item `item`. | |
2370 | def _handle_lit_str_item(self, item: _LitStr, state: _GenState): | |
2371 | self._data += item.data | |
2372 | state.offset += item.size | |
2373 | ||
2374 | # Handles the byte order setting item `item`. | |
2375 | def _handle_set_bo_item(self, item: _SetBo, state: _GenState): | |
2376 | # Update current byte order | |
2377 | state.bo = item.bo | |
2378 | ||
2379 | # Handles the variable assignment item `item`. | |
2380 | def _handle_var_assign_item(self, item: _VarAssign, state: _GenState): | |
2381 | # Update variable | |
2382 | state.variables[item.name] = self._eval_item_expr( | |
2383 | item, state, accept_float=True, accept_str=True | |
2384 | ) | |
2385 | ||
2386 | # Returns the effective byte order to use to encode the fixed-length | |
2387 | # number `item` considering the current state `state`. | |
2388 | @staticmethod | |
2389 | def _fl_num_item_effective_bo(item: _FlNum, state: _GenState): | |
2390 | return state.bo if item.bo is None else item.bo | |
2391 | ||
2392 | # Handles the fixed-length number item `item`. | |
2393 | def _handle_fl_num_item(self, item: _FlNum, state: _GenState): | |
2394 | # Effective byte order | |
2395 | bo = self._fl_num_item_effective_bo(item, state) | |
2396 | ||
2397 | # Validate current byte order | |
2398 | if bo is None and item.len > 8: | |
2399 | _raise_error_for_item( | |
2400 | "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format( | |
2401 | item.expr_str | |
2402 | ), | |
2403 | item, | |
2404 | ) | |
2405 | ||
2406 | # Try an immediate evaluation. If it fails, then keep everything | |
2407 | # needed to (try to) generate the bytes of this item later. | |
2408 | try: | |
2409 | data = self._gen_fl_num_item_inst_data(item, state) | |
2410 | except Exception: | |
2411 | if self._in_trans: | |
2412 | _raise_error_for_item( | |
2413 | "Invalid expression `{}`: failed to evaluate within a transformation block".format( | |
2414 | item.expr_str | |
2415 | ), | |
2416 | item, | |
2417 | ) | |
2418 | ||
2419 | self._fl_num_item_insts.append( | |
2420 | _FlNumItemInst( | |
2421 | item, | |
2422 | len(self._data), | |
2423 | copy.deepcopy(state), | |
2424 | copy.deepcopy(self._parse_error_msgs), | |
2425 | ) | |
2426 | ) | |
2427 | ||
2428 | # Reserve space in `self._data` for this instance | |
2429 | data = bytes([0] * (item.len // 8)) | |
2430 | ||
2431 | # Append bytes | |
2432 | self._data += data | |
2433 | ||
2434 | # Update offset | |
2435 | state.offset += len(data) | |
2436 | ||
2437 | # Returns the size, in bytes, required to encode the value `val` | |
2438 | # with LEB128 (signed version if `is_signed` is `True`). | |
2439 | @staticmethod | |
2440 | def _leb128_size_for_val(val: int, is_signed: bool): | |
2441 | if val < 0: | |
2442 | # Equivalent upper bound. | |
2443 | # | |
2444 | # For example, if `val` is -128, then the full integer for | |
2445 | # this number of bits would be [-128, 127]. | |
2446 | val = -val - 1 | |
2447 | ||
2448 | # Number of bits (add one for the sign if needed) | |
2449 | bits = val.bit_length() + int(is_signed) | |
2450 | ||
2451 | if bits == 0: | |
2452 | bits = 1 | |
2453 | ||
2454 | # Seven bits per byte | |
2455 | return math.ceil(bits / 7) | |
2456 | ||
2457 | # Handles the LEB128 integer item `item`. | |
2458 | def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState): | |
2459 | # Compute value | |
2460 | val = self._eval_item_expr(item, state) | |
2461 | ||
2462 | # Size in bytes | |
2463 | size = self._leb128_size_for_val(val, type(item) is _SLeb128Int) | |
2464 | ||
2465 | # For each byte | |
2466 | for _ in range(size): | |
2467 | # Seven LSBs, MSB of the byte set (continue) | |
2468 | self._data.append((val & 0x7F) | 0x80) | |
2469 | val >>= 7 | |
2470 | ||
2471 | # Clear MSB of last byte (stop) | |
2472 | self._data[-1] &= ~0x80 | |
2473 | ||
2474 | # Update offset | |
2475 | state.offset += size | |
2476 | ||
2477 | # Handles the string item `item`. | |
2478 | def _handle_str_item(self, item: _Str, state: _GenState): | |
2479 | # Compute value | |
2480 | val = str(self._eval_item_expr(item, state, accept_float=True, accept_str=True)) | |
2481 | ||
2482 | # Encode | |
2483 | data = _encode_str(val, item.codec, item.text_loc) | |
2484 | ||
2485 | # Add to data | |
2486 | self._data += data | |
2487 | ||
2488 | # Update offset | |
2489 | state.offset += len(data) | |
2490 | ||
2491 | # Handles the group item `item`, removing the immediate labels from | |
2492 | # `state` at the end if `remove_immediate_labels` is `True`. | |
2493 | def _handle_group_item( | |
2494 | self, item: _Group, state: _GenState, remove_immediate_labels: bool = True | |
2495 | ): | |
2496 | first_fl_num_item_inst_index = len(self._fl_num_item_insts) | |
2497 | immediate_labels = {} # type: LabelsT | |
2498 | ||
2499 | # Handle each item | |
2500 | for subitem in item.items: | |
2501 | if type(subitem) is _Label: | |
2502 | # Add to local immediate labels | |
2503 | immediate_labels[subitem.name] = state.offset | |
2504 | ||
2505 | self._handle_item(subitem, state) | |
2506 | ||
2507 | # Remove immediate labels from current state if needed | |
2508 | if remove_immediate_labels: | |
2509 | for name in immediate_labels: | |
2510 | del state.labels[name] | |
2511 | ||
2512 | # Add all immediate labels to all state snapshots since | |
2513 | # `first_fl_num_item_inst_index`. | |
2514 | for inst in self._fl_num_item_insts[first_fl_num_item_inst_index:]: | |
2515 | inst.state.labels.update(immediate_labels) | |
2516 | ||
2517 | # Handles the repetition item `item`. | |
2518 | def _handle_rep_item(self, item: _Rep, state: _GenState): | |
2519 | # Compute the repetition count | |
2520 | mul = _Gen._eval_item_expr(item, state) | |
2521 | ||
2522 | # Validate result | |
2523 | if mul < 0: | |
2524 | _raise_error_for_item( | |
2525 | "Invalid expression `{}`: unexpected negative result {:,}".format( | |
2526 | item.expr_str, mul | |
2527 | ), | |
2528 | item, | |
2529 | ) | |
2530 | ||
2531 | # Generate group data `mul` times | |
2532 | for _ in range(mul): | |
2533 | self._handle_group_item(item, state) | |
2534 | ||
2535 | # Handles the conditional item `item`. | |
2536 | def _handle_cond_item(self, item: _Cond, state: _GenState): | |
2537 | # Compute the conditional value | |
2538 | val = _Gen._eval_item_expr(item, state) | |
2539 | ||
2540 | # Generate selected group data | |
2541 | if val: | |
2542 | self._handle_group_item(item.true_item, state) | |
2543 | else: | |
2544 | self._handle_group_item(item.false_item, state) | |
2545 | ||
2546 | # Handles the transformation item `item`. | |
2547 | def _handle_trans_item(self, item: _Trans, state: _GenState): | |
2548 | init_in_trans = self._in_trans | |
2549 | self._in_trans = True | |
2550 | init_data_len = len(self._data) | |
2551 | init_offset = state.offset | |
2552 | ||
2553 | # Generate group data | |
2554 | self._handle_group_item(item, state) | |
2555 | ||
2556 | # Remove and keep group data | |
2557 | to_trans = self._data[init_data_len:] | |
2558 | del self._data[init_data_len:] | |
2559 | ||
2560 | # Encode group data and append to current data | |
2561 | try: | |
2562 | transformed = item.trans(to_trans) | |
2563 | except Exception as exc: | |
2564 | _raise_error_for_item( | |
2565 | "Cannot apply the {} transformation to this data: {}".format( | |
2566 | item.name, exc | |
2567 | ), | |
2568 | item, | |
2569 | ) | |
2570 | ||
2571 | self._data += transformed | |
2572 | ||
2573 | # Update offset and restore | |
2574 | state.offset = init_offset + len(transformed) | |
2575 | self._in_trans = init_in_trans | |
2576 | ||
2577 | # Evaluates the parameters of the macro expansion item `item` | |
2578 | # considering the initial state `init_state` and returns a new state | |
2579 | # to handle the items of the macro. | |
2580 | def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState): | |
2581 | # New state | |
2582 | exp_state = _GenState({}, {}, init_state.offset, init_state.bo) | |
2583 | ||
2584 | # Evaluate the parameter expressions | |
2585 | macro_def = self._macro_defs[item.name] | |
2586 | ||
2587 | for param_name, param in zip(macro_def.param_names, item.params): | |
2588 | exp_state.variables[param_name] = _Gen._eval_expr( | |
2589 | param.expr_str, | |
2590 | param.expr, | |
2591 | param.text_loc, | |
2592 | init_state, | |
2593 | accept_float=True, | |
2594 | accept_str=True, | |
2595 | ) | |
2596 | ||
2597 | return exp_state | |
2598 | ||
2599 | # Handles the macro expansion item `item`. | |
2600 | def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState): | |
2601 | parse_error_msg_text = "While expanding the macro `{}`:".format(item.name) | |
2602 | ||
2603 | try: | |
2604 | # New state | |
2605 | exp_state = self._eval_macro_exp_params(item, state) | |
2606 | ||
2607 | # Process the contained group | |
2608 | init_data_size = len(self._data) | |
2609 | parse_error_msg = ( | |
2610 | ParseErrorMessage._create( # pyright: ignore[reportPrivateUsage] | |
2611 | parse_error_msg_text, item.text_loc | |
2612 | ) | |
2613 | ) | |
2614 | self._parse_error_msgs.append(parse_error_msg) | |
2615 | self._handle_group_item(self._macro_defs[item.name], exp_state) | |
2616 | self._parse_error_msgs.pop() | |
2617 | except ParseError as exc: | |
2618 | _augment_error(exc, parse_error_msg_text, item.text_loc) | |
2619 | ||
2620 | # Update state offset and return | |
2621 | state.offset += len(self._data) - init_data_size | |
2622 | ||
2623 | # Handles the offset setting item `item`. | |
2624 | def _handle_set_offset_item(self, item: _SetOffset, state: _GenState): | |
2625 | state.offset = item.val | |
2626 | ||
2627 | # Handles the offset alignment item `item` (adds padding). | |
2628 | def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState): | |
2629 | init_offset = state.offset | |
2630 | align_bytes = item.val // 8 | |
2631 | state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes | |
2632 | self._data += bytes([item.pad_val] * (state.offset - init_offset)) | |
2633 | ||
2634 | # Handles the filling item `item` (adds padding). | |
2635 | def _handle_fill_until_item(self, item: _FillUntil, state: _GenState): | |
2636 | # Compute the new offset | |
2637 | new_offset = _Gen._eval_item_expr(item, state) | |
2638 | ||
2639 | # Validate the new offset | |
2640 | if new_offset < state.offset: | |
2641 | _raise_error_for_item( | |
2642 | "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format( | |
2643 | item.expr_str, new_offset, state.offset | |
2644 | ), | |
2645 | item, | |
2646 | ) | |
2647 | ||
2648 | # Fill | |
2649 | self._data += bytes([item.pad_val] * (new_offset - state.offset)) | |
2650 | ||
2651 | # Update offset | |
2652 | state.offset = new_offset | |
2653 | ||
2654 | # Handles the label item `item`. | |
2655 | def _handle_label_item(self, item: _Label, state: _GenState): | |
2656 | state.labels[item.name] = state.offset | |
2657 | ||
2658 | # Handles the item `item`, returning the updated next repetition | |
2659 | # instance. | |
2660 | def _handle_item(self, item: _Item, state: _GenState): | |
2661 | return self._item_handlers[type(item)](item, state) | |
2662 | ||
2663 | # Generates the data for a fixed-length integer item instance having | |
2664 | # the value `val` and the effective byte order `bo` and returns it. | |
2665 | def _gen_fl_int_item_inst_data( | |
2666 | self, val: int, bo: Optional[ByteOrder], item: _FlNum | |
2667 | ): | |
2668 | # Validate range | |
2669 | if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1: | |
2670 | _raise_error_for_item( | |
2671 | "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format( | |
2672 | val, item.len, item.expr_str | |
2673 | ), | |
2674 | item, | |
2675 | ) | |
2676 | ||
2677 | # Encode result on 64 bits (to extend the sign bit whatever the | |
2678 | # value of `item.len`). | |
2679 | data = struct.pack( | |
2680 | "{}{}".format( | |
2681 | ">" if bo in (None, ByteOrder.BE) else "<", | |
2682 | "Q" if val >= 0 else "q", | |
2683 | ), | |
2684 | val, | |
2685 | ) | |
2686 | ||
2687 | # Keep only the requested length | |
2688 | len_bytes = item.len // 8 | |
2689 | ||
2690 | if bo in (None, ByteOrder.BE): | |
2691 | # Big endian: keep last bytes | |
2692 | data = data[-len_bytes:] | |
2693 | else: | |
2694 | # Little endian: keep first bytes | |
2695 | assert bo == ByteOrder.LE | |
2696 | data = data[:len_bytes] | |
2697 | ||
2698 | # Return data | |
2699 | return data | |
2700 | ||
2701 | # Generates the data for a fixed-length floating point number item | |
2702 | # instance having the value `val` and the effective byte order `bo` | |
2703 | # and returns it. | |
2704 | def _gen_fl_float_item_inst_data( | |
2705 | self, val: float, bo: Optional[ByteOrder], item: _FlNum | |
2706 | ): | |
2707 | # Validate length | |
2708 | if item.len not in (32, 64): | |
2709 | _raise_error_for_item( | |
2710 | "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format( | |
2711 | item.len, val | |
2712 | ), | |
2713 | item, | |
2714 | ) | |
2715 | ||
2716 | # Encode and return result | |
2717 | return struct.pack( | |
2718 | "{}{}".format( | |
2719 | ">" if bo in (None, ByteOrder.BE) else "<", | |
2720 | "f" if item.len == 32 else "d", | |
2721 | ), | |
2722 | val, | |
2723 | ) | |
2724 | ||
2725 | # Generates the data for a fixed-length number item instance and | |
2726 | # returns it. | |
2727 | def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState): | |
2728 | # Effective byte order | |
2729 | bo = self._fl_num_item_effective_bo(item, state) | |
2730 | ||
2731 | # Compute value | |
2732 | val = self._eval_item_expr(item, state, True) | |
2733 | ||
2734 | # Handle depending on type | |
2735 | if type(val) is int: | |
2736 | return self._gen_fl_int_item_inst_data(val, bo, item) | |
2737 | else: | |
2738 | assert type(val) is float | |
2739 | return self._gen_fl_float_item_inst_data(val, bo, item) | |
2740 | ||
2741 | # Generates the data for all the fixed-length number item instances | |
2742 | # and writes it at the correct offset within `self._data`. | |
2743 | def _gen_fl_num_item_insts(self): | |
2744 | for inst in self._fl_num_item_insts: | |
2745 | # Generate bytes | |
2746 | try: | |
2747 | data = self._gen_fl_num_item_inst_data(inst.item, inst.state) | |
2748 | except ParseError as exc: | |
2749 | # Add all the saved parse error messages for this | |
2750 | # instance. | |
2751 | for msg in reversed(inst.parse_error_msgs): | |
2752 | _add_error_msg(exc, msg.text, msg.text_location) | |
2753 | ||
2754 | raise | |
2755 | ||
2756 | # Insert bytes into `self._data` | |
2757 | self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data | |
2758 | ||
2759 | # Generates the data (`self._data`) and final state | |
2760 | # (`self._final_state`) from `group` and the initial state `state`. | |
2761 | def _gen(self, group: _Group, state: _GenState): | |
2762 | # Initial state | |
2763 | self._data = bytearray() | |
2764 | ||
2765 | # Item handlers | |
2766 | self._item_handlers = { | |
2767 | _AlignOffset: self._handle_align_offset_item, | |
2768 | _Byte: self._handle_byte_item, | |
2769 | _Cond: self._handle_cond_item, | |
2770 | _FillUntil: self._handle_fill_until_item, | |
2771 | _FlNum: self._handle_fl_num_item, | |
2772 | _Group: self._handle_group_item, | |
2773 | _Label: self._handle_label_item, | |
2774 | _LitStr: self._handle_lit_str_item, | |
2775 | _MacroExp: self._handle_macro_exp_item, | |
2776 | _Rep: self._handle_rep_item, | |
2777 | _SetBo: self._handle_set_bo_item, | |
2778 | _SetOffset: self._handle_set_offset_item, | |
2779 | _SLeb128Int: self._handle_leb128_int_item, | |
2780 | _Str: self._handle_str_item, | |
2781 | _Trans: self._handle_trans_item, | |
2782 | _ULeb128Int: self._handle_leb128_int_item, | |
2783 | _VarAssign: self._handle_var_assign_item, | |
2784 | } # type: Dict[type, Callable[[Any, _GenState], None]] | |
2785 | ||
2786 | # Handle the group item, _not_ removing the immediate labels | |
2787 | # because the `labels` property offers them. | |
2788 | self._handle_group_item(group, state, False) | |
2789 | ||
2790 | # This is actually the final state | |
2791 | self._final_state = state | |
2792 | ||
2793 | # Generate all the fixed-length number bytes now that we know | |
2794 | # their full state | |
2795 | self._gen_fl_num_item_insts() | |
2796 | ||
2797 | ||
# Encodes the Normand input string `normand` and returns the outcome as
# a `ParseResult` instance (data, variables, labels, offset, and byte
# order of the generator).
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to values; the command-line tool of this module passes
# integers, floating point numbers, and strings. A variable name must
# not be the reserved name `ICITTE`.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Without `init_variables` or `init_labels`, an empty dictionary is
# used instead.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Fall back to fresh, empty dictionaries
    init_variables = {} if init_variables is None else init_variables
    init_labels = {} if init_labels is None else init_labels

    # Parse the text first, and then generate the data from the
    # resulting items and macro definitions
    parser = _Parser(normand, init_variables, init_labels)
    generator = _Gen(
        parser.res,
        parser.macro_defs,
        init_variables,
        init_labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        generator.data,
        generator.variables,
        generator.labels,
        generator.offset,
        generator.bo,
    )
2839 | ||
2840 | ||
# Reports a command-line usage problem: always raises `RuntimeError`
# with the message `msg` prefixed with `Command-line error: `.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2844 | ||
2845 | ||
# Converts the value part `s` of a CLI `NAME=VAL` assignment to an
# `int` or a `float`.
#
# Whitespace around `s` is ignored. When `is_label` is true, only a
# constant integer is accepted; otherwise a constant floating point
# number is tried first, and then a constant integer.
#
# Raises a command-line error if `s` matches neither form.
def _val_from_assign_val_str(s: str, is_label: bool):
    text = s.strip()

    # Only non-label values may be floating point numbers
    float_m = None if is_label else _const_float_pat.fullmatch(text)

    if float_m is not None:
        return float(float_m.group(0))

    int_m = _const_int_pat.fullmatch(text)

    if int_m is None:
        expected = "an integer" if is_label else "a number"
        _raise_cli_error(
            "Invalid assignment value `{}`: expecting {}".format(text, expected)
        )

    # Base 0 makes int() deduce the radix from the prefix of the string
    # which _norm_const_int() returns
    return int(_norm_const_int(int_m.group(0)), 0)
2865 | ||
2866 | ||
# Builds a dictionary of names to values from `args`, a list of
# `NAME=VAL` command-line strings (or `None`, which yields an empty
# dictionary).
#
# `NAME` must match `_py_name_pat`; whitespace around `=` is ignored.
# If `is_str_only` is true, each value is the raw `VAL` string;
# otherwise it's the number which _val_from_assign_val_str() returns
# for `VAL` and `is_label`.
#
# When a name occurs more than once, its last value wins.
#
# Raises a command-line error on any malformed entry.
def _dict_from_arg(args: Optional[List[str]], is_label: bool, is_str_only: bool):
    res = {}  # type: VariablesT

    # Nothing to convert
    if not args:
        return res

    # Same pattern for all the entries: compile it once
    assign_pat = re.compile(r"({})\s*=\s*(.*)$".format(_py_name_pat.pattern))

    for arg in args:
        m = assign_pat.match(arg)

        if m is None:
            _raise_cli_error("Invalid assignment `{}`".format(arg))

        name, val_str = m.group(1), m.group(2)
        res[name] = (
            val_str if is_str_only else _val_from_assign_val_str(val_str, is_label)
        )

    return res
2889 | ||
2890 | ||
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order, or `None` if none.
# 5. The initial variables.
# 6. The initial labels.
#
# The input text is read from the file at the given path or, without
# a path, from the standard input.
#
# Raises a command-line error on an invalid `NAME=VAL` assignment or on
# a negative offset.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "-v",
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial numeric variable (may be repeated)",
    )
    ap.add_argument(
        "-s",
        "--var-str",
        metavar="NAME=VAL",
        action="append",
        help="add an initial string variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels: string variables are merged into the
    # numeric ones, replacing any numeric variable having the same name
    variables = _dict_from_arg(args.var, False, False)
    variables.update(_dict_from_arg(args.var_str, False, True))
    labels = _dict_from_arg(args.label, True, False)

    # Validate offset, reporting the offending value
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The `choices` of the option only allow `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, typing.cast(LabelsT, labels)
2983 | ||
2984 | ||
# CLI entry point without exception handling: parses `normand` with the
# initial state (`offset`, `bo`, `variables`, and `labels`) and writes
# the resulting bytes to the binary standard output.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    write = sys.stdout.buffer.write
    res = parse(normand, variables, labels, offset, bo)
    write(res.data)
2994 | ||
2995 | ||
# Prints the exception message `msg` to the standard error and exits
# with status 1.
#
# The printed message has no surrounding whitespace and always ends
# with a period.
def _fail(msg: str) -> NoReturn:
    # Strip _before_ checking the last character: otherwise trailing
    # whitespace hides an existing period and a stray one gets appended
    # after that whitespace.
    msg = msg.strip()

    if not msg.endswith("."):
        msg += "."

    print(msg, file=sys.stderr)
    sys.exit(1)
3003 | ||
3004 | ||
# CLI entry point: parses the command-line arguments, runs the tool,
# and turns any exception into a message on the standard error with
# exit status 1.
#
# A `ParseError` is reported as one line per message, in reverse order
# of `messages`, each formatted as `[PATH:]LINE:COL - TEXT`, where
# `PATH` is the absolute input path (omitted without an input path).
def _run_cli():
    try:
        cli_args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        # The first element is the input path: the remaining ones are
        # what _run_cli_with_args() expects
        _run_cli_with_args(*cli_args[1:])
    except ParseError as exc:
        import os.path

        path = cli_args[0]
        prefix = "" if path is None else "{}:".format(os.path.abspath(path))
        lines = []  # type: List[str]

        for msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                msg.text_location.line_no,
                msg.text_location.col_no,
                msg.text,
            )

            # End the line with a period unless it's already punctuated
            if line[-1] not in ".:;":
                line += "."

            lines.append(line)

        _fail("\n".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
3036 | ||
3037 | ||
# Run the command-line tool when this module is executed as a script
# (as opposed to being imported).
if __name__ == "__main__":
    _run_cli()