From ba11fb1d54eed503a11e9c31339eb2f68fe776e4 Mon Sep 17 00:00:00 2001 From: Philippe Proulx Date: Tue, 10 Oct 2023 19:14:52 -0400 Subject: [PATCH] Parse comments between tokens This patch replaces many instances of whitespace parsing between the individual tokens of an item with whitespace and comment parsing. This makes it possible to comment out parts of items: aa bb cc @#32#64#~ffh# 88 Result: aa bb cc 00 00 00 00 00 88 Moreover, insignificant symbols are now only supported between and around items, hexadecimal nibbles, and binary bits. This is where they're the most useful to improve readability, not between tokens of an item. For example, this is not useful: s/:?latin1,"hello world"=*|5 The new `tests/pass-comment-all.nt` file tests comments in all positions between eligible tokens. Change-Id: I7167440723010c2549f614fc6ab41621df0fd8b4 Signed-off-by: Philippe Proulx Reviewed-on: https://review.lttng.org/c/normand/+/11020 Tested-by: jenkins --- README.adoc | 22 ++----- normand/normand.py | 97 +++++++++++++++--------------- pyproject.toml | 2 +- tests/pass-comment-all.nt | 91 ++++++++++++++++++++++++++++ tests/pass-comment-rep-post.nt | 4 ++ tests/pass-comment-sym-rep-post.nt | 4 -- 6 files changed, 151 insertions(+), 69 deletions(-) create mode 100644 tests/pass-comment-all.nt create mode 100644 tests/pass-comment-rep-post.nt delete mode 100644 tests/pass-comment-sym-rep-post.nt diff --git a/README.adoc b/README.adoc index b700941..52e81c1 100644 --- a/README.adoc +++ b/README.adoc @@ -29,7 +29,7 @@ _**Normand**_ is a text-to-binary processor with its own language. This package offers both a portable {py3} module and a command-line tool. -WARNING: This version of Normand is 0.19, meaning both the Normand +WARNING: This version of Normand is 0.20, meaning both the Normand language and the module/CLI interface aren't stable. 
ifdef::env-github[] @@ -557,23 +557,13 @@ Moreover, you can repeat many items above a constant or variable number of times with the ``pass:[*]`` operator _after_ the item to repeat. This is called a <>. -A Normand comment may exist: - -* Between items, possibly within a group. -* Between the nibbles of a constant hexadecimal byte. -* Between the bits of a constant binary byte. -* Between the last item and the ``pass:[*]`` character of a post-item - repetition, and between that ``pass:[*]`` character and the following - number or expression. -* Between the ``!repeat``/``!r`` block opening and the following - constant integer, name, or expression of a repetition block. -* Between the ``!if`` block opening and the following name or expression - of a conditional block. +A Normand comment may exist pretty much anywhere between tokens. A comment is anything between two ``pass:[#]`` characters on the same -line, or from ``pass:[#]`` until the end of the line. Whitespaces and -the following symbol characters are also considered comments where a -comment may exist: +line, or from ``pass:[#]`` until the end of the line. Whitespaces are +also considered comments. The following symbols are also considered +comments around and between items, as well as between hexadecimal +nibbles and binary bits of <>: ---- / \ ? & : ; . , [ ] _ = | - diff --git a/normand/normand.py b/normand/normand.py index 8786900..95c76eb 100644 --- a/normand/normand.py +++ b/normand/normand.py @@ -30,7 +30,7 @@ # Upstream repository: . 
__author__ = "Philippe Proulx" -__version__ = "0.19.0" +__version__ = "0.20.0" __all__ = [ "__author__", "__version__", @@ -761,23 +761,23 @@ class _Parser: # Return match object return m - # Pattern for _skip_ws_and_comments() + # Patterns for _skip_*() + _comment_pat = re.compile(r"#[^#]*?(?:$|#)", re.M) + _ws_or_comments_pat = re.compile(r"(?:\s|{})*".format(_comment_pat.pattern), re.M) _ws_or_syms_or_comments_pat = re.compile( - r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*" + r"(?:[\s/\\?&:;.,[\]_=|-]|{})*".format(_comment_pat.pattern), re.M ) + # Skips as many whitespaces and comments as possible, but not + # insignificant symbol characters. + def _skip_ws_and_comments(self): + self._try_parse_pat(self._ws_or_comments_pat) + # Skips as many whitespaces, insignificant symbol characters, and # comments as possible. - def _skip_ws_and_comments(self): + def _skip_ws_and_comments_and_syms(self): self._try_parse_pat(self._ws_or_syms_or_comments_pat) - # Pattern for _skip_ws() - _ws_pat = re.compile(r"\s*") - - # Skips as many whitespaces as possible. 
- def _skip_ws(self): - self._try_parse_pat(self._ws_pat) - # Pattern for _try_parse_hex_byte() _nibble_pat = re.compile(r"[A-Fa-f0-9]") @@ -794,7 +794,7 @@ class _Parser: return # Expect another nibble - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() m_low = self._expect_pat( self._nibble_pat, "Expecting another hexadecimal nibble" ) @@ -821,13 +821,13 @@ class _Parser: items = [] # type: List[_Item] for _ in range(len(m.group(0))): - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() byte_text_loc = self._text_loc bits = [] # type: List[str] # Expect eight bits for _ in range(8): - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() m = self._expect_pat( self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)" ) @@ -856,7 +856,7 @@ class _Parser: return # Expect the value - self._skip_ws() + self._skip_ws_and_comments() m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant") # Compute value @@ -977,11 +977,11 @@ class _Parser: # General prefix? if self._try_parse_pat(self._str_encoding_gen_prefix_pat) is not None: # Expect `:` - self._skip_ws() + self._skip_ws_and_comments() self._expect_pat(self._str_encoding_colon_pat, "Expecting `:`") # Expect encoding specification - self._skip_ws() + self._skip_ws_and_comments() # UTF? 
codec = self._try_parse_utf_str_encoding() @@ -1015,7 +1015,7 @@ class _Parser: codec = self._try_parse_str_encoding() # Match prefix (expect if there's an encoding specification) - self._skip_ws() + self._skip_ws_and_comments() if codec is None: # No encoding: only a literal string (UTF-8) is legal @@ -1045,7 +1045,7 @@ class _Parser: return _LitStr(data, begin_text_loc) else: # Expect expression - self._skip_ws() + self._skip_ws_and_comments() expr_text_loc = self._text_loc m = self._expect_pat(self._str_expr_pat, "Expecting an expression") @@ -1080,7 +1080,7 @@ class _Parser: items = self._parse_items() # Expect end of group - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() if m_open.group(0) == "(": pat = self._right_paren_pat @@ -1132,6 +1132,7 @@ class _Parser: expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc) # Fixed length? + self._skip_ws_and_comments() m_fmt = self._try_parse_pat(self._fl_num_len_fmt_pat) if m_fmt is None: @@ -1193,7 +1194,7 @@ class _Parser: _raise_error("Existing label named `{}`".format(name), begin_text_loc) # Expect an expression - self._skip_ws() + self._skip_ws_and_comments() m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression") # Create an expression node from the expression string @@ -1244,7 +1245,7 @@ class _Parser: # No match return - self._skip_ws() + self._skip_ws_and_comments() # Variable assignment item? item = self._try_parse_var_assign() @@ -1264,7 +1265,7 @@ class _Parser: ) # Expect suffix - self._skip_ws() + self._skip_ws_and_comments() self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`") return item @@ -1328,7 +1329,7 @@ class _Parser: return # Offset setting item? 
- self._skip_ws() + self._skip_ws_and_comments() item = self._try_parse_set_offset_val() if item is None: @@ -1340,7 +1341,7 @@ class _Parser: self._raise_error("Expecting a label name or an offset setting value") # Expect suffix - self._skip_ws() + self._skip_ws_and_comments() self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`") return item @@ -1351,11 +1352,11 @@ class _Parser: # if none. def _parse_pad_val(self): # Padding value? - self._skip_ws() + self._skip_ws_and_comments() pad_val = 0 if self._try_parse_pat(self._pad_val_prefix_pat) is not None: - self._skip_ws() + self._skip_ws_and_comments() pad_val_text_loc = self._text_loc m = self._expect_pat( _pos_const_int_pat, @@ -1388,7 +1389,7 @@ class _Parser: return # Expect an alignment - self._skip_ws() + self._skip_ws_and_comments() align_text_loc = self._text_loc m = self._expect_pat( self._align_offset_val_pat, @@ -1498,13 +1499,13 @@ class _Parser: ) # Expect an expression - self._skip_ws() + self._skip_ws_and_comments() expr_text_loc = self._text_loc m = self._expect_pat(self._inner_expr_pat, "Expecting an expression") expr_str = m.group(0) # Expect `}` - self._skip_ws() + self._skip_ws_and_comments() self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`") return self._ast_expr_from_str(expr_str, expr_text_loc) @@ -1523,7 +1524,7 @@ class _Parser: return # Expect expression - self._skip_ws() + self._skip_ws_and_comments() expr_str, expr = self._expect_expr(accept_const_int=True) # Padding value @@ -1558,12 +1559,12 @@ class _Parser: expr_str, expr = self._expect_rep_mul_expr() # Parse items - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() items_text_loc = self._text_loc items = self._parse_items() # Expect end of block - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() self._expect_pat( self._block_end_pat, "Expecting an item or `!end` (end of repetition block)" ) @@ -1590,18 +1591,18 @@ class _Parser: expr_str, expr = self._expect_expr() # 
Parse "true" items - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() true_items_text_loc = self._text_loc true_items = self._parse_items() false_items = [] # type: List[_Item] false_items_text_loc = begin_text_loc # `!else`? - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() if self._try_parse_pat(self._cond_block_else_pat) is not None: # Parse "false" items - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() false_items_text_loc = self._text_loc false_items = self._parse_items() @@ -1640,7 +1641,7 @@ class _Parser: return False # Expect a name - self._skip_ws() + self._skip_ws_and_comments() name_text_loc = self._text_loc m = self._expect_pat(_py_name_pat, "Expecting a valid macro name") @@ -1651,7 +1652,7 @@ class _Parser: _raise_error("Duplicate macro named `{}`".format(name), name_text_loc) # Expect `(` - self._skip_ws() + self._skip_ws_and_comments() self._expect_pat(self._left_paren_pat, "Expecting `(`") # Try to parse comma-separated parameter names @@ -1659,7 +1660,7 @@ class _Parser: expect_comma = False while True: - self._skip_ws() + self._skip_ws_and_comments() # End? 
if self._try_parse_pat(self._right_paren_pat) is not None: @@ -1671,7 +1672,7 @@ class _Parser: self._expect_pat(self._macro_params_comma_pat, "Expecting `,`") # Expect parameter name - self._skip_ws() + self._skip_ws_and_comments() param_text_loc = self._text_loc m = self._expect_pat(_py_name_pat, "Expecting valid parameter name") @@ -1685,7 +1686,7 @@ class _Parser: expect_comma = True # Expect items - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() items_text_loc = self._text_loc old_var_names = self._var_names.copy() old_label_names = self._label_names.copy() @@ -1722,11 +1723,11 @@ class _Parser: return # Expect `:` - self._skip_ws() + self._skip_ws_and_comments() self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`") # Expect a macro name - self._skip_ws() + self._skip_ws_and_comments() name_text_loc = self._text_loc m = self._expect_pat(_py_name_pat, "Expecting a valid macro name") @@ -1738,7 +1739,7 @@ class _Parser: _raise_error("Unknown macro name `{}`".format(name), name_text_loc) # Expect `(` - self._skip_ws() + self._skip_ws_and_comments() self._expect_pat(self._left_paren_pat, "Expecting `(`") # Try to parse comma-separated parameter values @@ -1747,7 +1748,7 @@ class _Parser: expect_comma = False while True: - self._skip_ws() + self._skip_ws_and_comments() # End? if self._try_parse_pat(self._right_paren_pat) is not None: @@ -1758,7 +1759,7 @@ class _Parser: if expect_comma: self._expect_pat(self._macro_params_comma_pat, "Expecting `,`") - self._skip_ws() + self._skip_ws_and_comments() param_text_loc = self._text_loc params.append( _MacroExpParam( @@ -1869,7 +1870,7 @@ class _Parser: # # Appends any parsed item to `items`. 
def _try_append_item(self, items: List[_Item]): - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() # Base item item = self._try_parse_base_item() @@ -1921,7 +1922,7 @@ class _Parser: items = self._parse_items(True) # Make sure there's nothing left - self._skip_ws_and_comments() + self._skip_ws_and_comments_and_syms() if self._isnt_done(): self._raise_error( diff --git a/pyproject.toml b/pyproject.toml index 1c45be3..97c2a1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ [tool.poetry] name = 'normand' -version = '0.19.0' +version = '0.20.0' description = 'Text-to-binary processor with its own language' license = 'MIT' authors = ['Philippe Proulx '] diff --git a/tests/pass-comment-all.nt b/tests/pass-comment-all.nt new file mode 100644 index 0000000..24ca19c --- /dev/null +++ b/tests/pass-comment-all.nt @@ -0,0 +1,91 @@ +# constant byte: hexadecimal +a#bonjour tout le monde#a#bonjour tout le monde#bb#bonjour tout le monde# + +# constant byte: binary +#bonjour tout le monde#%#bonjour tout le monde# +#bonjour tout le monde#1#bonjour tout le monde#0#bonjour tout le monde#1#bonjour tout le monde#0#bonjour tout le monde# +#bonjour tout le monde#0#bonjour tout le monde#0#bonjour tout le monde#1#bonjour tout le monde#1#bonjour tout le monde# + +# constant byte: decimal +#bonjour tout le monde#$#bonjour tout le monde#-92#bonjour tout le monde# + +# literal string +#bonjour tout le monde#s#bonjour tout le monde#:#bonjour tout le monde#u16be#bonjour tout le monde#"coucou"#bonjour tout le monde# + +# byte order setting +#bonjour tout le monde#{#bonjour tout le monde#be#bonjour tout le monde#}#bonjour tout le monde#{0xaabb:16} +#bonjour tout le monde#{#bonjour tout le monde#le#bonjour tout le monde#}#bonjour tout le monde#{0xaabb:16} + +# fixed-length number +#bonjour tout le monde#{ 0xbeef :#bonjour tout le monde#16#bonjour tout le monde#}#bonjour tout le monde# + +# LEB128 integer: unsigned +#bonjour tout le monde#{ 12345678 :#bonjour 
tout le monde#uleb128#bonjour tout le monde#}#bonjour tout le monde# + +# LEB128 integer: signed +#bonjour tout le monde#{ -12345678 :#bonjour tout le monde#sleb128#bonjour tout le monde#}#bonjour tout le monde# + +# string: encoding before +#bonjour tout le monde#s#bonjour tout le monde#:#bonjour tout le monde#latin1#bonjour tout le monde#{ "allo" }#bonjour tout le monde# + +# string: encoding after +#bonjour tout le monde#{ 'meow mix' :#bonjour tout le monde#s#bonjour tout le monde#:#bonjour tout le monde#latin3#bonjour tout le monde#}#bonjour tout le monde# + +# current offset setting +#bonjour tout le monde#<#bonjour tout le monde#18#bonjour tout le monde#>#bonjour tout le monde#{ICITTE-18:8} + +# current offset alignment +#bonjour tout le monde#@#bonjour tout le monde#64#bonjour tout le monde#~#bonjour tout le monde#ffh#bonjour tout le monde# + +# filling +#bonjour tout le monde#+#bonjour tout le monde#{ ICITTE+8+4+2+1+1 }#bonjour tout le monde#~#bonjour tout le monde#0xcc#bonjour tout le monde# + +# label +#bonjour tout le monde#<#bonjour tout le monde#salut#bonjour tout le monde#>#bonjour tout le monde#{salut-ICITTE:8} + +# group +#bonjour tout le monde#(#bonjour tout le monde#aa#bonjour tout le monde#)#bonjour tout le monde# +#bonjour tout le monde#!group#bonjour tout le monde#bb#bonjour tout le monde#!end#bonjour tout le monde# +#bonjour tout le monde#!g#bonjour tout le monde#cc#bonjour tout le monde#!end#bonjour tout le monde# + +# conditional block +#bonjour tout le monde#!if#bonjour tout le monde#{ 45 }#bonjour tout le monde#$128#bonjour tout le monde#!end#bonjour tout le monde# + +# repetition block +#bonjour tout le monde#!repeat#bonjour tout le monde#3#bonjour tout le monde#55#bonjour tout le monde#!end#bonjour tout le monde# +#bonjour tout le monde#!r#bonjour tout le monde#3#bonjour tout le monde#77#bonjour tout le monde#!end#bonjour tout le monde# + +# macro definition block +#bonjour tout le monde#!macro#bonjour tout le monde#gang#bonjour tout le 
monde#(#bonjour tout le monde#meow#bonjour tout le monde#,#bonjour tout le monde#mix#bonjour tout le monde#)#bonjour tout le monde# + aa {meow:8} bb {mix:8} +#bonjour tout le monde#!end#bonjour tout le monde# + +# macro expansion +#bonjour tout le monde#m#bonjour tout le monde#:#bonjour tout le monde#gang#bonjour tout le monde#(#bonjour tout le monde#0x44#bonjour tout le monde#,#bonjour tout le monde#0x88#bonjour tout le monde#)#bonjour tout le monde# + +# post-item repetition +"salut"#bonjour tout le monde#*#bonjour tout le monde#4 +--- +aa bb +a3 +a4 +00 63 00 6f 00 75 00 63 00 6f 00 75 +aa bb +bb aa +ef be +ce c2 f1 05 +b2 bd 8e 7a +61 6c 6c 6f +6d 65 6f 77 20 6d 69 78 +00 +ff ff ff ff ff +cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc +00 +aa +bb +cc +80 +55 55 55 +77 77 77 +aa 44 bb 88 +73 61 6c 75 74 73 61 6c 75 74 73 61 6c 75 74 73 61 6c 75 74 diff --git a/tests/pass-comment-rep-post.nt b/tests/pass-comment-rep-post.nt new file mode 100644 index 0000000..3f34c92 --- /dev/null +++ b/tests/pass-comment-rep-post.nt @@ -0,0 +1,4 @@ +# repetition +ff# alors #*#bonjour tout le monde#5 +--- +ff ff ff ff ff diff --git a/tests/pass-comment-sym-rep-post.nt b/tests/pass-comment-sym-rep-post.nt deleted file mode 100644 index 98c94bc..0000000 --- a/tests/pass-comment-sym-rep-post.nt +++ /dev/null @@ -1,4 +0,0 @@ -# repetition -ff/\?&:;.,[]_=|-*/\?&:;.,[]_=|-5 ---- -ff ff ff ff ff -- 2.34.1