From 6dd69a2ad08c21939a8756468e859ce423094712 Mon Sep 17 00:00:00 2001 From: Philippe Proulx Date: Fri, 6 Oct 2023 16:52:52 -0400 Subject: [PATCH] Make it possible to specify more than one byte with `%` This patch makes it possible to specify more than one consecutive byte in binary by using two or more `%` prefixes. The number of `%` indicates the number of subsequent bytes. This makes it possible to put a 32-bit binary constant, for example, without any delimiter. Instead of: %11011101 %11110010 %01001101 %11101101 you may write: %%%%11011101111100100100110111101101 I have no choice but to use something (the number of `%` here) to indicate the number of bytes that follow because this won't work: %11011101111100100100110111101101 This last example is equivalent to: %11011101 11 11 00 10 01 00 11 01 11 10 11 01 which really is 13 bytes. This patch also fixes `normand.py` for Python 3.4. Change-Id: I4945c9fc5925ab7a32c9015c41b6593db893cdd3 Signed-off-by: Philippe Proulx --- README.adoc | 25 +++++++++++++------ normand/normand.py | 39 +++++++++++++++++++++--------- pyproject.toml | 2 +- tests/pass-bin-const-multi.nt | 9 +++++++ tests/pass-readme-learn-const-4.nt | 2 ++ 5 files changed, 57 insertions(+), 20 deletions(-) create mode 100644 tests/pass-bin-const-multi.nt diff --git a/README.adoc b/README.adoc index e7c85d6..e2c9154 100644 --- a/README.adoc +++ b/README.adoc @@ -29,7 +29,7 @@ _**Normand**_ is a text-to-binary processor with its own language. This package offers both a portable {py3} module and a command-line tool. -WARNING: This version of Normand is 0.15, meaning both the Normand +WARNING: This version of Normand is 0.16, meaning both the Normand language and the module/CLI interface aren't stable. ifdef::env-github[] @@ -507,7 +507,8 @@ the current byte order. The available items are: -* A <> representing a single byte. +* A <> representing one or more + constant bytes. * A <> representing a sequence of bytes encoding UTF-8, UTF-16, or UTF-32 data. 
@@ -614,18 +615,25 @@ where `file` is the name of a file containing the Normand input. === Byte constant -A _byte constant_ represents a single byte. +A _byte constant_ represents one or more constant bytes. A byte constant is: Hexadecimal form:: - Two consecutive hexadecimal digits. + Two consecutive hexadecimal digits representing a single byte. Decimal form:: - One or more digits after the `$` prefix. + One or more digits after the `$` prefix representing a single byte. -Binary form:: - Eight bits after the `%` prefix. +Binary form:: {empty} ++ +-- +. __**N**__ `%` prefixes (at least one). ++ +The number of `%` characters is the number of subsequent expected bytes. + +. __**N**__{nbsp}×{nbsp}8 bits (`0` or `1`). +-- ==== Input: @@ -676,12 +684,13 @@ Input: ---- %01110011 %01100001 %01101100 %01110101 %01110100 +%%%1101:0010 11111111 #A#11 #B#00 #C#011 #D#1 ---- Output: ---- -73 61 6c 75 74 ┆ salut +73 61 6c 75 74 d2 ff c7 ┆ salut••• ---- ==== diff --git a/normand/normand.py b/normand/normand.py index 4699a28..2a2acfa 100644 --- a/normand/normand.py +++ b/normand/normand.py @@ -30,7 +30,7 @@ # Upstream repository: . __author__ = "Philippe Proulx" -__version__ = "0.15.0" +__version__ = "0.16.0" __all__ = [ "__author__", "__version__", @@ -727,27 +727,43 @@ class _Parser: # Patterns for _try_parse_bin_byte() _bin_byte_bit_pat = re.compile(r"[01]") - _bin_byte_prefix_pat = re.compile(r"%") + _bin_byte_prefix_pat = re.compile(r"%+") # Tries to parse a binary byte, returning a byte item on success. 
def _try_parse_bin_byte(self): begin_text_loc = self._text_loc # Match prefix - if self._try_parse_pat(self._bin_byte_prefix_pat) is None: + m = self._try_parse_pat(self._bin_byte_prefix_pat) + + if m is None: # No match return - # Expect eight bits - bits = [] # type: List[str] + # Expect as many bytes as there are `%` prefixes + items = [] # type: List[_Item] - for _ in range(8): + for _ in range(len(m.group(0))): self._skip_ws_and_comments() - m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)") - bits.append(m.group(0)) + byte_text_loc = self._text_loc + bits = [] # type: List[str] + + # Expect eight bits + for _ in range(8): + self._skip_ws_and_comments() + m = self._expect_pat( + self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)" + ) + bits.append(m.group(0)) + + items.append(_Byte(int("".join(bits), 2), byte_text_loc)) # Return item - return _Byte(int("".join(bits), 2), begin_text_loc) + if len(items) == 1: + return items[0] + + # As group + return _Group(items, begin_text_loc) # Patterns for _try_parse_dec_byte() _dec_byte_prefix_pat = re.compile(r"\$") @@ -1548,7 +1564,8 @@ class _Parser: param_text_loc = self._text_loc params.append( _MacroExpParam( - *self._expect_const_int_name_expr(True, True), param_text_loc + *self._expect_const_int_name_expr(True, True), + text_loc=param_text_loc ) ) expect_comma = True @@ -1664,7 +1681,7 @@ class _Parser: rep_ret = self._try_parse_rep_post() if rep_ret is not None: - item = _Rep(item, *rep_ret, rep_text_loc) + item = _Rep(item, *rep_ret, text_loc=rep_text_loc) items.append(item) return True diff --git a/pyproject.toml b/pyproject.toml index 03dd81e..590e9c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ [tool.poetry] name = 'normand' -version = '0.15.0' +version = '0.16.0' description = 'Text-to-binary processor with its own language' license = 'MIT' authors = ['Philippe Proulx '] diff --git a/tests/pass-bin-const-multi.nt b/tests/pass-bin-const-multi.nt new file mode 
100644 index 0000000..5139c8d --- /dev/null +++ b/tests/pass-bin-const-multi.nt @@ -0,0 +1,9 @@ +%11001010 +%%11110000 11001010 +%%%00110101 11110000 11001010 +%%%%10101010 00110101 11110000 11001010 +--- +ca +f0 ca +35 f0 ca +aa 35 f0 ca diff --git a/tests/pass-readme-learn-const-4.nt b/tests/pass-readme-learn-const-4.nt index 0eefc9d..743096e 100644 --- a/tests/pass-readme-learn-const-4.nt +++ b/tests/pass-readme-learn-const-4.nt @@ -1,3 +1,5 @@ %01110011 %01100001 %01101100 %01110101 %01110100 +%%%1101:0010 11111111 #A#11 #B#00 #C#011 #D#1 --- 73 61 6c 75 74 +d2 ff c7 -- 2.34.1