From 56996d34182f43472cf5c33ec7907960b5c77f39 Mon Sep 17 00:00:00 2001
From: Philippe Proulx <eeppeliteloop@gmail.com>
Date: Wed, 11 Oct 2023 14:23:52 -0400
Subject: [PATCH] Add fixed-length number byte order override

This patch makes it possible to specify an immediate byte order (a `be`
or `le` suffix of the encoding length) to encode a fixed-length number,
overriding the current byte order without changing it:

    !be
    11 22 33 44
    [0xaabbccdd : 32]
    [0xaabbccdd : 32le]
    [0xaabbccdd : 32]
    ff ff ff ff

Result:

    11 22 33 44
    aa bb cc dd
    dd cc bb aa
    aa bb cc dd
    ff ff ff ff

Change-Id: I3d0c9cb3f0f30ef2f74980bf9c63a93be3bdca64
Signed-off-by: Philippe Proulx <eeppeliteloop@gmail.com>
Reviewed-on: https://review.lttng.org/c/normand/+/11040
Tested-by: jenkins <jenkins@lttng.org>
---
 README.adoc                               | 38 +++++++----
 normand/normand.py                        | 82 ++++++++++++++++++-----
 tests/fail-fl-num-inval-len.nt            |  2 +-
 tests/fail-fl-num-missing-len.nt          |  2 +-
 tests/fail-str-post-missing-enc.nt        |  2 +-
 tests/fail-str-post-missing-gen-prefix.nt |  2 +-
 tests/pass-fl-float-bo-oride-be.nt        | 12 ++++
 tests/pass-fl-float-bo-oride-le.nt        | 12 ++++
 tests/pass-fl-float-bo-oride-no-cur-bo.nt |  9 +++
 tests/pass-fl-int-bo-oride-be.nt          | 12 ++++
 tests/pass-fl-int-bo-oride-le.nt          | 12 ++++
 tests/pass-fl-int-bo-oride-no-cur-bo.nt   |  9 +++
 tests/pass-readme-intro-fl-num.nt         |  6 +-
 tests/pass-readme-learn-fl-num-1.nt       |  4 +-
 tests/pass-readme-learn-fl-num-4.nt       |  3 +-
 15 files changed, 166 insertions(+), 41 deletions(-)
 create mode 100644 tests/pass-fl-float-bo-oride-be.nt
 create mode 100644 tests/pass-fl-float-bo-oride-le.nt
 create mode 100644 tests/pass-fl-float-bo-oride-no-cur-bo.nt
 create mode 100644 tests/pass-fl-int-bo-oride-be.nt
 create mode 100644 tests/pass-fl-int-bo-oride-le.nt
 create mode 100644 tests/pass-fl-int-bo-oride-no-cur-bo.nt
diff --git a/README.adoc b/README.adoc
index 035e273..278b54b 100644
--- a/README.adoc
+++ b/README.adoc
@@ -143,14 +143,14 @@ Input:
 {strength = 4}
 !be 67 <lbl> 44 $178 [(end - lbl) * 8 + strength : 16] $99 <end>
 !le [-1993 : 32]
-[-3.141593 : 64]
+[-3.141593 : 64be]
 ----
 +
 Output:
 +
 ----
-67 44 b2 00 2c 63 37 f8  ff ff 7f bd c2 82 fb 21
-09 c0
+67 44 b2 00 2c 63 37 f8  ff ff c0 09 21 fb 82 c2
+bd 7f
 ----
 +
 The encoded number is the evaluation of a valid {py3} expression which
@@ -511,7 +511,7 @@ padding bytes to make the current offset satisfy a given alignment.
 
 |[[cur-bo]] Current byte order
 |
-The current byte order has an effect on the encoding of
+The current byte order can have an effect on the encoding of
 <<fixed-length-number,fixed-length numbers>>.
 
 A <<current-byte-order-setting,current byte order setting>> may change
@@ -542,8 +542,8 @@ The available items are:
   little endian).
 
 * A <<fixed-length-number,fixed-length number>> (integer or
-  floating point) using the <<cur-bo,current byte order>> and of which
-  the value is the result of a {py3} expression.
+  floating point), possibly using the <<cur-bo,current byte order>>, and
+  of which the value is the result of a {py3} expression.
 
 * An <<leb128-integer,LEB128 integer>> of which the value is the result
   of a {py3} expression.
@@ -903,8 +903,10 @@ The available lengths are 8, 16, 24, 32, 40, 48, 56, and 64.
 +
 The available length are 32 (_binary32_) and 64 (_binary64_).
 
-The value is the result of evaluating a {py3} expression using the
-<<cur-bo,current byte order>>.
+The value is the result of evaluating a {py3} expression.
+
+The byte order to use to encode the value is either directly specified
+or is the <<cur-bo,current byte order>>.
 
 A fixed-length number is:
 
@@ -936,14 +938,27 @@ The expression evaluates to a `float` value::
     `32` and `64`.
 --
 
+. **Optional**: a suffix of the previous encoding length, without
+  any whitespace, amongst:
++
+--
+[horizontal]
+`be`:: Encode in big endian.
+`le`:: Encode in little endian.
+--
++
+Without this suffix, the encoding byte order is the <<cur-bo,current
+byte order>>, which must be defined if the encoding length is greater
+than eight bits.
+
 . The `]` suffix.
 
 ====
 Input:
 
 ----
-!le [345:16]
-!be [-0xabcd:32]
+[345:16le]
+[-0xabcd:32be]
 ----
 
 Output:
@@ -993,8 +1008,7 @@ Output:
 Input:
 
 ----
-!le
-[2 * 0.0529 : 32]
+[2 * 0.0529 : 32le]
 ----
 
 Output:
diff --git a/normand/normand.py b/normand/normand.py
index 49188b4..bded6c9 100644
--- a/normand/normand.py
+++ b/normand/normand.py
@@ -296,26 +296,38 @@ class _VarAssign(_Item, _ExprMixin):
 # Fixed-length number, possibly needing more than one byte.
 class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
     def __init__(
-        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
+        self,
+        expr_str: str,
+        expr: ast.Expression,
+        len: int,
+        bo: Optional[ByteOrder],
+        text_loc: TextLocation,
     ):
         super().__init__(text_loc)
         _ExprMixin.__init__(self, expr_str, expr)
         self._len = len
+        self._bo = bo
 
     # Length (bits).
     @property
     def len(self):
         return self._len
 
+    # Byte order override.
+    @property
+    def bo(self):
+        return self._bo
+
     @property
     def size(self):
         return self._len // 8
 
     def __repr__(self):
-        return "_FlNum({}, {}, {}, {})".format(
+        return "_FlNum({}, {}, {}, {}, {})".format(
             repr(self._expr_str),
             repr(self._expr),
             repr(self._len),
+            repr(self._bo),
             repr(self._text_loc),
         )
 
@@ -1155,16 +1167,26 @@ class _Parser:
 
         return expr_str, expr
 
+    # Returns a `ByteOrder` value from the _valid_ byte order string
+    # `bo_str`.
+    @staticmethod
+    def _bo_from_str(bo_str: str):
+        return {
+            "be": ByteOrder.BE,
+            "le": ByteOrder.LE,
+        }[bo_str]
+
     # Patterns for _try_parse_val()
     _val_prefix_pat = re.compile(r"\[")
     _val_expr_pat = re.compile(r"([^\]:]+):")
-    _fl_num_len_fmt_pat = re.compile(r"8|16|24|32|40|48|56|64")
+    _fl_num_len_fmt_pat = re.compile(r"(?P<len>8|16|24|32|40|48|56|64)(?P<bo>[bl]e)?")
     _leb128_int_fmt_pat = re.compile(r"(u|s)leb128")
     _val_suffix_pat = re.compile(r"]")
 
     # Tries to parse a value (number or string) and format (fixed length
-    # in bits, `uleb128`, `sleb128`, or `s:` followed with an encoding
-    # name), returning an item on success.
+    # in bits and optional byte order override, `uleb128`, `sleb128`, or
+    # `s:` followed with an encoding name), returning an item on
+    # success.
     def _try_parse_val(self):
         # Match prefix
         if self._try_parse_pat(self._val_prefix_pat) is None:
@@ -1184,11 +1206,18 @@ class _Parser:
         m_fmt = self._try_parse_pat(self._fl_num_len_fmt_pat)
 
         if m_fmt is not None:
+            # Byte order override
+            if m_fmt.group("bo") is None:
+                bo = None
+            else:
+                bo = self._bo_from_str(m_fmt.group("bo"))
+
             # Create fixed-length number item
             item = _FlNum(
                 expr_str,
                 expr,
-                int(m_fmt.group(0)),
+                int(m_fmt.group("len")),
+                bo,
                 expr_text_loc,
             )
         else:
@@ -1209,7 +1238,7 @@ class _Parser:
                 else:
                     # At this point it's invalid
                     self._raise_error(
-                        "Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
+                        "Expecting a fixed length (multiple of eight bits and optional `be` or `le`), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)"
                     )
 
         # Expect `]`
@@ -2351,10 +2380,19 @@ class _Gen:
             item, state, accept_float=True, accept_str=True
         )
 
+    # Returns the effective byte order to use to encode the fixed-length
+    # number `item` considering the current state `state`.
+    @staticmethod
+    def _fl_num_item_effective_bo(item: _FlNum, state: _GenState):
+        return state.bo if item.bo is None else item.bo
+
     # Handles the fixed-length number item `item`.
     def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
+        # Effective byte order
+        bo = self._fl_num_item_effective_bo(item, state)
+
         # Validate current byte order
-        if state.bo is None and item.len > 8:
+        if bo is None and item.len > 8:
             _raise_error_for_item(
                 "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                     item.expr_str
@@ -2620,8 +2658,10 @@ class _Gen:
         return self._item_handlers[type(item)](item, state)
 
     # Generates the data for a fixed-length integer item instance having
-    # the value `val` and returns it.
-    def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
+    # the value `val` and the effective byte order `bo` and returns it.
+    def _gen_fl_int_item_inst_data(
+        self, val: int, bo: Optional[ByteOrder], item: _FlNum
+    ):
         # Validate range
         if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
             _raise_error_for_item(
@@ -2635,7 +2675,7 @@ class _Gen:
         # value of `item.len`).
         data = struct.pack(
             "{}{}".format(
-                ">" if state.bo in (None, ByteOrder.BE) else "<",
+                ">" if bo in (None, ByteOrder.BE) else "<",
                 "Q" if val >= 0 else "q",
             ),
             val,
@@ -2644,20 +2684,23 @@ class _Gen:
         # Keep only the requested length
         len_bytes = item.len // 8
 
-        if state.bo in (None, ByteOrder.BE):
+        if bo in (None, ByteOrder.BE):
             # Big endian: keep last bytes
             data = data[-len_bytes:]
         else:
             # Little endian: keep first bytes
-            assert state.bo == ByteOrder.LE
+            assert bo == ByteOrder.LE
             data = data[:len_bytes]
 
         # Return data
         return data
 
     # Generates the data for a fixed-length floating point number item
-    # instance having the value `val` and returns it.
-    def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
+    # instance having the value `val` and the effective byte order `bo`
+    # and returns it.
+    def _gen_fl_float_item_inst_data(
+        self, val: float, bo: Optional[ByteOrder], item: _FlNum
+    ):
         # Validate length
         if item.len not in (32, 64):
             _raise_error_for_item(
@@ -2670,7 +2713,7 @@ class _Gen:
         # Encode and return result
         return struct.pack(
             "{}{}".format(
-                ">" if state.bo in (None, ByteOrder.BE) else "<",
+                ">" if bo in (None, ByteOrder.BE) else "<",
                 "f" if item.len == 32 else "d",
             ),
             val,
@@ -2679,15 +2722,18 @@ class _Gen:
     # Generates the data for a fixed-length number item instance and
     # returns it.
     def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
+        # Effective byte order
+        bo = self._fl_num_item_effective_bo(item, state)
+
         # Compute value
         val = self._eval_item_expr(item, state, True)
 
         # Handle depending on type
         if type(val) is int:
-            return self._gen_fl_int_item_inst_data(val, item, state)
+            return self._gen_fl_int_item_inst_data(val, bo, item)
         else:
             assert type(val) is float
-            return self._gen_fl_float_item_inst_data(val, item, state)
+            return self._gen_fl_float_item_inst_data(val, bo, item)
 
     # Generates the data for all the fixed-length number item instances
     # and writes it at the correct offset within `self._data`.
diff --git a/tests/fail-fl-num-inval-len.nt b/tests/fail-fl-num-inval-len.nt
index 0b42cda..031e382 100644
--- a/tests/fail-fl-num-inval-len.nt
+++ b/tests/fail-fl-num-inval-len.nt
@@ -1,3 +1,3 @@
 [ 23 : 17 ]
 ---
-1:8 - Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)
+1:8 - Expecting a fixed length (multiple of eight bits and optional `be` or `le`), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)
diff --git a/tests/fail-fl-num-missing-len.nt b/tests/fail-fl-num-missing-len.nt
index e70f99e..db5b5e8 100644
--- a/tests/fail-fl-num-missing-len.nt
+++ b/tests/fail-fl-num-missing-len.nt
@@ -1,3 +1,3 @@
 [ 23 :  ]
 ---
-1:9 - Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)
+1:9 - Expecting a fixed length (multiple of eight bits and optional `be` or `le`), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)
diff --git a/tests/fail-str-post-missing-enc.nt b/tests/fail-str-post-missing-enc.nt
index c850818..e4c2d7b 100644
--- a/tests/fail-str-post-missing-enc.nt
+++ b/tests/fail-str-post-missing-enc.nt
@@ -1,3 +1,3 @@
 ["a string" : ]
 ---
-1:15 - Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)
+1:15 - Expecting a fixed length (multiple of eight bits and optional `be` or `le`), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)
diff --git a/tests/fail-str-post-missing-gen-prefix.nt b/tests/fail-str-post-missing-gen-prefix.nt
index c11dd51..d809ff7 100644
--- a/tests/fail-str-post-missing-gen-prefix.nt
+++ b/tests/fail-str-post-missing-gen-prefix.nt
@@ -1,3 +1,3 @@
 ["a string" : latin1]
 ---
-1:15 - Expecting a fixed length (multiple of eight bits), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)
+1:15 - Expecting a fixed length (multiple of eight bits and optional `be` or `le`), `uleb128`, `sleb128`, or `s:` followed with a valid encoding (`u8`, `u16be`, `u16le`, `u32be`, `u32le`, or `latin1` to `latin10`)
diff --git a/tests/pass-fl-float-bo-oride-be.nt b/tests/pass-fl-float-bo-oride-be.nt
new file mode 100644
index 0000000..4ad7758
--- /dev/null
+++ b/tests/pass-fl-float-bo-oride-be.nt
@@ -0,0 +1,12 @@
+!le
+11 22 33 44
+[45.23 : 32]
+[45.23 : 32be]
+[45.23 : 32]
+ff ff ff ff
+---
+11 22 33 44
+85 eb 34 42
+42 34 eb 85
+85 eb 34 42
+ff ff ff ff
diff --git a/tests/pass-fl-float-bo-oride-le.nt b/tests/pass-fl-float-bo-oride-le.nt
new file mode 100644
index 0000000..f7cf1a3
--- /dev/null
+++ b/tests/pass-fl-float-bo-oride-le.nt
@@ -0,0 +1,12 @@
+!be
+11 22 33 44
+[45.23 : 32]
+[45.23 : 32le]
+[45.23 : 32]
+ff ff ff ff
+---
+11 22 33 44
+42 34 eb 85
+85 eb 34 42
+42 34 eb 85
+ff ff ff ff
diff --git a/tests/pass-fl-float-bo-oride-no-cur-bo.nt b/tests/pass-fl-float-bo-oride-no-cur-bo.nt
new file mode 100644
index 0000000..214b6ec
--- /dev/null
+++ b/tests/pass-fl-float-bo-oride-no-cur-bo.nt
@@ -0,0 +1,9 @@
+11 22 33 44
+[45.23 : 32be]
+[45.23 : 32le]
+ff ff ff ff
+---
+11 22 33 44
+42 34 eb 85
+85 eb 34 42
+ff ff ff ff
diff --git a/tests/pass-fl-int-bo-oride-be.nt b/tests/pass-fl-int-bo-oride-be.nt
new file mode 100644
index 0000000..3e8cda3
--- /dev/null
+++ b/tests/pass-fl-int-bo-oride-be.nt
@@ -0,0 +1,12 @@
+!le
+11 22 33 44
+[0xaabbccdd : 32]
+[0xaabbccdd : 32be]
+[0xaabbccdd : 32]
+ff ff ff ff
+---
+11 22 33 44
+dd cc bb aa
+aa bb cc dd
+dd cc bb aa
+ff ff ff ff
diff --git a/tests/pass-fl-int-bo-oride-le.nt b/tests/pass-fl-int-bo-oride-le.nt
new file mode 100644
index 0000000..e817da7
--- /dev/null
+++ b/tests/pass-fl-int-bo-oride-le.nt
@@ -0,0 +1,12 @@
+!be
+11 22 33 44
+[0xaabbccdd : 32]
+[0xaabbccdd : 32le]
+[0xaabbccdd : 32]
+ff ff ff ff
+---
+11 22 33 44
+aa bb cc dd
+dd cc bb aa
+aa bb cc dd
+ff ff ff ff
diff --git a/tests/pass-fl-int-bo-oride-no-cur-bo.nt b/tests/pass-fl-int-bo-oride-no-cur-bo.nt
new file mode 100644
index 0000000..14167a4
--- /dev/null
+++ b/tests/pass-fl-int-bo-oride-no-cur-bo.nt
@@ -0,0 +1,9 @@
+11 22 33 44
+[0xaabbccdd : 32be]
+[0xaabbccdd : 32le]
+ff ff ff ff
+---
+11 22 33 44
+aa bb cc dd
+dd cc bb aa
+ff ff ff ff
diff --git a/tests/pass-readme-intro-fl-num.nt b/tests/pass-readme-intro-fl-num.nt
index d0a6b10..119c48f 100644
--- a/tests/pass-readme-intro-fl-num.nt
+++ b/tests/pass-readme-intro-fl-num.nt
@@ -1,7 +1,7 @@
 {strength = 4}
 !be 67 <lbl> 44 $178 [(end - lbl) * 8 + strength : 16] $99 <end>
 !le [-1993 : 32]
-[-3.141593 : 64]
+[-3.141593 : 64be]
 ---
-67 44 b2 00 2c 63 37 f8 ff ff 7f bd c2 82 fb 21
-09 c0
+67 44 b2 00 2c 63 37 f8 ff ff c0 09 21 fb 82 c2
+bd 7f
diff --git a/tests/pass-readme-learn-fl-num-1.nt b/tests/pass-readme-learn-fl-num-1.nt
index 48965e6..30f1fdc 100644
--- a/tests/pass-readme-learn-fl-num-1.nt
+++ b/tests/pass-readme-learn-fl-num-1.nt
@@ -1,4 +1,4 @@
-!le [345:16]
-!be [-0xabcd:32]
+[345:16le]
+[-0xabcd:32be]
 ---
 59 01 ff ff 54 33
diff --git a/tests/pass-readme-learn-fl-num-4.nt b/tests/pass-readme-learn-fl-num-4.nt
index c31ac6b..b051cea 100644
--- a/tests/pass-readme-learn-fl-num-4.nt
+++ b/tests/pass-readme-learn-fl-num-4.nt
@@ -1,4 +1,3 @@
-!le
-[2 * 0.0529 : 32]
+[2 * 0.0529 : 32le]
 ---
 ac ad d8 3d
-- 
2.34.1