To write a bit array field within a serialization function, you can use
memcpy() directly when all of the following conditions are satisfied:
* The field type's size is 8, 16, 32, or 64 bits.
* The field type's alignment is a multiple of 8 bits.
* The field type's byte order is the target byte order (this is always
the case since commit 4c91e76, "config.py: remove bit array field
type's byte order property").
With recent compilers and `-O2`, bt_bitfield_write_*() and memcpy()
compile to the same machine code.
For example, consider this C code:
const uint8_t *data;
void with_bitfield(size_t offset, int val)
{
bt_bitfield_write_le(&data[offset], 0, sizeof(val) * 8, int, val);
}
void with_memcpy(size_t offset, int val)
{
memcpy(&data[offset], &val, sizeof(val));
}
On x86-64, this gets compiled to:
GCC 10.2:
with_bitfield:
mov rax, QWORD PTR data[rip]
mov DWORD PTR [rax+rdi], esi
ret
with_memcpy:
mov rax, QWORD PTR data[rip]
mov DWORD PTR [rax+rdi], esi
ret
Clang 10.0:
with_bitfield: # @with_bitfield
mov rax, qword ptr [rip + data]
mov dword ptr [rax + rdi], esi
ret
with_memcpy: # @with_memcpy
mov rax, qword ptr [rip + data]
mov dword ptr [rax + rdi], esi
ret
GCC 7.3:
with_bitfield:
add rdi, QWORD PTR data[rip]
mov eax, esi
mov BYTE PTR [rdi], sil
mov BYTE PTR [rdi+1], ah
sar esi, 24
sar eax, 16
mov BYTE PTR [rdi+3], sil
mov BYTE PTR [rdi+2], al
ret
with_memcpy:
mov rax, QWORD PTR data[rip]
mov DWORD PTR [rax+rdi], esi
ret
GCC 4.9.4:
with_bitfield:
add rdi, QWORD PTR data[rip]
mov eax, esi
sar eax, 8
mov BYTE PTR [rdi+1], al
mov eax, esi
mov BYTE PTR [rdi], sil
sar eax, 16
sar esi, 24
mov BYTE PTR [rdi+2], al
mov BYTE PTR [rdi+3], sil
ret
with_memcpy:
mov rax, QWORD PTR data[rip]
mov DWORD PTR [rax+rdi], esi
ret
Clang 3.0:
with_bitfield: # @with_bitfield
mov EAX, ESI
mov RCX, QWORD PTR [RIP + data]
mov BYTE PTR [RCX + RDI], AL
mov BYTE PTR [RCX + RDI + 1], AH # NOREX
mov EDX, EAX
shr EDX, 16
mov BYTE PTR [RCX + RDI + 2], DL
shr EAX, 24
mov BYTE PTR [RCX + RDI + 3], AL
ret
with_memcpy: # @with_memcpy
mov RAX, QWORD PTR [RIP + data]
mov DWORD PTR [RAX + RDI], ESI
ret
The older compilers above (GCC 7.3, GCC 4.9.4, and Clang 3.0) turn the
bt_bitfield_write_le() call into four single-byte stores, whereas
memcpy() becomes a single 32-bit store with every compiler shown.
Because barectf targets embedded and bare-metal environments, its users
may well be using an older compiler. Therefore, this patch changes
`serialize-write-bit-array-statements.j2` to prefer memcpy() when
possible: it's very common for memcpy() to get replaced with a compiler
built-in which generates faster code when the copy size is static
(which is always the case for barectf bit array fields).
Signed-off-by: Philippe Proulx <eeppeliteloop@gmail.com>