-// SPDX-License-Identifier: LGPL-2.1-only
-/*
- * rseq.c
- *
- * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; only
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- */
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <signal.h>
#include <limits.h>
#include <dlfcn.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/auxv.h>
+#include <linux/auxvec.h>
#include <rseq/rseq.h>
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#ifndef AT_RSEQ_FEATURE_SIZE
+# define AT_RSEQ_FEATURE_SIZE 27
+#endif
+
+#ifndef AT_RSEQ_ALIGN
+# define AT_RSEQ_ALIGN 28
+#endif
+
+static __attribute__((constructor))
+void rseq_init(void);
+
+/* Mutex taken by rseq_init() while it performs the one-time initialization. */
+static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
+/* Flag checked and set by rseq_init() so that initialization happens only once. */
+static int init_done;
+/*
+ * Addresses of the "__rseq_offset", "__rseq_size" and "__rseq_flags"
+ * symbols, looked up with dlsym(RTLD_NEXT, ...) in rseq_init().
+ */
-static const int *libc_rseq_offset_p;
+static const ptrdiff_t *libc_rseq_offset_p;
static const unsigned int *libc_rseq_size_p;
static const unsigned int *libc_rseq_flags_p;
-/* Offset from the thread pointer to the rseq area. */
-int rseq_offset;
+/* Offset from the thread pointer to the rseq area. */
+ptrdiff_t rseq_offset;
-/* Size of the registered rseq area. 0 if the registration was
- unsuccessful. */
+/*
+ * Size of the registered rseq area. 0 if the registration was
+ * unsuccessful.
+ */
unsigned int rseq_size = -1U;
-/* Flags used during rseq registration. */
+/* Flags used during rseq registration. */
unsigned int rseq_flags;
+/*
+ * rseq feature size supported by the kernel. 0 if the registration was
+ * unsuccessful.
+ */
+unsigned int rseq_feature_size = -1U;
+
+/*
+ * Non-zero when librseq owns the rseq registration (set by rseq_init());
+ * zero when the registration is treated as owned by libc.
+ */
static int rseq_ownership;
+static int rseq_reg_success; /* At least one rseq registration has succeeded. */
+/* Allocate a large area for the TLS. */
+#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024
+
+/* Original struct rseq feature size is 20 bytes. */
+#define ORIG_RSEQ_FEATURE_SIZE 20
+
+/* Original struct rseq allocation size is 32 bytes. */
+#define ORIG_RSEQ_ALLOC_SIZE 32
+
+/*
+ * Per-thread rseq area whose address is passed to the rseq system call by
+ * rseq_register_current_thread() when librseq owns the registration.
+ *
+ * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the
+ * rseq_abi structure allocated size is at least
+ * RSEQ_THREAD_AREA_ALLOC_SIZE bytes to hold extra space for yet unknown
+ * kernel rseq extensions.
+ */
static
-__thread struct rseq __rseq_abi __attribute__((tls_model("initial-exec"))) = {
- .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
+ .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
};
+/*
+ * Issue the rseq system call through syscall(). The arguments are forwarded
+ * unchanged and the value returned by syscall() is returned to the caller.
+ */
-static int sys_rseq(struct rseq *rseq_abi, uint32_t rseq_len,
+static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
int flags, uint32_t sig)
{
return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
}
-int rseq_available(void)
+/*
+ * Issue the getcpu system call through syscall(). The third argument of the
+ * system call is always passed as NULL; the value returned by syscall() is
+ * returned to the caller.
+ */
+static int sys_getcpu(unsigned *cpu, unsigned *node)
+{
+ long rc = syscall(__NR_getcpu, cpu, node, NULL);
+
+ return rc;
+}
+
+/*
+ * Report whether rseq is available, according to @query.
+ *
+ * RSEQ_AVAILABLE_QUERY_KERNEL: probe the kernel with an rseq system call
+ * using NULL/zero arguments. The call is expected to fail (abort() if it
+ * does not return -1): errno == EINVAL yields true, errno == ENOSYS yields
+ * false, and any other errno value aborts.
+ *
+ * RSEQ_AVAILABLE_QUERY_LIBC: true when rseq_size is non-zero and librseq
+ * does not own the registration.
+ *
+ * Any other @query value yields false.
+ */
+bool rseq_available(unsigned int query)
{
int rc;
- rc = sys_rseq(NULL, 0, 0, 0);
- if (rc != -1)
- abort();
- switch (errno) {
- case ENOSYS:
- return 0;
- case EINVAL:
- return 1;
+ switch (query) {
+ case RSEQ_AVAILABLE_QUERY_KERNEL:
+ rc = sys_rseq(NULL, 0, 0, 0);
+ if (rc != -1)
+ abort();
+ switch (errno) {
+ case ENOSYS:
+ break;
+ case EINVAL:
+ return true;
+ default:
+ abort();
+ }
+ break;
+ case RSEQ_AVAILABLE_QUERY_LIBC:
+ if (rseq_size && !rseq_ownership)
+ return true;
+ break;
default:
- abort();
+ break;
}
+ return false;
}
+/*
+ * Register the rseq area of the calling thread.
+ *
+ * rseq_init() is called first. When librseq does not own the registration
+ * nothing is done and 0 is returned. Otherwise __rseq_abi is registered with
+ * the rseq system call using rseq_size and RSEQ_SIG.
+ *
+ * Returns 0 on success, -1 when the system call fails. A failure after an
+ * earlier registration already succeeded (rseq_reg_success set) is treated
+ * as incoherent and aborts.
+ */
int rseq_register_current_thread(void)
{
int rc;
+ rseq_init();
+
if (!rseq_ownership) {
/* Treat libc's ownership as a successful registration. */
return 0;
}
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
- if (rc)
+ rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
+ if (rc) {
+ if (RSEQ_READ_ONCE(rseq_reg_success)) {
+ /* Incoherent success/failure within process. */
+ abort();
+ }
return -1;
+ }
assert(rseq_current_cpu_raw() >= 0);
+ RSEQ_WRITE_ONCE(rseq_reg_success, 1);
return 0;
}
/* Treat libc's ownership as a successful unregistration. */
return 0;
}
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+ rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
if (rc)
return -1;
return 0;
}
-static __attribute__((constructor))
+/*
+ * Return the rseq feature size read from the auxiliary vector entry
+ * AT_RSEQ_FEATURE_SIZE, or ORIG_RSEQ_FEATURE_SIZE when getauxval() returns 0
+ * for that entry.
+ *
+ * The AT_RSEQ_ALIGN and AT_RSEQ_FEATURE_SIZE values, when non-zero, are
+ * asserted not to exceed RSEQ_THREAD_AREA_ALLOC_SIZE.
+ */
+static
+unsigned int get_rseq_feature_size(void)
+{
+ unsigned long auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
+ unsigned long auxv_rseq_feature_size;
+
+ assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+
+ auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
+ assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+
+ /* No value in the auxiliary vector: use the original ABI feature size. */
+ if (!auxv_rseq_feature_size)
+ return ORIG_RSEQ_FEATURE_SIZE;
+ return auxv_rseq_feature_size;
+}
+
+/*
+ * Initialize the public symbols for the rseq offset, size, feature size and
+ * flags prior to registering threads. If glibc owns the registration, get the
+ * values from its public symbols.
+ *
+ * May be called concurrently from several threads: the work is done once,
+ * under init_lock, and init_done is only published after every value has
+ * been written, so a caller returning early on init_done observes a fully
+ * initialized state.
+ */
+static
void rseq_init(void)
{
+ /*
+ * Fast path: initialization already completed. The load-acquire pairs
+ * with the store-release done before init_lock is released.
+ */
+ if (__atomic_load_n(&init_done, __ATOMIC_ACQUIRE))
+ return;
+
+ /*
+ * Take the mutex and check the initialization flag again to ensure we
+ * are the only thread doing the initialization.
+ */
+ pthread_mutex_lock(&init_lock);
+ if (init_done)
+ goto unlock;
+
+ /*
+ * Check for glibc rseq support, if the 3 public symbols are found and
+ * the rseq_size is not zero, glibc owns the registration.
+ */
libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
- if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) {
+ if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
+ *libc_rseq_size_p != 0) {
/* rseq registration owned by glibc */
rseq_offset = *libc_rseq_offset_p;
rseq_size = *libc_rseq_size_p;
rseq_flags = *libc_rseq_flags_p;
- return;
+ rseq_feature_size = get_rseq_feature_size();
+
+ /*
+ * The registered rseq area could be smaller than the feature
+ * size reported by the kernel auxval. Cap it to the rseq size
+ * so we don't try to access features past the end of the rseq
+ * area.
+ */
+ if (rseq_feature_size > rseq_size)
+ rseq_feature_size = rseq_size;
+ goto unlock;
}
- if (!rseq_available())
- return;
+
+ /* librseq owns the registration */
rseq_ownership = 1;
- rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
- rseq_size = sizeof(struct rseq);
+
+ /* Calculate the offset of the rseq area from the thread pointer. */
+ rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer();
+
+ /* rseq flags are deprecated, always set to 0. */
rseq_flags = 0;
+
+ /*
+ * Check if the rseq syscall is available, if not set the size and
+ * feature_size to 0.
+ */
+ if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
+ rseq_size = 0;
+ rseq_feature_size = 0;
+ goto unlock;
+ }
+
+ /*
+ * If the feature size matches the original ABI (20), set the size to
+ * match the original ABI allocation (32), otherwise use the allocated
+ * size.
+ */
+ rseq_feature_size = get_rseq_feature_size();
+ if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
+ rseq_size = ORIG_RSEQ_ALLOC_SIZE;
+ else
+ rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
+unlock:
+ /*
+ * Publish completion only now, with store-release, so that threads
+ * taking the lock-free fast path never observe init_done set while
+ * the offset, size, flags or ownership are still being written.
+ * Storing 1 again when initialization was already done is harmless.
+ */
+ __atomic_store_n(&init_done, 1, __ATOMIC_RELEASE);
+ pthread_mutex_unlock(&init_lock);
}
static __attribute__((destructor))
return;
rseq_offset = 0;
rseq_size = -1U;
+ rseq_feature_size = -1U;
rseq_ownership = 0;
}
}
return cpu;
}
+
+/*
+ * Obtain the current node number with the getcpu system call.
+ *
+ * Returns the node number on success. On failure, an error message is
+ * printed with perror() and the non-zero value returned by sys_getcpu() is
+ * returned.
+ */
+int32_t rseq_fallback_current_node(void)
+{
+ uint32_t cpu, node;
+ int rc = sys_getcpu(&cpu, &node);
+
+ if (rc == 0)
+ return (int32_t) node;
+
+ perror("sys_getcpu()");
+ return rc;
+}