fix: always set the rseq offset and flags
diff --git a/src/rseq.c b/src/rseq.c
index 346552c8ea39b3afddc869e68cb0832e32608531..af5c5de310b3b3f83a8b837c68ba30456ae06c2a 100644
--- a/src/rseq.c
+++ b/src/rseq.c
@@ -1,21 +1,9 @@
-// SPDX-License-Identifier: LGPL-2.1-only
-/*
- * rseq.c
- *
- * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; only
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- */
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
 #include <errno.h>
 #include <sched.h>
 #include <stdio.h>
 #include <assert.h>
 #include <signal.h>
 #include <limits.h>
+#include <dlfcn.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/auxv.h>
+#include <linux/auxvec.h>
 
 #include <rseq/rseq.h>
 
-#define ARRAY_SIZE(arr)        (sizeof(arr) / sizeof((arr)[0]))
+#ifndef AT_RSEQ_FEATURE_SIZE
+# define AT_RSEQ_FEATURE_SIZE          27
+#endif
 
-__attribute__((weak)) __thread
-volatile struct rseq __rseq_abi = {
-       .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
-};
+#ifndef AT_RSEQ_ALIGN
+# define AT_RSEQ_ALIGN                 28
+#endif
+
+static __attribute__((constructor))
+void rseq_init(void);
+
+static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
+static int init_done;
+
+static const ptrdiff_t *libc_rseq_offset_p;
+static const unsigned int *libc_rseq_size_p;
+static const unsigned int *libc_rseq_flags_p;
+
+/* Offset from the thread pointer to the rseq area. */
+ptrdiff_t rseq_offset;
+
+/*
+ * Size of the registered rseq area. 0 if the registration was
+ * unsuccessful.
+ */
+unsigned int rseq_size = -1U;
+
+/* Flags used during rseq registration. */
+unsigned int rseq_flags;
+
+/*
+ * rseq feature size supported by the kernel. 0 if the registration was
+ * unsuccessful.
+ */
+unsigned int rseq_feature_size = -1U;
+
+static int rseq_ownership;
+static int rseq_reg_success;   /* At least one rseq registration has succeeded. */
+
+/* Allocate a large area for the TLS. */
+#define RSEQ_THREAD_AREA_ALLOC_SIZE    1024
+
+/* Original struct rseq feature size is 20 bytes. */
+#define ORIG_RSEQ_FEATURE_SIZE         20
 
-__attribute__((weak)) __thread
-volatile uint32_t __rseq_refcount;
+/* Original struct rseq allocation size is 32 bytes. */
+#define ORIG_RSEQ_ALLOC_SIZE           32
 
-static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
+/*
+ * Aligning on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the allocated
+ * size of the rseq_abi structure is at least RSEQ_THREAD_AREA_ALLOC_SIZE
+ * bytes, leaving extra space for yet unknown kernel rseq extensions.
+ */
+static
+__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
+       .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+};
+
+static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
                    int flags, uint32_t sig)
 {
        return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
 }
 
-int rseq_available(void)
+static int sys_getcpu(unsigned *cpu, unsigned *node)
+{
+       return syscall(__NR_getcpu, cpu, node, NULL);
+}
+
+bool rseq_available(unsigned int query)
 {
        int rc;
 
-       rc = sys_rseq(NULL, 0, 0, 0);
-       if (rc != -1)
-               abort();
-       switch (errno) {
-       case ENOSYS:
-               return 0;
-       case EINVAL:
-               return 1;
+       switch (query) {
+       case RSEQ_AVAILABLE_QUERY_KERNEL:
+               rc = sys_rseq(NULL, 0, 0, 0);
+               if (rc != -1)
+                       abort();
+               switch (errno) {
+               case ENOSYS:
+                       break;
+               case EINVAL:
+                       return true;
+               default:
+                       abort();
+               }
+               break;
+       case RSEQ_AVAILABLE_QUERY_LIBC:
+               if (rseq_size && !rseq_ownership)
+                       return true;
+               break;
        default:
-               abort();
+               break;
        }
+       return false;
 }
 
-static void signal_off_save(sigset_t *oldset)
+int rseq_register_current_thread(void)
 {
-       sigset_t set;
-       int ret;
+       int rc;
 
-       sigfillset(&set);
-       ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
-       if (ret)
-               abort();
+       rseq_init();
+
+       if (!rseq_ownership) {
+               /* Treat libc's ownership as a successful registration. */
+               return 0;
+       }
+       rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
+       if (rc) {
+               if (RSEQ_READ_ONCE(rseq_reg_success)) {
+                       /* Incoherent success/failure within process. */
+                       abort();
+               }
+               return -1;
+       }
+       assert(rseq_current_cpu_raw() >= 0);
+       RSEQ_WRITE_ONCE(rseq_reg_success, 1);
+       return 0;
 }
 
-static void signal_restore(sigset_t oldset)
+int rseq_unregister_current_thread(void)
 {
-       int ret;
+       int rc;
 
-       ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
-       if (ret)
-               abort();
+       if (!rseq_ownership) {
+               /* Treat libc's ownership as a successful unregistration. */
+               return 0;
+       }
+       rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+       if (rc)
+               return -1;
+       return 0;
 }
 
-int rseq_register_current_thread(void)
+static
+unsigned int get_rseq_feature_size(void)
 {
-       int rc, ret = 0;
-       sigset_t oldset;
+       unsigned long auxv_rseq_feature_size, auxv_rseq_align;
 
-       signal_off_save(&oldset);
-       if (__rseq_refcount == UINT_MAX) {
-               ret = -1;
-               goto end;
-       }
-       if (__rseq_refcount++)
-               goto end;
-       rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
-       if (!rc) {
-               assert(rseq_current_cpu_raw() >= 0);
-               goto end;
-       }
-       if (errno != EBUSY)
-               __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
-       ret = -1;
-       __rseq_refcount--;
-end:
-       signal_restore(oldset);
-       return ret;
+       auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
+       assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+
+       auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
+       assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+       if (auxv_rseq_feature_size)
+               return auxv_rseq_feature_size;
+       else
+               return ORIG_RSEQ_FEATURE_SIZE;
 }
 
-int rseq_unregister_current_thread(void)
+/*
+ * Initialize the public symbols for the rseq offset, size, feature size and
+ * flags prior to registering threads. If glibc owns the registration, get the
+ * values from its public symbols.
+ */
+static
+void rseq_init(void)
 {
-       int rc, ret = 0;
-       sigset_t oldset;
+       /* Ensure initialization is only done once. */
+       if (RSEQ_READ_ONCE(init_done))
+               return;
+
+       /*
+        * Take the mutex, check the initialization flag again and atomically
+        * set it to ensure we are the only thread doing the initialization.
+        */
+       pthread_mutex_lock(&init_lock);
+       if (init_done)
+               goto unlock;
+       RSEQ_WRITE_ONCE(init_done, 1);
 
-       signal_off_save(&oldset);
-       if (!__rseq_refcount) {
-               ret = -1;
-               goto end;
+       /*
+        * Check for glibc rseq support. If the 3 public symbols are found
+        * and rseq_size is not zero, glibc owns the registration.
+        */
+       libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
+       libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
+       libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+       if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
+                       *libc_rseq_size_p != 0) {
+               /* rseq registration owned by glibc */
+               rseq_offset = *libc_rseq_offset_p;
+               rseq_size = *libc_rseq_size_p;
+               rseq_flags = *libc_rseq_flags_p;
+               rseq_feature_size = get_rseq_feature_size();
+
+               /*
+                * The registered rseq area could be smaller than the feature
+                * size reported by the kernel auxval. Cap it to the rseq size
+                * so we don't try to access features past the end of the rseq
+                * area.
+                */
+               if (rseq_feature_size > rseq_size)
+                       rseq_feature_size = rseq_size;
+               goto unlock;
+       }
+
+       /* librseq owns the registration */
+       rseq_ownership = 1;
+
+       /* Calculate the offset of the rseq area from the thread pointer. */
+       rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer();
+
+       /* The rseq flags are deprecated; always set them to 0. */
+       rseq_flags = 0;
+
+       /*
+        * Check if the rseq syscall is available; if not, set the size and
+        * feature_size to 0.
+        */
+       if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
+               rseq_size = 0;
+               rseq_feature_size = 0;
+               goto unlock;
        }
-       if (--__rseq_refcount)
-               goto end;
-       rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
-                     RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
-       if (!rc)
-               goto end;
-       ret = -1;
-end:
-       signal_restore(oldset);
-       return ret;
+
+       /*
+        * If the feature size matches the original ABI (20), set the size to
+        * match the original ABI allocation (32); otherwise, use the allocated
+        * size.
+        */
+       rseq_feature_size = get_rseq_feature_size();
+       if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
+               rseq_size = ORIG_RSEQ_ALLOC_SIZE;
+       else
+               rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
+unlock:
+       pthread_mutex_unlock(&init_lock);
+}
+
+static __attribute__((destructor))
+void rseq_exit(void)
+{
+       if (!rseq_ownership)
+               return;
+       rseq_offset = 0;
+       rseq_size = -1U;
+       rseq_feature_size = -1U;
+       rseq_ownership = 0;
 }
 
 int32_t rseq_fallback_current_cpu(void)
@@ -141,3 +275,16 @@ int32_t rseq_fallback_current_cpu(void)
        }
        return cpu;
 }
+
+int32_t rseq_fallback_current_node(void)
+{
+       uint32_t cpu_id, node_id;
+       int ret;
+
+       ret = sys_getcpu(&cpu_id, &node_id);
+       if (ret) {
+               perror("sys_getcpu()");
+               return ret;
+       }
+       return (int32_t) node_id;
+}
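For context (this is not part of the commit), below is a minimal sketch of how an application might exercise the public interface touched by this patch. The file name and messages are illustrative; the functions and globals it uses (rseq_register_current_thread(), rseq_unregister_current_thread(), rseq_current_cpu_raw(), rseq_fallback_current_cpu(), rseq_offset, rseq_size, rseq_flags) are the ones visible in the diff above and are declared in <rseq/rseq.h>, though their exact declarations may vary between librseq versions.

/* check_rseq.c -- illustrative example only, not part of this commit. */
#include <stdio.h>
#include <rseq/rseq.h>

int main(void)
{
	/*
	 * Register the current thread. With this patch the call also
	 * succeeds (returns 0) when glibc already owns the registration.
	 */
	if (rseq_register_current_thread()) {
		/* No kernel rseq support: use the fallback helper instead. */
		printf("rseq unavailable, cpu (fallback): %d\n",
		       (int) rseq_fallback_current_cpu());
		return 0;
	}

	/*
	 * The point of the fix: rseq_offset and rseq_flags are now set by
	 * rseq_init() in every case, so they can be read unconditionally.
	 */
	printf("rseq_offset=%td rseq_size=%u rseq_flags=%u\n",
	       rseq_offset, rseq_size, rseq_flags);
	printf("current cpu: %d\n", (int) rseq_current_cpu_raw());

	(void) rseq_unregister_current_thread();
	return 0;
}

Building with something like "cc check_rseq.c -lrseq" is assumed here; since rseq_init() runs as a library constructor, the offset, size and flags globals are already populated when main() starts, whether the registration ends up owned by glibc or by librseq.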