X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=src%2Frseq.c;h=ae72eaa22f556b64a0d763eaa179e9e4960c0162;hb=c0de0012017aff7b1f310b9e34bac2d842d46a2b;hp=da6fcddd1870422dc466868a01a4f4b2ddffed56;hpb=c2dd1104d60f7488e2f9f316f544b405ea09ad80;p=librseq.git diff --git a/src/rseq.c b/src/rseq.c index da6fcdd..ae72eaa 100644 --- a/src/rseq.c +++ b/src/rseq.c @@ -1,19 +1,6 @@ -// SPDX-License-Identifier: LGPL-2.1-only -/* - * rseq.c - * - * Copyright (C) 2016 Mathieu Desnoyers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; only - * version 2.1 of the License. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - */ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: 2016 Mathieu Desnoyers + #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif @@ -27,108 +14,255 @@ #include #include #include +#include +#include +#include +#include +#include #include +#include "smp.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#ifndef AT_RSEQ_FEATURE_SIZE +# define AT_RSEQ_FEATURE_SIZE 27 +#endif -__thread -volatile struct rseq __rseq_abi = { - .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, -}; +#ifndef AT_RSEQ_ALIGN +# define AT_RSEQ_ALIGN 28 +#endif + +static __attribute__((constructor)) +void rseq_init(void); + +static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; +static int init_done; + +static const ptrdiff_t *libc_rseq_offset_p; +static const unsigned int *libc_rseq_size_p; +static const unsigned int *libc_rseq_flags_p; + +/* Offset from the thread pointer to the rseq area. */ +ptrdiff_t rseq_offset; + +/* + * Size of the registered rseq area. 0 if the registration was + * unsuccessful. 
+ */ +unsigned int rseq_size = -1U; + +/* Flags used during rseq registration. */ +unsigned int rseq_flags; + +/* + * rseq feature size supported by the kernel. 0 if the registration was + * unsuccessful. + */ +unsigned int rseq_feature_size = -1U; + +static int rseq_ownership; +static int rseq_reg_success; /* At least one rseq registration has succeeded. */ -__thread -volatile uint32_t __rseq_refcount; +/* Allocate a large area for the TLS. */ +#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 -static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, +/* Original struct rseq feature size is 20 bytes. */ +#define ORIG_RSEQ_FEATURE_SIZE 20 + +/* Original struct rseq allocation size is 32 bytes. */ +#define ORIG_RSEQ_ALLOC_SIZE 32 + +/* + * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the + * rseq_abi structure allocated size is at least + * RSEQ_THREAD_AREA_ALLOC_SIZE bytes to hold extra space for yet unknown + * kernel rseq extensions. + */ +static +__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, +}; + +static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) { return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } -int rseq_available(void) +static int sys_getcpu(unsigned *cpu, unsigned *node) +{ + return syscall(__NR_getcpu, cpu, node, NULL); +} + +bool rseq_available(unsigned int query) { int rc; - rc = sys_rseq(NULL, 0, 0, 0); - if (rc != -1) - abort(); - switch (errno) { - case ENOSYS: - return 0; - case EINVAL: - return 1; + switch (query) { + case RSEQ_AVAILABLE_QUERY_KERNEL: + rc = sys_rseq(NULL, 0, 0, 0); + if (rc != -1) + abort(); + switch (errno) { + case ENOSYS: + break; + case EINVAL: + return true; + default: + abort(); + } + break; + case RSEQ_AVAILABLE_QUERY_LIBC: + if (rseq_size && !rseq_ownership) + return true; + break; default: - abort(); + break; } + return false; }
-static void signal_off_save(sigset_t *oldset) +int rseq_register_current_thread(void) { - sigset_t set; - int ret; + int rc; - sigfillset(&set); - ret = pthread_sigmask(SIG_BLOCK, &set, oldset); - if (ret) - abort(); + rseq_init(); + + if (!rseq_ownership) { + /* Treat libc's ownership as a successful registration. */ + return 0; + } + rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + if (rc) { + if (RSEQ_READ_ONCE(rseq_reg_success)) { + /* Incoherent success/failure within process. */ + abort(); + } + return -1; + } + assert(rseq_current_cpu_raw() >= 0); + RSEQ_WRITE_ONCE(rseq_reg_success, 1); + return 0; } -static void signal_restore(sigset_t oldset) +int rseq_unregister_current_thread(void) { - int ret; + int rc; - ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); - if (ret) - abort(); + if (!rseq_ownership) { + /* Treat libc's ownership as a successful unregistration. */ + return 0; + } + rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + if (rc) + return -1; + return 0; } -int rseq_register_current_thread(void) +static +unsigned int get_rseq_feature_size(void) { - int rc, ret = 0; - sigset_t oldset; + unsigned long auxv_rseq_feature_size, auxv_rseq_align; - signal_off_save(&oldset); - if (__rseq_refcount == UINT_MAX) { - ret = -1; - goto end; - } - if (__rseq_refcount++) - goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); - if (!rc) { - assert(rseq_current_cpu_raw() >= 0); - goto end; - } - if (errno != EBUSY) - __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED; - ret = -1; - __rseq_refcount--; -end: - signal_restore(oldset); - return ret; + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return 
ORIG_RSEQ_FEATURE_SIZE; } -int rseq_unregister_current_thread(void) +/* + * Initialize the public symbols for the rseq offset, size, feature size and + * flags prior to registering threads. If glibc owns the registration, get the + * values from its public symbols. + */ +static +void rseq_init(void) { - int rc, ret = 0; - sigset_t oldset; + /* Ensure initialization is only done once. */ + if (RSEQ_READ_ONCE(init_done)) + return; + + /* + * Take the mutex, check the initialization flag again and atomically + * set it to ensure we are the only thread doing the initialization. + */ + pthread_mutex_lock(&init_lock); + if (init_done) + goto unlock; + RSEQ_WRITE_ONCE(init_done, 1); - signal_off_save(&oldset); - if (!__rseq_refcount) { - ret = -1; - goto end; + /* + * Check for glibc rseq support, if the 3 public symbols are found and + * the rseq_size is not zero, glibc owns the registration. + */ + libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); + libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); + libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && + *libc_rseq_size_p != 0) { + /* rseq registration owned by glibc */ + rseq_offset = *libc_rseq_offset_p; + rseq_size = *libc_rseq_size_p; + rseq_flags = *libc_rseq_flags_p; + rseq_feature_size = get_rseq_feature_size(); + + /* + * The registered rseq area could be smaller than the feature + * size reported by the kernel auxval. Cap it to the rseq size + * so we don't try to access features past the end of the rseq + * area. + */ + if (rseq_feature_size > rseq_size) + rseq_feature_size = rseq_size; + goto unlock; } - if (--__rseq_refcount) - goto end; - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), - RSEQ_FLAG_UNREGISTER, RSEQ_SIG); - if (!rc) - goto end; - ret = -1; -end: - signal_restore(oldset); - return ret; + + /* librseq owns the registration */ + rseq_ownership = 1; + + /* Calculate the offset of the rseq area from the thread pointer. 
*/ + rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer(); + + /* rseq flags are deprecated, always set to 0. */ + rseq_flags = 0; + + /* + * Check if the rseq syscall is available, if not set the size and + * feature_size to 0. + */ + if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) { + rseq_size = 0; + rseq_feature_size = 0; + goto unlock; + } + + /* + * If the feature size matches the original ABI (20), set the size to + * match the original ABI allocation (32), otherwise use the allocated + * size. + */ + rseq_feature_size = get_rseq_feature_size(); + if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + else + rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; +unlock: + pthread_mutex_unlock(&init_lock); +} + +static __attribute__((destructor)) +void rseq_exit(void) +{ + if (!rseq_ownership) + return; + rseq_offset = 0; + rseq_size = -1U; + rseq_feature_size = -1U; + rseq_ownership = 0; } int32_t rseq_fallback_current_cpu(void) @@ -142,3 +276,21 @@ int32_t rseq_fallback_current_cpu(void) } return cpu; } + +int32_t rseq_fallback_current_node(void) +{ + uint32_t cpu_id, node_id; + int ret; + + ret = sys_getcpu(&cpu_id, &node_id); + if (ret) { + perror("sys_getcpu()"); + return ret; + } + return (int32_t) node_id; +} + +int rseq_get_max_nr_cpus(void) +{ + return get_possible_cpus_array_len(); +}