1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
#include <assert.h>
#include <errno.h>
#include <dlfcn.h>
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/auxv.h>
#include <sys/syscall.h>
#include <linux/auxvec.h>

#include <rseq/rseq.h>
/*
 * Fallback definitions of the rseq auxiliary vector entries for build
 * environments whose <linux/auxvec.h> predates kernel 6.3.
 */
#ifndef AT_RSEQ_FEATURE_SIZE
# define AT_RSEQ_FEATURE_SIZE	27
#endif

#ifndef AT_RSEQ_ALIGN
# define AT_RSEQ_ALIGN		28
#endif
/*
 * Forward declaration: rseq_init() runs as a library constructor so the
 * global rseq state is initialized before any thread registers.
 */
static __attribute__((constructor))
void rseq_init(void);
/* Serializes the one-time initialization performed by rseq_init(). */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Pointers to glibc's public rseq symbols, looked up with
 * dlsym(RTLD_NEXT, ...) in rseq_init(). NULL when glibc does not
 * provide them.
 */
static const ptrdiff_t *libc_rseq_offset_p;
static const unsigned int *libc_rseq_size_p;
static const unsigned int *libc_rseq_flags_p;

/* Offset from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;

/*
 * Size of the registered rseq area. 0 if the registration was
 * unsuccessful. -1U until rseq_init() has run.
 */
unsigned int rseq_size = -1U;

/* Flags used during rseq registration. */
unsigned int rseq_flags;

/*
 * rseq feature size supported by the kernel. 0 if the registration was
 * unsuccessful. -1U until rseq_init() has run.
 */
unsigned int rseq_feature_size = -1U;

/* Nonzero when librseq (rather than libc) owns the rseq registration. */
static int rseq_ownership;
static int rseq_reg_success;	/* At least one rseq registration has succeeded. */
/* Allocate a large area for the TLS. */
#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024

/* Original struct rseq feature size is 20 bytes. */
#define ORIG_RSEQ_FEATURE_SIZE 20

/* Original struct rseq allocation size is 32 bytes. */
#define ORIG_RSEQ_ALLOC_SIZE 32
75 * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the
76 * rseq_abi structure allocated size is at least
77 * RSEQ_THREAD_AREA_ALLOC_SIZE bytes to hold extra space for yet unknown
78 * kernel rseq extensions.
81 __thread
struct rseq_abi __rseq_abi
__attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE
))) = {
82 .cpu_id
= RSEQ_ABI_CPU_ID_UNINITIALIZED
,
/*
 * Direct rseq(2) system call wrapper (libc provides none).
 * Returns 0 on success, -1 with errno set on error.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		int flags, uint32_t sig)
{
	return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
}
/*
 * Direct getcpu(2) system call wrapper.
 * Stores the current cpu and node numbers; returns 0 on success,
 * -1 with errno set on error. The third (tcache) argument is unused.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	return syscall(__NR_getcpu, cpu, node, NULL);
}
96 bool rseq_available(unsigned int query
)
101 case RSEQ_AVAILABLE_QUERY_KERNEL
:
102 rc
= sys_rseq(NULL
, 0, 0, 0);
114 case RSEQ_AVAILABLE_QUERY_LIBC
:
115 if (rseq_size
&& !rseq_ownership
)
124 int rseq_register_current_thread(void)
130 if (!rseq_ownership
) {
131 /* Treat libc's ownership as a successful registration. */
134 rc
= sys_rseq(&__rseq_abi
, rseq_size
, 0, RSEQ_SIG
);
136 if (RSEQ_READ_ONCE(rseq_reg_success
)) {
137 /* Incoherent success/failure within process. */
142 assert(rseq_current_cpu_raw() >= 0);
143 RSEQ_WRITE_ONCE(rseq_reg_success
, 1);
147 int rseq_unregister_current_thread(void)
151 if (!rseq_ownership
) {
152 /* Treat libc's ownership as a successful unregistration. */
155 rc
= sys_rseq(&__rseq_abi
, rseq_size
, RSEQ_ABI_FLAG_UNREGISTER
, RSEQ_SIG
);
162 unsigned int get_rseq_feature_size(void)
164 unsigned long auxv_rseq_feature_size
, auxv_rseq_align
;
166 auxv_rseq_align
= getauxval(AT_RSEQ_ALIGN
);
167 assert(!auxv_rseq_align
|| auxv_rseq_align
<= RSEQ_THREAD_AREA_ALLOC_SIZE
);
169 auxv_rseq_feature_size
= getauxval(AT_RSEQ_FEATURE_SIZE
);
170 assert(!auxv_rseq_feature_size
|| auxv_rseq_feature_size
<= RSEQ_THREAD_AREA_ALLOC_SIZE
);
171 if (auxv_rseq_feature_size
)
172 return auxv_rseq_feature_size
;
174 return ORIG_RSEQ_FEATURE_SIZE
;
178 * Initialize the public symbols for the rseq offset, size, feature size and
179 * flags prior to registering threads. If glibc owns the registration, get the
180 * values from its public symbols.
185 /* Ensure initialization is only done once. */
186 if (RSEQ_READ_ONCE(init_done
))
190 * Take the mutex, check the initialization flag again and atomically
191 * set it to ensure we are the only thread doing the initialization.
193 pthread_mutex_lock(&init_lock
);
196 RSEQ_WRITE_ONCE(init_done
, 1);
199 * Check for glibc rseq support, if the 3 public symbols are found and
200 * the rseq_size is not zero, glibc owns the registration.
202 libc_rseq_offset_p
= dlsym(RTLD_NEXT
, "__rseq_offset");
203 libc_rseq_size_p
= dlsym(RTLD_NEXT
, "__rseq_size");
204 libc_rseq_flags_p
= dlsym(RTLD_NEXT
, "__rseq_flags");
205 if (libc_rseq_size_p
&& libc_rseq_offset_p
&& libc_rseq_flags_p
&&
206 *libc_rseq_size_p
!= 0) {
207 /* rseq registration owned by glibc */
208 rseq_offset
= *libc_rseq_offset_p
;
209 rseq_size
= *libc_rseq_size_p
;
210 rseq_flags
= *libc_rseq_flags_p
;
211 rseq_feature_size
= get_rseq_feature_size();
214 * The registered rseq area could be smaller than the feature
215 * size reported by the kernel auxval. Cap it to the rseq size
216 * so we don't try to access features past the end of the rseq
219 if (rseq_feature_size
> rseq_size
)
220 rseq_feature_size
= rseq_size
;
224 /* librseq owns the registration */
227 /* Calculate the offset of the rseq area from the thread pointer. */
228 rseq_offset
= (uintptr_t)&__rseq_abi
- (uintptr_t)rseq_thread_pointer();
230 /* rseq flags are deprecated, always set to 0. */
234 * Check if the rseq syscall is available, if not set the size and
237 if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL
)) {
239 rseq_feature_size
= 0;
244 * If the feature size matches the original ABI (20), set the size to
245 * match the original ABI allocation (32), otherwise use the allocated
248 rseq_feature_size
= get_rseq_feature_size();
249 if (rseq_feature_size
== ORIG_RSEQ_FEATURE_SIZE
)
250 rseq_size
= ORIG_RSEQ_ALLOC_SIZE
;
252 rseq_size
= RSEQ_THREAD_AREA_ALLOC_SIZE
;
254 pthread_mutex_unlock(&init_lock
);
257 static __attribute__((destructor
))
264 rseq_feature_size
= -1U;
/*
 * Fallback CPU-number query used when the rseq area is unavailable:
 * ask the kernel via sched_getcpu(). Aborts on failure, so the return
 * value is always a valid (non-negative) CPU number.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu;

	cpu = sched_getcpu();
	if (cpu < 0) {
		perror("sched_getcpu()");
		abort();
	}
	return cpu;
}
/*
 * Fallback NUMA-node query used when the rseq area is unavailable:
 * ask the kernel via getcpu(2). Returns the node number on success,
 * or the (negative) sys_getcpu() return value on error after printing
 * a diagnostic.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_id, node_id;
	int ret;

	ret = sys_getcpu(&cpu_id, &node_id);
	if (ret) {
		perror("sys_getcpu()");
		return ret;
	}
	return (int32_t) node_id;
}
/*
 * Return the maximum possible number of CPUs on this system, as given
 * by the possible-CPUs array length helper (defined elsewhere in the
 * project).
 */
int rseq_get_max_nr_cpus(void)
{
	return get_possible_cpus_array_len();
}