Move robust pool free list to own memory area
[librseq.git] / src / rseq.c
CommitLineData
90702366 1// SPDX-License-Identifier: MIT
f2d7b530
MJ
2// SPDX-FileCopyrightText: 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3
2cbca301 4#ifndef _GNU_SOURCE
784b0012 5#define _GNU_SOURCE
2cbca301 6#endif
784b0012
MD
7#include <errno.h>
8#include <sched.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <unistd.h>
13#include <syscall.h>
14#include <assert.h>
15#include <signal.h>
0ceae74a 16#include <limits.h>
9698c399 17#include <dlfcn.h>
170f840b 18#include <stddef.h>
df014a66 19#include <stdint.h>
baa98a34
MD
20#include <sys/auxv.h>
21#include <linux/auxvec.h>
784b0012
MD
22
23#include <rseq/rseq.h>
47c725dd 24#include "smp.h"
784b0012 25
baa98a34
MD
/*
 * Auxiliary vector keys passed to getauxval() to query the rseq feature
 * size and alignment. Defined here only when the included headers do not
 * already provide them.
 */
#ifndef AT_RSEQ_FEATURE_SIZE
# define AT_RSEQ_FEATURE_SIZE	27
#endif

#ifndef AT_RSEQ_ALIGN
# define AT_RSEQ_ALIGN		28
#endif
33
540263e4
MD
/*
 * Forward declaration: rseq_init() is registered as a constructor and is
 * also called explicitly by rseq_register_current_thread().
 */
static __attribute__((constructor))
void rseq_init(void);

/* Serializes the one-time initialization performed by rseq_init(). */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
/* One-time initialization flag checked and set by rseq_init(). */
static int init_done;

/*
 * Results of looking up libc's "__rseq_offset", "__rseq_size" and
 * "__rseq_flags" symbols with dlsym(); NULL when a symbol is not found.
 */
static const ptrdiff_t *libc_rseq_offset_p;
static const unsigned int *libc_rseq_size_p;
static const unsigned int *libc_rseq_flags_p;
43
/* Offset from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;

/*
 * Size of the registered rseq area. Holds -1U until rseq_init() has run,
 * and 0 when the rseq system call is not available.
 */
unsigned int rseq_size = -1U;

/* Flags used during rseq registration. */
unsigned int rseq_flags;

/*
 * rseq feature size supported by the kernel. Holds -1U until rseq_init()
 * has run, and 0 when the rseq system call is not available.
 */
unsigned int rseq_feature_size = -1U;

/* Non-zero when librseq, rather than libc, owns the rseq registration. */
static int rseq_ownership;
/* At least one rseq registration has succeeded. */
static int rseq_reg_success;
64
/*
 * Allocate a large area for the TLS. This is both the alignment of the
 * per-thread rseq area and the upper bound accepted for the auxv-reported
 * alignment and feature size.
 */
#define RSEQ_THREAD_AREA_ALLOC_SIZE	1024

/* Original struct rseq feature size is 20 bytes. */
#define ORIG_RSEQ_FEATURE_SIZE		20

/* Original struct rseq allocation size is 32 bytes. */
#define ORIG_RSEQ_ALLOC_SIZE		32
9698c399 73
470c530b
MD
74/*
75 * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the
76 * rseq_abi structure allocated size is at least
77 * RSEQ_THREAD_AREA_ALLOC_SIZE bytes to hold extra space for yet unknown
78 * kernel rseq extensions.
79 */
9698c399 80static
baa98a34 81__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
2d533093 82 .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
784b0012
MD
83};
84
/*
 * Issue the raw rseq system call.
 *
 * Returns the value of syscall(), narrowed to int.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		    int flags, uint32_t sig)
{
	long ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);

	return (int) ret;
}
90
baa98a34
MD
/*
 * Issue the raw getcpu system call, storing the current CPU and node
 * numbers through @cpu and @node. The third syscall argument is always NULL.
 *
 * Returns the value of syscall(), narrowed to int.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret = syscall(__NR_getcpu, cpu, node, NULL);

	return (int) ret;
}
95
8b34114a 96bool rseq_available(unsigned int query)
52e82b87
MD
97{
98 int rc;
99
8b34114a
MD
100 switch (query) {
101 case RSEQ_AVAILABLE_QUERY_KERNEL:
102 rc = sys_rseq(NULL, 0, 0, 0);
103 if (rc != -1)
104 abort();
105 switch (errno) {
106 case ENOSYS:
8b34114a
MD
107 break;
108 case EINVAL:
109 return true;
baa98a34
MD
110 default:
111 abort();
8b34114a
MD
112 }
113 break;
114 case RSEQ_AVAILABLE_QUERY_LIBC:
115 if (rseq_size && !rseq_ownership)
116 return true;
117 break;
52e82b87 118 default:
8b34114a 119 break;
52e82b87 120 }
8b34114a 121 return false;
52e82b87
MD
122}
123
9698c399 124int rseq_register_current_thread(void)
784b0012 125{
9698c399 126 int rc;
784b0012 127
540263e4
MD
128 rseq_init();
129
9698c399
MD
130 if (!rseq_ownership) {
131 /* Treat libc's ownership as a successful registration. */
132 return 0;
133 }
baa98a34
MD
134 rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
135 if (rc) {
136 if (RSEQ_READ_ONCE(rseq_reg_success)) {
137 /* Incoherent success/failure within process. */
138 abort();
139 }
9698c399 140 return -1;
baa98a34 141 }
9698c399 142 assert(rseq_current_cpu_raw() >= 0);
baa98a34 143 RSEQ_WRITE_ONCE(rseq_reg_success, 1);
9698c399 144 return 0;
784b0012
MD
145}
146
9698c399 147int rseq_unregister_current_thread(void)
784b0012 148{
9698c399 149 int rc;
784b0012 150
9698c399
MD
151 if (!rseq_ownership) {
152 /* Treat libc's ownership as a successful unregistration. */
153 return 0;
154 }
baa98a34 155 rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
9698c399
MD
156 if (rc)
157 return -1;
158 return 0;
784b0012
MD
159}
160
baa98a34
MD
161static
162unsigned int get_rseq_feature_size(void)
163{
164 unsigned long auxv_rseq_feature_size, auxv_rseq_align;
165
166 auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
167 assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
168
169 auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
170 assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
171 if (auxv_rseq_feature_size)
172 return auxv_rseq_feature_size;
173 else
174 return ORIG_RSEQ_FEATURE_SIZE;
175}
176
0d0cf5d1
MJ
177/*
178 * Initialize the public symbols for the rseq offset, size, feature size and
179 * flags prior to registering threads. If glibc owns the registration, get the
180 * values from its public symbols.
181 */
540263e4 182static
9698c399 183void rseq_init(void)
784b0012 184{
0d0cf5d1 185 /* Ensure initialization is only done once. */
540263e4
MD
186 if (RSEQ_READ_ONCE(init_done))
187 return;
188
0d0cf5d1
MJ
189 /*
190 * Take the mutex, check the initialization flag again and atomically
191 * set it to ensure we are the only thread doing the initialization.
192 */
540263e4
MD
193 pthread_mutex_lock(&init_lock);
194 if (init_done)
195 goto unlock;
196 RSEQ_WRITE_ONCE(init_done, 1);
0d0cf5d1
MJ
197
198 /*
199 * Check for glibc rseq support, if the 3 public symbols are found and
200 * the rseq_size is not zero, glibc owns the registration.
201 */
9698c399
MD
202 libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
203 libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
204 libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
ad538a80
MJ
205 if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
206 *libc_rseq_size_p != 0) {
9698c399
MD
207 /* rseq registration owned by glibc */
208 rseq_offset = *libc_rseq_offset_p;
209 rseq_size = *libc_rseq_size_p;
210 rseq_flags = *libc_rseq_flags_p;
baa98a34 211 rseq_feature_size = get_rseq_feature_size();
0d0cf5d1
MJ
212
213 /*
214 * The registered rseq area could be smaller than the feature
215 * size reported by the kernel auxval. Cap it to the rseq size
216 * so we don't try to access features past the end of the rseq
217 * area.
218 */
baa98a34
MD
219 if (rseq_feature_size > rseq_size)
220 rseq_feature_size = rseq_size;
540263e4 221 goto unlock;
0ceae74a 222 }
0d0cf5d1
MJ
223
224 /* librseq owns the registration */
9698c399 225 rseq_ownership = 1;
0d0cf5d1 226
9de60bd2
MJ
227 /* Calculate the offset of the rseq area from the thread pointer. */
228 rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer();
229
230 /* rseq flags are deprecated, always set to 0. */
231 rseq_flags = 0;
232
0d0cf5d1
MJ
233 /*
234 * Check if the rseq syscall is available, if not set the size and
235 * feature_size to 0.
236 */
baa98a34
MD
237 if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
238 rseq_size = 0;
239 rseq_feature_size = 0;
240 goto unlock;
241 }
0d0cf5d1 242
0d0cf5d1
MJ
243 /*
244 * If the feature size matches the original ABI (20), set the size to
245 * match the original ABI allocation (32), otherwise use the allocated
246 * size.
247 */
baa98a34
MD
248 rseq_feature_size = get_rseq_feature_size();
249 if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
250 rseq_size = ORIG_RSEQ_ALLOC_SIZE;
251 else
252 rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
540263e4
MD
253unlock:
254 pthread_mutex_unlock(&init_lock);
784b0012
MD
255}
256
9698c399
MD
257static __attribute__((destructor))
258void rseq_exit(void)
784b0012 259{
9698c399
MD
260 if (!rseq_ownership)
261 return;
262 rseq_offset = 0;
263 rseq_size = -1U;
baa98a34 264 rseq_feature_size = -1U;
9698c399 265 rseq_ownership = 0;
784b0012
MD
266}
267
/*
 * Return the current CPU number as reported by sched_getcpu().
 *
 * If sched_getcpu() returns a negative value, print the error with
 * perror() and abort the process.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu = sched_getcpu();

	if (cpu >= 0)
		return cpu;

	perror("sched_getcpu()");
	abort();
}
baa98a34
MD
279
/*
 * Return the current node number as reported by sys_getcpu().
 *
 * If sys_getcpu() returns non-zero, print the error with perror() and
 * return that value unchanged.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_id, node_id;
	int ret = sys_getcpu(&cpu_id, &node_id);

	if (ret != 0) {
		perror("sys_getcpu()");
		return ret;
	}
	return (int32_t) node_id;
}
47c725dd
MD
292
/* Return the value of get_possible_cpus_array_len(). */
int rseq_get_max_nr_cpus(void)
{
	int nr_cpus = get_possible_cpus_array_len();

	return nr_cpus;
}
This page took 0.04039 seconds and 4 git commands to generate.