Introduce common generic header file
[librseq.git] / src / rseq.c
CommitLineData
90702366 1// SPDX-License-Identifier: MIT
f2d7b530
MJ
2// SPDX-FileCopyrightText: 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3
2cbca301 4#ifndef _GNU_SOURCE
784b0012 5#define _GNU_SOURCE
2cbca301 6#endif
784b0012
MD
7#include <errno.h>
8#include <sched.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <unistd.h>
13#include <syscall.h>
14#include <assert.h>
15#include <signal.h>
0ceae74a 16#include <limits.h>
9698c399 17#include <dlfcn.h>
170f840b 18#include <stddef.h>
df014a66 19#include <stdint.h>
baa98a34
MD
20#include <sys/auxv.h>
21#include <linux/auxvec.h>
784b0012
MD
22
23#include <rseq/rseq.h>
24
baa98a34
MD
/*
 * Fallback values for the rseq auxiliary vector keys, used only when the
 * headers included above do not already define them.
 */
#if !defined(AT_RSEQ_FEATURE_SIZE)
# define AT_RSEQ_FEATURE_SIZE	27
#endif

#if !defined(AT_RSEQ_ALIGN)
# define AT_RSEQ_ALIGN		28
#endif
32
540263e4
MD
/*
 * rseq_init() is defined further down in this file. It is declared here
 * because rseq_register_current_thread() calls it, and it is flagged as a
 * constructor.
 */
static void rseq_init(void) __attribute__((constructor));

/* Mutex taken by rseq_init() around the one-time initialization. */
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

/* Set to 1 by rseq_init(); checked there so initialization happens only once. */
static int init_done;
38
/*
 * Addresses of the "__rseq_offset", "__rseq_size" and "__rseq_flags" symbols,
 * as looked up with dlsym() by rseq_init(). They stay NULL until that lookup
 * runs, and rseq_init() treats a NULL pointer as "symbol not available".
 */
static const ptrdiff_t *libc_rseq_offset_p;
static const unsigned int *libc_rseq_size_p, *libc_rseq_flags_p;
42
/* Offset of the rseq area, relative to the thread pointer. */
ptrdiff_t rseq_offset;

/*
 * Size of the registered rseq area. Starts out as -1U, is filled in by
 * rseq_init(), and is 0 if the registration was unsuccessful.
 */
unsigned int rseq_size = -1U;

/* Flags used during rseq registration. */
unsigned int rseq_flags;

/*
 * rseq feature size supported by the kernel. Starts out as -1U, is filled in
 * by rseq_init(), and is 0 if the registration was unsuccessful.
 */
unsigned int rseq_feature_size = -1U;

/* Non-zero when librseq, rather than the libc, owns the rseq registration. */
static int rseq_ownership;

/* Non-zero once at least one rseq registration has succeeded. */
static int rseq_reg_success;
63
/*
 * Size, in bytes, reserved for the per-thread rseq area. Deliberately large so
 * the TLS allocation has room to spare.
 */
#define RSEQ_THREAD_AREA_ALLOC_SIZE	1024

/* Feature size of the original struct rseq: 20 bytes. */
#define ORIG_RSEQ_FEATURE_SIZE		20

/* Allocation size of the original struct rseq: 32 bytes. */
#define ORIG_RSEQ_ALLOC_SIZE		32
9698c399 72
470c530b
MD
73/*
74 * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the
75 * rseq_abi structure allocated size is at least
76 * RSEQ_THREAD_AREA_ALLOC_SIZE bytes to hold extra space for yet unknown
77 * kernel rseq extensions.
78 */
9698c399 79static
baa98a34 80__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
2d533093 81 .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
784b0012
MD
82};
83
/*
 * Invoke the rseq system call directly through syscall().
 *
 * All four arguments are forwarded unchanged. The return value is whatever
 * syscall() returned, converted to int.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		int flags, uint32_t sig)
{
	long ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);

	return ret;
}
89
baa98a34
MD
/*
 * Invoke the getcpu system call directly through syscall().
 *
 * @cpu and @node are forwarded as the first two arguments and NULL is passed
 * as the third. The return value is whatever syscall() returned, converted to
 * int.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret = syscall(__NR_getcpu, cpu, node, NULL);

	return ret;
}
94
8b34114a 95bool rseq_available(unsigned int query)
52e82b87
MD
96{
97 int rc;
98
8b34114a
MD
99 switch (query) {
100 case RSEQ_AVAILABLE_QUERY_KERNEL:
101 rc = sys_rseq(NULL, 0, 0, 0);
102 if (rc != -1)
103 abort();
104 switch (errno) {
105 case ENOSYS:
8b34114a
MD
106 break;
107 case EINVAL:
108 return true;
baa98a34
MD
109 default:
110 abort();
8b34114a
MD
111 }
112 break;
113 case RSEQ_AVAILABLE_QUERY_LIBC:
114 if (rseq_size && !rseq_ownership)
115 return true;
116 break;
52e82b87 117 default:
8b34114a 118 break;
52e82b87 119 }
8b34114a 120 return false;
52e82b87
MD
121}
122
9698c399 123int rseq_register_current_thread(void)
784b0012 124{
9698c399 125 int rc;
784b0012 126
540263e4
MD
127 rseq_init();
128
9698c399
MD
129 if (!rseq_ownership) {
130 /* Treat libc's ownership as a successful registration. */
131 return 0;
132 }
baa98a34
MD
133 rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
134 if (rc) {
135 if (RSEQ_READ_ONCE(rseq_reg_success)) {
136 /* Incoherent success/failure within process. */
137 abort();
138 }
9698c399 139 return -1;
baa98a34 140 }
9698c399 141 assert(rseq_current_cpu_raw() >= 0);
baa98a34 142 RSEQ_WRITE_ONCE(rseq_reg_success, 1);
9698c399 143 return 0;
784b0012
MD
144}
145
9698c399 146int rseq_unregister_current_thread(void)
784b0012 147{
9698c399 148 int rc;
784b0012 149
9698c399
MD
150 if (!rseq_ownership) {
151 /* Treat libc's ownership as a successful unregistration. */
152 return 0;
153 }
baa98a34 154 rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
9698c399
MD
155 if (rc)
156 return -1;
157 return 0;
784b0012
MD
158}
159
baa98a34
MD
160static
161unsigned int get_rseq_feature_size(void)
162{
163 unsigned long auxv_rseq_feature_size, auxv_rseq_align;
164
165 auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
166 assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
167
168 auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
169 assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
170 if (auxv_rseq_feature_size)
171 return auxv_rseq_feature_size;
172 else
173 return ORIG_RSEQ_FEATURE_SIZE;
174}
175
0d0cf5d1
MJ
176/*
177 * Initialize the public symbols for the rseq offset, size, feature size and
178 * flags prior to registering threads. If glibc owns the registration, get the
179 * values from its public symbols.
180 */
540263e4 181static
9698c399 182void rseq_init(void)
784b0012 183{
0d0cf5d1 184 /* Ensure initialization is only done once. */
540263e4
MD
185 if (RSEQ_READ_ONCE(init_done))
186 return;
187
0d0cf5d1
MJ
188 /*
189 * Take the mutex, check the initialization flag again and atomically
190 * set it to ensure we are the only thread doing the initialization.
191 */
540263e4
MD
192 pthread_mutex_lock(&init_lock);
193 if (init_done)
194 goto unlock;
195 RSEQ_WRITE_ONCE(init_done, 1);
0d0cf5d1
MJ
196
197 /*
198 * Check for glibc rseq support, if the 3 public symbols are found and
199 * the rseq_size is not zero, glibc owns the registration.
200 */
9698c399
MD
201 libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
202 libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
203 libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
ad538a80
MJ
204 if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
205 *libc_rseq_size_p != 0) {
9698c399
MD
206 /* rseq registration owned by glibc */
207 rseq_offset = *libc_rseq_offset_p;
208 rseq_size = *libc_rseq_size_p;
209 rseq_flags = *libc_rseq_flags_p;
baa98a34 210 rseq_feature_size = get_rseq_feature_size();
0d0cf5d1
MJ
211
212 /*
213 * The registered rseq area could be smaller than the feature
214 * size reported by the kernel auxval. Cap it to the rseq size
215 * so we don't try to access features past the end of the rseq
216 * area.
217 */
baa98a34
MD
218 if (rseq_feature_size > rseq_size)
219 rseq_feature_size = rseq_size;
540263e4 220 goto unlock;
0ceae74a 221 }
0d0cf5d1
MJ
222
223 /* librseq owns the registration */
9698c399 224 rseq_ownership = 1;
0d0cf5d1 225
9de60bd2
MJ
226 /* Calculate the offset of the rseq area from the thread pointer. */
227 rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer();
228
229 /* rseq flags are deprecated, always set to 0. */
230 rseq_flags = 0;
231
0d0cf5d1
MJ
232 /*
233 * Check if the rseq syscall is available, if not set the size and
234 * feature_size to 0.
235 */
baa98a34
MD
236 if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
237 rseq_size = 0;
238 rseq_feature_size = 0;
239 goto unlock;
240 }
0d0cf5d1 241
0d0cf5d1
MJ
242 /*
243 * If the feature size matches the original ABI (20), set the size to
244 * match the original ABI allocation (32), otherwise use the allocated
245 * size.
246 */
baa98a34
MD
247 rseq_feature_size = get_rseq_feature_size();
248 if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
249 rseq_size = ORIG_RSEQ_ALLOC_SIZE;
250 else
251 rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
540263e4
MD
252unlock:
253 pthread_mutex_unlock(&init_lock);
784b0012
MD
254}
255
9698c399
MD
256static __attribute__((destructor))
257void rseq_exit(void)
784b0012 258{
9698c399
MD
259 if (!rseq_ownership)
260 return;
261 rseq_offset = 0;
262 rseq_size = -1U;
baa98a34 263 rseq_feature_size = -1U;
9698c399 264 rseq_ownership = 0;
784b0012
MD
265}
266
/*
 * Return the current CPU number as reported by sched_getcpu().
 *
 * If sched_getcpu() returns a negative value, an error message is printed
 * with perror() and the process is aborted.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu = sched_getcpu();

	if (cpu >= 0)
		return cpu;

	perror("sched_getcpu()");
	abort();
}
baa98a34
MD
278
/*
 * Return the node id reported by sys_getcpu().
 *
 * If sys_getcpu() returns non-zero, an error message is printed with perror()
 * and that return value is handed back to the caller instead.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_id, node_id;
	int ret = sys_getcpu(&cpu_id, &node_id);

	if (ret == 0)
		return (int32_t) node_id;

	perror("sys_getcpu()");
	return ret;
}
This page took 0.035941 seconds and 4 git commands to generate.