Commit | Line | Data |
---|---|---|
90702366 | 1 | // SPDX-License-Identifier: MIT |
f2d7b530 MJ |
2 | // SPDX-FileCopyrightText: 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> |
3 | ||
2cbca301 | 4 | #ifndef _GNU_SOURCE |
784b0012 | 5 | #define _GNU_SOURCE |
2cbca301 | 6 | #endif |
784b0012 MD |
7 | #include <errno.h> |
8 | #include <sched.h> | |
9 | #include <stdio.h> | |
10 | #include <stdlib.h> | |
11 | #include <string.h> | |
12 | #include <unistd.h> | |
13 | #include <syscall.h> | |
14 | #include <assert.h> | |
15 | #include <signal.h> | |
0ceae74a | 16 | #include <limits.h> |
9698c399 | 17 | #include <dlfcn.h> |
170f840b | 18 | #include <stddef.h> |
df014a66 | 19 | #include <stdint.h> |
baa98a34 MD |
20 | #include <sys/auxv.h> |
21 | #include <linux/auxvec.h> | |
784b0012 MD |
22 | |
23 | #include <rseq/rseq.h> | |
47c725dd | 24 | #include "smp.h" |
784b0012 | 25 | |
baa98a34 MD |
26 | #ifndef AT_RSEQ_FEATURE_SIZE |
27 | # define AT_RSEQ_FEATURE_SIZE 27 | |
28 | #endif | |
29 | ||
30 | #ifndef AT_RSEQ_ALIGN | |
31 | # define AT_RSEQ_ALIGN 28 | |
32 | #endif | |
33 | ||
540263e4 MD |
34 | static __attribute__((constructor)) |
35 | void rseq_init(void); | |
36 | ||
37 | static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; | |
38 | static int init_done; | |
39 | ||
170f840b | 40 | static const ptrdiff_t *libc_rseq_offset_p; |
9698c399 MD |
41 | static const unsigned int *libc_rseq_size_p; |
42 | static const unsigned int *libc_rseq_flags_p; | |
43 | ||
baa98a34 | 44 | /* Offset from the thread pointer to the rseq area. */ |
170f840b | 45 | ptrdiff_t rseq_offset; |
9698c399 | 46 | |
baa98a34 MD |
47 | /* |
48 | * Size of the registered rseq area. 0 if the registration was | |
49 | * unsuccessful. | |
50 | */ | |
9698c399 MD |
51 | unsigned int rseq_size = -1U; |
52 | ||
baa98a34 | 53 | /* Flags used during rseq registration. */ |
9698c399 MD |
54 | unsigned int rseq_flags; |
55 | ||
baa98a34 MD |
56 | /* |
57 | * rseq feature size supported by the kernel. 0 if the registration was | |
58 | * unsuccessful. | |
59 | */ | |
60 | unsigned int rseq_feature_size = -1U; | |
61 | ||
9698c399 | 62 | static int rseq_ownership; |
baa98a34 MD |
63 | static int rseq_reg_success; /* At least one rseq registration has succeeded. */ |
64 | ||
65 | /* Allocate a large area for the TLS. */ | |
66 | #define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 | |
67 | ||
68 | /* Original struct rseq feature size is 20 bytes. */ | |
69 | #define ORIG_RSEQ_FEATURE_SIZE 20 | |
70 | ||
71 | /* Original struct rseq allocation size is 32 bytes. */ | |
72 | #define ORIG_RSEQ_ALLOC_SIZE 32 | |
9698c399 | 73 | |
470c530b MD |
74 | /* |
75 | * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the | |
76 | * rseq_abi structure allocated size is at least | |
77 | * RSEQ_THREAD_AREA_ALLOC_SIZE bytes to hold extra space for yet unknown | |
78 | * kernel rseq extensions. | |
79 | */ | |
9698c399 | 80 | static |
baa98a34 | 81 | __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { |
2d533093 | 82 | .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, |
784b0012 MD |
83 | }; |
84 | ||
/*
 * Invoke the rseq system call directly through syscall().
 *
 * The arguments are forwarded unchanged and the syscall() result is
 * returned to the caller.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		int flags, uint32_t sig)
{
	long ret;

	ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
	return ret;
}
90 | ||
baa98a34 MD |
/*
 * Invoke the getcpu system call directly through syscall().
 *
 * @cpu and @node are forwarded unchanged; the third syscall argument is
 * always NULL. The syscall() result is returned to the caller.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret;

	ret = syscall(__NR_getcpu, cpu, node, NULL);
	return ret;
}
95 | ||
8b34114a | 96 | bool rseq_available(unsigned int query) |
52e82b87 MD |
97 | { |
98 | int rc; | |
99 | ||
8b34114a MD |
100 | switch (query) { |
101 | case RSEQ_AVAILABLE_QUERY_KERNEL: | |
102 | rc = sys_rseq(NULL, 0, 0, 0); | |
103 | if (rc != -1) | |
104 | abort(); | |
105 | switch (errno) { | |
106 | case ENOSYS: | |
8b34114a MD |
107 | break; |
108 | case EINVAL: | |
109 | return true; | |
baa98a34 MD |
110 | default: |
111 | abort(); | |
8b34114a MD |
112 | } |
113 | break; | |
114 | case RSEQ_AVAILABLE_QUERY_LIBC: | |
115 | if (rseq_size && !rseq_ownership) | |
116 | return true; | |
117 | break; | |
52e82b87 | 118 | default: |
8b34114a | 119 | break; |
52e82b87 | 120 | } |
8b34114a | 121 | return false; |
52e82b87 MD |
122 | } |
123 | ||
9698c399 | 124 | int rseq_register_current_thread(void) |
784b0012 | 125 | { |
9698c399 | 126 | int rc; |
784b0012 | 127 | |
540263e4 MD |
128 | rseq_init(); |
129 | ||
9698c399 MD |
130 | if (!rseq_ownership) { |
131 | /* Treat libc's ownership as a successful registration. */ | |
132 | return 0; | |
133 | } | |
baa98a34 MD |
134 | rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); |
135 | if (rc) { | |
136 | if (RSEQ_READ_ONCE(rseq_reg_success)) { | |
137 | /* Incoherent success/failure within process. */ | |
138 | abort(); | |
139 | } | |
9698c399 | 140 | return -1; |
baa98a34 | 141 | } |
9698c399 | 142 | assert(rseq_current_cpu_raw() >= 0); |
baa98a34 | 143 | RSEQ_WRITE_ONCE(rseq_reg_success, 1); |
9698c399 | 144 | return 0; |
784b0012 MD |
145 | } |
146 | ||
9698c399 | 147 | int rseq_unregister_current_thread(void) |
784b0012 | 148 | { |
9698c399 | 149 | int rc; |
784b0012 | 150 | |
9698c399 MD |
151 | if (!rseq_ownership) { |
152 | /* Treat libc's ownership as a successful unregistration. */ | |
153 | return 0; | |
154 | } | |
baa98a34 | 155 | rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); |
9698c399 MD |
156 | if (rc) |
157 | return -1; | |
158 | return 0; | |
784b0012 MD |
159 | } |
160 | ||
baa98a34 MD |
161 | static |
162 | unsigned int get_rseq_feature_size(void) | |
163 | { | |
164 | unsigned long auxv_rseq_feature_size, auxv_rseq_align; | |
165 | ||
166 | auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); | |
167 | assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); | |
168 | ||
169 | auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); | |
170 | assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); | |
171 | if (auxv_rseq_feature_size) | |
172 | return auxv_rseq_feature_size; | |
173 | else | |
174 | return ORIG_RSEQ_FEATURE_SIZE; | |
175 | } | |
176 | ||
0d0cf5d1 MJ |
177 | /* |
178 | * Initialize the public symbols for the rseq offset, size, feature size and | |
179 | * flags prior to registering threads. If glibc owns the registration, get the | |
180 | * values from its public symbols. | |
181 | */ | |
540263e4 | 182 | static |
9698c399 | 183 | void rseq_init(void) |
784b0012 | 184 | { |
0d0cf5d1 | 185 | /* Ensure initialization is only done once. */ |
540263e4 MD |
186 | if (RSEQ_READ_ONCE(init_done)) |
187 | return; | |
188 | ||
0d0cf5d1 MJ |
189 | /* |
190 | * Take the mutex, check the initialization flag again and atomically | |
191 | * set it to ensure we are the only thread doing the initialization. | |
192 | */ | |
540263e4 MD |
193 | pthread_mutex_lock(&init_lock); |
194 | if (init_done) | |
195 | goto unlock; | |
196 | RSEQ_WRITE_ONCE(init_done, 1); | |
0d0cf5d1 MJ |
197 | |
198 | /* | |
199 | * Check for glibc rseq support, if the 3 public symbols are found and | |
200 | * the rseq_size is not zero, glibc owns the registration. | |
201 | */ | |
9698c399 MD |
202 | libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); |
203 | libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); | |
204 | libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); | |
ad538a80 MJ |
205 | if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && |
206 | *libc_rseq_size_p != 0) { | |
9698c399 MD |
207 | /* rseq registration owned by glibc */ |
208 | rseq_offset = *libc_rseq_offset_p; | |
209 | rseq_size = *libc_rseq_size_p; | |
210 | rseq_flags = *libc_rseq_flags_p; | |
baa98a34 | 211 | rseq_feature_size = get_rseq_feature_size(); |
0d0cf5d1 MJ |
212 | |
213 | /* | |
214 | * The registered rseq area could be smaller than the feature | |
215 | * size reported by the kernel auxval. Cap it to the rseq size | |
216 | * so we don't try to access features past the end of the rseq | |
217 | * area. | |
218 | */ | |
baa98a34 MD |
219 | if (rseq_feature_size > rseq_size) |
220 | rseq_feature_size = rseq_size; | |
540263e4 | 221 | goto unlock; |
0ceae74a | 222 | } |
0d0cf5d1 MJ |
223 | |
224 | /* librseq owns the registration */ | |
9698c399 | 225 | rseq_ownership = 1; |
0d0cf5d1 | 226 | |
9de60bd2 MJ |
227 | /* Calculate the offset of the rseq area from the thread pointer. */ |
228 | rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer(); | |
229 | ||
230 | /* rseq flags are deprecated, always set to 0. */ | |
231 | rseq_flags = 0; | |
232 | ||
0d0cf5d1 MJ |
233 | /* |
234 | * Check if the rseq syscall is available, if not set the size and | |
235 | * feature_size to 0. | |
236 | */ | |
baa98a34 MD |
237 | if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) { |
238 | rseq_size = 0; | |
239 | rseq_feature_size = 0; | |
240 | goto unlock; | |
241 | } | |
0d0cf5d1 | 242 | |
0d0cf5d1 MJ |
243 | /* |
244 | * If the feature size matches the original ABI (20), set the size to | |
245 | * match the original ABI allocation (32), otherwise use the allocated | |
246 | * size. | |
247 | */ | |
baa98a34 MD |
248 | rseq_feature_size = get_rseq_feature_size(); |
249 | if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) | |
250 | rseq_size = ORIG_RSEQ_ALLOC_SIZE; | |
251 | else | |
252 | rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; | |
540263e4 MD |
253 | unlock: |
254 | pthread_mutex_unlock(&init_lock); | |
784b0012 MD |
255 | } |
256 | ||
9698c399 MD |
257 | static __attribute__((destructor)) |
258 | void rseq_exit(void) | |
784b0012 | 259 | { |
9698c399 MD |
260 | if (!rseq_ownership) |
261 | return; | |
262 | rseq_offset = 0; | |
263 | rseq_size = -1U; | |
baa98a34 | 264 | rseq_feature_size = -1U; |
9698c399 | 265 | rseq_ownership = 0; |
784b0012 MD |
266 | } |
267 | ||
/*
 * Return the current CPU number as reported by sched_getcpu().
 *
 * If sched_getcpu() reports a negative value, print a diagnostic with
 * perror() and abort the process.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu = sched_getcpu();

	if (cpu >= 0)
		return cpu;

	perror("sched_getcpu()");
	abort();
}
baa98a34 MD |
279 | |
/*
 * Return the current node id as reported by the getcpu system call.
 *
 * If the system call returns non-zero, print a diagnostic with perror()
 * and return that value instead.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_id, node_id;
	int ret = sys_getcpu(&cpu_id, &node_id);

	if (ret != 0) {
		perror("sys_getcpu()");
		return ret;
	}
	return (int32_t) node_id;
}
47c725dd MD |
292 | |
/*
 * Return the value reported by get_possible_cpus_array_len().
 */
int rseq_get_max_nr_cpus(void)
{
	int nr_cpus = get_possible_cpus_array_len();

	return nr_cpus;
}