Commit | Line | Data |
---|---|---|
90702366 | 1 | // SPDX-License-Identifier: MIT |
f2d7b530 MJ |
2 | // SPDX-FileCopyrightText: 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> |
3 | ||
2cbca301 | 4 | #ifndef _GNU_SOURCE |
784b0012 | 5 | #define _GNU_SOURCE |
2cbca301 | 6 | #endif |
784b0012 MD |
7 | #include <errno.h> |
8 | #include <sched.h> | |
9 | #include <stdio.h> | |
10 | #include <stdlib.h> | |
11 | #include <string.h> | |
12 | #include <unistd.h> | |
13 | #include <syscall.h> | |
14 | #include <assert.h> | |
15 | #include <signal.h> | |
0ceae74a | 16 | #include <limits.h> |
9698c399 | 17 | #include <dlfcn.h> |
170f840b | 18 | #include <stddef.h> |
df014a66 | 19 | #include <stdint.h> |
baa98a34 MD |
20 | #include <sys/auxv.h> |
21 | #include <linux/auxvec.h> | |
784b0012 MD |
22 | |
23 | #include <rseq/rseq.h> | |
24 | ||
baa98a34 MD |
/*
 * Fallback definitions of the rseq-related auxiliary vector keys, used
 * only when the included system headers do not already provide them.
 */
#if !defined(AT_RSEQ_FEATURE_SIZE)
# define AT_RSEQ_FEATURE_SIZE		27
#endif

#if !defined(AT_RSEQ_ALIGN)
# define AT_RSEQ_ALIGN			28
#endif
32 | ||
540263e4 MD |
33 | static __attribute__((constructor)) |
34 | void rseq_init(void); | |
35 | ||
36 | static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; | |
37 | static int init_done; | |
38 | ||
170f840b | 39 | static const ptrdiff_t *libc_rseq_offset_p; |
9698c399 MD |
40 | static const unsigned int *libc_rseq_size_p; |
41 | static const unsigned int *libc_rseq_flags_p; | |
42 | ||
baa98a34 | 43 | /* Offset from the thread pointer to the rseq area. */ |
170f840b | 44 | ptrdiff_t rseq_offset; |
9698c399 | 45 | |
baa98a34 MD |
46 | /* |
47 | * Size of the registered rseq area. 0 if the registration was | |
48 | * unsuccessful. | |
49 | */ | |
9698c399 MD |
50 | unsigned int rseq_size = -1U; |
51 | ||
baa98a34 | 52 | /* Flags used during rseq registration. */ |
9698c399 MD |
53 | unsigned int rseq_flags; |
54 | ||
baa98a34 MD |
55 | /* |
56 | * rseq feature size supported by the kernel. 0 if the registration was | |
57 | * unsuccessful. | |
58 | */ | |
59 | unsigned int rseq_feature_size = -1U; | |
60 | ||
9698c399 | 61 | static int rseq_ownership; |
baa98a34 MD |
62 | static int rseq_reg_success; /* At least one rseq registration has succeded. */ |
63 | ||
64 | /* Allocate a large area for the TLS. */ | |
65 | #define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 | |
66 | ||
67 | /* Original struct rseq feature size is 20 bytes. */ | |
68 | #define ORIG_RSEQ_FEATURE_SIZE 20 | |
69 | ||
70 | /* Original struct rseq allocation size is 32 bytes. */ | |
71 | #define ORIG_RSEQ_ALLOC_SIZE 32 | |
9698c399 | 72 | |
470c530b MD |
73 | /* |
74 | * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the | |
75 | * rseq_abi structure allocated size is at least | |
76 | * RSEQ_THREAD_AREA_ALLOC_SIZE bytes to hold extra space for yet unknown | |
77 | * kernel rseq extensions. | |
78 | */ | |
9698c399 | 79 | static |
baa98a34 | 80 | __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = { |
2d533093 | 81 | .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, |
784b0012 MD |
82 | }; |
83 | ||
/*
 * Invoke the raw rseq system call.
 *
 * All four arguments are forwarded unchanged to syscall(__NR_rseq, ...).
 * Returns the syscall() result narrowed to int.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		    int flags, uint32_t sig)
{
	long ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);

	return (int) ret;
}
89 | ||
baa98a34 MD |
/*
 * Invoke the raw getcpu system call.
 *
 * @cpu and @node are forwarded as the first two arguments; the third
 * argument is always passed as NULL.
 * Returns the syscall() result narrowed to int.
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret = syscall(__NR_getcpu, cpu, node, NULL);

	return (int) ret;
}
94 | ||
8b34114a | 95 | bool rseq_available(unsigned int query) |
52e82b87 MD |
96 | { |
97 | int rc; | |
98 | ||
8b34114a MD |
99 | switch (query) { |
100 | case RSEQ_AVAILABLE_QUERY_KERNEL: | |
101 | rc = sys_rseq(NULL, 0, 0, 0); | |
102 | if (rc != -1) | |
103 | abort(); | |
104 | switch (errno) { | |
105 | case ENOSYS: | |
8b34114a MD |
106 | break; |
107 | case EINVAL: | |
108 | return true; | |
baa98a34 MD |
109 | default: |
110 | abort(); | |
8b34114a MD |
111 | } |
112 | break; | |
113 | case RSEQ_AVAILABLE_QUERY_LIBC: | |
114 | if (rseq_size && !rseq_ownership) | |
115 | return true; | |
116 | break; | |
52e82b87 | 117 | default: |
8b34114a | 118 | break; |
52e82b87 | 119 | } |
8b34114a | 120 | return false; |
52e82b87 MD |
121 | } |
122 | ||
9698c399 | 123 | int rseq_register_current_thread(void) |
784b0012 | 124 | { |
9698c399 | 125 | int rc; |
784b0012 | 126 | |
540263e4 MD |
127 | rseq_init(); |
128 | ||
9698c399 MD |
129 | if (!rseq_ownership) { |
130 | /* Treat libc's ownership as a successful registration. */ | |
131 | return 0; | |
132 | } | |
baa98a34 MD |
133 | rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); |
134 | if (rc) { | |
135 | if (RSEQ_READ_ONCE(rseq_reg_success)) { | |
136 | /* Incoherent success/failure within process. */ | |
137 | abort(); | |
138 | } | |
9698c399 | 139 | return -1; |
baa98a34 | 140 | } |
9698c399 | 141 | assert(rseq_current_cpu_raw() >= 0); |
baa98a34 | 142 | RSEQ_WRITE_ONCE(rseq_reg_success, 1); |
9698c399 | 143 | return 0; |
784b0012 MD |
144 | } |
145 | ||
9698c399 | 146 | int rseq_unregister_current_thread(void) |
784b0012 | 147 | { |
9698c399 | 148 | int rc; |
784b0012 | 149 | |
9698c399 MD |
150 | if (!rseq_ownership) { |
151 | /* Treat libc's ownership as a successful unregistration. */ | |
152 | return 0; | |
153 | } | |
baa98a34 | 154 | rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); |
9698c399 MD |
155 | if (rc) |
156 | return -1; | |
157 | return 0; | |
784b0012 MD |
158 | } |
159 | ||
baa98a34 MD |
160 | static |
161 | unsigned int get_rseq_feature_size(void) | |
162 | { | |
163 | unsigned long auxv_rseq_feature_size, auxv_rseq_align; | |
164 | ||
165 | auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); | |
166 | assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); | |
167 | ||
168 | auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); | |
169 | assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); | |
170 | if (auxv_rseq_feature_size) | |
171 | return auxv_rseq_feature_size; | |
172 | else | |
173 | return ORIG_RSEQ_FEATURE_SIZE; | |
174 | } | |
175 | ||
540263e4 | 176 | static |
9698c399 | 177 | void rseq_init(void) |
784b0012 | 178 | { |
540263e4 MD |
179 | if (RSEQ_READ_ONCE(init_done)) |
180 | return; | |
181 | ||
182 | pthread_mutex_lock(&init_lock); | |
183 | if (init_done) | |
184 | goto unlock; | |
185 | RSEQ_WRITE_ONCE(init_done, 1); | |
9698c399 MD |
186 | libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); |
187 | libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); | |
188 | libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); | |
ad538a80 MJ |
189 | if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && |
190 | *libc_rseq_size_p != 0) { | |
9698c399 MD |
191 | /* rseq registration owned by glibc */ |
192 | rseq_offset = *libc_rseq_offset_p; | |
193 | rseq_size = *libc_rseq_size_p; | |
194 | rseq_flags = *libc_rseq_flags_p; | |
baa98a34 MD |
195 | rseq_feature_size = get_rseq_feature_size(); |
196 | if (rseq_feature_size > rseq_size) | |
197 | rseq_feature_size = rseq_size; | |
540263e4 | 198 | goto unlock; |
0ceae74a | 199 | } |
9698c399 | 200 | rseq_ownership = 1; |
baa98a34 MD |
201 | if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) { |
202 | rseq_size = 0; | |
203 | rseq_feature_size = 0; | |
204 | goto unlock; | |
205 | } | |
df014a66 | 206 | rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer(); |
9698c399 | 207 | rseq_flags = 0; |
baa98a34 MD |
208 | rseq_feature_size = get_rseq_feature_size(); |
209 | if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) | |
210 | rseq_size = ORIG_RSEQ_ALLOC_SIZE; | |
211 | else | |
212 | rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; | |
540263e4 MD |
213 | unlock: |
214 | pthread_mutex_unlock(&init_lock); | |
784b0012 MD |
215 | } |
216 | ||
9698c399 MD |
217 | static __attribute__((destructor)) |
218 | void rseq_exit(void) | |
784b0012 | 219 | { |
9698c399 MD |
220 | if (!rseq_ownership) |
221 | return; | |
222 | rseq_offset = 0; | |
223 | rseq_size = -1U; | |
baa98a34 | 224 | rseq_feature_size = -1U; |
9698c399 | 225 | rseq_ownership = 0; |
784b0012 MD |
226 | } |
227 | ||
/*
 * Obtain the current CPU number through sched_getcpu().
 *
 * Returns the non-negative CPU number. If sched_getcpu() reports an
 * error, prints a diagnostic with perror() and aborts.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu_nr = sched_getcpu();

	if (cpu_nr >= 0)
		return cpu_nr;

	perror("sched_getcpu()");
	abort();
}
baa98a34 MD |
239 | |
/*
 * Obtain the current node number through the getcpu system call.
 *
 * Returns the node number on success. On failure, prints a diagnostic
 * with perror() and returns the non-zero value reported by sys_getcpu().
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_id, node_id;
	int status = sys_getcpu(&cpu_id, &node_id);

	if (status == 0)
		return (int32_t) node_id;

	perror("sys_getcpu()");
	return status;
}