Document alignment of rseq_abi for allocated size
[librseq.git] / src / rseq.c
1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3
4 #ifndef _GNU_SOURCE
5 #define _GNU_SOURCE
6 #endif
7 #include <errno.h>
8 #include <sched.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <unistd.h>
13 #include <syscall.h>
14 #include <assert.h>
15 #include <signal.h>
16 #include <limits.h>
17 #include <dlfcn.h>
18 #include <stddef.h>
19 #include <stdint.h>
20 #include <sys/auxv.h>
21 #include <linux/auxvec.h>
22
23 #include <rseq/rseq.h>
24
25 #ifndef AT_RSEQ_FEATURE_SIZE
26 # define AT_RSEQ_FEATURE_SIZE 27
27 #endif
28
29 #ifndef AT_RSEQ_ALIGN
30 # define AT_RSEQ_ALIGN 28
31 #endif
32
33 static __attribute__((constructor))
34 void rseq_init(void);
35
/*
 * One-time initialization state. init_lock is taken by rseq_init()
 * around the initialization sequence; init_done records that the
 * sequence has been entered.
 */
static int init_done;
static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Addresses of the rseq symbols looked up with dlsym() in rseq_init().
 * They stay NULL until that lookup runs.
 */
static const unsigned int *libc_rseq_flags_p;
static const unsigned int *libc_rseq_size_p;
static const ptrdiff_t *libc_rseq_offset_p;

/* Offset from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;

/*
 * Size of the registered rseq area. 0 if the registration was
 * unsuccessful. Starts out as -1U, which is also the value restored by
 * rseq_exit().
 */
unsigned int rseq_size = -1U;

/* Flags used during rseq registration. */
unsigned int rseq_flags;

/*
 * rseq feature size supported by the kernel. 0 if the registration was
 * unsuccessful. Starts out as -1U, which is also the value restored by
 * rseq_exit().
 */
unsigned int rseq_feature_size = -1U;

/* Non-zero when this library (rather than libc) handles rseq registration. */
static int rseq_ownership;

/* Set once at least one rseq registration has succeeded. */
static int rseq_reg_success;
63
64 /* Allocate a large area for the TLS. */
65 #define RSEQ_THREAD_AREA_ALLOC_SIZE 1024
66
67 /* Original struct rseq feature size is 20 bytes. */
68 #define ORIG_RSEQ_FEATURE_SIZE 20
69
70 /* Original struct rseq allocation size is 32 bytes. */
71 #define ORIG_RSEQ_ALLOC_SIZE 32
72
73 /*
74 * The alignment on RSEQ_THREAD_AREA_ALLOC_SIZE guarantees that the
75 * rseq_abi structure allocated size is at least
76 * RSEQ_THREAD_AREA_ALLOC_SIZE bytes to hold extra space for yet unknown
77 * kernel rseq extensions.
78 */
79 static
80 __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
81 .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
82 };
83
/*
 * Invoke the rseq system call through syscall().
 *
 * All four arguments are forwarded unchanged; the value returned by
 * syscall() is handed back to the caller as an int.
 */
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
		int flags, uint32_t sig)
{
	long ret;

	ret = syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
	return (int) ret;
}
89
/*
 * Invoke the getcpu system call through syscall().
 *
 * @cpu and @node are passed straight to the kernel; the third syscall
 * argument is always NULL. Returns the value returned by syscall().
 */
static int sys_getcpu(unsigned *cpu, unsigned *node)
{
	long ret;

	ret = syscall(__NR_getcpu, cpu, node, NULL);
	return (int) ret;
}
94
95 bool rseq_available(unsigned int query)
96 {
97 int rc;
98
99 switch (query) {
100 case RSEQ_AVAILABLE_QUERY_KERNEL:
101 rc = sys_rseq(NULL, 0, 0, 0);
102 if (rc != -1)
103 abort();
104 switch (errno) {
105 case ENOSYS:
106 break;
107 case EINVAL:
108 return true;
109 default:
110 abort();
111 }
112 break;
113 case RSEQ_AVAILABLE_QUERY_LIBC:
114 if (rseq_size && !rseq_ownership)
115 return true;
116 break;
117 default:
118 break;
119 }
120 return false;
121 }
122
123 int rseq_register_current_thread(void)
124 {
125 int rc;
126
127 rseq_init();
128
129 if (!rseq_ownership) {
130 /* Treat libc's ownership as a successful registration. */
131 return 0;
132 }
133 rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
134 if (rc) {
135 if (RSEQ_READ_ONCE(rseq_reg_success)) {
136 /* Incoherent success/failure within process. */
137 abort();
138 }
139 return -1;
140 }
141 assert(rseq_current_cpu_raw() >= 0);
142 RSEQ_WRITE_ONCE(rseq_reg_success, 1);
143 return 0;
144 }
145
146 int rseq_unregister_current_thread(void)
147 {
148 int rc;
149
150 if (!rseq_ownership) {
151 /* Treat libc's ownership as a successful unregistration. */
152 return 0;
153 }
154 rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
155 if (rc)
156 return -1;
157 return 0;
158 }
159
160 static
161 unsigned int get_rseq_feature_size(void)
162 {
163 unsigned long auxv_rseq_feature_size, auxv_rseq_align;
164
165 auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
166 assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
167
168 auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
169 assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
170 if (auxv_rseq_feature_size)
171 return auxv_rseq_feature_size;
172 else
173 return ORIG_RSEQ_FEATURE_SIZE;
174 }
175
176 static
177 void rseq_init(void)
178 {
179 if (RSEQ_READ_ONCE(init_done))
180 return;
181
182 pthread_mutex_lock(&init_lock);
183 if (init_done)
184 goto unlock;
185 RSEQ_WRITE_ONCE(init_done, 1);
186 libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
187 libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
188 libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
189 if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
190 *libc_rseq_size_p != 0) {
191 /* rseq registration owned by glibc */
192 rseq_offset = *libc_rseq_offset_p;
193 rseq_size = *libc_rseq_size_p;
194 rseq_flags = *libc_rseq_flags_p;
195 rseq_feature_size = get_rseq_feature_size();
196 if (rseq_feature_size > rseq_size)
197 rseq_feature_size = rseq_size;
198 goto unlock;
199 }
200 rseq_ownership = 1;
201 if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
202 rseq_size = 0;
203 rseq_feature_size = 0;
204 goto unlock;
205 }
206 rseq_offset = (uintptr_t)&__rseq_abi - (uintptr_t)rseq_thread_pointer();
207 rseq_flags = 0;
208 rseq_feature_size = get_rseq_feature_size();
209 if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
210 rseq_size = ORIG_RSEQ_ALLOC_SIZE;
211 else
212 rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
213 unlock:
214 pthread_mutex_unlock(&init_lock);
215 }
216
217 static __attribute__((destructor))
218 void rseq_exit(void)
219 {
220 if (!rseq_ownership)
221 return;
222 rseq_offset = 0;
223 rseq_size = -1U;
224 rseq_feature_size = -1U;
225 rseq_ownership = 0;
226 }
227
/*
 * Return the current CPU number as reported by sched_getcpu().
 *
 * A negative result from sched_getcpu() is reported with perror() and
 * aborts the process, so this function only ever returns a
 * non-negative value.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t cpu = sched_getcpu();

	if (cpu >= 0)
		return cpu;

	perror("sched_getcpu()");
	abort();
}
239
/*
 * Return the current node number obtained through sys_getcpu().
 *
 * On failure the error is reported with perror() and the non-zero
 * value returned by sys_getcpu() is handed back to the caller.
 */
int32_t rseq_fallback_current_node(void)
{
	uint32_t cpu_id, node_id;
	int ret = sys_getcpu(&cpu_id, &node_id);

	if (ret == 0)
		return (int32_t) node_id;

	perror("sys_getcpu()");
	return ret;
}
This page took 0.036537 seconds and 5 git commands to generate.