Fix: set FD_CLOEXEC on incoming FDs.
[lttng-ust.git] / libringbuffer / shm.c
CommitLineData
1d498196
MD
1/*
2 * libringbuffer/shm.c
3 *
e92f3e28 4 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
1d498196 5 *
e92f3e28
MD
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; only
9 * version 2.1 of the License.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1d498196
MD
19 */
20
3fbec7dc 21#define _LGPL_SOURCE
bfcda6ce 22#include <config.h>
1d498196
MD
23#include "shm.h"
24#include <unistd.h>
25#include <fcntl.h>
26#include <sys/mman.h>
a9ff648c 27#include <sys/types.h>
1d498196
MD
28#include <sys/stat.h> /* For mode constants */
29#include <fcntl.h> /* For O_* constants */
30#include <assert.h>
8da6cd6d
MD
31#include <stdio.h>
32#include <signal.h>
33#include <dirent.h>
4318ae1b 34#include <lttng/align.h>
96e80018 35#include <limits.h>
8a208943 36#include <stdbool.h>
bfcda6ce 37#ifdef HAVE_LIBNUMA
4b68c31f 38#include <numa.h>
8a208943 39#include <numaif.h>
bfcda6ce 40#endif
3a81f31d 41#include <helper.h>
6548fca4 42#include <ust-fd.h>
4d4838ba 43#include "mmap.h"
3a81f31d
MD
44
/*
 * zero_file - ensure the required amount of space is available behind fd.
 *
 * Writes len zero bytes through fd, one page-sized chunk at a time,
 * starting at the descriptor's current file offset. Not doing so can
 * trigger SIGBUS when going beyond the available shm space.
 *
 * Returns 0 on success. On failure a negative value is returned: -1 if
 * sysconf() or write() failed (errno is set by the failing call), or
 * -ENOMEM if the scratch page could not be allocated.
 */
static
int zero_file(int fd, size_t len)
{
	size_t written = 0;
	char *zeropage;
	long pagelen;
	int ret = 0;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (written < len) {
		size_t chunk = len - written;
		ssize_t retlen;

		/* Never write more than one page of zeroes at once. */
		if (chunk > (size_t) pagelen)
			chunk = (size_t) pagelen;
		/* Restart the write if it was interrupted by a signal. */
		do {
			retlen = write(fd, zeropage, chunk);
		} while (retlen == -1 && errno == EINTR);
		if (retlen < 0) {
			ret = (int) retlen;
			break;
		}
		/* Short writes are fine: account for what went through. */
		written += retlen;
	}
	free(zeropage);
	return ret;
}
1d498196
MD
82
83struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
84{
85 struct shm_object_table *table;
86
87 table = zmalloc(sizeof(struct shm_object_table) +
88 max_nb_obj * sizeof(table->objects[0]));
74d48abe
MD
89 if (!table)
90 return NULL;
1d498196
MD
91 table->size = max_nb_obj;
92 return table;
93}
94
74d81a6c
MD
95static
96struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
a9ff648c 97 size_t memory_map_size,
5ea386c3 98 int stream_fd)
1d498196 99{
5ea386c3 100 int shmfd, waitfd[2], ret, i;
1d498196
MD
101 struct shm_object *obj;
102 char *memory_map;
103
5ea386c3
MD
104 if (stream_fd < 0)
105 return NULL;
1d498196
MD
106 if (table->allocated_len >= table->size)
107 return NULL;
7a9c21bd 108 obj = &table->objects[table->allocated_len];
1d498196
MD
109
110 /* wait_fd: create pipe */
111 ret = pipe(waitfd);
112 if (ret < 0) {
113 PERROR("pipe");
114 goto error_pipe;
115 }
116 for (i = 0; i < 2; i++) {
117 ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
118 if (ret < 0) {
119 PERROR("fcntl");
120 goto error_fcntl;
121 }
122 }
5d61a504
MD
123 /* The write end of the pipe needs to be non-blocking */
124 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
125 if (ret < 0) {
126 PERROR("fcntl");
127 goto error_fcntl;
128 }
7a9c21bd 129 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
1d498196 130
5ea386c3 131 /* create shm */
a9ff648c 132
5ea386c3 133 shmfd = stream_fd;
3a81f31d
MD
134 ret = zero_file(shmfd, memory_map_size);
135 if (ret) {
136 PERROR("zero_file");
137 goto error_zero_file;
138 }
1d498196
MD
139 ret = ftruncate(shmfd, memory_map_size);
140 if (ret) {
141 PERROR("ftruncate");
142 goto error_ftruncate;
143 }
d0f6cf57
MD
144 /*
145 * Also ensure the file metadata is synced with the storage by using
146 * fsync(2).
147 */
148 ret = fsync(shmfd);
149 if (ret) {
150 PERROR("fsync");
151 goto error_fsync;
152 }
5ea386c3 153 obj->shm_fd_ownership = 0;
1d498196
MD
154 obj->shm_fd = shmfd;
155
156 /* memory_map: mmap */
157 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
4d4838ba 158 MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0);
1d498196
MD
159 if (memory_map == MAP_FAILED) {
160 PERROR("mmap");
161 goto error_mmap;
162 }
74d81a6c 163 obj->type = SHM_OBJECT_SHM;
1d498196
MD
164 obj->memory_map = memory_map;
165 obj->memory_map_size = memory_map_size;
166 obj->allocated_len = 0;
dc613eb9 167 obj->index = table->allocated_len++;
7a9c21bd 168
1d498196
MD
169 return obj;
170
171error_mmap:
d0f6cf57 172error_fsync:
1d498196 173error_ftruncate:
3a81f31d 174error_zero_file:
1d498196
MD
175error_fcntl:
176 for (i = 0; i < 2; i++) {
177 ret = close(waitfd[i]);
178 if (ret) {
179 PERROR("close");
180 assert(0);
181 }
182 }
183error_pipe:
1d498196 184 return NULL;
1d498196
MD
185}
186
74d81a6c
MD
187static
188struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
189 size_t memory_map_size)
190{
191 struct shm_object *obj;
192 void *memory_map;
ff0f5728 193 int waitfd[2], i, ret;
74d81a6c
MD
194
195 if (table->allocated_len >= table->size)
196 return NULL;
197 obj = &table->objects[table->allocated_len];
198
199 memory_map = zmalloc(memory_map_size);
200 if (!memory_map)
201 goto alloc_error;
202
ff0f5728
MD
203 /* wait_fd: create pipe */
204 ret = pipe(waitfd);
205 if (ret < 0) {
206 PERROR("pipe");
207 goto error_pipe;
208 }
209 for (i = 0; i < 2; i++) {
210 ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
211 if (ret < 0) {
212 PERROR("fcntl");
213 goto error_fcntl;
214 }
215 }
216 /* The write end of the pipe needs to be non-blocking */
217 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
218 if (ret < 0) {
219 PERROR("fcntl");
220 goto error_fcntl;
221 }
222 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
223
224 /* no shm_fd */
74d81a6c 225 obj->shm_fd = -1;
5ea386c3 226 obj->shm_fd_ownership = 0;
74d81a6c
MD
227
228 obj->type = SHM_OBJECT_MEM;
229 obj->memory_map = memory_map;
230 obj->memory_map_size = memory_map_size;
231 obj->allocated_len = 0;
232 obj->index = table->allocated_len++;
233
234 return obj;
235
ff0f5728
MD
236error_fcntl:
237 for (i = 0; i < 2; i++) {
238 ret = close(waitfd[i]);
239 if (ret) {
240 PERROR("close");
241 assert(0);
242 }
243 }
244error_pipe:
245 free(memory_map);
74d81a6c
MD
246alloc_error:
247 return NULL;
248}
249
8a208943
MD
/*
 * libnuma prints errors on the console even for numa_available().
 * Work-around this limitation by using get_mempolicy() directly to
 * check whether the kernel supports mempolicy.
 *
 * Returns false when get_mempolicy() fails with ENOSYS. Otherwise the
 * answer comes from numa_available(), which per numa(3) returns -1 when
 * the rest of the library must not be used; any other (non-negative)
 * value means NUMA calls are usable.
 */
#ifdef HAVE_LIBNUMA
static bool lttng_is_numa_available(void)
{
	int ret;

	ret = get_mempolicy(NULL, NULL, 0, NULL, 0);
	if (ret && errno == ENOSYS) {
		return false;
	}
	/* -1 is the only "unavailable" value; success is reported as 0. */
	return numa_available() >= 0;
}
#endif
267
74d81a6c
MD
268struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
269 size_t memory_map_size,
a9ff648c 270 enum shm_object_type type,
4b68c31f
MD
271 int stream_fd,
272 int cpu)
74d81a6c 273{
4b68c31f 274 struct shm_object *shm_object;
bfcda6ce 275#ifdef HAVE_LIBNUMA
8a208943
MD
276 int oldnode = 0, node;
277 bool numa_avail;
4b68c31f 278
8a208943
MD
279 numa_avail = lttng_is_numa_available();
280 if (numa_avail) {
281 oldnode = numa_preferred();
282 if (cpu >= 0) {
283 node = numa_node_of_cpu(cpu);
284 if (node >= 0)
285 numa_set_preferred(node);
286 }
287 if (cpu < 0 || node < 0)
288 numa_set_localalloc();
4b68c31f 289 }
bfcda6ce 290#endif /* HAVE_LIBNUMA */
74d81a6c
MD
291 switch (type) {
292 case SHM_OBJECT_SHM:
4b68c31f 293 shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
5ea386c3 294 stream_fd);
4b68c31f 295 break;
74d81a6c 296 case SHM_OBJECT_MEM:
4b68c31f
MD
297 shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
298 break;
74d81a6c
MD
299 default:
300 assert(0);
301 }
bfcda6ce 302#ifdef HAVE_LIBNUMA
8a208943
MD
303 if (numa_avail)
304 numa_set_preferred(oldnode);
bfcda6ce 305#endif /* HAVE_LIBNUMA */
4b68c31f 306 return shm_object;
74d81a6c
MD
307}
308
309struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
310 int shm_fd, int wakeup_fd, uint32_t stream_nr,
311 size_t memory_map_size)
193183fb
MD
312{
313 struct shm_object *obj;
314 char *memory_map;
74d81a6c 315 int ret;
193183fb
MD
316
317 if (table->allocated_len >= table->size)
318 return NULL;
74d81a6c
MD
319 /* streams _must_ be received in sequential order, else fail. */
320 if (stream_nr + 1 != table->allocated_len)
321 return NULL;
322
193183fb
MD
323 obj = &table->objects[table->allocated_len];
324
74d81a6c
MD
325 /* wait_fd: set write end of the pipe. */
326 obj->wait_fd[0] = -1; /* read end is unset */
327 obj->wait_fd[1] = wakeup_fd;
193183fb 328 obj->shm_fd = shm_fd;
5ea386c3 329 obj->shm_fd_ownership = 1;
193183fb 330
74d81a6c
MD
331 /* The write end of the pipe needs to be non-blocking */
332 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
333 if (ret < 0) {
334 PERROR("fcntl");
335 goto error_fcntl;
336 }
337
193183fb
MD
338 /* memory_map: mmap */
339 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
4d4838ba 340 MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0);
193183fb
MD
341 if (memory_map == MAP_FAILED) {
342 PERROR("mmap");
343 goto error_mmap;
344 }
74d81a6c 345 obj->type = SHM_OBJECT_SHM;
193183fb
MD
346 obj->memory_map = memory_map;
347 obj->memory_map_size = memory_map_size;
348 obj->allocated_len = memory_map_size;
349 obj->index = table->allocated_len++;
350
351 return obj;
352
74d81a6c 353error_fcntl:
193183fb
MD
354error_mmap:
355 return NULL;
356}
357
74d81a6c
MD
358/*
359 * Passing ownership of mem to object.
360 */
361struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
ff0f5728 362 void *mem, size_t memory_map_size, int wakeup_fd)
74d81a6c
MD
363{
364 struct shm_object *obj;
ff0f5728 365 int ret;
74d81a6c
MD
366
367 if (table->allocated_len >= table->size)
368 return NULL;
369 obj = &table->objects[table->allocated_len];
370
ff0f5728
MD
371 obj->wait_fd[0] = -1; /* read end is unset */
372 obj->wait_fd[1] = wakeup_fd;
74d81a6c 373 obj->shm_fd = -1;
5ea386c3 374 obj->shm_fd_ownership = 0;
74d81a6c 375
ff0f5728
MD
376 ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
377 if (ret < 0) {
378 PERROR("fcntl");
379 goto error_fcntl;
380 }
381 /* The write end of the pipe needs to be non-blocking */
382 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
383 if (ret < 0) {
384 PERROR("fcntl");
385 goto error_fcntl;
386 }
387
74d81a6c
MD
388 obj->type = SHM_OBJECT_MEM;
389 obj->memory_map = mem;
390 obj->memory_map_size = memory_map_size;
391 obj->allocated_len = memory_map_size;
392 obj->index = table->allocated_len++;
393
394 return obj;
ff0f5728
MD
395
396error_fcntl:
397 return NULL;
74d81a6c
MD
398}
399
1d498196 400static
6548fca4 401void shmp_object_destroy(struct shm_object *obj, int consumer)
1d498196 402{
74d81a6c
MD
403 switch (obj->type) {
404 case SHM_OBJECT_SHM:
405 {
406 int ret, i;
1d498196 407
7a784989
MD
408 ret = munmap(obj->memory_map, obj->memory_map_size);
409 if (ret) {
410 PERROR("umnmap");
411 assert(0);
412 }
6548fca4 413
5ea386c3 414 if (obj->shm_fd_ownership) {
6548fca4
MD
415 /* Delete FDs only if called from app (not consumer). */
416 if (!consumer) {
417 lttng_ust_lock_fd_tracker();
418 ret = close(obj->shm_fd);
419 if (!ret) {
420 lttng_ust_delete_fd_from_tracker(obj->shm_fd);
421 } else {
422 PERROR("close");
423 assert(0);
424 }
425 lttng_ust_unlock_fd_tracker();
426 } else {
427 ret = close(obj->shm_fd);
428 if (ret) {
429 PERROR("close");
430 assert(0);
431 }
a9ff648c
MD
432 }
433 }
74d81a6c
MD
434 for (i = 0; i < 2; i++) {
435 if (obj->wait_fd[i] < 0)
436 continue;
6548fca4
MD
437 if (!consumer) {
438 lttng_ust_lock_fd_tracker();
439 ret = close(obj->wait_fd[i]);
440 if (!ret) {
441 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
442 } else {
443 PERROR("close");
444 assert(0);
445 }
446 lttng_ust_unlock_fd_tracker();
447 } else {
448 ret = close(obj->wait_fd[i]);
449 if (ret) {
450 PERROR("close");
451 assert(0);
452 }
74d81a6c 453 }
1d498196 454 }
74d81a6c
MD
455 break;
456 }
457 case SHM_OBJECT_MEM:
ff0f5728
MD
458 {
459 int ret, i;
460
461 for (i = 0; i < 2; i++) {
462 if (obj->wait_fd[i] < 0)
463 continue;
6548fca4
MD
464 if (!consumer) {
465 lttng_ust_lock_fd_tracker();
466 ret = close(obj->wait_fd[i]);
467 if (!ret) {
468 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
469 } else {
470 PERROR("close");
471 assert(0);
472 }
473 lttng_ust_unlock_fd_tracker();
474 } else {
475 ret = close(obj->wait_fd[i]);
476 if (ret) {
477 PERROR("close");
478 assert(0);
479 }
ff0f5728
MD
480 }
481 }
74d81a6c
MD
482 free(obj->memory_map);
483 break;
ff0f5728 484 }
74d81a6c
MD
485 default:
486 assert(0);
1d498196
MD
487 }
488}
489
6548fca4 490void shm_object_table_destroy(struct shm_object_table *table, int consumer)
1d498196
MD
491{
492 int i;
493
494 for (i = 0; i < table->allocated_len; i++)
6548fca4 495 shmp_object_destroy(&table->objects[i], consumer);
1d498196
MD
496 free(table);
497}
498
499/*
500 * zalloc_shm - allocate memory within a shm object.
501 *
502 * Shared memory is already zeroed by shmget.
503 * *NOT* multithread-safe (should be protected by mutex).
504 * Returns a -1, -1 tuple on error.
505 */
506struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
507{
508 struct shm_ref ref;
509 struct shm_ref shm_ref_error = { -1, -1 };
510
511 if (obj->memory_map_size - obj->allocated_len < len)
512 return shm_ref_error;
513 ref.index = obj->index;
514 ref.offset = obj->allocated_len;
515 obj->allocated_len += len;
516 return ref;
517}
518
519void align_shm(struct shm_object *obj, size_t align)
520{
521 size_t offset_len = offset_align(obj->allocated_len, align);
522 obj->allocated_len += offset_len;
523}
This page took 0.061381 seconds and 5 git commands to generate.