Observed issue
==============
During high throughput event notification generation scenarios the
following deadlock happens:
Thread 14 (Thread 0x7f74b4ff9700 (LWP 76062)):
#0 __lll_lock_wait (futex=futex@entry=0x56408765dde8, private=0) at lowlevellock.c:52
#1 0x00007f74c941a0a3 in __GI___pthread_mutex_lock (mutex=0x56408765dde8) at ../nptl/pthread_mutex_lock.c:80
#2 0x000056408704b207 in run_command_wait (handle=0x56408765ddd0, cmd=0x7f74b4ff7f70) at notification-thread-commands.cpp:31
#3 0x000056408704bcef in notification_thread_command_remove_tracer_event_source (handle=0x56408765ddd0, tracer_event_source_fd=54) at notification-thread-commands.cpp:319
#4 0x000056408708a0c1 in delete_ust_app (app=0x7f749c000bf0) at ust-app.cpp:1059
#5 0x000056408708a511 in delete_ust_app_rcu (head=0x7f749c000ca0) at ust-app.cpp:1122
#6 0x00007f74c988b4a7 in call_rcu_thread (arg=0x7f74b8004a80) at ../src/urcu-call-rcu-impl.h:369
#7 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#8 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 13 (Thread 0x7f74b57fa700 (LWP 76047)):
#0 0x00007f74c933a49e in epoll_wait (epfd=48, events=0x7f74a4000b60, maxevents=2, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x00005640870eafa6 in compat_epoll_wait (events=0x7f74b57f9240, timeout=-1, interruptible=false) at compat/poll.cpp:280
#2 0x00005640870abb65 in thread_agent_management (data=0x56408765f0b0) at agent-thread.cpp:424
#3 0x0000564087062b1a in launch_thread (data=0x56408765f150) at thread.cpp:66
#4 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 12 (Thread 0x7f74b5ffb700 (LWP 76046)): [630/709]
#0 0x00007f74c933a49e in epoll_wait (epfd=47, events=0x7f74a0000b60, maxevents=2, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x00005640870eafa6 in compat_epoll_wait (events=0x7f74b5ffa170, timeout=-1, interruptible=false) at compat/poll.cpp:280
#2 0x00005640870a4095 in thread_application_notification (data=0x56408765ee40) at notify-apps.cpp:78
#3 0x0000564087062b1a in launch_thread (data=0x56408765eed0) at thread.cpp:66
#4 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 11 (Thread 0x7f74b67fc700 (LWP 76045)):
#0 0x00007f74c933a49e in epoll_wait (epfd=44, events=0x7f74ac000b60, maxevents=2, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x00005640870eafa6 in compat_epoll_wait (events=0x7f74b67fb170, timeout=-1, interruptible=false) at compat/poll.cpp:280
#2 0x00005640870723db in thread_application_management (data=0x56408765ebd0) at manage-apps.cpp:93
#3 0x0000564087062b1a in launch_thread (data=0x56408765ec60) at thread.cpp:66
#4 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 10 (Thread 0x7f74b6ffd700 (LWP 76044)):
#0 0x00007f74c933a49e in epoll_wait (epfd=39, events=0x7f74a8000b60, maxevents=2, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x00005640870eafa6 in compat_epoll_wait (events=0x7f74b6ffc130, timeout=-1, interruptible=false) at compat/poll.cpp:280
#2 0x0000564087070a27 in thread_application_registration (data=0x56408765e940) at register.cpp:214
#3 0x0000564087062b1a in launch_thread (data=0x56408765e9f0) at thread.cpp:66
#4 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 9 (Thread 0x7f74b77fe700 (LWP 76043)): [654/709]
#0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1 0x00005640870c8e25 in futex (uaddr=0x5640871e2800 <ust_cmd_queue>, op=0, val=-1, timeout=0x0, uaddr2=0x0, val3=0) at /home/joraj/lttng/master/install/include/urcu/futex.h:72
#2 0x00005640870c8e6d in futex_async (uaddr=0x5640871e2800 <ust_cmd_queue>, op=0, val=-1, timeout=0x0, uaddr2=0x0, val3=0) at /home/joraj/lttng/master/install/include/urcu/futex.h:104
#3 0x00005640870c939a in futex_nto1_wait (futex=0x5640871e2800 <ust_cmd_queue>) at futex.cpp:77
#4 0x000056408706f2af in thread_dispatch_ust_registration (data=0x56408765e740) at dispatch.cpp:453
#5 0x0000564087062b1a in launch_thread (data=0x56408765e760) at thread.cpp:66
#6 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#7 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 8 (Thread 0x7f74b7fff700 (LWP 76042)):
#0 0x00007f74c933a49e in epoll_wait (epfd=33, events=0x7f74b0000b60, maxevents=2, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x00005640870eafa6 in compat_epoll_wait (events=0x7f74b7ffad40, timeout=-1, interruptible=false) at compat/poll.cpp:280
#2 0x000056408706c424 in thread_manage_clients (data=0x56408765e4f0) at client.cpp:2528
#3 0x0000564087062b1a in launch_thread (data=0x56408765e560) at thread.cpp:66
#4 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 7 (Thread 0x7f74c4b8f700 (LWP 76041)): [672/709]
#0 0x00007f74c933a49e in epoll_wait (epfd=31, events=0x7f74bc000b60, maxevents=3, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x00005640870eafa6 in compat_epoll_wait (events=0x7f74c4b8e240, timeout=-1, interruptible=false) at compat/poll.cpp:280
#2 0x000056408705f2b6 in thread_rotation (data=0x56408765e280) at rotation-thread.cpp:804
#3 0x0000564087062b1a in launch_thread (data=0x56408765e310) at thread.cpp:66
#4 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 6 (Thread 0x7f74c5390700 (LWP 76040)):
#0 0x00007f74c925f1d2 in __GI___sigtimedwait (set=0x7f74c538f090, info=0x7f74c538f110, timeout=0x0) at ../sysdeps/unix/sysv/linux/sigtimedwait.c:29
#1 0x000056408706138a in thread_timer (data=0x7ffc1fcbe3f0) at timer.cpp:359
#2 0x0000564087062b1a in launch_thread (data=0x56408765e0a0) at thread.cpp:66
#3 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#4 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 5 (Thread 0x7f74c5b91700 (LWP 76039)):
#0 __libc_write (nbytes=8, buf=0x7f74c5b8fc88, fd=24) at ../sysdeps/unix/sysv/linux/write.c:26
#1 __libc_write (fd=24, buf=0x7f74c5b8fc88, nbytes=8) at ../sysdeps/unix/sysv/linux/write.c:24
#2 0x00005640870eeb4f in lttng_write (fd=24, buf=0x7f74c5b8fc88, count=8) at readwrite.cpp:77
#3 0x000056408704b535 in run_command_no_wait (handle=0x56408765ddd0, in_cmd=0x7f74c5b8fdf0) at notification-thread-commands.cpp:92
#4 0x000056408704bf49 in notification_thread_client_communication_update (handle=0x56408765ddd0, id=2, transmission_status=CLIENT_TRANSMISSION_STATUS_QUEUED) at notification-thread-command
#5 0x000056408707bc62 in client_handle_transmission_status (client=0x7f74b80050d0, status=CLIENT_TRANSMISSION_STATUS_QUEUED, user_data=0x7f74b8004410) at action-executor.cpp:258
#6 0x0000564087057525 in notification_client_list_send_evaluation (client_list=0x7f74b8004df0, trigger=0x7f74b0001030, evaluation=0x7f74b815d1d0, source_object_creds=0x0, client_report=0x5
#7 0x000056408707bce9 in action_executor_notify_handler (executor=0x7f74b8004410, work_item=0x7f74b815d430, item=0x7f74b80e48e0) at action-executor.cpp:269
#8 0x000056408707dd6d in action_executor_generic_handler (executor=0x7f74b8004410, work_item=0x7f74b815d430, item=0x7f74b80e48e0) at action-executor.cpp:670
#9 0x000056408707df01 in action_work_item_execute (executor=0x7f74b8004410, work_item=0x7f74b815d430) at action-executor.cpp:689
#10 0x000056408707e525 in action_executor_thread (_data=0x7f74b8004410) at action-executor.cpp:771 [698/709]
#11 0x0000564087062b1a in launch_thread (data=0x7f74b80044b0) at thread.cpp:66
#12 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#13 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 4 (Thread 0x7f74c6392700 (LWP 76038)):
#0 __lll_lock_wait (futex=futex@entry=0x56408765dde8, private=0) at lowlevellock.c:52
#1 0x00007f74c941a0a3 in __GI___pthread_mutex_lock (mutex=0x56408765dde8) at ../nptl/pthread_mutex_lock.c:80
#2 0x0000564087053c89 in handle_notification_thread_command (handle=0x56408765ddd0, state=0x7f74c63911b0) at notification-thread-events.cpp:3142
#3 0x000056408704ac81 in thread_notification (data=0x56408765ddd0) at notification-thread.cpp:715
#4 0x0000564087062b1a in launch_thread (data=0x56408765dec0) at thread.cpp:66
#5 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#run_command_no_wait6 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 3 (Thread 0x7f74c6b93700 (LWP 76037)):
#0 0x00007f74c933a49e in epoll_wait (epfd=21, events=0x7f74c0000b60, maxevents=2, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x00005640870eafa6 in compat_epoll_wait (events=0x7f74c6b92170, timeout=-1, interruptible=false) at compat/poll.cpp:280
#2 0x000056408706400a in thread_manage_health (data=0x56408765db50) at health.cpp:140
#3 0x0000564087062b1a in launch_thread (data=0x56408765dbf0) at thread.cpp:66
#4 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 2 (Thread 0x7f74c7394700 (LWP 76036)):
#0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1 0x00007f74c987d238 in futex (uaddr=0x564087659b10, op=0, val=-1, timeout=0x0, uaddr2=0x0, val3=0) at ../include/urcu/futex.h:72
#2 futex_async (uaddr=0x564087659b10, op=0, val=-1, timeout=0x0, uaddr2=0x0, val3=0) at ../include/urcu/futex.h:104
#3 futex_wait (futex=0x564087659b10) at workqueue.c:136
#4 0x00007f74c987ced2 in workqueue_thread (arg=0x564087659ad0) at workqueue.c:237
#5 0x00007f74c9417609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#6 0x00007f74c933a163 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 1 (Thread 0x7f74c73cd300 (LWP 76034)):
#0 0x00007f74c933a49e in epoll_wait (epfd=50, events=0x564087666880, maxevents=1, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#1 0x00005640870eafa6 in compat_epoll_wait (events=0x7ffc1fcbe280, timeout=-1, interruptible=false) at compat/poll.cpp:280
#2 0x0000564087062244 in sessiond_wait_for_quit_pipe (timeout_ms=-1) at thread-utils.cpp:83
#3 0x00005640870127dc in main (argc=1, argv=0x7ffc1fcbe668) at main.cpp:1921
Cause
=====
The event_pipe used to notify the notification poll loop is full and the
lttng_write call blocks with the locks for both the client and the
cmd_queue held.
Solution
========
Go back to using eventfd but without the use of EFD_SEMAPHORE (linux
2.6.30) to continue supporting kernel between 2.6.27 and 2.6.29.
The EFD_SEMAPHORE is emulated with a read, decrement, write as explained
by the initial committer of EFD_SEMAPHORE [1].
Known drawbacks
=========
This does not solve the actual block+lock problem but simply push it
back further. The lttng_write on the eventfd can block when reaching
UINT64_MAX. This would represent, at 1 command queued per ns (which is
ridiculous), ~584 years of queueing without a dequeue operation.
Reference
=======
[1] https://lwn.net/Articles/318151/
Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Signed-off-by: Jérémie Galarneau <jeremie.galarneau@efficios.com>
Change-Id: Ie749c4169708f57463fe3cfab2366f1015bae4e0
cds_list_add_tail(&cmd->cmd_list_node,
&handle->cmd_queue.list);
/* Wake-up thread. */
- ret = lttng_write(lttng_pipe_get_writefd(handle->cmd_queue.event_pipe),
+ ret = lttng_write(handle->cmd_queue.event_fd,
¬ification_counter, sizeof(notification_counter));
if (ret != sizeof(notification_counter)) {
PERROR("write to notification thread's queue event fd");
cds_list_add_tail(&new_cmd->cmd_list_node,
&handle->cmd_queue.list);
/* Wake-up thread. */
- ret = lttng_write(lttng_pipe_get_writefd(handle->cmd_queue.event_pipe),
+ ret = lttng_write(handle->cmd_queue.event_fd,
¬ification_counter, sizeof(notification_counter));
if (ret != sizeof(notification_counter)) {
PERROR("write to notification thread's queue event fd");
return 0;
}
+static
+int pop_cmd_queue(struct notification_thread_handle *handle,
+ struct notification_thread_command **cmd)
+{
+ int ret;
+ uint64_t counter;
+
+ pthread_mutex_lock(&handle->cmd_queue.lock);
+ ret = lttng_read(handle->cmd_queue.event_fd, &counter, sizeof(counter));
+ if (ret != sizeof(counter)) {
+ ret = -1;
+ goto error_unlock;
+ }
+
+ /* Simulate behaviour of EFD_SEMAPHORE for older kernels. */
+ counter -= 1;
+ if (counter != 0) {
+ ret = lttng_write(handle->cmd_queue.event_fd, &counter,
+ sizeof(counter));
+ if (ret != sizeof(counter)) {
+ PERROR("Failed to write back to event_fd for EFD_SEMAPHORE emulation");
+ ret = -1;
+ goto error_unlock;
+ }
+ }
+
+ *cmd = cds_list_first_entry(&handle->cmd_queue.list,
+ struct notification_thread_command, cmd_list_node);
+ cds_list_del(&((*cmd)->cmd_list_node));
+ ret = 0;
+
+error_unlock:
+ pthread_mutex_unlock(&handle->cmd_queue.lock);
+ return ret;
+}
+
/* Returns 0 on success, 1 on exit requested, negative value on error. */
int handle_notification_thread_command(
struct notification_thread_handle *handle,
struct notification_thread_state *state)
{
int ret;
- uint64_t counter;
struct notification_thread_command *cmd;
- /* Read the event pipe to put it back into a quiescent state. */
- ret = lttng_read(lttng_pipe_get_readfd(handle->cmd_queue.event_pipe), &counter,
- sizeof(counter));
- if (ret != sizeof(counter)) {
+ ret = pop_cmd_queue(handle, &cmd);
+ if (ret) {
goto error;
}
- pthread_mutex_lock(&handle->cmd_queue.lock);
- cmd = cds_list_first_entry(&handle->cmd_queue.list,
- struct notification_thread_command, cmd_list_node);
- cds_list_del(&cmd->cmd_list_node);
- pthread_mutex_unlock(&handle->cmd_queue.lock);
-
DBG("Received `%s` command",
notification_command_type_str(cmd->type));
switch (cmd->type) {
#include <common/align.hpp>
#include <common/time.hpp>
#include <sys/stat.h>
+#include <sys/eventfd.h>
#include <time.h>
#include <signal.h>
pthread_mutex_destroy(&handle->cmd_queue.lock);
sem_destroy(&handle->ready);
- if (handle->cmd_queue.event_pipe) {
- lttng_pipe_destroy(handle->cmd_queue.event_pipe);
+ if (handle->cmd_queue.event_fd >= 0) {
+ ret = close(handle->cmd_queue.event_fd);
+ if (ret < 0) {
+ PERROR("Failed to close notification command queue event fd");
+ }
}
if (handle->channel_monitoring_pipes.ust32_consumer >= 0) {
ret = close(handle->channel_monitoring_pipes.ust32_consumer);
{
int ret;
struct notification_thread_handle *handle;
- struct lttng_pipe *event_pipe = NULL;
+ int event_fd = -1;
handle = zmalloc<notification_thread_handle>();
if (!handle) {
sem_init(&handle->ready, 0, 0);
- event_pipe = lttng_pipe_open(FD_CLOEXEC);
- if (!event_pipe) {
- ERR("event_pipe creation");
+ event_fd = eventfd(0, EFD_CLOEXEC);
+ if (event_fd < 0) {
+ PERROR("event_fd creation");
goto error;
}
- handle->cmd_queue.event_pipe = event_pipe;
- event_pipe = NULL;
+ handle->cmd_queue.event_fd = event_fd;
CDS_INIT_LIST_HEAD(&handle->cmd_queue.list);
ret = pthread_mutex_init(&handle->cmd_queue.lock, NULL);
end:
return handle;
error:
- lttng_pipe_destroy(event_pipe);
notification_thread_handle_destroy(handle);
return NULL;
}
ERR("Failed to add notification channel socket to pollset");
goto error;
}
- ret = lttng_poll_add(poll_set, lttng_pipe_get_readfd(handle->cmd_queue.event_pipe),
+ ret = lttng_poll_add(poll_set, handle->cmd_queue.event_fd,
LPOLLIN | LPOLLERR);
if (ret < 0) {
ERR("Failed to add notification command queue event fd to pollset");
ERR("Unexpected poll events %u for notification socket %i", revents, fd);
goto error;
}
- } else if (fd == lttng_pipe_get_readfd(handle->cmd_queue.event_pipe)) {
+ } else if (fd == handle->cmd_queue.event_fd) {
ret = handle_notification_thread_command(handle,
&state);
if (ret < 0) {
* has been enqueued.
*/
struct {
- struct lttng_pipe *event_pipe;
+ int event_fd;
struct cds_list_head list;
pthread_mutex_t lock;
} cmd_queue;