// SPDX-License-Identifier: MIT
/*
 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 */

#include <side/trace.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <pthread.h>

#include "rcu.h"
#include "list.h"

/* Defined below; forward-declared for the lazy-initialization paths. */
void side_init(void);
/* Top 8 bits reserved for kernel tracer use. */
#if SIDE_BITS_PER_LONG == 64
# define SIDE_EVENT_ENABLED_KERNEL_MASK			0xFF00000000000000ULL
# define SIDE_EVENT_ENABLED_KERNEL_USER_EVENT_MASK	0x8000000000000000ULL

/* Allow 2^56 tracer references on an event. */
# define SIDE_EVENT_ENABLED_USER_MASK			0x00FFFFFFFFFFFFFFULL
#else
# define SIDE_EVENT_ENABLED_KERNEL_MASK			0xFF000000UL
# define SIDE_EVENT_ENABLED_KERNEL_USER_EVENT_MASK	0x80000000UL

/* Allow 2^24 tracer references on an event. */
# define SIDE_EVENT_ENABLED_USER_MASK			0x00FFFFFFUL
#endif
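/*
 * The event "enabled" word is thus split between a kernel-owned part
 * (top 8 bits) and a user-space reference count (low bits): user-space
 * callback registration increments the low bits, while the kernel
 * tracer sets the top bits. A sketch of how a caller may test both
 * halves (illustrative only, not part of the original file):
 *
 *	uintptr_t enabled = __atomic_load_n(&es0->enabled, __ATOMIC_RELAXED);
 *	if (enabled & SIDE_EVENT_ENABLED_KERNEL_USER_EVENT_MASK)
 *		;	// the kernel tracer requested this event
 *	if (enabled & SIDE_EVENT_ENABLED_USER_MASK)
 *		;	// at least one user-space callback is registered
 */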
struct side_events_register_handle {
	struct side_list_node node;
	struct side_event_description **events;
	uint32_t nr_events;
};
struct side_tracer_handle {
	struct side_list_node node;
	void (*cb)(enum side_tracer_notification notif,
		struct side_event_description **events, uint32_t nr_events, void *priv);
	void *priv;
};
struct side_statedump_request_handle {
	struct side_list_node node;	/* RCU list. */
	void (*cb)(void);
};
struct side_callback {
	union {
		void (*call)(const struct side_event_description *desc,
			const struct side_arg_vec *side_arg_vec,
			void *priv);
		void (*call_variadic)(const struct side_event_description *desc,
			const struct side_arg_vec *side_arg_vec,
			const struct side_arg_dynamic_struct *var_struct,
			void *priv);
	} u;
	void *priv;
	void *key;
};
static struct side_rcu_gp_state event_rcu_gp, statedump_rcu_gp;
/*
 * Lazy initialization for early use within library constructors.
 */
static bool initialized;

/*
 * Do not register/unregister any more events after destructor.
 */
static bool finalized;
/*
 * Recursive mutex to allow tracer callbacks to use the side API.
 */
static pthread_mutex_t side_event_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
static pthread_mutex_t side_statedump_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
static DEFINE_SIDE_LIST_HEAD(side_events_list);
static DEFINE_SIDE_LIST_HEAD(side_tracer_list);
static DEFINE_SIDE_LIST_HEAD(side_statedump_list);
/*
 * Callback filter key for state dump.
 */
static __thread void *filter_key;
/*
 * The empty callback has a NULL function callback pointer, which stops
 * iteration on the array of callbacks immediately.
 */
const char side_empty_callback[sizeof(struct side_callback)];
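/*
 * Invoke the callbacks registered on an event. The @key argument
 * restricts invocation to callbacks registered with a matching key;
 * a NULL @key matches every callback. The callback array is traversed
 * under RCU read-side protection so it can be swapped concurrently by
 * the register/unregister paths.
 */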
static
void _side_call(const struct side_event_state *event_state, const struct side_arg_vec *side_arg_vec, void *key)
{
	struct side_rcu_read_state rcu_read_state;
	const struct side_event_state_0 *es0;
	const struct side_callback *side_cb;
	uintptr_t enabled;

	if (side_unlikely(finalized))
		return;
	if (side_unlikely(!initialized))
		side_init();
	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, const struct side_event_state_0, parent);
	assert(!(es0->desc->flags & SIDE_EVENT_FLAG_VARIADIC));
	enabled = __atomic_load_n(&es0->enabled, __ATOMIC_RELAXED);
	if (side_unlikely(enabled & SIDE_EVENT_ENABLED_KERNEL_USER_EVENT_MASK)) {
		// TODO: call kernel write.
	}
	side_rcu_read_begin(&event_rcu_gp, &rcu_read_state);
	for (side_cb = side_rcu_dereference(es0->callbacks); side_cb->u.call != NULL; side_cb++) {
		/* A NULL key is always a match. */
		if (key && side_cb->key && side_cb->key != key)
			continue;
		side_cb->u.call(es0->desc, side_arg_vec, side_cb->priv);
	}
	side_rcu_read_end(&event_rcu_gp, &rcu_read_state);
}
void side_call(const struct side_event_state *event_state, const struct side_arg_vec *side_arg_vec)
{
	_side_call(event_state, side_arg_vec, NULL);
}
void side_statedump_call(const struct side_event_state *event_state, const struct side_arg_vec *side_arg_vec)
{
	_side_call(event_state, side_arg_vec, filter_key);
}
static
void _side_call_variadic(const struct side_event_state *event_state,
	const struct side_arg_vec *side_arg_vec,
	const struct side_arg_dynamic_struct *var_struct,
	void *key)
{
	struct side_rcu_read_state rcu_read_state;
	const struct side_event_state_0 *es0;
	const struct side_callback *side_cb;
	uintptr_t enabled;

	if (side_unlikely(finalized))
		return;
	if (side_unlikely(!initialized))
		side_init();
	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, const struct side_event_state_0, parent);
	assert(es0->desc->flags & SIDE_EVENT_FLAG_VARIADIC);
	enabled = __atomic_load_n(&es0->enabled, __ATOMIC_RELAXED);
	if (side_unlikely(enabled & SIDE_EVENT_ENABLED_KERNEL_USER_EVENT_MASK)) {
		// TODO: call kernel write.
	}
	side_rcu_read_begin(&event_rcu_gp, &rcu_read_state);
	for (side_cb = side_rcu_dereference(es0->callbacks); side_cb->u.call_variadic != NULL; side_cb++) {
		/* A NULL key is always a match. */
		if (key && side_cb->key && side_cb->key != key)
			continue;
		side_cb->u.call_variadic(es0->desc, side_arg_vec, var_struct, side_cb->priv);
	}
	side_rcu_read_end(&event_rcu_gp, &rcu_read_state);
}
void side_call_variadic(const struct side_event_state *event_state,
	const struct side_arg_vec *side_arg_vec,
	const struct side_arg_dynamic_struct *var_struct)
{
	_side_call_variadic(event_state, side_arg_vec, var_struct, NULL);
}
void side_statedump_call_variadic(const struct side_event_state *event_state,
	const struct side_arg_vec *side_arg_vec,
	const struct side_arg_dynamic_struct *var_struct)
{
	_side_call_variadic(event_state, side_arg_vec, var_struct, filter_key);
}
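/*
 * Look up a callback registered on an event by (function, priv, key)
 * tuple. Returns NULL when not found. Invoked by the register and
 * unregister paths with side_event_lock held.
 */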
static
const struct side_callback *side_tracer_callback_lookup(
		const struct side_event_description *desc,
		void *call, void *priv, void *key)
{
	struct side_event_state *event_state = side_ptr_get(desc->state);
	const struct side_event_state_0 *es0;
	const struct side_callback *cb;

	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, const struct side_event_state_0, parent);
	for (cb = es0->callbacks; cb->u.call != NULL; cb++) {
		if ((void *) cb->u.call == call && cb->priv == priv && cb->key == key)
			return cb;
	}
	return NULL;
}
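/*
 * Register a tracer callback on an event: publish an extended copy of
 * the NULL-terminated callback array with RCU, so that concurrent
 * _side_call() iterations always observe a consistent array, then
 * reclaim the old array after a grace period.
 */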
static
int _side_tracer_callback_register(struct side_event_description *desc,
		void *call, void *priv, void *key)
{
	struct side_event_state *event_state;
	struct side_callback *old_cb, *new_cb;
	struct side_event_state_0 *es0;
	int ret = SIDE_ERROR_OK;
	uint32_t old_nr_cb;

	if (!call)
		return SIDE_ERROR_INVAL;
	if (finalized)
		return SIDE_ERROR_EXITING;
	if (!initialized)
		side_init();
	pthread_mutex_lock(&side_event_lock);
	event_state = side_ptr_get(desc->state);
	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, struct side_event_state_0, parent);
	old_nr_cb = es0->nr_callbacks;
	if (old_nr_cb == UINT32_MAX) {
		ret = SIDE_ERROR_INVAL;
		goto unlock;
	}
	/* Reject duplicate (call, priv) tuples. */
	if (side_tracer_callback_lookup(desc, call, priv, key)) {
		ret = SIDE_ERROR_EXIST;
		goto unlock;
	}
	old_cb = (struct side_callback *) es0->callbacks;
	/* old_nr_cb + 1 (new cb) + 1 (NULL) */
	new_cb = (struct side_callback *) calloc(old_nr_cb + 2, sizeof(struct side_callback));
	if (!new_cb) {
		ret = SIDE_ERROR_NOMEM;
		goto unlock;
	}
	memcpy(new_cb, old_cb, old_nr_cb * sizeof(struct side_callback));
	if (desc->flags & SIDE_EVENT_FLAG_VARIADIC)
		new_cb[old_nr_cb].u.call_variadic =
			(side_tracer_callback_variadic_func) call;
	else
		new_cb[old_nr_cb].u.call =
			(side_tracer_callback_func) call;
	new_cb[old_nr_cb].priv = priv;
	new_cb[old_nr_cb].key = key;
	/* High order bits are already zeroed. */
	side_rcu_assign_pointer(es0->callbacks, new_cb);
	side_rcu_wait_grace_period(&event_rcu_gp);
	if (old_nr_cb)
		free(old_cb);
	es0->nr_callbacks++;
	/* Increment concurrently with kernel setting the top bits. */
	if (!old_nr_cb)
		(void) __atomic_add_fetch(&es0->enabled, 1, __ATOMIC_RELAXED);
unlock:
	pthread_mutex_unlock(&side_event_lock);
	return ret;
}
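/*
 * Usage sketch (illustrative; the event description and callback names
 * below are hypothetical):
 *
 *	static void my_cb(const struct side_event_description *desc,
 *			const struct side_arg_vec *side_arg_vec, void *priv)
 *	{
 *		// consume the event payload
 *	}
 *
 *	ret = side_tracer_callback_register(my_event_desc, my_cb,
 *		my_priv, my_key);
 */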
int side_tracer_callback_register(struct side_event_description *desc,
		side_tracer_callback_func call,
		void *priv, void *key)
{
	if (desc->flags & SIDE_EVENT_FLAG_VARIADIC)
		return SIDE_ERROR_INVAL;
	return _side_tracer_callback_register(desc, (void *) call, priv, key);
}
int side_tracer_callback_variadic_register(struct side_event_description *desc,
		side_tracer_callback_variadic_func call_variadic,
		void *priv, void *key)
{
	if (!(desc->flags & SIDE_EVENT_FLAG_VARIADIC))
		return SIDE_ERROR_INVAL;
	return _side_tracer_callback_register(desc, (void *) call_variadic, priv, key);
}
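/*
 * Unregister a tracer callback: publish a copy of the callback array
 * without the removed entry, wait for a grace period, then reclaim
 * the old array.
 */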
static int _side_tracer_callback_unregister(struct side_event_description *desc,
		void *call, void *priv, void *key)
{
	struct side_event_state *event_state;
	struct side_callback *old_cb, *new_cb;
	const struct side_callback *cb_pos;
	struct side_event_state_0 *es0;
	uint32_t pos_idx;
	int ret = SIDE_ERROR_OK;
	uint32_t old_nr_cb;

	if (!call)
		return SIDE_ERROR_INVAL;
	if (finalized)
		return SIDE_ERROR_EXITING;
	if (!initialized)
		side_init();
	pthread_mutex_lock(&side_event_lock);
	event_state = side_ptr_get(desc->state);
	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, struct side_event_state_0, parent);
	cb_pos = side_tracer_callback_lookup(desc, call, priv, key);
	if (!cb_pos) {
		ret = SIDE_ERROR_NOENT;
		goto unlock;
	}
	old_nr_cb = es0->nr_callbacks;
	old_cb = (struct side_callback *) es0->callbacks;
	if (old_nr_cb == 1) {
		new_cb = (struct side_callback *) &side_empty_callback;
	} else {
		pos_idx = cb_pos - es0->callbacks;
		/* Remove entry at pos_idx. */
		/* old_nr_cb - 1 (removed cb) + 1 (NULL) */
		new_cb = (struct side_callback *) calloc(old_nr_cb, sizeof(struct side_callback));
		if (!new_cb) {
			ret = SIDE_ERROR_NOMEM;
			goto unlock;
		}
		memcpy(new_cb, old_cb, pos_idx * sizeof(struct side_callback));
		memcpy(&new_cb[pos_idx], &old_cb[pos_idx + 1],
			(old_nr_cb - pos_idx - 1) * sizeof(struct side_callback));
	}
	/* High order bits are already zeroed. */
	side_rcu_assign_pointer(es0->callbacks, new_cb);
	side_rcu_wait_grace_period(&event_rcu_gp);
	free(old_cb);
	es0->nr_callbacks--;
	/* Decrement concurrently with kernel setting the top bits. */
	if (old_nr_cb == 1)
		(void) __atomic_add_fetch(&es0->enabled, -1, __ATOMIC_RELAXED);
unlock:
	pthread_mutex_unlock(&side_event_lock);
	return ret;
}
int side_tracer_callback_unregister(struct side_event_description *desc,
		side_tracer_callback_func call,
		void *priv, void *key)
{
	if (desc->flags & SIDE_EVENT_FLAG_VARIADIC)
		return SIDE_ERROR_INVAL;
	return _side_tracer_callback_unregister(desc, (void *) call, priv, key);
}
int side_tracer_callback_variadic_unregister(struct side_event_description *desc,
		side_tracer_callback_variadic_func call_variadic,
		void *priv, void *key)
{
	if (!(desc->flags & SIDE_EVENT_FLAG_VARIADIC))
		return SIDE_ERROR_INVAL;
	return _side_tracer_callback_unregister(desc, (void *) call_variadic, priv, key);
}
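/*
 * Register a batch of event descriptions and notify all registered
 * tracers of the insertion.
 */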
struct side_events_register_handle *side_events_register(struct side_event_description **events, uint32_t nr_events)
{
	struct side_events_register_handle *events_handle = NULL;
	struct side_tracer_handle *tracer_handle;

	if (finalized)
		return NULL;
	if (!initialized)
		side_init();
	events_handle = (struct side_events_register_handle *)
			calloc(1, sizeof(struct side_events_register_handle));
	if (!events_handle)
		return NULL;
	events_handle->events = events;
	events_handle->nr_events = nr_events;

	pthread_mutex_lock(&side_event_lock);
	side_list_insert_node_tail(&side_events_list, &events_handle->node);
	side_list_for_each_entry(tracer_handle, &side_tracer_list, node) {
		tracer_handle->cb(SIDE_TRACER_NOTIFICATION_INSERT_EVENTS,
			events, nr_events, tracer_handle->priv);
	}
	pthread_mutex_unlock(&side_event_lock);
	//TODO: call event batch register ioctl
	return events_handle;
}
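/*
 * Remove all callbacks from an event. Called when unregistering an
 * event batch, at which point the instrumentation for those events is
 * expected to be unreachable.
 */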
static
void side_event_remove_callbacks(struct side_event_description *desc)
{
	struct side_event_state *event_state = side_ptr_get(desc->state);
	struct side_event_state_0 *es0;
	struct side_callback *old_cb;
	uint32_t nr_cb;

	if (side_unlikely(event_state->version != 0))
		abort();
	es0 = side_container_of(event_state, struct side_event_state_0, parent);
	nr_cb = es0->nr_callbacks;
	if (!nr_cb)
		return;
	old_cb = (struct side_callback *) es0->callbacks;
	(void) __atomic_add_fetch(&es0->enabled, -1, __ATOMIC_RELAXED);
	/*
	 * Setting the state back to 0 cb and empty callbacks out of
	 * caution. This should not matter because instrumentation is
	 * unreachable.
	 */
	es0->nr_callbacks = 0;
	side_rcu_assign_pointer(es0->callbacks, (struct side_callback *) &side_empty_callback);
	/*
	 * No need to wait for grace period because instrumentation is
	 * unreachable.
	 */
	free(old_cb);
}
/*
 * Unregister event handle. At this point, all side events in that
 * handle should be unreachable.
 */
void side_events_unregister(struct side_events_register_handle *events_handle)
{
	struct side_tracer_handle *tracer_handle;
	uint32_t i;

	if (!events_handle)
		return;
	if (finalized)
		return;
	if (!initialized)
		side_init();
	pthread_mutex_lock(&side_event_lock);
	side_list_remove_node(&events_handle->node);
	side_list_for_each_entry(tracer_handle, &side_tracer_list, node) {
		tracer_handle->cb(SIDE_TRACER_NOTIFICATION_REMOVE_EVENTS,
			events_handle->events, events_handle->nr_events,
			tracer_handle->priv);
	}
	for (i = 0; i < events_handle->nr_events; i++) {
		struct side_event_description *event = events_handle->events[i];

		/* Skip NULL pointers */
		if (!event)
			continue;
		side_event_remove_callbacks(event);
	}
	pthread_mutex_unlock(&side_event_lock);
	//TODO: call event batch unregister ioctl
	free(events_handle);
}
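/*
 * Register a tracer notification callback, invoked when event batches
 * are inserted or removed. On registration, the callback is replayed
 * for every event batch already present. Usage sketch (illustrative;
 * names are hypothetical):
 *
 *	static void my_notif_cb(enum side_tracer_notification notif,
 *			struct side_event_description **events,
 *			uint32_t nr_events, void *priv)
 *	{
 *		// connect or disconnect callbacks for these events
 *	}
 *
 *	tracer_handle = side_tracer_event_notification_register(my_notif_cb, my_priv);
 */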
struct side_tracer_handle *side_tracer_event_notification_register(
		void (*cb)(enum side_tracer_notification notif,
			struct side_event_description **events, uint32_t nr_events, void *priv),
		void *priv)
{
	struct side_tracer_handle *tracer_handle;
	struct side_events_register_handle *events_handle;

	if (finalized)
		return NULL;
	if (!initialized)
		side_init();
	tracer_handle = (struct side_tracer_handle *)
			calloc(1, sizeof(struct side_tracer_handle));
	if (!tracer_handle)
		return NULL;
	pthread_mutex_lock(&side_event_lock);
	tracer_handle->cb = cb;
	tracer_handle->priv = priv;
	side_list_insert_node_tail(&side_tracer_list, &tracer_handle->node);
	side_list_for_each_entry(events_handle, &side_events_list, node) {
		cb(SIDE_TRACER_NOTIFICATION_INSERT_EVENTS,
			events_handle->events, events_handle->nr_events, priv);
	}
	pthread_mutex_unlock(&side_event_lock);
	return tracer_handle;
}
void side_tracer_event_notification_unregister(struct side_tracer_handle *tracer_handle)
{
	struct side_events_register_handle *events_handle;

	if (finalized)
		return;
	if (!initialized)
		side_init();
	pthread_mutex_lock(&side_event_lock);
	side_list_for_each_entry(events_handle, &side_events_list, node) {
		tracer_handle->cb(SIDE_TRACER_NOTIFICATION_REMOVE_EVENTS,
			events_handle->events, events_handle->nr_events,
			tracer_handle->priv);
	}
	side_list_remove_node(&tracer_handle->node);
	pthread_mutex_unlock(&side_event_lock);
	free(tracer_handle);
}
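/*
 * Register a state dump request callback. The callback is invoked
 * immediately for all tracers upon registration, and again each time
 * a tracer requests a state dump.
 */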
struct side_statedump_request_handle *side_statedump_request_notification_register(void (*statedump_cb)(void))
{
	struct side_statedump_request_handle *handle;

	if (finalized)
		return NULL;
	if (!initialized)
		side_init();
	/*
	 * The statedump request notification should not be registered
	 * from a notification callback.
	 */
	assert(filter_key == NULL);
	handle = (struct side_statedump_request_handle *)
			calloc(1, sizeof(struct side_statedump_request_handle));
	if (!handle)
		return NULL;
	handle->cb = statedump_cb;

	pthread_mutex_lock(&side_statedump_lock);
	side_list_insert_node_tail_rcu(&side_statedump_list, &handle->node);
	pthread_mutex_unlock(&side_statedump_lock);

	/* Invoke callback for all tracers. */
	statedump_cb();

	return handle;
}
void side_statedump_request_notification_unregister(struct side_statedump_request_handle *handle)
{
	if (finalized)
		return;
	if (!initialized)
		side_init();
	assert(filter_key == NULL);

	pthread_mutex_lock(&side_statedump_lock);
	side_list_remove_node_rcu(&handle->node);
	pthread_mutex_unlock(&side_statedump_lock);

	side_rcu_wait_grace_period(&statedump_rcu_gp);
	free(handle);
}
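/*
 * Request a state dump on behalf of a single tracer. The thread-local
 * filter_key is set for the duration of the request so that
 * side_statedump_call() and side_statedump_call_variadic() only match
 * the callbacks registered with that tracer's key.
 */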
void side_tracer_statedump_request(void *key)
{
	struct side_statedump_request_handle *handle;
	struct side_rcu_read_state rcu_read_state;

	/* Invoke the state dump callback specifically for the tracer key. */
	filter_key = key;
	side_rcu_read_begin(&statedump_rcu_gp, &rcu_read_state);
	side_list_for_each_entry_rcu(handle, &side_statedump_list, node)
		handle->cb();
	side_rcu_read_end(&statedump_rcu_gp, &rcu_read_state);
	filter_key = NULL;
}
void side_init(void)
{
	if (initialized)
		return;
	side_rcu_gp_init(&event_rcu_gp);
	side_rcu_gp_init(&statedump_rcu_gp);
	initialized = true;
}
/*
 * side_exit() is executed from a library destructor. It can be called
 * explicitly at application exit as well. Concurrent side API use is
 * not expected at that point.
 */
void side_exit(void)
{
	struct side_events_register_handle *handle, *tmp;

	if (finalized)
		return;
	side_list_for_each_entry_safe(handle, tmp, &side_events_list, node)
		side_events_unregister(handle);
	side_rcu_gp_exit(&event_rcu_gp);
	side_rcu_gp_exit(&statedump_rcu_gp);
	finalized = true;
}