From ebabbf580131acd1fe246c4d31fc5c044d36a038 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 4 Sep 2020 11:29:47 -0400 Subject: [PATCH 01/16] Implement libcounter Signed-off-by: Mathieu Desnoyers Change-Id: I7bc2707e8bdea712addc84d90329486cda1a24e1 --- Makefile.am | 1 + configure.ac | 1 + include/Makefile.am | 4 +- include/lttng/bitmap.h | 70 +++ include/lttng/counter-config.h | 57 ++ include/lttng/ust-abi.h | 85 +++ include/lttng/ust-ctl.h | 78 +++ include/lttng/ust-events.h | 57 ++ include/ust-comm.h | 8 + libcounter/Makefile.am | 18 + libcounter/counter-api.h | 296 +++++++++ libcounter/counter-internal.h | 65 ++ libcounter/counter-types.h | 93 +++ libcounter/counter.c | 564 ++++++++++++++++++ libcounter/counter.h | 65 ++ libcounter/shm.c | 387 ++++++++++++ libcounter/shm.h | 142 +++++ libcounter/shm_internal.h | 35 ++ libcounter/shm_types.h | 54 ++ libcounter/smp.c | 111 ++++ libcounter/smp.h | 43 ++ liblttng-ust-comm/lttng-ust-comm.c | 69 +++ liblttng-ust-ctl/ustctl.c | 459 ++++++++++++++ liblttng-ust/Makefile.am | 5 +- .../lttng-counter-client-percpu-32-modular.c | 94 +++ .../lttng-counter-client-percpu-64-modular.c | 94 +++ liblttng-ust/lttng-events.c | 46 +- liblttng-ust/lttng-ust-abi.c | 140 +++++ liblttng-ust/lttng-ust-comm.c | 88 +++ liblttng-ust/ust-core.c | 34 ++ 30 files changed, 3260 insertions(+), 3 deletions(-) create mode 100644 include/lttng/bitmap.h create mode 100644 include/lttng/counter-config.h create mode 100644 libcounter/Makefile.am create mode 100644 libcounter/counter-api.h create mode 100644 libcounter/counter-internal.h create mode 100644 libcounter/counter-types.h create mode 100644 libcounter/counter.c create mode 100644 libcounter/counter.h create mode 100644 libcounter/shm.c create mode 100644 libcounter/shm.h create mode 100644 libcounter/shm_internal.h create mode 100644 libcounter/shm_types.h create mode 100644 libcounter/smp.c create mode 100644 libcounter/smp.h create mode 100644 liblttng-ust/lttng-counter-client-percpu-32-modular.c create mode 100644 liblttng-ust/lttng-counter-client-percpu-64-modular.c diff --git a/Makefile.am b/Makefile.am index a13c6e31..b6b7fb6d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,6 +1,7 @@ ACLOCAL_AMFLAGS = -I m4 SUBDIRS = . include snprintf libringbuffer liblttng-ust-comm \ + libcounter \ libmsgpack \ liblttng-ust \ liblttng-ust-ctl \ diff --git a/configure.ac b/configure.ac index 032bb731..e5ac3200 100644 --- a/configure.ac +++ b/configure.ac @@ -504,6 +504,7 @@ AC_CONFIG_FILES([ include/Makefile include/lttng/ust-version.h snprintf/Makefile + libcounter/Makefile libmsgpack/Makefile libringbuffer/Makefile liblttng-ust-comm/Makefile diff --git a/include/Makefile.am b/include/Makefile.am index 277e4e69..23a165ae 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -25,7 +25,9 @@ nobase_include_HEADERS = \ lttng/lttng-ust-tracelog.h \ lttng/ust-clock.h \ lttng/ust-getcpu.h \ - lttng/ust-elf.h + lttng/ust-elf.h \ + lttng/counter-config.h \ + lttng/bitmap.h # note: usterr-signal-safe.h, core.h and share.h need namespace cleanup. 
diff --git a/include/lttng/bitmap.h b/include/lttng/bitmap.h
new file mode 100644
index 00000000..fb57ff41
--- /dev/null
+++ b/include/lttng/bitmap.h
@@ -0,0 +1,70 @@
+/*
+ * lttng/bitmap.h
+ *
+ * LTTng Bitmap API
+ *
+ * Copyright (C) 2020 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _LTTNG_BITMAP_H
+#define _LTTNG_BITMAP_H
+
+#include <urcu/compiler.h>
+#include <urcu/system.h>
+#include <urcu/uatomic.h>
+#include <stdbool.h>
+
+static inline void lttng_bitmap_index(unsigned int index, unsigned int *word,
+		unsigned int *bit)
+{
+	*word = index / CAA_BITS_PER_LONG;
+	*bit = index % CAA_BITS_PER_LONG;
+}
+
+static inline void lttng_bitmap_set_bit(unsigned int index, unsigned long *p)
+{
+	unsigned int word, bit;
+	unsigned long val;
+
+	lttng_bitmap_index(index, &word, &bit);
+	val = 1UL << bit;	/* 1UL: bit can reach CAA_BITS_PER_LONG - 1 */
+	uatomic_or(p + word, val);
+}
+
+static inline void lttng_bitmap_clear_bit(unsigned int index, unsigned long *p)
+{
+	unsigned int word, bit;
+	unsigned long val;
+
+	lttng_bitmap_index(index, &word, &bit);
+	val = ~(1UL << bit);
+	uatomic_and(p + word, val);
+}
+
+static inline bool lttng_bitmap_test_bit(unsigned int index, unsigned long *p)
+{
+	unsigned int word, bit;
+
+	lttng_bitmap_index(index, &word, &bit);
+	return (CMM_LOAD_SHARED(p[word]) >> bit) & 0x1;
+}
+
+#endif /* _LTTNG_BITMAP_H */
diff --git a/include/lttng/counter-config.h b/include/lttng/counter-config.h
new file mode 100644
index 00000000..ebe15365
--- /dev/null
+++ b/include/lttng/counter-config.h
@@ -0,0 +1,57 @@
+/*
+ * lttng/counter-config.h
+ *
+ * LTTng Counters Configuration
+ *
+ * Copyright (C) 2020 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _LTTNG_COUNTER_CONFIG_H +#define _LTTNG_COUNTER_CONFIG_H + +#include + +enum lib_counter_config_alloc { + COUNTER_ALLOC_PER_CPU = (1 << 0), + COUNTER_ALLOC_GLOBAL = (1 << 1), +}; + +enum lib_counter_config_sync { + COUNTER_SYNC_PER_CPU, + COUNTER_SYNC_GLOBAL, +}; + +struct lib_counter_config { + uint32_t alloc; /* enum lib_counter_config_alloc flags */ + enum lib_counter_config_sync sync; + enum { + COUNTER_ARITHMETIC_MODULAR, + COUNTER_ARITHMETIC_SATURATE, /* TODO */ + } arithmetic; + enum { + COUNTER_SIZE_8_BIT = 1, + COUNTER_SIZE_16_BIT = 2, + COUNTER_SIZE_32_BIT = 4, + COUNTER_SIZE_64_BIT = 8, + } counter_size; +}; + +#endif /* _LTTNG_COUNTER_CONFIG_H */ diff --git a/include/lttng/ust-abi.h b/include/lttng/ust-abi.h index 1c80c144..6a6144f7 100644 --- a/include/lttng/ust-abi.h +++ b/include/lttng/ust-abi.h @@ -103,6 +103,40 @@ struct lttng_ust_stream { */ } LTTNG_PACKED; + +enum lttng_ust_counter_arithmetic { + LTTNG_UST_COUNTER_ARITHMETIC_MODULAR = 0, + LTTNG_UST_COUNTER_ARITHMETIC_SATURATION = 1, +}; + +enum lttng_ust_counter_bitness { + LTTNG_UST_COUNTER_BITNESS_32BITS = 4, + LTTNG_UST_COUNTER_BITNESS_64BITS = 8, +}; + +struct lttng_ust_counter_dimension { + uint64_t size; + uint64_t underflow_index; + uint64_t overflow_index; + uint8_t has_underflow; + uint8_t has_overflow; +} LTTNG_PACKED; + +#define LTTNG_UST_COUNTER_DIMENSION_MAX 8 +struct lttng_ust_counter_conf { + uint32_t arithmetic; /* enum lttng_ust_counter_arithmetic */ + uint32_t bitness; /* enum lttng_ust_counter_bitness */ + uint32_t number_dimensions; + int64_t global_sum_step; + struct lttng_ust_counter_dimension dimensions[LTTNG_UST_COUNTER_DIMENSION_MAX]; +} LTTNG_PACKED; + +struct lttng_ust_counter_value { + uint32_t number_dimensions; + uint64_t dimension_indexes[LTTNG_UST_COUNTER_DIMENSION_MAX]; + int64_t value; +} LTTNG_PACKED; + #define LTTNG_UST_EVENT_PADDING1 8 #define LTTNG_UST_EVENT_PADDING2 (LTTNG_UST_SYM_NAME_LEN + 32) struct lttng_ust_event { @@ -123,6 +157,7 @@ struct lttng_ust_event { #define LTTNG_UST_EVENT_NOTIFIER_PADDING1 16 struct lttng_ust_event_notifier { struct lttng_ust_event event; + uint64_t error_counter_index; char padding[LTTNG_UST_EVENT_NOTIFIER_PADDING1]; } LTTNG_PACKED; @@ -133,6 +168,27 @@ struct lttng_ust_event_notifier_notification { char padding[LTTNG_EVENT_NOTIFIER_NOTIFICATION_PADDING]; } LTTNG_PACKED; +#define LTTNG_UST_COUNTER_PADDING1 (LTTNG_UST_SYM_NAME_LEN + 32) +#define LTTNG_UST_COUNTER_DATA_MAX_LEN 4096U +struct lttng_ust_counter { + uint64_t len; + char padding[LTTNG_UST_COUNTER_PADDING1]; + char data[]; /* variable sized data */ +} LTTNG_PACKED; + +#define LTTNG_UST_COUNTER_GLOBAL_PADDING1 (LTTNG_UST_SYM_NAME_LEN + 32) +struct lttng_ust_counter_global { + uint64_t len; /* shm len */ + char padding[LTTNG_UST_COUNTER_GLOBAL_PADDING1]; +} LTTNG_PACKED; + +#define LTTNG_UST_COUNTER_CPU_PADDING1 (LTTNG_UST_SYM_NAME_LEN + 32) +struct lttng_ust_counter_cpu { + uint64_t len; /* shm len */ + uint32_t cpu_nr; + char padding[LTTNG_UST_COUNTER_CPU_PADDING1]; +} LTTNG_PACKED; + enum lttng_ust_field_type { LTTNG_UST_FIELD_OTHER = 0, LTTNG_UST_FIELD_INTEGER = 1, @@ -233,6 +289,9 @@ enum lttng_ust_object_type { LTTNG_UST_OBJECT_TYPE_CONTEXT = 3, LTTNG_UST_OBJECT_TYPE_EVENT_NOTIFIER_GROUP = 4, 
LTTNG_UST_OBJECT_TYPE_EVENT_NOTIFIER = 5, + LTTNG_UST_OBJECT_TYPE_COUNTER = 6, + LTTNG_UST_OBJECT_TYPE_COUNTER_GLOBAL = 7, + LTTNG_UST_OBJECT_TYPE_COUNTER_CPU = 8, }; #define LTTNG_UST_OBJECT_DATA_PADDING1 32 @@ -254,6 +313,16 @@ struct lttng_ust_object_data { int wakeup_fd; uint32_t stream_nr; } stream; + struct { + void *data; + } counter; + struct { + int shm_fd; + } counter_global; + struct { + int shm_fd; + uint32_t cpu_nr; + } counter_cpu; char padding2[LTTNG_UST_OBJECT_DATA_PADDING2]; } u; } LTTNG_PACKED; @@ -354,6 +423,16 @@ struct lttng_ust_event_exclusion { _UST_CMDW(0xB0, struct lttng_ust_event_notifier) #define LTTNG_UST_CAPTURE _UST_CMD(0xB1) +/* Session and event notifier group commands */ +#define LTTNG_UST_COUNTER \ + _UST_CMDW(0xC0, struct lttng_ust_counter) + +/* Counter commands */ +#define LTTNG_UST_COUNTER_GLOBAL \ + _UST_CMDW(0xD0, struct lttng_ust_counter_global) +#define LTTNG_UST_COUNTER_CPU \ + _UST_CMDW(0xD1, struct lttng_ust_counter_cpu) + #define LTTNG_UST_ROOT_HANDLE 0 struct lttng_ust_obj; @@ -376,6 +455,12 @@ union ust_args { struct { int event_notifier_notif_fd; } event_notifier_handle; + struct { + void *counter_data; + } counter; + struct { + int shm_fd; + } counter_shm; }; struct lttng_ust_objd_ops { diff --git a/include/lttng/ust-ctl.h b/include/lttng/ust-ctl.h index 35b73342..2c15a54e 100644 --- a/include/lttng/ust-ctl.h +++ b/include/lttng/ust-ctl.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -591,4 +592,81 @@ int ustctl_reply_register_channel(int sock, enum ustctl_channel_header header_type, int ret_code); /* return code. 0 ok, negative error */ +/* + * Counter API. + */ + +enum ustctl_counter_bitness { + USTCTL_COUNTER_BITNESS_32 = 4, + USTCTL_COUNTER_BITNESS_64 = 8, +}; + +enum ustctl_counter_arithmetic { + USTCTL_COUNTER_ARITHMETIC_MODULAR = 0, + USTCTL_COUNTER_ARITHMETIC_SATURATION = 1, +}; + +/* Used as alloc flags. */ +enum ustctl_counter_alloc { + USTCTL_COUNTER_ALLOC_PER_CPU = (1 << 0), + USTCTL_COUNTER_ALLOC_GLOBAL = (1 << 1), +}; + +struct ustctl_daemon_counter; + +int ustctl_get_nr_cpu_per_counter(void); + +struct ustctl_counter_dimension { + uint64_t size; + uint64_t underflow_index; + uint64_t overflow_index; + uint8_t has_underflow; + uint8_t has_overflow; +}; + +struct ustctl_daemon_counter * + ustctl_create_counter(size_t nr_dimensions, + const struct ustctl_counter_dimension *dimensions, + int64_t global_sum_step, + int global_counter_fd, + int nr_counter_cpu_fds, + const int *counter_cpu_fds, + enum ustctl_counter_bitness bitness, + enum ustctl_counter_arithmetic arithmetic, + uint32_t alloc_flags); + +int ustctl_create_counter_data(struct ustctl_daemon_counter *counter, + struct lttng_ust_object_data **counter_data); + +int ustctl_create_counter_global_data(struct ustctl_daemon_counter *counter, + struct lttng_ust_object_data **counter_global_data); +int ustctl_create_counter_cpu_data(struct ustctl_daemon_counter *counter, int cpu, + struct lttng_ust_object_data **counter_cpu_data); + +/* + * Each counter data and counter cpu data created need to be destroyed + * before calling ustctl_destroy_counter(). 
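+ * The shm file descriptors passed to ustctl_create_counter() remain
+ * owned by the caller: the daemon-side shm objects are created with
+ * shm_fd_ownership = 0, so destroying the counter does not close them.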
+ */ +void ustctl_destroy_counter(struct ustctl_daemon_counter *counter); + +int ustctl_send_counter_data_to_ust(int sock, int parent_handle, + struct lttng_ust_object_data *counter_data); +int ustctl_send_counter_global_data_to_ust(int sock, + struct lttng_ust_object_data *counter_data, + struct lttng_ust_object_data *counter_global_data); +int ustctl_send_counter_cpu_data_to_ust(int sock, + struct lttng_ust_object_data *counter_data, + struct lttng_ust_object_data *counter_cpu_data); + +int ustctl_counter_read(struct ustctl_daemon_counter *counter, + const size_t *dimension_indexes, + int cpu, int64_t *value, + bool *overflow, bool *underflow); +int ustctl_counter_aggregate(struct ustctl_daemon_counter *counter, + const size_t *dimension_indexes, + int64_t *value, + bool *overflow, bool *underflow); +int ustctl_counter_clear(struct ustctl_daemon_counter *counter, + const size_t *dimension_indexes); + #endif /* _LTTNG_UST_CTL_H */ diff --git a/include/lttng/ust-events.h b/include/lttng/ust-events.h index 44b8d4c4..721b0aed 100644 --- a/include/lttng/ust-events.h +++ b/include/lttng/ust-events.h @@ -64,6 +64,7 @@ struct lttng_session; struct lttng_ust_lib_ring_buffer_ctx; struct lttng_ust_context_app; struct lttng_event_field; +struct lttng_event_notifier_group; /* * Data structures used by tracepoint event declarations, and by the @@ -616,6 +617,36 @@ struct lttng_channel { int tstate:1; /* Transient enable state */ }; +#define LTTNG_COUNTER_DIMENSION_MAX 8 + +struct lttng_counter_dimension { + uint64_t size; + uint64_t underflow_index; + uint64_t overflow_index; + uint8_t has_underflow; + uint8_t has_overflow; +}; + +struct lttng_counter_ops { + struct lib_counter *(*counter_create)(size_t nr_dimensions, + const struct lttng_counter_dimension *dimensions, + int64_t global_sum_step, + int global_counter_fd, + int nr_counter_cpu_fds, + const int *counter_cpu_fds, + bool is_daemon); + void (*counter_destroy)(struct lib_counter *counter); + int (*counter_add)(struct lib_counter *counter, + const size_t *dimension_indexes, int64_t v); + int (*counter_read)(struct lib_counter *counter, + const size_t *dimension_indexes, int cpu, + int64_t *value, bool *overflow, bool *underflow); + int (*counter_aggregate)(struct lib_counter *counter, + const size_t *dimension_indexes, int64_t *value, + bool *overflow, bool *underflow); + int (*counter_clear)(struct lib_counter *counter, const size_t *dimension_indexes); +}; + #define LTTNG_UST_STACK_CTX_PADDING 32 struct lttng_stack_ctx { struct lttng_event *event; @@ -677,6 +708,14 @@ struct lttng_session { struct lttng_ctx *ctx; /* contexts for filters. */ }; +struct lttng_counter { + int objd; + struct lttng_event_notifier_group *event_notifier_group; /* owner */ + struct lttng_counter_transport *transport; + struct lib_counter *counter; + struct lttng_counter_ops *ops; +}; + struct lttng_event_notifier_group { int objd; void *owner; @@ -686,6 +725,9 @@ struct lttng_event_notifier_group { struct cds_list_head event_notifiers_head; /* list of event_notifiers */ struct lttng_ust_event_notifier_ht event_notifiers_ht; /* hashtable of event_notifiers */ struct lttng_ctx *ctx; /* contexts for filters. 
*/ + + struct lttng_counter *error_counter; + size_t error_counter_len; }; struct lttng_transport { @@ -695,6 +737,13 @@ struct lttng_transport { const struct lttng_ust_lib_ring_buffer_config *client_config; }; +struct lttng_counter_transport { + char *name; + struct cds_list_head node; + struct lttng_counter_ops ops; + const struct lib_counter_config *client_config; +}; + struct lttng_session *lttng_session_create(void); int lttng_session_enable(struct lttng_session *session); int lttng_session_disable(struct lttng_session *session); @@ -724,6 +773,13 @@ int lttng_attach_context(struct lttng_ust_context *context_param, void lttng_transport_register(struct lttng_transport *transport); void lttng_transport_unregister(struct lttng_transport *transport); +void lttng_counter_transport_register(struct lttng_counter_transport *transport); +void lttng_counter_transport_unregister(struct lttng_counter_transport *transport); + +struct lttng_counter *lttng_ust_counter_create( + const char *counter_transport_name, + size_t number_dimensions, const struct lttng_counter_dimension *dimensions); + void synchronize_trace(void); int lttng_probe_register(struct lttng_probe_desc *desc); @@ -810,6 +866,7 @@ extern const struct lttng_ust_client_lib_ring_buffer_client_cb *lttng_client_cal extern const struct lttng_ust_client_lib_ring_buffer_client_cb *lttng_client_callbacks_overwrite; struct lttng_transport *lttng_transport_find(const char *name); +struct lttng_counter_transport *lttng_counter_transport_find(const char *name); int lttng_probes_get_event_list(struct lttng_ust_tracepoint_list *list); void lttng_probes_prune_event_list(struct lttng_ust_tracepoint_list *list); diff --git a/include/ust-comm.h b/include/ust-comm.h index fbaef544..49a55d21 100644 --- a/include/ust-comm.h +++ b/include/ust-comm.h @@ -108,6 +108,9 @@ struct ustcomm_ust_msg { uint32_t reloc_offset; uint64_t seqnum; } LTTNG_PACKED capture; + struct lttng_ust_counter counter; + struct lttng_ust_counter_global counter_global; + struct lttng_ust_counter_cpu counter_cpu; char padding[USTCOMM_MSG_PADDING2]; } u; } LTTNG_PACKED; @@ -228,6 +231,11 @@ int ustcomm_recv_stream_from_sessiond(int sock, ssize_t ustcomm_recv_event_notifier_notif_fd_from_sessiond(int sock, int *event_notifier_notif_fd); +ssize_t ustcomm_recv_counter_from_sessiond(int sock, + void **counter_data, uint64_t len); +int ustcomm_recv_counter_shm_from_sessiond(int sock, + int *shm_fd); + /* * Returns 0 on success, negative error value on error. * Returns -EPIPE or -ECONNRESET if other end has hung up. 
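[Reviewer note, not part of the patch: a minimal daemon-side sketch of the
ustctl counter API added above. Error handling is elided; `sock`,
`group_handle`, `global_fd`, `nr_cpu_fds` and `cpu_fds` are placeholders for
descriptors the session daemon already manages elsewhere, and <stdio.h> and
<inttypes.h> are assumed for the final print.]

	/* One dimension of 128 slots, 64-bit modular, per-cpu + global. */
	struct ustctl_counter_dimension dims[] = {
		{ .size = 128 },
	};
	struct ustctl_daemon_counter *counter;
	struct lttng_ust_object_data *counter_data;
	int64_t value;
	bool overflow, underflow;

	counter = ustctl_create_counter(1, dims, 0, global_fd,
			nr_cpu_fds, cpu_fds,
			USTCTL_COUNTER_BITNESS_64,
			USTCTL_COUNTER_ARITHMETIC_MODULAR,
			USTCTL_COUNTER_ALLOC_PER_CPU | USTCTL_COUNTER_ALLOC_GLOBAL);
	if (!counter)
		abort();
	if (ustctl_create_counter_data(counter, &counter_data) == 0)
		(void) ustctl_send_counter_data_to_ust(sock, group_handle,
				counter_data);
	/* Sum slot 42 across the global and all per-cpu layouts. */
	if (ustctl_counter_aggregate(counter, (const size_t[]){ 42 },
			&value, &overflow, &underflow) == 0)
		fprintf(stderr, "count=%" PRId64 "\n", value);
	/* Teardown: release counter_data first, then destroy the counter. */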
diff --git a/libcounter/Makefile.am b/libcounter/Makefile.am new file mode 100644 index 00000000..9d52b74c --- /dev/null +++ b/libcounter/Makefile.am @@ -0,0 +1,18 @@ +AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include +AM_CFLAGS += -fno-strict-aliasing + +noinst_LTLIBRARIES = libcounter.la + +libcounter_la_SOURCES = \ + counter.c smp.c smp.h shm.c shm.h shm_internal.h shm_types.h \ + counter-api.h counter.h counter-internal.h counter-types.h + +libcounter_la_LIBADD = \ + -lpthread \ + -lrt + +if HAVE_LIBNUMA +libcounter_la_LIBADD += -lnuma +endif + +libcounter_la_CFLAGS = -DUST_COMPONENT="libcounter" $(AM_CFLAGS) diff --git a/libcounter/counter-api.h b/libcounter/counter-api.h new file mode 100644 index 00000000..0a7c0ade --- /dev/null +++ b/libcounter/counter-api.h @@ -0,0 +1,296 @@ +/* + * counter/counter-api.h + * + * LTTng Counters API, requiring counter/config.h + * + * Copyright (C) 2020 Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _LTTNG_COUNTER_API_H +#define _LTTNG_COUNTER_API_H + +#include +#include +#include "counter.h" +#include "counter-internal.h" +#include +#include +#include +#include "../libringbuffer/getcpu.h" + +/* + * Using unsigned arithmetic because overflow is defined. 
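+ * (Signed integer overflow is undefined behaviour in C; the sums below
+ * are therefore computed on the matching unsigned type and cast back
+ * to the signed counter type.)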
+ */ +static inline int __lttng_counter_add(const struct lib_counter_config *config, + enum lib_counter_config_alloc alloc, + enum lib_counter_config_sync sync, + struct lib_counter *counter, + const size_t *dimension_indexes, int64_t v, + int64_t *remainder) +{ + size_t index; + bool overflow = false, underflow = false; + struct lib_counter_layout *layout; + int64_t move_sum = 0; + + if (caa_unlikely(lttng_counter_validate_indexes(config, counter, dimension_indexes))) + return -EOVERFLOW; + index = lttng_counter_get_index(config, counter, dimension_indexes); + + switch (alloc) { + case COUNTER_ALLOC_PER_CPU: + layout = &counter->percpu_counters[lttng_ust_get_cpu()]; + break; + case COUNTER_ALLOC_GLOBAL: + layout = &counter->global_counters; + break; + default: + return -EINVAL; + } + if (caa_unlikely(!layout->counters)) + return -ENODEV; + + switch (config->counter_size) { + case COUNTER_SIZE_8_BIT: + { + int8_t *int_p = (int8_t *) layout->counters + index; + int8_t old, n, res; + int8_t global_sum_step = counter->global_sum_step.s8; + + res = *int_p; + switch (sync) { + case COUNTER_SYNC_PER_CPU: + { + do { + move_sum = 0; + old = res; + n = (int8_t) ((uint8_t) old + (uint8_t) v); + if (caa_unlikely(n > (int8_t) global_sum_step)) + move_sum = (int8_t) global_sum_step / 2; + else if (caa_unlikely(n < -(int8_t) global_sum_step)) + move_sum = -((int8_t) global_sum_step / 2); + n -= move_sum; + res = uatomic_cmpxchg(int_p, old, n); + } while (old != res); + break; + } + case COUNTER_SYNC_GLOBAL: + { + do { + old = res; + n = (int8_t) ((uint8_t) old + (uint8_t) v); + res = uatomic_cmpxchg(int_p, old, n); + } while (old != res); + break; + } + } + if (v > 0 && (v >= UINT8_MAX || n < old)) + overflow = true; + else if (v < 0 && (v <= -UINT8_MAX || n > old)) + underflow = true; + break; + } + case COUNTER_SIZE_16_BIT: + { + int16_t *int_p = (int16_t *) layout->counters + index; + int16_t old, n, res; + int16_t global_sum_step = counter->global_sum_step.s16; + + res = *int_p; + switch (sync) { + case COUNTER_SYNC_PER_CPU: + { + do { + move_sum = 0; + old = res; + n = (int16_t) ((uint16_t) old + (uint16_t) v); + if (caa_unlikely(n > (int16_t) global_sum_step)) + move_sum = (int16_t) global_sum_step / 2; + else if (caa_unlikely(n < -(int16_t) global_sum_step)) + move_sum = -((int16_t) global_sum_step / 2); + n -= move_sum; + res = uatomic_cmpxchg(int_p, old, n); + } while (old != res); + break; + } + case COUNTER_SYNC_GLOBAL: + { + do { + old = res; + n = (int16_t) ((uint16_t) old + (uint16_t) v); + res = uatomic_cmpxchg(int_p, old, n); + } while (old != res); + break; + } + } + if (v > 0 && (v >= UINT16_MAX || n < old)) + overflow = true; + else if (v < 0 && (v <= -UINT16_MAX || n > old)) + underflow = true; + break; + } + case COUNTER_SIZE_32_BIT: + { + int32_t *int_p = (int32_t *) layout->counters + index; + int32_t old, n, res; + int32_t global_sum_step = counter->global_sum_step.s32; + + res = *int_p; + switch (sync) { + case COUNTER_SYNC_PER_CPU: + { + do { + move_sum = 0; + old = res; + n = (int32_t) ((uint32_t) old + (uint32_t) v); + if (caa_unlikely(n > (int32_t) global_sum_step)) + move_sum = (int32_t) global_sum_step / 2; + else if (caa_unlikely(n < -(int32_t) global_sum_step)) + move_sum = -((int32_t) global_sum_step / 2); + n -= move_sum; + res = uatomic_cmpxchg(int_p, old, n); + } while (old != res); + break; + } + case COUNTER_SYNC_GLOBAL: + { + do { + old = res; + n = (int32_t) ((uint32_t) old + (uint32_t) v); + res = uatomic_cmpxchg(int_p, old, n); + } while (old != res); + break; 
+ } + } + if (v > 0 && (v >= UINT32_MAX || n < old)) + overflow = true; + else if (v < 0 && (v <= -UINT32_MAX || n > old)) + underflow = true; + break; + } +#if CAA_BITS_PER_LONG == 64 + case COUNTER_SIZE_64_BIT: + { + int64_t *int_p = (int64_t *) layout->counters + index; + int64_t old, n, res; + int64_t global_sum_step = counter->global_sum_step.s64; + + res = *int_p; + switch (sync) { + case COUNTER_SYNC_PER_CPU: + { + do { + move_sum = 0; + old = res; + n = (int64_t) ((uint64_t) old + (uint64_t) v); + if (caa_unlikely(n > (int64_t) global_sum_step)) + move_sum = (int64_t) global_sum_step / 2; + else if (caa_unlikely(n < -(int64_t) global_sum_step)) + move_sum = -((int64_t) global_sum_step / 2); + n -= move_sum; + res = uatomic_cmpxchg(int_p, old, n); + } while (old != res); + break; + } + case COUNTER_SYNC_GLOBAL: + { + do { + old = res; + n = (int64_t) ((uint64_t) old + (uint64_t) v); + res = uatomic_cmpxchg(int_p, old, n); + } while (old != res); + break; + } + } + if (v > 0 && n < old) + overflow = true; + else if (v < 0 && n > old) + underflow = true; + break; + } +#endif + default: + return -EINVAL; + } + if (caa_unlikely(overflow && !lttng_bitmap_test_bit(index, layout->overflow_bitmap))) + lttng_bitmap_set_bit(index, layout->overflow_bitmap); + else if (caa_unlikely(underflow && !lttng_bitmap_test_bit(index, layout->underflow_bitmap))) + lttng_bitmap_set_bit(index, layout->underflow_bitmap); + if (remainder) + *remainder = move_sum; + return 0; +} + +static inline int __lttng_counter_add_percpu(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes, int64_t v) +{ + int64_t move_sum; + int ret; + + ret = __lttng_counter_add(config, COUNTER_ALLOC_PER_CPU, config->sync, + counter, dimension_indexes, v, &move_sum); + if (caa_unlikely(ret)) + return ret; + if (caa_unlikely(move_sum)) + return __lttng_counter_add(config, COUNTER_ALLOC_GLOBAL, COUNTER_SYNC_GLOBAL, + counter, dimension_indexes, move_sum, NULL); + return 0; +} + +static inline int __lttng_counter_add_global(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes, int64_t v) +{ + return __lttng_counter_add(config, COUNTER_ALLOC_GLOBAL, config->sync, counter, + dimension_indexes, v, NULL); +} + +static inline int lttng_counter_add(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes, int64_t v) +{ + switch (config->alloc) { + case COUNTER_ALLOC_PER_CPU: /* Fallthrough */ + case COUNTER_ALLOC_PER_CPU | COUNTER_ALLOC_GLOBAL: + return __lttng_counter_add_percpu(config, counter, dimension_indexes, v); + case COUNTER_ALLOC_GLOBAL: + return __lttng_counter_add_global(config, counter, dimension_indexes, v); + default: + return -EINVAL; + } +} + +static inline int lttng_counter_inc(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes) +{ + return lttng_counter_add(config, counter, dimension_indexes, 1); +} + +static inline int lttng_counter_dec(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes) +{ + return lttng_counter_add(config, counter, dimension_indexes, -1); +} + +#endif /* _LTTNG_COUNTER_API_H */ diff --git a/libcounter/counter-internal.h b/libcounter/counter-internal.h new file mode 100644 index 00000000..38cb089b --- /dev/null +++ b/libcounter/counter-internal.h @@ -0,0 +1,65 @@ +/* + * counter/counter-internal.h + * + * LTTng Counters Internal Header + * + * 
Copyright (C) 2020 Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _LTTNG_COUNTER_INTERNAL_H +#define _LTTNG_COUNTER_INTERNAL_H + +#include +#include +#include +#include "counter-types.h" + +static inline int lttng_counter_validate_indexes(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes) +{ + size_t nr_dimensions = counter->nr_dimensions, i; + + for (i = 0; i < nr_dimensions; i++) { + if (caa_unlikely(dimension_indexes[i] >= counter->dimensions[i].max_nr_elem)) + return -EOVERFLOW; + } + return 0; +} + + +static inline size_t lttng_counter_get_index(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes) +{ + size_t nr_dimensions = counter->nr_dimensions, i; + size_t index = 0; + + for (i = 0; i < nr_dimensions; i++) { + struct lib_counter_dimension *dimension = &counter->dimensions[i]; + const size_t *dimension_index = &dimension_indexes[i]; + + index += *dimension_index * dimension->stride; + } + return index; +} + +#endif /* _LTTNG_COUNTER_INTERNAL_H */ diff --git a/libcounter/counter-types.h b/libcounter/counter-types.h new file mode 100644 index 00000000..b193fb3a --- /dev/null +++ b/libcounter/counter-types.h @@ -0,0 +1,93 @@ +/* + * counter/counter-types.h + * + * LTTng Counters Types + * + * Copyright (C) 2020 Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _LTTNG_COUNTER_TYPES_H +#define _LTTNG_COUNTER_TYPES_H + +#include +#include +#include +#include +#include +#include +#include "shm_types.h" + +struct lib_counter_dimension { + /* + * Max. number of indexable elements. + */ + size_t max_nr_elem; + /* + * The stride for a dimension is the multiplication factor which + * should be applied to its index to take into account other + * dimensions nested inside. + */ + size_t stride; +}; + +struct lib_counter_layout { + void *counters; + unsigned long *overflow_bitmap; + unsigned long *underflow_bitmap; + int shm_fd; + size_t shm_len; + struct lttng_counter_shm_handle handle; +}; + +enum lib_counter_arithmetic { + LIB_COUNTER_ARITHMETIC_MODULAR, + LIB_COUNTER_ARITHMETIC_SATURATE, +}; + +struct lib_counter { + size_t nr_dimensions; + int64_t allocated_elem; + struct lib_counter_dimension *dimensions; + enum lib_counter_arithmetic arithmetic; + union { + struct { + int32_t max, min; + } limits_32_bit; + struct { + int64_t max, min; + } limits_64_bit; + } saturation; + union { + int8_t s8; + int16_t s16; + int32_t s32; + int64_t s64; + } global_sum_step; /* 0 if unused */ + struct lib_counter_config config; + + struct lib_counter_layout global_counters; + struct lib_counter_layout *percpu_counters; + + bool is_daemon; + struct lttng_counter_shm_object_table *object_table; +}; + +#endif /* _LTTNG_COUNTER_TYPES_H */ diff --git a/libcounter/counter.c b/libcounter/counter.c new file mode 100644 index 00000000..4010d5dd --- /dev/null +++ b/libcounter/counter.c @@ -0,0 +1,564 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR LGPL-2.1-only) + * + * counter.c + * + * Copyright (C) 2020 Mathieu Desnoyers + */ + +#define _GNU_SOURCE +#include +#include "counter.h" +#include "counter-internal.h" +#include +#include +#include +#include +#include +#include +#include "smp.h" +#include "shm.h" + +static size_t lttng_counter_get_dimension_nr_elements(struct lib_counter_dimension *dimension) +{ + return dimension->max_nr_elem; +} + +static int lttng_counter_init_stride(const struct lib_counter_config *config, + struct lib_counter *counter) +{ + size_t nr_dimensions = counter->nr_dimensions; + size_t stride = 1; + ssize_t i; + + for (i = nr_dimensions - 1; i >= 0; i--) { + struct lib_counter_dimension *dimension = &counter->dimensions[i]; + size_t nr_elem; + + nr_elem = lttng_counter_get_dimension_nr_elements(dimension); + dimension->stride = stride; + /* nr_elem should be minimum 1 for each dimension. */ + if (!nr_elem) + return -EINVAL; + stride *= nr_elem; + if (stride > SIZE_MAX / nr_elem) + return -EINVAL; + } + return 0; +} + +static int lttng_counter_layout_init(struct lib_counter *counter, int cpu, int shm_fd) +{ + struct lib_counter_layout *layout; + size_t counter_size; + size_t nr_elem = counter->allocated_elem; + size_t shm_length = 0, counters_offset, overflow_offset, underflow_offset; + struct lttng_counter_shm_object *shm_object; + + if (shm_fd < 0) + return 0; /* Skip, will be populated later. 
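+	 * A negative shm_fd means the fd is not available yet; the
+	 * layout is initialized later through lttng_counter_set_global_shm()
+	 * or lttng_counter_set_cpu_shm().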
*/ + + if (cpu == -1) + layout = &counter->global_counters; + else + layout = &counter->percpu_counters[cpu]; + switch (counter->config.counter_size) { + case COUNTER_SIZE_8_BIT: + case COUNTER_SIZE_16_BIT: + case COUNTER_SIZE_32_BIT: + case COUNTER_SIZE_64_BIT: + counter_size = (size_t) counter->config.counter_size; + break; + default: + return -EINVAL; + } + layout->shm_fd = shm_fd; + counters_offset = shm_length; + shm_length += counter_size * nr_elem; + overflow_offset = shm_length; + shm_length += ALIGN(nr_elem, 8) / 8; + underflow_offset = shm_length; + shm_length += ALIGN(nr_elem, 8) / 8; + layout->shm_len = shm_length; + if (counter->is_daemon) { + /* Allocate and clear shared memory. */ + shm_object = lttng_counter_shm_object_table_alloc(counter->object_table, + shm_length, LTTNG_COUNTER_SHM_OBJECT_SHM, shm_fd, cpu); + if (!shm_object) + return -ENOMEM; + } else { + /* Map pre-existing shared memory. */ + shm_object = lttng_counter_shm_object_table_append_shm(counter->object_table, + shm_fd, shm_length); + if (!shm_object) + return -ENOMEM; + } + layout->counters = shm_object->memory_map + counters_offset; + layout->overflow_bitmap = (unsigned long *)(shm_object->memory_map + overflow_offset); + layout->underflow_bitmap = (unsigned long *)(shm_object->memory_map + underflow_offset); + return 0; +} + +int lttng_counter_set_global_shm(struct lib_counter *counter, int fd) +{ + struct lib_counter_config *config = &counter->config; + struct lib_counter_layout *layout; + + if (!(config->alloc & COUNTER_ALLOC_GLOBAL)) + return -EINVAL; + layout = &counter->global_counters; + if (layout->shm_fd >= 0) + return -EBUSY; + return lttng_counter_layout_init(counter, -1, fd); +} + +int lttng_counter_set_cpu_shm(struct lib_counter *counter, int cpu, int fd) +{ + struct lib_counter_config *config = &counter->config; + struct lib_counter_layout *layout; + + if (cpu < 0 || cpu >= lttng_counter_num_possible_cpus()) + return -EINVAL; + + if (!(config->alloc & COUNTER_ALLOC_PER_CPU)) + return -EINVAL; + layout = &counter->percpu_counters[cpu]; + if (layout->shm_fd >= 0) + return -EBUSY; + return lttng_counter_layout_init(counter, cpu, fd); +} + +static +int lttng_counter_set_global_sum_step(struct lib_counter *counter, + int64_t global_sum_step) +{ + if (global_sum_step < 0) + return -EINVAL; + + switch (counter->config.counter_size) { + case COUNTER_SIZE_8_BIT: + if (global_sum_step > INT8_MAX) + return -EINVAL; + counter->global_sum_step.s8 = (int8_t) global_sum_step; + break; + case COUNTER_SIZE_16_BIT: + if (global_sum_step > INT16_MAX) + return -EINVAL; + counter->global_sum_step.s16 = (int16_t) global_sum_step; + break; + case COUNTER_SIZE_32_BIT: + if (global_sum_step > INT32_MAX) + return -EINVAL; + counter->global_sum_step.s32 = (int32_t) global_sum_step; + break; + case COUNTER_SIZE_64_BIT: + counter->global_sum_step.s64 = global_sum_step; + break; + default: + return -EINVAL; + } + + return 0; +} + +static +int validate_args(const struct lib_counter_config *config, + size_t nr_dimensions, + const size_t *max_nr_elem, + int64_t global_sum_step, + int global_counter_fd, + int nr_counter_cpu_fds, + const int *counter_cpu_fds) +{ + int nr_cpus = lttng_counter_num_possible_cpus(); + + if (CAA_BITS_PER_LONG != 64 && config->counter_size == COUNTER_SIZE_64_BIT) { + WARN_ON_ONCE(1); + return -1; + } + if (!max_nr_elem) + return -1; + /* + * global sum step is only useful with allocating both per-cpu + * and global counters. 
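+	 * (When a per-cpu count drifts past the step, __lttng_counter_add()
+	 * moves half of the step into the global counter.)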
+	 */
+	if (global_sum_step && (!(config->alloc & COUNTER_ALLOC_GLOBAL) ||
+			!(config->alloc & COUNTER_ALLOC_PER_CPU)))
+		return -1;
+	if (!(config->alloc & COUNTER_ALLOC_GLOBAL) && global_counter_fd >= 0)
+		return -1;
+	if (!(config->alloc & COUNTER_ALLOC_PER_CPU) && counter_cpu_fds)
+		return -1;
+	if (!(config->alloc & COUNTER_ALLOC_PER_CPU) && nr_counter_cpu_fds >= 0)
+		return -1;
+	if (counter_cpu_fds && nr_cpus != nr_counter_cpu_fds)
+		return -1;
+	return 0;
+}
+
+struct lib_counter *lttng_counter_create(const struct lib_counter_config *config,
+		size_t nr_dimensions,
+		const size_t *max_nr_elem,
+		int64_t global_sum_step,
+		int global_counter_fd,
+		int nr_counter_cpu_fds,
+		const int *counter_cpu_fds,
+		bool is_daemon)
+{
+	struct lib_counter *counter;
+	size_t dimension, nr_elem = 1;
+	int cpu, ret;
+	int nr_handles = 0;
+	int nr_cpus = lttng_counter_num_possible_cpus();
+
+	if (validate_args(config, nr_dimensions, max_nr_elem,
+			global_sum_step, global_counter_fd, nr_counter_cpu_fds,
+			counter_cpu_fds))
+		return NULL;
+	counter = zmalloc(sizeof(struct lib_counter));
+	if (!counter)
+		return NULL;
+	counter->global_counters.shm_fd = -1;
+	counter->config = *config;
+	counter->is_daemon = is_daemon;
+	if (lttng_counter_set_global_sum_step(counter, global_sum_step))
+		goto error_sum_step;
+	counter->nr_dimensions = nr_dimensions;
+	counter->dimensions = zmalloc(nr_dimensions * sizeof(*counter->dimensions));
+	if (!counter->dimensions)
+		goto error_dimensions;
+	for (dimension = 0; dimension < nr_dimensions; dimension++)
+		counter->dimensions[dimension].max_nr_elem = max_nr_elem[dimension];
+	if (config->alloc & COUNTER_ALLOC_PER_CPU) {
+		counter->percpu_counters = zmalloc(sizeof(struct lib_counter_layout) * nr_cpus);
+		if (!counter->percpu_counters)
+			goto error_alloc_percpu;
+		lttng_counter_for_each_possible_cpu(cpu)
+			counter->percpu_counters[cpu].shm_fd = -1;
+	}
+
+	if (lttng_counter_init_stride(config, counter))
+		goto error_init_stride;
+	//TODO saturation values.
+	for (dimension = 0; dimension < counter->nr_dimensions; dimension++)
+		nr_elem *= lttng_counter_get_dimension_nr_elements(&counter->dimensions[dimension]);
+	counter->allocated_elem = nr_elem;
+
+	if (config->alloc & COUNTER_ALLOC_GLOBAL)
+		nr_handles++;
+	if (config->alloc & COUNTER_ALLOC_PER_CPU)
+		nr_handles += nr_cpus;
+	/* Allocate table for global and per-cpu counters.
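+	 * nr_handles is one shm object per layout: an optional global
+	 * object plus one object per possible CPU.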
*/ + counter->object_table = lttng_counter_shm_object_table_create(nr_handles); + if (!counter->object_table) + goto error_alloc_object_table; + + if (config->alloc & COUNTER_ALLOC_GLOBAL) { + ret = lttng_counter_layout_init(counter, -1, global_counter_fd); /* global */ + if (ret) + goto layout_init_error; + } + if ((config->alloc & COUNTER_ALLOC_PER_CPU) && counter_cpu_fds) { + lttng_counter_for_each_possible_cpu(cpu) { + ret = lttng_counter_layout_init(counter, cpu, counter_cpu_fds[cpu]); + if (ret) + goto layout_init_error; + } + } + return counter; + +layout_init_error: + lttng_counter_shm_object_table_destroy(counter->object_table, is_daemon); +error_alloc_object_table: +error_init_stride: + free(counter->percpu_counters); +error_alloc_percpu: + free(counter->dimensions); +error_dimensions: +error_sum_step: + free(counter); + return NULL; +} + +void lttng_counter_destroy(struct lib_counter *counter) +{ + struct lib_counter_config *config = &counter->config; + + if (config->alloc & COUNTER_ALLOC_PER_CPU) + free(counter->percpu_counters); + lttng_counter_shm_object_table_destroy(counter->object_table, counter->is_daemon); + free(counter->dimensions); + free(counter); +} + +int lttng_counter_get_global_shm(struct lib_counter *counter, int *fd, size_t *len) +{ + int shm_fd; + + shm_fd = counter->global_counters.shm_fd; + if (shm_fd < 0) + return -1; + *fd = shm_fd; + *len = counter->global_counters.shm_len; + return 0; +} + +int lttng_counter_get_cpu_shm(struct lib_counter *counter, int cpu, int *fd, size_t *len) +{ + struct lib_counter_layout *layout; + int shm_fd; + + if (cpu >= lttng_counter_num_possible_cpus()) + return -1; + layout = &counter->percpu_counters[cpu]; + shm_fd = layout->shm_fd; + if (shm_fd < 0) + return -1; + *fd = shm_fd; + *len = layout->shm_len; + return 0; +} + +int lttng_counter_read(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes, + int cpu, int64_t *value, bool *overflow, + bool *underflow) +{ + size_t index; + struct lib_counter_layout *layout; + + if (caa_unlikely(lttng_counter_validate_indexes(config, counter, dimension_indexes))) + return -EOVERFLOW; + index = lttng_counter_get_index(config, counter, dimension_indexes); + + switch (config->alloc) { + case COUNTER_ALLOC_PER_CPU: + if (cpu < 0 || cpu >= lttng_counter_num_possible_cpus()) + return -EINVAL; + layout = &counter->percpu_counters[cpu]; + break; + case COUNTER_ALLOC_PER_CPU | COUNTER_ALLOC_GLOBAL: + if (cpu >= 0) { + if (cpu >= lttng_counter_num_possible_cpus()) + return -EINVAL; + layout = &counter->percpu_counters[cpu]; + } else { + layout = &counter->global_counters; + } + break; + case COUNTER_ALLOC_GLOBAL: + if (cpu >= 0) + return -EINVAL; + layout = &counter->global_counters; + break; + default: + return -EINVAL; + } + if (caa_unlikely(!layout->counters)) + return -ENODEV; + + switch (config->counter_size) { + case COUNTER_SIZE_8_BIT: + { + int8_t *int_p = (int8_t *) layout->counters + index; + *value = (int64_t) CMM_LOAD_SHARED(*int_p); + break; + } + case COUNTER_SIZE_16_BIT: + { + int16_t *int_p = (int16_t *) layout->counters + index; + *value = (int64_t) CMM_LOAD_SHARED(*int_p); + break; + } + case COUNTER_SIZE_32_BIT: + { + int32_t *int_p = (int32_t *) layout->counters + index; + *value = (int64_t) CMM_LOAD_SHARED(*int_p); + break; + } +#if CAA_BITS_PER_LONG == 64 + case COUNTER_SIZE_64_BIT: + { + int64_t *int_p = (int64_t *) layout->counters + index; + *value = CMM_LOAD_SHARED(*int_p); + break; + } +#endif + default: + return 
-EINVAL; + } + *overflow = lttng_bitmap_test_bit(index, layout->overflow_bitmap); + *underflow = lttng_bitmap_test_bit(index, layout->underflow_bitmap); + return 0; +} + +int lttng_counter_aggregate(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes, + int64_t *value, bool *overflow, + bool *underflow) +{ + int cpu, ret; + int64_t v, sum = 0; + bool of, uf; + + *overflow = false; + *underflow = false; + + switch (config->alloc) { + case COUNTER_ALLOC_GLOBAL: /* Fallthrough */ + case COUNTER_ALLOC_PER_CPU | COUNTER_ALLOC_GLOBAL: + /* Read global counter. */ + ret = lttng_counter_read(config, counter, dimension_indexes, + -1, &v, &of, &uf); + if (ret < 0) + return ret; + sum += v; + *overflow |= of; + *underflow |= uf; + break; + case COUNTER_ALLOC_PER_CPU: + break; + default: + return -EINVAL; + } + + switch (config->alloc) { + case COUNTER_ALLOC_GLOBAL: + break; + case COUNTER_ALLOC_PER_CPU | COUNTER_ALLOC_GLOBAL: /* Fallthrough */ + case COUNTER_ALLOC_PER_CPU: + lttng_counter_for_each_possible_cpu(cpu) { + int64_t old = sum; + + ret = lttng_counter_read(config, counter, dimension_indexes, + cpu, &v, &of, &uf); + if (ret < 0) + return ret; + *overflow |= of; + *underflow |= uf; + /* Overflow is defined on unsigned types. */ + sum = (int64_t) ((uint64_t) old + (uint64_t) v); + if (v > 0 && sum < old) + *overflow = true; + else if (v < 0 && sum > old) + *underflow = true; + } + break; + default: + return -EINVAL; + } + *value = sum; + return 0; +} + +static +int lttng_counter_clear_cpu(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes, + int cpu) +{ + size_t index; + struct lib_counter_layout *layout; + + if (caa_unlikely(lttng_counter_validate_indexes(config, counter, dimension_indexes))) + return -EOVERFLOW; + index = lttng_counter_get_index(config, counter, dimension_indexes); + + switch (config->alloc) { + case COUNTER_ALLOC_PER_CPU: + if (cpu < 0 || cpu >= lttng_counter_num_possible_cpus()) + return -EINVAL; + layout = &counter->percpu_counters[cpu]; + break; + case COUNTER_ALLOC_PER_CPU | COUNTER_ALLOC_GLOBAL: + if (cpu >= 0) { + if (cpu >= lttng_counter_num_possible_cpus()) + return -EINVAL; + layout = &counter->percpu_counters[cpu]; + } else { + layout = &counter->global_counters; + } + break; + case COUNTER_ALLOC_GLOBAL: + if (cpu >= 0) + return -EINVAL; + layout = &counter->global_counters; + break; + default: + return -EINVAL; + } + if (caa_unlikely(!layout->counters)) + return -ENODEV; + + switch (config->counter_size) { + case COUNTER_SIZE_8_BIT: + { + int8_t *int_p = (int8_t *) layout->counters + index; + CMM_STORE_SHARED(*int_p, 0); + break; + } + case COUNTER_SIZE_16_BIT: + { + int16_t *int_p = (int16_t *) layout->counters + index; + CMM_STORE_SHARED(*int_p, 0); + break; + } + case COUNTER_SIZE_32_BIT: + { + int32_t *int_p = (int32_t *) layout->counters + index; + CMM_STORE_SHARED(*int_p, 0); + break; + } +#if CAA_BITS_PER_LONG == 64 + case COUNTER_SIZE_64_BIT: + { + int64_t *int_p = (int64_t *) layout->counters + index; + CMM_STORE_SHARED(*int_p, 0); + break; + } +#endif + default: + return -EINVAL; + } + lttng_bitmap_clear_bit(index, layout->overflow_bitmap); + lttng_bitmap_clear_bit(index, layout->underflow_bitmap); + return 0; +} + +int lttng_counter_clear(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes) +{ + int cpu, ret; + + switch (config->alloc) { + case COUNTER_ALLOC_PER_CPU: + break; + case 
COUNTER_ALLOC_GLOBAL: /* Fallthrough */ + case COUNTER_ALLOC_PER_CPU | COUNTER_ALLOC_GLOBAL: + /* Clear global counter. */ + ret = lttng_counter_clear_cpu(config, counter, dimension_indexes, -1); + if (ret < 0) + return ret; + break; + default: + return -EINVAL; + } + + switch (config->alloc) { + case COUNTER_ALLOC_PER_CPU: /* Fallthrough */ + case COUNTER_ALLOC_PER_CPU | COUNTER_ALLOC_GLOBAL: + lttng_counter_for_each_possible_cpu(cpu) { + ret = lttng_counter_clear_cpu(config, counter, dimension_indexes, cpu); + if (ret < 0) + return ret; + } + break; + case COUNTER_ALLOC_GLOBAL: + break; + default: + return -EINVAL; + } + return 0; +} diff --git a/libcounter/counter.h b/libcounter/counter.h new file mode 100644 index 00000000..54f83009 --- /dev/null +++ b/libcounter/counter.h @@ -0,0 +1,65 @@ +/* + * lttng/counter.h + * + * LTTng Counters API + * + * Copyright (C) 2020 Mathieu Desnoyers + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _LTTNG_COUNTER_H +#define _LTTNG_COUNTER_H + +#include +#include +#include "counter-types.h" + +/* max_nr_elem is for each dimension. 
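+ * For example, a two-dimensional 4 x 8 counter is created with
+ * nr_dimensions = 2 and max_nr_elem = { 4, 8 }; indexes are then
+ * linearized as dimension_indexes[0] * 8 + dimension_indexes[1].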
*/ +struct lib_counter *lttng_counter_create(const struct lib_counter_config *config, + size_t nr_dimensions, + const size_t *max_nr_elem, + int64_t global_sum_step, + int global_counter_fd, + int nr_counter_cpu_fds, + const int *counter_cpu_fds, + bool is_daemon); +void lttng_counter_destroy(struct lib_counter *counter); + +int lttng_counter_set_global_shm(struct lib_counter *counter, int fd); +int lttng_counter_set_cpu_shm(struct lib_counter *counter, int cpu, int fd); + +int lttng_counter_get_global_shm(struct lib_counter *counter, int *fd, size_t *len); +int lttng_counter_get_cpu_shm(struct lib_counter *counter, int cpu, int *fd, size_t *len); + +int lttng_counter_read(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes, + int cpu, int64_t *value, + bool *overflow, bool *underflow); +int lttng_counter_aggregate(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes, + int64_t *value, + bool *overflow, bool *underflow); +int lttng_counter_clear(const struct lib_counter_config *config, + struct lib_counter *counter, + const size_t *dimension_indexes); + +#endif /* _LTTNG_COUNTER_H */ diff --git a/libcounter/shm.c b/libcounter/shm.c new file mode 100644 index 00000000..a2e1f819 --- /dev/null +++ b/libcounter/shm.c @@ -0,0 +1,387 @@ +/* + * libcounter/shm.c + * + * Copyright (C) 2005-2012 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; only + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define _LGPL_SOURCE +#include +#include "shm.h" +#include +#include +#include +#include +#include /* For mode constants */ +#include /* For O_* constants */ +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_LIBNUMA +#include +#include +#endif +#include +#include +#include "../libringbuffer/mmap.h" + +/* + * Ensure we have the required amount of space available by writing 0 + * into the entire buffer. Not doing so can trigger SIGBUS when going + * beyond the available shm space. 
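+ * ftruncate() alone would create a sparse file: blocks would only be
+ * allocated on first write, turning a full filesystem into a SIGBUS
+ * at fault time instead of an error reportable here.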
+ */ +static +int zero_file(int fd, size_t len) +{ + ssize_t retlen; + size_t written = 0; + char *zeropage; + long pagelen; + int ret; + + pagelen = sysconf(_SC_PAGESIZE); + if (pagelen < 0) + return (int) pagelen; + zeropage = calloc(pagelen, 1); + if (!zeropage) + return -ENOMEM; + + while (len > written) { + do { + retlen = write(fd, zeropage, + min_t(size_t, pagelen, len - written)); + } while (retlen == -1UL && errno == EINTR); + if (retlen < 0) { + ret = (int) retlen; + goto error; + } + written += retlen; + } + ret = 0; +error: + free(zeropage); + return ret; +} + +struct lttng_counter_shm_object_table *lttng_counter_shm_object_table_create(size_t max_nb_obj) +{ + struct lttng_counter_shm_object_table *table; + + table = zmalloc(sizeof(struct lttng_counter_shm_object_table) + + max_nb_obj * sizeof(table->objects[0])); + if (!table) + return NULL; + table->size = max_nb_obj; + return table; +} + +static +struct lttng_counter_shm_object *_lttng_counter_shm_object_table_alloc_shm(struct lttng_counter_shm_object_table *table, + size_t memory_map_size, + int cpu_fd) +{ + int shmfd, ret; + struct lttng_counter_shm_object *obj; + char *memory_map; + + if (cpu_fd < 0) + return NULL; + if (table->allocated_len >= table->size) + return NULL; + obj = &table->objects[table->allocated_len]; + + /* create shm */ + + shmfd = cpu_fd; + ret = zero_file(shmfd, memory_map_size); + if (ret) { + PERROR("zero_file"); + goto error_zero_file; + } + ret = ftruncate(shmfd, memory_map_size); + if (ret) { + PERROR("ftruncate"); + goto error_ftruncate; + } + /* + * Also ensure the file metadata is synced with the storage by using + * fsync(2). + */ + ret = fsync(shmfd); + if (ret) { + PERROR("fsync"); + goto error_fsync; + } + obj->shm_fd_ownership = 0; + obj->shm_fd = shmfd; + + /* memory_map: mmap */ + memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE, + MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0); + if (memory_map == MAP_FAILED) { + PERROR("mmap"); + goto error_mmap; + } + obj->type = LTTNG_COUNTER_SHM_OBJECT_SHM; + obj->memory_map = memory_map; + obj->memory_map_size = memory_map_size; + obj->allocated_len = 0; + obj->index = table->allocated_len++; + + return obj; + +error_mmap: +error_fsync: +error_ftruncate: +error_zero_file: + return NULL; +} + +static +struct lttng_counter_shm_object *_lttng_counter_shm_object_table_alloc_mem(struct lttng_counter_shm_object_table *table, + size_t memory_map_size) +{ + struct lttng_counter_shm_object *obj; + void *memory_map; + + if (table->allocated_len >= table->size) + return NULL; + obj = &table->objects[table->allocated_len]; + + memory_map = zmalloc(memory_map_size); + if (!memory_map) + goto alloc_error; + + /* no shm_fd */ + obj->shm_fd = -1; + obj->shm_fd_ownership = 0; + + obj->type = LTTNG_COUNTER_SHM_OBJECT_MEM; + obj->memory_map = memory_map; + obj->memory_map_size = memory_map_size; + obj->allocated_len = 0; + obj->index = table->allocated_len++; + + return obj; + +alloc_error: + return NULL; +} + +/* + * libnuma prints errors on the console even for numa_available(). + * Work-around this limitation by using get_mempolicy() directly to + * check whether the kernel supports mempolicy. 
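+ * (ENOSYS from get_mempolicy() means the kernel has no mempolicy
+ * support at all, so numa_available() does not need to be called.)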
+ */
+#ifdef HAVE_LIBNUMA
+static bool lttng_is_numa_available(void)
+{
+	int ret;
+
+	ret = get_mempolicy(NULL, NULL, 0, NULL, 0);
+	if (ret && errno == ENOSYS) {
+		return false;
+	}
+	return numa_available() > 0;
+}
+#endif
+
+struct lttng_counter_shm_object *lttng_counter_shm_object_table_alloc(struct lttng_counter_shm_object_table *table,
+			size_t memory_map_size,
+			enum lttng_counter_shm_object_type type,
+			int cpu_fd,
+			int cpu)
+{
+	struct lttng_counter_shm_object *shm_object;
+#ifdef HAVE_LIBNUMA
+	int oldnode = 0, node;
+	bool numa_avail;
+
+	numa_avail = lttng_is_numa_available();
+	if (numa_avail) {
+		oldnode = numa_preferred();
+		if (cpu >= 0) {
+			node = numa_node_of_cpu(cpu);
+			if (node >= 0)
+				numa_set_preferred(node);
+		}
+		if (cpu < 0 || node < 0)
+			numa_set_localalloc();
+	}
+#endif /* HAVE_LIBNUMA */
+	switch (type) {
+	case LTTNG_COUNTER_SHM_OBJECT_SHM:
+		shm_object = _lttng_counter_shm_object_table_alloc_shm(table, memory_map_size,
+				cpu_fd);
+		break;
+	case LTTNG_COUNTER_SHM_OBJECT_MEM:
+		shm_object = _lttng_counter_shm_object_table_alloc_mem(table, memory_map_size);
+		break;
+	default:
+		assert(0);
+	}
+#ifdef HAVE_LIBNUMA
+	if (numa_avail)
+		numa_set_preferred(oldnode);
+#endif /* HAVE_LIBNUMA */
+	return shm_object;
+}
+
+struct lttng_counter_shm_object *lttng_counter_shm_object_table_append_shm(struct lttng_counter_shm_object_table *table,
+			int shm_fd,
+			size_t memory_map_size)
+{
+	struct lttng_counter_shm_object *obj;
+	char *memory_map;
+
+	if (table->allocated_len >= table->size)
+		return NULL;
+
+	obj = &table->objects[table->allocated_len];
+
+	obj->shm_fd = shm_fd;
+	obj->shm_fd_ownership = 1;
+
+	/* memory_map: mmap */
+	memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
+			MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0);
+	if (memory_map == MAP_FAILED) {
+		PERROR("mmap");
+		goto error_mmap;
+	}
+	obj->type = LTTNG_COUNTER_SHM_OBJECT_SHM;
+	obj->memory_map = memory_map;
+	obj->memory_map_size = memory_map_size;
+	obj->allocated_len = memory_map_size;
+	obj->index = table->allocated_len++;
+
+	return obj;
+
+error_mmap:
+	return NULL;
+}
+
+/*
+ * Passing ownership of mem to object.
+ */
+struct lttng_counter_shm_object *lttng_counter_shm_object_table_append_mem(struct lttng_counter_shm_object_table *table,
+			void *mem, size_t memory_map_size)
+{
+	struct lttng_counter_shm_object *obj;
+
+	if (table->allocated_len >= table->size)
+		return NULL;
+	obj = &table->objects[table->allocated_len];
+
+	obj->shm_fd = -1;
+	obj->shm_fd_ownership = 0;
+
+	obj->type = LTTNG_COUNTER_SHM_OBJECT_MEM;
+	obj->memory_map = mem;
+	obj->memory_map_size = memory_map_size;
+	obj->allocated_len = memory_map_size;
+	obj->index = table->allocated_len++;
+
+	return obj;
+}
+
+static
+void lttng_counter_shmp_object_destroy(struct lttng_counter_shm_object *obj, int consumer)
+{
+	switch (obj->type) {
+	case LTTNG_COUNTER_SHM_OBJECT_SHM:
+	{
+		int ret;
+
+		ret = munmap(obj->memory_map, obj->memory_map_size);
+		if (ret) {
+			PERROR("munmap");
+			assert(0);
+		}
+
+		if (obj->shm_fd_ownership) {
+			/* Delete FDs only if called from app (not consumer).
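+			 * Both branches close(2) the fd; the application
+			 * branch additionally removes it from the fd
+			 * tracker, under the fd tracker lock.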
*/ + if (!consumer) { + lttng_ust_lock_fd_tracker(); + ret = close(obj->shm_fd); + if (!ret) { + lttng_ust_delete_fd_from_tracker(obj->shm_fd); + } else { + PERROR("close"); + assert(0); + } + lttng_ust_unlock_fd_tracker(); + } else { + ret = close(obj->shm_fd); + if (ret) { + PERROR("close"); + assert(0); + } + } + } + break; + } + case LTTNG_COUNTER_SHM_OBJECT_MEM: + { + free(obj->memory_map); + break; + } + default: + assert(0); + } +} + +void lttng_counter_shm_object_table_destroy(struct lttng_counter_shm_object_table *table, int consumer) +{ + int i; + + for (i = 0; i < table->allocated_len; i++) + lttng_counter_shmp_object_destroy(&table->objects[i], consumer); + free(table); +} + +/* + * lttng_counter_zalloc_shm - allocate memory within a shm object. + * + * Shared memory is already zeroed by shmget. + * *NOT* multithread-safe (should be protected by mutex). + * Returns a -1, -1 tuple on error. + */ +struct lttng_counter_shm_ref lttng_counter_zalloc_shm(struct lttng_counter_shm_object *obj, size_t len) +{ + struct lttng_counter_shm_ref ref; + struct lttng_counter_shm_ref shm_ref_error = { -1, -1 }; + + if (obj->memory_map_size - obj->allocated_len < len) + return shm_ref_error; + ref.index = obj->index; + ref.offset = obj->allocated_len; + obj->allocated_len += len; + return ref; +} + +void lttng_counter_align_shm(struct lttng_counter_shm_object *obj, size_t align) +{ + size_t offset_len = offset_align(obj->allocated_len, align); + obj->allocated_len += offset_len; +} diff --git a/libcounter/shm.h b/libcounter/shm.h new file mode 100644 index 00000000..2c6e0c7b --- /dev/null +++ b/libcounter/shm.h @@ -0,0 +1,142 @@ +#ifndef _LIBCOUNTER_SHM_H +#define _LIBCOUNTER_SHM_H + +/* + * libcounter/shm.h + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; only + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include "shm_types.h" + +/* lttng_counter_handle_create - for UST. */ +extern +struct lttng_counter_shm_handle *lttng_counter_handle_create(void *data, + uint64_t memory_map_size, int wakeup_fd); +/* lttng_counter_handle_add_cpu - for UST. */ +extern +int lttng_counter_handle_add_cpu(struct lttng_counter_shm_handle *handle, + int shm_fd, uint32_t cpu_nr, + uint64_t memory_map_size); +unsigned int lttng_counter_handle_get_nr_cpus(struct lttng_counter_shm_handle *handle); + +/* + * Pointer dereferencing. We don't trust the shm_ref, so we validate + * both the index and offset with known boundaries. + * + * "shmp" and "shmp_index" guarantee that it's safe to use the pointer + * target type, even in the occurrence of shm_ref modification by an + * untrusted process having write access to the shm_ref. We return a + * NULL pointer if the ranges are invalid. 
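+ *
+ * Illustrative use (the type and reference names here are examples
+ * only, not part of this header):
+ *
+ *	struct foo *p;
+ *
+ *	p = lttng_counter_shmp(handle, obj_ref);
+ *	if (caa_unlikely(!p))
+ *		return -EINVAL;	/* index/offset outside the shm object */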
+ */ +static inline +char *_lttng_counter_shmp_offset(struct lttng_counter_shm_object_table *table, + struct lttng_counter_shm_ref *ref, + size_t idx, size_t elem_size) +{ + struct lttng_counter_shm_object *obj; + size_t objindex, ref_offset; + + objindex = (size_t) ref->index; + if (caa_unlikely(objindex >= table->allocated_len)) + return NULL; + obj = &table->objects[objindex]; + ref_offset = (size_t) ref->offset; + ref_offset += idx * elem_size; + /* Check if part of the element returned would exceed the limits. */ + if (caa_unlikely(ref_offset + elem_size > obj->memory_map_size)) + return NULL; + return &obj->memory_map[ref_offset]; +} + +#define lttng_counter_shmp_index(handle, ref, index) \ + ({ \ + __typeof__((ref)._type) ____ptr_ret; \ + ____ptr_ret = (__typeof__(____ptr_ret)) _lttng_counter_shmp_offset((handle)->table, &(ref)._ref, index, sizeof(*____ptr_ret)); \ + ____ptr_ret; \ + }) + +#define lttng_counter_shmp(handle, ref) lttng_counter_shmp_index(handle, ref, 0) + +static inline +void _lttng_counter_set_shmp(struct lttng_counter_shm_ref *ref, struct lttng_counter_shm_ref src) +{ + *ref = src; +} + +#define lttng_counter_set_shmp(ref, src) _lttng_counter_set_shmp(&(ref)._ref, src) + +struct lttng_counter_shm_object_table *lttng_counter_shm_object_table_create(size_t max_nb_obj); +struct lttng_counter_shm_object *lttng_counter_shm_object_table_alloc(struct lttng_counter_shm_object_table *table, + size_t memory_map_size, + enum lttng_counter_shm_object_type type, + const int cpu_fd, + int cpu); +struct lttng_counter_shm_object *lttng_counter_shm_object_table_append_shm(struct lttng_counter_shm_object_table *table, + int shm_fd, size_t memory_map_size); +/* mem ownership is passed to lttng_counter_shm_object_table_append_mem(). */ +struct lttng_counter_shm_object *lttng_counter_shm_object_table_append_mem(struct lttng_counter_shm_object_table *table, + void *mem, size_t memory_map_size); +void lttng_counter_shm_object_table_destroy(struct lttng_counter_shm_object_table *table, int consumer); + +/* + * lttng_counter_zalloc_shm - allocate memory within a shm object. + * + * Shared memory is already zeroed by shmget. + * *NOT* multithread-safe (should be protected by mutex). + * Returns a -1, -1 tuple on error. 
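+ *
+ * Typical call sequence (sketch): align the allocation, allocate,
+ * then check for the error tuple:
+ *
+ *	lttng_counter_align_shm(obj, __alignof__(uint64_t));
+ *	ref = lttng_counter_zalloc_shm(obj, sizeof(uint64_t));
+ *	if (ref.index == -1)
+ *		return -ENOMEM;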
+ */ +struct lttng_counter_shm_ref lttng_counter_zalloc_shm(struct lttng_counter_shm_object *obj, size_t len); +void lttng_counter_align_shm(struct lttng_counter_shm_object *obj, size_t align); + +static inline +int lttng_counter_shm_get_shm_fd(struct lttng_counter_shm_handle *handle, struct lttng_counter_shm_ref *ref) +{ + struct lttng_counter_shm_object_table *table = handle->table; + struct lttng_counter_shm_object *obj; + size_t index; + + index = (size_t) ref->index; + if (caa_unlikely(index >= table->allocated_len)) + return -EPERM; + obj = &table->objects[index]; + return obj->shm_fd; +} + + +static inline +int lttng_counter_shm_get_shm_size(struct lttng_counter_shm_handle *handle, struct lttng_counter_shm_ref *ref, + uint64_t *size) +{ + struct lttng_counter_shm_object_table *table = handle->table; + struct lttng_counter_shm_object *obj; + size_t index; + + index = (size_t) ref->index; + if (caa_unlikely(index >= table->allocated_len)) + return -EPERM; + obj = &table->objects[index]; + *size = obj->memory_map_size; + return 0; +} + +#endif /* _LIBCOUNTER_SHM_H */ diff --git a/libcounter/shm_internal.h b/libcounter/shm_internal.h new file mode 100644 index 00000000..dcc3aab6 --- /dev/null +++ b/libcounter/shm_internal.h @@ -0,0 +1,35 @@ +#ifndef _LIBCOUNTER_SHM_INTERNAL_H +#define _LIBCOUNTER_SHM_INTERNAL_H + +/* + * libcounter/shm_internal.h + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; only + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +struct lttng_counter_shm_ref { + volatile ssize_t index; /* within the object table */ + volatile ssize_t offset; /* within the object */ +}; + +#define DECLARE_LTTNG_COUNTER_SHMP(type, name) \ + union { \ + struct lttng_counter_shm_ref _ref; \ + type *_type; \ + } name + +#endif /* _LIBCOUNTER_SHM_INTERNAL_H */ diff --git a/libcounter/shm_types.h b/libcounter/shm_types.h new file mode 100644 index 00000000..2086a832 --- /dev/null +++ b/libcounter/shm_types.h @@ -0,0 +1,54 @@ +#ifndef _LIBCOUNTER_SHM_TYPES_H +#define _LIBCOUNTER_SHM_TYPES_H + +/* + * libcounter/shm_types.h + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; only + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
 + *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include
+#include
+#include
+#include "shm_internal.h"
+
+enum lttng_counter_shm_object_type {
+	LTTNG_COUNTER_SHM_OBJECT_SHM,
+	LTTNG_COUNTER_SHM_OBJECT_MEM,
+};
+
+struct lttng_counter_shm_object {
+	enum lttng_counter_shm_object_type type;
+	size_t index;	/* within the object table */
+	int shm_fd;	/* shm fd */
+	char *memory_map;
+	size_t memory_map_size;
+	uint64_t allocated_len;
+	int shm_fd_ownership;
+};
+
+struct lttng_counter_shm_object_table {
+	size_t size;
+	size_t allocated_len;
+	struct lttng_counter_shm_object objects[];
+};
+
+struct lttng_counter_shm_handle {
+	struct lttng_counter_shm_object_table *table;
+};
+
+#endif /* _LIBCOUNTER_SHM_TYPES_H */
diff --git a/libcounter/smp.c b/libcounter/smp.c
new file mode 100644
index 00000000..22ad98ab
--- /dev/null
+++ b/libcounter/smp.c
@@ -0,0 +1,111 @@
+/*
+ * libcounter/smp.c
+ *
+ * Copyright (C) 2011-2012 Mathieu Desnoyers
+ * Copyright (C) 2019 Michael Jeanson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define _GNU_SOURCE
+#define _LGPL_SOURCE
+#include
+#include
+#include "smp.h"
+
+int __lttng_counter_num_possible_cpus;
+
+#if (defined(__GLIBC__) || defined( __UCLIBC__))
+void _lttng_counter_get_num_possible_cpus(void)
+{
+	int result;
+
+	/* On Linux, when some processors are offline
+	 * _SC_NPROCESSORS_CONF counts the offline
+	 * processors, whereas _SC_NPROCESSORS_ONLN
+	 * does not. If we used _SC_NPROCESSORS_ONLN,
+	 * getcpu() could return a value greater than
+	 * this sysconf, in which case the arrays
+	 * indexed by processor would overflow.
+	 */
+	result = sysconf(_SC_NPROCESSORS_CONF);
+	if (result == -1)
+		return;
+	__lttng_counter_num_possible_cpus = result;
+}
+
+#else
+
+/*
+ * The MUSL libc implementation of the _SC_NPROCESSORS_CONF sysconf does not
+ * return the number of configured CPUs in the system but relies on the cpu
+ * affinity mask of the current task.
+ *
+ * So instead we use a strategy similar to GLIBC's: count the cpu
+ * directories in "/sys/devices/system/cpu" and fall back on the value from
+ * sysconf if it fails.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#define __max(a,b) ((a)>(b)?(a):(b))
+
+void _lttng_counter_get_num_possible_cpus(void)
+{
+	int result, count = 0;
+	DIR *cpudir;
+	struct dirent *entry;
+
+	cpudir = opendir("/sys/devices/system/cpu");
+	if (cpudir == NULL)
+		goto end;
+
+	/*
+	 * Count the number of directories named "cpu" followed by an
+	 * integer. This is the same strategy as glibc uses.
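+	 * For example, "cpu0" and "cpu23" are counted, while entries
+	 * such as "cpufreq" or "cpuidle" fail the strtoul() endptr
+	 * check below and are ignored.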
+ */ + while ((entry = readdir(cpudir))) { + if (entry->d_type == DT_DIR && + strncmp(entry->d_name, "cpu", 3) == 0) { + + char *endptr; + unsigned long cpu_num; + + cpu_num = strtoul(entry->d_name + 3, &endptr, 10); + if ((cpu_num < ULONG_MAX) && (endptr != entry->d_name + 3) + && (*endptr == '\0')) { + count++; + } + } + } + +end: + /* + * Get the sysconf value as a fallback. Keep the highest number. + */ + result = __max(sysconf(_SC_NPROCESSORS_CONF), count); + + /* + * If both methods failed, don't store the value. + */ + if (result < 1) + return; + __lttng_counter_num_possible_cpus = result; +} +#endif diff --git a/libcounter/smp.h b/libcounter/smp.h new file mode 100644 index 00000000..00ca7a03 --- /dev/null +++ b/libcounter/smp.h @@ -0,0 +1,43 @@ +#ifndef _LIBCOUNTER_SMP_H +#define _LIBCOUNTER_SMP_H + +/* + * libcounter/smp.h + * + * Copyright (C) 2011-2012 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; only + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * 4kB of per-cpu data available. + */ +#define LTTNG_COUNTER_PER_CPU_MEM_SIZE 4096 + +extern int __lttng_counter_num_possible_cpus; +extern void _lttng_counter_get_num_possible_cpus(void); + +static inline +int lttng_counter_num_possible_cpus(void) +{ + if (!__lttng_counter_num_possible_cpus) + _lttng_counter_get_num_possible_cpus(); + return __lttng_counter_num_possible_cpus; +} + +#define lttng_counter_for_each_possible_cpu(cpu) \ + for ((cpu) = 0; (cpu) < lttng_counter_num_possible_cpus(); (cpu)++) + +#endif /* _LIBCOUNTER_SMP_H */ diff --git a/liblttng-ust-comm/lttng-ust-comm.c b/liblttng-ust-comm/lttng-ust-comm.c index 9786abe7..5322702c 100644 --- a/liblttng-ust-comm/lttng-ust-comm.c +++ b/liblttng-ust-comm/lttng-ust-comm.c @@ -765,6 +765,75 @@ error: return ret; } +ssize_t ustcomm_recv_counter_from_sessiond(int sock, + void **_counter_data, uint64_t var_len) +{ + void *counter_data; + ssize_t len; + + if (var_len > LTTNG_UST_COUNTER_DATA_MAX_LEN) { + len = -EINVAL; + goto error_check; + } + /* Receive variable length data */ + counter_data = zmalloc(var_len); + if (!counter_data) { + len = -ENOMEM; + goto error_alloc; + } + len = ustcomm_recv_unix_sock(sock, counter_data, var_len); + if (len != var_len) { + goto error_recv; + } + *_counter_data = counter_data; + return len; + +error_recv: + free(counter_data); +error_alloc: +error_check: + return len; +} + +int ustcomm_recv_counter_shm_from_sessiond(int sock, + int *shm_fd) +{ + ssize_t len; + int ret; + int fds[1]; + + /* recv shm fd fd */ + lttng_ust_lock_fd_tracker(); + len = ustcomm_recv_fds_unix_sock(sock, fds, 1); + if (len <= 0) { + lttng_ust_unlock_fd_tracker(); + if (len < 0) { + ret = len; + goto error; + } else { + ret = -EIO; + goto error; + } + } + + ret = lttng_ust_add_fd_to_tracker(fds[0]); + if (ret < 0) { + ret = close(fds[0]); + if (ret) { + PERROR("close on received shm_fd"); + } + ret = -EIO; + 
lttng_ust_unlock_fd_tracker(); + goto error; + } + *shm_fd = ret; + lttng_ust_unlock_fd_tracker(); + return 0; + +error: + return ret; +} + /* * Returns 0 on success, negative error value on error. */ diff --git a/liblttng-ust-ctl/ustctl.c b/liblttng-ust-ctl/ustctl.c index b2fa8258..650e7290 100644 --- a/liblttng-ust-ctl/ustctl.c +++ b/liblttng-ust-ctl/ustctl.c @@ -37,6 +37,10 @@ #include "../liblttng-ust/clock.h" #include "../liblttng-ust/getenv.h" +#include "../libcounter/shm.h" +#include "../libcounter/smp.h" +#include "../libcounter/counter.h" + /* * Number of milliseconds to retry before failing metadata writes on * buffer full condition. (10 seconds) @@ -67,6 +71,24 @@ struct ustctl_consumer_stream { uint64_t memory_map_size; }; +#define USTCTL_COUNTER_ATTR_DIMENSION_MAX 8 +struct ustctl_counter_attr { + enum ustctl_counter_arithmetic arithmetic; + enum ustctl_counter_bitness bitness; + uint32_t nr_dimensions; + int64_t global_sum_step; + struct ustctl_counter_dimension dimensions[USTCTL_COUNTER_ATTR_DIMENSION_MAX]; +}; + +/* + * Counter representation within daemon. + */ +struct ustctl_daemon_counter { + struct lib_counter *counter; + const struct lttng_counter_ops *ops; + struct ustctl_counter_attr *attr; /* initial attributes */ +}; + extern void lttng_ring_buffer_client_overwrite_init(void); extern void lttng_ring_buffer_client_overwrite_rt_init(void); extern void lttng_ring_buffer_client_discard_init(void); @@ -77,6 +99,10 @@ extern void lttng_ring_buffer_client_overwrite_rt_exit(void); extern void lttng_ring_buffer_client_discard_exit(void); extern void lttng_ring_buffer_client_discard_rt_exit(void); extern void lttng_ring_buffer_metadata_client_exit(void); +extern void lttng_counter_client_percpu_32_modular_init(void); +extern void lttng_counter_client_percpu_32_modular_exit(void); +extern void lttng_counter_client_percpu_64_modular_init(void); +extern void lttng_counter_client_percpu_64_modular_exit(void); int ustctl_release_handle(int sock, int handle) { @@ -138,6 +164,30 @@ int ustctl_release_object(int sock, struct lttng_ust_object_data *data) case LTTNG_UST_OBJECT_TYPE_EVENT_NOTIFIER_GROUP: case LTTNG_UST_OBJECT_TYPE_EVENT_NOTIFIER: break; + case LTTNG_UST_OBJECT_TYPE_COUNTER: + free(data->u.counter.data); + data->u.counter.data = NULL; + break; + case LTTNG_UST_OBJECT_TYPE_COUNTER_GLOBAL: + if (data->u.counter_global.shm_fd >= 0) { + ret = close(data->u.counter_global.shm_fd); + if (ret < 0) { + ret = -errno; + return ret; + } + data->u.counter_global.shm_fd = -1; + } + break; + case LTTNG_UST_OBJECT_TYPE_COUNTER_CPU: + if (data->u.counter_cpu.shm_fd >= 0) { + ret = close(data->u.counter_cpu.shm_fd); + if (ret < 0) { + ret = -errno; + return ret; + } + data->u.counter_cpu.shm_fd = -1; + } + break; default: assert(0); } @@ -1107,6 +1157,44 @@ int ustctl_duplicate_ust_object_data(struct lttng_ust_object_data **dest, goto error_type; } + case LTTNG_UST_OBJECT_TYPE_COUNTER: + { + obj->u.counter.data = zmalloc(obj->size); + if (!obj->u.counter.data) { + ret = -ENOMEM; + goto error_type; + } + memcpy(obj->u.counter.data, src->u.counter.data, obj->size); + break; + } + + case LTTNG_UST_OBJECT_TYPE_COUNTER_GLOBAL: + { + if (src->u.counter_global.shm_fd >= 0) { + obj->u.counter_global.shm_fd = + dup(src->u.counter_global.shm_fd); + if (obj->u.counter_global.shm_fd < 0) { + ret = errno; + goto error_type; + } + } + break; + } + + case LTTNG_UST_OBJECT_TYPE_COUNTER_CPU: + { + obj->u.counter_cpu.cpu_nr = src->u.counter_cpu.cpu_nr; + if (src->u.counter_cpu.shm_fd >= 0) { + 
obj->u.counter_cpu.shm_fd = + dup(src->u.counter_cpu.shm_fd); + if (obj->u.counter_cpu.shm_fd < 0) { + ret = errno; + goto error_type; + } + } + break; + } + default: ret = -EINVAL; goto error_type; @@ -2363,6 +2451,373 @@ int ustctl_regenerate_statedump(int sock, int handle) return 0; } +/* counter operations */ + +int ustctl_get_nr_cpu_per_counter(void) +{ + return lttng_counter_num_possible_cpus(); +} + +struct ustctl_daemon_counter * + ustctl_create_counter(size_t nr_dimensions, + const struct ustctl_counter_dimension *dimensions, + int64_t global_sum_step, + int global_counter_fd, + int nr_counter_cpu_fds, + const int *counter_cpu_fds, + enum ustctl_counter_bitness bitness, + enum ustctl_counter_arithmetic arithmetic, + uint32_t alloc_flags) +{ + const char *transport_name; + struct ustctl_daemon_counter *counter; + struct lttng_counter_transport *transport; + struct lttng_counter_dimension ust_dim[LTTNG_COUNTER_DIMENSION_MAX]; + size_t i; + + if (nr_dimensions > LTTNG_COUNTER_DIMENSION_MAX) + return NULL; + /* Currently, only per-cpu allocation is supported. */ + switch (alloc_flags) { + case USTCTL_COUNTER_ALLOC_PER_CPU: + break; + + case USTCTL_COUNTER_ALLOC_PER_CPU | USTCTL_COUNTER_ALLOC_GLOBAL: + case USTCTL_COUNTER_ALLOC_GLOBAL: + default: + return NULL; + } + switch (bitness) { + case USTCTL_COUNTER_BITNESS_32: + switch (arithmetic) { + case USTCTL_COUNTER_ARITHMETIC_MODULAR: + transport_name = "counter-per-cpu-32-modular"; + break; + case USTCTL_COUNTER_ARITHMETIC_SATURATION: + transport_name = "counter-per-cpu-32-saturation"; + break; + default: + return NULL; + } + break; + case USTCTL_COUNTER_BITNESS_64: + switch (arithmetic) { + case USTCTL_COUNTER_ARITHMETIC_MODULAR: + transport_name = "counter-per-cpu-64-modular"; + break; + case USTCTL_COUNTER_ARITHMETIC_SATURATION: + transport_name = "counter-per-cpu-64-saturation"; + break; + default: + return NULL; + } + break; + default: + return NULL; + } + + transport = lttng_counter_transport_find(transport_name); + if (!transport) { + DBG("LTTng transport %s not found\n", + transport_name); + return NULL; + } + + counter = zmalloc(sizeof(*counter)); + if (!counter) + return NULL; + counter->attr = zmalloc(sizeof(*counter->attr)); + if (!counter->attr) + goto free_counter; + counter->attr->bitness = bitness; + counter->attr->arithmetic = arithmetic; + counter->attr->nr_dimensions = nr_dimensions; + counter->attr->global_sum_step = global_sum_step; + for (i = 0; i < nr_dimensions; i++) + counter->attr->dimensions[i] = dimensions[i]; + + for (i = 0; i < nr_dimensions; i++) { + ust_dim[i].size = dimensions[i].size; + ust_dim[i].underflow_index = dimensions[i].underflow_index; + ust_dim[i].overflow_index = dimensions[i].overflow_index; + ust_dim[i].has_underflow = dimensions[i].has_underflow; + ust_dim[i].has_overflow = dimensions[i].has_overflow; + } + counter->counter = transport->ops.counter_create(nr_dimensions, + ust_dim, global_sum_step, global_counter_fd, + nr_counter_cpu_fds, counter_cpu_fds, true); + if (!counter->counter) + goto free_attr; + counter->ops = &transport->ops; + return counter; + +free_attr: + free(counter->attr); +free_counter: + free(counter); + return NULL; +} + +int ustctl_create_counter_data(struct ustctl_daemon_counter *counter, + struct lttng_ust_object_data **_counter_data) +{ + struct lttng_ust_object_data *counter_data; + struct lttng_ust_counter_conf counter_conf; + size_t i; + int ret; + + switch (counter->attr->arithmetic) { + case USTCTL_COUNTER_ARITHMETIC_MODULAR: + counter_conf.arithmetic = 
LTTNG_UST_COUNTER_ARITHMETIC_MODULAR; + break; + case USTCTL_COUNTER_ARITHMETIC_SATURATION: + counter_conf.arithmetic = LTTNG_UST_COUNTER_ARITHMETIC_SATURATION; + break; + default: + return -EINVAL; + } + switch (counter->attr->bitness) { + case USTCTL_COUNTER_BITNESS_32: + counter_conf.bitness = LTTNG_UST_COUNTER_BITNESS_32BITS; + break; + case USTCTL_COUNTER_BITNESS_64: + counter_conf.bitness = LTTNG_UST_COUNTER_BITNESS_64BITS; + break; + default: + return -EINVAL; + } + counter_conf.number_dimensions = counter->attr->nr_dimensions; + counter_conf.global_sum_step = counter->attr->global_sum_step; + for (i = 0; i < counter->attr->nr_dimensions; i++) { + counter_conf.dimensions[i].size = counter->attr->dimensions[i].size; + counter_conf.dimensions[i].underflow_index = counter->attr->dimensions[i].underflow_index; + counter_conf.dimensions[i].overflow_index = counter->attr->dimensions[i].overflow_index; + counter_conf.dimensions[i].has_underflow = counter->attr->dimensions[i].has_underflow; + counter_conf.dimensions[i].has_overflow = counter->attr->dimensions[i].has_overflow; + } + + counter_data = zmalloc(sizeof(*counter_data)); + if (!counter_data) { + ret = -ENOMEM; + goto error_alloc; + } + counter_data->type = LTTNG_UST_OBJECT_TYPE_COUNTER; + counter_data->handle = -1; + + counter_data->size = sizeof(counter_conf); + counter_data->u.counter.data = zmalloc(sizeof(counter_conf)); + if (!counter_data->u.counter.data) { + ret = -ENOMEM; + goto error_alloc_data; + } + + memcpy(counter_data->u.counter.data, &counter_conf, sizeof(counter_conf)); + *_counter_data = counter_data; + + return 0; + +error_alloc_data: + free(counter_data); +error_alloc: + return ret; +} + +int ustctl_create_counter_global_data(struct ustctl_daemon_counter *counter, + struct lttng_ust_object_data **_counter_global_data) +{ + struct lttng_ust_object_data *counter_global_data; + int ret, fd; + size_t len; + + if (lttng_counter_get_global_shm(counter->counter, &fd, &len)) + return -EINVAL; + counter_global_data = zmalloc(sizeof(*counter_global_data)); + if (!counter_global_data) { + ret = -ENOMEM; + goto error_alloc; + } + counter_global_data->type = LTTNG_UST_OBJECT_TYPE_COUNTER_GLOBAL; + counter_global_data->handle = -1; + counter_global_data->size = len; + counter_global_data->u.counter_global.shm_fd = fd; + *_counter_global_data = counter_global_data; + return 0; + +error_alloc: + return ret; +} + +int ustctl_create_counter_cpu_data(struct ustctl_daemon_counter *counter, int cpu, + struct lttng_ust_object_data **_counter_cpu_data) +{ + struct lttng_ust_object_data *counter_cpu_data; + int ret, fd; + size_t len; + + if (lttng_counter_get_cpu_shm(counter->counter, cpu, &fd, &len)) + return -EINVAL; + counter_cpu_data = zmalloc(sizeof(*counter_cpu_data)); + if (!counter_cpu_data) { + ret = -ENOMEM; + goto error_alloc; + } + counter_cpu_data->type = LTTNG_UST_OBJECT_TYPE_COUNTER_CPU; + counter_cpu_data->handle = -1; + counter_cpu_data->size = len; + counter_cpu_data->u.counter_cpu.shm_fd = fd; + counter_cpu_data->u.counter_cpu.cpu_nr = cpu; + *_counter_cpu_data = counter_cpu_data; + return 0; + +error_alloc: + return ret; +} + +void ustctl_destroy_counter(struct ustctl_daemon_counter *counter) +{ + counter->ops->counter_destroy(counter->counter); + free(counter->attr); + free(counter); +} + +int ustctl_send_counter_data_to_ust(int sock, int parent_handle, + struct lttng_ust_object_data *counter_data) +{ + struct ustcomm_ust_msg lum; + struct ustcomm_ust_reply lur; + int ret; + size_t size; + ssize_t len; + + if 
(!counter_data)
+		return -EINVAL;
+
+	size = counter_data->size;
+	memset(&lum, 0, sizeof(lum));
+	lum.handle = parent_handle;
+	lum.cmd = LTTNG_UST_COUNTER;
+	lum.u.counter.len = size;
+	ret = ustcomm_send_app_msg(sock, &lum);
+	if (ret)
+		return ret;
+
+	/* Send counter data */
+	len = ustcomm_send_unix_sock(sock, counter_data->u.counter.data, size);
+	if (len != size) {
+		if (len < 0)
+			return len;
+		else
+			return -EIO;
+	}
+
+	ret = ustcomm_recv_app_reply(sock, &lur, lum.handle, lum.cmd);
+	if (!ret) {
+		counter_data->handle = lur.ret_val;
+	}
+	return ret;
+}
+
+int ustctl_send_counter_global_data_to_ust(int sock,
+		struct lttng_ust_object_data *counter_data,
+		struct lttng_ust_object_data *counter_global_data)
+{
+	struct ustcomm_ust_msg lum;
+	struct ustcomm_ust_reply lur;
+	int ret, shm_fd[1];
+	size_t size;
+	ssize_t len;
+
+	if (!counter_data || !counter_global_data)
+		return -EINVAL;
+
+	size = counter_global_data->size;
+	memset(&lum, 0, sizeof(lum));
+	lum.handle = counter_data->handle;	/* parent handle */
+	lum.cmd = LTTNG_UST_COUNTER_GLOBAL;
+	lum.u.counter_global.len = size;
+	ret = ustcomm_send_app_msg(sock, &lum);
+	if (ret)
+		return ret;
+
+	shm_fd[0] = counter_global_data->u.counter_global.shm_fd;
+	len = ustcomm_send_fds_unix_sock(sock, shm_fd, 1);
+	if (len <= 0) {
+		if (len < 0)
+			return len;
+		else
+			return -EIO;
+	}
+
+	ret = ustcomm_recv_app_reply(sock, &lur, lum.handle, lum.cmd);
+	if (!ret) {
+		counter_global_data->handle = lur.ret_val;
+	}
+	return ret;
+}
+
+int ustctl_send_counter_cpu_data_to_ust(int sock,
+		struct lttng_ust_object_data *counter_data,
+		struct lttng_ust_object_data *counter_cpu_data)
+{
+	struct ustcomm_ust_msg lum;
+	struct ustcomm_ust_reply lur;
+	int ret, shm_fd[1];
+	size_t size;
+	ssize_t len;
+
+	if (!counter_data || !counter_cpu_data)
+		return -EINVAL;
+
+	size = counter_cpu_data->size;
+	memset(&lum, 0, sizeof(lum));
+	lum.handle = counter_data->handle;	/* parent handle */
+	lum.cmd = LTTNG_UST_COUNTER_CPU;
+	lum.u.counter_cpu.len = size;
+	lum.u.counter_cpu.cpu_nr = counter_cpu_data->u.counter_cpu.cpu_nr;
+	ret = ustcomm_send_app_msg(sock, &lum);
+	if (ret)
+		return ret;
+
+	shm_fd[0] = counter_cpu_data->u.counter_cpu.shm_fd;
+	len = ustcomm_send_fds_unix_sock(sock, shm_fd, 1);
+	if (len <= 0) {
+		if (len < 0)
+			return len;
+		else
+			return -EIO;
+	}
+
+	ret = ustcomm_recv_app_reply(sock, &lur, lum.handle, lum.cmd);
+	if (!ret) {
+		counter_cpu_data->handle = lur.ret_val;
+	}
+	return ret;
+}
+
+int ustctl_counter_read(struct ustctl_daemon_counter *counter,
+		const size_t *dimension_indexes,
+		int cpu, int64_t *value,
+		bool *overflow, bool *underflow)
+{
+	return counter->ops->counter_read(counter->counter, dimension_indexes, cpu,
+			value, overflow, underflow);
+}
+
+int ustctl_counter_aggregate(struct ustctl_daemon_counter *counter,
+		const size_t *dimension_indexes,
+		int64_t *value,
+		bool *overflow, bool *underflow)
+{
+	return counter->ops->counter_aggregate(counter->counter, dimension_indexes,
+			value, overflow, underflow);
+}
+
+int ustctl_counter_clear(struct ustctl_daemon_counter *counter,
+		const size_t *dimension_indexes)
+{
+	return counter->ops->counter_clear(counter->counter, dimension_indexes);
+}
+
 static __attribute__((constructor))
 void ustctl_init(void)
 {
@@ -2374,6 +2829,8 @@ void ustctl_init(void)
 	lttng_ring_buffer_client_overwrite_rt_init();
 	lttng_ring_buffer_client_discard_init();
 	lttng_ring_buffer_client_discard_rt_init();
+	lttng_counter_client_percpu_32_modular_init();
+
lttng_counter_client_percpu_64_modular_init(); lib_ringbuffer_signal_init(); } @@ -2385,4 +2842,6 @@ void ustctl_exit(void) lttng_ring_buffer_client_overwrite_rt_exit(); lttng_ring_buffer_client_overwrite_exit(); lttng_ring_buffer_metadata_client_exit(); + lttng_counter_client_percpu_32_modular_exit(); + lttng_counter_client_percpu_64_modular_exit(); } diff --git a/liblttng-ust/Makefile.am b/liblttng-ust/Makefile.am index 98fb2153..fab09251 100644 --- a/liblttng-ust/Makefile.am +++ b/liblttng-ust/Makefile.am @@ -102,6 +102,8 @@ liblttng_ust_support_la_SOURCES = \ lttng-ring-buffer-client-overwrite-rt.c \ lttng-ring-buffer-metadata-client.h \ lttng-ring-buffer-metadata-client.c \ + lttng-counter-client-percpu-32-modular.c \ + lttng-counter-client-percpu-64-modular.c \ lttng-clock.c lttng-getcpu.c liblttng_ust_la_SOURCES = @@ -109,7 +111,8 @@ liblttng_ust_la_SOURCES = liblttng_ust_la_LDFLAGS = -no-undefined -version-info $(LTTNG_UST_LIBRARY_VERSION) liblttng_ust_support_la_LIBADD = \ - $(top_builddir)/libringbuffer/libringbuffer.la + $(top_builddir)/libringbuffer/libringbuffer.la \ + $(top_builddir)/libcounter/libcounter.la liblttng_ust_la_LIBADD = \ -lrt \ diff --git a/liblttng-ust/lttng-counter-client-percpu-32-modular.c b/liblttng-ust/lttng-counter-client-percpu-32-modular.c new file mode 100644 index 00000000..86fa1164 --- /dev/null +++ b/liblttng-ust/lttng-counter-client-percpu-32-modular.c @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: (GPL-2.0-only or LGPL-2.1-only) + * + * lttng-counter-client-percpu-32-modular.c + * + * LTTng lib counter client. Per-cpu 32-bit counters in modular + * arithmetic. + * + * Copyright (C) 2020 Mathieu Desnoyers + */ + +#include +#include "../libcounter/counter.h" +#include "../libcounter/counter-api.h" + +static const struct lib_counter_config client_config = { + .alloc = COUNTER_ALLOC_PER_CPU, + .sync = COUNTER_SYNC_PER_CPU, + .arithmetic = COUNTER_ARITHMETIC_MODULAR, + .counter_size = COUNTER_SIZE_32_BIT, +}; + +static struct lib_counter *counter_create(size_t nr_dimensions, + const struct lttng_counter_dimension *dimensions, + int64_t global_sum_step, + int global_counter_fd, + int nr_counter_cpu_fds, + const int *counter_cpu_fds, + bool is_daemon) +{ + size_t max_nr_elem[LTTNG_COUNTER_DIMENSION_MAX], i; + + if (nr_dimensions > LTTNG_COUNTER_DIMENSION_MAX) + return NULL; + for (i = 0; i < nr_dimensions; i++) { + if (dimensions[i].has_underflow || dimensions[i].has_overflow) + return NULL; + max_nr_elem[i] = dimensions[i].size; + } + return lttng_counter_create(&client_config, nr_dimensions, max_nr_elem, + global_sum_step, global_counter_fd, nr_counter_cpu_fds, + counter_cpu_fds, is_daemon); +} + +static void counter_destroy(struct lib_counter *counter) +{ + lttng_counter_destroy(counter); +} + +static int counter_add(struct lib_counter *counter, const size_t *dimension_indexes, int64_t v) +{ + return lttng_counter_add(&client_config, counter, dimension_indexes, v); +} + +static int counter_read(struct lib_counter *counter, const size_t *dimension_indexes, int cpu, + int64_t *value, bool *overflow, bool *underflow) +{ + return lttng_counter_read(&client_config, counter, dimension_indexes, cpu, value, + overflow, underflow); +} + +static int counter_aggregate(struct lib_counter *counter, const size_t *dimension_indexes, + int64_t *value, bool *overflow, bool *underflow) +{ + return lttng_counter_aggregate(&client_config, counter, dimension_indexes, value, + overflow, underflow); +} + +static int counter_clear(struct lib_counter *counter, const size_t 
*dimension_indexes) +{ + return lttng_counter_clear(&client_config, counter, dimension_indexes); +} + +static struct lttng_counter_transport lttng_counter_transport = { + .name = "counter-per-cpu-32-modular", + .ops = { + .counter_create = counter_create, + .counter_destroy = counter_destroy, + .counter_add = counter_add, + .counter_read = counter_read, + .counter_aggregate = counter_aggregate, + .counter_clear = counter_clear, + }, + .client_config = &client_config, +}; + +void lttng_counter_client_percpu_32_modular_init(void) +{ + lttng_counter_transport_register(<tng_counter_transport); +} + +void lttng_counter_client_percpu_32_modular_exit(void) +{ + lttng_counter_transport_unregister(<tng_counter_transport); +} diff --git a/liblttng-ust/lttng-counter-client-percpu-64-modular.c b/liblttng-ust/lttng-counter-client-percpu-64-modular.c new file mode 100644 index 00000000..c3851946 --- /dev/null +++ b/liblttng-ust/lttng-counter-client-percpu-64-modular.c @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: (GPL-2.0-only or LGPL-2.1-only) + * + * lttng-counter-client-percpu-64-modular.c + * + * LTTng lib counter client. Per-cpu 64-bit counters in modular + * arithmetic. + * + * Copyright (C) 2020 Mathieu Desnoyers + */ + +#include +#include "../libcounter/counter.h" +#include "../libcounter/counter-api.h" + +static const struct lib_counter_config client_config = { + .alloc = COUNTER_ALLOC_PER_CPU, + .sync = COUNTER_SYNC_PER_CPU, + .arithmetic = COUNTER_ARITHMETIC_MODULAR, + .counter_size = COUNTER_SIZE_64_BIT, +}; + +static struct lib_counter *counter_create(size_t nr_dimensions, + const struct lttng_counter_dimension *dimensions, + int64_t global_sum_step, + int global_counter_fd, + int nr_counter_cpu_fds, + const int *counter_cpu_fds, + bool is_daemon) +{ + size_t max_nr_elem[LTTNG_COUNTER_DIMENSION_MAX], i; + + if (nr_dimensions > LTTNG_COUNTER_DIMENSION_MAX) + return NULL; + for (i = 0; i < nr_dimensions; i++) { + if (dimensions[i].has_underflow || dimensions[i].has_overflow) + return NULL; + max_nr_elem[i] = dimensions[i].size; + } + return lttng_counter_create(&client_config, nr_dimensions, max_nr_elem, + global_sum_step, global_counter_fd, nr_counter_cpu_fds, + counter_cpu_fds, is_daemon); +} + +static void counter_destroy(struct lib_counter *counter) +{ + lttng_counter_destroy(counter); +} + +static int counter_add(struct lib_counter *counter, const size_t *dimension_indexes, int64_t v) +{ + return lttng_counter_add(&client_config, counter, dimension_indexes, v); +} + +static int counter_read(struct lib_counter *counter, const size_t *dimension_indexes, int cpu, + int64_t *value, bool *overflow, bool *underflow) +{ + return lttng_counter_read(&client_config, counter, dimension_indexes, cpu, value, + overflow, underflow); +} + +static int counter_aggregate(struct lib_counter *counter, const size_t *dimension_indexes, + int64_t *value, bool *overflow, bool *underflow) +{ + return lttng_counter_aggregate(&client_config, counter, dimension_indexes, value, + overflow, underflow); +} + +static int counter_clear(struct lib_counter *counter, const size_t *dimension_indexes) +{ + return lttng_counter_clear(&client_config, counter, dimension_indexes); +} + +static struct lttng_counter_transport lttng_counter_transport = { + .name = "counter-per-cpu-64-modular", + .ops = { + .counter_create = counter_create, + .counter_destroy = counter_destroy, + .counter_add = counter_add, + .counter_read = counter_read, + .counter_aggregate = counter_aggregate, + .counter_clear = counter_clear, + }, + 
.client_config = &client_config, +}; + +void lttng_counter_client_percpu_64_modular_init(void) +{ + lttng_counter_transport_register(<tng_counter_transport); +} + +void lttng_counter_client_percpu_64_modular_exit(void) +{ + lttng_counter_transport_unregister(<tng_counter_transport); +} diff --git a/liblttng-ust/lttng-events.c b/liblttng-ust/lttng-events.c index 66ec7b2c..6a089122 100644 --- a/liblttng-ust/lttng-events.c +++ b/liblttng-ust/lttng-events.c @@ -67,6 +67,7 @@ #include "ust-events-internal.h" #include "wait.h" #include "../libringbuffer/shm.h" +#include "../libcounter/counter.h" #include "jhash.h" #include @@ -171,6 +172,46 @@ struct lttng_session *lttng_session_create(void) return session; } +struct lttng_counter *lttng_ust_counter_create( + const char *counter_transport_name, + size_t number_dimensions, const struct lttng_counter_dimension *dimensions) +{ + struct lttng_counter_transport *counter_transport = NULL; + struct lttng_counter *counter = NULL; + + counter_transport = lttng_counter_transport_find(counter_transport_name); + if (!counter_transport) + goto notransport; + counter = zmalloc(sizeof(struct lttng_counter)); + if (!counter) + goto nomem; + + counter->ops = &counter_transport->ops; + counter->transport = counter_transport; + + counter->counter = counter->ops->counter_create( + number_dimensions, dimensions, 0, + -1, 0, NULL, false); + if (!counter->counter) { + goto create_error; + } + + return counter; + +create_error: + free(counter); +nomem: +notransport: + return NULL; +} + +static +void lttng_ust_counter_destroy(struct lttng_counter *counter) +{ + counter->ops->counter_destroy(counter->counter); + free(counter); +} + struct lttng_event_notifier_group *lttng_event_notifier_group_create(void) { struct lttng_event_notifier_group *event_notifier_group; @@ -352,7 +393,10 @@ void lttng_event_notifier_group_destroy( &event_notifier_group->event_notifiers_head, node) _lttng_event_notifier_destroy(notifier); - /* Close the notification fd to the listener of event notifiers. */ + if (event_notifier_group->error_counter) + lttng_ust_counter_destroy(event_notifier_group->error_counter); + + /* Close the notification fd to the listener of event_notifiers. */ lttng_ust_lock_fd_tracker(); close_ret = close(event_notifier_group->notification_fd); diff --git a/liblttng-ust/lttng-ust-abi.c b/liblttng-ust/lttng-ust-abi.c index 76fd7ea0..0d2058a3 100644 --- a/liblttng-ust/lttng-ust-abi.c +++ b/liblttng-ust/lttng-ust-abi.c @@ -56,6 +56,7 @@ #include "../libringbuffer/frontend_types.h" #include "../libringbuffer/shm.h" +#include "../libcounter/counter.h" #include "lttng-tracer.h" #include "string-utils.h" #include "ust-events-internal.h" @@ -637,6 +638,11 @@ long lttng_session_cmd(int objd, unsigned int cmd, unsigned long arg, return lttng_session_disable(session); case LTTNG_UST_SESSION_STATEDUMP: return lttng_session_statedump(session); + case LTTNG_UST_COUNTER: + case LTTNG_UST_COUNTER_GLOBAL: + case LTTNG_UST_COUNTER_CPU: + /* Not implemented yet. */ + return -EINVAL; default: return -EINVAL; } @@ -735,6 +741,133 @@ long lttng_event_notifier_enabler_cmd(int objd, unsigned int cmd, unsigned long } } +/** + * lttng_event_notifier_group_error_counter_cmd - lttng event_notifier group error counter object command + * + * @obj: the object + * @cmd: the command + * @arg: command arg + * @uargs: UST arguments (internal) + * @owner: objd owner + * + * This descriptor implements lttng commands: + * LTTNG_UST_COUNTER_GLOBAL + * Return negative error code on error, 0 on success. 
+ * LTTNG_UST_COUNTER_CPU + * Return negative error code on error, 0 on success. + */ +static +long lttng_event_notifier_group_error_counter_cmd(int objd, unsigned int cmd, unsigned long arg, + union ust_args *uargs, void *owner) +{ + struct lttng_counter *counter = objd_private(objd); + + switch (cmd) { + case LTTNG_UST_COUNTER_GLOBAL: + return -EINVAL; /* Unimplemented. */ + case LTTNG_UST_COUNTER_CPU: + { + struct lttng_ust_counter_cpu *counter_cpu = + (struct lttng_ust_counter_cpu *)arg; + return lttng_counter_set_cpu_shm(counter->counter, + counter_cpu->cpu_nr, uargs->counter_shm.shm_fd); + } + default: + return -EINVAL; + } +} + +int lttng_release_event_notifier_group_error_counter(int objd) +{ + struct lttng_counter *counter = objd_private(objd); + + if (counter) { + return lttng_ust_objd_unref(counter->event_notifier_group->objd, 0); + } else { + return -EINVAL; + } +} + +static const struct lttng_ust_objd_ops lttng_event_notifier_group_error_counter_ops = { + .release = lttng_release_event_notifier_group_error_counter, + .cmd = lttng_event_notifier_group_error_counter_cmd, +}; + +static +int lttng_ust_event_notifier_group_create_error_counter(int event_notifier_group_objd, void *owner, + struct lttng_ust_counter_conf *error_counter_conf) +{ + const char *counter_transport_name; + struct lttng_event_notifier_group *event_notifier_group = + objd_private(event_notifier_group_objd); + struct lttng_counter *counter; + int counter_objd, ret; + struct lttng_counter_dimension dimensions[1]; + size_t counter_len; + + if (event_notifier_group->error_counter) + return -EBUSY; + + if (error_counter_conf->arithmetic != LTTNG_UST_COUNTER_ARITHMETIC_MODULAR) + return -EINVAL; + + if (error_counter_conf->number_dimensions != 1) + return -EINVAL; + + switch (error_counter_conf->bitness) { + case LTTNG_UST_COUNTER_BITNESS_64BITS: + counter_transport_name = "counter-per-cpu-64-modular"; + break; + case LTTNG_UST_COUNTER_BITNESS_32BITS: + counter_transport_name = "counter-per-cpu-32-modular"; + break; + default: + return -EINVAL; + } + + counter_objd = objd_alloc(NULL, <tng_event_notifier_group_error_counter_ops, owner, + "event_notifier group error counter"); + if (counter_objd < 0) { + ret = counter_objd; + goto objd_error; + } + + counter_len = error_counter_conf->dimensions[0].size; + dimensions[0].size = counter_len; + dimensions[0].underflow_index = 0; + dimensions[0].overflow_index = 0; + dimensions[0].has_underflow = 0; + dimensions[0].has_overflow = 0; + + counter = lttng_ust_counter_create(counter_transport_name, 1, dimensions); + if (!counter) { + ret = -EINVAL; + goto create_error; + } + + event_notifier_group->error_counter = counter; + event_notifier_group->error_counter_len = counter_len; + + counter->objd = counter_objd; + counter->event_notifier_group = event_notifier_group; /* owner */ + + objd_set_private(counter_objd, counter); + /* The error counter holds a reference on the event_notifier group. 
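	 * The reference is dropped by
	 * lttng_release_event_notifier_group_error_counter() when the
	 * counter object descriptor is released.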
*/ + objd_ref(event_notifier_group->objd); + + return counter_objd; + +create_error: + { + int err; + + err = lttng_ust_objd_unref(counter_objd, 1); + assert(!err); + } +objd_error: + return ret; +} + static long lttng_event_notifier_group_cmd(int objd, unsigned int cmd, unsigned long arg, union ust_args *uargs, void *owner) @@ -758,6 +891,13 @@ long lttng_event_notifier_group_cmd(int objd, unsigned int cmd, unsigned long ar LTTNG_ENABLER_FORMAT_EVENT); } } + case LTTNG_UST_COUNTER: + { + struct lttng_ust_counter_conf *counter_conf = + (struct lttng_ust_counter_conf *) uargs->counter.counter_data; + return lttng_ust_event_notifier_group_create_error_counter( + objd, owner, counter_conf); + } default: return -EINVAL; } diff --git a/liblttng-ust/lttng-ust-comm.c b/liblttng-ust/lttng-ust-comm.c index de03407b..4b13571d 100644 --- a/liblttng-ust/lttng-ust-comm.c +++ b/liblttng-ust/lttng-ust-comm.c @@ -350,6 +350,13 @@ static const char *cmd_name_mapping[] = { /* Event notifier group commands */ [ LTTNG_UST_EVENT_NOTIFIER_CREATE ] = "Create event notifier", + + /* Session and event notifier group commands */ + [ LTTNG_UST_COUNTER ] = "Create Counter", + + /* Counter commands */ + [ LTTNG_UST_COUNTER_GLOBAL ] = "Create Counter Global", + [ LTTNG_UST_COUNTER_CPU ] = "Create Counter CPU", }; static const char *str_timeout; @@ -365,6 +372,10 @@ extern void lttng_ring_buffer_client_overwrite_rt_exit(void); extern void lttng_ring_buffer_client_discard_exit(void); extern void lttng_ring_buffer_client_discard_rt_exit(void); extern void lttng_ring_buffer_metadata_client_exit(void); +extern void lttng_counter_client_percpu_32_modular_init(void); +extern void lttng_counter_client_percpu_32_modular_exit(void); +extern void lttng_counter_client_percpu_64_modular_init(void); +extern void lttng_counter_client_percpu_64_modular_exit(void); static char *get_map_shm(struct sock_info *sock_info); @@ -1133,6 +1144,79 @@ int handle_message(struct sock_info *sock_info, ret = -ENOSYS; } break; + case LTTNG_UST_COUNTER: + { + void *counter_data; + + len = ustcomm_recv_counter_from_sessiond(sock, + &counter_data, lum->u.counter.len); + switch (len) { + case 0: /* orderly shutdown */ + ret = 0; + goto error; + default: + if (len == lum->u.counter.len) { + DBG("counter data received"); + break; + } else if (len < 0) { + DBG("Receive failed from lttng-sessiond with errno %d", (int) -len); + if (len == -ECONNRESET) { + ERR("%s remote end closed connection", sock_info->name); + ret = len; + goto error; + } + ret = len; + goto error; + } else { + DBG("incorrect counter data message size: %zd", len); + ret = -EINVAL; + goto error; + } + } + args.counter.counter_data = counter_data; + if (ops->cmd) + ret = ops->cmd(lum->handle, lum->cmd, + (unsigned long) &lum->u, + &args, sock_info); + else + ret = -ENOSYS; + break; + } + case LTTNG_UST_COUNTER_GLOBAL: + { + /* Receive shm_fd */ + ret = ustcomm_recv_counter_shm_from_sessiond(sock, + &args.counter_shm.shm_fd); + if (ret) { + goto error; + } + + if (ops->cmd) + ret = ops->cmd(lum->handle, lum->cmd, + (unsigned long) &lum->u, + &args, sock_info); + else + ret = -ENOSYS; + break; + } + case LTTNG_UST_COUNTER_CPU: + { + /* Receive shm_fd */ + ret = ustcomm_recv_counter_shm_from_sessiond(sock, + &args.counter_shm.shm_fd); + if (ret) { + goto error; + } + + if (ops->cmd) + ret = ops->cmd(lum->handle, lum->cmd, + (unsigned long) &lum->u, + &args, sock_info); + else + ret = -ENOSYS; + break; + } + default: if (ops->cmd) ret = ops->cmd(lum->handle, lum->cmd, @@ -1958,6 +2042,8 @@ void 
__attribute__((constructor)) lttng_ust_init(void) lttng_ring_buffer_client_overwrite_rt_init(); lttng_ring_buffer_client_discard_init(); lttng_ring_buffer_client_discard_rt_init(); + lttng_counter_client_percpu_32_modular_init(); + lttng_counter_client_percpu_64_modular_init(); lttng_perf_counter_init(); /* * Invoke ust malloc wrapper init before starting other threads. @@ -2103,6 +2189,8 @@ void lttng_ust_cleanup(int exiting) lttng_ring_buffer_client_overwrite_rt_exit(); lttng_ring_buffer_client_overwrite_exit(); lttng_ring_buffer_metadata_client_exit(); + lttng_counter_client_percpu_32_modular_exit(); + lttng_counter_client_percpu_64_modular_exit(); lttng_ust_statedump_destroy(); exit_tracepoint(); if (!exiting) { diff --git a/liblttng-ust/ust-core.c b/liblttng-ust/ust-core.c index abea7bbd..e8dff983 100644 --- a/liblttng-ust/ust-core.c +++ b/liblttng-ust/ust-core.c @@ -28,6 +28,7 @@ #include "jhash.h" static CDS_LIST_HEAD(lttng_transport_list); +static CDS_LIST_HEAD(lttng_counter_transport_list); struct lttng_transport *lttng_transport_find(const char *name) { @@ -40,6 +41,17 @@ struct lttng_transport *lttng_transport_find(const char *name) return NULL; } +struct lttng_counter_transport *lttng_counter_transport_find(const char *name) +{ + struct lttng_counter_transport *transport; + + cds_list_for_each_entry(transport, <tng_counter_transport_list, node) { + if (!strcmp(transport->name, name)) + return transport; + } + return NULL; +} + /** * lttng_transport_register - LTT transport registration * @transport: transport structure @@ -62,6 +74,28 @@ void lttng_transport_unregister(struct lttng_transport *transport) cds_list_del(&transport->node); } +/** + * lttng_counter_transport_register - LTTng counter transport registration + * @transport: transport structure + * + * Registers a counter transport which can be used as output to extract + * the data out of LTTng. Called with ust_lock held. + */ +void lttng_counter_transport_register(struct lttng_counter_transport *transport) +{ + cds_list_add_tail(&transport->node, <tng_counter_transport_list); +} + +/** + * lttng_counter_transport_unregister - LTTng counter transport unregistration + * @transport: transport structure + * Called with ust_lock held. + */ +void lttng_counter_transport_unregister(struct lttng_counter_transport *transport) +{ + cds_list_del(&transport->node); +} + /* * Needed by comm layer. */ -- 2.34.1 From 6566528bd873067b5e68a0599d57044060908e29 Mon Sep 17 00:00:00 2001 From: Francis Deslauriers Date: Mon, 12 Oct 2020 12:24:49 -0400 Subject: [PATCH 02/16] Implement event notifier error counter Signed-off-by: Francis Deslauriers Signed-off-by: Mathieu Desnoyers Change-Id: Ia8008a162a1e4c881fdc948a390847cb5258e4f7 --- include/lttng/ust-events.h | 1 + liblttng-ust/event-notifier-notification.c | 28 ++++++++++++++++++---- liblttng-ust/lttng-events.c | 4 +++- liblttng-ust/ust-events-internal.h | 1 + 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/include/lttng/ust-events.h b/include/lttng/ust-events.h index 721b0aed..19726f88 100644 --- a/include/lttng/ust-events.h +++ b/include/lttng/ust-events.h @@ -511,6 +511,7 @@ struct lttng_event { struct lttng_event_notifier { uint64_t user_token; + uint64_t error_counter_index; int enabled; int registered; /* has reg'd tracepoint probe */ size_t num_captures; /* Needed to allocate the msgpack array. 
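	 * One msgpack array element is written per capture expression
	 * when the event notifier notification is serialized.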
*/ diff --git a/liblttng-ust/event-notifier-notification.c b/liblttng-ust/event-notifier-notification.c index 6fce4357..f1fb8adc 100644 --- a/liblttng-ust/event-notifier-notification.c +++ b/liblttng-ust/event-notifier-notification.c @@ -280,8 +280,24 @@ void notification_append_empty_capture( lttng_msgpack_write_nil(¬if->writer); } +static void record_error(struct lttng_event_notifier *event_notifier) +{ + struct lttng_event_notifier_group *event_notifier_group = + event_notifier->group; + size_t dimension_index[1]; + int ret; + + dimension_index[0] = event_notifier->error_counter_index; + ret = event_notifier_group->error_counter->ops->counter_add( + event_notifier_group->error_counter->counter, + dimension_index, 1); + if (ret) + WARN_ON_ONCE(1); +} + static -void notification_send(struct lttng_event_notifier_notification *notif) +void notification_send(struct lttng_event_notifier_notification *notif, + struct lttng_event_notifier *event_notifier) { ssize_t ret; size_t content_len; @@ -291,7 +307,7 @@ void notification_send(struct lttng_event_notifier_notification *notif) assert(notif); - ust_notif.token = notif->event_notifier_token; + ust_notif.token = event_notifier->user_token; /* * Prepare sending the notification from multiple buffers using an @@ -329,7 +345,8 @@ void notification_send(struct lttng_event_notifier_notification *notif) ret = patient_writev(notif->notification_fd, iov, iovec_count); if (ret == -1) { if (errno == EAGAIN) { - DBG("Cannot send event notifier notification without blocking: %s", + record_error(event_notifier); + DBG("Cannot send event_notifier notification without blocking: %s", strerror(errno)); } else { DBG("Error to sending event notifier notification: %s", @@ -339,7 +356,8 @@ void notification_send(struct lttng_event_notifier_notification *notif) } } -void lttng_event_notifier_notification_send(struct lttng_event_notifier *event_notifier, +void lttng_event_notifier_notification_send( + struct lttng_event_notifier *event_notifier, const char *stack_data) { /* @@ -375,5 +393,5 @@ void lttng_event_notifier_notification_send(struct lttng_event_notifier *event_n * Send the notification (including the capture buffer) to the * sessiond. */ - notification_send(¬if); + notification_send(¬if, event_notifier); } diff --git a/liblttng-ust/lttng-events.c b/liblttng-ust/lttng-events.c index 6a089122..b455c363 100644 --- a/liblttng-ust/lttng-events.c +++ b/liblttng-ust/lttng-events.c @@ -841,7 +841,7 @@ socket_error: static int lttng_event_notifier_create(const struct lttng_event_desc *desc, - uint64_t token, + uint64_t token, uint64_t error_counter_index, struct lttng_event_notifier_group *event_notifier_group) { struct lttng_event_notifier *event_notifier; @@ -864,6 +864,7 @@ int lttng_event_notifier_create(const struct lttng_event_desc *desc, event_notifier->group = event_notifier_group; event_notifier->user_token = token; + event_notifier->error_counter_index = error_counter_index; /* Event notifier will be enabled by enabler sync. 
*/ event_notifier->enabled = 0; @@ -1766,6 +1767,7 @@ void lttng_create_event_notifier_if_missing( */ ret = lttng_event_notifier_create(desc, event_notifier_enabler->user_token, + event_notifier_enabler->error_counter_index, event_notifier_group); if (ret) { DBG("Unable to create event_notifier %s, error %d\n", diff --git a/liblttng-ust/ust-events-internal.h b/liblttng-ust/ust-events-internal.h index 966fce6e..be5c549c 100644 --- a/liblttng-ust/ust-events-internal.h +++ b/liblttng-ust/ust-events-internal.h @@ -46,6 +46,7 @@ struct lttng_event_enabler { struct lttng_event_notifier_enabler { struct lttng_enabler base; + uint64_t error_counter_index; struct cds_list_head node; /* per-app list of event_notifier enablers */ struct cds_list_head capture_bytecode_head; struct lttng_event_notifier_group *group; /* weak ref */ -- 2.34.1 From 3c5d75828a6c8e7469378bbcc73d63043b619d0e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 26 Nov 2020 13:51:46 -0500 Subject: [PATCH 03/16] Fix: ust-abi: clarify which command is used on which object descriptor Signed-off-by: Mathieu Desnoyers Change-Id: If0c874ec7c8061422706582fdb66838659f7b890 --- include/lttng/ust-abi.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/lttng/ust-abi.h b/include/lttng/ust-abi.h index 6a6144f7..b00dbb44 100644 --- a/include/lttng/ust-abi.h +++ b/include/lttng/ust-abi.h @@ -388,25 +388,25 @@ struct lttng_ust_event_exclusion { #define LTTNG_UST_TRACEPOINT_FIELD_LIST _UST_CMD(0x45) #define LTTNG_UST_EVENT_NOTIFIER_GROUP_CREATE _UST_CMD(0x46) -/* Session FD commands */ +/* Session commands */ #define LTTNG_UST_CHANNEL \ _UST_CMDW(0x51, struct lttng_ust_channel) #define LTTNG_UST_SESSION_START _UST_CMD(0x52) #define LTTNG_UST_SESSION_STOP _UST_CMD(0x53) #define LTTNG_UST_SESSION_STATEDUMP _UST_CMD(0x54) -/* Channel FD commands */ +/* Channel commands */ #define LTTNG_UST_STREAM _UST_CMD(0x60) #define LTTNG_UST_EVENT \ _UST_CMDW(0x61, struct lttng_ust_event) -/* Event and Channel FD commands */ +/* Event and channel commands */ #define LTTNG_UST_CONTEXT \ _UST_CMDW(0x70, struct lttng_ust_context) #define LTTNG_UST_FLUSH_BUFFER \ _UST_CMD(0x71) -/* Event, Channel and Session commands */ +/* Event, event notifier, channel and session commands */ #define LTTNG_UST_ENABLE _UST_CMD(0x80) #define LTTNG_UST_DISABLE _UST_CMD(0x81) @@ -414,14 +414,16 @@ struct lttng_ust_event_exclusion { #define LTTNG_UST_TRACEPOINT_LIST_GET _UST_CMD(0x90) #define LTTNG_UST_TRACEPOINT_FIELD_LIST_GET _UST_CMD(0x91) -/* Event FD commands */ +/* Event and event notifier commands */ #define LTTNG_UST_FILTER _UST_CMD(0xA0) #define LTTNG_UST_EXCLUSION _UST_CMD(0xA1) /* Event notifier group commands */ #define LTTNG_UST_EVENT_NOTIFIER_CREATE \ _UST_CMDW(0xB0, struct lttng_ust_event_notifier) -#define LTTNG_UST_CAPTURE _UST_CMD(0xB1) + +/* Event notifier commands */ +#define LTTNG_UST_CAPTURE _UST_CMD(0xB6) /* Session and event notifier group commands */ #define LTTNG_UST_COUNTER \ -- 2.34.1 From 00adec46a40302f315a5d18b3f4b601beb6cd059 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 26 Nov 2020 14:40:04 -0500 Subject: [PATCH 04/16] Fix: libcounter: use LTTNG_UST_ALIGN ALIGN() on FreeBSD does not expect the same number of arguments. 
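For reference, a sketch of the mismatch (macro bodies shown are typical
illustrations, not copied from either tree):

    /* Linux-style align takes the value and the alignment: */
    #define ALIGN(v, align)	(((v) + (align) - 1) & ~((align) - 1))

    /* FreeBSD's ALIGN(p) takes a single argument and aligns it to the
     * machine register size, so two-argument call sites fail to build
     * there. */

LTTNG_UST_ALIGN() is always the two-argument form, hence the switch.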
Signed-off-by: Mathieu Desnoyers Change-Id: I8ce4cc94bc840a11dc30ae0679f6b2c41e11bc12 --- libcounter/counter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcounter/counter.c b/libcounter/counter.c index 4010d5dd..640705d3 100644 --- a/libcounter/counter.c +++ b/libcounter/counter.c @@ -75,9 +75,9 @@ static int lttng_counter_layout_init(struct lib_counter *counter, int cpu, int s counters_offset = shm_length; shm_length += counter_size * nr_elem; overflow_offset = shm_length; - shm_length += ALIGN(nr_elem, 8) / 8; + shm_length += LTTNG_UST_ALIGN(nr_elem, 8) / 8; underflow_offset = shm_length; - shm_length += ALIGN(nr_elem, 8) / 8; + shm_length += LTTNG_UST_ALIGN(nr_elem, 8) / 8; layout->shm_len = shm_length; if (counter->is_daemon) { /* Allocate and clear shared memory. */ -- 2.34.1 From a40716088ab08fa708b2b2ba5024c3c62eee8805 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 26 Nov 2020 15:29:01 -0500 Subject: [PATCH 05/16] Fix: counter: cast UINT*_MAX to 64-bit signed type before negative (long long)-UINT32_MAX leads to value 1 which is not what we expect. This is due to implicit type promotion from unsigned to signed 32-bit integer. Apply this to 8-bit and 16-bit types as well even though they are not affected by this issue to keep things regular. Signed-off-by: Mathieu Desnoyers Change-Id: I8e05f270eae71cb15848075c4dfae80e8070fd80 --- libcounter/counter-api.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcounter/counter-api.h b/libcounter/counter-api.h index 0a7c0ade..d18fa16e 100644 --- a/libcounter/counter-api.h +++ b/libcounter/counter-api.h @@ -104,7 +104,7 @@ static inline int __lttng_counter_add(const struct lib_counter_config *config, } if (v > 0 && (v >= UINT8_MAX || n < old)) overflow = true; - else if (v < 0 && (v <= -UINT8_MAX || n > old)) + else if (v < 0 && (v <= -(int64_t) UINT8_MAX || n > old)) underflow = true; break; } @@ -143,7 +143,7 @@ static inline int __lttng_counter_add(const struct lib_counter_config *config, } if (v > 0 && (v >= UINT16_MAX || n < old)) overflow = true; - else if (v < 0 && (v <= -UINT16_MAX || n > old)) + else if (v < 0 && (v <= -(int64_t) UINT16_MAX || n > old)) underflow = true; break; } @@ -182,7 +182,7 @@ static inline int __lttng_counter_add(const struct lib_counter_config *config, } if (v > 0 && (v >= UINT32_MAX || n < old)) overflow = true; - else if (v < 0 && (v <= -UINT32_MAX || n > old)) + else if (v < 0 && (v <= -(int64_t) UINT32_MAX || n > old)) underflow = true; break; } -- 2.34.1 From 8b914423d8af8805685f6114888f3bbd3b528d0f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 26 Nov 2020 15:35:31 -0500 Subject: [PATCH 06/16] Fix: remove dead code in msgpack.c Identified by Coverity. Comparing unsigned types < 0 is never true. 
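A minimal illustration (not from the tree):

    size_t count = 0;

    if (count < 0)	/* Always false: size_t is unsigned, so this */
        abort();	/* branch is dead code (-Wtype-limits warns). */

The removed half of each check below was exactly this pattern, with the
map/array count and the string length both being size_t.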
Signed-off-by: Mathieu Desnoyers Change-Id: Ic623a5b15c7053844c8e3a0b374ac3b9b929c050 --- libmsgpack/msgpack.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libmsgpack/msgpack.c b/libmsgpack/msgpack.c index 0803bc65..f3eca64a 100644 --- a/libmsgpack/msgpack.c +++ b/libmsgpack/msgpack.c @@ -316,7 +316,7 @@ int lttng_msgpack_begin_map(struct lttng_msgpack_writer *writer, size_t count) { int ret; - if (count < 0 || count >= (1 << 16)) { + if (count >= (1 << 16)) { ret = -1; goto end; } @@ -343,7 +343,7 @@ int lttng_msgpack_begin_array( { int ret; - if (count < 0 || count >= (1 << 16)) { + if (count >= (1 << 16)) { ret = -1; goto end; } @@ -370,7 +370,8 @@ int lttng_msgpack_write_str(struct lttng_msgpack_writer *writer, { int ret; size_t length = strlen(str); - if (length < 0 || length >= (1 << 16)) { + + if (length >= (1 << 16)) { ret = -1; goto end; } -- 2.34.1 From 41d6c53d7f72e279ac5b0e6490a5be37e460c4c0 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 26 Nov 2020 15:38:19 -0500 Subject: [PATCH 07/16] Fix: libcounter bad compare Identified by Coverity. >>> CID 1437325: Incorrect expression (BAD_COMPARE) >>> Comparing pointer "counter_cpu_fds" against "NULL" using anything besides "==" or "!=" is likely to be incorrect. Signed-off-by: Mathieu Desnoyers Change-Id: I0d21112a0266d67fafde7a6b81e0062a7a43dce0 --- libcounter/counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcounter/counter.c b/libcounter/counter.c index 640705d3..91506faf 100644 --- a/libcounter/counter.c +++ b/libcounter/counter.c @@ -188,7 +188,7 @@ int validate_args(const struct lib_counter_config *config, return -1; if (!(config->alloc & COUNTER_ALLOC_PER_CPU) && counter_cpu_fds) return -1; - if (!(config->alloc & COUNTER_ALLOC_PER_CPU) && counter_cpu_fds >= 0) + if (!(config->alloc & COUNTER_ALLOC_PER_CPU) && nr_counter_cpu_fds >= 0) return -1; if (counter_cpu_fds && nr_cpus != nr_counter_cpu_fds) return -1; -- 2.34.1 From 185c782861fe6ed224d14293afc57c46ab5e7aed Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 26 Nov 2020 15:50:45 -0500 Subject: [PATCH 08/16] Fix: memory leak in apply_context_reloc Fixes commit b77aaa1b8edf2 ("Decouple `struct lttng_session` from filter code") lttng_ust_add_app_context_to_ctx_rcu should be able to update the session ctx field and not only the local ctx pointer. 
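The fix keeps a pointer to the session's context pointer so the callee
can publish a replacement in place (sketch of the pattern applied
below):

	struct lttng_ctx **pctx = runtime->p.pctx;

	/* May replace *pctx with a newly allocated context. */
	ret = lttng_ust_add_app_context_to_ctx_rcu(context_name, pctx);
	if (!ret)
		idx = lttng_get_context_index(*pctx, context_name);

Working on a local copy of *pctx instead leaks the new context and
leaves the session's ctx field pointing at the old one.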
Signed-off-by: Mathieu Desnoyers Change-Id: I1bfdc7a1a2b6f9c579206e996572e36bf30d60e7 --- liblttng-ust/lttng-bytecode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/liblttng-ust/lttng-bytecode.c b/liblttng-ust/lttng-bytecode.c index 30d3cbd1..a30dd182 100644 --- a/liblttng-ust/lttng-bytecode.c +++ b/liblttng-ust/lttng-bytecode.c @@ -298,21 +298,21 @@ int apply_context_reloc(struct bytecode_runtime *runtime, struct load_op *op; struct lttng_ctx_field *ctx_field; int idx; - struct lttng_ctx *ctx = *runtime->p.pctx; + struct lttng_ctx **pctx = runtime->p.pctx; dbg_printf("Apply context reloc: %u %s\n", reloc_offset, context_name); /* Get context index */ - idx = lttng_get_context_index(ctx, context_name); + idx = lttng_get_context_index(*pctx, context_name); if (idx < 0) { if (lttng_context_is_app(context_name)) { int ret; ret = lttng_ust_add_app_context_to_ctx_rcu(context_name, - &ctx); + pctx); if (ret) return ret; - idx = lttng_get_context_index(ctx, context_name); + idx = lttng_get_context_index(*pctx, context_name); if (idx < 0) return -ENOENT; } else { @@ -324,7 +324,7 @@ int apply_context_reloc(struct bytecode_runtime *runtime, return -EINVAL; /* Get context return type */ - ctx_field = &ctx->fields[idx]; + ctx_field = &(*pctx)->fields[idx]; op = (struct load_op *) &runtime->code[reloc_offset]; switch (bytecode_op) { -- 2.34.1 From 1bd9425ae9807e6ed78a64cdcd7056439dc8f9da Mon Sep 17 00:00:00 2001 From: Francis Deslauriers Date: Fri, 27 Nov 2020 11:02:45 -0500 Subject: [PATCH 09/16] Bump LTTNG_UST_EVENT_NOTIFIER_PADDING to 32 bytes Signed-off-by: Francis Deslauriers Signed-off-by: Mathieu Desnoyers Change-Id: Iad5de41f09c373cf1069e81996b1201e3c02e572 --- include/lttng/ust-abi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/lttng/ust-abi.h b/include/lttng/ust-abi.h index b00dbb44..69e07ec9 100644 --- a/include/lttng/ust-abi.h +++ b/include/lttng/ust-abi.h @@ -154,11 +154,11 @@ struct lttng_ust_event { } u; } LTTNG_PACKED; -#define LTTNG_UST_EVENT_NOTIFIER_PADDING1 16 +#define LTTNG_UST_EVENT_NOTIFIER_PADDING 32 struct lttng_ust_event_notifier { struct lttng_ust_event event; uint64_t error_counter_index; - char padding[LTTNG_UST_EVENT_NOTIFIER_PADDING1]; + char padding[LTTNG_UST_EVENT_NOTIFIER_PADDING]; } LTTNG_PACKED; #define LTTNG_EVENT_NOTIFIER_NOTIFICATION_PADDING 32 -- 2.34.1 From 3336564f68cb964478db684e5e70815fc179e97b Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 30 Nov 2020 14:09:15 -0500 Subject: [PATCH 10/16] Fix: handle default switch case with -EINVAL in __lttng_counter_add Signed-off-by: Mathieu Desnoyers Change-Id: Ie7cb76725789a75cfd7cf2bdb3031a81983ad146 --- libcounter/counter-api.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libcounter/counter-api.h b/libcounter/counter-api.h index d18fa16e..540055ae 100644 --- a/libcounter/counter-api.h +++ b/libcounter/counter-api.h @@ -101,6 +101,8 @@ static inline int __lttng_counter_add(const struct lib_counter_config *config, } while (old != res); break; } + default: + return -EINVAL; } if (v > 0 && (v >= UINT8_MAX || n < old)) overflow = true; @@ -140,6 +142,8 @@ static inline int __lttng_counter_add(const struct lib_counter_config *config, } while (old != res); break; } + default: + return -EINVAL; } if (v > 0 && (v >= UINT16_MAX || n < old)) overflow = true; @@ -179,6 +183,8 @@ static inline int __lttng_counter_add(const struct lib_counter_config *config, } while (old != res); break; } + default: + return -EINVAL; } if (v > 0 && (v >= 
UINT32_MAX || n < old)) overflow = true; @@ -219,6 +225,8 @@ static inline int __lttng_counter_add(const struct lib_counter_config *config, } while (old != res); break; } + default: + return -EINVAL; } if (v > 0 && n < old) overflow = true; -- 2.34.1 From 10544ee8af31afb239e3dfa71cb2fe09d3de3771 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 11 Nov 2020 17:28:06 -0500 Subject: [PATCH 11/16] Remove runtime dependency on liburcu shared objects Remove the runtime dependency on: - liblurcu-bp.so - liblurcu-cds.so - compat futex code. By integrating those into the lttng-ust project. For rculfhash, only the minimum pieces needed by lttng-ust are integrated (no auto-resize, no accounting). lttng-ust still requires liburcu at build time for header dependencies. Signed-off-by: Mathieu Desnoyers Change-Id: Idffb205b27b1bb0f972523c3ce3bdaf25bfe1710 --- configure.ac | 2 - include/Makefile.am | 6 +- include/lttng/tracepoint-rcu.h | 19 +- include/lttng/tracepoint.h | 36 +- include/lttng/urcu/pointer.h | 129 ++ include/lttng/urcu/static/pointer.h | 146 +++ include/lttng/urcu/static/urcu-ust.h | 211 ++++ include/lttng/urcu/urcu-ust.h | 105 ++ include/lttng/ust-events.h | 2 - include/lttng/ust-tracepoint-event.h | 8 +- liblttng-ust/Makefile.am | 16 +- liblttng-ust/compat_futex.c | 154 +++ liblttng-ust/futex.h | 189 +++ liblttng-ust/lttng-bytecode-interpreter.c | 4 +- liblttng-ust/lttng-bytecode-validator.c | 41 +- liblttng-ust/lttng-context.c | 11 +- liblttng-ust/lttng-events.c | 13 +- liblttng-ust/lttng-tracer-core.h | 4 + liblttng-ust/lttng-ust-abi.c | 3 +- liblttng-ust/lttng-ust-comm.c | 20 +- liblttng-ust/lttng-ust-urcu-pointer.c | 54 + liblttng-ust/lttng-ust-urcu.c | 733 ++++++++++++ liblttng-ust/rculfhash-internal.h | 179 +++ liblttng-ust/rculfhash-mm-chunk.c | 97 ++ liblttng-ust/rculfhash-mm-mmap.c | 214 ++++ liblttng-ust/rculfhash-mm-order.c | 90 ++ liblttng-ust/rculfhash.c | 1314 +++++++++++++++++++++ liblttng-ust/rculfhash.h | 467 ++++++++ liblttng-ust/tracepoint-internal.h | 6 +- liblttng-ust/tracepoint.c | 158 ++- libringbuffer/frontend_api.h | 1 - lttng-ust.pc.in | 1 - 32 files changed, 4326 insertions(+), 107 deletions(-) create mode 100644 include/lttng/urcu/pointer.h create mode 100644 include/lttng/urcu/static/pointer.h create mode 100644 include/lttng/urcu/static/urcu-ust.h create mode 100644 include/lttng/urcu/urcu-ust.h create mode 100644 liblttng-ust/compat_futex.c create mode 100644 liblttng-ust/futex.h create mode 100644 liblttng-ust/lttng-ust-urcu-pointer.c create mode 100644 liblttng-ust/lttng-ust-urcu.c create mode 100644 liblttng-ust/rculfhash-internal.h create mode 100644 liblttng-ust/rculfhash-mm-chunk.c create mode 100644 liblttng-ust/rculfhash-mm-mmap.c create mode 100644 liblttng-ust/rculfhash-mm-order.c create mode 100644 liblttng-ust/rculfhash.c create mode 100644 liblttng-ust/rculfhash.h diff --git a/configure.ac b/configure.ac index e5ac3200..f698e973 100644 --- a/configure.ac +++ b/configure.ac @@ -251,8 +251,6 @@ AM_CONDITIONAL([HAVE_DLINFO], [test "x${ac_cv_have_decl_RTLD_DI_LINKMAP}" = "xye # Require URCU >= 0.12 for DEFINE_URCU_TLS_INIT PKG_CHECK_MODULES([URCU], [liburcu >= 0.12]) -PKG_CHECK_MODULES([URCU_BP], [liburcu-bp >= 0.12]) -PKG_CHECK_MODULES([URCU_CDS], [liburcu-cds >= 0.12]) # numa.h integration AS_IF([test "x$NO_NUMA" = "x1"],[ diff --git a/include/Makefile.am b/include/Makefile.am index 23a165ae..40990ff6 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -27,7 +27,11 @@ nobase_include_HEADERS = \ lttng/ust-getcpu.h \ 
lttng/ust-elf.h \ lttng/counter-config.h \ - lttng/bitmap.h + lttng/bitmap.h \ + lttng/urcu/pointer.h \ + lttng/urcu/urcu-ust.h \ + lttng/urcu/static/pointer.h \ + lttng/urcu/static/urcu-ust.h # note: usterr-signal-safe.h, core.h and share.h need namespace cleanup. diff --git a/include/lttng/tracepoint-rcu.h b/include/lttng/tracepoint-rcu.h index 95d60493..3378d924 100644 --- a/include/lttng/tracepoint-rcu.h +++ b/include/lttng/tracepoint-rcu.h @@ -24,26 +24,27 @@ */ #include +#include #ifdef _LGPL_SOURCE -#include +#include -#define tp_rcu_read_lock_bp urcu_bp_read_lock -#define tp_rcu_read_unlock_bp urcu_bp_read_unlock -#define tp_rcu_dereference_bp rcu_dereference +#define tp_rcu_read_lock lttng_ust_urcu_read_lock +#define tp_rcu_read_unlock lttng_ust_urcu_read_unlock +#define tp_rcu_dereference lttng_ust_rcu_dereference #define TP_RCU_LINK_TEST() 1 #else /* _LGPL_SOURCE */ -#define tp_rcu_read_lock_bp tracepoint_dlopen_ptr->rcu_read_lock_sym_bp -#define tp_rcu_read_unlock_bp tracepoint_dlopen_ptr->rcu_read_unlock_sym_bp +#define tp_rcu_read_lock tracepoint_dlopen_ptr->rcu_read_lock_sym +#define tp_rcu_read_unlock tracepoint_dlopen_ptr->rcu_read_unlock_sym -#define tp_rcu_dereference_bp(p) \ +#define tp_rcu_dereference(p) \ URCU_FORCE_CAST(__typeof__(p), \ - tracepoint_dlopen_ptr->rcu_dereference_sym_bp(URCU_FORCE_CAST(void *, p))) + tracepoint_dlopen_ptr->rcu_dereference_sym(URCU_FORCE_CAST(void *, p))) -#define TP_RCU_LINK_TEST() (tracepoint_dlopen_ptr && tp_rcu_read_lock_bp) +#define TP_RCU_LINK_TEST() (tracepoint_dlopen_ptr && tp_rcu_read_lock) #endif /* _LGPL_SOURCE */ diff --git a/include/lttng/tracepoint.h b/include/lttng/tracepoint.h index 1cf02188..41ac5a28 100644 --- a/include/lttng/tracepoint.h +++ b/include/lttng/tracepoint.h @@ -180,12 +180,12 @@ void __tracepoint_cb_##_provider##___##_name(_TP_ARGS_PROTO(__VA_ARGS__)); \ static \ void __tracepoint_cb_##_provider##___##_name(_TP_ARGS_PROTO(__VA_ARGS__)) \ { \ - struct lttng_ust_tracepoint_probe *__tp_probe; \ + struct lttng_ust_tracepoint_probe *__tp_probe; \ \ if (caa_unlikely(!TP_RCU_LINK_TEST())) \ return; \ - tp_rcu_read_lock_bp(); \ - __tp_probe = tp_rcu_dereference_bp(__tracepoint_##_provider##___##_name.probes); \ + tp_rcu_read_lock(); \ + __tp_probe = tp_rcu_dereference(__tracepoint_##_provider##___##_name.probes); \ if (caa_unlikely(!__tp_probe)) \ goto end; \ do { \ @@ -196,7 +196,7 @@ void __tracepoint_cb_##_provider##___##_name(_TP_ARGS_PROTO(__VA_ARGS__)) \ (_TP_ARGS_DATA_VAR(__VA_ARGS__)); \ } while ((++__tp_probe)->func); \ end: \ - tp_rcu_read_unlock_bp(); \ + tp_rcu_read_unlock(); \ } \ static inline lttng_ust_notrace \ void __tracepoint_register_##_provider##___##_name(char *name, \ @@ -233,9 +233,9 @@ struct lttng_ust_tracepoint_dlopen { int (*tracepoint_register_lib)(struct lttng_ust_tracepoint * const *tracepoints_start, int tracepoints_count); int (*tracepoint_unregister_lib)(struct lttng_ust_tracepoint * const *tracepoints_start); - void (*rcu_read_lock_sym_bp)(void); - void (*rcu_read_unlock_sym_bp)(void); - void *(*rcu_dereference_sym_bp)(void *p); + void (*rcu_read_lock_sym)(void); + void (*rcu_read_unlock_sym)(void); + void *(*rcu_dereference_sym)(void *p); }; extern struct lttng_ust_tracepoint_dlopen tracepoint_dlopen; @@ -308,21 +308,21 @@ __tracepoint__init_urcu_sym(void) * Symbols below are needed by tracepoint call sites and probe * providers. 
*/ - if (!tracepoint_dlopen_ptr->rcu_read_lock_sym_bp) - tracepoint_dlopen_ptr->rcu_read_lock_sym_bp = + if (!tracepoint_dlopen_ptr->rcu_read_lock_sym) + tracepoint_dlopen_ptr->rcu_read_lock_sym = URCU_FORCE_CAST(void (*)(void), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tp_rcu_read_lock_bp")); - if (!tracepoint_dlopen_ptr->rcu_read_unlock_sym_bp) - tracepoint_dlopen_ptr->rcu_read_unlock_sym_bp = + "tp_rcu_read_lock")); + if (!tracepoint_dlopen_ptr->rcu_read_unlock_sym) + tracepoint_dlopen_ptr->rcu_read_unlock_sym = URCU_FORCE_CAST(void (*)(void), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tp_rcu_read_unlock_bp")); - if (!tracepoint_dlopen_ptr->rcu_dereference_sym_bp) - tracepoint_dlopen_ptr->rcu_dereference_sym_bp = + "tp_rcu_read_unlock")); + if (!tracepoint_dlopen_ptr->rcu_dereference_sym) + tracepoint_dlopen_ptr->rcu_dereference_sym = URCU_FORCE_CAST(void *(*)(void *p), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tp_rcu_dereference_sym_bp")); + "tp_rcu_dereference_sym")); } #else static inline void lttng_ust_notrace @@ -479,11 +479,11 @@ __tracepoints__ptrs_init(void) tracepoint_dlopen_ptr->tracepoint_register_lib = URCU_FORCE_CAST(int (*)(struct lttng_ust_tracepoint * const *, int), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tracepoint_register_lib")); + "tracepoint_register_lib2")); tracepoint_dlopen_ptr->tracepoint_unregister_lib = URCU_FORCE_CAST(int (*)(struct lttng_ust_tracepoint * const *), dlsym(tracepoint_dlopen_ptr->liblttngust_handle, - "tracepoint_unregister_lib")); + "tracepoint_unregister_lib2")); tracepoint_destructors_syms_ptr->old_tracepoint_disable_destructors = URCU_FORCE_CAST(int *, dlsym(tracepoint_dlopen_ptr->liblttngust_handle, diff --git a/include/lttng/urcu/pointer.h b/include/lttng/urcu/pointer.h new file mode 100644 index 00000000..21bc2f4c --- /dev/null +++ b/include/lttng/urcu/pointer.h @@ -0,0 +1,129 @@ +#ifndef _LTTNG_UST_URCU_POINTER_H +#define _LTTNG_UST_URCU_POINTER_H + +/* + * lttng/urcu/pointer.h + * + * Userspace RCU header. Operations on pointers. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_LGPL_SOURCE) || defined(LTTNG_UST_URCU_INLINE_SMALL_FUNCTIONS) + +#include + +/* + * lttng_ust_rcu_dereference(ptr) + * + * Fetch a RCU-protected pointer. Typically used to copy the variable ptr to a + * local variable. 
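+ *
+ * Usage sketch (gp_ptr and use() are placeholder names):
+ *
+ *	lttng_ust_urcu_read_lock();
+ *	p = lttng_ust_rcu_dereference(gp_ptr);
+ *	if (p)
+ *		use(p);
+ *	lttng_ust_urcu_read_unlock();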
+ */ +#define lttng_ust_rcu_dereference _lttng_ust_rcu_dereference + +/* + * type *lttng_ust_rcu_cmpxchg_pointer(type **ptr, type *new, type *old) + * type *lttng_ust_rcu_xchg_pointer(type **ptr, type *new) + * void lttng_ust_rcu_set_pointer(type **ptr, type *new) + * + * RCU pointer updates. + * @ptr: address of the pointer to modify + * @new: new pointer value + * @old: old pointer value (expected) + * + * return: old pointer value + */ +#define lttng_ust_rcu_cmpxchg_pointer _lttng_ust_rcu_cmpxchg_pointer +#define lttng_ust_rcu_xchg_pointer _lttng_ust_rcu_xchg_pointer +#define lttng_ust_rcu_set_pointer _lttng_ust_rcu_set_pointer + +#else /* !(defined(_LGPL_SOURCE) || defined(LTTNG_UST_URCU_INLINE_SMALL_FUNCTIONS)) */ + +extern void *lttng_ust_rcu_dereference_sym(void *p); +#define lttng_ust_rcu_dereference(p) \ + __extension__ \ + ({ \ + __typeof__(p) _________p1 = URCU_FORCE_CAST(__typeof__(p), \ + lttng_ust_rcu_dereference_sym(URCU_FORCE_CAST(void *, p))); \ + (_________p1); \ + }) + +extern void *lttng_ust_rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new); +#define lttng_ust_rcu_cmpxchg_pointer(p, old, _new) \ + __extension__ \ + ({ \ + __typeof__(*(p)) _________pold = (old); \ + __typeof__(*(p)) _________pnew = (_new); \ + __typeof__(*(p)) _________p1 = URCU_FORCE_CAST(__typeof__(*(p)), \ + lttng_ust_rcu_cmpxchg_pointer_sym(URCU_FORCE_CAST(void **, p), \ + _________pold, \ + _________pnew)); \ + (_________p1); \ + }) + +extern void *lttng_ust_rcu_xchg_pointer_sym(void **p, void *v); +#define lttng_ust_rcu_xchg_pointer(p, v) \ + __extension__ \ + ({ \ + __typeof__(*(p)) _________pv = (v); \ + __typeof__(*(p)) _________p1 = URCU_FORCE_CAST(__typeof__(*(p)), \ + lttng_ust_rcu_xchg_pointer_sym(URCU_FORCE_CAST(void **, p), \ + _________pv)); \ + (_________p1); \ + }) + +/* + * Note: lttng_ust_rcu_set_pointer_sym returns @v because we don't want to break + * the ABI. At the API level, lttng_ust_rcu_set_pointer() now returns void. Use of + * the return value is therefore deprecated, and will cause a build + * error. + */ +extern void *lttng_ust_rcu_set_pointer_sym(void **p, void *v); +#define lttng_ust_rcu_set_pointer(p, v) \ + do { \ + __typeof__(*(p)) _________pv = (v); \ + (void) lttng_ust_rcu_set_pointer_sym(URCU_FORCE_CAST(void **, p), \ + _________pv); \ + } while (0) + +#endif /* !(defined(_LGPL_SOURCE) || defined(LTTNG_UST_URCU_INLINE_SMALL_FUNCTIONS)) */ + +/* + * void lttng_ust_rcu_assign_pointer(type *ptr, type *new) + * + * Same as lttng_ust_rcu_set_pointer, but takes the pointer to assign to rather than its + * address as first parameter. Provided for compatibility with the Linux kernel + * RCU semantic. + */ +#define lttng_ust_rcu_assign_pointer(p, v) lttng_ust_rcu_set_pointer((&p), (v)) + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_URCU_POINTER_H */ diff --git a/include/lttng/urcu/static/pointer.h b/include/lttng/urcu/static/pointer.h new file mode 100644 index 00000000..dcb85785 --- /dev/null +++ b/include/lttng/urcu/static/pointer.h @@ -0,0 +1,146 @@ +#ifndef _LTTNG_URCU_POINTER_STATIC_H +#define _LTTNG_URCU_POINTER_STATIC_H + +/* + * lttng/urcu/static/pointer.h + * + * Userspace RCU header. Operations on pointers. + * + * TO BE INCLUDED ONLY IN CODE THAT IS TO BE RECOMPILED ON EACH LIBURCU + * RELEASE. See urcu.h for linking dynamically with the userspace rcu library. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * _rcu_dereference - reads (copy) a RCU-protected pointer to a local variable + * into a RCU read-side critical section. The pointer can later be safely + * dereferenced within the critical section. + * + * This ensures that the pointer copy is invariant thorough the whole critical + * section. + * + * Inserts memory barriers on architectures that require them (currently only + * Alpha) and documents which pointers are protected by RCU. + * + * The compiler memory barrier in CMM_LOAD_SHARED() ensures that value-speculative + * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform the + * data read before the pointer read by speculating the value of the pointer. + * Correct ordering is ensured because the pointer is read as a volatile access. + * This acts as a global side-effect operation, which forbids reordering of + * dependent memory operations. Note that such concern about dependency-breaking + * optimizations will eventually be taken care of by the "memory_order_consume" + * addition to forthcoming C++ standard. + * + * Should match rcu_assign_pointer() or rcu_xchg_pointer(). + * + * This macro is less than 10 lines long. The intent is that this macro + * meets the 10-line criterion in LGPL, allowing this function to be + * expanded directly in non-LGPL code. + */ +#define _lttng_ust_rcu_dereference(p) \ + __extension__ \ + ({ \ + __typeof__(p) _________p1 = CMM_LOAD_SHARED(p); \ + cmm_smp_read_barrier_depends(); \ + (_________p1); \ + }) + +/** + * _rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer + * is as expected by "old". If succeeds, returns the previous pointer to the + * data structure, which can be safely freed after waiting for a quiescent state + * using synchronize_rcu(). If fails (unexpected value), returns old (which + * should not be freed !). + * + * uatomic_cmpxchg() acts as both release and acquire barriers. + * + * This macro is less than 10 lines long. The intent is that this macro + * meets the 10-line criterion in LGPL, allowing this function to be + * expanded directly in non-LGPL code. + */ +#define _lttng_ust_rcu_cmpxchg_pointer(p, old, _new) \ + __extension__ \ + ({ \ + __typeof__(*p) _________pold = (old); \ + __typeof__(*p) _________pnew = (_new); \ + uatomic_cmpxchg(p, _________pold, _________pnew); \ + }) + +/** + * _rcu_xchg_pointer - same as rcu_assign_pointer, but returns the previous + * pointer to the data structure, which can be safely freed after waiting for a + * quiescent state using synchronize_rcu(). + * + * uatomic_xchg() acts as both release and acquire barriers. 
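+ *
+ * Usage sketch (head, node and old are placeholder names):
+ *
+ *	old = _lttng_ust_rcu_xchg_pointer(&head, node);
+ *	(free old only once lttng_ust_urcu_synchronize_rcu() has returned)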
+ * + * This macro is less than 10 lines long. The intent is that this macro + * meets the 10-line criterion in LGPL, allowing this function to be + * expanded directly in non-LGPL code. + */ +#define _lttng_ust_rcu_xchg_pointer(p, v) \ + __extension__ \ + ({ \ + __typeof__(*p) _________pv = (v); \ + uatomic_xchg(p, _________pv); \ + }) + + +#define _lttng_ust_rcu_set_pointer(p, v) \ + do { \ + __typeof__(*p) _________pv = (v); \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + cmm_wmb(); \ + uatomic_set(p, _________pv); \ + } while (0) + +/** + * _rcu_assign_pointer - assign (publicize) a pointer to a new data structure + * meant to be read by RCU read-side critical sections. Returns the assigned + * value. + * + * Documents which pointers will be dereferenced by RCU read-side critical + * sections and adds the required memory barriers on architectures requiring + * them. It also makes sure the compiler does not reorder code initializing the + * data structure before its publication. + * + * Should match rcu_dereference(). + * + * This macro is less than 10 lines long. The intent is that this macro + * meets the 10-line criterion in LGPL, allowing this function to be + * expanded directly in non-LGPL code. + */ +#define _lttng_ust_rcu_assign_pointer(p, v) _lttng_ust_rcu_set_pointer(&(p), v) + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_URCU_POINTER_STATIC_H */ diff --git a/include/lttng/urcu/static/urcu-ust.h b/include/lttng/urcu/static/urcu-ust.h new file mode 100644 index 00000000..cbe27c09 --- /dev/null +++ b/include/lttng/urcu/static/urcu-ust.h @@ -0,0 +1,211 @@ +#ifndef _LTTNG_UST_URCU_STATIC_H +#define _LTTNG_UST_URCU_STATIC_H + +/* + * urcu-ust-static.h + * + * Userspace RCU header. + * + * TO BE INCLUDED ONLY IN CODE THAT IS TO BE RECOMPILED ON EACH LIBURCU + * RELEASE. See urcu.h for linking dynamically with the userspace rcu library. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This code section can only be included in LGPL 2.1 compatible source code. + * See below for the function call wrappers which can be used in code meant to + * be only linked with the Userspace RCU library. This comes with a small + * performance degradation on the read-side due to the added function calls. + * This is required to permit relinking with newer versions of the library. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +enum lttng_ust_urcu_state { + LTTNG_UST_URCU_READER_ACTIVE_CURRENT, + LTTNG_UST_URCU_READER_ACTIVE_OLD, + LTTNG_UST_URCU_READER_INACTIVE, +}; + +/* + * The trick here is that LTTNG_UST_URCU_GP_CTR_PHASE must be a multiple of 8 so we can use a + * full 8-bits, 16-bits or 32-bits bitmask for the lower order bits. + */ +#define LTTNG_UST_URCU_GP_COUNT (1UL << 0) +/* Use the amount of bits equal to half of the architecture long size */ +#define LTTNG_UST_URCU_GP_CTR_PHASE (1UL << (sizeof(long) << 2)) +#define LTTNG_UST_URCU_GP_CTR_NEST_MASK (LTTNG_UST_URCU_GP_CTR_PHASE - 1) + +/* + * Used internally by _lttng_ust_urcu_read_lock. + */ +extern void lttng_ust_urcu_register(void); + +struct lttng_ust_urcu_gp { + /* + * Global grace period counter. + * Contains the current LTTNG_UST_URCU_GP_CTR_PHASE. + * Also has a LTTNG_UST_URCU_GP_COUNT of 1, to accelerate the reader fast path. + * Written to only by writer with mutex taken. + * Read by both writer and readers. + */ + unsigned long ctr; +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + +extern struct lttng_ust_urcu_gp lttng_ust_urcu_gp; + +struct lttng_ust_urcu_reader { + /* Data used by both reader and lttng_ust_urcu_synchronize_rcu() */ + unsigned long ctr; + /* Data used for registry */ + struct cds_list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + pthread_t tid; + int alloc; /* registry entry allocated */ +}; + +/* + * Bulletproof version keeps a pointer to a registry not part of the TLS. + * Adds a pointer dereference on the read-side, but won't require to unregister + * the reader thread. + */ +extern DECLARE_URCU_TLS(struct lttng_ust_urcu_reader *, lttng_ust_urcu_reader); + +#ifdef CONFIG_RCU_FORCE_SYS_MEMBARRIER +#define lttng_ust_urcu_has_sys_membarrier 1 +#else +extern int lttng_ust_urcu_has_sys_membarrier; +#endif + +static inline void lttng_ust_urcu_smp_mb_slave(void) +{ + if (caa_likely(lttng_ust_urcu_has_sys_membarrier)) + cmm_barrier(); + else + cmm_smp_mb(); +} + +static inline enum lttng_ust_urcu_state lttng_ust_urcu_reader_state(unsigned long *ctr) +{ + unsigned long v; + + if (ctr == NULL) + return LTTNG_UST_URCU_READER_INACTIVE; + /* + * Make sure both tests below are done on the same version of *value + * to insure consistency. + */ + v = CMM_LOAD_SHARED(*ctr); + if (!(v & LTTNG_UST_URCU_GP_CTR_NEST_MASK)) + return LTTNG_UST_URCU_READER_INACTIVE; + if (!((v ^ lttng_ust_urcu_gp.ctr) & LTTNG_UST_URCU_GP_CTR_PHASE)) + return LTTNG_UST_URCU_READER_ACTIVE_CURRENT; + return LTTNG_UST_URCU_READER_ACTIVE_OLD; +} + +/* + * Helper for _lttng_ust_urcu_read_lock(). The format of lttng_ust_urcu_gp.ctr (as well as + * the per-thread rcu_reader.ctr) has the upper bits containing a count of + * _lttng_ust_urcu_read_lock() nesting, and a lower-order bit that contains either zero + * or LTTNG_UST_URCU_GP_CTR_PHASE. The smp_mb_slave() ensures that the accesses in + * _lttng_ust_urcu_read_lock() happen before the subsequent read-side critical section. + */ +static inline void _lttng_ust_urcu_read_lock_update(unsigned long tmp) +{ + if (caa_likely(!(tmp & LTTNG_UST_URCU_GP_CTR_NEST_MASK))) { + _CMM_STORE_SHARED(URCU_TLS(lttng_ust_urcu_reader)->ctr, _CMM_LOAD_SHARED(lttng_ust_urcu_gp.ctr)); + lttng_ust_urcu_smp_mb_slave(); + } else + _CMM_STORE_SHARED(URCU_TLS(lttng_ust_urcu_reader)->ctr, tmp + LTTNG_UST_URCU_GP_COUNT); +} + +/* + * Enter an RCU read-side critical section. 
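+ *
+ * Usage sketch (p and gp_ptr are placeholder names):
+ *
+ *	_lttng_ust_urcu_read_lock();
+ *	p = _lttng_ust_rcu_dereference(gp_ptr);
+ *	... read *p ...
+ *	_lttng_ust_urcu_read_unlock();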
+ * + * The first cmm_barrier() call ensures that the compiler does not reorder + * the body of _lttng_ust_urcu_read_lock() with a mutex. + * + * This function and its helper are both less than 10 lines long. The + * intent is that this function meets the 10-line criterion in LGPL, + * allowing this function to be invoked directly from non-LGPL code. + */ +static inline void _lttng_ust_urcu_read_lock(void) +{ + unsigned long tmp; + + if (caa_unlikely(!URCU_TLS(lttng_ust_urcu_reader))) + lttng_ust_urcu_register(); /* If not yet registered. */ + cmm_barrier(); /* Ensure the compiler does not reorder us with mutex */ + tmp = URCU_TLS(lttng_ust_urcu_reader)->ctr; + urcu_assert((tmp & LTTNG_UST_URCU_GP_CTR_NEST_MASK) != LTTNG_UST_URCU_GP_CTR_NEST_MASK); + _lttng_ust_urcu_read_lock_update(tmp); +} + +/* + * Exit an RCU read-side critical section. This function is less than + * 10 lines of code, and is intended to be usable by non-LGPL code, as + * called out in LGPL. + */ +static inline void _lttng_ust_urcu_read_unlock(void) +{ + unsigned long tmp; + + tmp = URCU_TLS(lttng_ust_urcu_reader)->ctr; + urcu_assert(tmp & LTTNG_UST_URCU_GP_CTR_NEST_MASK); + /* Finish using rcu before decrementing the pointer. */ + lttng_ust_urcu_smp_mb_slave(); + _CMM_STORE_SHARED(URCU_TLS(lttng_ust_urcu_reader)->ctr, tmp - LTTNG_UST_URCU_GP_COUNT); + cmm_barrier(); /* Ensure the compiler does not reorder us with mutex */ +} + +/* + * Returns whether within a RCU read-side critical section. + * + * This function is less than 10 lines long. The intent is that this + * function meets the 10-line criterion for LGPL, allowing this function + * to be invoked directly from non-LGPL code. + */ +static inline int _lttng_ust_urcu_read_ongoing(void) +{ + if (caa_unlikely(!URCU_TLS(lttng_ust_urcu_reader))) + lttng_ust_urcu_register(); /* If not yet registered. */ + return URCU_TLS(lttng_ust_urcu_reader)->ctr & LTTNG_UST_URCU_GP_CTR_NEST_MASK; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_URCU_STATIC_H */ diff --git a/include/lttng/urcu/urcu-ust.h b/include/lttng/urcu/urcu-ust.h new file mode 100644 index 00000000..421e0cca --- /dev/null +++ b/include/lttng/urcu/urcu-ust.h @@ -0,0 +1,105 @@ +#ifndef _LTTNG_UST_URCU_H +#define _LTTNG_UST_URCU_H + +/* + * urcu-ust.h + * + * Userspace RCU header for LTTng-UST. Derived from liburcu + * "bulletproof" flavor. + * + * Slower RCU read-side adapted for tracing library. Does not require thread + * registration nor unregistration. Also signal-safe. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * LGPL-compatible code should include this header with : + * + * #define _LGPL_SOURCE + * #include + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. 
+ */ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * See lttng/urcu/pointer.h and lttng/urcu/static/pointer.h for pointer + * publication headers. + */ +#include + +#ifdef _LGPL_SOURCE + +#include + +/* + * Mappings for static use of the userspace RCU library. + * Should only be used in LGPL-compatible code. + */ + +/* + * lttng_ust_urcu_read_lock() + * lttng_ust_urcu_read_unlock() + * + * Mark the beginning and end of a read-side critical section. + */ +#define lttng_ust_urcu_read_lock _lttng_ust_urcu_read_lock +#define lttng_ust_urcu_read_unlock _lttng_ust_urcu_read_unlock +#define lttng_ust_urcu_read_ongoing _lttng_ust_urcu_read_ongoing + +#else /* !_LGPL_SOURCE */ + +/* + * library wrappers to be used by non-LGPL compatible source code. + * See LGPL-only urcu/static/pointer.h for documentation. + */ + +extern void lttng_ust_urcu_read_lock(void); +extern void lttng_ust_urcu_read_unlock(void); +extern int lttng_ust_urcu_read_ongoing(void); + +#endif /* !_LGPL_SOURCE */ + +extern void lttng_ust_urcu_synchronize_rcu(void); + +/* + * lttng_ust_urcu_before_fork, lttng_ust_urcu_after_fork_parent and + * lttng_ust_urcu_after_fork_child should be called around fork() system + * calls when the child process is not expected to immediately perform + * an exec(). For pthread users, see pthread_atfork(3). + */ +extern void lttng_ust_urcu_before_fork(void); +extern void lttng_ust_urcu_after_fork_parent(void); +extern void lttng_ust_urcu_after_fork_child(void); + +/* + * In the UST version, thread registration is performed lazily, but it can be + * forced by issuing an explicit lttng_ust_urcu_register_thread(). + */ +extern void lttng_ust_urcu_register_thread(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_URCU_H */ diff --git a/include/lttng/ust-events.h b/include/lttng/ust-events.h index 19726f88..7e70ea40 100644 --- a/include/lttng/ust-events.h +++ b/include/lttng/ust-events.h @@ -781,8 +781,6 @@ struct lttng_counter *lttng_ust_counter_create( const char *counter_transport_name, size_t number_dimensions, const struct lttng_counter_dimension *dimensions); -void synchronize_trace(void); - int lttng_probe_register(struct lttng_probe_desc *desc); void lttng_probe_unregister(struct lttng_probe_desc *desc); void lttng_probe_provider_unregister_events(struct lttng_probe_desc *desc); diff --git a/include/lttng/ust-tracepoint-event.h b/include/lttng/ust-tracepoint-event.h index 0a397e50..11025204 100644 --- a/include/lttng/ust-tracepoint-event.h +++ b/include/lttng/ust-tracepoint-event.h @@ -36,9 +36,9 @@ #undef tp_list_for_each_entry_rcu #define tp_list_for_each_entry_rcu(pos, head, member) \ - for (pos = cds_list_entry(tp_rcu_dereference_bp((head)->next), __typeof__(*pos), member); \ + for (pos = cds_list_entry(tp_rcu_dereference((head)->next), __typeof__(*pos), member); \ &pos->member != (head); \ - pos = cds_list_entry(tp_rcu_dereference_bp(pos->member.next), __typeof__(*pos), member)) + pos = cds_list_entry(tp_rcu_dereference(pos->member.next), __typeof__(*pos), member)) /* * TRACEPOINT_EVENT_CLASS declares a class of tracepoints receiving the @@ -892,8 +892,8 @@ void __event_probe__##_provider##___##_name(_TP_ARGS_DATA_PROTO(_args)) \ __event_align = __event_get_align__##_provider##___##_name(_TP_ARGS_VAR(_args)); \ memset(&__lttng_ctx, 0, sizeof(__lttng_ctx)); \ __lttng_ctx.event = __event; \ - __lttng_ctx.chan_ctx = tp_rcu_dereference_bp(__chan->ctx); \ - __lttng_ctx.event_ctx = tp_rcu_dereference_bp(__event->ctx); \ + __lttng_ctx.chan_ctx = 
tp_rcu_dereference(__chan->ctx); \ + __lttng_ctx.event_ctx = tp_rcu_dereference(__event->ctx); \ lib_ring_buffer_ctx_init(&__ctx, __chan->chan, __event, __event_len, \ __event_align, -1, __chan->handle, &__lttng_ctx); \ __ctx.ip = _TP_IP_PARAM(TP_IP_PARAM); \ diff --git a/liblttng-ust/Makefile.am b/liblttng-ust/Makefile.am index fab09251..2b25cfc5 100644 --- a/liblttng-ust/Makefile.am +++ b/liblttng-ust/Makefile.am @@ -10,10 +10,11 @@ liblttng_ust_tracepoint_la_SOURCES = \ tracepoint-internal.h \ lttng-tracer-core.h \ jhash.h \ - error.h + error.h \ + lttng-ust-urcu.c \ + lttng-ust-urcu-pointer.c liblttng_ust_tracepoint_la_LIBADD = \ - $(URCU_BP_LIBS) \ $(top_builddir)/snprintf/libustsnprintf.la \ $(DL_LIBS) @@ -79,7 +80,15 @@ liblttng_ust_runtime_la_SOURCES = \ string-utils.h \ event-notifier-notification.c \ ns.h \ - creds.h + creds.h \ + rculfhash.c \ + rculfhash.h \ + rculfhash-internal.h \ + rculfhash-mm-chunk.c \ + rculfhash-mm-mmap.c \ + rculfhash-mm-order.c \ + compat_futex.c \ + futex.h if HAVE_PERF_EVENT liblttng_ust_runtime_la_SOURCES += \ @@ -116,7 +125,6 @@ liblttng_ust_support_la_LIBADD = \ liblttng_ust_la_LIBADD = \ -lrt \ - $(URCU_CDS_LIBS) \ $(top_builddir)/snprintf/libustsnprintf.la \ $(top_builddir)/liblttng-ust-comm/liblttng-ust-comm.la \ liblttng-ust-tracepoint.la \ diff --git a/liblttng-ust/compat_futex.c b/liblttng-ust/compat_futex.c new file mode 100644 index 00000000..9517eda2 --- /dev/null +++ b/liblttng-ust/compat_futex.c @@ -0,0 +1,154 @@ +/* + * compat_futex.c + * + * Userspace RCU library - sys_futex compatibility code + * + * Copyright (c) 2009 Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "futex.h" + +/* + * Using attribute "weak" for __lttng_ust_compat_futex_lock and + * __lttng_ust_compat_futex_cond. Those are globally visible by the entire + * program, even though many shared objects may have their own version. + * The first version that gets loaded will be used by the entire program + * (executable and all shared objects). + */ + +__attribute__((weak)) +pthread_mutex_t __lttng_ust_compat_futex_lock = PTHREAD_MUTEX_INITIALIZER; +__attribute__((weak)) +pthread_cond_t __lttng_ust_compat_futex_cond = PTHREAD_COND_INITIALIZER; + +/* + * _NOT SIGNAL-SAFE_. pthread_cond is not signal-safe anyway. Though. + * For now, timeout, uaddr2 and val3 are unused. + * Waiter will relinquish the CPU until woken up. + */ + +int lttng_ust_compat_futex_noasync(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + int ret = 0, lockret; + + /* + * Check if NULL. Don't let users expect that they are taken into + * account. 
+ */ + assert(!timeout); + assert(!uaddr2); + assert(!val3); + + /* + * memory barriers to serialize with the previous uaddr modification. + */ + cmm_smp_mb(); + + lockret = pthread_mutex_lock(&__lttng_ust_compat_futex_lock); + if (lockret) { + errno = lockret; + ret = -1; + goto end; + } + switch (op) { + case FUTEX_WAIT: + /* + * Wait until *uaddr is changed to something else than "val". + * Comparing *uaddr content against val figures out which + * thread has been awakened. + */ + while (CMM_LOAD_SHARED(*uaddr) == val) + pthread_cond_wait(&__lttng_ust_compat_futex_cond, + &__lttng_ust_compat_futex_lock); + break; + case FUTEX_WAKE: + /* + * Each wake is sending a broadcast, thus attempting wakeup of + * all awaiting threads, independently of their respective + * uaddr. + */ + pthread_cond_broadcast(&__lttng_ust_compat_futex_cond); + break; + default: + errno = EINVAL; + ret = -1; + } + lockret = pthread_mutex_unlock(&__lttng_ust_compat_futex_lock); + if (lockret) { + errno = lockret; + ret = -1; + } +end: + return ret; +} + +/* + * _ASYNC SIGNAL-SAFE_. + * For now, timeout, uaddr2 and val3 are unused. + * Waiter will busy-loop trying to read the condition. + * It is OK to use compat_futex_async() on a futex address on which + * futex() WAKE operations are also performed. + */ + +int lttng_ust_compat_futex_async(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + int ret = 0; + + /* + * Check if NULL. Don't let users expect that they are taken into + * account. + */ + assert(!timeout); + assert(!uaddr2); + assert(!val3); + + /* + * Ensure previous memory operations on uaddr have completed. + */ + cmm_smp_mb(); + + switch (op) { + case FUTEX_WAIT: + while (CMM_LOAD_SHARED(*uaddr) == val) { + if (poll(NULL, 0, 10) < 0) { + ret = -1; + /* Keep poll errno. Caller handles EINTR. */ + goto end; + } + } + break; + case FUTEX_WAKE: + break; + default: + errno = EINVAL; + ret = -1; + } +end: + return ret; +} diff --git a/liblttng-ust/futex.h b/liblttng-ust/futex.h new file mode 100644 index 00000000..902ceecf --- /dev/null +++ b/liblttng-ust/futex.h @@ -0,0 +1,189 @@ +#ifndef _LTTNG_UST_FUTEX_H +#define _LTTNG_UST_FUTEX_H + +/* + * liblttng-ust/futex.h + * + * Userspace RCU - sys_futex/compat_futex header. + * + * Copyright 2011-2012 - Mathieu Desnoyers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 + +/* + * sys_futex compatibility header. + * Use *only* *either of* futex_noasync OR futex_async on a given address. + * + * futex_noasync cannot be executed in signal handlers, but ensures that + * it will be put in a wait queue even in compatibility mode. + * + * futex_async is signal-handler safe for the wakeup. 
It uses polling
+ * on the wait-side in compatibility mode.
+ *
+ * BEWARE: sys_futex() FUTEX_WAIT may return early if interrupted
+ * (returns EINTR).
+ */
+
+extern int lttng_ust_compat_futex_noasync(int32_t *uaddr, int op, int32_t val,
+		const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
+extern int lttng_ust_compat_futex_async(int32_t *uaddr, int op, int32_t val,
+		const struct timespec *timeout, int32_t *uaddr2, int32_t val3);
+
+#if (defined(__linux__) && defined(__NR_futex))
+
+#include
+#include
+#include
+#include
+
+static inline int lttng_ust_futex(int32_t *uaddr, int op, int32_t val,
+		const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
+{
+	return syscall(__NR_futex, uaddr, op, val, timeout,
+			uaddr2, val3);
+}
+
+static inline int lttng_ust_futex_noasync(int32_t *uaddr, int op, int32_t val,
+		const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
+{
+	int ret;
+
+	ret = lttng_ust_futex(uaddr, op, val, timeout, uaddr2, val3);
+	if (caa_unlikely(ret < 0 && errno == ENOSYS)) {
+		/*
+		 * The fallback on ENOSYS is the async-safe version of
+		 * the compat futex implementation, because the
+		 * async-safe compat implementation allows being used
+		 * concurrently with calls to futex(). Indeed, sys_futex
+		 * FUTEX_WAIT, on some architectures (mips and parisc),
+		 * within a given process, spuriously returns ENOSYS due
+		 * to signal restart bugs on some kernel versions.
+		 */
+		return lttng_ust_compat_futex_async(uaddr, op, val, timeout,
+				uaddr2, val3);
+	}
+	return ret;
+}
+
+static inline int lttng_ust_futex_async(int32_t *uaddr, int op, int32_t val,
+		const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
+{
+	int ret;
+
+	ret = lttng_ust_futex(uaddr, op, val, timeout, uaddr2, val3);
+	if (caa_unlikely(ret < 0 && errno == ENOSYS)) {
+		return lttng_ust_compat_futex_async(uaddr, op, val, timeout,
+				uaddr2, val3);
+	}
+	return ret;
+}
+
+#elif defined(__FreeBSD__)
+
+#include
+#include
+
+static inline int lttng_ust_futex_async(int32_t *uaddr, int op, int32_t val,
+		const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
+{
+	int umtx_op;
+	void *umtx_uaddr = NULL, *umtx_uaddr2 = NULL;
+	struct _umtx_time umtx_timeout = {
+		._flags = UMTX_ABSTIME,
+		._clockid = CLOCK_MONOTONIC,
+	};
+
+	switch (op) {
+	case FUTEX_WAIT:
+		/* On FreeBSD, a "u_int" is a 32-bit integer. */
+		umtx_op = UMTX_OP_WAIT_UINT;
+		if (timeout != NULL) {
+			umtx_timeout._timeout = *timeout;
+			umtx_uaddr = (void *) sizeof(umtx_timeout);
+			umtx_uaddr2 = (void *) &umtx_timeout;
+		}
+		break;
+	case FUTEX_WAKE:
+		umtx_op = UMTX_OP_WAKE;
+		break;
+	default:
+		errno = EINVAL;
+		return -1;
+	}
+
+	return _umtx_op(uaddr, umtx_op, (uint32_t) val, umtx_uaddr,
+			umtx_uaddr2);
+}
+
+static inline int lttng_ust_futex_noasync(int32_t *uaddr, int op, int32_t val,
+		const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
+{
+	/* No distinct noasync implementation on FreeBSD: use the async one. */
+	return lttng_ust_futex_async(uaddr, op, val, timeout, uaddr2, val3);
+}
+
+#elif defined(__CYGWIN__)
+
+/*
+ * The futex_noasync compat code uses a weak symbol to share state across
+ * different shared objects, which is not possible on Windows with the
+ * Portable Executable format. Use the async compat code for both cases.
+ */ +static inline int lttng_ust_futex_noasync(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return lttng_ust_compat_futex_async(uaddr, op, val, timeout, uaddr2, val3); +} + +static inline int lttng_ust_futex_async(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return lttng_ust_compat_futex_async(uaddr, op, val, timeout, uaddr2, val3); +} + +#else + +static inline int lttng_ust_futex_noasync(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return lttng_ust_compat_futex_noasync(uaddr, op, val, timeout, uaddr2, val3); +} + +static inline int lttng_ust_futex_async(int32_t *uaddr, int op, int32_t val, + const struct timespec *timeout, int32_t *uaddr2, int32_t val3) +{ + return lttng_ust_compat_futex_async(uaddr, op, val, timeout, uaddr2, val3); +} + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_FUTEX_H */ diff --git a/liblttng-ust/lttng-bytecode-interpreter.c b/liblttng-ust/lttng-bytecode-interpreter.c index bba3cac8..637fdc1a 100644 --- a/liblttng-ust/lttng-bytecode-interpreter.c +++ b/liblttng-ust/lttng-bytecode-interpreter.c @@ -27,9 +27,9 @@ #define _LGPL_SOURCE #include #include -#include #include +#include #include #include @@ -771,7 +771,7 @@ uint64_t bytecode_interpret(void *interpreter_data, struct lttng_interpreter_output *output) { struct bytecode_runtime *bytecode = interpreter_data; - struct lttng_ctx *ctx = rcu_dereference(*bytecode->p.pctx); + struct lttng_ctx *ctx = lttng_ust_rcu_dereference(*bytecode->p.pctx); void *pc, *next_pc, *start_pc; int ret = -EINVAL; uint64_t retval = 0; diff --git a/liblttng-ust/lttng-bytecode-validator.c b/liblttng-ust/lttng-bytecode-validator.c index f60c9367..198d2fab 100644 --- a/liblttng-ust/lttng-bytecode-validator.c +++ b/liblttng-ust/lttng-bytecode-validator.c @@ -29,8 +29,7 @@ #include #include -#include -#include +#include "rculfhash.h" #include "lttng-bytecode.h" #include "lttng-hash-helper.h" @@ -49,7 +48,7 @@ /* merge point table node */ struct lfht_mp_node { - struct cds_lfht_node node; + struct lttng_ust_lfht_node node; /* Context at merge point */ struct vstack stack; @@ -60,7 +59,7 @@ static unsigned long lttng_hash_seed; static unsigned int lttng_hash_seed_ready; static -int lttng_hash_match(struct cds_lfht_node *node, const void *key) +int lttng_hash_match(struct lttng_ust_lfht_node *node, const void *key) { struct lfht_mp_node *mp_node = caa_container_of(node, struct lfht_mp_node, node); @@ -92,14 +91,14 @@ int merge_points_compare(const struct vstack *stacka, } static -int merge_point_add_check(struct cds_lfht *ht, unsigned long target_pc, +int merge_point_add_check(struct lttng_ust_lfht *ht, unsigned long target_pc, const struct vstack *stack) { struct lfht_mp_node *node; unsigned long hash = lttng_hash_mix((const char *) target_pc, sizeof(target_pc), lttng_hash_seed); - struct cds_lfht_node *ret; + struct lttng_ust_lfht_node *ret; dbg_printf("Bytecode: adding merge point at offset %lu, hash %lu\n", target_pc, hash); @@ -108,7 +107,7 @@ int merge_point_add_check(struct cds_lfht *ht, unsigned long target_pc, return -ENOMEM; node->target_pc = target_pc; memcpy(&node->stack, stack, sizeof(node->stack)); - ret = cds_lfht_add_unique(ht, hash, lttng_hash_match, + ret = lttng_ust_lfht_add_unique(ht, hash, lttng_hash_match, (const char *) target_pc, &node->node); if (ret != &node->node) { struct lfht_mp_node *ret_mp = @@ -547,16 +546,16 @@ 
int bytecode_validate_overflow(struct bytecode_runtime *bytecode, } static -unsigned long delete_all_nodes(struct cds_lfht *ht) +unsigned long delete_all_nodes(struct lttng_ust_lfht *ht) { - struct cds_lfht_iter iter; + struct lttng_ust_lfht_iter iter; struct lfht_mp_node *node; unsigned long nr_nodes = 0; - cds_lfht_for_each_entry(ht, &iter, node, node) { + lttng_ust_lfht_for_each_entry(ht, &iter, node, node) { int ret; - ret = cds_lfht_del(ht, cds_lfht_iter_get_node(&iter)); + ret = lttng_ust_lfht_del(ht, lttng_ust_lfht_iter_get_node(&iter)); assert(!ret); /* note: this hash table is never used concurrently */ free(node); @@ -1223,15 +1222,15 @@ end: */ static int validate_instruction_all_contexts(struct bytecode_runtime *bytecode, - struct cds_lfht *merge_points, + struct lttng_ust_lfht *merge_points, struct vstack *stack, char *start_pc, char *pc) { int ret; unsigned long target_pc = pc - start_pc; - struct cds_lfht_iter iter; - struct cds_lfht_node *node; + struct lttng_ust_lfht_iter iter; + struct lttng_ust_lfht_node *node; struct lfht_mp_node *mp_node; unsigned long hash; @@ -1243,9 +1242,9 @@ int validate_instruction_all_contexts(struct bytecode_runtime *bytecode, /* Validate merge points */ hash = lttng_hash_mix((const char *) target_pc, sizeof(target_pc), lttng_hash_seed); - cds_lfht_lookup(merge_points, hash, lttng_hash_match, + lttng_ust_lfht_lookup(merge_points, hash, lttng_hash_match, (const char *) target_pc, &iter); - node = cds_lfht_iter_get_node(&iter); + node = lttng_ust_lfht_iter_get_node(&iter); if (node) { mp_node = caa_container_of(node, struct lfht_mp_node, node); @@ -1259,7 +1258,7 @@ int validate_instruction_all_contexts(struct bytecode_runtime *bytecode, /* Once validated, we can remove the merge point */ dbg_printf("Bytecode: remove merge point at offset %lu\n", target_pc); - ret = cds_lfht_del(merge_points, node); + ret = lttng_ust_lfht_del(merge_points, node); assert(!ret); } return 0; @@ -1273,7 +1272,7 @@ int validate_instruction_all_contexts(struct bytecode_runtime *bytecode, */ static int exec_insn(struct bytecode_runtime *bytecode, - struct cds_lfht *merge_points, + struct lttng_ust_lfht *merge_points, struct vstack *stack, char **_next_pc, char *pc) @@ -1960,7 +1959,7 @@ end: */ int lttng_bytecode_validate(struct bytecode_runtime *bytecode) { - struct cds_lfht *merge_points; + struct lttng_ust_lfht *merge_points; char *pc, *next_pc, *start_pc; int ret = -EINVAL; struct vstack stack; @@ -1977,7 +1976,7 @@ int lttng_bytecode_validate(struct bytecode_runtime *bytecode) * holding RCU read-side lock and free nodes without using * call_rcu. 
*/ - merge_points = cds_lfht_new(DEFAULT_NR_MERGE_POINTS, + merge_points = lttng_ust_lfht_new(DEFAULT_NR_MERGE_POINTS, MIN_NR_BUCKETS, MAX_NR_BUCKETS, 0, NULL); if (!merge_points) { @@ -2017,7 +2016,7 @@ end: ret = -EINVAL; } } - if (cds_lfht_destroy(merge_points, NULL)) { + if (lttng_ust_lfht_destroy(merge_points)) { ERR("Error destroying hash table\n"); } return ret; diff --git a/liblttng-ust/lttng-context.c b/liblttng-ust/lttng-context.c index c457d3e1..fde3607d 100644 --- a/liblttng-ust/lttng-context.c +++ b/liblttng-ust/lttng-context.c @@ -24,12 +24,13 @@ #include #include #include -#include +#include #include #include #include #include #include +#include "tracepoint-internal.h" #include "context-internal.h" @@ -157,8 +158,8 @@ int lttng_context_add_rcu(struct lttng_ctx **ctx_p, } *nf = *f; lttng_context_update(new_ctx); - rcu_assign_pointer(*ctx_p, new_ctx); - synchronize_trace(); + lttng_ust_rcu_assign_pointer(*ctx_p, new_ctx); + lttng_ust_synchronize_trace(); if (old_ctx) { free(old_ctx->fields); free(old_ctx); @@ -391,8 +392,8 @@ int lttng_ust_context_set_provider_rcu(struct lttng_ctx **_ctx, new_fields[i].get_value = get_value; } new_ctx->fields = new_fields; - rcu_assign_pointer(*_ctx, new_ctx); - synchronize_trace(); + lttng_ust_rcu_assign_pointer(*_ctx, new_ctx); + lttng_ust_synchronize_trace(); free(ctx->fields); free(ctx); return 0; diff --git a/liblttng-ust/lttng-events.c b/liblttng-ust/lttng-events.c index b455c363..4fa675dd 100644 --- a/liblttng-ust/lttng-events.c +++ b/liblttng-ust/lttng-events.c @@ -34,9 +34,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -143,11 +143,6 @@ int lttng_loglevel_match(int loglevel, } } -void synchronize_trace(void) -{ - synchronize_rcu(); -} - struct lttng_session *lttng_session_create(void) { struct lttng_session *session; @@ -350,7 +345,7 @@ void lttng_session_destroy(struct lttng_session *session) cds_list_for_each_entry(event, &session->events_head, node) { _lttng_event_unregister(event); } - synchronize_trace(); /* Wait for in-flight events to complete */ + lttng_ust_synchronize_trace(); /* Wait for in-flight events to complete */ __tracepoint_probe_prune_release_queue(); cds_list_for_each_entry_safe(event_enabler, event_tmpenabler, &session->enablers_head, node) @@ -383,7 +378,7 @@ void lttng_event_notifier_group_destroy( &event_notifier_group->event_notifiers_head, node) _lttng_event_notifier_unregister(notifier); - synchronize_trace(); + lttng_ust_synchronize_trace(); cds_list_for_each_entry_safe(notifier_enabler, tmpnotifier_enabler, &event_notifier_group->enablers_head, node) @@ -1219,7 +1214,7 @@ void lttng_probe_provider_unregister_events( _lttng_event_notifier_unregister); /* Wait for grace period. */ - synchronize_trace(); + lttng_ust_synchronize_trace(); /* Prune the unregistration queue. 
*/ __tracepoint_probe_prune_release_queue(); diff --git a/liblttng-ust/lttng-tracer-core.h b/liblttng-ust/lttng-tracer-core.h index bce6c239..76f62510 100644 --- a/liblttng-ust/lttng-tracer-core.h +++ b/liblttng-ust/lttng-tracer-core.h @@ -80,6 +80,10 @@ void lttng_ust_dummy_get_value(struct lttng_ctx_field *field, int lttng_context_is_app(const char *name); void lttng_ust_fixup_tls(void); +extern void (*lttng_ust_liburcu_bp_before_fork)(void); +extern void (*lttng_ust_liburcu_bp_after_fork_parent)(void); +extern void (*lttng_ust_liburcu_bp_after_fork_child)(void); + #ifdef LTTNG_UST_HAVE_PERF_EVENT void lttng_ust_fixup_perf_counter_tls(void); void lttng_perf_lock(void); diff --git a/liblttng-ust/lttng-ust-abi.c b/liblttng-ust/lttng-ust-abi.c index 0d2058a3..a024b616 100644 --- a/liblttng-ust/lttng-ust-abi.c +++ b/liblttng-ust/lttng-ust-abi.c @@ -57,6 +57,7 @@ #include "../libringbuffer/frontend_types.h" #include "../libringbuffer/shm.h" #include "../libcounter/counter.h" +#include "tracepoint-internal.h" #include "lttng-tracer.h" #include "string-utils.h" #include "ust-events-internal.h" @@ -438,7 +439,7 @@ long lttng_cmd(int objd, unsigned int cmd, unsigned long arg, case LTTNG_UST_TRACEPOINT_FIELD_LIST: return lttng_abi_tracepoint_field_list(owner); case LTTNG_UST_WAIT_QUIESCENT: - synchronize_trace(); + lttng_ust_synchronize_trace(); return 0; case LTTNG_UST_EVENT_NOTIFIER_GROUP_CREATE: return lttng_abi_event_notifier_send_fd(owner, diff --git a/liblttng-ust/lttng-ust-comm.c b/liblttng-ust/lttng-ust-comm.c index 4b13571d..bb22a8a9 100644 --- a/liblttng-ust/lttng-ust-comm.c +++ b/liblttng-ust/lttng-ust-comm.c @@ -39,8 +39,9 @@ #include #include #include -#include +#include "futex.h" #include +#include #include #include @@ -434,8 +435,7 @@ void lttng_fixup_ust_mutex_nest_tls(void) static void lttng_fixup_urcu_bp_tls(void) { - rcu_read_lock(); - rcu_read_unlock(); + (void) lttng_ust_urcu_read_ongoing(); } void lttng_ust_fixup_tls(void) @@ -1624,7 +1624,7 @@ void wait_for_sessiond(struct sock_info *sock_info) if (uatomic_read((int32_t *) sock_info->wait_shm_mmap)) goto end_wait; - while (futex_async((int32_t *) sock_info->wait_shm_mmap, + while (lttng_ust_futex_async((int32_t *) sock_info->wait_shm_mmap, FUTEX_WAIT, 0, NULL, NULL, 0)) { switch (errno) { case EWOULDBLOCK: @@ -2315,7 +2315,9 @@ void ust_before_fork(sigset_t *save_sigset) pthread_mutex_lock(&ust_fork_mutex); ust_lock_nocheck(); - urcu_bp_before_fork(); + lttng_ust_urcu_before_fork(); + if (lttng_ust_liburcu_bp_before_fork) + lttng_ust_liburcu_bp_before_fork(); lttng_ust_lock_fd_tracker(); lttng_perf_lock(); } @@ -2343,7 +2345,9 @@ void ust_after_fork_parent(sigset_t *restore_sigset) if (URCU_TLS(lttng_ust_nest_count)) return; DBG("process %d", getpid()); - urcu_bp_after_fork_parent(); + lttng_ust_urcu_after_fork_parent(); + if (lttng_ust_liburcu_bp_after_fork_parent) + lttng_ust_liburcu_bp_after_fork_parent(); /* Release mutexes and reenable signals */ ust_after_fork_common(restore_sigset); } @@ -2369,7 +2373,9 @@ void ust_after_fork_child(sigset_t *restore_sigset) ust_context_vgids_reset(); DBG("process %d", getpid()); /* Release urcu mutexes */ - urcu_bp_after_fork_child(); + lttng_ust_urcu_after_fork_child(); + if (lttng_ust_liburcu_bp_after_fork_child) + lttng_ust_liburcu_bp_after_fork_child(); lttng_ust_cleanup(0); /* Release mutexes and reenable signals */ ust_after_fork_common(restore_sigset); diff --git a/liblttng-ust/lttng-ust-urcu-pointer.c b/liblttng-ust/lttng-ust-urcu-pointer.c new file mode 100644 index 
00000000..94b6a960 --- /dev/null +++ b/liblttng-ust/lttng-ust-urcu-pointer.c @@ -0,0 +1,54 @@ +/* + * lttng-ust-urcu-pointer.c + * + * library wrappers to be used by non-LGPL compatible source code. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include + +#include +/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */ +#include + +void *lttng_ust_rcu_dereference_sym(void *p) +{ + return _lttng_ust_rcu_dereference(p); +} + +void *lttng_ust_rcu_set_pointer_sym(void **p, void *v) +{ + cmm_wmb(); + uatomic_set(p, v); + return v; +} + +void *lttng_ust_rcu_xchg_pointer_sym(void **p, void *v) +{ + cmm_wmb(); + return uatomic_xchg(p, v); +} + +void *lttng_ust_rcu_cmpxchg_pointer_sym(void **p, void *old, void *_new) +{ + cmm_wmb(); + return uatomic_cmpxchg(p, old, _new); +} diff --git a/liblttng-ust/lttng-ust-urcu.c b/liblttng-ust/lttng-ust-urcu.c new file mode 100644 index 00000000..88300031 --- /dev/null +++ b/liblttng-ust/lttng-ust-urcu.c @@ -0,0 +1,733 @@ +/* + * lttng-ust-urcu.c + * + * Userspace RCU library for LTTng-UST, derived from liburcu "bulletproof" version. + * + * Copyright (c) 2009 Mathieu Desnoyers + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. 
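These wrappers exist so that non-LGPL code gets the same publication guarantees as the inline variants. A minimal usage sketch, assuming the matching lttng_ust_rcu_xchg_pointer()/lttng_ust_rcu_dereference() macros from the lttng/urcu pointer header; struct cfg and the two functions are hypothetical:

    #include <stdlib.h>

    struct cfg { int verbose; };
    static struct cfg *active_cfg;

    /* Writer: the cmm_wmb() in the xchg wrapper orders the initialization
     * of *newcfg before the pointer publication. */
    static void cfg_publish(struct cfg *newcfg)
    {
            struct cfg *old;

            old = lttng_ust_rcu_xchg_pointer(&active_cfg, newcfg);
            lttng_ust_urcu_synchronize_rcu(); /* wait out pre-existing readers */
            free(old);
    }

    /* Reader: the dereference is only valid inside a read-side critical section. */
    static int cfg_verbose(void)
    {
            struct cfg *c;
            int v = 0;

            lttng_ust_urcu_read_lock();
            c = lttng_ust_rcu_dereference(active_cfg);
            if (c)
                    v = c->verbose;
            lttng_ust_urcu_read_unlock();
            return v;
    }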
+ */ + +#define _LGPL_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* Do not #define _LGPL_SOURCE to ensure we can emit the wrapper symbols */ +#undef _LGPL_SOURCE +#include +#define _LGPL_SOURCE + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +#ifdef __linux__ +static +void *mremap_wrapper(void *old_address, size_t old_size, + size_t new_size, int flags) +{ + return mremap(old_address, old_size, new_size, flags); +} +#else + +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 + +/* + * mremap wrapper for non-Linux systems not allowing MAYMOVE. + * This is not generic. + */ +static +void *mremap_wrapper(void *old_address, size_t old_size, + size_t new_size, int flags) +{ + assert(!(flags & MREMAP_MAYMOVE)); + + return MAP_FAILED; +} +#endif + +/* Sleep delay in ms */ +#define RCU_SLEEP_DELAY_MS 10 +#define INIT_NR_THREADS 8 +#define ARENA_INIT_ALLOC \ + sizeof(struct registry_chunk) \ + + INIT_NR_THREADS * sizeof(struct lttng_ust_urcu_reader) + +/* + * Active attempts to check for reader Q.S. before calling sleep(). + */ +#define RCU_QS_ACTIVE_ATTEMPTS 100 + +static +int lttng_ust_urcu_refcount; + +/* If the headers do not support the membarrier system call, fall back on smp_mb. */ +#ifdef __NR_membarrier +# define membarrier(...) syscall(__NR_membarrier, __VA_ARGS__) +#else +# define membarrier(...) -ENOSYS +#endif + +enum membarrier_cmd { + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), + /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ + /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ + MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4), +}; + +static +void __attribute__((constructor)) _lttng_ust_urcu_init(void); +static +void __attribute__((destructor)) lttng_ust_urcu_exit(void); + +#ifndef CONFIG_RCU_FORCE_SYS_MEMBARRIER +int lttng_ust_urcu_has_sys_membarrier; +#endif + +/* + * rcu_gp_lock ensures mutual exclusion between threads calling + * synchronize_rcu(). + */ +static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER; +/* + * rcu_registry_lock ensures mutual exclusion between threads + * registering and unregistering themselves to/from the registry, and + * with threads reading that registry from synchronize_rcu(). However, + * this lock is not held all the way through the completion of the + * grace period: it is sporadically released between iterations on + * the registry. + * rcu_registry_lock may nest inside rcu_gp_lock. + */ +static pthread_mutex_t rcu_registry_lock = PTHREAD_MUTEX_INITIALIZER; + +static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; +static int initialized; + +static pthread_key_t lttng_ust_urcu_key; + +struct lttng_ust_urcu_gp lttng_ust_urcu_gp = { .ctr = LTTNG_UST_URCU_GP_COUNT }; + +/* + * Pointer to registry elements. Written to only by each individual reader. Read + * by both the reader and the writers. + */
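Each thread that enters a read-side critical section must appear in the registry declared below. A minimal sketch of the intended thread lifecycle (worker() is hypothetical; registration is idempotent, and the pthread key destructor installed by _lttng_ust_urcu_init() unregisters the thread on exit):

    #include <pthread.h>

    static void *worker(void *arg)
    {
            lttng_ust_urcu_register_thread(); /* no-op if already registered */

            lttng_ust_urcu_read_lock();
            /* ... read RCU-protected data ... */
            lttng_ust_urcu_read_unlock();

            return NULL; /* the TSD destructor unregisters this thread at exit */
    }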
+DEFINE_URCU_TLS(struct lttng_ust_urcu_reader *, lttng_ust_urcu_reader); + +static CDS_LIST_HEAD(registry); + +struct registry_chunk { + size_t data_len; /* data length */ + size_t used; /* amount of data used */ + struct cds_list_head node; /* chunk_list node */ + char data[]; +}; + +struct registry_arena { + struct cds_list_head chunk_list; +}; + +static struct registry_arena registry_arena = { + .chunk_list = CDS_LIST_HEAD_INIT(registry_arena.chunk_list), +}; + +/* Saved fork signal mask, protected by rcu_gp_lock */ +static sigset_t saved_fork_signal_mask; + +static void mutex_lock(pthread_mutex_t *mutex) +{ + int ret; + +#ifndef DISTRUST_SIGNALS_EXTREME + ret = pthread_mutex_lock(mutex); + if (ret) + abort(); +#else /* #ifndef DISTRUST_SIGNALS_EXTREME */ + while ((ret = pthread_mutex_trylock(mutex)) != 0) { + if (ret != EBUSY && ret != EINTR) + abort(); + poll(NULL, 0, 10); + } +#endif /* #else #ifndef DISTRUST_SIGNALS_EXTREME */ +} + +static void mutex_unlock(pthread_mutex_t *mutex) +{ + int ret; + + ret = pthread_mutex_unlock(mutex); + if (ret) + abort(); +} + +static void smp_mb_master(void) +{ + if (caa_likely(lttng_ust_urcu_has_sys_membarrier)) { + if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) + abort(); + } else { + cmm_smp_mb(); + } +} + +/* + * Always called with rcu_registry lock held. Releases this lock between + * iterations and grabs it again. Holds the lock when it returns. + */ +static void wait_for_readers(struct cds_list_head *input_readers, + struct cds_list_head *cur_snap_readers, + struct cds_list_head *qsreaders) +{ + unsigned int wait_loops = 0; + struct lttng_ust_urcu_reader *index, *tmp; + + /* + * Wait for each thread URCU_TLS(lttng_ust_urcu_reader).ctr to either + * indicate quiescence (not nested), or observe the current + * rcu_gp.ctr value. + */ + for (;;) { + if (wait_loops < RCU_QS_ACTIVE_ATTEMPTS) + wait_loops++; + + cds_list_for_each_entry_safe(index, tmp, input_readers, node) { + switch (lttng_ust_urcu_reader_state(&index->ctr)) { + case LTTNG_UST_URCU_READER_ACTIVE_CURRENT: + if (cur_snap_readers) { + cds_list_move(&index->node, + cur_snap_readers); + break; + } + /* Fall-through */ + case LTTNG_UST_URCU_READER_INACTIVE: + cds_list_move(&index->node, qsreaders); + break; + case LTTNG_UST_URCU_READER_ACTIVE_OLD: + /* + * Old snapshot. Leaving node in + * input_readers will make us busy-loop + * until the snapshot becomes current or + * the reader becomes inactive. + */ + break; + } + } + + if (cds_list_empty(input_readers)) { + break; + } else { + /* Temporarily unlock the registry lock. */ + mutex_unlock(&rcu_registry_lock); + if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) + (void) poll(NULL, 0, RCU_SLEEP_DELAY_MS); + else + caa_cpu_relax(); + /* Re-lock the registry lock before the next loop. */ + mutex_lock(&rcu_registry_lock); + } + } +} + +void lttng_ust_urcu_synchronize_rcu(void) +{ + CDS_LIST_HEAD(cur_snap_readers); + CDS_LIST_HEAD(qsreaders); + sigset_t newmask, oldmask; + int ret; + + ret = sigfillset(&newmask); + assert(!ret); + ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); + assert(!ret); + + mutex_lock(&rcu_gp_lock); + + mutex_lock(&rcu_registry_lock); + + if (cds_list_empty(&registry)) + goto out; + + /* All threads should read qparity before accessing the data structure + * pointed to by the new pointer. */ + /* Write the new pointer before changing the qparity */ + smp_mb_master(); + + /* + * Wait for readers to observe original parity or be quiescent. + * wait_for_readers() can release and grab again rcu_registry_lock + * internally. + */
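wait_for_readers() classifies each registered reader via lttng_ust_urcu_reader_state(). A hedged paraphrase of that classification, modeled on liburcu's bulletproof flavor (the nest-mask constant name is an assumption; the state constants are the ones used above):

    /* Sketch only: map a reader counter to the three reader states. */
    static int reader_state_sketch(unsigned long *ctr)
    {
            unsigned long v = CMM_LOAD_SHARED(*ctr);

            if (!(v & LTTNG_UST_URCU_GP_CTR_NEST_MASK))
                    return LTTNG_UST_URCU_READER_INACTIVE;       /* not in a read-side C.S. */
            if (!((v ^ CMM_LOAD_SHARED(lttng_ust_urcu_gp.ctr)) & LTTNG_UST_URCU_GP_CTR_PHASE))
                    return LTTNG_UST_URCU_READER_ACTIVE_CURRENT; /* already sees the new parity */
            return LTTNG_UST_URCU_READER_ACTIVE_OLD;             /* still holds a pre-flip snapshot */
    }

An ACTIVE_OLD reader is the only case that keeps wait_for_readers() looping: it spins for RCU_QS_ACTIVE_ATTEMPTS iterations, then sleeps RCU_SLEEP_DELAY_MS between scans.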
+ wait_for_readers(&registry, &cur_snap_readers, &qsreaders); + + /* + * Adding a cmm_smp_mb() which is _not_ formally required, but makes the + * model easier to understand. It does not have a big performance impact + * anyway, given this is the write-side. + */ + cmm_smp_mb(); + + /* Switch parity: 0 -> 1, 1 -> 0 */ + CMM_STORE_SHARED(lttng_ust_urcu_gp.ctr, lttng_ust_urcu_gp.ctr ^ LTTNG_UST_URCU_GP_CTR_PHASE); + + /* + * Must commit qparity update to memory before waiting for other parity + * quiescent state. Failure to do so could result in the writer waiting + * forever while new readers are always accessing data (no progress). + * Ensured by CMM_STORE_SHARED and CMM_LOAD_SHARED. + */ + + /* + * Adding a cmm_smp_mb() which is _not_ formally required, but makes the + * model easier to understand. It does not have a big performance impact + * anyway, given this is the write-side. + */ + cmm_smp_mb(); + + /* + * Wait for readers to observe new parity or be quiescent. + * wait_for_readers() can release and grab again rcu_registry_lock + * internally. + */ + wait_for_readers(&cur_snap_readers, NULL, &qsreaders); + + /* + * Put quiescent reader list back into registry. + */ + cds_list_splice(&qsreaders, &registry); + + /* + * Finish waiting for reader threads before letting the old ptr be + * freed. + */ + smp_mb_master(); +out: + mutex_unlock(&rcu_registry_lock); + mutex_unlock(&rcu_gp_lock); + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + assert(!ret); +} + +/* + * library wrappers to be used by non-LGPL compatible source code. + */ + +void lttng_ust_urcu_read_lock(void) +{ + _lttng_ust_urcu_read_lock(); +} + +void lttng_ust_urcu_read_unlock(void) +{ + _lttng_ust_urcu_read_unlock(); +} + +int lttng_ust_urcu_read_ongoing(void) +{ + return _lttng_ust_urcu_read_ongoing(); +} + +/* + * Only grow for now. If empty, allocate an ARENA_INIT_ALLOC-sized chunk. + * Else, try expanding the last chunk. If this fails, allocate a new + * chunk twice as big as the last chunk. + * Memory used by chunks _never_ moves. A chunk could theoretically be + * freed when all "used" slots are released, but we don't do it at this + * point. + */ +static +void expand_arena(struct registry_arena *arena) +{ + struct registry_chunk *new_chunk, *last_chunk; + size_t old_chunk_len, new_chunk_len; + + /* No chunk. */ + if (cds_list_empty(&arena->chunk_list)) { + assert(ARENA_INIT_ALLOC >= + sizeof(struct registry_chunk) + + sizeof(struct lttng_ust_urcu_reader)); + new_chunk_len = ARENA_INIT_ALLOC; + new_chunk = (struct registry_chunk *) mmap(NULL, + new_chunk_len, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + if (new_chunk == MAP_FAILED) + abort(); + memset(new_chunk, 0, new_chunk_len); + new_chunk->data_len = + new_chunk_len - sizeof(struct registry_chunk); + cds_list_add_tail(&new_chunk->node, &arena->chunk_list); + return; /* We're done. */ + } + + /* Try expanding last chunk. */ + last_chunk = cds_list_entry(arena->chunk_list.prev, + struct registry_chunk, node); + old_chunk_len = + last_chunk->data_len + sizeof(struct registry_chunk); + new_chunk_len = old_chunk_len << 1; + + /* Don't allow memory mapping to move, just expand. */ + new_chunk = mremap_wrapper(last_chunk, old_chunk_len, + new_chunk_len, 0); + if (new_chunk != MAP_FAILED) { + /* Should not have moved. */ + assert(new_chunk == last_chunk); + memset((char *) last_chunk + old_chunk_len, 0, + new_chunk_len - old_chunk_len); + last_chunk->data_len = + new_chunk_len - sizeof(struct registry_chunk); + return; /* We're done.
*/ + } + + /* Remap did not succeed, we need to add a new chunk. */ + new_chunk = (struct registry_chunk *) mmap(NULL, + new_chunk_len, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + if (new_chunk == MAP_FAILED) + abort(); + memset(new_chunk, 0, new_chunk_len); + new_chunk->data_len = + new_chunk_len - sizeof(struct registry_chunk); + cds_list_add_tail(&new_chunk->node, &arena->chunk_list); +} + +static +struct lttng_ust_urcu_reader *arena_alloc(struct registry_arena *arena) +{ + struct registry_chunk *chunk; + struct lttng_ust_urcu_reader *rcu_reader_reg; + int expand_done = 0; /* Only allow expanding once per alloc */ + size_t len = sizeof(struct lttng_ust_urcu_reader); + +retry: + cds_list_for_each_entry(chunk, &arena->chunk_list, node) { + if (chunk->data_len - chunk->used < len) + continue; + /* Find spot */ + for (rcu_reader_reg = (struct lttng_ust_urcu_reader *) &chunk->data[0]; + rcu_reader_reg < (struct lttng_ust_urcu_reader *) &chunk->data[chunk->data_len]; + rcu_reader_reg++) { + if (!rcu_reader_reg->alloc) { + rcu_reader_reg->alloc = 1; + chunk->used += len; + return rcu_reader_reg; + } + } + } + + if (!expand_done) { + expand_arena(arena); + expand_done = 1; + goto retry; + } + + return NULL; +} + +/* Called with signals off and mutex locked */ +static +void add_thread(void) +{ + struct lttng_ust_urcu_reader *rcu_reader_reg; + int ret; + + rcu_reader_reg = arena_alloc(&registry_arena); + if (!rcu_reader_reg) + abort(); + ret = pthread_setspecific(lttng_ust_urcu_key, rcu_reader_reg); + if (ret) + abort(); + + /* Add to registry */ + rcu_reader_reg->tid = pthread_self(); + assert(rcu_reader_reg->ctr == 0); + cds_list_add(&rcu_reader_reg->node, &registry); + /* + * Reader threads are pointing to the reader registry. This is + * why its memory should never be relocated. + */ + URCU_TLS(lttng_ust_urcu_reader) = rcu_reader_reg; +} + +/* Called with mutex locked */ +static +void cleanup_thread(struct registry_chunk *chunk, + struct lttng_ust_urcu_reader *rcu_reader_reg) +{ + rcu_reader_reg->ctr = 0; + cds_list_del(&rcu_reader_reg->node); + rcu_reader_reg->tid = 0; + rcu_reader_reg->alloc = 0; + chunk->used -= sizeof(struct lttng_ust_urcu_reader); +} + +static +struct registry_chunk *find_chunk(struct lttng_ust_urcu_reader *rcu_reader_reg) +{ + struct registry_chunk *chunk; + + cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) { + if (rcu_reader_reg < (struct lttng_ust_urcu_reader *) &chunk->data[0]) + continue; + if (rcu_reader_reg >= (struct lttng_ust_urcu_reader *) &chunk->data[chunk->data_len]) + continue; + return chunk; + } + return NULL; +} + +/* Called with signals off and mutex locked */ +static +void remove_thread(struct lttng_ust_urcu_reader *rcu_reader_reg) +{ + cleanup_thread(find_chunk(rcu_reader_reg), rcu_reader_reg); + URCU_TLS(lttng_ust_urcu_reader) = NULL; +} + +/* Disable signals, take mutex, add to registry */ +void lttng_ust_urcu_register(void) +{ + sigset_t newmask, oldmask; + int ret; + + ret = sigfillset(&newmask); + if (ret) + abort(); + ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); + if (ret) + abort(); + + /* + * Check if a signal concurrently registered our thread since + * the check in rcu_read_lock(). + */ + if (URCU_TLS(lttng_ust_urcu_reader)) + goto end; + + /* + * Take care of early registration before lttng_ust_urcu constructor.
+ */ + _lttng_ust_urcu_init(); + + mutex_lock(&rcu_registry_lock); + add_thread(); + mutex_unlock(&rcu_registry_lock); +end: + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + if (ret) + abort(); +} + +void lttng_ust_urcu_register_thread(void) +{ + if (caa_unlikely(!URCU_TLS(lttng_ust_urcu_reader))) + lttng_ust_urcu_register(); /* If not yet registered. */ +} + +/* Disable signals, take mutex, remove from registry */ +static +void lttng_ust_urcu_unregister(struct lttng_ust_urcu_reader *rcu_reader_reg) +{ + sigset_t newmask, oldmask; + int ret; + + ret = sigfillset(&newmask); + if (ret) + abort(); + ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); + if (ret) + abort(); + + mutex_lock(&rcu_registry_lock); + remove_thread(rcu_reader_reg); + mutex_unlock(&rcu_registry_lock); + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + if (ret) + abort(); + lttng_ust_urcu_exit(); +} + +/* + * Remove thread from the registry when it exits, and flag it as + * destroyed so garbage collection can take care of it. + */ +static +void lttng_ust_urcu_thread_exit_notifier(void *rcu_key) +{ + lttng_ust_urcu_unregister(rcu_key); +} + +#ifdef CONFIG_RCU_FORCE_SYS_MEMBARRIER +static +void lttng_ust_urcu_sys_membarrier_status(bool available) +{ + if (!available) + abort(); +} +#else +static +void lttng_ust_urcu_sys_membarrier_status(bool available) +{ + if (!available) + return; + lttng_ust_urcu_has_sys_membarrier = 1; +} +#endif + +static +void lttng_ust_urcu_sys_membarrier_init(void) +{ + bool available = false; + int mask; + + mask = membarrier(MEMBARRIER_CMD_QUERY, 0); + if (mask >= 0) { + if (mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED) { + if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0)) + abort(); + available = true; + } + } + lttng_ust_urcu_sys_membarrier_status(available); +} + +static +void _lttng_ust_urcu_init(void) +{ + mutex_lock(&init_lock); + if (!lttng_ust_urcu_refcount++) { + int ret; + + ret = pthread_key_create(&lttng_ust_urcu_key, + lttng_ust_urcu_thread_exit_notifier); + if (ret) + abort(); + lttng_ust_urcu_sys_membarrier_init(); + initialized = 1; + } + mutex_unlock(&init_lock); +} + +static +void lttng_ust_urcu_exit(void) +{ + mutex_lock(&init_lock); + if (!--lttng_ust_urcu_refcount) { + struct registry_chunk *chunk, *tmp; + int ret; + + cds_list_for_each_entry_safe(chunk, tmp, + &registry_arena.chunk_list, node) { + munmap((void *) chunk, chunk->data_len + + sizeof(struct registry_chunk)); + } + CDS_INIT_LIST_HEAD(&registry_arena.chunk_list); + ret = pthread_key_delete(lttng_ust_urcu_key); + if (ret) + abort(); + } + mutex_unlock(&init_lock); +} + +/* + * Holding the rcu_gp_lock and rcu_registry_lock across fork will make + * sure that fork() does not race with a concurrent thread executing + * with any of those locks held. This ensures that the registry and data + * protected by rcu_gp_lock are in a coherent state in the child. + */ +void lttng_ust_urcu_before_fork(void) +{ + sigset_t newmask, oldmask; + int ret; + + ret = sigfillset(&newmask); + assert(!ret); + ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask); + assert(!ret); + mutex_lock(&rcu_gp_lock); + mutex_lock(&rcu_registry_lock); + saved_fork_signal_mask = oldmask; +} + +void lttng_ust_urcu_after_fork_parent(void) +{ + sigset_t oldmask; + int ret; + + oldmask = saved_fork_signal_mask; + mutex_unlock(&rcu_registry_lock); + mutex_unlock(&rcu_gp_lock); + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + assert(!ret); +} + +/* + * Prune all entries from registry except our own thread. Fits the Linux + * fork behavior.
Called with rcu_gp_lock and rcu_registry_lock held. + */ +static +void lttng_ust_urcu_prune_registry(void) +{ + struct registry_chunk *chunk; + struct lttng_ust_urcu_reader *rcu_reader_reg; + + cds_list_for_each_entry(chunk, &registry_arena.chunk_list, node) { + for (rcu_reader_reg = (struct lttng_ust_urcu_reader *) &chunk->data[0]; + rcu_reader_reg < (struct lttng_ust_urcu_reader *) &chunk->data[chunk->data_len]; + rcu_reader_reg++) { + if (!rcu_reader_reg->alloc) + continue; + if (rcu_reader_reg->tid == pthread_self()) + continue; + cleanup_thread(chunk, rcu_reader_reg); + } + } +} + +void lttng_ust_urcu_after_fork_child(void) +{ + sigset_t oldmask; + int ret; + + lttng_ust_urcu_prune_registry(); + oldmask = saved_fork_signal_mask; + mutex_unlock(&rcu_registry_lock); + mutex_unlock(&rcu_gp_lock); + ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + assert(!ret); +} diff --git a/liblttng-ust/rculfhash-internal.h b/liblttng-ust/rculfhash-internal.h new file mode 100644 index 00000000..86eb501f --- /dev/null +++ b/liblttng-ust/rculfhash-internal.h @@ -0,0 +1,179 @@ +#ifndef _LTTNG_UST_RCULFHASH_INTERNAL_H +#define _LTTNG_UST_RCULFHASH_INTERNAL_H + +/* + * liblttng-ust/rculfhash-internal.h + * + * Internal header for Lock-Free RCU Hash Table + * + * Copyright 2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "rculfhash.h" +#include +#include +#include + +#ifdef DEBUG +#define dbg_printf(fmt, args...) printf("[debug lttng-ust rculfhash] " fmt, ## args) +#else +#define dbg_printf(fmt, args...) \ +do { \ + /* do nothing but check printf format */ \ + if (0) \ + printf("[debug lttng-ust rculfhash] " fmt, ## args); \ +} while (0) +#endif + +#if (CAA_BITS_PER_LONG == 32) +#define MAX_TABLE_ORDER 32 +#else +#define MAX_TABLE_ORDER 64 +#endif + +#define MAX_CHUNK_TABLE (1UL << 10) + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif + +/* + * lttng_ust_lfht: Top-level data structure representing a lock-free hash + * table. Defined in the implementation file to make it an opaque + * cookie to users. + * + * The fields used in fast-paths are placed near the end of the + * structure, because we need to have a variable-sized union to contain + * the mm plugin fields, which are used in the fast path. + */
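The payoff of this layout is that the lookup fast path goes through a single indirect call without touching ht->mm. A minimal sketch of the access pattern (mirroring lookup_bucket() later in rculfhash.c; ht and hash are assumed to be in scope):

    /* Hash to bucket head without dereferencing ht->mm. */
    unsigned long size = CMM_LOAD_SHARED(ht->size); /* always a power of 2 */
    struct lttng_ust_lfht_node *bucket = ht->bucket_at(ht, hash & (size - 1));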
+struct lttng_ust_lfht { + /* Initial configuration items */ + unsigned long max_nr_buckets; + const struct lttng_ust_lfht_mm_type *mm; /* memory management plugin */ + const struct rcu_flavor_struct *flavor; /* RCU flavor */ + + /* + * We need to put the work threads offline (QSBR) when taking this + * mutex, because we use synchronize_rcu within this mutex critical + * section, which waits on read-side critical sections, and could + * therefore cause grace-period deadlock if we hold off RCU G.P. + * completion. + */ + pthread_mutex_t resize_mutex; /* resize mutex: add/del mutex */ + unsigned int in_progress_destroy; + unsigned long resize_target; + int resize_initiated; + + /* + * Variables needed for add and remove fast-paths. + */ + int flags; + unsigned long min_alloc_buckets_order; + unsigned long min_nr_alloc_buckets; + + /* + * Variables needed for the lookup, add and remove fast-paths. + */ + unsigned long size; /* always a power of 2, shared (RCU) */ + /* + * bucket_at pointer is kept here to skip the extra level of + * dereference needed to get to "mm" (this is a fast-path). + */ + struct lttng_ust_lfht_node *(*bucket_at)(struct lttng_ust_lfht *ht, + unsigned long index); + /* + * Dynamic length "tbl_chunk" needs to be at the end of + * lttng_ust_lfht. + */ + union { + /* + * Contains the per order-index-level bucket node table. + * The size of each bucket node table is half the number + * of hashes contained in this order (except for order 0). + * The minimum allocation buckets size parameter allows + * combining the bucket node arrays of the lowermost + * levels to improve cache locality for small index orders. + */ + struct lttng_ust_lfht_node *tbl_order[MAX_TABLE_ORDER]; + + /* + * Contains the bucket node chunks. The size of each + * bucket node chunk is ->min_alloc_size (we avoid + * allocating chunks of different sizes). Chunks improve + * cache locality for small index orders, and are more + * friendly with environments where allocation of large + * contiguous memory areas is challenging due to memory + * fragmentation concerns or inability to use virtual + * memory addressing. + */ + struct lttng_ust_lfht_node *tbl_chunk[0]; + + /* + * Memory mapping with room for all possible buckets. + * Their memory is allocated when needed. + */ + struct lttng_ust_lfht_node *tbl_mmap; + }; + /* + * End of variables needed for the lookup, add and remove + * fast-paths.
+ */ +}; + +extern unsigned int lttng_ust_lfht_fls_ulong(unsigned long x); +extern int lttng_ust_lfht_get_count_order_ulong(unsigned long x); + +#ifdef POISON_FREE +#define poison_free(ptr) \ + do { \ + if (ptr) { \ + memset(ptr, 0x42, sizeof(*(ptr))); \ + free(ptr); \ + } \ + } while (0) +#else +#define poison_free(ptr) free(ptr) +#endif + +static inline +struct lttng_ust_lfht *__default_alloc_lttng_ust_lfht( + const struct lttng_ust_lfht_mm_type *mm, + unsigned long lttng_ust_lfht_size, + unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + struct lttng_ust_lfht *ht; + + ht = calloc(1, lttng_ust_lfht_size); + assert(ht); + + ht->mm = mm; + ht->bucket_at = mm->bucket_at; + ht->min_nr_alloc_buckets = min_nr_alloc_buckets; + ht->min_alloc_buckets_order = + lttng_ust_lfht_get_count_order_ulong(min_nr_alloc_buckets); + ht->max_nr_buckets = max_nr_buckets; + + return ht; +} + +#endif /* _LTTNG_UST_RCULFHASH_INTERNAL_H */ diff --git a/liblttng-ust/rculfhash-mm-chunk.c b/liblttng-ust/rculfhash-mm-chunk.c new file mode 100644 index 00000000..2eef6ebc --- /dev/null +++ b/liblttng-ust/rculfhash-mm-chunk.c @@ -0,0 +1,97 @@ +/* + * rculfhash-mm-chunk.c + * + * Chunk based memory management for Lock-Free RCU Hash Table + * + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include "rculfhash-internal.h" + +static +void lttng_ust_lfht_alloc_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) { + ht->tbl_chunk[0] = calloc(ht->min_nr_alloc_buckets, + sizeof(struct lttng_ust_lfht_node)); + assert(ht->tbl_chunk[0]); + } else if (order > ht->min_alloc_buckets_order) { + unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_buckets_order); + + for (i = len; i < 2 * len; i++) { + ht->tbl_chunk[i] = calloc(ht->min_nr_alloc_buckets, + sizeof(struct lttng_ust_lfht_node)); + assert(ht->tbl_chunk[i]); + } + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +/* + * lttng_ust_lfht_free_bucket_table() should be called with decreasing order. + * When lttng_ust_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. 
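A worked example of the chunk layout may help (an illustration, not part of the patch); assume min_nr_alloc_buckets = 4, i.e. min_alloc_buckets_order = 2:

    order 0:     allocates tbl_chunk[0], covering buckets 0..3
    orders 1..2: nothing to do, already covered by tbl_chunk[0]
    order 3:     len = 1UL << (3 - 1 - 2) = 1, allocates tbl_chunk[1] (buckets 4..7)
    order 4:     len = 1UL << (4 - 1 - 2) = 2, allocates tbl_chunk[2..3] (buckets 8..15)

bucket_at() below then finds bucket 13 as chunk = 13 >> 2 = 3, offset = 13 & 3 = 1, i.e. tbl_chunk[3][1].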
+ */ +static +void lttng_ust_lfht_free_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) + poison_free(ht->tbl_chunk[0]); + else if (order > ht->min_alloc_buckets_order) { + unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_buckets_order); + + for (i = len; i < 2 * len; i++) + poison_free(ht->tbl_chunk[i]); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +static +struct lttng_ust_lfht_node *bucket_at(struct lttng_ust_lfht *ht, unsigned long index) +{ + unsigned long chunk, offset; + + chunk = index >> ht->min_alloc_buckets_order; + offset = index & (ht->min_nr_alloc_buckets - 1); + return &ht->tbl_chunk[chunk][offset]; +} + +static +struct lttng_ust_lfht *alloc_lttng_ust_lfht(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + unsigned long nr_chunks, lttng_ust_lfht_size; + + min_nr_alloc_buckets = max(min_nr_alloc_buckets, + max_nr_buckets / MAX_CHUNK_TABLE); + nr_chunks = max_nr_buckets / min_nr_alloc_buckets; + lttng_ust_lfht_size = offsetof(struct lttng_ust_lfht, tbl_chunk) + + sizeof(struct lttng_ust_lfht_node *) * nr_chunks; + lttng_ust_lfht_size = max(lttng_ust_lfht_size, sizeof(struct lttng_ust_lfht)); + + return __default_alloc_lttng_ust_lfht( + &lttng_ust_lfht_mm_chunk, lttng_ust_lfht_size, + min_nr_alloc_buckets, max_nr_buckets); +} + +const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_chunk = { + .alloc_lttng_ust_lfht = alloc_lttng_ust_lfht, + .alloc_bucket_table = lttng_ust_lfht_alloc_bucket_table, + .free_bucket_table = lttng_ust_lfht_free_bucket_table, + .bucket_at = bucket_at, +}; diff --git a/liblttng-ust/rculfhash-mm-mmap.c b/liblttng-ust/rculfhash-mm-mmap.c new file mode 100644 index 00000000..5b02c406 --- /dev/null +++ b/liblttng-ust/rculfhash-mm-mmap.c @@ -0,0 +1,214 @@ +/* + * rculfhash-mm-mmap.c + * + * mmap/reservation based memory management for Lock-Free RCU Hash Table + * + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include "rculfhash-internal.h" + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +/* + * The allocation scheme used by the mmap based RCU hash table is to make a + * large inaccessible mapping to reserve memory without allocating it. + * Then smaller chunks are allocated by overlapping read/write mappings which + * do allocate memory. Deallocation is done by an overlapping inaccessible + * mapping. + * + * This scheme was tested on Linux, macOS and Solaris. However, on Cygwin the + * mmap wrapper is based on the Windows NtMapViewOfSection API which doesn't + * support overlapping mappings.
+ * + * An alternative to the overlapping mappings is to use mprotect to change the + * protection on chunks of the large mapping, read/write to allocate and none + * to deallocate. This works perfectly on Cygwin and Solaris but on Linux a + * call to madvise is also required to deallocate and it just doesn't work on + * macOS. + * + * For this reason, we keep the original scheme on all platforms except Cygwin. + */ + + +/* Reserve inaccessible memory space without allocating it */ +static +void *memory_map(size_t length) +{ + void *ret; + + ret = mmap(NULL, length, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ret == MAP_FAILED) { + perror("mmap"); + abort(); + } + return ret; +} + +static +void memory_unmap(void *ptr, size_t length) +{ + if (munmap(ptr, length)) { + perror("munmap"); + abort(); + } +} + +#ifdef __CYGWIN__ +/* Set protection to read/write to allocate a memory chunk */ +static +void memory_populate(void *ptr, size_t length) +{ + if (mprotect(ptr, length, PROT_READ | PROT_WRITE)) { + perror("mprotect"); + abort(); + } +} + +/* Set protection to none to deallocate a memory chunk */ +static +void memory_discard(void *ptr, size_t length) +{ + if (mprotect(ptr, length, PROT_NONE)) { + perror("mprotect"); + abort(); + } +} + +#else /* __CYGWIN__ */ + +static +void memory_populate(void *ptr, size_t length) +{ + if (mmap(ptr, length, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0) != ptr) { + perror("mmap"); + abort(); + } +} + +/* + * Discard garbage memory so that the system does not try to save it + * when swapping it out. The mapping stays reserved, but inaccessible. + */ +static +void memory_discard(void *ptr, size_t length) +{ + if (mmap(ptr, length, PROT_NONE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0) != ptr) { + perror("mmap"); + abort(); + } +} +#endif /* __CYGWIN__ */ + +static +void lttng_ust_lfht_alloc_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) { + if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { + /* small table */ + ht->tbl_mmap = calloc(ht->max_nr_buckets, + sizeof(*ht->tbl_mmap)); + assert(ht->tbl_mmap); + return; + } + /* large table */ + ht->tbl_mmap = memory_map(ht->max_nr_buckets + * sizeof(*ht->tbl_mmap)); + memory_populate(ht->tbl_mmap, + ht->min_nr_alloc_buckets * sizeof(*ht->tbl_mmap)); + } else if (order > ht->min_alloc_buckets_order) { + /* large table */ + unsigned long len = 1UL << (order - 1); + + assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); + memory_populate(ht->tbl_mmap + len, + len * sizeof(*ht->tbl_mmap)); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +/* + * lttng_ust_lfht_free_bucket_table() should be called with decreasing order. + * When lttng_ust_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. + */
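A worked example of the large-table path (an illustration, not part of the patch): with min_nr_alloc_buckets = 128 (min_alloc_buckets_order = 7) and max_nr_buckets = 1024,

    order 0:     reserve 1024 * sizeof(*tbl_mmap) bytes PROT_NONE, populate buckets [0, 128)
    orders 1..7: nothing to do, covered by the order-0 populate
    order 8:     len = 128, populate buckets [128, 256)
    order 9:     len = 256, populate buckets [256, 512)
    order 10:    len = 512, populate buckets [512, 1024)

Freeing walks the same ranges in decreasing order, re-protecting each range with memory_discard(), and finally unmaps the whole reservation at order 0.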
+static +void lttng_ust_lfht_free_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) { + if (ht->min_nr_alloc_buckets == ht->max_nr_buckets) { + /* small table */ + poison_free(ht->tbl_mmap); + return; + } + /* large table */ + memory_unmap(ht->tbl_mmap, + ht->max_nr_buckets * sizeof(*ht->tbl_mmap)); + } else if (order > ht->min_alloc_buckets_order) { + /* large table */ + unsigned long len = 1UL << (order - 1); + + assert(ht->min_nr_alloc_buckets < ht->max_nr_buckets); + memory_discard(ht->tbl_mmap + len, len * sizeof(*ht->tbl_mmap)); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +static +struct lttng_ust_lfht_node *bucket_at(struct lttng_ust_lfht *ht, unsigned long index) +{ + return &ht->tbl_mmap[index]; +} + +static +struct lttng_ust_lfht *alloc_lttng_ust_lfht(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + unsigned long page_bucket_size; + + page_bucket_size = getpagesize() / sizeof(struct lttng_ust_lfht_node); + if (max_nr_buckets <= page_bucket_size) { + /* small table */ + min_nr_alloc_buckets = max_nr_buckets; + } else { + /* large table */ + min_nr_alloc_buckets = max(min_nr_alloc_buckets, + page_bucket_size); + } + + return __default_alloc_lttng_ust_lfht( + &lttng_ust_lfht_mm_mmap, sizeof(struct lttng_ust_lfht), + min_nr_alloc_buckets, max_nr_buckets); +} + +const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_mmap = { + .alloc_lttng_ust_lfht = alloc_lttng_ust_lfht, + .alloc_bucket_table = lttng_ust_lfht_alloc_bucket_table, + .free_bucket_table = lttng_ust_lfht_free_bucket_table, + .bucket_at = bucket_at, +}; diff --git a/liblttng-ust/rculfhash-mm-order.c b/liblttng-ust/rculfhash-mm-order.c new file mode 100644 index 00000000..78f2c7ae --- /dev/null +++ b/liblttng-ust/rculfhash-mm-order.c @@ -0,0 +1,90 @@ +/* + * rculfhash-mm-order.c + * + * Order based memory management for Lock-Free RCU Hash Table + * + * Copyright 2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +static +void lttng_ust_lfht_alloc_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) { + ht->tbl_order[0] = calloc(ht->min_nr_alloc_buckets, + sizeof(struct lttng_ust_lfht_node)); + assert(ht->tbl_order[0]); + } else if (order > ht->min_alloc_buckets_order) { + ht->tbl_order[order] = calloc(1UL << (order - 1), + sizeof(struct lttng_ust_lfht_node)); + assert(ht->tbl_order[order]); + } + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +/* + * lttng_ust_lfht_free_bucket_table() should be called with decreasing order. + * When lttng_ust_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. + */
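A worked example of the order-based indexing (an illustration, not part of the patch), with min_nr_alloc_buckets = 2 so that tbl_order[0] covers buckets 0..1:

    index 1: 1 < min_nr_alloc_buckets -> tbl_order[0][1]
    index 2: order = fls(2) = 2       -> tbl_order[2][2 & 1] = tbl_order[2][0]
    index 5: order = fls(5) = 3       -> tbl_order[3][5 & 3] = tbl_order[3][1]

tbl_order[order] holds 1UL << (order - 1) buckets (e.g. buckets 4..7 for order 3), matching the bucket node table sizes described in rculfhash.c.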
+static +void lttng_ust_lfht_free_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + if (order == 0) + poison_free(ht->tbl_order[0]); + else if (order > ht->min_alloc_buckets_order) + poison_free(ht->tbl_order[order]); + /* Nothing to do for 0 < order && order <= ht->min_alloc_buckets_order */ +} + +static +struct lttng_ust_lfht_node *bucket_at(struct lttng_ust_lfht *ht, unsigned long index) +{ + unsigned long order; + + if (index < ht->min_nr_alloc_buckets) { + dbg_printf("bucket index %lu order 0 aridx 0\n", index); + return &ht->tbl_order[0][index]; + } + /* + * equivalent to lttng_ust_lfht_get_count_order_ulong(index + 1), but + * optimizes away the non-existing 0 special-case for + * lttng_ust_lfht_get_count_order_ulong. + */ + order = lttng_ust_lfht_fls_ulong(index); + dbg_printf("bucket index %lu order %lu aridx %lu\n", + index, order, index & ((1UL << (order - 1)) - 1)); + return &ht->tbl_order[order][index & ((1UL << (order - 1)) - 1)]; +} + +static +struct lttng_ust_lfht *alloc_lttng_ust_lfht(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets) +{ + return __default_alloc_lttng_ust_lfht( + &lttng_ust_lfht_mm_order, sizeof(struct lttng_ust_lfht), + min_nr_alloc_buckets, max_nr_buckets); +} + +const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_order = { + .alloc_lttng_ust_lfht = alloc_lttng_ust_lfht, + .alloc_bucket_table = lttng_ust_lfht_alloc_bucket_table, + .free_bucket_table = lttng_ust_lfht_free_bucket_table, + .bucket_at = bucket_at, +}; diff --git a/liblttng-ust/rculfhash.c b/liblttng-ust/rculfhash.c new file mode 100644 index 00000000..8aa8bc3e --- /dev/null +++ b/liblttng-ust/rculfhash.c @@ -0,0 +1,1314 @@ +/* + * rculfhash.c + * + * Userspace RCU library - Lock-Free Resizable RCU Hash Table + * + * Copyright 2010-2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Based on the following articles: + * - Ori Shalev and Nir Shavit. Split-ordered lists: Lock-free + * extensible hash tables. J. ACM 53, 3 (May 2006), 379-405. + * - Michael, M. M. High performance dynamic lock-free hash tables + * and list-based sets. In Proceedings of the fourteenth annual ACM + * symposium on Parallel algorithms and architectures, ACM Press, + * (2002), 73-82. + * + * Some specificities of this Lock-Free Resizable RCU Hash Table + * implementation: + * + * - RCU read-side critical section allows readers to perform hash + * table lookups, as well as traversals, and use the returned objects + * safely by allowing memory reclaim to take place only after a grace + * period. + * - Add and remove operations are lock-free, and do not need to + * allocate memory.
They need to be executed within RCU read-side + * critical section to ensure the objects they read are valid and to + * deal with the cmpxchg ABA problem. + * - add and add_unique operations are supported. add_unique checks if + * the node key already exists in the hash table. It ensures not to + * populate a duplicate key if the node key already exists in the hash + * table. + * - The resize operation executes concurrently with + * add/add_unique/add_replace/remove/lookup/traversal. + * - Hash table nodes are contained within a split-ordered list. This + * list is ordered by incrementing reversed-bits-hash value. + * - An index of bucket nodes is kept. These bucket nodes are the hash + * table "buckets". These buckets are internal nodes that allow + * performing a fast hash lookup, similarly to a skip list. These + * buckets are chained together in the split-ordered list, which + * allows recursive expansion by inserting new buckets between the + * existing buckets. The split-ordered list allows adding new buckets + * between existing buckets as the table needs to grow. + * - The resize operation for small tables only allows expanding the + * hash table. It is triggered automatically by detecting long chains + * in the add operation. + * - The resize operation for larger tables (and available through an + * API) allows both expanding and shrinking the hash table. + * - Split-counters are used to keep track of the number of + * nodes within the hash table for automatic resize triggering. + * - Resize operation initiated by long chain detection is executed by a + * worker thread, which keeps lock-freedom of add and remove. + * - Resize operations are protected by a mutex. + * - The removal operation is split in two parts: first, a "removed" + * flag is set in the next pointer within the node to remove. Then, + * a "garbage collection" is performed in the bucket containing the + * removed node (from the start of the bucket up to the removed node). + * All encountered nodes with "removed" flag set in their next + * pointers are removed from the linked-list. If the cmpxchg used for + * removal fails (due to concurrent garbage-collection or concurrent + * add), we retry from the beginning of the bucket. This ensures that + * the node with "removed" flag set is removed from the hash table + * (not visible to lookups anymore) before the RCU read-side critical + * section held across removal ends. Furthermore, this ensures that + * the node with "removed" flag set is removed from the linked-list + * before its memory is reclaimed. After setting the "removal" flag, + * only the thread whose removal is the first to set the "removal + * owner" flag (with an xchg) into a node's next pointer is considered + * to have succeeded its removal (and thus owns the node to reclaim). + * Because we garbage-collect starting from an invariant node (the + * start-of-bucket bucket node) up to the "removed" node (or find a + * reverse-hash that is higher), we are sure that a successful + * traversal of the chain leads to a chain that is present in the + * linked-list (the start node is never removed) and that it does not + * contain the "removed" node anymore, even if concurrent delete/add + * operations are changing the structure of the list concurrently. + * - The add operations perform garbage collection of buckets if they + * encounter nodes with removed flag set in the bucket where they want + * to add their new node.
This ensures lock-freedom of the add operation by + * helping the remover unlink nodes from the list rather than waiting + * for it to do so. + * - There are three memory backends for the hash table buckets: the + * "order table", the "chunks", and the "mmap". + * - These bucket containers contain a compact version of the hash table + * nodes. + * - The RCU "order table": + * - has a first level table indexed by log2(hash index) which is + * copied and expanded by the resize operation. This order table + * allows finding the "bucket node" tables. + * - There is one bucket node table per hash index order. The size of + * each bucket node table is half the number of hashes contained in + * this order (except for order 0). + * - The RCU "chunks" backend is best suited for close interaction with a page + * allocator. It uses a linear array as index to "chunks" containing + * each the same number of buckets. + * - The RCU "mmap" memory backend uses a single memory map to hold + * all buckets. + * - synchronize_rcu is used to garbage-collect the old bucket node table. + * + * Ordering Guarantees: + * + * To discuss these guarantees, we first define "read" operation as any + * of the basic lttng_ust_lfht_lookup, lttng_ust_lfht_next_duplicate, + * lttng_ust_lfht_first, lttng_ust_lfht_next operation, as well as + * lttng_ust_lfht_add_unique (failure). + * + * We define "read traversal" operation as any of the following + * group of operations + * - lttng_ust_lfht_lookup followed by iteration with lttng_ust_lfht_next_duplicate + * (and/or lttng_ust_lfht_next, although less common). + * - lttng_ust_lfht_add_unique (failure) followed by iteration with + * lttng_ust_lfht_next_duplicate (and/or lttng_ust_lfht_next, although less + * common). + * - lttng_ust_lfht_first followed by iteration with lttng_ust_lfht_next (and/or + * lttng_ust_lfht_next_duplicate, although less common). + * + * We define "write" operations as any of lttng_ust_lfht_add, lttng_ust_lfht_replace, + * lttng_ust_lfht_add_unique (success), lttng_ust_lfht_add_replace, lttng_ust_lfht_del. + * + * When lttng_ust_lfht_add_unique succeeds (returns the node passed as + * parameter), it acts as a "write" operation. When lttng_ust_lfht_add_unique + * fails (returns a node different from the one passed as parameter), it + * acts as a "read" operation. A lttng_ust_lfht_add_unique failure is a + * lttng_ust_lfht_lookup "read" operation, therefore, any ordering guarantee + * referring to "lookup" implies any of "lookup" or lttng_ust_lfht_add_unique + * (failure). + * + * We define "prior" and "later" node as nodes observable by reads and + * read traversals respectively before and after a write or sequence of + * write operations. + * + * Hash-table operations are often cascaded, for example, the pointer + * returned by a lttng_ust_lfht_lookup() might be passed to a lttng_ust_lfht_next(), + * whose return value might in turn be passed to another hash-table + * operation. This entire cascaded series of operations must be enclosed + * by a pair of matching rcu_read_lock() and rcu_read_unlock() + * operations. + * + * The following ordering guarantees are offered by this hash table: + * + * A.1) "read" after "write": if there is ordering between a write and a + * later read, then the read is guaranteed to see the write or some + * later write.
+ * A.2) "read traversal" after "write": given that there is dependency + * ordering between reads in a "read traversal", if there is + * ordering between a write and the first read of the traversal, + * then the "read traversal" is guaranteed to see the write or + * some later write. + * B.1) "write" after "read": if there is ordering between a read and a + * later write, then the read will never see the write. + * B.2) "write" after "read traversal": given that there is dependency + * ordering between reads in a "read traversal", if there is + * ordering between the last read of the traversal and a later + * write, then the "read traversal" will never see the write. + * C) "write" while "read traversal": if a write occurs during a "read + * traversal", the traversal may, or may not, see the write. + * D.1) "write" after "write": if there is ordering between a write and + * a later write, then the later write is guaranteed to see the + * effects of the first write. + * D.2) Concurrent "write" pairs: The system will assign an arbitrary + * order to any pair of concurrent conflicting writes. + * Non-conflicting writes (for example, to different keys) are + * unordered. + * E) If a grace period separates a "del" or "replace" operation + * and a subsequent operation, then that subsequent operation is + * guaranteed not to see the removed item. + * F) Uniqueness guarantee: given a hash table that does not contain + * duplicate items for a given key, there will only be one item in + * the hash table after an arbitrary sequence of add_unique and/or + * add_replace operations. Note, however, that a pair of + * concurrent read operations might well access two different items + * with that key. + * G.1) If a pair of lookups for a given key are ordered (e.g. by a + * memory barrier), then the second lookup will return the same + * node as the previous lookup, or some later node. + * G.2) A "read traversal" that starts after the end of a prior "read + * traversal" (ordered by memory barriers) is guaranteed to see the + * same nodes as the previous traversal, or some later nodes. + * G.3) Concurrent "read" pairs: concurrent reads are unordered. For + * example, if a pair of reads to the same key run concurrently + * with an insertion of that same key, the reads remain unordered + * regardless of their return values. In other words, you cannot + * rely on the values returned by the reads to deduce ordering. + * + * Progress guarantees: + * + * * Reads are wait-free. These operations always move forward in the + * hash table linked list, and this list has no loop. + * * Writes are lock-free. Any retry loop performed by a write operation + * is triggered by progress made within another update operation. + * + * Bucket node tables: + * + * hash table hash table the last all bucket node tables + * order size bucket node 0 1 2 3 4 5 6(index) + * table size + * 0 1 1 1 + * 1 2 1 1 1 + * 2 4 2 1 1 2 + * 3 8 4 1 1 2 4 + * 4 16 8 1 1 2 4 8 + * 5 32 16 1 1 2 4 8 16 + * 6 64 32 1 1 2 4 8 16 32 + * + * When growing/shrinking, we only focus on the last bucket node table, + * whose size is (!order ? 1 : (1 << (order - 1))). + * + * Example for growing/shrinking: + * grow hash table from order 5 to 6: init the index=6 bucket node table + * shrink hash table from order 6 to 5: fini the index=6 bucket node table + * + * A bit of ascii art explanation: + * + * The order index is the off-by-one compared to the actual power of 2 + * because we use index 0 to deal with the 0 special-case.
+ * + * This shows the nodes for a small table ordered by reversed bits: + * + * bits reverse + * 0 000 000 + * 4 100 001 + * 2 010 010 + * 6 110 011 + * 1 001 100 + * 5 101 101 + * 3 011 110 + * 7 111 111 + * + * This shows the nodes in order of non-reversed bits, linked by + * reversed-bit order. + * + * order bits reverse + * 0 0 000 000 + * 1 | 1 001 100 <- + * 2 | | 2 010 010 <- | + * | | | 3 011 110 | <- | + * 3 -> | | | 4 100 001 | | + * -> | | 5 101 101 | + * -> | 6 110 011 + * -> 7 111 111 + */ + +/* + * Note on port to lttng-ust: auto-resize and accounting features are + * removed. + */ + +#define _LGPL_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "rculfhash.h" +#include "rculfhash-internal.h" +#include +#include +#include + +/* + * Split-counters lazily update the global counter every 1024 + * additions/removals. They automatically keep track of whether a + * resize is required. + * We use the bucket length as an indicator of the need to expand for + * small tables and machines lacking per-cpu data support. + */ +#define COUNT_COMMIT_ORDER 10 + +/* + * Define the minimum table size. + */ +#define MIN_TABLE_ORDER 0 +#define MIN_TABLE_SIZE (1UL << MIN_TABLE_ORDER) + +/* + * Minimum number of bucket nodes to touch per thread to parallelize grow/shrink. + */ +#define MIN_PARTITION_PER_THREAD_ORDER 12 +#define MIN_PARTITION_PER_THREAD (1UL << MIN_PARTITION_PER_THREAD_ORDER) + +/* + * The removed flag needs to be updated atomically with the pointer. + * It indicates that no node must attach to the node scheduled for + * removal, and that node garbage collection must be performed. + * The bucket flag does not need to be updated atomically with the + * pointer, but it is added as a pointer low bit flag to save space. + * The "removal owner" flag is used to detect which of the "del" + * operations that set the "removed" flag gets to return the removed + * node to its caller. Note that the replace operation does not need to + * interact with the "removal owner" flag, because it validates that + * the "removed" flag is not set before performing its cmpxchg. + */ +#define REMOVED_FLAG (1UL << 0) +#define BUCKET_FLAG (1UL << 1) +#define REMOVAL_OWNER_FLAG (1UL << 2) +#define FLAGS_MASK ((1UL << 3) - 1) + +/* Value of the end pointer. Should not interact with flags. */ +#define END_VALUE NULL + +/* + * ht_items_count: Split-counters counting the number of node additions + * and removals in the table. Only used if the LTTNG_UST_LFHT_ACCOUNTING flag + * is set at hash table creation. + * + * These are free-running counters, never reset to zero. They count the + * number of add/remove, and trigger every (1 << COUNT_COMMIT_ORDER) + * operations to update the global counter. We choose a power-of-2 value + * for the trigger to deal with 32 or 64-bit overflow of the counter. + */ +struct ht_items_count { + unsigned long add, del; +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); + +#ifdef CONFIG_LTTNG_UST_LFHT_ITER_DEBUG + +static +void lttng_ust_lfht_iter_debug_set_ht(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter) +{ + iter->lfht = ht; +} + +#define lttng_ust_lfht_iter_debug_assert(...) assert(__VA_ARGS__) + +#else + +static +void lttng_ust_lfht_iter_debug_set_ht(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter) +{ +} + +#define lttng_ust_lfht_iter_debug_assert(...) + +#endif + +/* + * Algorithm to reverse bits in a word by lookup table, extended to + * 64-bit words.
+ * Source: + * http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable + * Originally from Public Domain. + */ + +static const uint8_t BitReverseTable256[256] = +{ +#define R2(n) (n), (n) + 2*64, (n) + 1*64, (n) + 3*64 +#define R4(n) R2(n), R2((n) + 2*16), R2((n) + 1*16), R2((n) + 3*16) +#define R6(n) R4(n), R4((n) + 2*4 ), R4((n) + 1*4 ), R4((n) + 3*4 ) + R6(0), R6(2), R6(1), R6(3) +}; +#undef R2 +#undef R4 +#undef R6 + +static +uint8_t bit_reverse_u8(uint8_t v) +{ + return BitReverseTable256[v]; +} + +#if (CAA_BITS_PER_LONG == 32) +static +uint32_t bit_reverse_u32(uint32_t v) +{ + return ((uint32_t) bit_reverse_u8(v) << 24) | + ((uint32_t) bit_reverse_u8(v >> 8) << 16) | + ((uint32_t) bit_reverse_u8(v >> 16) << 8) | + ((uint32_t) bit_reverse_u8(v >> 24)); +} +#else +static +uint64_t bit_reverse_u64(uint64_t v) +{ + return ((uint64_t) bit_reverse_u8(v) << 56) | + ((uint64_t) bit_reverse_u8(v >> 8) << 48) | + ((uint64_t) bit_reverse_u8(v >> 16) << 40) | + ((uint64_t) bit_reverse_u8(v >> 24) << 32) | + ((uint64_t) bit_reverse_u8(v >> 32) << 24) | + ((uint64_t) bit_reverse_u8(v >> 40) << 16) | + ((uint64_t) bit_reverse_u8(v >> 48) << 8) | + ((uint64_t) bit_reverse_u8(v >> 56)); +} +#endif + +static +unsigned long bit_reverse_ulong(unsigned long v) +{ +#if (CAA_BITS_PER_LONG == 32) + return bit_reverse_u32(v); +#else + return bit_reverse_u64(v); +#endif +} + +/* + * fls: returns the position of the most significant bit. + * Returns 0 if no bit is set, else returns the position of the most + * significant bit (from 1 to 32 on 32-bit, from 1 to 64 on 64-bit). + */ +#if defined(__i386) || defined(__x86_64) +static inline +unsigned int fls_u32(uint32_t x) +{ + int r; + + __asm__ ("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n\t" + "1:\n\t" + : "=r" (r) : "rm" (x)); + return r + 1; +} +#define HAS_FLS_U32 +#endif + +#if defined(__x86_64) +static inline +unsigned int fls_u64(uint64_t x) +{ + long r; + + __asm__ ("bsrq %1,%0\n\t" + "jnz 1f\n\t" + "movq $-1,%0\n\t" + "1:\n\t" + : "=r" (r) : "rm" (x)); + return r + 1; +} +#define HAS_FLS_U64 +#endif + +#ifndef HAS_FLS_U64 +static __attribute__((unused)) +unsigned int fls_u64(uint64_t x) +{ + unsigned int r = 64; + + if (!x) + return 0; + + if (!(x & 0xFFFFFFFF00000000ULL)) { + x <<= 32; + r -= 32; + } + if (!(x & 0xFFFF000000000000ULL)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xFF00000000000000ULL)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xF000000000000000ULL)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xC000000000000000ULL)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x8000000000000000ULL)) { + x <<= 1; + r -= 1; + } + return r; +} +#endif + +#ifndef HAS_FLS_U32 +static __attribute__((unused)) +unsigned int fls_u32(uint32_t x) +{ + unsigned int r = 32; + + if (!x) + return 0; + if (!(x & 0xFFFF0000U)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xFF000000U)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xF0000000U)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xC0000000U)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000U)) { + x <<= 1; + r -= 1; + } + return r; +} +#endif + +unsigned int lttng_ust_lfht_fls_ulong(unsigned long x) +{ +#if (CAA_BITS_PER_LONG == 32) + return fls_u32(x); +#else + return fls_u64(x); +#endif +} + +/* + * Return the minimum order for which x <= (1UL << order). + * Return -1 if x is 0. + */ +int lttng_ust_lfht_get_count_order_u32(uint32_t x) +{ + if (!x) + return -1; + + return fls_u32(x - 1); +} + +/* + * Return the minimum order for which x <= (1UL << order). + * Return -1 if x is 0. 
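A quick sanity check of this definition (a worked example, not from the patch itself):

    lttng_ust_lfht_get_count_order_u32(5) = fls_u32(4) = 3  /* 5 <= (1 << 3) */
    lttng_ust_lfht_get_count_order_u32(8) = fls_u32(7) = 3  /* exact power of 2 */
    lttng_ust_lfht_get_count_order_u32(9) = fls_u32(8) = 4  /* 9 <= (1 << 4) */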
+ */ +int lttng_ust_lfht_get_count_order_ulong(unsigned long x) +{ + if (!x) + return -1; + + return lttng_ust_lfht_fls_ulong(x - 1); +} + +static +struct lttng_ust_lfht_node *clear_flag(struct lttng_ust_lfht_node *node) +{ + return (struct lttng_ust_lfht_node *) (((unsigned long) node) & ~FLAGS_MASK); +} + +static +int is_removed(const struct lttng_ust_lfht_node *node) +{ + return ((unsigned long) node) & REMOVED_FLAG; +} + +static +int is_bucket(struct lttng_ust_lfht_node *node) +{ + return ((unsigned long) node) & BUCKET_FLAG; +} + +static +struct lttng_ust_lfht_node *flag_bucket(struct lttng_ust_lfht_node *node) +{ + return (struct lttng_ust_lfht_node *) (((unsigned long) node) | BUCKET_FLAG); +} + +static +int is_removal_owner(struct lttng_ust_lfht_node *node) +{ + return ((unsigned long) node) & REMOVAL_OWNER_FLAG; +} + +static +struct lttng_ust_lfht_node *flag_removal_owner(struct lttng_ust_lfht_node *node) +{ + return (struct lttng_ust_lfht_node *) (((unsigned long) node) | REMOVAL_OWNER_FLAG); +} + +static +struct lttng_ust_lfht_node *flag_removed_or_removal_owner(struct lttng_ust_lfht_node *node) +{ + return (struct lttng_ust_lfht_node *) (((unsigned long) node) | REMOVED_FLAG | REMOVAL_OWNER_FLAG); +} + +static +struct lttng_ust_lfht_node *get_end(void) +{ + return (struct lttng_ust_lfht_node *) END_VALUE; +} + +static +int is_end(struct lttng_ust_lfht_node *node) +{ + return clear_flag(node) == (struct lttng_ust_lfht_node *) END_VALUE; +} + +static +void lttng_ust_lfht_alloc_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + return ht->mm->alloc_bucket_table(ht, order); +} + +/* + * lttng_ust_lfht_free_bucket_table() should be called with decreasing order. + * When lttng_ust_lfht_free_bucket_table(0) is called, it means the whole + * lfht is destroyed. + */ +static +void lttng_ust_lfht_free_bucket_table(struct lttng_ust_lfht *ht, unsigned long order) +{ + return ht->mm->free_bucket_table(ht, order); +} + +static inline +struct lttng_ust_lfht_node *bucket_at(struct lttng_ust_lfht *ht, unsigned long index) +{ + return ht->bucket_at(ht, index); +} + +static inline +struct lttng_ust_lfht_node *lookup_bucket(struct lttng_ust_lfht *ht, unsigned long size, + unsigned long hash) +{ + assert(size > 0); + return bucket_at(ht, hash & (size - 1)); +} + +/* + * Remove all logically deleted nodes from a bucket up to a certain node key. + */ +static +void _lttng_ust_lfht_gc_bucket(struct lttng_ust_lfht_node *bucket, struct lttng_ust_lfht_node *node) +{ + struct lttng_ust_lfht_node *iter_prev, *iter, *next, *new_next; + + assert(!is_bucket(bucket)); + assert(!is_removed(bucket)); + assert(!is_removal_owner(bucket)); + assert(!is_bucket(node)); + assert(!is_removed(node)); + assert(!is_removal_owner(node)); + for (;;) { + iter_prev = bucket; + /* We can always skip the bucket node initially */ + iter = lttng_ust_rcu_dereference(iter_prev->next); + assert(!is_removed(iter)); + assert(!is_removal_owner(iter)); + assert(iter_prev->reverse_hash <= node->reverse_hash); + /* + * We should never be called with bucket (start of chain) + * and logically removed node (end of path compression + * marker) being the actual same node. This would be a + * bug in the algorithm implementation. 
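+ *
+ * As background, every flag test in this function reads the three low
+ * bits of the 8-byte-aligned next pointer; a sketch of the decoding
+ * (mirrors clear_flag()/is_removed()/is_bucket() defined above):
+ *
+ *   uintptr_t v = (uintptr_t) node->next;
+ *   struct lttng_ust_lfht_node *next =
+ *           (struct lttng_ust_lfht_node *) (v & ~FLAGS_MASK);
+ *   int removed = !!(v & REMOVED_FLAG);
+ *   int bucket = !!(v & BUCKET_FLAG);
+ *   int owner = !!(v & REMOVAL_OWNER_FLAG);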
+ */ + assert(bucket != node); + for (;;) { + if (caa_unlikely(is_end(iter))) + return; + if (caa_likely(clear_flag(iter)->reverse_hash > node->reverse_hash)) + return; + next = lttng_ust_rcu_dereference(clear_flag(iter)->next); + if (caa_likely(is_removed(next))) + break; + iter_prev = clear_flag(iter); + iter = next; + } + assert(!is_removed(iter)); + assert(!is_removal_owner(iter)); + if (is_bucket(iter)) + new_next = flag_bucket(clear_flag(next)); + else + new_next = clear_flag(next); + (void) uatomic_cmpxchg(&iter_prev->next, iter, new_next); + } +} + +static +int _lttng_ust_lfht_replace(struct lttng_ust_lfht *ht, unsigned long size, + struct lttng_ust_lfht_node *old_node, + struct lttng_ust_lfht_node *old_next, + struct lttng_ust_lfht_node *new_node) +{ + struct lttng_ust_lfht_node *bucket, *ret_next; + + if (!old_node) /* Return -ENOENT if asked to replace NULL node */ + return -ENOENT; + + assert(!is_removed(old_node)); + assert(!is_removal_owner(old_node)); + assert(!is_bucket(old_node)); + assert(!is_removed(new_node)); + assert(!is_removal_owner(new_node)); + assert(!is_bucket(new_node)); + assert(new_node != old_node); + for (;;) { + /* Insert after node to be replaced */ + if (is_removed(old_next)) { + /* + * Too late, the old node has been removed under us + * between lookup and replace. Fail. + */ + return -ENOENT; + } + assert(old_next == clear_flag(old_next)); + assert(new_node != old_next); + /* + * REMOVAL_OWNER flag is _NEVER_ set before the REMOVED + * flag. It is either set atomically at the same time + * (replace) or after (del). + */ + assert(!is_removal_owner(old_next)); + new_node->next = old_next; + /* + * Here is the whole trick for lock-free replace: we add + * the replacement node _after_ the node we want to + * replace by atomically setting its next pointer at the + * same time we set its removal flag. Given that + * the lookups/get next use an iterator aware of the + * next pointer, they will either skip the old node due + * to the removal flag and see the new node, or use + * the old node, but will not see the new one. + * This is a replacement of a node with another node + * that has the same value: we are therefore not + * removing a value from the hash table. We set both the + * REMOVED and REMOVAL_OWNER flags atomically so we own + * the node after successful cmpxchg. + */ + ret_next = uatomic_cmpxchg(&old_node->next, + old_next, flag_removed_or_removal_owner(new_node)); + if (ret_next == old_next) + break; /* We performed the replacement. */ + old_next = ret_next; + } + + /* + * Ensure that the old node is not visible to readers anymore: + * lookup for the node, and remove it (along with any other + * logically removed node) if found. + */ + bucket = lookup_bucket(ht, size, bit_reverse_ulong(old_node->reverse_hash)); + _lttng_ust_lfht_gc_bucket(bucket, new_node); + + assert(is_removed(CMM_LOAD_SHARED(old_node->next))); + return 0; +} + +/* + * A non-NULL unique_ret pointer uses the "add unique" (or uniquify) add + * mode. A NULL unique_ret allows creation of duplicate keys. 
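+ *
+ * The public entry points defined further down select the mode:
+ * lttng_ust_lfht_add() passes a NULL unique_ret and therefore allows
+ * duplicates, while lttng_ust_lfht_add_unique() and
+ * lttng_ust_lfht_add_replace() pass a local iterator:
+ *
+ *   _lttng_ust_lfht_add(ht, hash, NULL, NULL, size, node, NULL, 0);
+ *   _lttng_ust_lfht_add(ht, hash, match, key, size, node, &iter, 0);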
+ */ +static +void _lttng_ust_lfht_add(struct lttng_ust_lfht *ht, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + unsigned long size, + struct lttng_ust_lfht_node *node, + struct lttng_ust_lfht_iter *unique_ret, + int bucket_flag) +{ + struct lttng_ust_lfht_node *iter_prev, *iter, *next, *new_node, *new_next, + *return_node; + struct lttng_ust_lfht_node *bucket; + + assert(!is_bucket(node)); + assert(!is_removed(node)); + assert(!is_removal_owner(node)); + bucket = lookup_bucket(ht, size, hash); + for (;;) { + /* + * iter_prev points to the non-removed node prior to the + * insert location. + */ + iter_prev = bucket; + /* We can always skip the bucket node initially */ + iter = lttng_ust_rcu_dereference(iter_prev->next); + assert(iter_prev->reverse_hash <= node->reverse_hash); + for (;;) { + if (caa_unlikely(is_end(iter))) + goto insert; + if (caa_likely(clear_flag(iter)->reverse_hash > node->reverse_hash)) + goto insert; + + /* bucket node is the first node of the identical-hash-value chain */ + if (bucket_flag && clear_flag(iter)->reverse_hash == node->reverse_hash) + goto insert; + + next = lttng_ust_rcu_dereference(clear_flag(iter)->next); + if (caa_unlikely(is_removed(next))) + goto gc_node; + + /* uniquely add */ + if (unique_ret + && !is_bucket(next) + && clear_flag(iter)->reverse_hash == node->reverse_hash) { + struct lttng_ust_lfht_iter d_iter = { + .node = node, + .next = iter, +#ifdef CONFIG_LTTNG_UST_LFHT_ITER_DEBUG + .lfht = ht, +#endif + }; + + /* + * uniquely adding inserts the node as the first + * node of the identical-hash-value node chain. + * + * This semantic ensures no duplicated keys + * should ever be observable in the table + * (including traversing the table node by + * node by forward iterations) + */ + lttng_ust_lfht_next_duplicate(ht, match, key, &d_iter); + if (!d_iter.node) + goto insert; + + *unique_ret = d_iter; + return; + } + + iter_prev = clear_flag(iter); + iter = next; + } + + insert: + assert(node != clear_flag(iter)); + assert(!is_removed(iter_prev)); + assert(!is_removal_owner(iter_prev)); + assert(!is_removed(iter)); + assert(!is_removal_owner(iter)); + assert(iter_prev != node); + if (!bucket_flag) + node->next = clear_flag(iter); + else + node->next = flag_bucket(clear_flag(iter)); + if (is_bucket(iter)) + new_node = flag_bucket(node); + else + new_node = node; + if (uatomic_cmpxchg(&iter_prev->next, iter, + new_node) != iter) { + continue; /* retry */ + } else { + return_node = node; + goto end; + } + + gc_node: + assert(!is_removed(iter)); + assert(!is_removal_owner(iter)); + if (is_bucket(iter)) + new_next = flag_bucket(clear_flag(next)); + else + new_next = clear_flag(next); + (void) uatomic_cmpxchg(&iter_prev->next, iter, new_next); + /* retry */ + } +end: + if (unique_ret) { + unique_ret->node = return_node; + /* unique_ret->next left unset, never used. */ + } +} + +static +int _lttng_ust_lfht_del(struct lttng_ust_lfht *ht, unsigned long size, + struct lttng_ust_lfht_node *node) +{ + struct lttng_ust_lfht_node *bucket, *next; + + if (!node) /* Return -ENOENT if asked to delete NULL node */ + return -ENOENT; + + /* logically delete the node */ + assert(!is_bucket(node)); + assert(!is_removed(node)); + assert(!is_removal_owner(node)); + + /* + * We are first checking if the node had previously been + * logically removed (this check is not atomic with setting the + * logical removal flag). Return -ENOENT if the node had + * previously been removed. 
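+ *
+ * Consequently, when two threads race to delete the same node, exactly
+ * one caller wins (sketch):
+ *
+ *   Thread A: lttng_ust_lfht_del(ht, node) -> 0        (removal owner)
+ *   Thread B: lttng_ust_lfht_del(ht, node) -> -ENOENT  (lost the race)
+ *
+ * The winner is ultimately decided by the REMOVAL_OWNER_FLAG xchg at
+ * the end of this function; the early check below merely filters out
+ * nodes that were already removed.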
+ */ + next = CMM_LOAD_SHARED(node->next); /* next is not dereferenced */ + if (caa_unlikely(is_removed(next))) + return -ENOENT; + assert(!is_bucket(next)); + /* + * The del operation semantic guarantees a full memory barrier + * before the uatomic_or atomic commit of the deletion flag. + */ + cmm_smp_mb__before_uatomic_or(); + /* + * We set the REMOVED_FLAG unconditionally. Note that there may + * be more than one concurrent thread setting this flag. + * Knowing which wins the race will be known after the garbage + * collection phase, stay tuned! + */ + uatomic_or(&node->next, REMOVED_FLAG); + /* We performed the (logical) deletion. */ + + /* + * Ensure that the node is not visible to readers anymore: lookup for + * the node, and remove it (along with any other logically removed node) + * if found. + */ + bucket = lookup_bucket(ht, size, bit_reverse_ulong(node->reverse_hash)); + _lttng_ust_lfht_gc_bucket(bucket, node); + + assert(is_removed(CMM_LOAD_SHARED(node->next))); + /* + * Last phase: atomically exchange node->next with a version + * having "REMOVAL_OWNER_FLAG" set. If the returned node->next + * pointer did _not_ have "REMOVAL_OWNER_FLAG" set, we now own + * the node and win the removal race. + * It is interesting to note that all "add" paths are forbidden + * to change the next pointer starting from the point where the + * REMOVED_FLAG is set, so here using a read, followed by a + * xchg() suffice to guarantee that the xchg() will ever only + * set the "REMOVAL_OWNER_FLAG" (or change nothing if the flag + * was already set). + */ + if (!is_removal_owner(uatomic_xchg(&node->next, + flag_removal_owner(node->next)))) + return 0; + else + return -ENOENT; +} + +/* + * Never called with size < 1. + */ +static +void lttng_ust_lfht_create_bucket(struct lttng_ust_lfht *ht, unsigned long size) +{ + struct lttng_ust_lfht_node *prev, *node; + unsigned long order, len, i; + int bucket_order; + + lttng_ust_lfht_alloc_bucket_table(ht, 0); + + dbg_printf("create bucket: order 0 index 0 hash 0\n"); + node = bucket_at(ht, 0); + node->next = flag_bucket(get_end()); + node->reverse_hash = 0; + + bucket_order = lttng_ust_lfht_get_count_order_ulong(size); + assert(bucket_order >= 0); + + for (order = 1; order < (unsigned long) bucket_order + 1; order++) { + len = 1UL << (order - 1); + lttng_ust_lfht_alloc_bucket_table(ht, order); + + for (i = 0; i < len; i++) { + /* + * Now, we are trying to init the node with the + * hash=(len+i) (which is also a bucket with the + * index=(len+i)) and insert it into the hash table, + * so this node has to be inserted after the bucket + * with the index=(len+i)&(len-1)=i. And because there + * is no other non-bucket node nor bucket node with + * larger index/hash inserted, so the bucket node + * being inserted should be inserted directly linked + * after the bucket node with index=i. + */ + prev = bucket_at(ht, i); + node = bucket_at(ht, len + i); + + dbg_printf("create bucket: order %lu index %lu hash %lu\n", + order, len + i, len + i); + node->reverse_hash = bit_reverse_ulong(len + i); + + /* insert after prev */ + assert(is_bucket(prev->next)); + node->next = prev->next; + prev->next = flag_bucket(node); + } + } +} + +#if (CAA_BITS_PER_LONG > 32) +/* + * For 64-bit architectures, with max number of buckets small enough not to + * use the entire 64-bit memory mapping space (and allowing a fair number of + * hash table instances), use the mmap allocator, which is faster. Otherwise, + * fallback to the order allocator. 
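+ *
+ * Concretely, on 64-bit (illustration only):
+ *
+ *   get_mm_type(1UL << 20) -> &lttng_ust_lfht_mm_mmap   (fits within 2^32)
+ *   get_mm_type(1UL << 40) -> &lttng_ust_lfht_mm_order  (too large for mmap)
+ *   get_mm_type(0)         -> &lttng_ust_lfht_mm_order  (0 means infinite)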
+ */
+static
+const struct lttng_ust_lfht_mm_type *get_mm_type(unsigned long max_nr_buckets)
+{
+ if (max_nr_buckets && max_nr_buckets <= (1ULL << 32))
+ return &lttng_ust_lfht_mm_mmap;
+ else
+ return &lttng_ust_lfht_mm_order;
+}
+#else
+/*
+ * For 32-bit architectures, use the order allocator.
+ */
+static
+const struct lttng_ust_lfht_mm_type *get_mm_type(unsigned long max_nr_buckets)
+{
+ return &lttng_ust_lfht_mm_order;
+}
+#endif
+
+struct lttng_ust_lfht *lttng_ust_lfht_new(unsigned long init_size,
+ unsigned long min_nr_alloc_buckets,
+ unsigned long max_nr_buckets,
+ int flags,
+ const struct lttng_ust_lfht_mm_type *mm)
+{
+ struct lttng_ust_lfht *ht;
+ unsigned long order;
+
+ /* min_nr_alloc_buckets must be power of two */
+ if (!min_nr_alloc_buckets || (min_nr_alloc_buckets & (min_nr_alloc_buckets - 1)))
+ return NULL;
+
+ /* init_size must be power of two */
+ if (!init_size || (init_size & (init_size - 1)))
+ return NULL;
+
+ /*
+ * Memory management plugin default.
+ */
+ if (!mm)
+ mm = get_mm_type(max_nr_buckets);
+
+ /* max_nr_buckets == 0 for order based mm means infinite */
+ if (mm == &lttng_ust_lfht_mm_order && !max_nr_buckets)
+ max_nr_buckets = 1UL << (MAX_TABLE_ORDER - 1);
+
+ /* max_nr_buckets must be power of two */
+ if (!max_nr_buckets || (max_nr_buckets & (max_nr_buckets - 1)))
+ return NULL;
+
+ if (flags & LTTNG_UST_LFHT_AUTO_RESIZE)
+ return NULL;
+
+ min_nr_alloc_buckets = max(min_nr_alloc_buckets, MIN_TABLE_SIZE);
+ init_size = max(init_size, MIN_TABLE_SIZE);
+ max_nr_buckets = max(max_nr_buckets, min_nr_alloc_buckets);
+ init_size = min(init_size, max_nr_buckets);
+
+ ht = mm->alloc_lttng_ust_lfht(min_nr_alloc_buckets, max_nr_buckets);
+ assert(ht);
+ assert(ht->mm == mm);
+ assert(ht->bucket_at == mm->bucket_at);
+
+ ht->flags = flags;
+ /* this mutex should not nest in read-side C.S.
*/ + pthread_mutex_init(&ht->resize_mutex, NULL); + order = lttng_ust_lfht_get_count_order_ulong(init_size); + ht->resize_target = 1UL << order; + lttng_ust_lfht_create_bucket(ht, 1UL << order); + ht->size = 1UL << order; + return ht; +} + +void lttng_ust_lfht_lookup(struct lttng_ust_lfht *ht, unsigned long hash, + lttng_ust_lfht_match_fct match, const void *key, + struct lttng_ust_lfht_iter *iter) +{ + struct lttng_ust_lfht_node *node, *next, *bucket; + unsigned long reverse_hash, size; + + lttng_ust_lfht_iter_debug_set_ht(ht, iter); + + reverse_hash = bit_reverse_ulong(hash); + + size = lttng_ust_rcu_dereference(ht->size); + bucket = lookup_bucket(ht, size, hash); + /* We can always skip the bucket node initially */ + node = lttng_ust_rcu_dereference(bucket->next); + node = clear_flag(node); + for (;;) { + if (caa_unlikely(is_end(node))) { + node = next = NULL; + break; + } + if (caa_unlikely(node->reverse_hash > reverse_hash)) { + node = next = NULL; + break; + } + next = lttng_ust_rcu_dereference(node->next); + assert(node == clear_flag(node)); + if (caa_likely(!is_removed(next)) + && !is_bucket(next) + && node->reverse_hash == reverse_hash + && caa_likely(match(node, key))) { + break; + } + node = clear_flag(next); + } + assert(!node || !is_bucket(CMM_LOAD_SHARED(node->next))); + iter->node = node; + iter->next = next; +} + +void lttng_ust_lfht_next_duplicate(struct lttng_ust_lfht *ht, lttng_ust_lfht_match_fct match, + const void *key, struct lttng_ust_lfht_iter *iter) +{ + struct lttng_ust_lfht_node *node, *next; + unsigned long reverse_hash; + + lttng_ust_lfht_iter_debug_assert(ht == iter->lfht); + node = iter->node; + reverse_hash = node->reverse_hash; + next = iter->next; + node = clear_flag(next); + + for (;;) { + if (caa_unlikely(is_end(node))) { + node = next = NULL; + break; + } + if (caa_unlikely(node->reverse_hash > reverse_hash)) { + node = next = NULL; + break; + } + next = lttng_ust_rcu_dereference(node->next); + if (caa_likely(!is_removed(next)) + && !is_bucket(next) + && caa_likely(match(node, key))) { + break; + } + node = clear_flag(next); + } + assert(!node || !is_bucket(CMM_LOAD_SHARED(node->next))); + iter->node = node; + iter->next = next; +} + +void lttng_ust_lfht_next(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter) +{ + struct lttng_ust_lfht_node *node, *next; + + lttng_ust_lfht_iter_debug_assert(ht == iter->lfht); + node = clear_flag(iter->next); + for (;;) { + if (caa_unlikely(is_end(node))) { + node = next = NULL; + break; + } + next = lttng_ust_rcu_dereference(node->next); + if (caa_likely(!is_removed(next)) + && !is_bucket(next)) { + break; + } + node = clear_flag(next); + } + assert(!node || !is_bucket(CMM_LOAD_SHARED(node->next))); + iter->node = node; + iter->next = next; +} + +void lttng_ust_lfht_first(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter) +{ + lttng_ust_lfht_iter_debug_set_ht(ht, iter); + /* + * Get next after first bucket node. The first bucket node is the + * first node of the linked list. 
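+ *
+ * On the caller side, a full-table traversal therefore looks like the
+ * sketch below (read-side lock held; struct my_entry and process() are
+ * hypothetical):
+ *
+ *   struct lttng_ust_lfht_iter iter;
+ *   struct lttng_ust_lfht_node *n;
+ *
+ *   for (lttng_ust_lfht_first(ht, &iter);
+ *        (n = lttng_ust_lfht_iter_get_node(&iter)) != NULL;
+ *        lttng_ust_lfht_next(ht, &iter))
+ *           process(caa_container_of(n, struct my_entry, node));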
+ */ + iter->next = bucket_at(ht, 0)->next; + lttng_ust_lfht_next(ht, iter); +} + +void lttng_ust_lfht_add(struct lttng_ust_lfht *ht, unsigned long hash, + struct lttng_ust_lfht_node *node) +{ + unsigned long size; + + node->reverse_hash = bit_reverse_ulong(hash); + size = lttng_ust_rcu_dereference(ht->size); + _lttng_ust_lfht_add(ht, hash, NULL, NULL, size, node, NULL, 0); +} + +struct lttng_ust_lfht_node *lttng_ust_lfht_add_unique(struct lttng_ust_lfht *ht, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *node) +{ + unsigned long size; + struct lttng_ust_lfht_iter iter; + + node->reverse_hash = bit_reverse_ulong(hash); + size = lttng_ust_rcu_dereference(ht->size); + _lttng_ust_lfht_add(ht, hash, match, key, size, node, &iter, 0); + return iter.node; +} + +struct lttng_ust_lfht_node *lttng_ust_lfht_add_replace(struct lttng_ust_lfht *ht, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *node) +{ + unsigned long size; + struct lttng_ust_lfht_iter iter; + + node->reverse_hash = bit_reverse_ulong(hash); + size = lttng_ust_rcu_dereference(ht->size); + for (;;) { + _lttng_ust_lfht_add(ht, hash, match, key, size, node, &iter, 0); + if (iter.node == node) { + return NULL; + } + + if (!_lttng_ust_lfht_replace(ht, size, iter.node, iter.next, node)) + return iter.node; + } +} + +int lttng_ust_lfht_replace(struct lttng_ust_lfht *ht, + struct lttng_ust_lfht_iter *old_iter, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *new_node) +{ + unsigned long size; + + new_node->reverse_hash = bit_reverse_ulong(hash); + if (!old_iter->node) + return -ENOENT; + if (caa_unlikely(old_iter->node->reverse_hash != new_node->reverse_hash)) + return -EINVAL; + if (caa_unlikely(!match(old_iter->node, key))) + return -EINVAL; + size = lttng_ust_rcu_dereference(ht->size); + return _lttng_ust_lfht_replace(ht, size, old_iter->node, old_iter->next, + new_node); +} + +int lttng_ust_lfht_del(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_node *node) +{ + unsigned long size; + + size = lttng_ust_rcu_dereference(ht->size); + return _lttng_ust_lfht_del(ht, size, node); +} + +int lttng_ust_lfht_is_node_deleted(const struct lttng_ust_lfht_node *node) +{ + return is_removed(CMM_LOAD_SHARED(node->next)); +} + +static +int lttng_ust_lfht_delete_bucket(struct lttng_ust_lfht *ht) +{ + struct lttng_ust_lfht_node *node; + unsigned long order, i, size; + + /* Check that the table is empty */ + node = bucket_at(ht, 0); + do { + node = clear_flag(node)->next; + if (!is_bucket(node)) + return -EPERM; + assert(!is_removed(node)); + assert(!is_removal_owner(node)); + } while (!is_end(node)); + /* + * size accessed without lttng_ust_rcu_dereference because hash table is + * being destroyed. + */ + size = ht->size; + /* Internal sanity check: all nodes left should be buckets */ + for (i = 0; i < size; i++) { + node = bucket_at(ht, i); + dbg_printf("delete bucket: index %lu expected hash %lu hash %lu\n", + i, i, bit_reverse_ulong(node->reverse_hash)); + assert(is_bucket(node->next)); + } + + for (order = lttng_ust_lfht_get_count_order_ulong(size); (long)order >= 0; order--) + lttng_ust_lfht_free_bucket_table(ht, order); + + return 0; +} + +/* + * Should only be called when no more concurrent readers nor writers can + * possibly access the table. 
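+ *
+ * A full lifecycle sketch (struct my_entry is hypothetical, deletions
+ * run under the read-side lock, and the grace-period call assumes the
+ * lttng-ust urcu flavor):
+ *
+ *   struct lttng_ust_lfht *ht = lttng_ust_lfht_new(256, 1, 0, 0, NULL);
+ *
+ *   ... add and look up entries under the read-side lock ...
+ *
+ *   lttng_ust_lfht_for_each_entry(ht, &iter, pos, node)
+ *           lttng_ust_lfht_del(ht, &pos->node);
+ *   lttng_ust_urcu_synchronize_rcu();
+ *   ... free the deleted entries ...
+ *   lttng_ust_lfht_destroy(ht);
+ *
+ * The table must be empty by then: lttng_ust_lfht_delete_bucket()
+ * above returns -EPERM if any non-bucket node is left.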
+ */ +int lttng_ust_lfht_destroy(struct lttng_ust_lfht *ht) +{ + int ret; + + ret = lttng_ust_lfht_delete_bucket(ht); + if (ret) + return ret; + ret = pthread_mutex_destroy(&ht->resize_mutex); + if (ret) + ret = -EBUSY; + poison_free(ht); + return ret; +} diff --git a/liblttng-ust/rculfhash.h b/liblttng-ust/rculfhash.h new file mode 100644 index 00000000..58f5a5da --- /dev/null +++ b/liblttng-ust/rculfhash.h @@ -0,0 +1,467 @@ +#ifndef _LTTNG_UST_RCULFHASH_H +#define _LTTNG_UST_RCULFHASH_H + +/* + * urcu/rculfhash.h + * + * Userspace RCU library - Lock-Free RCU Hash Table + * + * Copyright 2011 - Mathieu Desnoyers + * Copyright 2011 - Lai Jiangshan + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct lttng_ust_lfht; + +/* + * lttng_ust_lfht_node: Contains the next pointers and reverse-hash + * value required for lookup and traversal of the hash table. + * + * struct lttng_ust_lfht_node should be aligned on 8-bytes boundaries because + * the three lower bits are used as flags. It is worth noting that the + * information contained within these three bits could be represented on + * two bits by re-using the same bit for REMOVAL_OWNER_FLAG and + * BUCKET_FLAG. This can be done if we ensure that no iterator nor + * updater check the BUCKET_FLAG after it detects that the REMOVED_FLAG + * is set. Given the minimum size of struct lttng_ust_lfht_node is 8 bytes on + * 32-bit architectures, we choose to go for simplicity and reserve + * three bits. + * + * struct lttng_ust_lfht_node can be embedded into a structure (as a field). + * caa_container_of() can be used to get the structure from the struct + * lttng_ust_lfht_node after a lookup. + * + * The structure which embeds it typically holds the key (or key-value + * pair) of the object. The caller code is responsible for calculation + * of the hash value for lttng_ust_lfht APIs. + */ +struct lttng_ust_lfht_node { + struct lttng_ust_lfht_node *next; /* ptr | REMOVAL_OWNER_FLAG | BUCKET_FLAG | REMOVED_FLAG */ + unsigned long reverse_hash; +} __attribute__((aligned(8))); + +/* lttng_ust_lfht_iter: Used to track state while traversing a hash chain. */ +struct lttng_ust_lfht_iter { + struct lttng_ust_lfht_node *node, *next; +}; + +static inline +struct lttng_ust_lfht_node *lttng_ust_lfht_iter_get_node(struct lttng_ust_lfht_iter *iter) +{ + return iter->node; +} + +struct rcu_flavor_struct; + +/* + * Caution ! + * Ensure reader and writer threads are registered as urcu readers. + */ + +typedef int (*lttng_ust_lfht_match_fct)(struct lttng_ust_lfht_node *node, const void *key); + +/* + * lttng_ust_lfht_node_init - initialize a hash table node + * @node: the node to initialize. 
+ * + * This function is kept to be eventually used for debugging purposes + * (detection of memory corruption). + */ +static inline +void lttng_ust_lfht_node_init(struct lttng_ust_lfht_node *node) +{ +} + +/* + * Hash table creation flags. + */ +enum { + LTTNG_UST_LFHT_AUTO_RESIZE = (1U << 0), + LTTNG_UST_LFHT_ACCOUNTING = (1U << 1), +}; + +struct lttng_ust_lfht_mm_type { + struct lttng_ust_lfht *(*alloc_lttng_ust_lfht)(unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets); + void (*alloc_bucket_table)(struct lttng_ust_lfht *ht, unsigned long order); + void (*free_bucket_table)(struct lttng_ust_lfht *ht, unsigned long order); + struct lttng_ust_lfht_node *(*bucket_at)(struct lttng_ust_lfht *ht, + unsigned long index); +}; + +extern const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_order; +extern const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_chunk; +extern const struct lttng_ust_lfht_mm_type lttng_ust_lfht_mm_mmap; + +/* + * lttng_ust_lfht_new - allocate a hash table. + * @init_size: number of buckets to allocate initially. Must be power of two. + * @min_nr_alloc_buckets: the minimum number of allocated buckets. + * (must be power of two) + * @max_nr_buckets: the maximum number of hash table buckets allowed. + * (must be power of two, 0 is accepted, means + * "infinite") + * @flags: hash table creation flags (can be combined with bitwise or: '|'). + * 0: no flags. + * LTTNG_UST_LFHT_AUTO_RESIZE: automatically resize hash table. + * LTTNG_UST_LFHT_ACCOUNTING: count the number of node addition + * and removal in the table + * + * Return NULL on error. + * Note: the RCU flavor must be already included before the hash table header. + */ +extern +struct lttng_ust_lfht *lttng_ust_lfht_new(unsigned long init_size, + unsigned long min_nr_alloc_buckets, + unsigned long max_nr_buckets, + int flags, + const struct lttng_ust_lfht_mm_type *mm); + +/* + * lttng_ust_lfht_destroy - destroy a hash table. + * @ht: the hash table to destroy. + * + * Return 0 on success, negative error value on error. + + * Prior to liburcu 0.10: + * - Threads calling this API need to be registered RCU read-side + * threads. + * - lttng_ust_lfht_destroy should *not* be called from a RCU read-side + * critical section. It should *not* be called from a call_rcu thread + * context neither. + * + * Starting from liburcu 0.10, rculfhash implements its own worker + * thread to handle resize operations, which removes RCU requirements on + * lttng_ust_lfht_destroy. + */ +extern +int lttng_ust_lfht_destroy(struct lttng_ust_lfht *ht); + +/* + * lttng_ust_lfht_count_nodes - count the number of nodes in the hash table. + * @ht: the hash table. + * @split_count_before: sample the node count split-counter before traversal. + * @count: traverse the hash table, count the number of nodes observed. + * @split_count_after: sample the node count split-counter after traversal. + * + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + */ +extern +void lttng_ust_lfht_count_nodes(struct lttng_ust_lfht *ht, + long *split_count_before, + unsigned long *count, + long *split_count_after); + +/* + * lttng_ust_lfht_lookup - lookup a node by key. + * @ht: the hash table. + * @hash: the key hash. + * @match: the key match function. + * @key: the current node key. + * @iter: node, if found (output). *iter->node set to NULL if not found. + * + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. 
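+ *
+ * Caller-side sketch (the entry type, match function and my_hash() are
+ * hypothetical; any suitable hash function can be used):
+ *
+ *   struct my_entry {
+ *           struct lttng_ust_lfht_node node;
+ *           int key;
+ *   };
+ *
+ *   static int my_match(struct lttng_ust_lfht_node *node, const void *key)
+ *   {
+ *           struct my_entry *e = caa_container_of(node, struct my_entry, node);
+ *           return e->key == *(const int *) key;
+ *   }
+ *
+ *   int key = 42;
+ *   struct lttng_ust_lfht_iter iter;
+ *
+ *   lttng_ust_urcu_read_lock();
+ *   lttng_ust_lfht_lookup(ht, my_hash(key), my_match, &key, &iter);
+ *   if (lttng_ust_lfht_iter_get_node(&iter))
+ *           ... found ...
+ *   lttng_ust_urcu_read_unlock();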
+ * This function acts as a rcu_dereference() to read the node pointer. + */ +extern +void lttng_ust_lfht_lookup(struct lttng_ust_lfht *ht, unsigned long hash, + lttng_ust_lfht_match_fct match, const void *key, + struct lttng_ust_lfht_iter *iter); + +/* + * lttng_ust_lfht_next_duplicate - get the next item with same key, after iterator. + * @ht: the hash table. + * @match: the key match function. + * @key: the current node key. + * @iter: input: current iterator. + * output: node, if found. *iter->node set to NULL if not found. + * + * Uses an iterator initialized by a lookup or traversal. Important: the + * iterator _needs_ to be initialized before calling + * lttng_ust_lfht_next_duplicate. + * Sets *iter-node to the following node with same key. + * Sets *iter->node to NULL if no following node exists with same key. + * RCU read-side lock must be held across lttng_ust_lfht_lookup and + * lttng_ust_lfht_next calls, and also between lttng_ust_lfht_next calls using the + * node returned by a previous lttng_ust_lfht_next. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * This function acts as a rcu_dereference() to read the node pointer. + */ +extern +void lttng_ust_lfht_next_duplicate(struct lttng_ust_lfht *ht, + lttng_ust_lfht_match_fct match, const void *key, + struct lttng_ust_lfht_iter *iter); + +/* + * lttng_ust_lfht_first - get the first node in the table. + * @ht: the hash table. + * @iter: First node, if exists (output). *iter->node set to NULL if not found. + * + * Output in "*iter". *iter->node set to NULL if table is empty. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * This function acts as a rcu_dereference() to read the node pointer. + */ +extern +void lttng_ust_lfht_first(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter); + +/* + * lttng_ust_lfht_next - get the next node in the table. + * @ht: the hash table. + * @iter: input: current iterator. + * output: next node, if exists. *iter->node set to NULL if not found. + * + * Input/Output in "*iter". *iter->node set to NULL if *iter was + * pointing to the last table node. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * This function acts as a rcu_dereference() to read the node pointer. + */ +extern +void lttng_ust_lfht_next(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_iter *iter); + +/* + * lttng_ust_lfht_add - add a node to the hash table. + * @ht: the hash table. + * @hash: the key hash. + * @node: the node to add. + * + * This function supports adding redundant keys into the table. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * This function issues a full memory barrier before and after its + * atomic commit. + */ +extern +void lttng_ust_lfht_add(struct lttng_ust_lfht *ht, unsigned long hash, + struct lttng_ust_lfht_node *node); + +/* + * lttng_ust_lfht_add_unique - add a node to hash table, if key is not present. + * @ht: the hash table. + * @hash: the node's hash. + * @match: the key match function. + * @key: the node's key. + * @node: the node to try adding. + * + * Return the node added upon success. + * Return the unique node already present upon failure. If + * lttng_ust_lfht_add_unique fails, the node passed as parameter should be + * freed by the caller. 
In this case, the caller does NOT need to wait + * for a grace period before freeing or re-using the node. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * + * The semantic of this function is that if only this function is used + * to add keys into the table, no duplicated keys should ever be + * observable in the table. The same guarantee apply for combination of + * add_unique and add_replace (see below). + * + * Upon success, this function issues a full memory barrier before and + * after its atomic commit. Upon failure, this function acts like a + * simple lookup operation: it acts as a rcu_dereference() to read the + * node pointer. The failure case does not guarantee any other memory + * barrier. + */ +extern +struct lttng_ust_lfht_node *lttng_ust_lfht_add_unique(struct lttng_ust_lfht *ht, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *node); + +/* + * lttng_ust_lfht_add_replace - replace or add a node within hash table. + * @ht: the hash table. + * @hash: the node's hash. + * @match: the key match function. + * @key: the node's key. + * @node: the node to add. + * + * Return the node replaced upon success. If no node matching the key + * was present, return NULL, which also means the operation succeeded. + * This replacement operation should never fail. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * After successful replacement, a grace period must be waited for before + * freeing or re-using the memory reserved for the returned node. + * + * The semantic of replacement vs lookups and traversals is the + * following: if lookups and traversals are performed between a key + * unique insertion and its removal, we guarantee that the lookups and + * traversals will always find exactly one instance of the key if it is + * replaced concurrently with the lookups. + * + * Providing this semantic allows us to ensure that replacement-only + * schemes will never generate duplicated keys. It also allows us to + * guarantee that a combination of add_replace and add_unique updates + * will never generate duplicated keys. + * + * This function issues a full memory barrier before and after its + * atomic commit. + */ +extern +struct lttng_ust_lfht_node *lttng_ust_lfht_add_replace(struct lttng_ust_lfht *ht, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *node); + +/* + * lttng_ust_lfht_replace - replace a node pointed to by iter within hash table. + * @ht: the hash table. + * @old_iter: the iterator position of the node to replace. + * @hash: the node's hash. + * @match: the key match function. + * @key: the node's key. + * @new_node: the new node to use as replacement. + * + * Return 0 if replacement is successful, negative value otherwise. + * Replacing a NULL old node or an already removed node will fail with + * -ENOENT. + * If the hash or value of the node to replace and the new node differ, + * this function returns -EINVAL without proceeding to the replacement. + * Old node can be looked up with lttng_ust_lfht_lookup and lttng_ust_lfht_next. + * RCU read-side lock must be held between lookup and replacement. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. 
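+ *
+ * Typical usage, as a sketch (iter obtained from a prior lookup,
+ * new_entry hypothetical):
+ *
+ *   lttng_ust_lfht_lookup(ht, hash, match, key, &iter);
+ *   ret = lttng_ust_lfht_replace(ht, &iter, hash, match, key,
+ *                   &new_entry->node);
+ *   if (!ret)
+ *           ... old node unlinked; see the grace-period rule below ...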
+ * After successful replacement, a grace period must be waited for before + * freeing or re-using the memory reserved for the old node (which can + * be accessed with lttng_ust_lfht_iter_get_node). + * + * The semantic of replacement vs lookups is the same as + * lttng_ust_lfht_add_replace(). + * + * Upon success, this function issues a full memory barrier before and + * after its atomic commit. Upon failure, this function does not issue + * any memory barrier. + */ +extern +int lttng_ust_lfht_replace(struct lttng_ust_lfht *ht, + struct lttng_ust_lfht_iter *old_iter, + unsigned long hash, + lttng_ust_lfht_match_fct match, + const void *key, + struct lttng_ust_lfht_node *new_node); + +/* + * lttng_ust_lfht_del - remove node pointed to by iterator from hash table. + * @ht: the hash table. + * @node: the node to delete. + * + * Return 0 if the node is successfully removed, negative value + * otherwise. + * Deleting a NULL node or an already removed node will fail with a + * negative value. + * Node can be looked up with lttng_ust_lfht_lookup and lttng_ust_lfht_next, + * followed by use of lttng_ust_lfht_iter_get_node. + * RCU read-side lock must be held between lookup and removal. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * After successful removal, a grace period must be waited for before + * freeing or re-using the memory reserved for old node (which can be + * accessed with lttng_ust_lfht_iter_get_node). + * Upon success, this function issues a full memory barrier before and + * after its atomic commit. Upon failure, this function does not issue + * any memory barrier. + */ +extern +int lttng_ust_lfht_del(struct lttng_ust_lfht *ht, struct lttng_ust_lfht_node *node); + +/* + * lttng_ust_lfht_is_node_deleted - query whether a node is removed from hash table. + * + * Return non-zero if the node is deleted from the hash table, 0 + * otherwise. + * Node can be looked up with lttng_ust_lfht_lookup and lttng_ust_lfht_next, + * followed by use of lttng_ust_lfht_iter_get_node. + * RCU read-side lock must be held between lookup and call to this + * function. + * Call with rcu_read_lock held. + * Threads calling this API need to be registered RCU read-side threads. + * This function does not issue any memory barrier. + */ +extern +int lttng_ust_lfht_is_node_deleted(const struct lttng_ust_lfht_node *node); + +/* + * lttng_ust_lfht_resize - Force a hash table resize + * @ht: the hash table. + * @new_size: update to this hash table size. + * + * Threads calling this API need to be registered RCU read-side threads. + * This function does not (necessarily) issue memory barriers. + * lttng_ust_lfht_resize should *not* be called from a RCU read-side critical + * section. + */ +extern +void lttng_ust_lfht_resize(struct lttng_ust_lfht *ht, unsigned long new_size); + +/* + * Note: it is safe to perform element removal (del), replacement, or + * any hash table update operation during any of the following hash + * table traversals. + * These functions act as rcu_dereference() to read the node pointers. 
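+ *
+ * For example, with a hypothetical struct my_entry embedding a
+ * lttng_ust_lfht_node field named "node" (consume() also
+ * hypothetical), the entry-based helper below expands to a plain for
+ * loop:
+ *
+ *   struct lttng_ust_lfht_iter iter;
+ *   struct my_entry *pos;
+ *
+ *   lttng_ust_lfht_for_each_entry(ht, &iter, pos, node)
+ *           consume(pos);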
+ */ +#define lttng_ust_lfht_for_each(ht, iter, node) \ + for (lttng_ust_lfht_first(ht, iter), \ + node = lttng_ust_lfht_iter_get_node(iter); \ + node != NULL; \ + lttng_ust_lfht_next(ht, iter), \ + node = lttng_ust_lfht_iter_get_node(iter)) + +#define lttng_ust_lfht_for_each_duplicate(ht, hash, match, key, iter, node) \ + for (lttng_ust_lfht_lookup(ht, hash, match, key, iter), \ + node = lttng_ust_lfht_iter_get_node(iter); \ + node != NULL; \ + lttng_ust_lfht_next_duplicate(ht, match, key, iter), \ + node = lttng_ust_lfht_iter_get_node(iter)) + +#define lttng_ust_lfht_for_each_entry(ht, iter, pos, member) \ + for (lttng_ust_lfht_first(ht, iter), \ + pos = caa_container_of(lttng_ust_lfht_iter_get_node(iter), \ + __typeof__(*(pos)), member); \ + lttng_ust_lfht_iter_get_node(iter) != NULL; \ + lttng_ust_lfht_next(ht, iter), \ + pos = caa_container_of(lttng_ust_lfht_iter_get_node(iter), \ + __typeof__(*(pos)), member)) + +#define lttng_ust_lfht_for_each_entry_duplicate(ht, hash, match, key, \ + iter, pos, member) \ + for (lttng_ust_lfht_lookup(ht, hash, match, key, iter), \ + pos = caa_container_of(lttng_ust_lfht_iter_get_node(iter), \ + __typeof__(*(pos)), member); \ + lttng_ust_lfht_iter_get_node(iter) != NULL; \ + lttng_ust_lfht_next_duplicate(ht, match, key, iter), \ + pos = caa_container_of(lttng_ust_lfht_iter_get_node(iter), \ + __typeof__(*(pos)), member)) + +#ifdef __cplusplus +} +#endif + +#endif /* _LTTNG_UST_RCULFHASH_H */ diff --git a/liblttng-ust/tracepoint-internal.h b/liblttng-ust/tracepoint-internal.h index 1e6f92b6..964b1f0e 100644 --- a/liblttng-ust/tracepoint-internal.h +++ b/liblttng-ust/tracepoint-internal.h @@ -20,8 +20,8 @@ */ #include -#include #include +#include #define TRACE_DEFAULT TRACE_DEBUG_LINE @@ -44,13 +44,15 @@ extern int __tracepoint_probe_unregister_queue_release(const char *name, void (*func)(void), void *data); extern void __tracepoint_probe_prune_release_queue(void); +void lttng_ust_synchronize_trace(void); + /* * call after disconnection of last probe implemented within a * shared object before unmapping the library that contains the probe. */ static inline void tracepoint_synchronize_unregister(void) { - urcu_bp_synchronize_rcu(); + lttng_ust_synchronize_trace(); } extern void init_tracepoint(void); diff --git a/liblttng-ust/tracepoint.c b/liblttng-ust/tracepoint.c index b109fe30..cf7cf63c 100644 --- a/liblttng-ust/tracepoint.c +++ b/liblttng-ust/tracepoint.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include @@ -151,6 +151,72 @@ struct callsite_entry { bool tp_entry_callsite_ref; /* Has a tp_entry took a ref on this callsite */ }; +static int tracepoint_v1_api_used; +static void (*lttng_ust_liburcu_bp_synchronize_rcu)(void); +static void (*lttng_ust_liburcu_bp_rcu_read_lock)(void); +static void (*lttng_ust_liburcu_bp_rcu_read_unlock)(void); +void (*lttng_ust_liburcu_bp_before_fork)(void); +void (*lttng_ust_liburcu_bp_after_fork_parent)(void); +void (*lttng_ust_liburcu_bp_after_fork_child)(void); + +static bool lttng_ust_tracepoint_v1_used(void) +{ + return uatomic_read(&tracepoint_v1_api_used); +} + +static void lttng_ust_tracepoint_set_v1_used(void) +{ + if (!lttng_ust_tracepoint_v1_used()) { + /* + * Perform dlsym here rather than lazily on first use to + * eliminate nesting of dynamic loader lock (used within + * dlsym) inside the ust lock. 
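+ *
+ * The lock-ordering hazard being avoided, as a sketch:
+ *
+ *   Thread A                        Thread B
+ *   --------                        --------
+ *   takes ust lock                  dlopen() takes loader lock
+ *   dlsym() wants loader lock       library constructor registers
+ *                                   tracepoints, wants ust lock
+ *
+ * Each thread would wait on the other. Resolving the symbols eagerly,
+ * outside the ust lock, sidesteps the inversion.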
+ */ + if (!lttng_ust_liburcu_bp_synchronize_rcu) { + lttng_ust_liburcu_bp_synchronize_rcu = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "synchronize_rcu_bp")); + if (!lttng_ust_liburcu_bp_synchronize_rcu) + abort(); + } + if (!lttng_ust_liburcu_bp_before_fork) { + lttng_ust_liburcu_bp_before_fork = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_bp_before_fork")); + if (!lttng_ust_liburcu_bp_before_fork) + abort(); + } + if (!lttng_ust_liburcu_bp_after_fork_parent) { + lttng_ust_liburcu_bp_after_fork_parent = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_bp_after_fork_parent")); + if (!lttng_ust_liburcu_bp_after_fork_parent) + abort(); + } + if (!lttng_ust_liburcu_bp_after_fork_child) { + lttng_ust_liburcu_bp_after_fork_child = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_bp_after_fork_child")); + if (!lttng_ust_liburcu_bp_after_fork_child) + abort(); + } + if (!lttng_ust_liburcu_bp_rcu_read_lock) { + lttng_ust_liburcu_bp_rcu_read_lock = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_read_lock_bp")); + if (!lttng_ust_liburcu_bp_rcu_read_lock) + abort(); + } + if (!lttng_ust_liburcu_bp_rcu_read_unlock) { + lttng_ust_liburcu_bp_rcu_read_unlock = URCU_FORCE_CAST(void (*)(void), + dlsym(RTLD_DEFAULT, "rcu_read_unlock_bp")); + if (!lttng_ust_liburcu_bp_rcu_read_unlock) + abort(); + } + + /* Fixup URCU bp TLS. */ + lttng_ust_liburcu_bp_rcu_read_lock(); + lttng_ust_liburcu_bp_rcu_read_unlock(); + + uatomic_set(&tracepoint_v1_api_used, 1); + } +} + /* coverity[+alloc] */ static void *allocate_probes(int count) { @@ -166,7 +232,7 @@ static void release_probes(void *old) if (old) { struct tp_probes *tp_probes = caa_container_of(old, struct tp_probes, probes[0]); - urcu_bp_synchronize_rcu(); + lttng_ust_synchronize_trace(); free(tp_probes); } } @@ -386,7 +452,7 @@ static void set_tracepoint(struct tracepoint_entry **entry, * include/linux/tracepoints.h. A matching cmm_smp_read_barrier_depends() * is used. */ - rcu_assign_pointer(elem->probes, (*entry)->probes); + lttng_ust_rcu_assign_pointer(elem->probes, (*entry)->probes); CMM_STORE_SHARED(elem->state, active); } @@ -399,7 +465,7 @@ static void set_tracepoint(struct tracepoint_entry **entry, static void disable_tracepoint(struct lttng_ust_tracepoint *elem) { CMM_STORE_SHARED(elem->state, 0); - rcu_assign_pointer(elem->probes, NULL); + lttng_ust_rcu_assign_pointer(elem->probes, NULL); } /* @@ -750,7 +816,7 @@ void __tracepoint_probe_prune_release_queue(void) release_queue_need_update = 0; /* Wait for grace period between all sync_callsites and free. */ - urcu_bp_synchronize_rcu(); + lttng_ust_synchronize_trace(); cds_list_for_each_entry_safe(pos, next, &release_probes, u.list) { cds_list_del(&pos->u.list); @@ -841,7 +907,7 @@ void tracepoint_probe_update_all(void) tracepoint_update_probes(); /* Wait for grace period between update_probes and free. */ - urcu_bp_synchronize_rcu(); + lttng_ust_synchronize_trace(); cds_list_for_each_entry_safe(pos, next, &release_probes, u.list) { cds_list_del(&pos->u.list); free(pos); @@ -868,8 +934,20 @@ static void new_tracepoints(struct lttng_ust_tracepoint * const *start, } } -int tracepoint_register_lib(struct lttng_ust_tracepoint * const *tracepoints_start, - int tracepoints_count) +/* + * tracepoint_{un,}register_lib is meant to be looked up by instrumented + * applications through dlsym(). If found, those can register their + * tracepoints, else those tracepoints will not be available for + * tracing. 
The number at the end of those symbols acts as a major + * version for tracepoints. + * + * Older instrumented applications should still work with newer + * liblttng-ust, but it is fine that instrumented applications compiled + * against recent liblttng-ust headers require a recent liblttng-ust + * runtime for those tracepoints to be taken into account. + */ +int tracepoint_register_lib2(struct lttng_ust_tracepoint * const *tracepoints_start, + int tracepoints_count) { struct tracepoint_lib *pl, *iter; @@ -917,7 +995,15 @@ lib_added: return 0; } -int tracepoint_unregister_lib(struct lttng_ust_tracepoint * const *tracepoints_start) +/* Exposed for backward compatibility with old instrumented applications. */ +int tracepoint_register_lib(struct lttng_ust_tracepoint * const *tracepoints_start, + int tracepoints_count) +{ + lttng_ust_tracepoint_set_v1_used(); + return tracepoint_register_lib2(tracepoints_start, tracepoints_count); +} + +int tracepoint_unregister_lib2(struct lttng_ust_tracepoint * const *tracepoints_start) { struct tracepoint_lib *lib; @@ -943,6 +1029,13 @@ int tracepoint_unregister_lib(struct lttng_ust_tracepoint * const *tracepoints_s return 0; } +/* Exposed for backward compatibility with old instrumented applications. */ +int tracepoint_unregister_lib(struct lttng_ust_tracepoint * const *tracepoints_start) +{ + lttng_ust_tracepoint_set_v1_used(); + return tracepoint_unregister_lib2(tracepoints_start); +} + /* * Report in debug message whether the compiler correctly supports weak * hidden symbols. This test checks that the address associated with two @@ -981,23 +1074,23 @@ void exit_tracepoint(void) /* * Create the wrapper symbols. */ -#undef tp_rcu_read_lock_bp -#undef tp_rcu_read_unlock_bp -#undef tp_rcu_dereference_bp +#undef tp_rcu_read_lock +#undef tp_rcu_read_unlock +#undef tp_rcu_dereference -void tp_rcu_read_lock_bp(void) +void tp_rcu_read_lock(void) { - urcu_bp_read_lock(); + lttng_ust_urcu_read_lock(); } -void tp_rcu_read_unlock_bp(void) +void tp_rcu_read_unlock(void) { - urcu_bp_read_unlock(); + lttng_ust_urcu_read_unlock(); } -void *tp_rcu_dereference_sym_bp(void *p) +void *tp_rcu_dereference_sym(void *p) { - return urcu_bp_dereference(p); + return lttng_ust_rcu_dereference(p); } /* @@ -1024,3 +1117,32 @@ int tp_get_destructors_state(void) { return uatomic_read(&tracepoint_destructors_state); } + +void lttng_ust_synchronize_trace(void) +{ + lttng_ust_urcu_synchronize_rcu(); + /* + * For legacy tracepoint instrumentation, also wait for urcu-bp + * grace period. + */ + if (lttng_ust_liburcu_bp_synchronize_rcu) + lttng_ust_liburcu_bp_synchronize_rcu(); +} + +/* + * Create the wrapper symbols for legacy v1 API. + */ +void tp_rcu_read_lock_bp(void) +{ + lttng_ust_urcu_read_lock(); +} + +void tp_rcu_read_unlock_bp(void) +{ + lttng_ust_urcu_read_unlock(); +} + +void *tp_rcu_dereference_sym_bp(void *p) +{ + return lttng_ust_rcu_dereference(p); +} diff --git a/libringbuffer/frontend_api.h b/libringbuffer/frontend_api.h index 8a320061..c112c846 100644 --- a/libringbuffer/frontend_api.h +++ b/libringbuffer/frontend_api.h @@ -32,7 +32,6 @@ #include -#include #include #include "frontend.h" diff --git a/lttng-ust.pc.in b/lttng-ust.pc.in index f9adde87..45d1a8d4 100644 --- a/lttng-ust.pc.in +++ b/lttng-ust.pc.in @@ -7,7 +7,6 @@ Name: LTTng Userspace Tracer Description: The LTTng Userspace Tracer (UST) is a library accompanied by a set of tools to trace userspace code. 
Version: @PACKAGE_VERSION@ Requires: -Requires.private: liburcu-bp Libs: -L${libdir} -llttng-ust -ldl Cflags: -I${includedir} -- 2.34.1 From 3f38c6a3da61d0d66bb57475645103b8b4c6be4e Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Tue, 1 Dec 2020 11:02:41 -0500 Subject: [PATCH 12/16] fix: undefined symbols for tracepoints in lgpl source Move the new internal 'urcu-ust' urcu flavor symbols to a separate library named 'liblttng-ust-common.so' and link 'liblttng-ust.so' and 'liblttng-ust-tracepoint.so' on it. Add this common library to the pkgconfig file which should handle the transition for client code using it in its build system. LGPL_SOURCE client code which doesn't use pkgconfig in its build system will need to link with '-llttng-ust-common' instead of '-lurcu-bp' like prior ust versions. Change-Id: I807f997a2a94cb4f8d1bdf72002c1ae1b9fedf8d Signed-off-by: Michael Jeanson Signed-off-by: Mathieu Desnoyers --- liblttng-ust/Makefile.am | 14 ++++++++++---- lttng-ust.pc.in | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/liblttng-ust/Makefile.am b/liblttng-ust/Makefile.am index 2b25cfc5..ca81ba6d 100644 --- a/liblttng-ust/Makefile.am +++ b/liblttng-ust/Makefile.am @@ -2,7 +2,13 @@ AM_CFLAGS += -I$(srcdir) -fno-strict-aliasing noinst_LTLIBRARIES = liblttng-ust-runtime.la liblttng-ust-support.la -lib_LTLIBRARIES = liblttng-ust-tracepoint.la liblttng-ust.la +lib_LTLIBRARIES = liblttng-ust-common.la liblttng-ust-tracepoint.la liblttng-ust.la + +liblttng_ust_common_la_SOURCES = \ + lttng-ust-urcu.c \ + lttng-ust-urcu-pointer.c + +liblttng_ust_common_la_LDFLAGS = -no-undefined -version-info $(LTTNG_UST_LIBRARY_VERSION) liblttng_ust_tracepoint_la_SOURCES = \ tracepoint.c \ @@ -10,11 +16,10 @@ liblttng_ust_tracepoint_la_SOURCES = \ tracepoint-internal.h \ lttng-tracer-core.h \ jhash.h \ - error.h \ - lttng-ust-urcu.c \ - lttng-ust-urcu-pointer.c + error.h liblttng_ust_tracepoint_la_LIBADD = \ + liblttng-ust-common.la \ $(top_builddir)/snprintf/libustsnprintf.la \ $(DL_LIBS) @@ -125,6 +130,7 @@ liblttng_ust_support_la_LIBADD = \ liblttng_ust_la_LIBADD = \ -lrt \ + liblttng-ust-common.la \ $(top_builddir)/snprintf/libustsnprintf.la \ $(top_builddir)/liblttng-ust-comm/liblttng-ust-comm.la \ liblttng-ust-tracepoint.la \ diff --git a/lttng-ust.pc.in b/lttng-ust.pc.in index 45d1a8d4..6aee46ef 100644 --- a/lttng-ust.pc.in +++ b/lttng-ust.pc.in @@ -7,6 +7,6 @@ Name: LTTng Userspace Tracer Description: The LTTng Userspace Tracer (UST) is a library accompanied by a set of tools to trace userspace code. 
Version: @PACKAGE_VERSION@ Requires: -Libs: -L${libdir} -llttng-ust -ldl +Libs: -L${libdir} -llttng-ust -llttng-ust-common -ldl Cflags: -I${includedir} -- 2.34.1 From 9560f5ebf55ec70baefe6da3e3d644fc7d497b63 Mon Sep 17 00:00:00 2001 From: Francis Deslauriers Date: Fri, 4 Dec 2020 17:39:37 -0500 Subject: [PATCH 13/16] Fix: event-notifier: not propagating error counter indexes Signed-off-by: Francis Deslauriers Signed-off-by: Mathieu Desnoyers Change-Id: Ie2936cd59e81410891c0fa87602d1b24132d1e35 --- liblttng-ust-ctl/ustctl.c | 1 + liblttng-ust/lttng-events.c | 1 + 2 files changed, 2 insertions(+) diff --git a/liblttng-ust-ctl/ustctl.c b/liblttng-ust-ctl/ustctl.c index 650e7290..e7978744 100644 --- a/liblttng-ust-ctl/ustctl.c +++ b/liblttng-ust-ctl/ustctl.c @@ -583,6 +583,7 @@ int ustctl_create_event_notifier(int sock, struct lttng_ust_event_notifier *even lum.u.event_notifier.event.loglevel_type = event_notifier->event.loglevel_type; lum.u.event_notifier.event.loglevel = event_notifier->event.loglevel; lum.u.event_notifier.event.token = event_notifier->event.token; + lum.u.event_notifier.error_counter_index = event_notifier->error_counter_index; ret = ustcomm_send_app_cmd(sock, &lum, &lur); if (ret) { free(event_notifier_data); diff --git a/liblttng-ust/lttng-events.c b/liblttng-ust/lttng-events.c index 4fa675dd..c80ecbfd 100644 --- a/liblttng-ust/lttng-events.c +++ b/liblttng-ust/lttng-events.c @@ -1413,6 +1413,7 @@ struct lttng_event_notifier_enabler *lttng_event_notifier_enabler_create( CDS_INIT_LIST_HEAD(&event_notifier_enabler->base.excluder_head); event_notifier_enabler->user_token = event_notifier_param->event.token; + event_notifier_enabler->error_counter_index = event_notifier_param->error_counter_index; event_notifier_enabler->num_captures = 0; memcpy(&event_notifier_enabler->base.event_param.name, -- 2.34.1 From f03092d7780681b375484769c2f0e20ed0f0d2a2 Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Mon, 30 Nov 2020 16:21:08 -0500 Subject: [PATCH 14/16] port: use ust-endian.h compat Signed-off-by: Michael Jeanson Signed-off-by: Mathieu Desnoyers Change-Id: Id76539961e28a90bb7083a7a83eafd6d966cfb39 --- liblttng-ust/event-notifier-notification.c | 2 +- liblttng-ust/lttng-bytecode-interpreter.c | 1 - libmsgpack/msgpack.c | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/liblttng-ust/event-notifier-notification.c b/liblttng-ust/event-notifier-notification.c index f1fb8adc..ae997eb7 100644 --- a/liblttng-ust/event-notifier-notification.c +++ b/liblttng-ust/event-notifier-notification.c @@ -21,9 +21,9 @@ #define _LGPL_SOURCE #include -#include #include #include +#include #include #include "../libmsgpack/msgpack.h" diff --git a/liblttng-ust/lttng-bytecode-interpreter.c b/liblttng-ust/lttng-bytecode-interpreter.c index 637fdc1a..6072aee9 100644 --- a/liblttng-ust/lttng-bytecode-interpreter.c +++ b/liblttng-ust/lttng-bytecode-interpreter.c @@ -27,7 +27,6 @@ #define _LGPL_SOURCE #include #include -#include #include #include diff --git a/libmsgpack/msgpack.c b/libmsgpack/msgpack.c index f3eca64a..564639cd 100644 --- a/libmsgpack/msgpack.c +++ b/libmsgpack/msgpack.c @@ -76,7 +76,7 @@ #else /* __KERNEL__ */ -#include +#include #include #include -- 2.34.1 From a3bdb11cc1ba9817d92f3a3b409ab1baf3ba058e Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Mon, 30 Nov 2020 16:24:25 -0500 Subject: [PATCH 15/16] port: fix futex_async wrapper namespacing on FreeBSD Signed-off-by: Michael Jeanson Signed-off-by: Mathieu Desnoyers Change-Id: 
I5db3fe8a596d273e6a693f3e89e26719c552f027 --- liblttng-ust/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/liblttng-ust/futex.h b/liblttng-ust/futex.h index 902ceecf..cbee19c2 100644 --- a/liblttng-ust/futex.h +++ b/liblttng-ust/futex.h @@ -144,7 +144,7 @@ static inline int lttng_ust_futex_async(int32_t *uaddr, int op, int32_t val, static inline int lttng_ust_futex_noasync(int32_t *uaddr, int op, int32_t val, const struct timespec *timeout, int32_t *uaddr2, int32_t val3) { - return futex_async(uaddr, op, val, timeout, uaddr2, val3); + return lttng_ust_futex_async(uaddr, op, val, timeout, uaddr2, val3); } #elif defined(__CYGWIN__) -- 2.34.1 From 809b929460a19e790b0d4281753b3dd3fdd073a3 Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Fri, 20 Nov 2020 16:05:25 -0500 Subject: [PATCH 16/16] cleanup: use Signed-off-by: Michael Jeanson Signed-off-by: Mathieu Desnoyers Change-Id: I82f783fa131aa6337bc1cd7111bcd5ac85ec4d02 --- include/lttng/ust-tid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/lttng/ust-tid.h b/include/lttng/ust-tid.h index e637718a..f92fb419 100644 --- a/include/lttng/ust-tid.h +++ b/include/lttng/ust-tid.h @@ -28,7 +28,7 @@ */ #ifdef __linux__ -#include +#include #endif #if defined(__NR_gettid) -- 2.34.1
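Note on the ust-tid.h compat touched by this last patch: on Linux the
header obtains the thread id through the gettid system call number. A
minimal standalone equivalent, as a sketch only (the helper name and
the fallback branch are assumptions, not the header's actual
contents):

	#ifdef __linux__
	#include <sys/syscall.h>
	#endif
	#include <unistd.h>

	#if defined(__NR_gettid)
	static inline pid_t lttng_gettid(void)
	{
		/* Ask the kernel directly for the per-thread id. */
		return (pid_t) syscall(__NR_gettid);
	}
	#else
	static inline pid_t lttng_gettid(void)
	{
		/* No per-thread id available: fall back to the process id. */
		return getpid();
	}
	#endif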