liblttng-ust/lttng-context-perf-counters.c
/*
 * lttng-context-perf-counters.c
 *
 * LTTng UST performance monitoring counters (perf-counters) integration.
 *
 * Copyright (C) 2009-2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <limits.h>
#include <errno.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <lttng/ust-events.h>
#include <lttng/ust-tracer.h>
#include <lttng/ringbuffer-config.h>
#include <urcu/system.h>
#include <urcu/arch.h>
#include <urcu/rculist.h>
#include <helper.h>
#include <urcu/ref.h>
#include <usterr-signal-safe.h>
#include <signal.h>
#include "lttng-tracer-core.h"

/*
 * We use a global perf counter key and iterate on per-thread RCU lists
 * of fields in the fast path, even though this is not strictly speaking
 * what would provide the best fast-path complexity, to ensure teardown
 * of sessions vs thread exit is handled racelessly.
 *
 * Updates and traversals of thread_list are protected by UST lock.
 * Updates to rcu_field_list are protected by UST lock.
 */

struct lttng_perf_counter_thread_field {
        struct lttng_perf_counter_field *field; /* Back reference */
        struct perf_event_mmap_page *pc;
        struct cds_list_head thread_field_node; /* Per-field list of thread fields (node) */
        struct cds_list_head rcu_field_node;    /* RCU per-thread list of fields (node) */
        int fd;                                 /* Perf FD */
};

struct lttng_perf_counter_thread {
        struct cds_list_head rcu_field_list;    /* RCU per-thread list of fields */
};

struct lttng_perf_counter_field {
        struct perf_event_attr attr;
        struct cds_list_head thread_field_list; /* Per-field list of thread fields */
};

static pthread_key_t perf_counter_key;

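/*
 * Compute the size this context field adds to an event record: padding to
 * align on a 64-bit boundary, plus the 8-byte counter value itself.
 */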
static
size_t perf_counter_get_size(struct lttng_ctx_field *field, size_t offset)
{
        size_t size = 0;

        size += lib_ring_buffer_align(offset, lttng_alignof(uint64_t));
        size += sizeof(uint64_t);
        return size;
}

#if defined(__x86_64__) || defined(__i386__)

static
uint64_t rdpmc(unsigned int counter)
{
        unsigned int low, high;

        asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

        return low | ((uint64_t) high) << 32;
}

static bool arch_perf_use_read(void)
{
        return false;
}

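/*
 * Lock-free read of the counter from user space: the kernel publishes the
 * current rdpmc index and base offset in the mmap'd perf_event_mmap_page,
 * protected by a sequence counter (the "lock" field). The read is retried
 * whenever the sequence number changes underneath us.
 */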
static
uint64_t read_perf_counter(
                struct lttng_perf_counter_thread_field *thread_field)
{
        uint32_t seq, idx;
        uint64_t count;
        struct perf_event_mmap_page *pc = thread_field->pc;

        if (caa_unlikely(!pc))
                return 0;

        do {
                seq = CMM_LOAD_SHARED(pc->lock);
                cmm_barrier();

                idx = pc->index;
                if (idx)
                        count = pc->offset + rdpmc(idx - 1);
                else
                        count = 0;

                cmm_barrier();
        } while (CMM_LOAD_SHARED(pc->lock) != seq);

        return count;
}

#elif defined (__ARM_ARCH_7A__)

static bool arch_perf_use_read(void)
{
        return true;
}

static
uint64_t read_perf_counter(
                struct lttng_perf_counter_thread_field *thread_field)
{
        uint64_t count;

        if (caa_unlikely(thread_field->fd < 0))
                return 0;

        /* Cast to ssize_t: a -1 error from read() must not pass the unsigned compare. */
        if (caa_unlikely(read(thread_field->fd, &count, sizeof(count))
                        < (ssize_t) sizeof(count)))
                return 0;

        return count;
}

#else /* defined(__x86_64__) || defined(__i386__) || defined(__ARM_ARCH_7A__) */

#error "Perf event counters are only supported on x86 and ARMv7 so far."

#endif /* #else defined(__x86_64__) || defined(__i386__) || defined(__ARM_ARCH_7A__) */

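/*
 * glibc does not provide a wrapper for perf_event_open(2), so issue the
 * raw system call directly.
 */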
static
int sys_perf_event_open(struct perf_event_attr *attr,
                pid_t pid, int cpu, int group_fd,
                unsigned long flags)
{
        return syscall(SYS_perf_event_open, attr, pid, cpu,
                        group_fd, flags);
}

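/*
 * Open the counter for the calling thread (pid = 0) on any CPU (cpu = -1),
 * with no group leader and no flags.
 */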
static
int open_perf_fd(struct perf_event_attr *attr)
{
        int fd;

        fd = sys_perf_event_open(attr, 0, -1, -1, 0);
        if (fd < 0)
                return -1;

        return fd;
}

static
void close_perf_fd(int fd)
{
        int ret;

        if (fd < 0)
                return;

        ret = close(fd);
        if (ret) {
                PERROR("Error closing LTTng-UST perf event FD");
        }
}

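/*
 * Map the perf event metadata page (perf_event_mmap_page). On architectures
 * that read the counter directly from user space (x86 rdpmc), the event FD
 * is no longer needed once the page is mapped and is closed here; otherwise
 * the FD is kept open for read(2).
 */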
static
struct perf_event_mmap_page *setup_perf(
                struct lttng_perf_counter_thread_field *thread_field)
{
        void *perf_addr;

        perf_addr = mmap(NULL, sizeof(struct perf_event_mmap_page),
                        PROT_READ, MAP_SHARED, thread_field->fd, 0);
        if (perf_addr == MAP_FAILED)
                perf_addr = NULL;

        if (!arch_perf_use_read()) {
                close_perf_fd(thread_field->fd);
                thread_field->fd = -1;
        }

        return perf_addr;
}

static
void unmap_perf_page(struct perf_event_mmap_page *pc)
{
        int ret;

        if (!pc)
                return;
        ret = munmap(pc, sizeof(struct perf_event_mmap_page));
        if (ret < 0) {
                PERROR("Error in munmap");
                abort();
        }
}

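/*
 * Lazily allocate the per-thread perf state. Signals are blocked so that a
 * probe fired from a signal handler on this same thread cannot race with
 * this setup; hence the TLS key is re-checked once signals are masked.
 */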
static
struct lttng_perf_counter_thread *alloc_perf_counter_thread(void)
{
        struct lttng_perf_counter_thread *perf_thread;
        sigset_t newmask, oldmask;
        int ret;

        ret = sigfillset(&newmask);
        if (ret)
                abort();
        ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
        if (ret)
                abort();
        /* Check again with signals disabled */
        perf_thread = pthread_getspecific(perf_counter_key);
        if (perf_thread)
                goto skip;
        perf_thread = zmalloc(sizeof(*perf_thread));
        if (!perf_thread)
                abort();
        CDS_INIT_LIST_HEAD(&perf_thread->rcu_field_list);
        ret = pthread_setspecific(perf_counter_key, perf_thread);
        if (ret)
                abort();
skip:
        ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
        if (ret)
                abort();
        return perf_thread;
}

static
struct lttng_perf_counter_thread_field *
        add_thread_field(struct lttng_perf_counter_field *perf_field,
                struct lttng_perf_counter_thread *perf_thread)
{
        struct lttng_perf_counter_thread_field *thread_field;
        sigset_t newmask, oldmask;
        int ret;

        ret = sigfillset(&newmask);
        if (ret)
                abort();
        ret = pthread_sigmask(SIG_BLOCK, &newmask, &oldmask);
        if (ret)
                abort();
        /* Check again with signals disabled */
        cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
                        rcu_field_node) {
                if (thread_field->field == perf_field)
                        goto skip;
        }
        thread_field = zmalloc(sizeof(*thread_field));
        if (!thread_field)
                abort();
        thread_field->field = perf_field;
        thread_field->fd = open_perf_fd(&perf_field->attr);
        if (thread_field->fd >= 0)
                thread_field->pc = setup_perf(thread_field);
        /*
         * Note: thread_field->pc can be NULL if setup_perf() fails.
         * Also, thread_field->fd can be -1 if open_perf_fd() fails.
         */
        ust_lock_nocheck();
        cds_list_add_rcu(&thread_field->rcu_field_node,
                        &perf_thread->rcu_field_list);
        cds_list_add(&thread_field->thread_field_node,
                        &perf_field->thread_field_list);
        ust_unlock();
skip:
        ret = pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
        if (ret)
                abort();
        return thread_field;
}

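/*
 * Return this thread's state for the given perf counter field, creating it
 * on first use: the perf event FD (and mmap'd page, where applicable) is
 * opened lazily the first time the calling thread records this context.
 */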
static
struct lttng_perf_counter_thread_field *
                get_thread_field(struct lttng_perf_counter_field *field)
{
        struct lttng_perf_counter_thread *perf_thread;
        struct lttng_perf_counter_thread_field *thread_field;

        perf_thread = pthread_getspecific(perf_counter_key);
        if (!perf_thread)
                perf_thread = alloc_perf_counter_thread();
        cds_list_for_each_entry_rcu(thread_field, &perf_thread->rcu_field_list,
                        rcu_field_node) {
                if (thread_field->field == field)
                        return thread_field;
        }
        /* perf_counter_thread_field not found, need to add one */
        return add_thread_field(field, perf_thread);
}

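/*
 * Read the current counter value for the calling thread, setting up the
 * per-thread perf event on first use.
 */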
static
uint64_t wrapper_perf_counter_read(struct lttng_ctx_field *field)
{
        struct lttng_perf_counter_field *perf_field;
        struct lttng_perf_counter_thread_field *perf_thread_field;

        perf_field = field->u.perf_counter;
        perf_thread_field = get_thread_field(perf_field);
        return read_perf_counter(perf_thread_field);
}

static
void perf_counter_record(struct lttng_ctx_field *field,
                 struct lttng_ust_lib_ring_buffer_ctx *ctx,
                 struct lttng_channel *chan)
{
        uint64_t value;

        value = wrapper_perf_counter_read(field);
        lib_ring_buffer_align_ctx(ctx, lttng_alignof(value));
        chan->ops->event_write(ctx, &value, sizeof(value));
}

static
void perf_counter_get_value(struct lttng_ctx_field *field,
                struct lttng_ctx_value *value)
{
        uint64_t v;

        v = wrapper_perf_counter_read(field);
        value->u.s64 = v;
}

/* Called with UST lock held */
static
void lttng_destroy_perf_thread_field(
                struct lttng_perf_counter_thread_field *thread_field)
{
        close_perf_fd(thread_field->fd);
        unmap_perf_page(thread_field->pc);
        cds_list_del_rcu(&thread_field->rcu_field_node);
        cds_list_del(&thread_field->thread_field_node);
        free(thread_field);
}

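/*
 * pthread TLS destructor, run at thread exit: tear down every per-field
 * perf state owned by the exiting thread, under the UST lock.
 */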
static
void lttng_destroy_perf_thread_key(void *_key)
{
        struct lttng_perf_counter_thread *perf_thread = _key;
        struct lttng_perf_counter_thread_field *pos, *p;

        ust_lock_nocheck();
        cds_list_for_each_entry_safe(pos, p, &perf_thread->rcu_field_list,
                        rcu_field_node)
                lttng_destroy_perf_thread_field(pos);
        ust_unlock();
        free(perf_thread);
}

/* Called with UST lock held */
static
void lttng_destroy_perf_counter_field(struct lttng_ctx_field *field)
{
        struct lttng_perf_counter_field *perf_field;
        struct lttng_perf_counter_thread_field *pos, *p;

        free((char *) field->event_field.name);
        perf_field = field->u.perf_counter;
        /*
         * This teardown is performed once no thread can concurrently
         * perform a "get" on this field, thanks to the urcu-bp grace
         * period.
         */
        cds_list_for_each_entry_safe(pos, p, &perf_field->thread_field_list,
                        thread_field_node)
                lttng_destroy_perf_thread_field(pos);
        free(perf_field);
}

#ifdef __ARM_ARCH_7A__

static
int perf_get_exclude_kernel(void)
{
        return 0;
}

#else /* __ARM_ARCH_7A__ */

static
int perf_get_exclude_kernel(void)
{
        return 1;
}

#endif /* __ARM_ARCH_7A__ */

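/*
 * Entry point used when a perf counter context is added to a channel,
 * e.g. through "lttng add-context -u -t perf:thread:instructions".
 */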
/* Called with UST lock held */
int lttng_add_perf_counter_to_ctx(uint32_t type,
                                uint64_t config,
                                const char *name,
                                struct lttng_ctx **ctx)
{
        struct lttng_ctx_field *field;
        struct lttng_perf_counter_field *perf_field;
        char *name_alloc;
        int ret;

        name_alloc = strdup(name);
        if (!name_alloc) {
                ret = -ENOMEM;
                goto name_alloc_error;
        }
        perf_field = zmalloc(sizeof(*perf_field));
        if (!perf_field) {
                ret = -ENOMEM;
                goto perf_field_alloc_error;
        }
        field = lttng_append_context(ctx);
        if (!field) {
                ret = -ENOMEM;
                goto append_context_error;
        }
        if (lttng_find_context(*ctx, name_alloc)) {
                ret = -EEXIST;
                goto find_error;
        }

        field->destroy = lttng_destroy_perf_counter_field;

        field->event_field.name = name_alloc;
        field->event_field.type.atype = atype_integer;
        field->event_field.type.u.basic.integer.size =
                        sizeof(uint64_t) * CHAR_BIT;
        field->event_field.type.u.basic.integer.alignment =
                        lttng_alignof(uint64_t) * CHAR_BIT;
        field->event_field.type.u.basic.integer.signedness =
                        lttng_is_signed_type(uint64_t);
        field->event_field.type.u.basic.integer.reverse_byte_order = 0;
        field->event_field.type.u.basic.integer.base = 10;
        field->event_field.type.u.basic.integer.encoding = lttng_encode_none;
        field->get_size = perf_counter_get_size;
        field->record = perf_counter_record;
        field->get_value = perf_counter_get_value;

        perf_field->attr.type = type;
        perf_field->attr.config = config;
        perf_field->attr.exclude_kernel = perf_get_exclude_kernel();
        CDS_INIT_LIST_HEAD(&perf_field->thread_field_list);
        field->u.perf_counter = perf_field;

        /* Ensure that this perf counter can be used in this process. */
        ret = open_perf_fd(&perf_field->attr);
        if (ret < 0) {
                ret = -ENODEV;
                goto setup_error;
        }
        close_perf_fd(ret);

        /*
         * Contexts can only be added before tracing is started, so we
         * don't have to synchronize against concurrent threads using
         * the field here.
         */

        lttng_context_update(*ctx);
        return 0;

setup_error:
find_error:
        lttng_remove_context_field(ctx, field);
append_context_error:
        free(perf_field);
perf_field_alloc_error:
        free(name_alloc);
name_alloc_error:
        return ret;
}

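/*
 * Create the pthread key whose destructor tears down the per-thread perf
 * state when a thread exits.
 */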
int lttng_perf_counter_init(void)
{
        int ret;

        ret = pthread_key_create(&perf_counter_key,
                        lttng_destroy_perf_thread_key);
        if (ret)
                ret = -ret;
        return ret;
}

void lttng_perf_counter_exit(void)
{
        int ret;

        ret = pthread_key_delete(perf_counter_key);
        if (ret) {
                errno = ret;
                PERROR("Error in pthread_key_delete");
        }
}