tracepoints: use rcu sched
[deliverable/linux.git] / kernel / tracepoint.c
1 /*
2 * Copyright (C) 2008 Mathieu Desnoyers
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18 #include <linux/module.h>
19 #include <linux/mutex.h>
20 #include <linux/types.h>
21 #include <linux/jhash.h>
22 #include <linux/list.h>
23 #include <linux/rcupdate.h>
24 #include <linux/tracepoint.h>
25 #include <linux/err.h>
26 #include <linux/slab.h>
27
28 extern struct tracepoint __start___tracepoints[];
29 extern struct tracepoint __stop___tracepoints[];
30
31 /* Set to 1 to enable tracepoint debug output */
32 static const int tracepoint_debug;
33
34 /*
35 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
36 * builtin and module tracepoints and the hash table.
37 */
38 static DEFINE_MUTEX(tracepoints_mutex);
39
40 /*
41 * Tracepoint hash table, containing the active tracepoints.
42 * Protected by tracepoints_mutex.
43 */
44 #define TRACEPOINT_HASH_BITS 6
45 #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
46
47 /*
48 * Note about RCU :
49 * It is used to to delay the free of multiple probes array until a quiescent
50 * state is reached.
51 * Tracepoint entries modifications are protected by the tracepoints_mutex.
52 */
53 struct tracepoint_entry {
54 struct hlist_node hlist;
55 void **funcs;
56 int refcount; /* Number of times armed. 0 if disarmed. */
57 struct rcu_head rcu;
58 void *oldptr;
59 unsigned char rcu_pending:1;
60 char name[0];
61 };
62
63 static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
64
65 static void free_old_closure(struct rcu_head *head)
66 {
67 struct tracepoint_entry *entry = container_of(head,
68 struct tracepoint_entry, rcu);
69 kfree(entry->oldptr);
70 /* Make sure we free the data before setting the pending flag to 0 */
71 smp_wmb();
72 entry->rcu_pending = 0;
73 }
74
75 static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
76 {
77 if (!old)
78 return;
79 entry->oldptr = old;
80 entry->rcu_pending = 1;
81 /* write rcu_pending before calling the RCU callback */
82 smp_wmb();
83 call_rcu_sched(&entry->rcu, free_old_closure);
84 }
85
86 static void debug_print_probes(struct tracepoint_entry *entry)
87 {
88 int i;
89
90 if (!tracepoint_debug)
91 return;
92
93 for (i = 0; entry->funcs[i]; i++)
94 printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
95 }
96
97 static void *
98 tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
99 {
100 int nr_probes = 0;
101 void **old, **new;
102
103 WARN_ON(!probe);
104
105 debug_print_probes(entry);
106 old = entry->funcs;
107 if (old) {
108 /* (N -> N+1), (N != 0, 1) probes */
109 for (nr_probes = 0; old[nr_probes]; nr_probes++)
110 if (old[nr_probes] == probe)
111 return ERR_PTR(-EEXIST);
112 }
113 /* + 2 : one for new probe, one for NULL func */
114 new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
115 if (new == NULL)
116 return ERR_PTR(-ENOMEM);
117 if (old)
118 memcpy(new, old, nr_probes * sizeof(void *));
119 new[nr_probes] = probe;
120 entry->refcount = nr_probes + 1;
121 entry->funcs = new;
122 debug_print_probes(entry);
123 return old;
124 }
125
126 static void *
127 tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
128 {
129 int nr_probes = 0, nr_del = 0, i;
130 void **old, **new;
131
132 old = entry->funcs;
133
134 debug_print_probes(entry);
135 /* (N -> M), (N > 1, M >= 0) probes */
136 for (nr_probes = 0; old[nr_probes]; nr_probes++) {
137 if ((!probe || old[nr_probes] == probe))
138 nr_del++;
139 }
140
141 if (nr_probes - nr_del == 0) {
142 /* N -> 0, (N > 1) */
143 entry->funcs = NULL;
144 entry->refcount = 0;
145 debug_print_probes(entry);
146 return old;
147 } else {
148 int j = 0;
149 /* N -> M, (N > 1, M > 0) */
150 /* + 1 for NULL */
151 new = kzalloc((nr_probes - nr_del + 1)
152 * sizeof(void *), GFP_KERNEL);
153 if (new == NULL)
154 return ERR_PTR(-ENOMEM);
155 for (i = 0; old[i]; i++)
156 if ((probe && old[i] != probe))
157 new[j++] = old[i];
158 entry->refcount = nr_probes - nr_del;
159 entry->funcs = new;
160 }
161 debug_print_probes(entry);
162 return old;
163 }
164
165 /*
166 * Get tracepoint if the tracepoint is present in the tracepoint hash table.
167 * Must be called with tracepoints_mutex held.
168 * Returns NULL if not present.
169 */
170 static struct tracepoint_entry *get_tracepoint(const char *name)
171 {
172 struct hlist_head *head;
173 struct hlist_node *node;
174 struct tracepoint_entry *e;
175 u32 hash = jhash(name, strlen(name), 0);
176
177 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
178 hlist_for_each_entry(e, node, head, hlist) {
179 if (!strcmp(name, e->name))
180 return e;
181 }
182 return NULL;
183 }
184
185 /*
186 * Add the tracepoint to the tracepoint hash table. Must be called with
187 * tracepoints_mutex held.
188 */
189 static struct tracepoint_entry *add_tracepoint(const char *name)
190 {
191 struct hlist_head *head;
192 struct hlist_node *node;
193 struct tracepoint_entry *e;
194 size_t name_len = strlen(name) + 1;
195 u32 hash = jhash(name, name_len-1, 0);
196
197 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
198 hlist_for_each_entry(e, node, head, hlist) {
199 if (!strcmp(name, e->name)) {
200 printk(KERN_NOTICE
201 "tracepoint %s busy\n", name);
202 return ERR_PTR(-EEXIST); /* Already there */
203 }
204 }
205 /*
206 * Using kmalloc here to allocate a variable length element. Could
207 * cause some memory fragmentation if overused.
208 */
209 e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
210 if (!e)
211 return ERR_PTR(-ENOMEM);
212 memcpy(&e->name[0], name, name_len);
213 e->funcs = NULL;
214 e->refcount = 0;
215 e->rcu_pending = 0;
216 hlist_add_head(&e->hlist, head);
217 return e;
218 }
219
220 /*
221 * Remove the tracepoint from the tracepoint hash table. Must be called with
222 * mutex_lock held.
223 */
224 static int remove_tracepoint(const char *name)
225 {
226 struct hlist_head *head;
227 struct hlist_node *node;
228 struct tracepoint_entry *e;
229 int found = 0;
230 size_t len = strlen(name) + 1;
231 u32 hash = jhash(name, len-1, 0);
232
233 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
234 hlist_for_each_entry(e, node, head, hlist) {
235 if (!strcmp(name, e->name)) {
236 found = 1;
237 break;
238 }
239 }
240 if (!found)
241 return -ENOENT;
242 if (e->refcount)
243 return -EBUSY;
244 hlist_del(&e->hlist);
245 /* Make sure the call_rcu_sched has been executed */
246 if (e->rcu_pending)
247 rcu_barrier_sched();
248 kfree(e);
249 return 0;
250 }
251
252 /*
253 * Sets the probe callback corresponding to one tracepoint.
254 */
255 static void set_tracepoint(struct tracepoint_entry **entry,
256 struct tracepoint *elem, int active)
257 {
258 WARN_ON(strcmp((*entry)->name, elem->name) != 0);
259
260 /*
261 * rcu_assign_pointer has a smp_wmb() which makes sure that the new
262 * probe callbacks array is consistent before setting a pointer to it.
263 * This array is referenced by __DO_TRACE from
264 * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
265 * is used.
266 */
267 rcu_assign_pointer(elem->funcs, (*entry)->funcs);
268 elem->state = active;
269 }
270
271 /*
272 * Disable a tracepoint and its probe callback.
273 * Note: only waiting an RCU period after setting elem->call to the empty
274 * function insures that the original callback is not used anymore. This insured
275 * by preempt_disable around the call site.
276 */
277 static void disable_tracepoint(struct tracepoint *elem)
278 {
279 elem->state = 0;
280 }
281
282 /**
283 * tracepoint_update_probe_range - Update a probe range
284 * @begin: beginning of the range
285 * @end: end of the range
286 *
287 * Updates the probe callback corresponding to a range of tracepoints.
288 */
289 void tracepoint_update_probe_range(struct tracepoint *begin,
290 struct tracepoint *end)
291 {
292 struct tracepoint *iter;
293 struct tracepoint_entry *mark_entry;
294
295 mutex_lock(&tracepoints_mutex);
296 for (iter = begin; iter < end; iter++) {
297 mark_entry = get_tracepoint(iter->name);
298 if (mark_entry) {
299 set_tracepoint(&mark_entry, iter,
300 !!mark_entry->refcount);
301 } else {
302 disable_tracepoint(iter);
303 }
304 }
305 mutex_unlock(&tracepoints_mutex);
306 }
307
308 /*
309 * Update probes, removing the faulty probes.
310 */
311 static void tracepoint_update_probes(void)
312 {
313 /* Core kernel tracepoints */
314 tracepoint_update_probe_range(__start___tracepoints,
315 __stop___tracepoints);
316 /* tracepoints in modules. */
317 module_update_tracepoints();
318 }
319
320 /**
321 * tracepoint_probe_register - Connect a probe to a tracepoint
322 * @name: tracepoint name
323 * @probe: probe handler
324 *
325 * Returns 0 if ok, error value on error.
326 * The probe address must at least be aligned on the architecture pointer size.
327 */
328 int tracepoint_probe_register(const char *name, void *probe)
329 {
330 struct tracepoint_entry *entry;
331 int ret = 0;
332 void *old;
333
334 mutex_lock(&tracepoints_mutex);
335 entry = get_tracepoint(name);
336 if (!entry) {
337 entry = add_tracepoint(name);
338 if (IS_ERR(entry)) {
339 ret = PTR_ERR(entry);
340 goto end;
341 }
342 }
343 /*
344 * If we detect that a call_rcu_sched is pending for this tracepoint,
345 * make sure it's executed now.
346 */
347 if (entry->rcu_pending)
348 rcu_barrier_sched();
349 old = tracepoint_entry_add_probe(entry, probe);
350 if (IS_ERR(old)) {
351 ret = PTR_ERR(old);
352 goto end;
353 }
354 mutex_unlock(&tracepoints_mutex);
355 tracepoint_update_probes(); /* may update entry */
356 mutex_lock(&tracepoints_mutex);
357 entry = get_tracepoint(name);
358 WARN_ON(!entry);
359 tracepoint_entry_free_old(entry, old);
360 end:
361 mutex_unlock(&tracepoints_mutex);
362 return ret;
363 }
364 EXPORT_SYMBOL_GPL(tracepoint_probe_register);
365
366 /**
367 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
368 * @name: tracepoint name
369 * @probe: probe function pointer
370 *
371 * We do not need to call a synchronize_sched to make sure the probes have
372 * finished running before doing a module unload, because the module unload
373 * itself uses stop_machine(), which insures that every preempt disabled section
374 * have finished.
375 */
376 int tracepoint_probe_unregister(const char *name, void *probe)
377 {
378 struct tracepoint_entry *entry;
379 void *old;
380 int ret = -ENOENT;
381
382 mutex_lock(&tracepoints_mutex);
383 entry = get_tracepoint(name);
384 if (!entry)
385 goto end;
386 if (entry->rcu_pending)
387 rcu_barrier_sched();
388 old = tracepoint_entry_remove_probe(entry, probe);
389 mutex_unlock(&tracepoints_mutex);
390 tracepoint_update_probes(); /* may update entry */
391 mutex_lock(&tracepoints_mutex);
392 entry = get_tracepoint(name);
393 if (!entry)
394 goto end;
395 tracepoint_entry_free_old(entry, old);
396 remove_tracepoint(name); /* Ignore busy error message */
397 ret = 0;
398 end:
399 mutex_unlock(&tracepoints_mutex);
400 return ret;
401 }
402 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
403
/**
 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
 * @tracepoint: current tracepoints (in), next tracepoint (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns whether a next tracepoint has been found (1) or not (0).
 * A NULL input tracepoint is replaced by @begin when the range is not empty;
 * otherwise the input is only checked against [@begin, @end) and left as is.
 */
int tracepoint_get_iter_range(struct tracepoint **tracepoint,
	struct tracepoint *begin, struct tracepoint *end)
{
	struct tracepoint *cur = *tracepoint;

	if (!cur && begin != end) {
		/* Start of iteration: hand out the first element. */
		*tracepoint = begin;
		return 1;
	}
	return cur >= begin && cur < end;
}
425 EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
426
427 static void tracepoint_get_iter(struct tracepoint_iter *iter)
428 {
429 int found = 0;
430
431 /* Core kernel tracepoints */
432 if (!iter->module) {
433 found = tracepoint_get_iter_range(&iter->tracepoint,
434 __start___tracepoints, __stop___tracepoints);
435 if (found)
436 goto end;
437 }
438 /* tracepoints in modules. */
439 found = module_get_iter_tracepoints(iter);
440 end:
441 if (!found)
442 tracepoint_iter_reset(iter);
443 }
444
/*
 * Begin an iteration: position @iter on a valid tracepoint, or reset it if
 * none is found.
 */
void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);
450
451 void tracepoint_iter_next(struct tracepoint_iter *iter)
452 {
453 iter->tracepoint++;
454 /*
455 * iter->tracepoint may be invalid because we blindly incremented it.
456 * Make sure it is valid by marshalling on the tracepoints, getting the
457 * tracepoints from following modules if necessary.
458 */
459 tracepoint_get_iter(iter);
460 }
461 EXPORT_SYMBOL_GPL(tracepoint_iter_next);
462
/*
 * End an iteration. There is currently nothing to release.
 */
void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
467
468 void tracepoint_iter_reset(struct tracepoint_iter *iter)
469 {
470 iter->module = NULL;
471 iter->tracepoint = NULL;
472 }
473 EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
This page took 0.049146 seconds and 6 git commands to generate.