Commit | Line | Data |
---|---|---|
97e1c18e MD |
1 | /* |
2 | * Copyright (C) 2008 Mathieu Desnoyers | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | */ | |
18 | #include <linux/module.h> | |
19 | #include <linux/mutex.h> | |
20 | #include <linux/types.h> | |
21 | #include <linux/jhash.h> | |
22 | #include <linux/list.h> | |
23 | #include <linux/rcupdate.h> | |
24 | #include <linux/tracepoint.h> | |
25 | #include <linux/err.h> | |
26 | #include <linux/slab.h> | |
27 | ||
28 | extern struct tracepoint __start___tracepoints[]; | |
29 | extern struct tracepoint __stop___tracepoints[]; | |
30 | ||
31 | /* Set to 1 to enable tracepoint debug output */ | |
32 | static const int tracepoint_debug; | |
33 | ||
34 | /* | |
35 | * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the | |
36 | * builtin and module tracepoints and the hash table. | |
37 | */ | |
38 | static DEFINE_MUTEX(tracepoints_mutex); | |
39 | ||
/*
 * Sizing of the tracepoint hash table, which holds the active tracepoints.
 * The table is protected by tracepoints_mutex. The bucket count is a power
 * of two so that a hash can be reduced with "& (TRACEPOINT_TABLE_SIZE - 1)".
 */
#define TRACEPOINT_HASH_BITS 6
#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
46 | ||
47 | /* | |
48 | * Note about RCU : | |
49 | * It is used to to delay the free of multiple probes array until a quiescent | |
50 | * state is reached. | |
51 | * Tracepoint entries modifications are protected by the tracepoints_mutex. | |
52 | */ | |
53 | struct tracepoint_entry { | |
54 | struct hlist_node hlist; | |
55 | void **funcs; | |
56 | int refcount; /* Number of times armed. 0 if disarmed. */ | |
57 | struct rcu_head rcu; | |
58 | void *oldptr; | |
59 | unsigned char rcu_pending:1; | |
60 | char name[0]; | |
61 | }; | |
62 | ||
63 | static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; | |
64 | ||
65 | static void free_old_closure(struct rcu_head *head) | |
66 | { | |
67 | struct tracepoint_entry *entry = container_of(head, | |
68 | struct tracepoint_entry, rcu); | |
69 | kfree(entry->oldptr); | |
70 | /* Make sure we free the data before setting the pending flag to 0 */ | |
71 | smp_wmb(); | |
72 | entry->rcu_pending = 0; | |
73 | } | |
74 | ||
75 | static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) | |
76 | { | |
77 | if (!old) | |
78 | return; | |
79 | entry->oldptr = old; | |
80 | entry->rcu_pending = 1; | |
81 | /* write rcu_pending before calling the RCU callback */ | |
82 | smp_wmb(); | |
ca2db6cf | 83 | call_rcu_sched(&entry->rcu, free_old_closure); |
97e1c18e MD |
84 | } |
85 | ||
86 | static void debug_print_probes(struct tracepoint_entry *entry) | |
87 | { | |
88 | int i; | |
89 | ||
90 | if (!tracepoint_debug) | |
91 | return; | |
92 | ||
93 | for (i = 0; entry->funcs[i]; i++) | |
94 | printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]); | |
95 | } | |
96 | ||
97 | static void * | |
98 | tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) | |
99 | { | |
100 | int nr_probes = 0; | |
101 | void **old, **new; | |
102 | ||
103 | WARN_ON(!probe); | |
104 | ||
105 | debug_print_probes(entry); | |
106 | old = entry->funcs; | |
107 | if (old) { | |
108 | /* (N -> N+1), (N != 0, 1) probes */ | |
109 | for (nr_probes = 0; old[nr_probes]; nr_probes++) | |
110 | if (old[nr_probes] == probe) | |
111 | return ERR_PTR(-EEXIST); | |
112 | } | |
113 | /* + 2 : one for new probe, one for NULL func */ | |
114 | new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); | |
115 | if (new == NULL) | |
116 | return ERR_PTR(-ENOMEM); | |
117 | if (old) | |
118 | memcpy(new, old, nr_probes * sizeof(void *)); | |
119 | new[nr_probes] = probe; | |
120 | entry->refcount = nr_probes + 1; | |
121 | entry->funcs = new; | |
122 | debug_print_probes(entry); | |
123 | return old; | |
124 | } | |
125 | ||
126 | static void * | |
127 | tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) | |
128 | { | |
129 | int nr_probes = 0, nr_del = 0, i; | |
130 | void **old, **new; | |
131 | ||
132 | old = entry->funcs; | |
133 | ||
f66af459 FW |
134 | if (!old) |
135 | return NULL; | |
136 | ||
97e1c18e MD |
137 | debug_print_probes(entry); |
138 | /* (N -> M), (N > 1, M >= 0) probes */ | |
139 | for (nr_probes = 0; old[nr_probes]; nr_probes++) { | |
140 | if ((!probe || old[nr_probes] == probe)) | |
141 | nr_del++; | |
142 | } | |
143 | ||
144 | if (nr_probes - nr_del == 0) { | |
145 | /* N -> 0, (N > 1) */ | |
146 | entry->funcs = NULL; | |
147 | entry->refcount = 0; | |
148 | debug_print_probes(entry); | |
149 | return old; | |
150 | } else { | |
151 | int j = 0; | |
152 | /* N -> M, (N > 1, M > 0) */ | |
153 | /* + 1 for NULL */ | |
154 | new = kzalloc((nr_probes - nr_del + 1) | |
155 | * sizeof(void *), GFP_KERNEL); | |
156 | if (new == NULL) | |
157 | return ERR_PTR(-ENOMEM); | |
158 | for (i = 0; old[i]; i++) | |
159 | if ((probe && old[i] != probe)) | |
160 | new[j++] = old[i]; | |
161 | entry->refcount = nr_probes - nr_del; | |
162 | entry->funcs = new; | |
163 | } | |
164 | debug_print_probes(entry); | |
165 | return old; | |
166 | } | |
167 | ||
168 | /* | |
169 | * Get tracepoint if the tracepoint is present in the tracepoint hash table. | |
170 | * Must be called with tracepoints_mutex held. | |
171 | * Returns NULL if not present. | |
172 | */ | |
173 | static struct tracepoint_entry *get_tracepoint(const char *name) | |
174 | { | |
175 | struct hlist_head *head; | |
176 | struct hlist_node *node; | |
177 | struct tracepoint_entry *e; | |
178 | u32 hash = jhash(name, strlen(name), 0); | |
179 | ||
9795302a | 180 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
97e1c18e MD |
181 | hlist_for_each_entry(e, node, head, hlist) { |
182 | if (!strcmp(name, e->name)) | |
183 | return e; | |
184 | } | |
185 | return NULL; | |
186 | } | |
187 | ||
188 | /* | |
189 | * Add the tracepoint to the tracepoint hash table. Must be called with | |
190 | * tracepoints_mutex held. | |
191 | */ | |
192 | static struct tracepoint_entry *add_tracepoint(const char *name) | |
193 | { | |
194 | struct hlist_head *head; | |
195 | struct hlist_node *node; | |
196 | struct tracepoint_entry *e; | |
197 | size_t name_len = strlen(name) + 1; | |
198 | u32 hash = jhash(name, name_len-1, 0); | |
199 | ||
9795302a | 200 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
97e1c18e MD |
201 | hlist_for_each_entry(e, node, head, hlist) { |
202 | if (!strcmp(name, e->name)) { | |
203 | printk(KERN_NOTICE | |
204 | "tracepoint %s busy\n", name); | |
205 | return ERR_PTR(-EEXIST); /* Already there */ | |
206 | } | |
207 | } | |
208 | /* | |
209 | * Using kmalloc here to allocate a variable length element. Could | |
210 | * cause some memory fragmentation if overused. | |
211 | */ | |
212 | e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); | |
213 | if (!e) | |
214 | return ERR_PTR(-ENOMEM); | |
215 | memcpy(&e->name[0], name, name_len); | |
216 | e->funcs = NULL; | |
217 | e->refcount = 0; | |
218 | e->rcu_pending = 0; | |
219 | hlist_add_head(&e->hlist, head); | |
220 | return e; | |
221 | } | |
222 | ||
223 | /* | |
224 | * Remove the tracepoint from the tracepoint hash table. Must be called with | |
225 | * mutex_lock held. | |
226 | */ | |
227 | static int remove_tracepoint(const char *name) | |
228 | { | |
229 | struct hlist_head *head; | |
230 | struct hlist_node *node; | |
231 | struct tracepoint_entry *e; | |
232 | int found = 0; | |
233 | size_t len = strlen(name) + 1; | |
234 | u32 hash = jhash(name, len-1, 0); | |
235 | ||
9795302a | 236 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
97e1c18e MD |
237 | hlist_for_each_entry(e, node, head, hlist) { |
238 | if (!strcmp(name, e->name)) { | |
239 | found = 1; | |
240 | break; | |
241 | } | |
242 | } | |
243 | if (!found) | |
244 | return -ENOENT; | |
245 | if (e->refcount) | |
246 | return -EBUSY; | |
247 | hlist_del(&e->hlist); | |
ca2db6cf | 248 | /* Make sure the call_rcu_sched has been executed */ |
97e1c18e | 249 | if (e->rcu_pending) |
ca2db6cf | 250 | rcu_barrier_sched(); |
97e1c18e MD |
251 | kfree(e); |
252 | return 0; | |
253 | } | |
254 | ||
255 | /* | |
256 | * Sets the probe callback corresponding to one tracepoint. | |
257 | */ | |
258 | static void set_tracepoint(struct tracepoint_entry **entry, | |
259 | struct tracepoint *elem, int active) | |
260 | { | |
261 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | |
262 | ||
263 | /* | |
264 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new | |
265 | * probe callbacks array is consistent before setting a pointer to it. | |
266 | * This array is referenced by __DO_TRACE from | |
267 | * include/linux/tracepoints.h. A matching smp_read_barrier_depends() | |
268 | * is used. | |
269 | */ | |
270 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); | |
271 | elem->state = active; | |
272 | } | |
273 | ||
274 | /* | |
275 | * Disable a tracepoint and its probe callback. | |
276 | * Note: only waiting an RCU period after setting elem->call to the empty | |
277 | * function insures that the original callback is not used anymore. This insured | |
278 | * by preempt_disable around the call site. | |
279 | */ | |
280 | static void disable_tracepoint(struct tracepoint *elem) | |
281 | { | |
282 | elem->state = 0; | |
283 | } | |
284 | ||
285 | /** | |
286 | * tracepoint_update_probe_range - Update a probe range | |
287 | * @begin: beginning of the range | |
288 | * @end: end of the range | |
289 | * | |
290 | * Updates the probe callback corresponding to a range of tracepoints. | |
291 | */ | |
292 | void tracepoint_update_probe_range(struct tracepoint *begin, | |
293 | struct tracepoint *end) | |
294 | { | |
295 | struct tracepoint *iter; | |
296 | struct tracepoint_entry *mark_entry; | |
297 | ||
298 | mutex_lock(&tracepoints_mutex); | |
299 | for (iter = begin; iter < end; iter++) { | |
300 | mark_entry = get_tracepoint(iter->name); | |
301 | if (mark_entry) { | |
302 | set_tracepoint(&mark_entry, iter, | |
303 | !!mark_entry->refcount); | |
304 | } else { | |
305 | disable_tracepoint(iter); | |
306 | } | |
307 | } | |
308 | mutex_unlock(&tracepoints_mutex); | |
309 | } | |
310 | ||
311 | /* | |
312 | * Update probes, removing the faulty probes. | |
313 | */ | |
314 | static void tracepoint_update_probes(void) | |
315 | { | |
316 | /* Core kernel tracepoints */ | |
317 | tracepoint_update_probe_range(__start___tracepoints, | |
318 | __stop___tracepoints); | |
319 | /* tracepoints in modules. */ | |
320 | module_update_tracepoints(); | |
321 | } | |
322 | ||
323 | /** | |
324 | * tracepoint_probe_register - Connect a probe to a tracepoint | |
325 | * @name: tracepoint name | |
326 | * @probe: probe handler | |
327 | * | |
328 | * Returns 0 if ok, error value on error. | |
329 | * The probe address must at least be aligned on the architecture pointer size. | |
330 | */ | |
331 | int tracepoint_probe_register(const char *name, void *probe) | |
332 | { | |
333 | struct tracepoint_entry *entry; | |
334 | int ret = 0; | |
335 | void *old; | |
336 | ||
337 | mutex_lock(&tracepoints_mutex); | |
338 | entry = get_tracepoint(name); | |
339 | if (!entry) { | |
340 | entry = add_tracepoint(name); | |
341 | if (IS_ERR(entry)) { | |
342 | ret = PTR_ERR(entry); | |
343 | goto end; | |
344 | } | |
345 | } | |
346 | /* | |
ca2db6cf | 347 | * If we detect that a call_rcu_sched is pending for this tracepoint, |
97e1c18e MD |
348 | * make sure it's executed now. |
349 | */ | |
350 | if (entry->rcu_pending) | |
ca2db6cf | 351 | rcu_barrier_sched(); |
97e1c18e MD |
352 | old = tracepoint_entry_add_probe(entry, probe); |
353 | if (IS_ERR(old)) { | |
354 | ret = PTR_ERR(old); | |
355 | goto end; | |
356 | } | |
357 | mutex_unlock(&tracepoints_mutex); | |
358 | tracepoint_update_probes(); /* may update entry */ | |
359 | mutex_lock(&tracepoints_mutex); | |
360 | entry = get_tracepoint(name); | |
361 | WARN_ON(!entry); | |
9a1e9693 MD |
362 | if (entry->rcu_pending) |
363 | rcu_barrier_sched(); | |
97e1c18e MD |
364 | tracepoint_entry_free_old(entry, old); |
365 | end: | |
366 | mutex_unlock(&tracepoints_mutex); | |
367 | return ret; | |
368 | } | |
369 | EXPORT_SYMBOL_GPL(tracepoint_probe_register); | |
370 | ||
371 | /** | |
372 | * tracepoint_probe_unregister - Disconnect a probe from a tracepoint | |
373 | * @name: tracepoint name | |
374 | * @probe: probe function pointer | |
375 | * | |
376 | * We do not need to call a synchronize_sched to make sure the probes have | |
377 | * finished running before doing a module unload, because the module unload | |
378 | * itself uses stop_machine(), which insures that every preempt disabled section | |
379 | * have finished. | |
380 | */ | |
381 | int tracepoint_probe_unregister(const char *name, void *probe) | |
382 | { | |
383 | struct tracepoint_entry *entry; | |
384 | void *old; | |
385 | int ret = -ENOENT; | |
386 | ||
387 | mutex_lock(&tracepoints_mutex); | |
388 | entry = get_tracepoint(name); | |
389 | if (!entry) | |
390 | goto end; | |
391 | if (entry->rcu_pending) | |
ca2db6cf | 392 | rcu_barrier_sched(); |
97e1c18e | 393 | old = tracepoint_entry_remove_probe(entry, probe); |
f66af459 FW |
394 | if (!old) { |
395 | printk(KERN_WARNING "Warning: Trying to unregister a probe" | |
396 | "that doesn't exist\n"); | |
397 | goto end; | |
398 | } | |
97e1c18e MD |
399 | mutex_unlock(&tracepoints_mutex); |
400 | tracepoint_update_probes(); /* may update entry */ | |
401 | mutex_lock(&tracepoints_mutex); | |
402 | entry = get_tracepoint(name); | |
403 | if (!entry) | |
404 | goto end; | |
9a1e9693 MD |
405 | if (entry->rcu_pending) |
406 | rcu_barrier_sched(); | |
97e1c18e MD |
407 | tracepoint_entry_free_old(entry, old); |
408 | remove_tracepoint(name); /* Ignore busy error message */ | |
409 | ret = 0; | |
410 | end: | |
411 | mutex_unlock(&tracepoints_mutex); | |
412 | return ret; | |
413 | } | |
414 | EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); | |
415 | ||
/**
 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
 * @tracepoint: current tracepoints (in), next tracepoint (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns whether a next tracepoint has been found (1) or not (0).
 * Will return the first tracepoint in the range if the input tracepoint is
 * NULL and the range is not empty. A non-NULL input is only checked against
 * [@begin, @end); it is never modified.
 */
int tracepoint_get_iter_range(struct tracepoint **tracepoint,
	struct tracepoint *begin, struct tracepoint *end)
{
	struct tracepoint *cur = *tracepoint;

	if (!cur && begin != end) {
		*tracepoint = begin;
		return 1;
	}
	return (cur >= begin && cur < end) ? 1 : 0;
}
EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
438 | ||
439 | static void tracepoint_get_iter(struct tracepoint_iter *iter) | |
440 | { | |
441 | int found = 0; | |
442 | ||
443 | /* Core kernel tracepoints */ | |
444 | if (!iter->module) { | |
445 | found = tracepoint_get_iter_range(&iter->tracepoint, | |
446 | __start___tracepoints, __stop___tracepoints); | |
447 | if (found) | |
448 | goto end; | |
449 | } | |
450 | /* tracepoints in modules. */ | |
451 | found = module_get_iter_tracepoints(iter); | |
452 | end: | |
453 | if (!found) | |
454 | tracepoint_iter_reset(iter); | |
455 | } | |
456 | ||
/*
 * Begin (or resume) an iteration: validates the current position of @iter,
 * or picks the first tracepoint when the iterator has been reset.
 */
void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);
462 | ||
463 | void tracepoint_iter_next(struct tracepoint_iter *iter) | |
464 | { | |
465 | iter->tracepoint++; | |
466 | /* | |
467 | * iter->tracepoint may be invalid because we blindly incremented it. | |
468 | * Make sure it is valid by marshalling on the tracepoints, getting the | |
469 | * tracepoints from following modules if necessary. | |
470 | */ | |
471 | tracepoint_get_iter(iter); | |
472 | } | |
473 | EXPORT_SYMBOL_GPL(tracepoint_iter_next); | |
474 | ||
/*
 * End an iteration. Intentionally empty: the body performs no work.
 */
void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
479 | ||
480 | void tracepoint_iter_reset(struct tracepoint_iter *iter) | |
481 | { | |
482 | iter->module = NULL; | |
483 | iter->tracepoint = NULL; | |
484 | } | |
485 | EXPORT_SYMBOL_GPL(tracepoint_iter_reset); |