Diffstat (limited to 'kernel/tracepoint.c')
 -rw-r--r--   kernel/tracepoint.c | 485
 1 file changed, 485 insertions(+), 0 deletions(-)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
new file mode 100644
index 000000000000..af8c85664882
--- /dev/null
+++ b/kernel/tracepoint.c
@@ -0,0 +1,485 @@
/*
 * Copyright (C) 2008 Mathieu Desnoyers
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/tracepoint.h>
#include <linux/err.h>
#include <linux/slab.h>

extern struct tracepoint __start___tracepoints[];
extern struct tracepoint __stop___tracepoints[];

/* Set to 1 to enable tracepoint debug output */
static const int tracepoint_debug;

/*
 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
 * builtin and module tracepoints and the hash table.
 */
static DEFINE_MUTEX(tracepoints_mutex);

/*
 * Tracepoint hash table, containing the active tracepoints.
 * Protected by tracepoints_mutex.
 */
#define TRACEPOINT_HASH_BITS 6
#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)

/*
 * Note about RCU :
 * It is used to delay the free of multiple probes arrays until a quiescent
 * state is reached.
 * Tracepoint entries modifications are protected by the tracepoints_mutex.
 */
struct tracepoint_entry {
	struct hlist_node hlist;
	void **funcs;
	int refcount;	/* Number of times armed. 0 if disarmed. */
	struct rcu_head rcu;
	void *oldptr;
	unsigned char rcu_pending:1;
	char name[0];
};
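
/*
 * Probe-array lifecycle (summary of the code below): adding or removing a
 * probe never modifies an entry's funcs array in place.  A new
 * NULL-terminated array is allocated, published to the tracepoint with
 * rcu_assign_pointer() in set_tracepoint(), and the old array is handed to
 * tracepoint_entry_free_old(), which frees it from an RCU-sched callback
 * once all call sites (which run with preemption disabled) have moved on.
 */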

static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];

static void free_old_closure(struct rcu_head *head)
{
	struct tracepoint_entry *entry = container_of(head,
		struct tracepoint_entry, rcu);
	kfree(entry->oldptr);
	/* Make sure we free the data before setting the pending flag to 0 */
	smp_wmb();
	entry->rcu_pending = 0;
}

static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
{
	if (!old)
		return;
	entry->oldptr = old;
	entry->rcu_pending = 1;
	/* write rcu_pending before calling the RCU callback */
	smp_wmb();
	call_rcu_sched(&entry->rcu, free_old_closure);
}

static void debug_print_probes(struct tracepoint_entry *entry)
{
	int i;

	if (!tracepoint_debug)
		return;

	for (i = 0; entry->funcs[i]; i++)
		printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
}

static void *
tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
{
	int nr_probes = 0;
	void **old, **new;

	WARN_ON(!probe);

	debug_print_probes(entry);
	old = entry->funcs;
	if (old) {
		/* (N -> N+1), (N != 0, 1) probes */
		for (nr_probes = 0; old[nr_probes]; nr_probes++)
			if (old[nr_probes] == probe)
				return ERR_PTR(-EEXIST);
	}
	/* + 2 : one for new probe, one for NULL func */
	new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
	if (new == NULL)
		return ERR_PTR(-ENOMEM);
	if (old)
		memcpy(new, old, nr_probes * sizeof(void *));
	new[nr_probes] = probe;
	entry->refcount = nr_probes + 1;
	entry->funcs = new;
	debug_print_probes(entry);
	return old;
}
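
/*
 * For illustration: if entry->funcs is { A, B, NULL } and
 * tracepoint_entry_add_probe(entry, C) is called, a new array
 * { A, B, C, NULL } is allocated, entry->refcount becomes 3, and the old
 * { A, B, NULL } array is returned so the caller can hand it to
 * tracepoint_entry_free_old() once the new array has been published.
 */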

static void *
tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
{
	int nr_probes = 0, nr_del = 0, i;
	void **old, **new;

	old = entry->funcs;

	if (!old)
		return NULL;

	debug_print_probes(entry);
	/* (N -> M), (N > 1, M >= 0) probes */
	for (nr_probes = 0; old[nr_probes]; nr_probes++) {
		if ((!probe || old[nr_probes] == probe))
			nr_del++;
	}

	if (nr_probes - nr_del == 0) {
		/* N -> 0, (N > 1) */
		entry->funcs = NULL;
		entry->refcount = 0;
		debug_print_probes(entry);
		return old;
	} else {
		int j = 0;
		/* N -> M, (N > 1, M > 0) */
		/* + 1 for NULL */
		new = kzalloc((nr_probes - nr_del + 1)
			* sizeof(void *), GFP_KERNEL);
		if (new == NULL)
			return ERR_PTR(-ENOMEM);
		for (i = 0; old[i]; i++)
			if ((probe && old[i] != probe))
				new[j++] = old[i];
		entry->refcount = nr_probes - nr_del;
		entry->funcs = new;
	}
	debug_print_probes(entry);
	return old;
}

/*
 * Get tracepoint if the tracepoint is present in the tracepoint hash table.
 * Must be called with tracepoints_mutex held.
 * Returns NULL if not present.
 */
static struct tracepoint_entry *get_tracepoint(const char *name)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct tracepoint_entry *e;
	u32 hash = jhash(name, strlen(name), 0);

	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
	hlist_for_each_entry(e, node, head, hlist) {
		if (!strcmp(name, e->name))
			return e;
	}
	return NULL;
}

/*
 * Add the tracepoint to the tracepoint hash table. Must be called with
 * tracepoints_mutex held.
 */
static struct tracepoint_entry *add_tracepoint(const char *name)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct tracepoint_entry *e;
	size_t name_len = strlen(name) + 1;
	u32 hash = jhash(name, name_len-1, 0);

	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
	hlist_for_each_entry(e, node, head, hlist) {
		if (!strcmp(name, e->name)) {
			printk(KERN_NOTICE
				"tracepoint %s busy\n", name);
			return ERR_PTR(-EEXIST);	/* Already there */
		}
	}
	/*
	 * Using kmalloc here to allocate a variable length element. Could
	 * cause some memory fragmentation if overused.
	 */
	e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
	if (!e)
		return ERR_PTR(-ENOMEM);
	memcpy(&e->name[0], name, name_len);
	e->funcs = NULL;
	e->refcount = 0;
	e->rcu_pending = 0;
	hlist_add_head(&e->hlist, head);
	return e;
}

/*
 * Remove the tracepoint from the tracepoint hash table. Must be called with
 * tracepoints_mutex held.
 */
static int remove_tracepoint(const char *name)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct tracepoint_entry *e;
	int found = 0;
	size_t len = strlen(name) + 1;
	u32 hash = jhash(name, len-1, 0);

	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
	hlist_for_each_entry(e, node, head, hlist) {
		if (!strcmp(name, e->name)) {
			found = 1;
			break;
		}
	}
	if (!found)
		return -ENOENT;
	if (e->refcount)
		return -EBUSY;
	hlist_del(&e->hlist);
	/* Make sure the call_rcu_sched has been executed */
	if (e->rcu_pending)
		rcu_barrier_sched();
	kfree(e);
	return 0;
}

/*
 * Sets the probe callback corresponding to one tracepoint.
 */
static void set_tracepoint(struct tracepoint_entry **entry,
	struct tracepoint *elem, int active)
{
	WARN_ON(strcmp((*entry)->name, elem->name) != 0);

	/*
	 * rcu_assign_pointer has a smp_wmb() which makes sure that the new
	 * probe callbacks array is consistent before setting a pointer to it.
	 * This array is referenced by __DO_TRACE from
	 * include/linux/tracepoint.h. A matching smp_read_barrier_depends()
	 * is used.
	 */
	rcu_assign_pointer(elem->funcs, (*entry)->funcs);
	elem->state = active;
}
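
/*
 * Reader-side sketch (simplified; the authoritative __DO_TRACE() macro lives
 * in include/linux/tracepoint.h and may differ in detail): a tracepoint call
 * site dereferences the funcs pointer published above and walks the
 * NULL-terminated array with preemption disabled, roughly:
 *
 *	void **it_func;
 *
 *	preempt_disable();
 *	it_func = rcu_dereference(tp->funcs);
 *	if (it_func) {
 *		do {
 *			((void (*)(proto))(*it_func))(args);
 *		} while (*(++it_func));
 *	}
 *	preempt_enable();
 */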

/*
 * Disable a tracepoint and its probe callback.
 * Note: only waiting for an RCU grace period after setting elem->state to 0
 * ensures that the original callback is not used anymore. This is guaranteed
 * by the preempt_disable around the call site.
 */
static void disable_tracepoint(struct tracepoint *elem)
{
	elem->state = 0;
}

/**
 * tracepoint_update_probe_range - Update a probe range
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Updates the probe callback corresponding to a range of tracepoints.
 */
void tracepoint_update_probe_range(struct tracepoint *begin,
	struct tracepoint *end)
{
	struct tracepoint *iter;
	struct tracepoint_entry *mark_entry;

	mutex_lock(&tracepoints_mutex);
	for (iter = begin; iter < end; iter++) {
		mark_entry = get_tracepoint(iter->name);
		if (mark_entry) {
			set_tracepoint(&mark_entry, iter,
					!!mark_entry->refcount);
		} else {
			disable_tracepoint(iter);
		}
	}
	mutex_unlock(&tracepoints_mutex);
}

/*
 * Update probes, removing the faulty probes.
 */
static void tracepoint_update_probes(void)
{
	/* Core kernel tracepoints */
	tracepoint_update_probe_range(__start___tracepoints,
		__stop___tracepoints);
	/* tracepoints in modules. */
	module_update_tracepoints();
}

/**
 * tracepoint_probe_register - Connect a probe to a tracepoint
 * @name: tracepoint name
 * @probe: probe handler
 *
 * Returns 0 if ok, error value on error.
 * The probe address must at least be aligned on the architecture pointer size.
 */
int tracepoint_probe_register(const char *name, void *probe)
{
	struct tracepoint_entry *entry;
	int ret = 0;
	void *old;

	mutex_lock(&tracepoints_mutex);
	entry = get_tracepoint(name);
	if (!entry) {
		entry = add_tracepoint(name);
		if (IS_ERR(entry)) {
			ret = PTR_ERR(entry);
			goto end;
		}
	}
	/*
	 * If we detect that a call_rcu_sched is pending for this tracepoint,
	 * make sure it's executed now.
	 */
	if (entry->rcu_pending)
		rcu_barrier_sched();
	old = tracepoint_entry_add_probe(entry, probe);
	if (IS_ERR(old)) {
		ret = PTR_ERR(old);
		goto end;
	}
	mutex_unlock(&tracepoints_mutex);
	tracepoint_update_probes();		/* may update entry */
	mutex_lock(&tracepoints_mutex);
	entry = get_tracepoint(name);
	WARN_ON(!entry);
	if (entry->rcu_pending)
		rcu_barrier_sched();
	tracepoint_entry_free_old(entry, old);
end:
	mutex_unlock(&tracepoints_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);
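
/*
 * Usage sketch (hypothetical names, for illustration only): a subsystem that
 * declared a tracepoint elsewhere, e.g. with
 *
 *	DEFINE_TRACE(subsys_event,
 *		TPPROTO(int arg),
 *		TPARGS(arg));
 *
 * can attach a probe matching that prototype:
 *
 *	static void probe_subsys_event(int arg)
 *	{
 *		printk(KERN_INFO "subsys_event: %d\n", arg);
 *	}
 *
 *	ret = tracepoint_probe_register("subsys_event", probe_subsys_event);
 *
 * and later detach it with
 *
 *	tracepoint_probe_unregister("subsys_event", probe_subsys_event);
 *
 * "subsys_event" and probe_subsys_event are made-up names; the declaration
 * macro is assumed to be the one provided by include/linux/tracepoint.h.
 */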

/**
 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
 * @name: tracepoint name
 * @probe: probe function pointer
 *
 * We do not need to call a synchronize_sched to make sure the probes have
 * finished running before doing a module unload, because the module unload
 * itself uses stop_machine(), which ensures that every preempt-disabled
 * section has finished.
 */
int tracepoint_probe_unregister(const char *name, void *probe)
{
	struct tracepoint_entry *entry;
	void *old;
	int ret = -ENOENT;

	mutex_lock(&tracepoints_mutex);
	entry = get_tracepoint(name);
	if (!entry)
		goto end;
	if (entry->rcu_pending)
		rcu_barrier_sched();
	old = tracepoint_entry_remove_probe(entry, probe);
	if (!old) {
		printk(KERN_WARNING "Warning: Trying to unregister a probe "
			"that doesn't exist\n");
		goto end;
	}
	mutex_unlock(&tracepoints_mutex);
	tracepoint_update_probes();		/* may update entry */
	mutex_lock(&tracepoints_mutex);
	entry = get_tracepoint(name);
	if (!entry)
		goto end;
	if (entry->rcu_pending)
		rcu_barrier_sched();
	tracepoint_entry_free_old(entry, old);
	remove_tracepoint(name);	/* Ignore busy error message */
	ret = 0;
end:
	mutex_unlock(&tracepoints_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);

/**
 * tracepoint_get_iter_range - Get the next tracepoint in a given range
 * @tracepoint: current tracepoint (in), next tracepoint (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns whether a next tracepoint has been found (1) or not (0).
 * Will return the first tracepoint in the range if the input tracepoint is
 * NULL.
 */
int tracepoint_get_iter_range(struct tracepoint **tracepoint,
	struct tracepoint *begin, struct tracepoint *end)
{
	if (!*tracepoint && begin != end) {
		*tracepoint = begin;
		return 1;
	}
	if (*tracepoint >= begin && *tracepoint < end)
		return 1;
	return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);

static void tracepoint_get_iter(struct tracepoint_iter *iter)
{
	int found = 0;

	/* Core kernel tracepoints */
	if (!iter->module) {
		found = tracepoint_get_iter_range(&iter->tracepoint,
				__start___tracepoints, __stop___tracepoints);
		if (found)
			goto end;
	}
	/* tracepoints in modules. */
	found = module_get_iter_tracepoints(iter);
end:
	if (!found)
		tracepoint_iter_reset(iter);
}

void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);

void tracepoint_iter_next(struct tracepoint_iter *iter)
{
	iter->tracepoint++;
	/*
	 * iter->tracepoint may be invalid because we blindly incremented it.
	 * Make sure it is valid by marshalling on the tracepoints, getting the
	 * tracepoints from following modules if necessary.
	 */
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_next);

void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);

void tracepoint_iter_reset(struct tracepoint_iter *iter)
{
	iter->module = NULL;
	iter->tracepoint = NULL;
}
EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
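
/*
 * Iterator usage sketch (hypothetical, for illustration only): walking every
 * known tracepoint, builtin and modular, and printing its name.  Field names
 * (->tracepoint, ->module) follow struct tracepoint_iter as used above.
 *
 *	struct tracepoint_iter iter;
 *
 *	tracepoint_iter_reset(&iter);
 *	tracepoint_iter_start(&iter);
 *	while (iter.tracepoint) {
 *		printk(KERN_INFO "%s\n", iter.tracepoint->name);
 *		tracepoint_iter_next(&iter);
 *	}
 *	tracepoint_iter_stop(&iter);
 */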