diff options
Diffstat (limited to 'kernel/tracepoint.c')
-rw-r--r-- | kernel/tracepoint.c | 477 |
1 files changed, 477 insertions, 0 deletions
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c new file mode 100644 index 000000000000..f2b7c28a4708 --- /dev/null +++ b/kernel/tracepoint.c | |||
@@ -0,0 +1,477 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Mathieu Desnoyers | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mutex.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <linux/jhash.h> | ||
22 | #include <linux/list.h> | ||
23 | #include <linux/rcupdate.h> | ||
24 | #include <linux/tracepoint.h> | ||
25 | #include <linux/err.h> | ||
26 | #include <linux/slab.h> | ||
27 | |||
28 | extern struct tracepoint __start___tracepoints[]; | ||
29 | extern struct tracepoint __stop___tracepoints[]; | ||
30 | |||
31 | /* Set to 1 to enable tracepoint debug output */ | ||
32 | static const int tracepoint_debug; | ||
33 | |||
34 | /* | ||
35 | * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the | ||
36 | * builtin and module tracepoints and the hash table. | ||
37 | */ | ||
38 | static DEFINE_MUTEX(tracepoints_mutex); | ||
39 | |||
40 | /* | ||
41 | * Tracepoint hash table, containing the active tracepoints. | ||
42 | * Protected by tracepoints_mutex. | ||
43 | */ | ||
44 | #define TRACEPOINT_HASH_BITS 6 | ||
45 | #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) | ||
46 | |||
47 | /* | ||
48 | * Note about RCU : | ||
49 | * It is used to to delay the free of multiple probes array until a quiescent | ||
50 | * state is reached. | ||
51 | * Tracepoint entries modifications are protected by the tracepoints_mutex. | ||
52 | */ | ||
53 | struct tracepoint_entry { | ||
54 | struct hlist_node hlist; | ||
55 | void **funcs; | ||
56 | int refcount; /* Number of times armed. 0 if disarmed. */ | ||
57 | struct rcu_head rcu; | ||
58 | void *oldptr; | ||
59 | unsigned char rcu_pending:1; | ||
60 | char name[0]; | ||
61 | }; | ||
62 | |||
63 | static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; | ||
64 | |||
65 | static void free_old_closure(struct rcu_head *head) | ||
66 | { | ||
67 | struct tracepoint_entry *entry = container_of(head, | ||
68 | struct tracepoint_entry, rcu); | ||
69 | kfree(entry->oldptr); | ||
70 | /* Make sure we free the data before setting the pending flag to 0 */ | ||
71 | smp_wmb(); | ||
72 | entry->rcu_pending = 0; | ||
73 | } | ||
74 | |||
75 | static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) | ||
76 | { | ||
77 | if (!old) | ||
78 | return; | ||
79 | entry->oldptr = old; | ||
80 | entry->rcu_pending = 1; | ||
81 | /* write rcu_pending before calling the RCU callback */ | ||
82 | smp_wmb(); | ||
83 | call_rcu_sched(&entry->rcu, free_old_closure); | ||
84 | } | ||
85 | |||
86 | static void debug_print_probes(struct tracepoint_entry *entry) | ||
87 | { | ||
88 | int i; | ||
89 | |||
90 | if (!tracepoint_debug) | ||
91 | return; | ||
92 | |||
93 | for (i = 0; entry->funcs[i]; i++) | ||
94 | printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]); | ||
95 | } | ||
96 | |||
97 | static void * | ||
98 | tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) | ||
99 | { | ||
100 | int nr_probes = 0; | ||
101 | void **old, **new; | ||
102 | |||
103 | WARN_ON(!probe); | ||
104 | |||
105 | debug_print_probes(entry); | ||
106 | old = entry->funcs; | ||
107 | if (old) { | ||
108 | /* (N -> N+1), (N != 0, 1) probes */ | ||
109 | for (nr_probes = 0; old[nr_probes]; nr_probes++) | ||
110 | if (old[nr_probes] == probe) | ||
111 | return ERR_PTR(-EEXIST); | ||
112 | } | ||
113 | /* + 2 : one for new probe, one for NULL func */ | ||
114 | new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); | ||
115 | if (new == NULL) | ||
116 | return ERR_PTR(-ENOMEM); | ||
117 | if (old) | ||
118 | memcpy(new, old, nr_probes * sizeof(void *)); | ||
119 | new[nr_probes] = probe; | ||
120 | entry->refcount = nr_probes + 1; | ||
121 | entry->funcs = new; | ||
122 | debug_print_probes(entry); | ||
123 | return old; | ||
124 | } | ||
125 | |||
126 | static void * | ||
127 | tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) | ||
128 | { | ||
129 | int nr_probes = 0, nr_del = 0, i; | ||
130 | void **old, **new; | ||
131 | |||
132 | old = entry->funcs; | ||
133 | |||
134 | debug_print_probes(entry); | ||
135 | /* (N -> M), (N > 1, M >= 0) probes */ | ||
136 | for (nr_probes = 0; old[nr_probes]; nr_probes++) { | ||
137 | if ((!probe || old[nr_probes] == probe)) | ||
138 | nr_del++; | ||
139 | } | ||
140 | |||
141 | if (nr_probes - nr_del == 0) { | ||
142 | /* N -> 0, (N > 1) */ | ||
143 | entry->funcs = NULL; | ||
144 | entry->refcount = 0; | ||
145 | debug_print_probes(entry); | ||
146 | return old; | ||
147 | } else { | ||
148 | int j = 0; | ||
149 | /* N -> M, (N > 1, M > 0) */ | ||
150 | /* + 1 for NULL */ | ||
151 | new = kzalloc((nr_probes - nr_del + 1) | ||
152 | * sizeof(void *), GFP_KERNEL); | ||
153 | if (new == NULL) | ||
154 | return ERR_PTR(-ENOMEM); | ||
155 | for (i = 0; old[i]; i++) | ||
156 | if ((probe && old[i] != probe)) | ||
157 | new[j++] = old[i]; | ||
158 | entry->refcount = nr_probes - nr_del; | ||
159 | entry->funcs = new; | ||
160 | } | ||
161 | debug_print_probes(entry); | ||
162 | return old; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Get tracepoint if the tracepoint is present in the tracepoint hash table. | ||
167 | * Must be called with tracepoints_mutex held. | ||
168 | * Returns NULL if not present. | ||
169 | */ | ||
170 | static struct tracepoint_entry *get_tracepoint(const char *name) | ||
171 | { | ||
172 | struct hlist_head *head; | ||
173 | struct hlist_node *node; | ||
174 | struct tracepoint_entry *e; | ||
175 | u32 hash = jhash(name, strlen(name), 0); | ||
176 | |||
177 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; | ||
178 | hlist_for_each_entry(e, node, head, hlist) { | ||
179 | if (!strcmp(name, e->name)) | ||
180 | return e; | ||
181 | } | ||
182 | return NULL; | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * Add the tracepoint to the tracepoint hash table. Must be called with | ||
187 | * tracepoints_mutex held. | ||
188 | */ | ||
189 | static struct tracepoint_entry *add_tracepoint(const char *name) | ||
190 | { | ||
191 | struct hlist_head *head; | ||
192 | struct hlist_node *node; | ||
193 | struct tracepoint_entry *e; | ||
194 | size_t name_len = strlen(name) + 1; | ||
195 | u32 hash = jhash(name, name_len-1, 0); | ||
196 | |||
197 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; | ||
198 | hlist_for_each_entry(e, node, head, hlist) { | ||
199 | if (!strcmp(name, e->name)) { | ||
200 | printk(KERN_NOTICE | ||
201 | "tracepoint %s busy\n", name); | ||
202 | return ERR_PTR(-EEXIST); /* Already there */ | ||
203 | } | ||
204 | } | ||
205 | /* | ||
206 | * Using kmalloc here to allocate a variable length element. Could | ||
207 | * cause some memory fragmentation if overused. | ||
208 | */ | ||
209 | e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); | ||
210 | if (!e) | ||
211 | return ERR_PTR(-ENOMEM); | ||
212 | memcpy(&e->name[0], name, name_len); | ||
213 | e->funcs = NULL; | ||
214 | e->refcount = 0; | ||
215 | e->rcu_pending = 0; | ||
216 | hlist_add_head(&e->hlist, head); | ||
217 | return e; | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * Remove the tracepoint from the tracepoint hash table. Must be called with | ||
222 | * mutex_lock held. | ||
223 | */ | ||
224 | static int remove_tracepoint(const char *name) | ||
225 | { | ||
226 | struct hlist_head *head; | ||
227 | struct hlist_node *node; | ||
228 | struct tracepoint_entry *e; | ||
229 | int found = 0; | ||
230 | size_t len = strlen(name) + 1; | ||
231 | u32 hash = jhash(name, len-1, 0); | ||
232 | |||
233 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; | ||
234 | hlist_for_each_entry(e, node, head, hlist) { | ||
235 | if (!strcmp(name, e->name)) { | ||
236 | found = 1; | ||
237 | break; | ||
238 | } | ||
239 | } | ||
240 | if (!found) | ||
241 | return -ENOENT; | ||
242 | if (e->refcount) | ||
243 | return -EBUSY; | ||
244 | hlist_del(&e->hlist); | ||
245 | /* Make sure the call_rcu_sched has been executed */ | ||
246 | if (e->rcu_pending) | ||
247 | rcu_barrier_sched(); | ||
248 | kfree(e); | ||
249 | return 0; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * Sets the probe callback corresponding to one tracepoint. | ||
254 | */ | ||
255 | static void set_tracepoint(struct tracepoint_entry **entry, | ||
256 | struct tracepoint *elem, int active) | ||
257 | { | ||
258 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | ||
259 | |||
260 | /* | ||
261 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new | ||
262 | * probe callbacks array is consistent before setting a pointer to it. | ||
263 | * This array is referenced by __DO_TRACE from | ||
264 | * include/linux/tracepoints.h. A matching smp_read_barrier_depends() | ||
265 | * is used. | ||
266 | */ | ||
267 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); | ||
268 | elem->state = active; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * Disable a tracepoint and its probe callback. | ||
273 | * Note: only waiting an RCU period after setting elem->call to the empty | ||
274 | * function insures that the original callback is not used anymore. This insured | ||
275 | * by preempt_disable around the call site. | ||
276 | */ | ||
277 | static void disable_tracepoint(struct tracepoint *elem) | ||
278 | { | ||
279 | elem->state = 0; | ||
280 | } | ||
281 | |||
282 | /** | ||
283 | * tracepoint_update_probe_range - Update a probe range | ||
284 | * @begin: beginning of the range | ||
285 | * @end: end of the range | ||
286 | * | ||
287 | * Updates the probe callback corresponding to a range of tracepoints. | ||
288 | */ | ||
289 | void tracepoint_update_probe_range(struct tracepoint *begin, | ||
290 | struct tracepoint *end) | ||
291 | { | ||
292 | struct tracepoint *iter; | ||
293 | struct tracepoint_entry *mark_entry; | ||
294 | |||
295 | mutex_lock(&tracepoints_mutex); | ||
296 | for (iter = begin; iter < end; iter++) { | ||
297 | mark_entry = get_tracepoint(iter->name); | ||
298 | if (mark_entry) { | ||
299 | set_tracepoint(&mark_entry, iter, | ||
300 | !!mark_entry->refcount); | ||
301 | } else { | ||
302 | disable_tracepoint(iter); | ||
303 | } | ||
304 | } | ||
305 | mutex_unlock(&tracepoints_mutex); | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * Update probes, removing the faulty probes. | ||
310 | */ | ||
311 | static void tracepoint_update_probes(void) | ||
312 | { | ||
313 | /* Core kernel tracepoints */ | ||
314 | tracepoint_update_probe_range(__start___tracepoints, | ||
315 | __stop___tracepoints); | ||
316 | /* tracepoints in modules. */ | ||
317 | module_update_tracepoints(); | ||
318 | } | ||
319 | |||
320 | /** | ||
321 | * tracepoint_probe_register - Connect a probe to a tracepoint | ||
322 | * @name: tracepoint name | ||
323 | * @probe: probe handler | ||
324 | * | ||
325 | * Returns 0 if ok, error value on error. | ||
326 | * The probe address must at least be aligned on the architecture pointer size. | ||
327 | */ | ||
328 | int tracepoint_probe_register(const char *name, void *probe) | ||
329 | { | ||
330 | struct tracepoint_entry *entry; | ||
331 | int ret = 0; | ||
332 | void *old; | ||
333 | |||
334 | mutex_lock(&tracepoints_mutex); | ||
335 | entry = get_tracepoint(name); | ||
336 | if (!entry) { | ||
337 | entry = add_tracepoint(name); | ||
338 | if (IS_ERR(entry)) { | ||
339 | ret = PTR_ERR(entry); | ||
340 | goto end; | ||
341 | } | ||
342 | } | ||
343 | /* | ||
344 | * If we detect that a call_rcu_sched is pending for this tracepoint, | ||
345 | * make sure it's executed now. | ||
346 | */ | ||
347 | if (entry->rcu_pending) | ||
348 | rcu_barrier_sched(); | ||
349 | old = tracepoint_entry_add_probe(entry, probe); | ||
350 | if (IS_ERR(old)) { | ||
351 | ret = PTR_ERR(old); | ||
352 | goto end; | ||
353 | } | ||
354 | mutex_unlock(&tracepoints_mutex); | ||
355 | tracepoint_update_probes(); /* may update entry */ | ||
356 | mutex_lock(&tracepoints_mutex); | ||
357 | entry = get_tracepoint(name); | ||
358 | WARN_ON(!entry); | ||
359 | if (entry->rcu_pending) | ||
360 | rcu_barrier_sched(); | ||
361 | tracepoint_entry_free_old(entry, old); | ||
362 | end: | ||
363 | mutex_unlock(&tracepoints_mutex); | ||
364 | return ret; | ||
365 | } | ||
366 | EXPORT_SYMBOL_GPL(tracepoint_probe_register); | ||
367 | |||
368 | /** | ||
369 | * tracepoint_probe_unregister - Disconnect a probe from a tracepoint | ||
370 | * @name: tracepoint name | ||
371 | * @probe: probe function pointer | ||
372 | * | ||
373 | * We do not need to call a synchronize_sched to make sure the probes have | ||
374 | * finished running before doing a module unload, because the module unload | ||
375 | * itself uses stop_machine(), which insures that every preempt disabled section | ||
376 | * have finished. | ||
377 | */ | ||
378 | int tracepoint_probe_unregister(const char *name, void *probe) | ||
379 | { | ||
380 | struct tracepoint_entry *entry; | ||
381 | void *old; | ||
382 | int ret = -ENOENT; | ||
383 | |||
384 | mutex_lock(&tracepoints_mutex); | ||
385 | entry = get_tracepoint(name); | ||
386 | if (!entry) | ||
387 | goto end; | ||
388 | if (entry->rcu_pending) | ||
389 | rcu_barrier_sched(); | ||
390 | old = tracepoint_entry_remove_probe(entry, probe); | ||
391 | mutex_unlock(&tracepoints_mutex); | ||
392 | tracepoint_update_probes(); /* may update entry */ | ||
393 | mutex_lock(&tracepoints_mutex); | ||
394 | entry = get_tracepoint(name); | ||
395 | if (!entry) | ||
396 | goto end; | ||
397 | if (entry->rcu_pending) | ||
398 | rcu_barrier_sched(); | ||
399 | tracepoint_entry_free_old(entry, old); | ||
400 | remove_tracepoint(name); /* Ignore busy error message */ | ||
401 | ret = 0; | ||
402 | end: | ||
403 | mutex_unlock(&tracepoints_mutex); | ||
404 | return ret; | ||
405 | } | ||
406 | EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); | ||
407 | |||
/**
 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
 * @tracepoint: current tracepoints (in), next tracepoint (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns whether a next tracepoint has been found (1) or not (0).
 * Will return the first tracepoint in the range if the input tracepoint is
 * NULL.
 */
int tracepoint_get_iter_range(struct tracepoint **tracepoint,
	struct tracepoint *begin, struct tracepoint *end)
{
	struct tracepoint *cur = *tracepoint;

	/* No position yet: start at the head of a non-empty range */
	if (cur == NULL && begin != end) {
		*tracepoint = begin;
		return 1;
	}
	/* Otherwise the current position is valid only if it lies in range */
	return (cur >= begin && cur < end) ? 1 : 0;
}
EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
430 | |||
431 | static void tracepoint_get_iter(struct tracepoint_iter *iter) | ||
432 | { | ||
433 | int found = 0; | ||
434 | |||
435 | /* Core kernel tracepoints */ | ||
436 | if (!iter->module) { | ||
437 | found = tracepoint_get_iter_range(&iter->tracepoint, | ||
438 | __start___tracepoints, __stop___tracepoints); | ||
439 | if (found) | ||
440 | goto end; | ||
441 | } | ||
442 | /* tracepoints in modules. */ | ||
443 | found = module_get_iter_tracepoints(iter); | ||
444 | end: | ||
445 | if (!found) | ||
446 | tracepoint_iter_reset(iter); | ||
447 | } | ||
448 | |||
/* Position @iter on a valid tracepoint, or reset it if there is none. */
void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);
454 | |||
455 | void tracepoint_iter_next(struct tracepoint_iter *iter) | ||
456 | { | ||
457 | iter->tracepoint++; | ||
458 | /* | ||
459 | * iter->tracepoint may be invalid because we blindly incremented it. | ||
460 | * Make sure it is valid by marshalling on the tracepoints, getting the | ||
461 | * tracepoints from following modules if necessary. | ||
462 | */ | ||
463 | tracepoint_get_iter(iter); | ||
464 | } | ||
465 | EXPORT_SYMBOL_GPL(tracepoint_iter_next); | ||
466 | |||
/* End an iteration. Currently there is nothing to release. */
void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
471 | |||
472 | void tracepoint_iter_reset(struct tracepoint_iter *iter) | ||
473 | { | ||
474 | iter->module = NULL; | ||
475 | iter->tracepoint = NULL; | ||
476 | } | ||
477 | EXPORT_SYMBOL_GPL(tracepoint_iter_reset); | ||