diff options
Diffstat (limited to 'kernel/tracepoint.c')
| -rw-r--r-- | kernel/tracepoint.c | 485 |
1 files changed, 485 insertions, 0 deletions
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c new file mode 100644 index 000000000000..af8c85664882 --- /dev/null +++ b/kernel/tracepoint.c | |||
| @@ -0,0 +1,485 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008 Mathieu Desnoyers | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
| 17 | */ | ||
| 18 | #include <linux/module.h> | ||
| 19 | #include <linux/mutex.h> | ||
| 20 | #include <linux/types.h> | ||
| 21 | #include <linux/jhash.h> | ||
| 22 | #include <linux/list.h> | ||
| 23 | #include <linux/rcupdate.h> | ||
| 24 | #include <linux/tracepoint.h> | ||
| 25 | #include <linux/err.h> | ||
| 26 | #include <linux/slab.h> | ||
| 27 | |||
| 28 | extern struct tracepoint __start___tracepoints[]; | ||
| 29 | extern struct tracepoint __stop___tracepoints[]; | ||
| 30 | |||
| 31 | /* Set to 1 to enable tracepoint debug output */ | ||
| 32 | static const int tracepoint_debug; | ||
| 33 | |||
| 34 | /* | ||
| 35 | * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the | ||
| 36 | * builtin and module tracepoints and the hash table. | ||
| 37 | */ | ||
| 38 | static DEFINE_MUTEX(tracepoints_mutex); | ||
| 39 | |||
| 40 | /* | ||
| 41 | * Tracepoint hash table, containing the active tracepoints. | ||
| 42 | * Protected by tracepoints_mutex. | ||
| 43 | */ | ||
| 44 | #define TRACEPOINT_HASH_BITS 6 | ||
| 45 | #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) | ||
| 46 | |||
| 47 | /* | ||
| 48 | * Note about RCU : | ||
| 49 | * It is used to to delay the free of multiple probes array until a quiescent | ||
| 50 | * state is reached. | ||
| 51 | * Tracepoint entries modifications are protected by the tracepoints_mutex. | ||
| 52 | */ | ||
| 53 | struct tracepoint_entry { | ||
| 54 | struct hlist_node hlist; | ||
| 55 | void **funcs; | ||
| 56 | int refcount; /* Number of times armed. 0 if disarmed. */ | ||
| 57 | struct rcu_head rcu; | ||
| 58 | void *oldptr; | ||
| 59 | unsigned char rcu_pending:1; | ||
| 60 | char name[0]; | ||
| 61 | }; | ||
| 62 | |||
| 63 | static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; | ||
| 64 | |||
| 65 | static void free_old_closure(struct rcu_head *head) | ||
| 66 | { | ||
| 67 | struct tracepoint_entry *entry = container_of(head, | ||
| 68 | struct tracepoint_entry, rcu); | ||
| 69 | kfree(entry->oldptr); | ||
| 70 | /* Make sure we free the data before setting the pending flag to 0 */ | ||
| 71 | smp_wmb(); | ||
| 72 | entry->rcu_pending = 0; | ||
| 73 | } | ||
| 74 | |||
| 75 | static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) | ||
| 76 | { | ||
| 77 | if (!old) | ||
| 78 | return; | ||
| 79 | entry->oldptr = old; | ||
| 80 | entry->rcu_pending = 1; | ||
| 81 | /* write rcu_pending before calling the RCU callback */ | ||
| 82 | smp_wmb(); | ||
| 83 | call_rcu_sched(&entry->rcu, free_old_closure); | ||
| 84 | } | ||
| 85 | |||
| 86 | static void debug_print_probes(struct tracepoint_entry *entry) | ||
| 87 | { | ||
| 88 | int i; | ||
| 89 | |||
| 90 | if (!tracepoint_debug) | ||
| 91 | return; | ||
| 92 | |||
| 93 | for (i = 0; entry->funcs[i]; i++) | ||
| 94 | printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]); | ||
| 95 | } | ||
| 96 | |||
| 97 | static void * | ||
| 98 | tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) | ||
| 99 | { | ||
| 100 | int nr_probes = 0; | ||
| 101 | void **old, **new; | ||
| 102 | |||
| 103 | WARN_ON(!probe); | ||
| 104 | |||
| 105 | debug_print_probes(entry); | ||
| 106 | old = entry->funcs; | ||
| 107 | if (old) { | ||
| 108 | /* (N -> N+1), (N != 0, 1) probes */ | ||
| 109 | for (nr_probes = 0; old[nr_probes]; nr_probes++) | ||
| 110 | if (old[nr_probes] == probe) | ||
| 111 | return ERR_PTR(-EEXIST); | ||
| 112 | } | ||
| 113 | /* + 2 : one for new probe, one for NULL func */ | ||
| 114 | new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); | ||
| 115 | if (new == NULL) | ||
| 116 | return ERR_PTR(-ENOMEM); | ||
| 117 | if (old) | ||
| 118 | memcpy(new, old, nr_probes * sizeof(void *)); | ||
| 119 | new[nr_probes] = probe; | ||
| 120 | entry->refcount = nr_probes + 1; | ||
| 121 | entry->funcs = new; | ||
| 122 | debug_print_probes(entry); | ||
| 123 | return old; | ||
| 124 | } | ||
| 125 | |||
| 126 | static void * | ||
| 127 | tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) | ||
| 128 | { | ||
| 129 | int nr_probes = 0, nr_del = 0, i; | ||
| 130 | void **old, **new; | ||
| 131 | |||
| 132 | old = entry->funcs; | ||
| 133 | |||
| 134 | if (!old) | ||
| 135 | return NULL; | ||
| 136 | |||
| 137 | debug_print_probes(entry); | ||
| 138 | /* (N -> M), (N > 1, M >= 0) probes */ | ||
| 139 | for (nr_probes = 0; old[nr_probes]; nr_probes++) { | ||
| 140 | if ((!probe || old[nr_probes] == probe)) | ||
| 141 | nr_del++; | ||
| 142 | } | ||
| 143 | |||
| 144 | if (nr_probes - nr_del == 0) { | ||
| 145 | /* N -> 0, (N > 1) */ | ||
| 146 | entry->funcs = NULL; | ||
| 147 | entry->refcount = 0; | ||
| 148 | debug_print_probes(entry); | ||
| 149 | return old; | ||
| 150 | } else { | ||
| 151 | int j = 0; | ||
| 152 | /* N -> M, (N > 1, M > 0) */ | ||
| 153 | /* + 1 for NULL */ | ||
| 154 | new = kzalloc((nr_probes - nr_del + 1) | ||
| 155 | * sizeof(void *), GFP_KERNEL); | ||
| 156 | if (new == NULL) | ||
| 157 | return ERR_PTR(-ENOMEM); | ||
| 158 | for (i = 0; old[i]; i++) | ||
| 159 | if ((probe && old[i] != probe)) | ||
| 160 | new[j++] = old[i]; | ||
| 161 | entry->refcount = nr_probes - nr_del; | ||
| 162 | entry->funcs = new; | ||
| 163 | } | ||
| 164 | debug_print_probes(entry); | ||
| 165 | return old; | ||
| 166 | } | ||
| 167 | |||
| 168 | /* | ||
| 169 | * Get tracepoint if the tracepoint is present in the tracepoint hash table. | ||
| 170 | * Must be called with tracepoints_mutex held. | ||
| 171 | * Returns NULL if not present. | ||
| 172 | */ | ||
| 173 | static struct tracepoint_entry *get_tracepoint(const char *name) | ||
| 174 | { | ||
| 175 | struct hlist_head *head; | ||
| 176 | struct hlist_node *node; | ||
| 177 | struct tracepoint_entry *e; | ||
| 178 | u32 hash = jhash(name, strlen(name), 0); | ||
| 179 | |||
| 180 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; | ||
| 181 | hlist_for_each_entry(e, node, head, hlist) { | ||
| 182 | if (!strcmp(name, e->name)) | ||
| 183 | return e; | ||
| 184 | } | ||
| 185 | return NULL; | ||
| 186 | } | ||
| 187 | |||
| 188 | /* | ||
| 189 | * Add the tracepoint to the tracepoint hash table. Must be called with | ||
| 190 | * tracepoints_mutex held. | ||
| 191 | */ | ||
| 192 | static struct tracepoint_entry *add_tracepoint(const char *name) | ||
| 193 | { | ||
| 194 | struct hlist_head *head; | ||
| 195 | struct hlist_node *node; | ||
| 196 | struct tracepoint_entry *e; | ||
| 197 | size_t name_len = strlen(name) + 1; | ||
| 198 | u32 hash = jhash(name, name_len-1, 0); | ||
| 199 | |||
| 200 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; | ||
| 201 | hlist_for_each_entry(e, node, head, hlist) { | ||
| 202 | if (!strcmp(name, e->name)) { | ||
| 203 | printk(KERN_NOTICE | ||
| 204 | "tracepoint %s busy\n", name); | ||
| 205 | return ERR_PTR(-EEXIST); /* Already there */ | ||
| 206 | } | ||
| 207 | } | ||
| 208 | /* | ||
| 209 | * Using kmalloc here to allocate a variable length element. Could | ||
| 210 | * cause some memory fragmentation if overused. | ||
| 211 | */ | ||
| 212 | e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); | ||
| 213 | if (!e) | ||
| 214 | return ERR_PTR(-ENOMEM); | ||
| 215 | memcpy(&e->name[0], name, name_len); | ||
| 216 | e->funcs = NULL; | ||
| 217 | e->refcount = 0; | ||
| 218 | e->rcu_pending = 0; | ||
| 219 | hlist_add_head(&e->hlist, head); | ||
| 220 | return e; | ||
| 221 | } | ||
| 222 | |||
| 223 | /* | ||
| 224 | * Remove the tracepoint from the tracepoint hash table. Must be called with | ||
| 225 | * mutex_lock held. | ||
| 226 | */ | ||
| 227 | static int remove_tracepoint(const char *name) | ||
| 228 | { | ||
| 229 | struct hlist_head *head; | ||
| 230 | struct hlist_node *node; | ||
| 231 | struct tracepoint_entry *e; | ||
| 232 | int found = 0; | ||
| 233 | size_t len = strlen(name) + 1; | ||
| 234 | u32 hash = jhash(name, len-1, 0); | ||
| 235 | |||
| 236 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; | ||
| 237 | hlist_for_each_entry(e, node, head, hlist) { | ||
| 238 | if (!strcmp(name, e->name)) { | ||
| 239 | found = 1; | ||
| 240 | break; | ||
| 241 | } | ||
| 242 | } | ||
| 243 | if (!found) | ||
| 244 | return -ENOENT; | ||
| 245 | if (e->refcount) | ||
| 246 | return -EBUSY; | ||
| 247 | hlist_del(&e->hlist); | ||
| 248 | /* Make sure the call_rcu_sched has been executed */ | ||
| 249 | if (e->rcu_pending) | ||
| 250 | rcu_barrier_sched(); | ||
| 251 | kfree(e); | ||
| 252 | return 0; | ||
| 253 | } | ||
| 254 | |||
| 255 | /* | ||
| 256 | * Sets the probe callback corresponding to one tracepoint. | ||
| 257 | */ | ||
| 258 | static void set_tracepoint(struct tracepoint_entry **entry, | ||
| 259 | struct tracepoint *elem, int active) | ||
| 260 | { | ||
| 261 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | ||
| 262 | |||
| 263 | /* | ||
| 264 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new | ||
| 265 | * probe callbacks array is consistent before setting a pointer to it. | ||
| 266 | * This array is referenced by __DO_TRACE from | ||
| 267 | * include/linux/tracepoints.h. A matching smp_read_barrier_depends() | ||
| 268 | * is used. | ||
| 269 | */ | ||
| 270 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); | ||
| 271 | elem->state = active; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* | ||
| 275 | * Disable a tracepoint and its probe callback. | ||
| 276 | * Note: only waiting an RCU period after setting elem->call to the empty | ||
| 277 | * function insures that the original callback is not used anymore. This insured | ||
| 278 | * by preempt_disable around the call site. | ||
| 279 | */ | ||
| 280 | static void disable_tracepoint(struct tracepoint *elem) | ||
| 281 | { | ||
| 282 | elem->state = 0; | ||
| 283 | } | ||
| 284 | |||
| 285 | /** | ||
| 286 | * tracepoint_update_probe_range - Update a probe range | ||
| 287 | * @begin: beginning of the range | ||
| 288 | * @end: end of the range | ||
| 289 | * | ||
| 290 | * Updates the probe callback corresponding to a range of tracepoints. | ||
| 291 | */ | ||
| 292 | void tracepoint_update_probe_range(struct tracepoint *begin, | ||
| 293 | struct tracepoint *end) | ||
| 294 | { | ||
| 295 | struct tracepoint *iter; | ||
| 296 | struct tracepoint_entry *mark_entry; | ||
| 297 | |||
| 298 | mutex_lock(&tracepoints_mutex); | ||
| 299 | for (iter = begin; iter < end; iter++) { | ||
| 300 | mark_entry = get_tracepoint(iter->name); | ||
| 301 | if (mark_entry) { | ||
| 302 | set_tracepoint(&mark_entry, iter, | ||
| 303 | !!mark_entry->refcount); | ||
| 304 | } else { | ||
| 305 | disable_tracepoint(iter); | ||
| 306 | } | ||
| 307 | } | ||
| 308 | mutex_unlock(&tracepoints_mutex); | ||
| 309 | } | ||
| 310 | |||
| 311 | /* | ||
| 312 | * Update probes, removing the faulty probes. | ||
| 313 | */ | ||
| 314 | static void tracepoint_update_probes(void) | ||
| 315 | { | ||
| 316 | /* Core kernel tracepoints */ | ||
| 317 | tracepoint_update_probe_range(__start___tracepoints, | ||
| 318 | __stop___tracepoints); | ||
| 319 | /* tracepoints in modules. */ | ||
| 320 | module_update_tracepoints(); | ||
| 321 | } | ||
| 322 | |||
| 323 | /** | ||
| 324 | * tracepoint_probe_register - Connect a probe to a tracepoint | ||
| 325 | * @name: tracepoint name | ||
| 326 | * @probe: probe handler | ||
| 327 | * | ||
| 328 | * Returns 0 if ok, error value on error. | ||
| 329 | * The probe address must at least be aligned on the architecture pointer size. | ||
| 330 | */ | ||
| 331 | int tracepoint_probe_register(const char *name, void *probe) | ||
| 332 | { | ||
| 333 | struct tracepoint_entry *entry; | ||
| 334 | int ret = 0; | ||
| 335 | void *old; | ||
| 336 | |||
| 337 | mutex_lock(&tracepoints_mutex); | ||
| 338 | entry = get_tracepoint(name); | ||
| 339 | if (!entry) { | ||
| 340 | entry = add_tracepoint(name); | ||
| 341 | if (IS_ERR(entry)) { | ||
| 342 | ret = PTR_ERR(entry); | ||
| 343 | goto end; | ||
| 344 | } | ||
| 345 | } | ||
| 346 | /* | ||
| 347 | * If we detect that a call_rcu_sched is pending for this tracepoint, | ||
| 348 | * make sure it's executed now. | ||
| 349 | */ | ||
| 350 | if (entry->rcu_pending) | ||
| 351 | rcu_barrier_sched(); | ||
| 352 | old = tracepoint_entry_add_probe(entry, probe); | ||
| 353 | if (IS_ERR(old)) { | ||
| 354 | ret = PTR_ERR(old); | ||
| 355 | goto end; | ||
| 356 | } | ||
| 357 | mutex_unlock(&tracepoints_mutex); | ||
| 358 | tracepoint_update_probes(); /* may update entry */ | ||
| 359 | mutex_lock(&tracepoints_mutex); | ||
| 360 | entry = get_tracepoint(name); | ||
| 361 | WARN_ON(!entry); | ||
| 362 | if (entry->rcu_pending) | ||
| 363 | rcu_barrier_sched(); | ||
| 364 | tracepoint_entry_free_old(entry, old); | ||
| 365 | end: | ||
| 366 | mutex_unlock(&tracepoints_mutex); | ||
| 367 | return ret; | ||
| 368 | } | ||
| 369 | EXPORT_SYMBOL_GPL(tracepoint_probe_register); | ||
| 370 | |||
| 371 | /** | ||
| 372 | * tracepoint_probe_unregister - Disconnect a probe from a tracepoint | ||
| 373 | * @name: tracepoint name | ||
| 374 | * @probe: probe function pointer | ||
| 375 | * | ||
| 376 | * We do not need to call a synchronize_sched to make sure the probes have | ||
| 377 | * finished running before doing a module unload, because the module unload | ||
| 378 | * itself uses stop_machine(), which insures that every preempt disabled section | ||
| 379 | * have finished. | ||
| 380 | */ | ||
| 381 | int tracepoint_probe_unregister(const char *name, void *probe) | ||
| 382 | { | ||
| 383 | struct tracepoint_entry *entry; | ||
| 384 | void *old; | ||
| 385 | int ret = -ENOENT; | ||
| 386 | |||
| 387 | mutex_lock(&tracepoints_mutex); | ||
| 388 | entry = get_tracepoint(name); | ||
| 389 | if (!entry) | ||
| 390 | goto end; | ||
| 391 | if (entry->rcu_pending) | ||
| 392 | rcu_barrier_sched(); | ||
| 393 | old = tracepoint_entry_remove_probe(entry, probe); | ||
| 394 | if (!old) { | ||
| 395 | printk(KERN_WARNING "Warning: Trying to unregister a probe" | ||
| 396 | "that doesn't exist\n"); | ||
| 397 | goto end; | ||
| 398 | } | ||
| 399 | mutex_unlock(&tracepoints_mutex); | ||
| 400 | tracepoint_update_probes(); /* may update entry */ | ||
| 401 | mutex_lock(&tracepoints_mutex); | ||
| 402 | entry = get_tracepoint(name); | ||
| 403 | if (!entry) | ||
| 404 | goto end; | ||
| 405 | if (entry->rcu_pending) | ||
| 406 | rcu_barrier_sched(); | ||
| 407 | tracepoint_entry_free_old(entry, old); | ||
| 408 | remove_tracepoint(name); /* Ignore busy error message */ | ||
| 409 | ret = 0; | ||
| 410 | end: | ||
| 411 | mutex_unlock(&tracepoints_mutex); | ||
| 412 | return ret; | ||
| 413 | } | ||
| 414 | EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); | ||
| 415 | |||
/**
 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
 * @tracepoint: current tracepoints (in), next tracepoint (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns 1 if *@tracepoint designates a tracepoint inside [@begin, @end),
 * 0 otherwise. If *@tracepoint is NULL and the range is not empty,
 * *@tracepoint is set to @begin and 1 is returned.
 */
int tracepoint_get_iter_range(struct tracepoint **tracepoint,
	struct tracepoint *begin, struct tracepoint *end)
{
	struct tracepoint *cur = *tracepoint;

	/* No position yet: start at the first element, if there is one. */
	if (!cur && begin != end) {
		*tracepoint = begin;
		return 1;
	}
	return (cur >= begin && cur < end) ? 1 : 0;
}
| 437 | EXPORT_SYMBOL_GPL(tracepoint_get_iter_range); | ||
| 438 | |||
| 439 | static void tracepoint_get_iter(struct tracepoint_iter *iter) | ||
| 440 | { | ||
| 441 | int found = 0; | ||
| 442 | |||
| 443 | /* Core kernel tracepoints */ | ||
| 444 | if (!iter->module) { | ||
| 445 | found = tracepoint_get_iter_range(&iter->tracepoint, | ||
| 446 | __start___tracepoints, __stop___tracepoints); | ||
| 447 | if (found) | ||
| 448 | goto end; | ||
| 449 | } | ||
| 450 | /* tracepoints in modules. */ | ||
| 451 | found = module_get_iter_tracepoints(iter); | ||
| 452 | end: | ||
| 453 | if (!found) | ||
| 454 | tracepoint_iter_reset(iter); | ||
| 455 | } | ||
| 456 | |||
/*
 * Begin or resume an iteration: validates the current position of @iter,
 * which is reset if no tracepoint is found.
 */
void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
| 461 | EXPORT_SYMBOL_GPL(tracepoint_iter_start); | ||
| 462 | |||
| 463 | void tracepoint_iter_next(struct tracepoint_iter *iter) | ||
| 464 | { | ||
| 465 | iter->tracepoint++; | ||
| 466 | /* | ||
| 467 | * iter->tracepoint may be invalid because we blindly incremented it. | ||
| 468 | * Make sure it is valid by marshalling on the tracepoints, getting the | ||
| 469 | * tracepoints from following modules if necessary. | ||
| 470 | */ | ||
| 471 | tracepoint_get_iter(iter); | ||
| 472 | } | ||
| 473 | EXPORT_SYMBOL_GPL(tracepoint_iter_next); | ||
| 474 | |||
/* End an iteration. Currently there is nothing to release. */
void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
| 478 | EXPORT_SYMBOL_GPL(tracepoint_iter_stop); | ||
| 479 | |||
| 480 | void tracepoint_iter_reset(struct tracepoint_iter *iter) | ||
| 481 | { | ||
| 482 | iter->module = NULL; | ||
| 483 | iter->tracepoint = NULL; | ||
| 484 | } | ||
| 485 | EXPORT_SYMBOL_GPL(tracepoint_iter_reset); | ||
