aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-21 12:05:47 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-21 12:05:47 -0400
commitbd4c3a3441144cd46d1f544046523724c5bc6e94 (patch)
tree8b5c67249a7a163caf3f88cbcb9df5236fcc3b93 /kernel
parentb3727c24da69971503a4ca98b3b877753c6a4393 (diff)
parent583a22e7c154dc0a3938db522696b4bc7f098f59 (diff)
Merge branch 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: kernel/profile.c: Switch /proc/irq/prof_cpu_mask to seq_file tracing: Export trace_profile_buf symbols tracing/events: use list_for_entry_continue tracing: remove max_tracer_type_len function-graph: use ftrace_graph_funcs directly tracing: Remove markers tracing: Allocate the ftrace event profile buffer dynamically tracing: Factorize the events profile accounting
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/marker.c930
-rw-r--r--kernel/module.c18
-rw-r--r--kernel/profile.c45
-rw-r--r--kernel/trace/ftrace.c23
-rw-r--r--kernel/trace/trace.c49
-rw-r--r--kernel/trace/trace_event_profile.c82
-rw-r--r--kernel/trace/trace_events.c49
-rw-r--r--kernel/trace/trace_printk.c1
-rw-r--r--kernel/trace/trace_syscalls.c97
10 files changed, 217 insertions, 1078 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 3d9c7e27e3f9..7c9b0a585502 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o
87obj-$(CONFIG_SYSCTL) += utsname_sysctl.o 87obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
88obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 88obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
89obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 89obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
90obj-$(CONFIG_MARKERS) += marker.o
91obj-$(CONFIG_TRACEPOINTS) += tracepoint.o 90obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
92obj-$(CONFIG_LATENCYTOP) += latencytop.o 91obj-$(CONFIG_LATENCYTOP) += latencytop.o
93obj-$(CONFIG_FUNCTION_TRACER) += trace/ 92obj-$(CONFIG_FUNCTION_TRACER) += trace/
diff --git a/kernel/marker.c b/kernel/marker.c
deleted file mode 100644
index ea54f2647868..000000000000
--- a/kernel/marker.c
+++ /dev/null
@@ -1,930 +0,0 @@
1/*
2 * Copyright (C) 2007 Mathieu Desnoyers
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */
18#include <linux/module.h>
19#include <linux/mutex.h>
20#include <linux/types.h>
21#include <linux/jhash.h>
22#include <linux/list.h>
23#include <linux/rcupdate.h>
24#include <linux/marker.h>
25#include <linux/err.h>
26#include <linux/slab.h>
27
28extern struct marker __start___markers[];
29extern struct marker __stop___markers[];
30
31/* Set to 1 to enable marker debug output */
32static const int marker_debug;
33
34/*
35 * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
36 * and module markers and the hash table.
37 */
38static DEFINE_MUTEX(markers_mutex);
39
40/*
41 * Marker hash table, containing the active markers.
42 * Protected by module_mutex.
43 */
44#define MARKER_HASH_BITS 6
45#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
46static struct hlist_head marker_table[MARKER_TABLE_SIZE];
47
48/*
49 * Note about RCU :
50 * It is used to make sure every handler has finished using its private data
51 * between two consecutive operation (add or remove) on a given marker. It is
52 * also used to delay the free of multiple probes array until a quiescent state
53 * is reached.
54 * marker entries modifications are protected by the markers_mutex.
55 */
56struct marker_entry {
57 struct hlist_node hlist;
58 char *format;
59 /* Probe wrapper */
60 void (*call)(const struct marker *mdata, void *call_private, ...);
61 struct marker_probe_closure single;
62 struct marker_probe_closure *multi;
63 int refcount; /* Number of times armed. 0 if disarmed. */
64 struct rcu_head rcu;
65 void *oldptr;
66 int rcu_pending;
67 unsigned char ptype:1;
68 unsigned char format_allocated:1;
69 char name[0]; /* Contains name'\0'format'\0' */
70};
71
72/**
73 * __mark_empty_function - Empty probe callback
74 * @probe_private: probe private data
75 * @call_private: call site private data
76 * @fmt: format string
77 * @...: variable argument list
78 *
79 * Empty callback provided as a probe to the markers. By providing this to a
80 * disabled marker, we make sure the execution flow is always valid even
81 * though the function pointer change and the marker enabling are two distinct
82 * operations that modifies the execution flow of preemptible code.
83 */
84notrace void __mark_empty_function(void *probe_private, void *call_private,
85 const char *fmt, va_list *args)
86{
87}
88EXPORT_SYMBOL_GPL(__mark_empty_function);
89
90/*
91 * marker_probe_cb Callback that prepares the variable argument list for probes.
92 * @mdata: pointer of type struct marker
93 * @call_private: caller site private data
94 * @...: Variable argument list.
95 *
96 * Since we do not use "typical" pointer based RCU in the 1 argument case, we
97 * need to put a full smp_rmb() in this branch. This is why we do not use
98 * rcu_dereference() for the pointer read.
99 */
100notrace void marker_probe_cb(const struct marker *mdata,
101 void *call_private, ...)
102{
103 va_list args;
104 char ptype;
105
106 /*
107 * rcu_read_lock_sched does two things : disabling preemption to make
108 * sure the teardown of the callbacks can be done correctly when they
109 * are in modules and they insure RCU read coherency.
110 */
111 rcu_read_lock_sched_notrace();
112 ptype = mdata->ptype;
113 if (likely(!ptype)) {
114 marker_probe_func *func;
115 /* Must read the ptype before ptr. They are not data dependant,
116 * so we put an explicit smp_rmb() here. */
117 smp_rmb();
118 func = mdata->single.func;
119 /* Must read the ptr before private data. They are not data
120 * dependant, so we put an explicit smp_rmb() here. */
121 smp_rmb();
122 va_start(args, call_private);
123 func(mdata->single.probe_private, call_private, mdata->format,
124 &args);
125 va_end(args);
126 } else {
127 struct marker_probe_closure *multi;
128 int i;
129 /*
130 * Read mdata->ptype before mdata->multi.
131 */
132 smp_rmb();
133 multi = mdata->multi;
134 /*
135 * multi points to an array, therefore accessing the array
136 * depends on reading multi. However, even in this case,
137 * we must insure that the pointer is read _before_ the array
138 * data. Same as rcu_dereference, but we need a full smp_rmb()
139 * in the fast path, so put the explicit barrier here.
140 */
141 smp_read_barrier_depends();
142 for (i = 0; multi[i].func; i++) {
143 va_start(args, call_private);
144 multi[i].func(multi[i].probe_private, call_private,
145 mdata->format, &args);
146 va_end(args);
147 }
148 }
149 rcu_read_unlock_sched_notrace();
150}
151EXPORT_SYMBOL_GPL(marker_probe_cb);
152
153/*
154 * marker_probe_cb Callback that does not prepare the variable argument list.
155 * @mdata: pointer of type struct marker
156 * @call_private: caller site private data
157 * @...: Variable argument list.
158 *
159 * Should be connected to markers "MARK_NOARGS".
160 */
161static notrace void marker_probe_cb_noarg(const struct marker *mdata,
162 void *call_private, ...)
163{
164 va_list args; /* not initialized */
165 char ptype;
166
167 rcu_read_lock_sched_notrace();
168 ptype = mdata->ptype;
169 if (likely(!ptype)) {
170 marker_probe_func *func;
171 /* Must read the ptype before ptr. They are not data dependant,
172 * so we put an explicit smp_rmb() here. */
173 smp_rmb();
174 func = mdata->single.func;
175 /* Must read the ptr before private data. They are not data
176 * dependant, so we put an explicit smp_rmb() here. */
177 smp_rmb();
178 func(mdata->single.probe_private, call_private, mdata->format,
179 &args);
180 } else {
181 struct marker_probe_closure *multi;
182 int i;
183 /*
184 * Read mdata->ptype before mdata->multi.
185 */
186 smp_rmb();
187 multi = mdata->multi;
188 /*
189 * multi points to an array, therefore accessing the array
190 * depends on reading multi. However, even in this case,
191 * we must insure that the pointer is read _before_ the array
192 * data. Same as rcu_dereference, but we need a full smp_rmb()
193 * in the fast path, so put the explicit barrier here.
194 */
195 smp_read_barrier_depends();
196 for (i = 0; multi[i].func; i++)
197 multi[i].func(multi[i].probe_private, call_private,
198 mdata->format, &args);
199 }
200 rcu_read_unlock_sched_notrace();
201}
202
203static void free_old_closure(struct rcu_head *head)
204{
205 struct marker_entry *entry = container_of(head,
206 struct marker_entry, rcu);
207 kfree(entry->oldptr);
208 /* Make sure we free the data before setting the pending flag to 0 */
209 smp_wmb();
210 entry->rcu_pending = 0;
211}
212
213static void debug_print_probes(struct marker_entry *entry)
214{
215 int i;
216
217 if (!marker_debug)
218 return;
219
220 if (!entry->ptype) {
221 printk(KERN_DEBUG "Single probe : %p %p\n",
222 entry->single.func,
223 entry->single.probe_private);
224 } else {
225 for (i = 0; entry->multi[i].func; i++)
226 printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
227 entry->multi[i].func,
228 entry->multi[i].probe_private);
229 }
230}
231
232static struct marker_probe_closure *
233marker_entry_add_probe(struct marker_entry *entry,
234 marker_probe_func *probe, void *probe_private)
235{
236 int nr_probes = 0;
237 struct marker_probe_closure *old, *new;
238
239 WARN_ON(!probe);
240
241 debug_print_probes(entry);
242 old = entry->multi;
243 if (!entry->ptype) {
244 if (entry->single.func == probe &&
245 entry->single.probe_private == probe_private)
246 return ERR_PTR(-EBUSY);
247 if (entry->single.func == __mark_empty_function) {
248 /* 0 -> 1 probes */
249 entry->single.func = probe;
250 entry->single.probe_private = probe_private;
251 entry->refcount = 1;
252 entry->ptype = 0;
253 debug_print_probes(entry);
254 return NULL;
255 } else {
256 /* 1 -> 2 probes */
257 nr_probes = 1;
258 old = NULL;
259 }
260 } else {
261 /* (N -> N+1), (N != 0, 1) probes */
262 for (nr_probes = 0; old[nr_probes].func; nr_probes++)
263 if (old[nr_probes].func == probe
264 && old[nr_probes].probe_private
265 == probe_private)
266 return ERR_PTR(-EBUSY);
267 }
268 /* + 2 : one for new probe, one for NULL func */
269 new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
270 GFP_KERNEL);
271 if (new == NULL)
272 return ERR_PTR(-ENOMEM);
273 if (!old)
274 new[0] = entry->single;
275 else
276 memcpy(new, old,
277 nr_probes * sizeof(struct marker_probe_closure));
278 new[nr_probes].func = probe;
279 new[nr_probes].probe_private = probe_private;
280 entry->refcount = nr_probes + 1;
281 entry->multi = new;
282 entry->ptype = 1;
283 debug_print_probes(entry);
284 return old;
285}
286
287static struct marker_probe_closure *
288marker_entry_remove_probe(struct marker_entry *entry,
289 marker_probe_func *probe, void *probe_private)
290{
291 int nr_probes = 0, nr_del = 0, i;
292 struct marker_probe_closure *old, *new;
293
294 old = entry->multi;
295
296 debug_print_probes(entry);
297 if (!entry->ptype) {
298 /* 0 -> N is an error */
299 WARN_ON(entry->single.func == __mark_empty_function);
300 /* 1 -> 0 probes */
301 WARN_ON(probe && entry->single.func != probe);
302 WARN_ON(entry->single.probe_private != probe_private);
303 entry->single.func = __mark_empty_function;
304 entry->refcount = 0;
305 entry->ptype = 0;
306 debug_print_probes(entry);
307 return NULL;
308 } else {
309 /* (N -> M), (N > 1, M >= 0) probes */
310 for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
311 if ((!probe || old[nr_probes].func == probe)
312 && old[nr_probes].probe_private
313 == probe_private)
314 nr_del++;
315 }
316 }
317
318 if (nr_probes - nr_del == 0) {
319 /* N -> 0, (N > 1) */
320 entry->single.func = __mark_empty_function;
321 entry->refcount = 0;
322 entry->ptype = 0;
323 } else if (nr_probes - nr_del == 1) {
324 /* N -> 1, (N > 1) */
325 for (i = 0; old[i].func; i++)
326 if ((probe && old[i].func != probe) ||
327 old[i].probe_private != probe_private)
328 entry->single = old[i];
329 entry->refcount = 1;
330 entry->ptype = 0;
331 } else {
332 int j = 0;
333 /* N -> M, (N > 1, M > 1) */
334 /* + 1 for NULL */
335 new = kzalloc((nr_probes - nr_del + 1)
336 * sizeof(struct marker_probe_closure), GFP_KERNEL);
337 if (new == NULL)
338 return ERR_PTR(-ENOMEM);
339 for (i = 0; old[i].func; i++)
340 if ((probe && old[i].func != probe) ||
341 old[i].probe_private != probe_private)
342 new[j++] = old[i];
343 entry->refcount = nr_probes - nr_del;
344 entry->ptype = 1;
345 entry->multi = new;
346 }
347 debug_print_probes(entry);
348 return old;
349}
350
351/*
352 * Get marker if the marker is present in the marker hash table.
353 * Must be called with markers_mutex held.
354 * Returns NULL if not present.
355 */
356static struct marker_entry *get_marker(const char *name)
357{
358 struct hlist_head *head;
359 struct hlist_node *node;
360 struct marker_entry *e;
361 u32 hash = jhash(name, strlen(name), 0);
362
363 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
364 hlist_for_each_entry(e, node, head, hlist) {
365 if (!strcmp(name, e->name))
366 return e;
367 }
368 return NULL;
369}
370
371/*
372 * Add the marker to the marker hash table. Must be called with markers_mutex
373 * held.
374 */
375static struct marker_entry *add_marker(const char *name, const char *format)
376{
377 struct hlist_head *head;
378 struct hlist_node *node;
379 struct marker_entry *e;
380 size_t name_len = strlen(name) + 1;
381 size_t format_len = 0;
382 u32 hash = jhash(name, name_len-1, 0);
383
384 if (format)
385 format_len = strlen(format) + 1;
386 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
387 hlist_for_each_entry(e, node, head, hlist) {
388 if (!strcmp(name, e->name)) {
389 printk(KERN_NOTICE
390 "Marker %s busy\n", name);
391 return ERR_PTR(-EBUSY); /* Already there */
392 }
393 }
394 /*
395 * Using kmalloc here to allocate a variable length element. Could
396 * cause some memory fragmentation if overused.
397 */
398 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
399 GFP_KERNEL);
400 if (!e)
401 return ERR_PTR(-ENOMEM);
402 memcpy(&e->name[0], name, name_len);
403 if (format) {
404 e->format = &e->name[name_len];
405 memcpy(e->format, format, format_len);
406 if (strcmp(e->format, MARK_NOARGS) == 0)
407 e->call = marker_probe_cb_noarg;
408 else
409 e->call = marker_probe_cb;
410 trace_mark(core_marker_format, "name %s format %s",
411 e->name, e->format);
412 } else {
413 e->format = NULL;
414 e->call = marker_probe_cb;
415 }
416 e->single.func = __mark_empty_function;
417 e->single.probe_private = NULL;
418 e->multi = NULL;
419 e->ptype = 0;
420 e->format_allocated = 0;
421 e->refcount = 0;
422 e->rcu_pending = 0;
423 hlist_add_head(&e->hlist, head);
424 return e;
425}
426
427/*
428 * Remove the marker from the marker hash table. Must be called with mutex_lock
429 * held.
430 */
431static int remove_marker(const char *name)
432{
433 struct hlist_head *head;
434 struct hlist_node *node;
435 struct marker_entry *e;
436 int found = 0;
437 size_t len = strlen(name) + 1;
438 u32 hash = jhash(name, len-1, 0);
439
440 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
441 hlist_for_each_entry(e, node, head, hlist) {
442 if (!strcmp(name, e->name)) {
443 found = 1;
444 break;
445 }
446 }
447 if (!found)
448 return -ENOENT;
449 if (e->single.func != __mark_empty_function)
450 return -EBUSY;
451 hlist_del(&e->hlist);
452 if (e->format_allocated)
453 kfree(e->format);
454 /* Make sure the call_rcu has been executed */
455 if (e->rcu_pending)
456 rcu_barrier_sched();
457 kfree(e);
458 return 0;
459}
460
461/*
462 * Set the mark_entry format to the format found in the element.
463 */
464static int marker_set_format(struct marker_entry *entry, const char *format)
465{
466 entry->format = kstrdup(format, GFP_KERNEL);
467 if (!entry->format)
468 return -ENOMEM;
469 entry->format_allocated = 1;
470
471 trace_mark(core_marker_format, "name %s format %s",
472 entry->name, entry->format);
473 return 0;
474}
475
476/*
477 * Sets the probe callback corresponding to one marker.
478 */
479static int set_marker(struct marker_entry *entry, struct marker *elem,
480 int active)
481{
482 int ret = 0;
483 WARN_ON(strcmp(entry->name, elem->name) != 0);
484
485 if (entry->format) {
486 if (strcmp(entry->format, elem->format) != 0) {
487 printk(KERN_NOTICE
488 "Format mismatch for probe %s "
489 "(%s), marker (%s)\n",
490 entry->name,
491 entry->format,
492 elem->format);
493 return -EPERM;
494 }
495 } else {
496 ret = marker_set_format(entry, elem->format);
497 if (ret)
498 return ret;
499 }
500
501 /*
502 * probe_cb setup (statically known) is done here. It is
503 * asynchronous with the rest of execution, therefore we only
504 * pass from a "safe" callback (with argument) to an "unsafe"
505 * callback (does not set arguments).
506 */
507 elem->call = entry->call;
508 /*
509 * Sanity check :
510 * We only update the single probe private data when the ptr is
511 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
512 */
513 WARN_ON(elem->single.func != __mark_empty_function
514 && elem->single.probe_private != entry->single.probe_private
515 && !elem->ptype);
516 elem->single.probe_private = entry->single.probe_private;
517 /*
518 * Make sure the private data is valid when we update the
519 * single probe ptr.
520 */
521 smp_wmb();
522 elem->single.func = entry->single.func;
523 /*
524 * We also make sure that the new probe callbacks array is consistent
525 * before setting a pointer to it.
526 */
527 rcu_assign_pointer(elem->multi, entry->multi);
528 /*
529 * Update the function or multi probe array pointer before setting the
530 * ptype.
531 */
532 smp_wmb();
533 elem->ptype = entry->ptype;
534
535 if (elem->tp_name && (active ^ elem->state)) {
536 WARN_ON(!elem->tp_cb);
537 /*
538 * It is ok to directly call the probe registration because type
539 * checking has been done in the __trace_mark_tp() macro.
540 */
541
542 if (active) {
543 /*
544 * try_module_get should always succeed because we hold
545 * lock_module() to get the tp_cb address.
546 */
547 ret = try_module_get(__module_text_address(
548 (unsigned long)elem->tp_cb));
549 BUG_ON(!ret);
550 ret = tracepoint_probe_register_noupdate(
551 elem->tp_name,
552 elem->tp_cb);
553 } else {
554 ret = tracepoint_probe_unregister_noupdate(
555 elem->tp_name,
556 elem->tp_cb);
557 /*
558 * tracepoint_probe_update_all() must be called
559 * before the module containing tp_cb is unloaded.
560 */
561 module_put(__module_text_address(
562 (unsigned long)elem->tp_cb));
563 }
564 }
565 elem->state = active;
566
567 return ret;
568}
569
570/*
571 * Disable a marker and its probe callback.
572 * Note: only waiting an RCU period after setting elem->call to the empty
573 * function insures that the original callback is not used anymore. This insured
574 * by rcu_read_lock_sched around the call site.
575 */
576static void disable_marker(struct marker *elem)
577{
578 int ret;
579
580 /* leave "call" as is. It is known statically. */
581 if (elem->tp_name && elem->state) {
582 WARN_ON(!elem->tp_cb);
583 /*
584 * It is ok to directly call the probe registration because type
585 * checking has been done in the __trace_mark_tp() macro.
586 */
587 ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
588 elem->tp_cb);
589 WARN_ON(ret);
590 /*
591 * tracepoint_probe_update_all() must be called
592 * before the module containing tp_cb is unloaded.
593 */
594 module_put(__module_text_address((unsigned long)elem->tp_cb));
595 }
596 elem->state = 0;
597 elem->single.func = __mark_empty_function;
598 /* Update the function before setting the ptype */
599 smp_wmb();
600 elem->ptype = 0; /* single probe */
601 /*
602 * Leave the private data and id there, because removal is racy and
603 * should be done only after an RCU period. These are never used until
604 * the next initialization anyway.
605 */
606}
607
608/**
609 * marker_update_probe_range - Update a probe range
610 * @begin: beginning of the range
611 * @end: end of the range
612 *
613 * Updates the probe callback corresponding to a range of markers.
614 */
615void marker_update_probe_range(struct marker *begin,
616 struct marker *end)
617{
618 struct marker *iter;
619 struct marker_entry *mark_entry;
620
621 mutex_lock(&markers_mutex);
622 for (iter = begin; iter < end; iter++) {
623 mark_entry = get_marker(iter->name);
624 if (mark_entry) {
625 set_marker(mark_entry, iter, !!mark_entry->refcount);
626 /*
627 * ignore error, continue
628 */
629 } else {
630 disable_marker(iter);
631 }
632 }
633 mutex_unlock(&markers_mutex);
634}
635
636/*
637 * Update probes, removing the faulty probes.
638 *
639 * Internal callback only changed before the first probe is connected to it.
640 * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
641 * transitions. All other transitions will leave the old private data valid.
642 * This makes the non-atomicity of the callback/private data updates valid.
643 *
644 * "special case" updates :
645 * 0 -> 1 callback
646 * 1 -> 0 callback
647 * 1 -> 2 callbacks
648 * 2 -> 1 callbacks
649 * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
650 * Site effect : marker_set_format may delete the marker entry (creating a
651 * replacement).
652 */
653static void marker_update_probes(void)
654{
655 /* Core kernel markers */
656 marker_update_probe_range(__start___markers, __stop___markers);
657 /* Markers in modules. */
658 module_update_markers();
659 tracepoint_probe_update_all();
660}
661
662/**
663 * marker_probe_register - Connect a probe to a marker
664 * @name: marker name
665 * @format: format string
666 * @probe: probe handler
667 * @probe_private: probe private data
668 *
669 * private data must be a valid allocated memory address, or NULL.
670 * Returns 0 if ok, error value on error.
671 * The probe address must at least be aligned on the architecture pointer size.
672 */
673int marker_probe_register(const char *name, const char *format,
674 marker_probe_func *probe, void *probe_private)
675{
676 struct marker_entry *entry;
677 int ret = 0;
678 struct marker_probe_closure *old;
679
680 mutex_lock(&markers_mutex);
681 entry = get_marker(name);
682 if (!entry) {
683 entry = add_marker(name, format);
684 if (IS_ERR(entry))
685 ret = PTR_ERR(entry);
686 } else if (format) {
687 if (!entry->format)
688 ret = marker_set_format(entry, format);
689 else if (strcmp(entry->format, format))
690 ret = -EPERM;
691 }
692 if (ret)
693 goto end;
694
695 /*
696 * If we detect that a call_rcu is pending for this marker,
697 * make sure it's executed now.
698 */
699 if (entry->rcu_pending)
700 rcu_barrier_sched();
701 old = marker_entry_add_probe(entry, probe, probe_private);
702 if (IS_ERR(old)) {
703 ret = PTR_ERR(old);
704 goto end;
705 }
706 mutex_unlock(&markers_mutex);
707 marker_update_probes();
708 mutex_lock(&markers_mutex);
709 entry = get_marker(name);
710 if (!entry)
711 goto end;
712 if (entry->rcu_pending)
713 rcu_barrier_sched();
714 entry->oldptr = old;
715 entry->rcu_pending = 1;
716 /* write rcu_pending before calling the RCU callback */
717 smp_wmb();
718 call_rcu_sched(&entry->rcu, free_old_closure);
719end:
720 mutex_unlock(&markers_mutex);
721 return ret;
722}
723EXPORT_SYMBOL_GPL(marker_probe_register);
724
725/**
726 * marker_probe_unregister - Disconnect a probe from a marker
727 * @name: marker name
728 * @probe: probe function pointer
729 * @probe_private: probe private data
730 *
731 * Returns the private data given to marker_probe_register, or an ERR_PTR().
732 * We do not need to call a synchronize_sched to make sure the probes have
733 * finished running before doing a module unload, because the module unload
734 * itself uses stop_machine(), which insures that every preempt disabled section
735 * have finished.
736 */
737int marker_probe_unregister(const char *name,
738 marker_probe_func *probe, void *probe_private)
739{
740 struct marker_entry *entry;
741 struct marker_probe_closure *old;
742 int ret = -ENOENT;
743
744 mutex_lock(&markers_mutex);
745 entry = get_marker(name);
746 if (!entry)
747 goto end;
748 if (entry->rcu_pending)
749 rcu_barrier_sched();
750 old = marker_entry_remove_probe(entry, probe, probe_private);
751 mutex_unlock(&markers_mutex);
752 marker_update_probes();
753 mutex_lock(&markers_mutex);
754 entry = get_marker(name);
755 if (!entry)
756 goto end;
757 if (entry->rcu_pending)
758 rcu_barrier_sched();
759 entry->oldptr = old;
760 entry->rcu_pending = 1;
761 /* write rcu_pending before calling the RCU callback */
762 smp_wmb();
763 call_rcu_sched(&entry->rcu, free_old_closure);
764 remove_marker(name); /* Ignore busy error message */
765 ret = 0;
766end:
767 mutex_unlock(&markers_mutex);
768 return ret;
769}
770EXPORT_SYMBOL_GPL(marker_probe_unregister);
771
772static struct marker_entry *
773get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
774{
775 struct marker_entry *entry;
776 unsigned int i;
777 struct hlist_head *head;
778 struct hlist_node *node;
779
780 for (i = 0; i < MARKER_TABLE_SIZE; i++) {
781 head = &marker_table[i];
782 hlist_for_each_entry(entry, node, head, hlist) {
783 if (!entry->ptype) {
784 if (entry->single.func == probe
785 && entry->single.probe_private
786 == probe_private)
787 return entry;
788 } else {
789 struct marker_probe_closure *closure;
790 closure = entry->multi;
791 for (i = 0; closure[i].func; i++) {
792 if (closure[i].func == probe &&
793 closure[i].probe_private
794 == probe_private)
795 return entry;
796 }
797 }
798 }
799 }
800 return NULL;
801}
802
803/**
804 * marker_probe_unregister_private_data - Disconnect a probe from a marker
805 * @probe: probe function
806 * @probe_private: probe private data
807 *
808 * Unregister a probe by providing the registered private data.
809 * Only removes the first marker found in hash table.
810 * Return 0 on success or error value.
811 * We do not need to call a synchronize_sched to make sure the probes have
812 * finished running before doing a module unload, because the module unload
813 * itself uses stop_machine(), which insures that every preempt disabled section
814 * have finished.
815 */
816int marker_probe_unregister_private_data(marker_probe_func *probe,
817 void *probe_private)
818{
819 struct marker_entry *entry;
820 int ret = 0;
821 struct marker_probe_closure *old;
822
823 mutex_lock(&markers_mutex);
824 entry = get_marker_from_private_data(probe, probe_private);
825 if (!entry) {
826 ret = -ENOENT;
827 goto end;
828 }
829 if (entry->rcu_pending)
830 rcu_barrier_sched();
831 old = marker_entry_remove_probe(entry, NULL, probe_private);
832 mutex_unlock(&markers_mutex);
833 marker_update_probes();
834 mutex_lock(&markers_mutex);
835 entry = get_marker_from_private_data(probe, probe_private);
836 if (!entry)
837 goto end;
838 if (entry->rcu_pending)
839 rcu_barrier_sched();
840 entry->oldptr = old;
841 entry->rcu_pending = 1;
842 /* write rcu_pending before calling the RCU callback */
843 smp_wmb();
844 call_rcu_sched(&entry->rcu, free_old_closure);
845 remove_marker(entry->name); /* Ignore busy error message */
846end:
847 mutex_unlock(&markers_mutex);
848 return ret;
849}
850EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
851
852/**
853 * marker_get_private_data - Get a marker's probe private data
854 * @name: marker name
855 * @probe: probe to match
856 * @num: get the nth matching probe's private data
857 *
858 * Returns the nth private data pointer (starting from 0) matching, or an
859 * ERR_PTR.
860 * Returns the private data pointer, or an ERR_PTR.
861 * The private data pointer should _only_ be dereferenced if the caller is the
862 * owner of the data, or its content could vanish. This is mostly used to
863 * confirm that a caller is the owner of a registered probe.
864 */
865void *marker_get_private_data(const char *name, marker_probe_func *probe,
866 int num)
867{
868 struct hlist_head *head;
869 struct hlist_node *node;
870 struct marker_entry *e;
871 size_t name_len = strlen(name) + 1;
872 u32 hash = jhash(name, name_len-1, 0);
873 int i;
874
875 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
876 hlist_for_each_entry(e, node, head, hlist) {
877 if (!strcmp(name, e->name)) {
878 if (!e->ptype) {
879 if (num == 0 && e->single.func == probe)
880 return e->single.probe_private;
881 } else {
882 struct marker_probe_closure *closure;
883 int match = 0;
884 closure = e->multi;
885 for (i = 0; closure[i].func; i++) {
886 if (closure[i].func != probe)
887 continue;
888 if (match++ == num)
889 return closure[i].probe_private;
890 }
891 }
892 break;
893 }
894 }
895 return ERR_PTR(-ENOENT);
896}
897EXPORT_SYMBOL_GPL(marker_get_private_data);
898
899#ifdef CONFIG_MODULES
900
901int marker_module_notify(struct notifier_block *self,
902 unsigned long val, void *data)
903{
904 struct module *mod = data;
905
906 switch (val) {
907 case MODULE_STATE_COMING:
908 marker_update_probe_range(mod->markers,
909 mod->markers + mod->num_markers);
910 break;
911 case MODULE_STATE_GOING:
912 marker_update_probe_range(mod->markers,
913 mod->markers + mod->num_markers);
914 break;
915 }
916 return 0;
917}
918
919struct notifier_block marker_module_nb = {
920 .notifier_call = marker_module_notify,
921 .priority = 0,
922};
923
924static int init_markers(void)
925{
926 return register_module_notifier(&marker_module_nb);
927}
928__initcall(init_markers);
929
930#endif /* CONFIG_MODULES */
diff --git a/kernel/module.c b/kernel/module.c
index 05ce49ced8f6..b6ee424245dd 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod,
2237 sizeof(*mod->ctors), &mod->num_ctors); 2237 sizeof(*mod->ctors), &mod->num_ctors);
2238#endif 2238#endif
2239 2239
2240#ifdef CONFIG_MARKERS
2241 mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
2242 sizeof(*mod->markers), &mod->num_markers);
2243#endif
2244#ifdef CONFIG_TRACEPOINTS 2240#ifdef CONFIG_TRACEPOINTS
2245 mod->tracepoints = section_objs(hdr, sechdrs, secstrings, 2241 mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
2246 "__tracepoints", 2242 "__tracepoints",
@@ -2958,20 +2954,6 @@ void module_layout(struct module *mod,
2958EXPORT_SYMBOL(module_layout); 2954EXPORT_SYMBOL(module_layout);
2959#endif 2955#endif
2960 2956
2961#ifdef CONFIG_MARKERS
2962void module_update_markers(void)
2963{
2964 struct module *mod;
2965
2966 mutex_lock(&module_mutex);
2967 list_for_each_entry(mod, &modules, list)
2968 if (!mod->taints)
2969 marker_update_probe_range(mod->markers,
2970 mod->markers + mod->num_markers);
2971 mutex_unlock(&module_mutex);
2972}
2973#endif
2974
2975#ifdef CONFIG_TRACEPOINTS 2957#ifdef CONFIG_TRACEPOINTS
2976void module_update_tracepoints(void) 2958void module_update_tracepoints(void)
2977{ 2959{
diff --git a/kernel/profile.c b/kernel/profile.c
index 419250ebec4d..a55d3a367ae8 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,48 +442,51 @@ void profile_tick(int type)
442 442
443#ifdef CONFIG_PROC_FS 443#ifdef CONFIG_PROC_FS
444#include <linux/proc_fs.h> 444#include <linux/proc_fs.h>
445#include <linux/seq_file.h>
445#include <asm/uaccess.h> 446#include <asm/uaccess.h>
446 447
447static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, 448static int prof_cpu_mask_proc_show(struct seq_file *m, void *v)
448 int count, int *eof, void *data)
449{ 449{
450 int len = cpumask_scnprintf(page, count, data); 450 seq_cpumask(m, prof_cpu_mask);
451 if (count - len < 2) 451 seq_putc(m, '\n');
452 return -EINVAL; 452 return 0;
453 len += sprintf(page + len, "\n");
454 return len;
455} 453}
456 454
457static int prof_cpu_mask_write_proc(struct file *file, 455static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file)
458 const char __user *buffer, unsigned long count, void *data) 456{
457 return single_open(file, prof_cpu_mask_proc_show, NULL);
458}
459
460static ssize_t prof_cpu_mask_proc_write(struct file *file,
461 const char __user *buffer, size_t count, loff_t *pos)
459{ 462{
460 struct cpumask *mask = data;
461 unsigned long full_count = count, err;
462 cpumask_var_t new_value; 463 cpumask_var_t new_value;
464 int err;
463 465
464 if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) 466 if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
465 return -ENOMEM; 467 return -ENOMEM;
466 468
467 err = cpumask_parse_user(buffer, count, new_value); 469 err = cpumask_parse_user(buffer, count, new_value);
468 if (!err) { 470 if (!err) {
469 cpumask_copy(mask, new_value); 471 cpumask_copy(prof_cpu_mask, new_value);
470 err = full_count; 472 err = count;
471 } 473 }
472 free_cpumask_var(new_value); 474 free_cpumask_var(new_value);
473 return err; 475 return err;
474} 476}
475 477
478static const struct file_operations prof_cpu_mask_proc_fops = {
479 .open = prof_cpu_mask_proc_open,
480 .read = seq_read,
481 .llseek = seq_lseek,
482 .release = single_release,
483 .write = prof_cpu_mask_proc_write,
484};
485
476void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir) 486void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
477{ 487{
478 struct proc_dir_entry *entry;
479
480 /* create /proc/irq/prof_cpu_mask */ 488 /* create /proc/irq/prof_cpu_mask */
481 entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); 489 proc_create("prof_cpu_mask", 0600, root_irq_dir, &prof_cpu_mask_proc_fops);
482 if (!entry)
483 return;
484 entry->data = prof_cpu_mask;
485 entry->read_proc = prof_cpu_mask_read_proc;
486 entry->write_proc = prof_cpu_mask_write_proc;
487} 490}
488 491
489/* 492/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index cc615f84751b..c71e91bf7372 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2414,11 +2414,9 @@ unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2414static void * 2414static void *
2415__g_next(struct seq_file *m, loff_t *pos) 2415__g_next(struct seq_file *m, loff_t *pos)
2416{ 2416{
2417 unsigned long *array = m->private;
2418
2419 if (*pos >= ftrace_graph_count) 2417 if (*pos >= ftrace_graph_count)
2420 return NULL; 2418 return NULL;
2421 return &array[*pos]; 2419 return &ftrace_graph_funcs[*pos];
2422} 2420}
2423 2421
2424static void * 2422static void *
@@ -2482,16 +2480,10 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2482 ftrace_graph_count = 0; 2480 ftrace_graph_count = 0;
2483 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2481 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2484 } 2482 }
2483 mutex_unlock(&graph_lock);
2485 2484
2486 if (file->f_mode & FMODE_READ) { 2485 if (file->f_mode & FMODE_READ)
2487 ret = seq_open(file, &ftrace_graph_seq_ops); 2486 ret = seq_open(file, &ftrace_graph_seq_ops);
2488 if (!ret) {
2489 struct seq_file *m = file->private_data;
2490 m->private = ftrace_graph_funcs;
2491 }
2492 } else
2493 file->private_data = ftrace_graph_funcs;
2494 mutex_unlock(&graph_lock);
2495 2487
2496 return ret; 2488 return ret;
2497} 2489}
@@ -2560,7 +2552,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2560 size_t cnt, loff_t *ppos) 2552 size_t cnt, loff_t *ppos)
2561{ 2553{
2562 struct trace_parser parser; 2554 struct trace_parser parser;
2563 unsigned long *array;
2564 size_t read = 0; 2555 size_t read = 0;
2565 ssize_t ret; 2556 ssize_t ret;
2566 2557
@@ -2574,12 +2565,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2574 goto out; 2565 goto out;
2575 } 2566 }
2576 2567
2577 if (file->f_mode & FMODE_READ) {
2578 struct seq_file *m = file->private_data;
2579 array = m->private;
2580 } else
2581 array = file->private_data;
2582
2583 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { 2568 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2584 ret = -ENOMEM; 2569 ret = -ENOMEM;
2585 goto out; 2570 goto out;
@@ -2591,7 +2576,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2591 parser.buffer[parser.idx] = 0; 2576 parser.buffer[parser.idx] = 0;
2592 2577
2593 /* we allow only one expression at a time */ 2578 /* we allow only one expression at a time */
2594 ret = ftrace_set_func(array, &ftrace_graph_count, 2579 ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
2595 parser.buffer); 2580 parser.buffer);
2596 if (ret) 2581 if (ret)
2597 goto out; 2582 goto out;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fd52a19dd172..861308072d28 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -125,13 +125,13 @@ int ftrace_dump_on_oops;
125 125
126static int tracing_set_tracer(const char *buf); 126static int tracing_set_tracer(const char *buf);
127 127
128#define BOOTUP_TRACER_SIZE 100 128#define MAX_TRACER_SIZE 100
129static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata; 129static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
130static char *default_bootup_tracer; 130static char *default_bootup_tracer;
131 131
132static int __init set_ftrace(char *str) 132static int __init set_ftrace(char *str)
133{ 133{
134 strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); 134 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
135 default_bootup_tracer = bootup_tracer_buf; 135 default_bootup_tracer = bootup_tracer_buf;
136 /* We are using ftrace early, expand it */ 136 /* We are using ftrace early, expand it */
137 ring_buffer_expanded = 1; 137 ring_buffer_expanded = 1;
@@ -242,13 +242,6 @@ static struct tracer *trace_types __read_mostly;
242static struct tracer *current_trace __read_mostly; 242static struct tracer *current_trace __read_mostly;
243 243
244/* 244/*
245 * max_tracer_type_len is used to simplify the allocating of
246 * buffers to read userspace tracer names. We keep track of
247 * the longest tracer name registered.
248 */
249static int max_tracer_type_len;
250
251/*
252 * trace_types_lock is used to protect the trace_types list. 245 * trace_types_lock is used to protect the trace_types list.
253 * This lock is also used to keep user access serialized. 246 * This lock is also used to keep user access serialized.
254 * Accesses from userspace will grab this lock while userspace 247 * Accesses from userspace will grab this lock while userspace
@@ -619,7 +612,6 @@ __releases(kernel_lock)
619__acquires(kernel_lock) 612__acquires(kernel_lock)
620{ 613{
621 struct tracer *t; 614 struct tracer *t;
622 int len;
623 int ret = 0; 615 int ret = 0;
624 616
625 if (!type->name) { 617 if (!type->name) {
@@ -627,6 +619,11 @@ __acquires(kernel_lock)
627 return -1; 619 return -1;
628 } 620 }
629 621
622 if (strlen(type->name) > MAX_TRACER_SIZE) {
623 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
624 return -1;
625 }
626
630 /* 627 /*
631 * When this gets called we hold the BKL which means that 628 * When this gets called we hold the BKL which means that
632 * preemption is disabled. Various trace selftests however 629 * preemption is disabled. Various trace selftests however
@@ -641,7 +638,7 @@ __acquires(kernel_lock)
641 for (t = trace_types; t; t = t->next) { 638 for (t = trace_types; t; t = t->next) {
642 if (strcmp(type->name, t->name) == 0) { 639 if (strcmp(type->name, t->name) == 0) {
643 /* already found */ 640 /* already found */
644 pr_info("Trace %s already registered\n", 641 pr_info("Tracer %s already registered\n",
645 type->name); 642 type->name);
646 ret = -1; 643 ret = -1;
647 goto out; 644 goto out;
@@ -692,9 +689,6 @@ __acquires(kernel_lock)
692 689
693 type->next = trace_types; 690 type->next = trace_types;
694 trace_types = type; 691 trace_types = type;
695 len = strlen(type->name);
696 if (len > max_tracer_type_len)
697 max_tracer_type_len = len;
698 692
699 out: 693 out:
700 tracing_selftest_running = false; 694 tracing_selftest_running = false;
@@ -703,7 +697,7 @@ __acquires(kernel_lock)
703 if (ret || !default_bootup_tracer) 697 if (ret || !default_bootup_tracer)
704 goto out_unlock; 698 goto out_unlock;
705 699
706 if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) 700 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
707 goto out_unlock; 701 goto out_unlock;
708 702
709 printk(KERN_INFO "Starting tracer '%s'\n", type->name); 703 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
@@ -725,14 +719,13 @@ __acquires(kernel_lock)
725void unregister_tracer(struct tracer *type) 719void unregister_tracer(struct tracer *type)
726{ 720{
727 struct tracer **t; 721 struct tracer **t;
728 int len;
729 722
730 mutex_lock(&trace_types_lock); 723 mutex_lock(&trace_types_lock);
731 for (t = &trace_types; *t; t = &(*t)->next) { 724 for (t = &trace_types; *t; t = &(*t)->next) {
732 if (*t == type) 725 if (*t == type)
733 goto found; 726 goto found;
734 } 727 }
735 pr_info("Trace %s not registered\n", type->name); 728 pr_info("Tracer %s not registered\n", type->name);
736 goto out; 729 goto out;
737 730
738 found: 731 found:
@@ -745,17 +738,7 @@ void unregister_tracer(struct tracer *type)
745 current_trace->stop(&global_trace); 738 current_trace->stop(&global_trace);
746 current_trace = &nop_trace; 739 current_trace = &nop_trace;
747 } 740 }
748 741out:
749 if (strlen(type->name) != max_tracer_type_len)
750 goto out;
751
752 max_tracer_type_len = 0;
753 for (t = &trace_types; *t; t = &(*t)->next) {
754 len = strlen((*t)->name);
755 if (len > max_tracer_type_len)
756 max_tracer_type_len = len;
757 }
758 out:
759 mutex_unlock(&trace_types_lock); 742 mutex_unlock(&trace_types_lock);
760} 743}
761 744
@@ -2604,7 +2587,7 @@ static ssize_t
2604tracing_set_trace_read(struct file *filp, char __user *ubuf, 2587tracing_set_trace_read(struct file *filp, char __user *ubuf,
2605 size_t cnt, loff_t *ppos) 2588 size_t cnt, loff_t *ppos)
2606{ 2589{
2607 char buf[max_tracer_type_len+2]; 2590 char buf[MAX_TRACER_SIZE+2];
2608 int r; 2591 int r;
2609 2592
2610 mutex_lock(&trace_types_lock); 2593 mutex_lock(&trace_types_lock);
@@ -2754,15 +2737,15 @@ static ssize_t
2754tracing_set_trace_write(struct file *filp, const char __user *ubuf, 2737tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2755 size_t cnt, loff_t *ppos) 2738 size_t cnt, loff_t *ppos)
2756{ 2739{
2757 char buf[max_tracer_type_len+1]; 2740 char buf[MAX_TRACER_SIZE+1];
2758 int i; 2741 int i;
2759 size_t ret; 2742 size_t ret;
2760 int err; 2743 int err;
2761 2744
2762 ret = cnt; 2745 ret = cnt;
2763 2746
2764 if (cnt > max_tracer_type_len) 2747 if (cnt > MAX_TRACER_SIZE)
2765 cnt = max_tracer_type_len; 2748 cnt = MAX_TRACER_SIZE;
2766 2749
2767 if (copy_from_user(&buf, ubuf, cnt)) 2750 if (copy_from_user(&buf, ubuf, cnt))
2768 return -EFAULT; 2751 return -EFAULT;
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 55a25c933d15..dd44b8768867 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,6 +8,57 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include "trace.h" 9#include "trace.h"
10 10
11/*
12 * We can't use a size but a type in alloc_percpu()
13 * So let's create a dummy type that matches the desired size
14 */
15typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
16
17char *trace_profile_buf;
18EXPORT_SYMBOL_GPL(trace_profile_buf);
19
20char *trace_profile_buf_nmi;
21EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
22
23/* Count the events in use (per event id, not per instance) */
24static int total_profile_count;
25
26static int ftrace_profile_enable_event(struct ftrace_event_call *event)
27{
28 char *buf;
29 int ret = -ENOMEM;
30
31 if (atomic_inc_return(&event->profile_count))
32 return 0;
33
34 if (!total_profile_count++) {
35 buf = (char *)alloc_percpu(profile_buf_t);
36 if (!buf)
37 goto fail_buf;
38
39 rcu_assign_pointer(trace_profile_buf, buf);
40
41 buf = (char *)alloc_percpu(profile_buf_t);
42 if (!buf)
43 goto fail_buf_nmi;
44
45 rcu_assign_pointer(trace_profile_buf_nmi, buf);
46 }
47
48 ret = event->profile_enable();
49 if (!ret)
50 return 0;
51
52 kfree(trace_profile_buf_nmi);
53fail_buf_nmi:
54 kfree(trace_profile_buf);
55fail_buf:
56 total_profile_count--;
57 atomic_dec(&event->profile_count);
58
59 return ret;
60}
61
11int ftrace_profile_enable(int event_id) 62int ftrace_profile_enable(int event_id)
12{ 63{
13 struct ftrace_event_call *event; 64 struct ftrace_event_call *event;
@@ -17,7 +68,7 @@ int ftrace_profile_enable(int event_id)
17 list_for_each_entry(event, &ftrace_events, list) { 68 list_for_each_entry(event, &ftrace_events, list) {
18 if (event->id == event_id && event->profile_enable && 69 if (event->id == event_id && event->profile_enable &&
19 try_module_get(event->mod)) { 70 try_module_get(event->mod)) {
20 ret = event->profile_enable(event); 71 ret = ftrace_profile_enable_event(event);
21 break; 72 break;
22 } 73 }
23 } 74 }
@@ -26,6 +77,33 @@ int ftrace_profile_enable(int event_id)
26 return ret; 77 return ret;
27} 78}
28 79
80static void ftrace_profile_disable_event(struct ftrace_event_call *event)
81{
82 char *buf, *nmi_buf;
83
84 if (!atomic_add_negative(-1, &event->profile_count))
85 return;
86
87 event->profile_disable();
88
89 if (!--total_profile_count) {
90 buf = trace_profile_buf;
91 rcu_assign_pointer(trace_profile_buf, NULL);
92
93 nmi_buf = trace_profile_buf_nmi;
94 rcu_assign_pointer(trace_profile_buf_nmi, NULL);
95
96 /*
97 * Ensure every events in profiling have finished before
98 * releasing the buffers
99 */
100 synchronize_sched();
101
102 free_percpu(buf);
103 free_percpu(nmi_buf);
104 }
105}
106
29void ftrace_profile_disable(int event_id) 107void ftrace_profile_disable(int event_id)
30{ 108{
31 struct ftrace_event_call *event; 109 struct ftrace_event_call *event;
@@ -33,7 +111,7 @@ void ftrace_profile_disable(int event_id)
33 mutex_lock(&event_mutex); 111 mutex_lock(&event_mutex);
34 list_for_each_entry(event, &ftrace_events, list) { 112 list_for_each_entry(event, &ftrace_events, list) {
35 if (event->id == event_id) { 113 if (event->id == event_id) {
36 event->profile_disable(event); 114 ftrace_profile_disable_event(event);
37 module_put(event->mod); 115 module_put(event->mod);
38 break; 116 break;
39 } 117 }
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 56c260b83a9c..6f03c8a1105e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -271,42 +271,32 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
271static void * 271static void *
272t_next(struct seq_file *m, void *v, loff_t *pos) 272t_next(struct seq_file *m, void *v, loff_t *pos)
273{ 273{
274 struct list_head *list = m->private; 274 struct ftrace_event_call *call = v;
275 struct ftrace_event_call *call;
276 275
277 (*pos)++; 276 (*pos)++;
278 277
279 for (;;) { 278 list_for_each_entry_continue(call, &ftrace_events, list) {
280 if (list == &ftrace_events)
281 return NULL;
282
283 call = list_entry(list, struct ftrace_event_call, list);
284
285 /* 279 /*
286 * The ftrace subsystem is for showing formats only. 280 * The ftrace subsystem is for showing formats only.
287 * They can not be enabled or disabled via the event files. 281 * They can not be enabled or disabled via the event files.
288 */ 282 */
289 if (call->regfunc) 283 if (call->regfunc)
290 break; 284 return call;
291
292 list = list->next;
293 } 285 }
294 286
295 m->private = list->next; 287 return NULL;
296
297 return call;
298} 288}
299 289
300static void *t_start(struct seq_file *m, loff_t *pos) 290static void *t_start(struct seq_file *m, loff_t *pos)
301{ 291{
302 struct ftrace_event_call *call = NULL; 292 struct ftrace_event_call *call;
303 loff_t l; 293 loff_t l;
304 294
305 mutex_lock(&event_mutex); 295 mutex_lock(&event_mutex);
306 296
307 m->private = ftrace_events.next; 297 call = list_entry(&ftrace_events, struct ftrace_event_call, list);
308 for (l = 0; l <= *pos; ) { 298 for (l = 0; l <= *pos; ) {
309 call = t_next(m, NULL, &l); 299 call = t_next(m, call, &l);
310 if (!call) 300 if (!call)
311 break; 301 break;
312 } 302 }
@@ -316,37 +306,28 @@ static void *t_start(struct seq_file *m, loff_t *pos)
316static void * 306static void *
317s_next(struct seq_file *m, void *v, loff_t *pos) 307s_next(struct seq_file *m, void *v, loff_t *pos)
318{ 308{
319 struct list_head *list = m->private; 309 struct ftrace_event_call *call = v;
320 struct ftrace_event_call *call;
321 310
322 (*pos)++; 311 (*pos)++;
323 312
324 retry: 313 list_for_each_entry_continue(call, &ftrace_events, list) {
325 if (list == &ftrace_events) 314 if (call->enabled)
326 return NULL; 315 return call;
327
328 call = list_entry(list, struct ftrace_event_call, list);
329
330 if (!call->enabled) {
331 list = list->next;
332 goto retry;
333 } 316 }
334 317
335 m->private = list->next; 318 return NULL;
336
337 return call;
338} 319}
339 320
340static void *s_start(struct seq_file *m, loff_t *pos) 321static void *s_start(struct seq_file *m, loff_t *pos)
341{ 322{
342 struct ftrace_event_call *call = NULL; 323 struct ftrace_event_call *call;
343 loff_t l; 324 loff_t l;
344 325
345 mutex_lock(&event_mutex); 326 mutex_lock(&event_mutex);
346 327
347 m->private = ftrace_events.next; 328 call = list_entry(&ftrace_events, struct ftrace_event_call, list);
348 for (l = 0; l <= *pos; ) { 329 for (l = 0; l <= *pos; ) {
349 call = s_next(m, NULL, &l); 330 call = s_next(m, call, &l);
350 if (!call) 331 if (!call)
351 break; 332 break;
352 } 333 }
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 687699d365ae..2547d8813cf0 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -11,7 +11,6 @@
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/string.h> 12#include <linux/string.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/marker.h>
15#include <linux/mutex.h> 14#include <linux/mutex.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/list.h> 16#include <linux/list.h>
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8712ce3c6a0e..7a3550cf2597 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -384,10 +384,13 @@ static int sys_prof_refcount_exit;
384 384
385static void prof_syscall_enter(struct pt_regs *regs, long id) 385static void prof_syscall_enter(struct pt_regs *regs, long id)
386{ 386{
387 struct syscall_trace_enter *rec;
388 struct syscall_metadata *sys_data; 387 struct syscall_metadata *sys_data;
388 struct syscall_trace_enter *rec;
389 unsigned long flags;
390 char *raw_data;
389 int syscall_nr; 391 int syscall_nr;
390 int size; 392 int size;
393 int cpu;
391 394
392 syscall_nr = syscall_get_nr(current, regs); 395 syscall_nr = syscall_get_nr(current, regs);
393 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 396 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
402 size = ALIGN(size + sizeof(u32), sizeof(u64)); 405 size = ALIGN(size + sizeof(u32), sizeof(u64));
403 size -= sizeof(u32); 406 size -= sizeof(u32);
404 407
405 do { 408 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
406 char raw_data[size]; 409 "profile buffer not large enough"))
410 return;
411
412 /* Protect the per cpu buffer, begin the rcu read side */
413 local_irq_save(flags);
407 414
408 /* zero the dead bytes from align to not leak stack to user */ 415 cpu = smp_processor_id();
409 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 416
417 if (in_nmi())
418 raw_data = rcu_dereference(trace_profile_buf_nmi);
419 else
420 raw_data = rcu_dereference(trace_profile_buf);
421
422 if (!raw_data)
423 goto end;
410 424
411 rec = (struct syscall_trace_enter *) raw_data; 425 raw_data = per_cpu_ptr(raw_data, cpu);
412 tracing_generic_entry_update(&rec->ent, 0, 0); 426
413 rec->ent.type = sys_data->enter_id; 427 /* zero the dead bytes from align to not leak stack to user */
414 rec->nr = syscall_nr; 428 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
415 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 429
416 (unsigned long *)&rec->args); 430 rec = (struct syscall_trace_enter *) raw_data;
417 perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); 431 tracing_generic_entry_update(&rec->ent, 0, 0);
418 } while(0); 432 rec->ent.type = sys_data->enter_id;
433 rec->nr = syscall_nr;
434 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
435 (unsigned long *)&rec->args);
436 perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
437
438end:
439 local_irq_restore(flags);
419} 440}
420 441
421int reg_prof_syscall_enter(char *name) 442int reg_prof_syscall_enter(char *name)
@@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name)
460static void prof_syscall_exit(struct pt_regs *regs, long ret) 481static void prof_syscall_exit(struct pt_regs *regs, long ret)
461{ 482{
462 struct syscall_metadata *sys_data; 483 struct syscall_metadata *sys_data;
463 struct syscall_trace_exit rec; 484 struct syscall_trace_exit *rec;
485 unsigned long flags;
464 int syscall_nr; 486 int syscall_nr;
487 char *raw_data;
488 int size;
489 int cpu;
465 490
466 syscall_nr = syscall_get_nr(current, regs); 491 syscall_nr = syscall_get_nr(current, regs);
467 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 492 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
471 if (!sys_data) 496 if (!sys_data)
472 return; 497 return;
473 498
474 tracing_generic_entry_update(&rec.ent, 0, 0); 499 /* We can probably do that at build time */
475 rec.ent.type = sys_data->exit_id; 500 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
476 rec.nr = syscall_nr; 501 size -= sizeof(u32);
477 rec.ret = syscall_get_return_value(current, regs);
478 502
479 perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); 503 /*
504 * Impossible, but be paranoid with the future
505 * How to put this check outside runtime?
506 */
507 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
508 "exit event has grown above profile buffer size"))
509 return;
510
511 /* Protect the per cpu buffer, begin the rcu read side */
512 local_irq_save(flags);
513 cpu = smp_processor_id();
514
515 if (in_nmi())
516 raw_data = rcu_dereference(trace_profile_buf_nmi);
517 else
518 raw_data = rcu_dereference(trace_profile_buf);
519
520 if (!raw_data)
521 goto end;
522
523 raw_data = per_cpu_ptr(raw_data, cpu);
524
525 /* zero the dead bytes from align to not leak stack to user */
526 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
527
528 rec = (struct syscall_trace_exit *)raw_data;
529
530 tracing_generic_entry_update(&rec->ent, 0, 0);
531 rec->ent.type = sys_data->exit_id;
532 rec->nr = syscall_nr;
533 rec->ret = syscall_get_return_value(current, regs);
534
535 perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size);
536
537end:
538 local_irq_restore(flags);
480} 539}
481 540
482int reg_prof_syscall_exit(char *name) 541int reg_prof_syscall_exit(char *name)