author		Linus Torvalds <torvalds@linux-foundation.org>	2009-09-21 12:05:47 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-21 12:05:47 -0400
commit		bd4c3a3441144cd46d1f544046523724c5bc6e94 (patch)
tree		8b5c67249a7a163caf3f88cbcb9df5236fcc3b93 /kernel
parent		b3727c24da69971503a4ca98b3b877753c6a4393 (diff)
parent		583a22e7c154dc0a3938db522696b4bc7f098f59 (diff)
Merge branch 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
kernel/profile.c: Switch /proc/irq/prof_cpu_mask to seq_file
tracing: Export trace_profile_buf symbols
  tracing/events: use list_for_each_entry_continue
tracing: remove max_tracer_type_len
function-graph: use ftrace_graph_funcs directly
tracing: Remove markers
tracing: Allocate the ftrace event profile buffer dynamically
tracing: Factorize the events profile accounting
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                      1
-rw-r--r--  kernel/marker.c                    930
-rw-r--r--  kernel/module.c                     18
-rw-r--r--  kernel/profile.c                    45
-rw-r--r--  kernel/trace/ftrace.c               23
-rw-r--r--  kernel/trace/trace.c                49
-rw-r--r--  kernel/trace/trace_event_profile.c  82
-rw-r--r--  kernel/trace/trace_events.c         49
-rw-r--r--  kernel/trace/trace_printk.c          1
-rw-r--r--  kernel/trace/trace_syscalls.c       97
10 files changed, 217 insertions(+), 1078 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 3d9c7e27e3f9..7c9b0a585502 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
-obj-$(CONFIG_MARKERS) += marker.o
 obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
diff --git a/kernel/marker.c b/kernel/marker.c
deleted file mode 100644
index ea54f2647868..000000000000
--- a/kernel/marker.c
+++ /dev/null
@@ -1,930 +0,0 @@
-/*
- * Copyright (C) 2007 Mathieu Desnoyers
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/types.h>
-#include <linux/jhash.h>
-#include <linux/list.h>
-#include <linux/rcupdate.h>
-#include <linux/marker.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-
-extern struct marker __start___markers[];
-extern struct marker __stop___markers[];
-
-/* Set to 1 to enable marker debug output */
-static const int marker_debug;
-
-/*
- * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
- * and module markers and the hash table.
- */
-static DEFINE_MUTEX(markers_mutex);
-
-/*
- * Marker hash table, containing the active markers.
- * Protected by module_mutex.
- */
-#define MARKER_HASH_BITS 6
-#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
-static struct hlist_head marker_table[MARKER_TABLE_SIZE];
-
-/*
- * Note about RCU :
- * It is used to make sure every handler has finished using its private data
- * between two consecutive operation (add or remove) on a given marker. It is
- * also used to delay the free of multiple probes array until a quiescent state
- * is reached.
- * marker entries modifications are protected by the markers_mutex.
- */
-struct marker_entry {
-        struct hlist_node hlist;
-        char *format;
-        /* Probe wrapper */
-        void (*call)(const struct marker *mdata, void *call_private, ...);
-        struct marker_probe_closure single;
-        struct marker_probe_closure *multi;
-        int refcount;   /* Number of times armed. 0 if disarmed. */
-        struct rcu_head rcu;
-        void *oldptr;
-        int rcu_pending;
-        unsigned char ptype:1;
-        unsigned char format_allocated:1;
-        char name[0];   /* Contains name'\0'format'\0' */
-};
-
-/**
- * __mark_empty_function - Empty probe callback
- * @probe_private: probe private data
- * @call_private: call site private data
- * @fmt: format string
- * @...: variable argument list
- *
- * Empty callback provided as a probe to the markers. By providing this to a
- * disabled marker, we make sure the execution flow is always valid even
- * though the function pointer change and the marker enabling are two distinct
- * operations that modifies the execution flow of preemptible code.
- */
-notrace void __mark_empty_function(void *probe_private, void *call_private,
-                const char *fmt, va_list *args)
-{
-}
-EXPORT_SYMBOL_GPL(__mark_empty_function);
-
-/*
- * marker_probe_cb Callback that prepares the variable argument list for probes.
- * @mdata: pointer of type struct marker
- * @call_private: caller site private data
- * @...: Variable argument list.
- *
- * Since we do not use "typical" pointer based RCU in the 1 argument case, we
- * need to put a full smp_rmb() in this branch. This is why we do not use
- * rcu_dereference() for the pointer read.
- */
-notrace void marker_probe_cb(const struct marker *mdata,
-                void *call_private, ...)
-{
-        va_list args;
-        char ptype;
-
-        /*
-         * rcu_read_lock_sched does two things : disabling preemption to make
-         * sure the teardown of the callbacks can be done correctly when they
-         * are in modules and they insure RCU read coherency.
-         */
-        rcu_read_lock_sched_notrace();
-        ptype = mdata->ptype;
-        if (likely(!ptype)) {
-                marker_probe_func *func;
-                /* Must read the ptype before ptr. They are not data dependant,
-                 * so we put an explicit smp_rmb() here. */
-                smp_rmb();
-                func = mdata->single.func;
-                /* Must read the ptr before private data. They are not data
-                 * dependant, so we put an explicit smp_rmb() here. */
-                smp_rmb();
-                va_start(args, call_private);
-                func(mdata->single.probe_private, call_private, mdata->format,
-                        &args);
-                va_end(args);
-        } else {
-                struct marker_probe_closure *multi;
-                int i;
-                /*
-                 * Read mdata->ptype before mdata->multi.
-                 */
-                smp_rmb();
-                multi = mdata->multi;
-                /*
-                 * multi points to an array, therefore accessing the array
-                 * depends on reading multi. However, even in this case,
-                 * we must insure that the pointer is read _before_ the array
-                 * data. Same as rcu_dereference, but we need a full smp_rmb()
-                 * in the fast path, so put the explicit barrier here.
-                 */
-                smp_read_barrier_depends();
-                for (i = 0; multi[i].func; i++) {
-                        va_start(args, call_private);
-                        multi[i].func(multi[i].probe_private, call_private,
-                                mdata->format, &args);
-                        va_end(args);
-                }
-        }
-        rcu_read_unlock_sched_notrace();
-}
-EXPORT_SYMBOL_GPL(marker_probe_cb);
-
-/*
- * marker_probe_cb Callback that does not prepare the variable argument list.
- * @mdata: pointer of type struct marker
- * @call_private: caller site private data
- * @...: Variable argument list.
- *
- * Should be connected to markers "MARK_NOARGS".
- */
-static notrace void marker_probe_cb_noarg(const struct marker *mdata,
-                void *call_private, ...)
-{
-        va_list args;   /* not initialized */
-        char ptype;
-
-        rcu_read_lock_sched_notrace();
-        ptype = mdata->ptype;
-        if (likely(!ptype)) {
-                marker_probe_func *func;
-                /* Must read the ptype before ptr. They are not data dependant,
-                 * so we put an explicit smp_rmb() here. */
-                smp_rmb();
-                func = mdata->single.func;
-                /* Must read the ptr before private data. They are not data
-                 * dependant, so we put an explicit smp_rmb() here. */
-                smp_rmb();
-                func(mdata->single.probe_private, call_private, mdata->format,
-                        &args);
-        } else {
-                struct marker_probe_closure *multi;
-                int i;
-                /*
-                 * Read mdata->ptype before mdata->multi.
-                 */
-                smp_rmb();
-                multi = mdata->multi;
-                /*
-                 * multi points to an array, therefore accessing the array
-                 * depends on reading multi. However, even in this case,
-                 * we must insure that the pointer is read _before_ the array
-                 * data. Same as rcu_dereference, but we need a full smp_rmb()
-                 * in the fast path, so put the explicit barrier here.
-                 */
-                smp_read_barrier_depends();
-                for (i = 0; multi[i].func; i++)
-                        multi[i].func(multi[i].probe_private, call_private,
-                                mdata->format, &args);
-        }
-        rcu_read_unlock_sched_notrace();
-}
-
-static void free_old_closure(struct rcu_head *head)
-{
-        struct marker_entry *entry = container_of(head,
-                struct marker_entry, rcu);
-        kfree(entry->oldptr);
-        /* Make sure we free the data before setting the pending flag to 0 */
-        smp_wmb();
-        entry->rcu_pending = 0;
-}
-
-static void debug_print_probes(struct marker_entry *entry)
-{
-        int i;
-
-        if (!marker_debug)
-                return;
-
-        if (!entry->ptype) {
-                printk(KERN_DEBUG "Single probe : %p %p\n",
-                        entry->single.func,
-                        entry->single.probe_private);
-        } else {
-                for (i = 0; entry->multi[i].func; i++)
-                        printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
-                                entry->multi[i].func,
-                                entry->multi[i].probe_private);
-        }
-}
-
-static struct marker_probe_closure *
-marker_entry_add_probe(struct marker_entry *entry,
-                marker_probe_func *probe, void *probe_private)
-{
-        int nr_probes = 0;
-        struct marker_probe_closure *old, *new;
-
-        WARN_ON(!probe);
-
-        debug_print_probes(entry);
-        old = entry->multi;
-        if (!entry->ptype) {
-                if (entry->single.func == probe &&
-                                entry->single.probe_private == probe_private)
-                        return ERR_PTR(-EBUSY);
-                if (entry->single.func == __mark_empty_function) {
-                        /* 0 -> 1 probes */
-                        entry->single.func = probe;
-                        entry->single.probe_private = probe_private;
-                        entry->refcount = 1;
-                        entry->ptype = 0;
-                        debug_print_probes(entry);
-                        return NULL;
-                } else {
-                        /* 1 -> 2 probes */
-                        nr_probes = 1;
-                        old = NULL;
-                }
-        } else {
-                /* (N -> N+1), (N != 0, 1) probes */
-                for (nr_probes = 0; old[nr_probes].func; nr_probes++)
-                        if (old[nr_probes].func == probe
-                                        && old[nr_probes].probe_private
-                                                == probe_private)
-                                return ERR_PTR(-EBUSY);
-        }
-        /* + 2 : one for new probe, one for NULL func */
-        new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
-                        GFP_KERNEL);
-        if (new == NULL)
-                return ERR_PTR(-ENOMEM);
-        if (!old)
-                new[0] = entry->single;
-        else
-                memcpy(new, old,
-                        nr_probes * sizeof(struct marker_probe_closure));
-        new[nr_probes].func = probe;
-        new[nr_probes].probe_private = probe_private;
-        entry->refcount = nr_probes + 1;
-        entry->multi = new;
-        entry->ptype = 1;
-        debug_print_probes(entry);
-        return old;
-}
-
-static struct marker_probe_closure *
-marker_entry_remove_probe(struct marker_entry *entry,
-                marker_probe_func *probe, void *probe_private)
-{
-        int nr_probes = 0, nr_del = 0, i;
-        struct marker_probe_closure *old, *new;
-
-        old = entry->multi;
-
-        debug_print_probes(entry);
-        if (!entry->ptype) {
-                /* 0 -> N is an error */
-                WARN_ON(entry->single.func == __mark_empty_function);
-                /* 1 -> 0 probes */
-                WARN_ON(probe && entry->single.func != probe);
-                WARN_ON(entry->single.probe_private != probe_private);
-                entry->single.func = __mark_empty_function;
-                entry->refcount = 0;
-                entry->ptype = 0;
-                debug_print_probes(entry);
-                return NULL;
-        } else {
-                /* (N -> M), (N > 1, M >= 0) probes */
-                for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
-                        if ((!probe || old[nr_probes].func == probe)
-                                        && old[nr_probes].probe_private
-                                                == probe_private)
-                                nr_del++;
-                }
-        }
-
-        if (nr_probes - nr_del == 0) {
-                /* N -> 0, (N > 1) */
-                entry->single.func = __mark_empty_function;
-                entry->refcount = 0;
-                entry->ptype = 0;
-        } else if (nr_probes - nr_del == 1) {
-                /* N -> 1, (N > 1) */
-                for (i = 0; old[i].func; i++)
-                        if ((probe && old[i].func != probe) ||
-                                        old[i].probe_private != probe_private)
-                                entry->single = old[i];
-                entry->refcount = 1;
-                entry->ptype = 0;
-        } else {
-                int j = 0;
-                /* N -> M, (N > 1, M > 1) */
-                /* + 1 for NULL */
-                new = kzalloc((nr_probes - nr_del + 1)
-                        * sizeof(struct marker_probe_closure), GFP_KERNEL);
-                if (new == NULL)
-                        return ERR_PTR(-ENOMEM);
-                for (i = 0; old[i].func; i++)
-                        if ((probe && old[i].func != probe) ||
-                                        old[i].probe_private != probe_private)
-                                new[j++] = old[i];
-                entry->refcount = nr_probes - nr_del;
-                entry->ptype = 1;
-                entry->multi = new;
-        }
-        debug_print_probes(entry);
-        return old;
-}
-
-/*
- * Get marker if the marker is present in the marker hash table.
- * Must be called with markers_mutex held.
- * Returns NULL if not present.
- */
-static struct marker_entry *get_marker(const char *name)
-{
-        struct hlist_head *head;
-        struct hlist_node *node;
-        struct marker_entry *e;
-        u32 hash = jhash(name, strlen(name), 0);
-
-        head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
-        hlist_for_each_entry(e, node, head, hlist) {
-                if (!strcmp(name, e->name))
-                        return e;
-        }
-        return NULL;
-}
-
-/*
- * Add the marker to the marker hash table. Must be called with markers_mutex
- * held.
- */
-static struct marker_entry *add_marker(const char *name, const char *format)
-{
-        struct hlist_head *head;
-        struct hlist_node *node;
-        struct marker_entry *e;
-        size_t name_len = strlen(name) + 1;
-        size_t format_len = 0;
-        u32 hash = jhash(name, name_len-1, 0);
-
-        if (format)
-                format_len = strlen(format) + 1;
-        head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
-        hlist_for_each_entry(e, node, head, hlist) {
-                if (!strcmp(name, e->name)) {
-                        printk(KERN_NOTICE
-                                "Marker %s busy\n", name);
-                        return ERR_PTR(-EBUSY); /* Already there */
-                }
-        }
-        /*
-         * Using kmalloc here to allocate a variable length element. Could
-         * cause some memory fragmentation if overused.
-         */
-        e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
-                        GFP_KERNEL);
-        if (!e)
-                return ERR_PTR(-ENOMEM);
-        memcpy(&e->name[0], name, name_len);
-        if (format) {
-                e->format = &e->name[name_len];
-                memcpy(e->format, format, format_len);
-                if (strcmp(e->format, MARK_NOARGS) == 0)
-                        e->call = marker_probe_cb_noarg;
-                else
-                        e->call = marker_probe_cb;
-                trace_mark(core_marker_format, "name %s format %s",
-                                e->name, e->format);
-        } else {
-                e->format = NULL;
-                e->call = marker_probe_cb;
-        }
-        e->single.func = __mark_empty_function;
-        e->single.probe_private = NULL;
-        e->multi = NULL;
-        e->ptype = 0;
-        e->format_allocated = 0;
-        e->refcount = 0;
-        e->rcu_pending = 0;
-        hlist_add_head(&e->hlist, head);
-        return e;
-}
-
-/*
- * Remove the marker from the marker hash table. Must be called with mutex_lock
- * held.
- */
-static int remove_marker(const char *name)
-{
-        struct hlist_head *head;
-        struct hlist_node *node;
-        struct marker_entry *e;
-        int found = 0;
-        size_t len = strlen(name) + 1;
-        u32 hash = jhash(name, len-1, 0);
-
-        head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
-        hlist_for_each_entry(e, node, head, hlist) {
-                if (!strcmp(name, e->name)) {
-                        found = 1;
-                        break;
-                }
-        }
-        if (!found)
-                return -ENOENT;
-        if (e->single.func != __mark_empty_function)
-                return -EBUSY;
-        hlist_del(&e->hlist);
-        if (e->format_allocated)
-                kfree(e->format);
-        /* Make sure the call_rcu has been executed */
-        if (e->rcu_pending)
-                rcu_barrier_sched();
-        kfree(e);
-        return 0;
-}
-
-/*
- * Set the mark_entry format to the format found in the element.
- */
-static int marker_set_format(struct marker_entry *entry, const char *format)
-{
-        entry->format = kstrdup(format, GFP_KERNEL);
-        if (!entry->format)
-                return -ENOMEM;
-        entry->format_allocated = 1;
-
-        trace_mark(core_marker_format, "name %s format %s",
-                        entry->name, entry->format);
-        return 0;
-}
-
-/*
- * Sets the probe callback corresponding to one marker.
- */
-static int set_marker(struct marker_entry *entry, struct marker *elem,
-                int active)
-{
-        int ret = 0;
-        WARN_ON(strcmp(entry->name, elem->name) != 0);
-
-        if (entry->format) {
-                if (strcmp(entry->format, elem->format) != 0) {
-                        printk(KERN_NOTICE
-                                "Format mismatch for probe %s "
-                                "(%s), marker (%s)\n",
-                                entry->name,
-                                entry->format,
-                                elem->format);
-                        return -EPERM;
-                }
-        } else {
-                ret = marker_set_format(entry, elem->format);
-                if (ret)
-                        return ret;
-        }
-
-        /*
-         * probe_cb setup (statically known) is done here. It is
-         * asynchronous with the rest of execution, therefore we only
-         * pass from a "safe" callback (with argument) to an "unsafe"
-         * callback (does not set arguments).
-         */
-        elem->call = entry->call;
-        /*
-         * Sanity check :
-         * We only update the single probe private data when the ptr is
-         * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
-         */
-        WARN_ON(elem->single.func != __mark_empty_function
-                && elem->single.probe_private != entry->single.probe_private
-                && !elem->ptype);
-        elem->single.probe_private = entry->single.probe_private;
-        /*
-         * Make sure the private data is valid when we update the
-         * single probe ptr.
-         */
-        smp_wmb();
-        elem->single.func = entry->single.func;
-        /*
-         * We also make sure that the new probe callbacks array is consistent
-         * before setting a pointer to it.
-         */
-        rcu_assign_pointer(elem->multi, entry->multi);
-        /*
-         * Update the function or multi probe array pointer before setting the
-         * ptype.
-         */
-        smp_wmb();
-        elem->ptype = entry->ptype;
-
-        if (elem->tp_name && (active ^ elem->state)) {
-                WARN_ON(!elem->tp_cb);
-                /*
-                 * It is ok to directly call the probe registration because type
-                 * checking has been done in the __trace_mark_tp() macro.
-                 */
-
-                if (active) {
-                        /*
-                         * try_module_get should always succeed because we hold
-                         * lock_module() to get the tp_cb address.
-                         */
-                        ret = try_module_get(__module_text_address(
-                                (unsigned long)elem->tp_cb));
-                        BUG_ON(!ret);
-                        ret = tracepoint_probe_register_noupdate(
-                                elem->tp_name,
-                                elem->tp_cb);
-                } else {
-                        ret = tracepoint_probe_unregister_noupdate(
-                                elem->tp_name,
-                                elem->tp_cb);
-                        /*
-                         * tracepoint_probe_update_all() must be called
-                         * before the module containing tp_cb is unloaded.
-                         */
-                        module_put(__module_text_address(
-                                (unsigned long)elem->tp_cb));
-                }
-        }
-        elem->state = active;
-
-        return ret;
-}
-
-/*
- * Disable a marker and its probe callback.
- * Note: only waiting an RCU period after setting elem->call to the empty
- * function insures that the original callback is not used anymore. This insured
- * by rcu_read_lock_sched around the call site.
- */
-static void disable_marker(struct marker *elem)
-{
-        int ret;
-
-        /* leave "call" as is. It is known statically. */
-        if (elem->tp_name && elem->state) {
-                WARN_ON(!elem->tp_cb);
-                /*
-                 * It is ok to directly call the probe registration because type
-                 * checking has been done in the __trace_mark_tp() macro.
-                 */
-                ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
-                        elem->tp_cb);
-                WARN_ON(ret);
-                /*
-                 * tracepoint_probe_update_all() must be called
-                 * before the module containing tp_cb is unloaded.
-                 */
-                module_put(__module_text_address((unsigned long)elem->tp_cb));
-        }
-        elem->state = 0;
-        elem->single.func = __mark_empty_function;
-        /* Update the function before setting the ptype */
-        smp_wmb();
-        elem->ptype = 0;        /* single probe */
-        /*
-         * Leave the private data and id there, because removal is racy and
-         * should be done only after an RCU period. These are never used until
-         * the next initialization anyway.
-         */
-}
-
-/**
- * marker_update_probe_range - Update a probe range
- * @begin: beginning of the range
- * @end: end of the range
- *
- * Updates the probe callback corresponding to a range of markers.
- */
-void marker_update_probe_range(struct marker *begin,
-        struct marker *end)
-{
-        struct marker *iter;
-        struct marker_entry *mark_entry;
-
-        mutex_lock(&markers_mutex);
-        for (iter = begin; iter < end; iter++) {
-                mark_entry = get_marker(iter->name);
-                if (mark_entry) {
-                        set_marker(mark_entry, iter, !!mark_entry->refcount);
-                        /*
-                         * ignore error, continue
-                         */
-                } else {
-                        disable_marker(iter);
-                }
-        }
-        mutex_unlock(&markers_mutex);
-}
-
-/*
- * Update probes, removing the faulty probes.
- *
- * Internal callback only changed before the first probe is connected to it.
- * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
- * transitions. All other transitions will leave the old private data valid.
- * This makes the non-atomicity of the callback/private data updates valid.
- *
- * "special case" updates :
- * 0 -> 1 callback
- * 1 -> 0 callback
- * 1 -> 2 callbacks
- * 2 -> 1 callbacks
- * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
- * Site effect : marker_set_format may delete the marker entry (creating a
- * replacement).
- */
-static void marker_update_probes(void)
-{
-        /* Core kernel markers */
-        marker_update_probe_range(__start___markers, __stop___markers);
-        /* Markers in modules. */
-        module_update_markers();
-        tracepoint_probe_update_all();
-}
-
-/**
- * marker_probe_register - Connect a probe to a marker
- * @name: marker name
- * @format: format string
- * @probe: probe handler
- * @probe_private: probe private data
- *
- * private data must be a valid allocated memory address, or NULL.
- * Returns 0 if ok, error value on error.
- * The probe address must at least be aligned on the architecture pointer size.
- */
-int marker_probe_register(const char *name, const char *format,
-                        marker_probe_func *probe, void *probe_private)
-{
-        struct marker_entry *entry;
-        int ret = 0;
-        struct marker_probe_closure *old;
-
-        mutex_lock(&markers_mutex);
-        entry = get_marker(name);
-        if (!entry) {
-                entry = add_marker(name, format);
-                if (IS_ERR(entry))
-                        ret = PTR_ERR(entry);
-        } else if (format) {
-                if (!entry->format)
-                        ret = marker_set_format(entry, format);
-                else if (strcmp(entry->format, format))
-                        ret = -EPERM;
-        }
-        if (ret)
-                goto end;
-
-        /*
-         * If we detect that a call_rcu is pending for this marker,
-         * make sure it's executed now.
-         */
-        if (entry->rcu_pending)
-                rcu_barrier_sched();
-        old = marker_entry_add_probe(entry, probe, probe_private);
-        if (IS_ERR(old)) {
-                ret = PTR_ERR(old);
-                goto end;
-        }
-        mutex_unlock(&markers_mutex);
-        marker_update_probes();
-        mutex_lock(&markers_mutex);
-        entry = get_marker(name);
-        if (!entry)
-                goto end;
-        if (entry->rcu_pending)
-                rcu_barrier_sched();
-        entry->oldptr = old;
-        entry->rcu_pending = 1;
-        /* write rcu_pending before calling the RCU callback */
-        smp_wmb();
-        call_rcu_sched(&entry->rcu, free_old_closure);
-end:
-        mutex_unlock(&markers_mutex);
-        return ret;
-}
-EXPORT_SYMBOL_GPL(marker_probe_register);
-
-/**
- * marker_probe_unregister - Disconnect a probe from a marker
- * @name: marker name
- * @probe: probe function pointer
- * @probe_private: probe private data
- *
- * Returns the private data given to marker_probe_register, or an ERR_PTR().
- * We do not need to call a synchronize_sched to make sure the probes have
- * finished running before doing a module unload, because the module unload
- * itself uses stop_machine(), which insures that every preempt disabled section
- * have finished.
- */
-int marker_probe_unregister(const char *name,
-        marker_probe_func *probe, void *probe_private)
-{
-        struct marker_entry *entry;
-        struct marker_probe_closure *old;
-        int ret = -ENOENT;
-
-        mutex_lock(&markers_mutex);
-        entry = get_marker(name);
-        if (!entry)
-                goto end;
-        if (entry->rcu_pending)
-                rcu_barrier_sched();
-        old = marker_entry_remove_probe(entry, probe, probe_private);
-        mutex_unlock(&markers_mutex);
-        marker_update_probes();
-        mutex_lock(&markers_mutex);
-        entry = get_marker(name);
-        if (!entry)
-                goto end;
-        if (entry->rcu_pending)
-                rcu_barrier_sched();
-        entry->oldptr = old;
-        entry->rcu_pending = 1;
-        /* write rcu_pending before calling the RCU callback */
-        smp_wmb();
-        call_rcu_sched(&entry->rcu, free_old_closure);
-        remove_marker(name);    /* Ignore busy error message */
-        ret = 0;
-end:
-        mutex_unlock(&markers_mutex);
-        return ret;
-}
-EXPORT_SYMBOL_GPL(marker_probe_unregister);
-
-static struct marker_entry *
-get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
-{
-        struct marker_entry *entry;
-        unsigned int i;
-        struct hlist_head *head;
-        struct hlist_node *node;
-
-        for (i = 0; i < MARKER_TABLE_SIZE; i++) {
-                head = &marker_table[i];
-                hlist_for_each_entry(entry, node, head, hlist) {
-                        if (!entry->ptype) {
-                                if (entry->single.func == probe
-                                                && entry->single.probe_private
-                                                == probe_private)
-                                        return entry;
-                        } else {
-                                struct marker_probe_closure *closure;
-                                closure = entry->multi;
-                                for (i = 0; closure[i].func; i++) {
-                                        if (closure[i].func == probe &&
-                                                        closure[i].probe_private
-                                                        == probe_private)
-                                                return entry;
-                                }
-                        }
-                }
-        }
-        return NULL;
-}
-
-/**
- * marker_probe_unregister_private_data - Disconnect a probe from a marker
- * @probe: probe function
- * @probe_private: probe private data
- *
- * Unregister a probe by providing the registered private data.
- * Only removes the first marker found in hash table.
- * Return 0 on success or error value.
- * We do not need to call a synchronize_sched to make sure the probes have
- * finished running before doing a module unload, because the module unload
- * itself uses stop_machine(), which insures that every preempt disabled section
- * have finished.
- */
-int marker_probe_unregister_private_data(marker_probe_func *probe,
-                void *probe_private)
-{
-        struct marker_entry *entry;
-        int ret = 0;
-        struct marker_probe_closure *old;
-
-        mutex_lock(&markers_mutex);
-        entry = get_marker_from_private_data(probe, probe_private);
-        if (!entry) {
-                ret = -ENOENT;
-                goto end;
-        }
-        if (entry->rcu_pending)
-                rcu_barrier_sched();
-        old = marker_entry_remove_probe(entry, NULL, probe_private);
-        mutex_unlock(&markers_mutex);
-        marker_update_probes();
-        mutex_lock(&markers_mutex);
-        entry = get_marker_from_private_data(probe, probe_private);
-        if (!entry)
-                goto end;
-        if (entry->rcu_pending)
-                rcu_barrier_sched();
-        entry->oldptr = old;
-        entry->rcu_pending = 1;
-        /* write rcu_pending before calling the RCU callback */
-        smp_wmb();
-        call_rcu_sched(&entry->rcu, free_old_closure);
-        remove_marker(entry->name);     /* Ignore busy error message */
-end:
-        mutex_unlock(&markers_mutex);
-        return ret;
-}
-EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
-
-/**
- * marker_get_private_data - Get a marker's probe private data
- * @name: marker name
- * @probe: probe to match
- * @num: get the nth matching probe's private data
- *
- * Returns the nth private data pointer (starting from 0) matching, or an
- * ERR_PTR.
- * Returns the private data pointer, or an ERR_PTR.
- * The private data pointer should _only_ be dereferenced if the caller is the
- * owner of the data, or its content could vanish. This is mostly used to
- * confirm that a caller is the owner of a registered probe.
- */
-void *marker_get_private_data(const char *name, marker_probe_func *probe,
-                int num)
-{
-        struct hlist_head *head;
-        struct hlist_node *node;
-        struct marker_entry *e;
-        size_t name_len = strlen(name) + 1;
-        u32 hash = jhash(name, name_len-1, 0);
-        int i;
-
-        head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
-        hlist_for_each_entry(e, node, head, hlist) {
-                if (!strcmp(name, e->name)) {
-                        if (!e->ptype) {
-                                if (num == 0 && e->single.func == probe)
-                                        return e->single.probe_private;
-                        } else {
-                                struct marker_probe_closure *closure;
-                                int match = 0;
-                                closure = e->multi;
-                                for (i = 0; closure[i].func; i++) {
-                                        if (closure[i].func != probe)
-                                                continue;
-                                        if (match++ == num)
-                                                return closure[i].probe_private;
-                                }
-                        }
-                        break;
-                }
-        }
-        return ERR_PTR(-ENOENT);
-}
-EXPORT_SYMBOL_GPL(marker_get_private_data);
-
-#ifdef CONFIG_MODULES
-
-int marker_module_notify(struct notifier_block *self,
-                unsigned long val, void *data)
-{
-        struct module *mod = data;
-
-        switch (val) {
-        case MODULE_STATE_COMING:
-                marker_update_probe_range(mod->markers,
-                        mod->markers + mod->num_markers);
-                break;
-        case MODULE_STATE_GOING:
-                marker_update_probe_range(mod->markers,
-                        mod->markers + mod->num_markers);
-                break;
-        }
-        return 0;
-}
-
-struct notifier_block marker_module_nb = {
-        .notifier_call = marker_module_notify,
-        .priority = 0,
-};
-
-static int init_markers(void)
-{
-        return register_module_notifier(&marker_module_nb);
-}
-__initcall(init_markers);
-
-#endif /* CONFIG_MODULES */
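
For context, the API deleted above expanded an inline trace_mark() at each instrumentation site and dispatched through the probe closures shown; tracepoints replace it with statically typed hooks. A minimal before/after sketch, assuming the pre-2.6.32 <linux/marker.h> interface removed here and the <linux/tracepoint.h> interface of the same era (the sched_wakeup name and my_probe callback are illustrative only):

    /* Old markers API (removed above): format string interpreted at probe time. */
    trace_mark(kernel_sched_wakeup, "pid %d state %ld", p->pid, p->state);

    /* Tracepoints: declared once with a typed prototype ... */
    DECLARE_TRACE(sched_wakeup,
            TP_PROTO(struct task_struct *p),
            TP_ARGS(p));

    /* ... defined in exactly one translation unit ... */
    DEFINE_TRACE(sched_wakeup);

    /* ... and invoked with compile-time type checking at the call site: */
    trace_sched_wakeup(p);

    /* probes attach and detach via generated, typed helpers: */
    register_trace_sched_wakeup(my_probe);
    unregister_trace_sched_wakeup(my_probe);
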
diff --git a/kernel/module.c b/kernel/module.c
index 05ce49ced8f6..b6ee424245dd 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod,
                                 sizeof(*mod->ctors), &mod->num_ctors);
 #endif
 
-#ifdef CONFIG_MARKERS
-        mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
-                                sizeof(*mod->markers), &mod->num_markers);
-#endif
 #ifdef CONFIG_TRACEPOINTS
         mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
                                 "__tracepoints",
@@ -2958,20 +2954,6 @@ void module_layout(struct module *mod,
 EXPORT_SYMBOL(module_layout);
 #endif
 
-#ifdef CONFIG_MARKERS
-void module_update_markers(void)
-{
-        struct module *mod;
-
-        mutex_lock(&module_mutex);
-        list_for_each_entry(mod, &modules, list)
-                if (!mod->taints)
-                        marker_update_probe_range(mod->markers,
-                                mod->markers + mod->num_markers);
-        mutex_unlock(&module_mutex);
-}
-#endif
-
 #ifdef CONFIG_TRACEPOINTS
 void module_update_tracepoints(void)
 {
diff --git a/kernel/profile.c b/kernel/profile.c
index 419250ebec4d..a55d3a367ae8 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,48 +442,51 @@ void profile_tick(int type)
 
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <asm/uaccess.h>
 
-static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
-                        int count, int *eof, void *data)
+static int prof_cpu_mask_proc_show(struct seq_file *m, void *v)
 {
-        int len = cpumask_scnprintf(page, count, data);
-        if (count - len < 2)
-                return -EINVAL;
-        len += sprintf(page + len, "\n");
-        return len;
+        seq_cpumask(m, prof_cpu_mask);
+        seq_putc(m, '\n');
+        return 0;
 }
 
-static int prof_cpu_mask_write_proc(struct file *file,
-        const char __user *buffer, unsigned long count, void *data)
+static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, prof_cpu_mask_proc_show, NULL);
+}
+
+static ssize_t prof_cpu_mask_proc_write(struct file *file,
+        const char __user *buffer, size_t count, loff_t *pos)
 {
-        struct cpumask *mask = data;
-        unsigned long full_count = count, err;
         cpumask_var_t new_value;
+        int err;
 
         if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
                 return -ENOMEM;
 
         err = cpumask_parse_user(buffer, count, new_value);
         if (!err) {
-                cpumask_copy(mask, new_value);
-                err = full_count;
+                cpumask_copy(prof_cpu_mask, new_value);
+                err = count;
         }
         free_cpumask_var(new_value);
         return err;
 }
 
+static const struct file_operations prof_cpu_mask_proc_fops = {
+        .open           = prof_cpu_mask_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+        .write          = prof_cpu_mask_proc_write,
+};
+
 void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
 {
-        struct proc_dir_entry *entry;
-
         /* create /proc/irq/prof_cpu_mask */
-        entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
-        if (!entry)
-                return;
-        entry->data = prof_cpu_mask;
-        entry->read_proc = prof_cpu_mask_read_proc;
-        entry->write_proc = prof_cpu_mask_write_proc;
+        proc_create("prof_cpu_mask", 0600, root_irq_dir, &prof_cpu_mask_proc_fops);
 }
 
 /*
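
The profile.c hunk above is the stock read_proc-to-seq_file conversion. A self-contained sketch of the same pattern for a hypothetical read-only /proc entry, using only interfaces present in this kernel (the foo names are invented):

    #include <linux/module.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int foo_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "hello\n");       /* seq_file does all the buffering */
            return 0;
    }

    static int foo_open(struct inode *inode, struct file *file)
    {
            /* single_open(): the entire file is produced by one show() call */
            return single_open(file, foo_show, NULL);
    }

    static const struct file_operations foo_fops = {
            .open    = foo_open,
            .read    = seq_read,            /* read/llseek come from seq_file */
            .llseek  = seq_lseek,
            .release = single_release,
    };

    static int __init foo_init(void)
    {
            /* proc_create() replaces create_proc_entry() + ->read_proc */
            return proc_create("foo", 0444, NULL, &foo_fops) ? 0 : -ENOMEM;
    }
    module_init(foo_init);
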
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index cc615f84751b..c71e91bf7372 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2414,11 +2414,9 @@ unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
 static void *
 __g_next(struct seq_file *m, loff_t *pos)
 {
-        unsigned long *array = m->private;
-
         if (*pos >= ftrace_graph_count)
                 return NULL;
-        return &array[*pos];
+        return &ftrace_graph_funcs[*pos];
 }
 
 static void *
@@ -2482,16 +2480,10 @@ ftrace_graph_open(struct inode *inode, struct file *file)
                 ftrace_graph_count = 0;
                 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
         }
+        mutex_unlock(&graph_lock);
 
-        if (file->f_mode & FMODE_READ) {
+        if (file->f_mode & FMODE_READ)
                 ret = seq_open(file, &ftrace_graph_seq_ops);
-                if (!ret) {
-                        struct seq_file *m = file->private_data;
-                        m->private = ftrace_graph_funcs;
-                }
-        } else
-                file->private_data = ftrace_graph_funcs;
-        mutex_unlock(&graph_lock);
 
         return ret;
 }
@@ -2560,7 +2552,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
                    size_t cnt, loff_t *ppos)
 {
         struct trace_parser parser;
-        unsigned long *array;
         size_t read = 0;
         ssize_t ret;
 
@@ -2574,12 +2565,6 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
                 goto out;
         }
 
-        if (file->f_mode & FMODE_READ) {
-                struct seq_file *m = file->private_data;
-                array = m->private;
-        } else
-                array = file->private_data;
-
         if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
                 ret = -ENOMEM;
                 goto out;
@@ -2591,7 +2576,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
         parser.buffer[parser.idx] = 0;
 
         /* we allow only one expression at a time */
-        ret = ftrace_set_func(array, &ftrace_graph_count,
+        ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
                         parser.buffer);
         if (ret)
                 goto out;
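
With the m->private indirection gone, the iterator reads the one global table directly. A condensed sketch of the resulting seq_file start/next shape over a static array (names are hypothetical stand-ins for ftrace_graph_funcs and ftrace_graph_count):

    static unsigned long graph_funcs[32];   /* stands in for ftrace_graph_funcs */
    static int graph_count;                 /* stands in for ftrace_graph_count */

    static void *g_lookup(loff_t *pos)
    {
            /* no per-open state: index straight into the global array */
            return (*pos < graph_count) ? &graph_funcs[*pos] : NULL;
    }

    static void *g_start(struct seq_file *m, loff_t *pos)
    {
            return g_lookup(pos);
    }

    static void *g_next(struct seq_file *m, void *v, loff_t *pos)
    {
            (*pos)++;
            return g_lookup(pos);
    }
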
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fd52a19dd172..861308072d28 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -125,13 +125,13 @@ int ftrace_dump_on_oops;
 
 static int tracing_set_tracer(const char *buf);
 
-#define BOOTUP_TRACER_SIZE              100
-static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
+#define MAX_TRACER_SIZE         100
+static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 static char *default_bootup_tracer;
 
 static int __init set_ftrace(char *str)
 {
-        strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
+        strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
         default_bootup_tracer = bootup_tracer_buf;
         /* We are using ftrace early, expand it */
         ring_buffer_expanded = 1;
@@ -242,13 +242,6 @@ static struct tracer *trace_types __read_mostly;
 static struct tracer            *current_trace __read_mostly;
 
 /*
- * max_tracer_type_len is used to simplify the allocating of
- * buffers to read userspace tracer names. We keep track of
- * the longest tracer name registered.
- */
-static int                      max_tracer_type_len;
-
-/*
  * trace_types_lock is used to protect the trace_types list.
  * This lock is also used to keep user access serialized.
  * Accesses from userspace will grab this lock while userspace
@@ -619,7 +612,6 @@ __releases(kernel_lock)
 __acquires(kernel_lock)
 {
         struct tracer *t;
-        int len;
         int ret = 0;
 
         if (!type->name) {
@@ -627,6 +619,11 @@ __acquires(kernel_lock)
                 return -1;
         }
 
+        if (strlen(type->name) > MAX_TRACER_SIZE) {
+                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
+                return -1;
+        }
+
         /*
          * When this gets called we hold the BKL which means that
          * preemption is disabled. Various trace selftests however
@@ -641,7 +638,7 @@ __acquires(kernel_lock)
         for (t = trace_types; t; t = t->next) {
                 if (strcmp(type->name, t->name) == 0) {
                         /* already found */
-                        pr_info("Trace %s already registered\n",
+                        pr_info("Tracer %s already registered\n",
                                 type->name);
                         ret = -1;
                         goto out;
@@ -692,9 +689,6 @@ __acquires(kernel_lock)
 
         type->next = trace_types;
         trace_types = type;
-        len = strlen(type->name);
-        if (len > max_tracer_type_len)
-                max_tracer_type_len = len;
 
  out:
         tracing_selftest_running = false;
@@ -703,7 +697,7 @@ __acquires(kernel_lock)
         if (ret || !default_bootup_tracer)
                 goto out_unlock;
 
-        if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
+        if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
                 goto out_unlock;
 
         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
@@ -725,14 +719,13 @@ __acquires(kernel_lock)
 void unregister_tracer(struct tracer *type)
 {
         struct tracer **t;
-        int len;
 
         mutex_lock(&trace_types_lock);
         for (t = &trace_types; *t; t = &(*t)->next) {
                 if (*t == type)
                         goto found;
         }
-        pr_info("Trace %s not registered\n", type->name);
+        pr_info("Tracer %s not registered\n", type->name);
         goto out;
 
 found:
@@ -745,17 +738,7 @@ void unregister_tracer(struct tracer *type)
                 current_trace->stop(&global_trace);
                 current_trace = &nop_trace;
         }
-
-        if (strlen(type->name) != max_tracer_type_len)
-                goto out;
-
-        max_tracer_type_len = 0;
-        for (t = &trace_types; *t; t = &(*t)->next) {
-                len = strlen((*t)->name);
-                if (len > max_tracer_type_len)
-                        max_tracer_type_len = len;
-        }
-out:
+ out:
         mutex_unlock(&trace_types_lock);
 }
 
@@ -2604,7 +2587,7 @@ static ssize_t
 tracing_set_trace_read(struct file *filp, char __user *ubuf,
                        size_t cnt, loff_t *ppos)
 {
-        char buf[max_tracer_type_len+2];
+        char buf[MAX_TRACER_SIZE+2];
         int r;
 
         mutex_lock(&trace_types_lock);
@@ -2754,15 +2737,15 @@ static ssize_t
 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
                         size_t cnt, loff_t *ppos)
 {
-        char buf[max_tracer_type_len+1];
+        char buf[MAX_TRACER_SIZE+1];
         int i;
         size_t ret;
         int err;
 
         ret = cnt;
 
-        if (cnt > max_tracer_type_len)
-                cnt = max_tracer_type_len;
+        if (cnt > MAX_TRACER_SIZE)
+                cnt = MAX_TRACER_SIZE;
 
         if (copy_from_user(&buf, ubuf, cnt))
                 return -EFAULT;
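
The trace.c hunks trade the dynamically maintained max_tracer_type_len for a single compile-time cap checked once at registration, so readers and writers can use fixed-size stack buffers. A sketch of the write-side handling this enables (a hypothetical helper mirroring tracing_set_trace_write() above):

    #define MAX_TRACER_SIZE 100

    static ssize_t copy_tracer_name(char *buf, const char __user *ubuf, size_t cnt)
    {
            /* buf is at least MAX_TRACER_SIZE + 1 bytes; names longer than
             * the cap were already rejected by register_tracer() */
            if (cnt > MAX_TRACER_SIZE)
                    cnt = MAX_TRACER_SIZE;  /* clamp instead of sizing dynamically */
            if (copy_from_user(buf, ubuf, cnt))
                    return -EFAULT;
            buf[cnt] = 0;                   /* NUL-terminate before name lookup */
            return cnt;
    }
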
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 55a25c933d15..dd44b8768867 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,6 +8,57 @@
 #include <linux/module.h>
 #include "trace.h"
 
+/*
+ * We can't use a size but a type in alloc_percpu()
+ * So let's create a dummy type that matches the desired size
+ */
+typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
+
+char *trace_profile_buf;
+EXPORT_SYMBOL_GPL(trace_profile_buf);
+
+char *trace_profile_buf_nmi;
+EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
+
+/* Count the events in use (per event id, not per instance) */
+static int      total_profile_count;
+
+static int ftrace_profile_enable_event(struct ftrace_event_call *event)
+{
+        char *buf;
+        int ret = -ENOMEM;
+
+        if (atomic_inc_return(&event->profile_count))
+                return 0;
+
+        if (!total_profile_count++) {
+                buf = (char *)alloc_percpu(profile_buf_t);
+                if (!buf)
+                        goto fail_buf;
+
+                rcu_assign_pointer(trace_profile_buf, buf);
+
+                buf = (char *)alloc_percpu(profile_buf_t);
+                if (!buf)
+                        goto fail_buf_nmi;
+
+                rcu_assign_pointer(trace_profile_buf_nmi, buf);
+        }
+
+        ret = event->profile_enable();
+        if (!ret)
+                return 0;
+
+        kfree(trace_profile_buf_nmi);
+fail_buf_nmi:
+        kfree(trace_profile_buf);
+fail_buf:
+        total_profile_count--;
+        atomic_dec(&event->profile_count);
+
+        return ret;
+}
+
 int ftrace_profile_enable(int event_id)
 {
         struct ftrace_event_call *event;
@@ -17,7 +68,7 @@ int ftrace_profile_enable(int event_id)
         list_for_each_entry(event, &ftrace_events, list) {
                 if (event->id == event_id && event->profile_enable &&
                     try_module_get(event->mod)) {
-                        ret = event->profile_enable(event);
+                        ret = ftrace_profile_enable_event(event);
                         break;
                 }
         }
@@ -26,6 +77,33 @@ int ftrace_profile_enable(int event_id)
         return ret;
 }
 
+static void ftrace_profile_disable_event(struct ftrace_event_call *event)
+{
+        char *buf, *nmi_buf;
+
+        if (!atomic_add_negative(-1, &event->profile_count))
+                return;
+
+        event->profile_disable();
+
+        if (!--total_profile_count) {
+                buf = trace_profile_buf;
+                rcu_assign_pointer(trace_profile_buf, NULL);
+
+                nmi_buf = trace_profile_buf_nmi;
+                rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+
+                /*
+                 * Ensure every events in profiling have finished before
+                 * releasing the buffers
+                 */
+                synchronize_sched();
+
+                free_percpu(buf);
+                free_percpu(nmi_buf);
+        }
+}
+
 void ftrace_profile_disable(int event_id)
 {
         struct ftrace_event_call *event;
@@ -33,7 +111,7 @@ void ftrace_profile_disable(int event_id)
         mutex_lock(&event_mutex);
         list_for_each_entry(event, &ftrace_events, list) {
                 if (event->id == event_id) {
-                        event->profile_disable(event);
+                        ftrace_profile_disable_event(event);
                         module_put(event->mod);
                         break;
                 }
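
The buffer management added above is a refcounted lazy-allocation pattern: the first enabled event allocates the shared percpu buffers and publishes them with rcu_assign_pointer(); the last disable unpublishes them and waits out rcu_sched readers before freeing. A condensed sketch of just that lifetime logic (the caller is assumed to hold event_mutex; profile_buf_t is the dummy sizing type from the hunk above; the buf_get/buf_put names are hypothetical):

    static char *pbuf;      /* percpu pointer, dereferenced with preemption off */
    static int users;       /* serialized by the caller's mutex */

    static int buf_get(void)
    {
            if (!users++) {
                    char *buf = (char *)alloc_percpu(profile_buf_t);
                    if (!buf) {
                            users--;
                            return -ENOMEM;
                    }
                    rcu_assign_pointer(pbuf, buf);  /* publish after init */
            }
            return 0;
    }

    static void buf_put(void)
    {
            if (!--users) {
                    char *buf = pbuf;
                    rcu_assign_pointer(pbuf, NULL); /* unpublish first */
                    synchronize_sched();    /* drain preempt-disabled users */
                    free_percpu(buf);       /* percpu memory pairs with
                                             * free_percpu(), not kfree() */
            }
    }
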
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 56c260b83a9c..6f03c8a1105e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -271,42 +271,32 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-        struct list_head *list = m->private;
-        struct ftrace_event_call *call;
+        struct ftrace_event_call *call = v;
 
         (*pos)++;
 
-        for (;;) {
-                if (list == &ftrace_events)
-                        return NULL;
-
-                call = list_entry(list, struct ftrace_event_call, list);
-
+        list_for_each_entry_continue(call, &ftrace_events, list) {
                 /*
                  * The ftrace subsystem is for showing formats only.
                  * They can not be enabled or disabled via the event files.
                  */
                 if (call->regfunc)
-                        break;
-
-                list = list->next;
+                        return call;
         }
 
-        m->private = list->next;
-
-        return call;
+        return NULL;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-        struct ftrace_event_call *call = NULL;
+        struct ftrace_event_call *call;
         loff_t l;
 
         mutex_lock(&event_mutex);
 
-        m->private = ftrace_events.next;
+        call = list_entry(&ftrace_events, struct ftrace_event_call, list);
         for (l = 0; l <= *pos; ) {
-                call = t_next(m, NULL, &l);
+                call = t_next(m, call, &l);
                 if (!call)
                         break;
         }
@@ -316,37 +306,28 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 static void *
 s_next(struct seq_file *m, void *v, loff_t *pos)
 {
-        struct list_head *list = m->private;
-        struct ftrace_event_call *call;
+        struct ftrace_event_call *call = v;
 
         (*pos)++;
 
-retry:
-        if (list == &ftrace_events)
-                return NULL;
-
-        call = list_entry(list, struct ftrace_event_call, list);
-
-        if (!call->enabled) {
-                list = list->next;
-                goto retry;
+        list_for_each_entry_continue(call, &ftrace_events, list) {
+                if (call->enabled)
+                        return call;
         }
 
-        m->private = list->next;
-
-        return call;
+        return NULL;
 }
 
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
-        struct ftrace_event_call *call = NULL;
+        struct ftrace_event_call *call;
         loff_t l;
 
         mutex_lock(&event_mutex);
 
-        m->private = ftrace_events.next;
+        call = list_entry(&ftrace_events, struct ftrace_event_call, list);
         for (l = 0; l <= *pos; ) {
-                call = s_next(m, NULL, &l);
+                call = s_next(m, call, &l);
350 | if (!call) | 331 | if (!call) |
351 | break; | 332 | break; |
352 | } | 333 | } |
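Both iterators now lean on one idiom: t_start()/s_start() seed the cursor by applying list_entry() to the list head itself, a bogus entry whose only job is to make the first list_for_each_entry_continue() step land on the first real element. A self-contained sketch of that idiom with hand-rolled macros (the kernel's live in <linux/list.h>; __typeof__ is a GCC/Clang extension):

#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

#define list_entry(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))
#define list_for_each_entry_continue(pos, head, member)                   \
    for (pos = list_entry((pos)->member.next, __typeof__(*pos), member);  \
         &pos->member != (head);                                          \
         pos = list_entry((pos)->member.next, __typeof__(*pos), member))

struct event { struct list_head list; int enabled; };

/* Continue from 'pos' (possibly the fake head entry) to the next
 * enabled event, as s_next() does above. */
static struct event *next_enabled(struct event *pos, struct list_head *head)
{
    list_for_each_entry_continue(pos, head, list)
        if (pos->enabled)
            return pos;
    return NULL;
}

/* Seeding: start a scan with the head disguised as an entry. */
static struct event *first_enabled(struct list_head *head)
{
    return next_enabled(list_entry(head, struct event, list), head);
}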
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 687699d365ae..2547d8813cf0 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
12 | #include <linux/string.h> | 12 | #include <linux/string.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/marker.h> | ||
15 | #include <linux/mutex.h> | 14 | #include <linux/mutex.h> |
16 | #include <linux/ctype.h> | 15 | #include <linux/ctype.h> |
17 | #include <linux/list.h> | 16 | #include <linux/list.h> |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 8712ce3c6a0e..7a3550cf2597 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -384,10 +384,13 @@ static int sys_prof_refcount_exit; | |||
384 | 384 | ||
385 | static void prof_syscall_enter(struct pt_regs *regs, long id) | 385 | static void prof_syscall_enter(struct pt_regs *regs, long id) |
386 | { | 386 | { |
387 | struct syscall_trace_enter *rec; | ||
388 | struct syscall_metadata *sys_data; | 387 | struct syscall_metadata *sys_data; |
388 | struct syscall_trace_enter *rec; | ||
389 | unsigned long flags; | ||
390 | char *raw_data; | ||
389 | int syscall_nr; | 391 | int syscall_nr; |
390 | int size; | 392 | int size; |
393 | int cpu; | ||
391 | 394 | ||
392 | syscall_nr = syscall_get_nr(current, regs); | 395 | syscall_nr = syscall_get_nr(current, regs); |
393 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) | 396 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) |
@@ -402,20 +405,38 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
402 | size = ALIGN(size + sizeof(u32), sizeof(u64)); | 405 | size = ALIGN(size + sizeof(u32), sizeof(u64)); |
403 | size -= sizeof(u32); | 406 | size -= sizeof(u32); |
404 | 407 | ||
405 | do { | 408 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, |
406 | char raw_data[size]; | 409 | "profile buffer not large enough")) |
410 | return; | ||
411 | |||
412 | /* Protect the per-cpu buffer; begin the RCU read side */ | ||
413 | local_irq_save(flags); | ||
407 | 414 | ||
408 | /* zero the dead bytes from align to not leak stack to user */ | 415 | cpu = smp_processor_id(); |
409 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 416 | |
417 | if (in_nmi()) | ||
418 | raw_data = rcu_dereference(trace_profile_buf_nmi); | ||
419 | else | ||
420 | raw_data = rcu_dereference(trace_profile_buf); | ||
421 | |||
422 | if (!raw_data) | ||
423 | goto end; | ||
410 | 424 | ||
411 | rec = (struct syscall_trace_enter *) raw_data; | 425 | raw_data = per_cpu_ptr(raw_data, cpu); |
412 | tracing_generic_entry_update(&rec->ent, 0, 0); | 426 | |
413 | rec->ent.type = sys_data->enter_id; | 427 | /* zero the dead bytes from alignment so we don't leak stack to user */ |
414 | rec->nr = syscall_nr; | 428 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; |
415 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 429 | |
416 | (unsigned long *)&rec->args); | 430 | rec = (struct syscall_trace_enter *) raw_data; |
417 | perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); | 431 | tracing_generic_entry_update(&rec->ent, 0, 0); |
418 | } while(0); | 432 | rec->ent.type = sys_data->enter_id; |
433 | rec->nr = syscall_nr; | ||
434 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | ||
435 | (unsigned long *)&rec->args); | ||
436 | perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size); | ||
437 | |||
438 | end: | ||
439 | local_irq_restore(flags); | ||
419 | } | 440 | } |
420 | 441 | ||
421 | int reg_prof_syscall_enter(char *name) | 442 | int reg_prof_syscall_enter(char *name) |
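The size arithmetic above deserves a note: perf prepends a u32 size header to each sample, so the record is padded until header plus payload fill whole u64 slots, and the final u64 of the record is zeroed so the padding never leaks stale bytes to userspace. A small sketch of that rounding (ALIGN written out here; 'payload' is a hypothetical input, not a kernel symbol):

#include <stdint.h>
#include <string.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

/* Record size such that a u32 header plus the record rounds up
 * to a whole number of u64 slots. */
static size_t profile_record_size(size_t payload)
{
    return ALIGN(payload + sizeof(uint32_t), sizeof(uint64_t))
           - sizeof(uint32_t);
}

static void prepare_record(char *raw_data, size_t size)
{
    /* The dead alignment bytes live in the final u64 slot;
     * zero them so no stale stack contents escape. */
    memset(&raw_data[size - sizeof(uint64_t)], 0, sizeof(uint64_t));
}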
@@ -460,8 +481,12 @@ void unreg_prof_syscall_enter(char *name) | |||
460 | static void prof_syscall_exit(struct pt_regs *regs, long ret) | 481 | static void prof_syscall_exit(struct pt_regs *regs, long ret) |
461 | { | 482 | { |
462 | struct syscall_metadata *sys_data; | 483 | struct syscall_metadata *sys_data; |
463 | struct syscall_trace_exit rec; | 484 | struct syscall_trace_exit *rec; |
485 | unsigned long flags; | ||
464 | int syscall_nr; | 486 | int syscall_nr; |
487 | char *raw_data; | ||
488 | int size; | ||
489 | int cpu; | ||
465 | 490 | ||
466 | syscall_nr = syscall_get_nr(current, regs); | 491 | syscall_nr = syscall_get_nr(current, regs); |
467 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) | 492 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) |
@@ -471,12 +496,46 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
471 | if (!sys_data) | 496 | if (!sys_data) |
472 | return; | 497 | return; |
473 | 498 | ||
474 | tracing_generic_entry_update(&rec.ent, 0, 0); | 499 | /* We could probably compute this size at build time */ |
475 | rec.ent.type = sys_data->exit_id; | 500 | size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); |
476 | rec.nr = syscall_nr; | 501 | size -= sizeof(u32); |
477 | rec.ret = syscall_get_return_value(current, regs); | ||
478 | 502 | ||
479 | perf_tpcounter_event(sys_data->exit_id, 0, 1, &rec, sizeof(rec)); | 503 | /* |
504 | * Should be impossible, but stay paranoid about future growth; | ||
505 | * ideally this check would move out of the runtime path. | ||
506 | */ | ||
507 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, | ||
508 | "exit event has grown above profile buffer size")) | ||
509 | return; | ||
510 | |||
511 | /* Protect the per-cpu buffer; begin the RCU read side */ | ||
512 | local_irq_save(flags); | ||
513 | cpu = smp_processor_id(); | ||
514 | |||
515 | if (in_nmi()) | ||
516 | raw_data = rcu_dereference(trace_profile_buf_nmi); | ||
517 | else | ||
518 | raw_data = rcu_dereference(trace_profile_buf); | ||
519 | |||
520 | if (!raw_data) | ||
521 | goto end; | ||
522 | |||
523 | raw_data = per_cpu_ptr(raw_data, cpu); | ||
524 | |||
525 | /* zero the dead bytes from alignment so we don't leak stack to user */ | ||
526 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
527 | |||
528 | rec = (struct syscall_trace_exit *)raw_data; | ||
529 | |||
530 | tracing_generic_entry_update(&rec->ent, 0, 0); | ||
531 | rec->ent.type = sys_data->exit_id; | ||
532 | rec->nr = syscall_nr; | ||
533 | rec->ret = syscall_get_return_value(current, regs); | ||
534 | |||
535 | perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size); | ||
536 | |||
537 | end: | ||
538 | local_irq_restore(flags); | ||
480 | } | 539 | } |
481 | 540 | ||
482 | int reg_prof_syscall_exit(char *name) | 541 | int reg_prof_syscall_exit(char *name) |
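Both handlers also choose between two sets of per-cpu buffers based on in_nmi(): an NMI can fire while the interrupted context is midway through filling its buffer, and giving NMIs their own copy keeps the two writes from corrupting each other (interrupts being disabled already excludes ordinary nesting on the same CPU). A toy userspace model of that selection, with in_nmi()/smp_processor_id() stubbed out (not kernel code):

#include <stdbool.h>

#define NR_CPUS  4
#define BUF_SIZE 4096

static char normal_buf[NR_CPUS][BUF_SIZE];
static char nmi_buf[NR_CPUS][BUF_SIZE];

/* Stand-ins for the kernel's in_nmi() and smp_processor_id(). */
static bool in_nmi_stub(void) { return false; }
static int  cpu_id_stub(void) { return 0; }

static char *pick_profile_buf(void)
{
    int cpu = cpu_id_stub();

    /* An NMI interrupting a half-written record would trash it;
     * routing NMIs to their own buffer makes that race harmless. */
    return in_nmi_stub() ? nmi_buf[cpu] : normal_buf[cpu];
}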