author     Ingo Molnar <mingo@elte.hu>  2009-11-21 08:07:23 -0500
committer  Ingo Molnar <mingo@elte.hu>  2009-11-21 08:07:23 -0500
commit     96200591a34f8ecb98481c626125df43a2463b55 (patch)
tree       314c376b01f254d04f9aaf449b1f9147ad177fa6 /kernel
parent     7031281e02bf951a2259849217193fb9d75a9762 (diff)
parent     68efa37df779b3e04280598e8b5b3a1919b65fee (diff)
Merge branch 'tracing/hw-breakpoints' into perf/core
Conflicts:
	arch/x86/kernel/kprobes.c
	kernel/trace/Makefile

Merge reason: hw-breakpoints perf integration is looking
              good in testing and in reviews, plus conflicts
              are mounting up - so merge & resolve.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                 1
-rw-r--r--  kernel/exit.c                   5
-rw-r--r--  kernel/hw_breakpoint.c        494
-rw-r--r--  kernel/kallsyms.c               1
-rw-r--r--  kernel/perf_event.c           136
-rw-r--r--  kernel/trace/Kconfig           21
-rw-r--r--  kernel/trace/Makefile           1
-rw-r--r--  kernel/trace/trace.h            7
-rw-r--r--  kernel/trace/trace_entries.h   16
-rw-r--r--  kernel/trace/trace_ksym.c     554
-rw-r--r--  kernel/trace/trace_selftest.c  55
11 files changed, 1288 insertions(+), 3 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index b8d4cd8ac0b9..17b575ec7d07 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..3f45e3cf931d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
 #include <linux/init_task.h>
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -978,6 +979,10 @@ NORET_TYPE void do_exit(long code)
 	proc_exit_connector(tsk);
 
 	/*
+	 * FIXME: do that only when needed, using sched_exit tracepoint
+	 */
+	flush_ptrace_hw_breakpoint(tsk);
+	/*
 	 * Flush inherited counters to the parent - before the parent
 	 * gets woken up by child-exit notifications.
 	 */
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..9ea9414e0e58
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,494 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) IBM Corporation, 2009
+ * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Thanks to Ingo Molnar for his many suggestions.
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ * This file contains the arch-independent routines.
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <linux/hw_breakpoint.h>
+
+#include <asm/processor.h>
+
+#ifdef CONFIG_X86
+#include <asm/debugreg.h>
+#endif
+
+/*
+ * Constraints data
+ */
+
+/* Number of pinned cpu breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+
+/* Number of pinned task breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+
+/* Number of non-pinned cpu/task breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+
+/* Gather the number of total pinned and un-pinned bp in a cpuset */
+struct bp_busy_slots {
+	unsigned int pinned;
+	unsigned int flexible;
+};
+
+/* Serialize accesses to the above constraints */
+static DEFINE_MUTEX(nr_bp_mutex);
+
+/*
+ * Report the maximum number of pinned breakpoints a task
+ * can have on this cpu
+ */
+static unsigned int max_task_bp_pinned(int cpu)
+{
+	int i;
+	unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+
+	for (i = HBP_NUM - 1; i >= 0; i--) {
+		if (tsk_pinned[i] > 0)
+			return i + 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Report the number of pinned/un-pinned breakpoints we have in
+ * a given cpu (cpu > -1) or in all of them (cpu = -1).
+ */
+static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+{
+	if (cpu >= 0) {
+		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+		slots->pinned += max_task_bp_pinned(cpu);
+		slots->flexible = per_cpu(nr_bp_flexible, cpu);
+
+		return;
+	}
+
+	for_each_online_cpu(cpu) {
+		unsigned int nr;
+
+		nr = per_cpu(nr_cpu_bp_pinned, cpu);
+		nr += max_task_bp_pinned(cpu);
+
+		if (nr > slots->pinned)
+			slots->pinned = nr;
+
+		nr = per_cpu(nr_bp_flexible, cpu);
+
+		if (nr > slots->flexible)
+			slots->flexible = nr;
+	}
+}
+
+/*
+ * Add a pinned breakpoint for the given task in our constraint table
+ */
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+{
+	int count = 0;
+	struct perf_event *bp;
+	struct perf_event_context *ctx = tsk->perf_event_ctxp;
+	unsigned int *task_bp_pinned;
+	struct list_head *list;
+	unsigned long flags;
+
+	if (WARN_ONCE(!ctx, "No perf context for this task"))
+		return;
+
+	list = &ctx->event_list;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	/*
+	 * The current breakpoint counter is not included in the list
+	 * at the open() callback time
+	 */
+	list_for_each_entry(bp, list, event_entry) {
+		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+			count++;
+	}
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
+		return;
+
+	task_bp_pinned = per_cpu(task_bp_pinned, cpu);
+	if (enable) {
+		task_bp_pinned[count]++;
+		if (count > 0)
+			task_bp_pinned[count-1]--;
+	} else {
+		task_bp_pinned[count]--;
+		if (count > 0)
+			task_bp_pinned[count-1]++;
+	}
+}
+
+/*
+ * Add/remove the given breakpoint in our constraint table
+ */
+static void toggle_bp_slot(struct perf_event *bp, bool enable)
+{
+	int cpu = bp->cpu;
+	struct task_struct *tsk = bp->ctx->task;
+
+	/* Pinned counter task profiling */
+	if (tsk) {
+		if (cpu >= 0) {
+			toggle_bp_task_slot(tsk, cpu, enable);
+			return;
+		}
+
+		for_each_online_cpu(cpu)
+			toggle_bp_task_slot(tsk, cpu, enable);
+		return;
+	}
+
+	/* Pinned counter cpu profiling */
+	if (enable)
+		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+	else
+		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+}
+
+/*
+ * Constraints to check before allowing this new breakpoint counter:
+ *
+ *  == Non-pinned counter == (Considered as pinned for now)
+ *
+ *   - If attached to a single cpu, check:
+ *
+ *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
+ *            + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ *
+ *       -> If there are already non-pinned counters in this cpu, it means
+ *          there is already a free slot for them.
+ *          Otherwise, we check that the maximum number of per task
+ *          breakpoints (for this cpu) plus the number of per cpu breakpoints
+ *          (for this cpu) doesn't cover all registers.
+ *
+ *   - If attached to every cpu, check:
+ *
+ *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
+ *            + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ *
+ *       -> This is roughly the same, except we check the number of per cpu
+ *          bp for every cpu and we keep the max one. Same for the per task
+ *          breakpoints.
+ *
+ *
+ *  == Pinned counter ==
+ *
+ *   - If attached to a single cpu, check:
+ *
+ *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
+ *            + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ *
+ *       -> Same checks as before. But now the nr_bp_flexible, if any, must keep
+ *          one register at least (or they will never be fed).
+ *
+ *   - If attached to every cpu, check:
+ *
+ *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
+ *            + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ */
+int reserve_bp_slot(struct perf_event *bp)
+{
+	struct bp_busy_slots slots = {0};
+	int ret = 0;
+
+	mutex_lock(&nr_bp_mutex);
+
+	fetch_bp_busy_slots(&slots, bp->cpu);
+
+	/* Flexible counters need to keep at least one slot */
+	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
+		ret = -ENOSPC;
+		goto end;
+	}
+
+	toggle_bp_slot(bp, true);
+
+end:
+	mutex_unlock(&nr_bp_mutex);
+
+	return ret;
+}
+
+void release_bp_slot(struct perf_event *bp)
+{
+	mutex_lock(&nr_bp_mutex);
+
+	toggle_bp_slot(bp, false);
+
+	mutex_unlock(&nr_bp_mutex);
+}
+
+
+int __register_perf_hw_breakpoint(struct perf_event *bp)
+{
+	int ret;
+
+	ret = reserve_bp_slot(bp);
+	if (ret)
+		return ret;
+
+	if (!bp->attr.disabled)
+		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+
+	return ret;
+}
+
+int register_perf_hw_breakpoint(struct perf_event *bp)
+{
+	bp->callback = perf_bp_event;
+
+	return __register_perf_hw_breakpoint(bp);
+}
+
+/*
+ * Register a breakpoint bound to a task and a given cpu.
+ * If cpu is -1, the breakpoint is active for the task on every cpu.
+ * If pid is -1, the breakpoint is active for every task on the given
+ * cpu.
+ */
+static struct perf_event *
+register_user_hw_breakpoint_cpu(unsigned long addr,
+				int len,
+				int type,
+				perf_callback_t triggered,
+				pid_t pid,
+				int cpu,
+				bool active)
+{
+	struct perf_event_attr *attr;
+	struct perf_event *bp;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return ERR_PTR(-ENOMEM);
+
+	attr->type = PERF_TYPE_BREAKPOINT;
+	attr->size = sizeof(*attr);
+	attr->bp_addr = addr;
+	attr->bp_len = len;
+	attr->bp_type = type;
+	/*
+	 * Such breakpoints are used by debuggers to trigger signals when
+	 * we hit the expected memory op. We can't miss such events, they
+	 * must be pinned.
+	 */
+	attr->pinned = 1;
+
+	if (!active)
+		attr->disabled = 1;
+
+	bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
+	kfree(attr);
+
+	return bp;
+}
+
+/**
+ * register_user_hw_breakpoint - register a hardware breakpoint for user space
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ * @active: should we activate it while registering it
+ *
+ */
+struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    struct task_struct *tsk,
+			    bool active)
+{
+	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
+					       tsk->pid, -1, active);
+}
+EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
+
+/**
+ * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
+ * @bp: the breakpoint structure to modify
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ * @active: should we activate it while registering it
+ */
+struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+			  unsigned long addr,
+			  int len,
+			  int type,
+			  perf_callback_t triggered,
+			  struct task_struct *tsk,
+			  bool active)
+{
+	/*
+	 * FIXME: do it without unregistering
+	 * - We don't want to lose our slot
+	 * - If the new bp is incorrect, don't lose the older one
+	 */
+	unregister_hw_breakpoint(bp);
+
+	return register_user_hw_breakpoint(addr, len, type, triggered,
+					   tsk, active);
+}
+EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
+
+/**
+ * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
+ * @bp: the breakpoint structure to unregister
+ */
+void unregister_hw_breakpoint(struct perf_event *bp)
+{
+	if (!bp)
+		return;
+	perf_event_release_kernel(bp);
+}
+EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
+
+static struct perf_event *
+register_kernel_hw_breakpoint_cpu(unsigned long addr,
+				  int len,
+				  int type,
+				  perf_callback_t triggered,
+				  int cpu,
+				  bool active)
+{
+	return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
+					       -1, cpu, active);
+}
+
+/**
+ * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @active: should we activate it while registering it
+ *
+ * @return a set of per_cpu pointers to perf events
+ */
+struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+			    int len,
+			    int type,
+			    perf_callback_t triggered,
+			    bool active)
+{
+	struct perf_event **cpu_events, **pevent, *bp;
+	long err;
+	int cpu;
+
+	cpu_events = alloc_percpu(typeof(*cpu_events));
+	if (!cpu_events)
+		return ERR_PTR(-ENOMEM);
+
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
+						       triggered, cpu, active);
+
+		*pevent = bp;
+
+		if (IS_ERR(bp) || !bp) {
+			err = PTR_ERR(bp);
+			goto fail;
+		}
+	}
+
+	return cpu_events;
+
+fail:
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		if (IS_ERR(*pevent) || !*pevent)
+			break;
+		unregister_hw_breakpoint(*pevent);
+	}
+	free_percpu(cpu_events);
+	/* return the error if any */
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
+
+/**
+ * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
+ * @cpu_events: the per cpu set of events to unregister
+ */
+void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
+{
+	int cpu;
+	struct perf_event **pevent;
+
+	for_each_possible_cpu(cpu) {
+		pevent = per_cpu_ptr(cpu_events, cpu);
+		unregister_hw_breakpoint(*pevent);
+	}
+	free_percpu(cpu_events);
+}
+EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
+
+static struct notifier_block hw_breakpoint_exceptions_nb = {
+	.notifier_call = hw_breakpoint_exceptions_notify,
+	/* we need to be notified first */
+	.priority = 0x7fffffff
+};
+
+static int __init init_hw_breakpoint(void)
+{
+	return register_die_notifier(&hw_breakpoint_exceptions_nb);
+}
+core_initcall(init_hw_breakpoint);
+
+
+struct pmu perf_ops_bp = {
+	.enable		= arch_install_hw_breakpoint,
+	.disable	= arch_uninstall_hw_breakpoint,
+	.read		= hw_breakpoint_pmu_read,
+	.unthrottle	= hw_breakpoint_pmu_unthrottle
+};
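
The API above changed shortly after this merge, but as defined in this file a
kernel client can set a "wide" (all-cpu) data watchpoint with a handful of
calls. A minimal, illustrative module sketch follows; the watched symbol
(pid_max), the module boilerplate and the message text are assumptions, not
part of the patch:

    /* Sketch only: uses the register_wide_hw_breakpoint() signature above. */
    #include <linux/module.h>
    #include <linux/kallsyms.h>
    #include <linux/sched.h>
    #include <linux/err.h>
    #include <linux/hw_breakpoint.h>

    static struct perf_event **wp;

    /* perf_callback_t handler, same shape as ksym_hbp_handler() below */
    static void wp_triggered(struct perf_event *bp, void *data)
    {
        /* called from the breakpoint exception when the address is written */
        pr_info("watched symbol written by %s\n", current->comm);
    }

    static int __init wp_init(void)
    {
        /* kallsyms_lookup_name() is exported by this very merge */
        unsigned long addr = kallsyms_lookup_name("pid_max");

        if (!addr)
            return -ENXIO;

        /* reserve one write-breakpoint slot on every possible cpu */
        wp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
                                         HW_BREAKPOINT_W, wp_triggered, true);
        return IS_ERR(wp) ? PTR_ERR(wp) : 0;
    }

    static void __exit wp_exit(void)
    {
        unregister_wide_hw_breakpoint(wp);
    }

    module_init(wp_init);
    module_exit(wp_exit);
    MODULE_LICENSE("GPL");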
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8b6b8b697c68..8e5288a8a355 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
 	}
 	return module_kallsyms_lookup_name(name);
 }
+EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
 
 int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
 				      unsigned long),
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3256e36ad251..3852e2656bb0 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -29,6 +29,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -1725,6 +1726,26 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+int perf_event_release_kernel(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_event_remove_from_context(event);
+	mutex_unlock(&ctx->mutex);
+
+	mutex_lock(&event->owner->perf_event_mutex);
+	list_del_init(&event->owner_entry);
+	mutex_unlock(&event->owner->perf_event_mutex);
+	put_task_struct(event->owner);
+
+	free_event(event);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
 static int perf_event_read_size(struct perf_event *event)
 {
 	int entry = sizeof(u64); /* value */
@@ -1750,7 +1771,7 @@ static int perf_event_read_size(struct perf_event *event)
 	return size;
 }
 
-static u64 perf_event_read_value(struct perf_event *event)
+u64 perf_event_read_value(struct perf_event *event)
 {
 	struct perf_event *child;
 	u64 total = 0;
@@ -1761,6 +1782,7 @@ static u64 perf_event_read_value(struct perf_event *event)
 
 	return total;
 }
+EXPORT_SYMBOL_GPL(perf_event_read_value);
 
 static int perf_event_read_entry(struct perf_event *event,
 				 u64 read_format, char __user *buf)
@@ -4231,6 +4253,51 @@ static void perf_event_free_filter(struct perf_event *event)
 
 #endif /* CONFIG_EVENT_PROFILE */
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+	release_bp_slot(event);
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	int err;
+	/*
+	 * The breakpoint is already filled if we haven't created the counter
+	 * through perf syscall
+	 * FIXME: manage to get triggered to NULL if it comes from syscalls
+	 */
+	if (!bp->callback)
+		err = register_perf_hw_breakpoint(bp);
+	else
+		err = __register_perf_hw_breakpoint(bp);
+	if (err)
+		return ERR_PTR(err);
+
+	bp->destroy = bp_perf_event_destroy;
+
+	return &perf_ops_bp;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+	/* TODO */
+}
+#else
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	return NULL;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+}
+#endif
+
 atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_event_destroy(struct perf_event *event)
@@ -4297,6 +4364,7 @@ perf_event_alloc(struct perf_event_attr *attr,
 		   struct perf_event_context *ctx,
 		   struct perf_event *group_leader,
 		   struct perf_event *parent_event,
+		   perf_callback_t callback,
 		   gfp_t gfpflags)
 {
 	const struct pmu *pmu;
@@ -4339,6 +4407,11 @@ perf_event_alloc(struct perf_event_attr *attr,
 
 	event->state = PERF_EVENT_STATE_INACTIVE;
 
+	if (!callback && parent_event)
+		callback = parent_event->callback;
+
+	event->callback = callback;
+
 	if (attr->disabled)
 		event->state = PERF_EVENT_STATE_OFF;
 
@@ -4373,6 +4446,11 @@ perf_event_alloc(struct perf_event_attr *attr,
 		pmu = tp_perf_event_init(event);
 		break;
 
+	case PERF_TYPE_BREAKPOINT:
+		pmu = bp_perf_event_init(event);
+		break;
+
+
 	default:
 		break;
 	}
@@ -4615,7 +4693,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	event = perf_event_alloc(&attr, cpu, ctx, group_leader,
-				 NULL, GFP_KERNEL);
+				 NULL, NULL, GFP_KERNEL);
 	err = PTR_ERR(event);
 	if (IS_ERR(event))
 		goto err_put_context;
@@ -4663,6 +4741,58 @@ err_put_context:
 	return err;
 }
 
+/**
+ * perf_event_create_kernel_counter
+ *
+ * @attr: attributes of the counter to create
+ * @cpu: cpu in which the counter is bound
+ * @pid: task to profile
+ */
+struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+				 pid_t pid, perf_callback_t callback)
+{
+	struct perf_event *event;
+	struct perf_event_context *ctx;
+	int err;
+
+	/*
+	 * Get the target context (task or percpu):
+	 */
+
+	ctx = find_get_context(pid, cpu);
+	if (IS_ERR(ctx))
+		return NULL;
+
+	event = perf_event_alloc(attr, cpu, ctx, NULL,
+				 NULL, callback, GFP_KERNEL);
+	err = PTR_ERR(event);
+	if (IS_ERR(event))
+		goto err_put_context;
+
+	event->filp = NULL;
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_install_in_context(ctx, event, cpu);
+	++ctx->generation;
+	mutex_unlock(&ctx->mutex);
+
+	event->owner = current;
+	get_task_struct(current);
+	mutex_lock(&current->perf_event_mutex);
+	list_add_tail(&event->owner_entry, &current->perf_event_list);
+	mutex_unlock(&current->perf_event_mutex);
+
+	return event;
+
+err_put_context:
+	if (err < 0)
+		put_ctx(ctx);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+
 /*
  * inherit a event from parent task to child task:
  */
@@ -4688,7 +4818,7 @@ inherit_event(struct perf_event *parent_event,
 	child_event = perf_event_alloc(&parent_event->attr,
 					   parent_event->cpu, child_ctx,
 					   group_leader, parent_event,
-					   GFP_KERNEL);
+					   NULL, GFP_KERNEL);
 	if (IS_ERR(child_event))
 		return child_event;
 	get_ctx(child_ctx);
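
A sketch (not part of the patch) of how the interface exported above can be
used to create an in-kernel breakpoint counter directly; the attr setup
mirrors register_user_hw_breakpoint_cpu() in kernel/hw_breakpoint.c, and
"my_callback" is a placeholder name. Note that in this version
perf_event_create_kernel_counter() returns NULL on failure, not an ERR_PTR:

    #include <linux/perf_event.h>
    #include <linux/hw_breakpoint.h>

    static struct perf_event *create_bp_counter(unsigned long addr,
                                                perf_callback_t my_callback)
    {
        struct perf_event_attr attr = {
            .type    = PERF_TYPE_BREAKPOINT,
            .size    = sizeof(attr),
            .bp_addr = addr,
            .bp_len  = HW_BREAKPOINT_LEN_4,
            .bp_type = HW_BREAKPOINT_W,
            .pinned  = 1,
        };

        /* counter bound to cpu 0, not restricted to one task (pid == -1) */
        return perf_event_create_kernel_counter(&attr, 0, -1, my_callback);
    }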
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f05671609a89..d006554888dc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@ config POWER_TRACER
 	  power management decisions, specifically the C-state and P-state
 	  behavior.
 
+config KSYM_TRACER
+	bool "Trace read and write access on kernel memory locations"
+	depends on HAVE_HW_BREAKPOINT
+	select TRACING
+	help
+	  This tracer helps find read and write operations on any given kernel
+	  symbol listed in /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+	bool "Profile all kernel memory accesses on 'watched' variables"
+	depends on KSYM_TRACER
+	help
+	  This tracer profiles kernel accesses on variables watched through the
+	  ksym tracer ftrace plugin. Depending upon the hardware, all read
+	  and write operations on kernel variables can be monitored for
+	  accesses.
+
+	  The results will be displayed in:
+	  /debugfs/tracing/profile_ksym
+
+	  Say N if unsure.
 
 config STACK_TRACER
 	bool "Trace max stack"
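
To try the new plugin, the options above must be enabled; a minimal .config
fragment (HAVE_HW_BREAKPOINT is not user-selectable, it is provided by the
architecture, x86 at this point):

    CONFIG_PERF_EVENTS=y
    CONFIG_HAVE_HW_BREAKPOINT=y
    CONFIG_KSYM_TRACER=y
    CONFIG_PROFILE_KSYM_TRACER=y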
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index edc3a3cca1a1..cd9ecd89ec77 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
 obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b4e4212e66d7..4da6ede74401 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
 #include <linux/ftrace.h>
 #include <trace/boot.h>
 #include <linux/kmemtrace.h>
+#include <linux/hw_breakpoint.h>
 
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
 	TRACE_KMEM_ALLOC,
 	TRACE_KMEM_FREE,
 	TRACE_BLK,
+	TRACE_KSYM,
 
 	__TRACE_LAST_TYPE,
 };
@@ -232,6 +234,7 @@ extern void __ftrace_bad_type(void);
 			  TRACE_KMEM_ALLOC);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
 			  TRACE_KMEM_FREE);	\
+		IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
 		__ftrace_bad_type();					\
 	} while (0)
 
@@ -387,6 +390,8 @@ int register_tracer(struct tracer *type);
 void unregister_tracer(struct tracer *type);
 int is_tracing_stopped(void);
 
+extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
+
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -461,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 					 struct trace_array *tr);
 extern int trace_selftest_startup_hw_branches(struct tracer *trace,
 					      struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+				       struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
 	F_printk("type:%u call_site:%lx ptr:%p",
 		 __entry->type_id, __entry->call_site, __entry->ptr)
 );
+
+FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
+
+	TRACE_KSYM,
+
+	F_STRUCT(
+		__field(	unsigned long,	ip	)
+		__field(	unsigned char,	type	)
+		__array(	char,		cmd,	TASK_COMM_LEN	)
+		__field(	unsigned long,	addr	)
+	),
+
+	F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
+		(void *)__entry->ip, (unsigned int)__entry->type,
+		(void *)__entry->addr, __entry->cmd)
+);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..11935b53a6cb
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,554 @@
+/*
+ * trace_ksym.c - Kernel Symbol Tracer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+#include "trace_stat.h"
+#include "trace.h"
+
+#include <linux/hw_breakpoint.h>
+#include <asm/hw_breakpoint.h>
+
+/*
+ * For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HBP_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+
+struct trace_ksym {
+	struct perf_event	**ksym_hbp;
+	unsigned long		ksym_addr;
+	int			type;
+	int			len;
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+	unsigned long		counter;
+#endif
+	struct hlist_node	ksym_hlist;
+};
+
+static struct trace_array *ksym_trace_array;
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+static DEFINE_MUTEX(ksym_tracer_mutex);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+
+#define MAX_UL_INT 0xffffffff
+
+void ksym_collect_stats(unsigned long hbp_hit_addr)
+{
+	struct hlist_node *node;
+	struct trace_ksym *entry;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
+		if ((entry->ksym_addr == hbp_hit_addr) &&
+		    (entry->counter <= MAX_UL_INT)) {
+			entry->counter++;
+			break;
+		}
+	}
+	rcu_read_unlock();
+}
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+void ksym_hbp_handler(struct perf_event *hbp, void *data)
+{
+	struct ring_buffer_event *event;
+	struct ksym_trace_entry *entry;
+	struct pt_regs *regs = data;
+	struct ring_buffer *buffer;
+	int pc;
+
+	if (!ksym_tracing_enabled)
+		return;
+
+	buffer = ksym_trace_array->buffer;
+
+	pc = preempt_count();
+
+	event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
+					  sizeof(*entry), 0, pc);
+	if (!event)
+		return;
+
+	entry = ring_buffer_event_data(event);
+	entry->ip = instruction_pointer(regs);
+	entry->type = hw_breakpoint_type(hbp);
+	entry->addr = hw_breakpoint_addr(hbp);
+	strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+	ksym_collect_stats(hw_breakpoint_addr(hbp));
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+	trace_buffer_unlock_commit(buffer, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Breakpoints (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *str)
+{
+	int access = 0;
+
+	if (str[0] == 'r')
+		access |= HW_BREAKPOINT_R;
+
+	if (str[1] == 'w')
+		access |= HW_BREAKPOINT_W;
+
+	if (str[2] == 'x')
+		access |= HW_BREAKPOINT_X;
+
+	switch (access) {
+	case HW_BREAKPOINT_R:
+	case HW_BREAKPOINT_W:
+	case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+		return access;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules:
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ *    i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ *    <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+				unsigned long *addr)
+{
+	int ret;
+
+	*ksymname = strsep(&input_string, ":");
+	*addr = kallsyms_lookup_name(*ksymname);
+
+	/* Check for malformed request: (2), (1) and (5) */
+	if ((!input_string) ||
+	    (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
+	    (*addr == 0))
+		return -EINVAL;
+
+	ret = ksym_trace_get_access_type(input_string);
+
+	return ret;
+}
+
+int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+	struct trace_ksym *entry;
+	int ret = -ENOMEM;
+
+	if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+		printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+		       " new requests for tracing can be accepted now.\n",
+		       KSYM_TRACER_MAX);
+		return -ENOSPC;
+	}
+
+	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->type = op;
+	entry->ksym_addr = addr;
+	entry->len = HW_BREAKPOINT_LEN_4;
+
+	ret = -EAGAIN;
+	entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
+						      entry->len, entry->type,
+						      ksym_hbp_handler, true);
+	if (IS_ERR(entry->ksym_hbp)) {
+		ret = PTR_ERR(entry->ksym_hbp);
+		entry->ksym_hbp = NULL;
+	}
+
+	if (!entry->ksym_hbp) {
+		printk(KERN_INFO "ksym_tracer request failed. Try again"
+		       " later!!\n");
+		goto err;
+	}
+
+	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
+	ksym_filter_entry_count++;
+
+	return 0;
+
+err:
+	kfree(entry);
+
+	return ret;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+				      size_t count, loff_t *ppos)
+{
+	struct trace_ksym *entry;
+	struct hlist_node *node;
+	struct trace_seq *s;
+	ssize_t cnt = 0;
+	int ret;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+	trace_seq_init(s);
+
+	mutex_lock(&ksym_tracer_mutex);
+
+	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+		ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
+		if (entry->type == HW_BREAKPOINT_R)
+			ret = trace_seq_puts(s, "r--\n");
+		else if (entry->type == HW_BREAKPOINT_W)
+			ret = trace_seq_puts(s, "-w-\n");
+		else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
+			ret = trace_seq_puts(s, "rw-\n");
+		WARN_ON_ONCE(!ret);
+	}
+
+	cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
+
+	mutex_unlock(&ksym_tracer_mutex);
+
+	kfree(s);
+
+	return cnt;
+}
+
+static void __ksym_trace_reset(void)
+{
+	struct trace_ksym *entry;
+	struct hlist_node *node, *node1;
+
+	mutex_lock(&ksym_tracer_mutex);
+	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+				  ksym_hlist) {
+		unregister_wide_hw_breakpoint(entry->ksym_hbp);
+		ksym_filter_entry_count--;
+		hlist_del_rcu(&(entry->ksym_hlist));
+		synchronize_rcu();
+		kfree(entry);
+	}
+	mutex_unlock(&ksym_tracer_mutex);
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+				       const char __user *buffer,
+				       size_t count, loff_t *ppos)
+{
+	struct trace_ksym *entry;
+	struct hlist_node *node;
+	char *input_string, *ksymname = NULL;
+	unsigned long ksym_addr = 0;
+	int ret, op, changed = 0;
+
+	input_string = kzalloc(count + 1, GFP_KERNEL);
+	if (!input_string)
+		return -ENOMEM;
+
+	if (copy_from_user(input_string, buffer, count)) {
+		kfree(input_string);
+		return -EFAULT;
+	}
+	input_string[count] = '\0';
+
+	strstrip(input_string);
+
+	/*
+	 * Clear all breakpoints if:
+	 * 1: echo > ksym_trace_filter
+	 * 2: echo 0 > ksym_trace_filter
+	 * 3: echo "*:---" > ksym_trace_filter
+	 */
+	if (!input_string[0] || !strcmp(input_string, "0") ||
+	    !strcmp(input_string, "*:---")) {
+		__ksym_trace_reset();
+		kfree(input_string);
+		return count;
+	}
+
+	ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+	if (ret < 0) {
+		kfree(input_string);
+		return ret;
+	}
+
+	mutex_lock(&ksym_tracer_mutex);
+
+	ret = -EINVAL;
+	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+		if (entry->ksym_addr == ksym_addr) {
+			/* Check for malformed request: (6) */
+			if (entry->type != op)
+				changed = 1;
+			else
+				goto out;
+			break;
+		}
+	}
+	if (changed) {
+		unregister_wide_hw_breakpoint(entry->ksym_hbp);
+		entry->type = op;
+		if (op > 0) {
+			entry->ksym_hbp =
+				register_wide_hw_breakpoint(entry->ksym_addr,
+					entry->len, entry->type,
+					ksym_hbp_handler, true);
+			if (IS_ERR(entry->ksym_hbp))
+				entry->ksym_hbp = NULL;
+			if (!entry->ksym_hbp)
+				goto out;
+		}
+		ksym_filter_entry_count--;
+		hlist_del_rcu(&(entry->ksym_hlist));
+		synchronize_rcu();
+		kfree(entry);
+		ret = 0;
+		goto out;
+	} else {
+		/* Check for malformed request: (4) */
+		if (op == 0)
+			goto out;
+		ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+	}
+out:
+	mutex_unlock(&ksym_tracer_mutex);
+
+	kfree(input_string);
+
+	if (!ret)
+		ret = count;
+	return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+	.open		= tracing_open_generic,
+	.read		= ksym_trace_filter_read,
+	.write		= ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+	ksym_tracing_enabled = 0;
+	__ksym_trace_reset();
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+	int cpu, ret = 0;
+
+	for_each_online_cpu(cpu)
+		tracing_reset(tr, cpu);
+	ksym_tracing_enabled = 1;
+	ksym_trace_array = tr;
+
+	return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+	seq_puts(m,
+		 "#       TASK-PID   CPU#      Symbol            "
+		 "Type    Function\n");
+	seq_puts(m,
+		 "#          |        |          |               "
+		 " |         |\n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+	struct trace_seq *s = &iter->seq;
+	struct ksym_trace_entry *field;
+	char str[KSYM_SYMBOL_LEN];
+	int ret;
+
+	if (entry->type != TRACE_KSYM)
+		return TRACE_TYPE_UNHANDLED;
+
+	trace_assign_type(field, entry);
+
+	ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
+			       entry->pid, iter->cpu, (char *)field->addr);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	switch (field->type) {
+	case HW_BREAKPOINT_R:
+		ret = trace_seq_printf(s, " R  ");
+		break;
+	case HW_BREAKPOINT_W:
+		ret = trace_seq_printf(s, " W  ");
+		break;
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+		ret = trace_seq_printf(s, " RW ");
+		break;
+	default:
+		return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	sprint_symbol(str, field->ip);
+	ret = trace_seq_printf(s, "%s\n", str);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+	.name		= "ksym_tracer",
+	.init		= ksym_trace_init,
+	.reset		= ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+	.selftest	= trace_selftest_startup_ksym,
+#endif
+	.print_header	= ksym_trace_print_header,
+	.print_line	= ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+	ksym_filter_entry_count = 0;
+
+	entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
+				    NULL, &ksym_tracing_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs "
+			   "'ksym_trace_filter' file\n");
+
+	return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
+
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+static int ksym_tracer_stat_headers(struct seq_file *m)
+{
+	seq_puts(m, "  Access Type ");
+	seq_puts(m, "  Symbol                                 Counter\n");
+	seq_puts(m, "  ----------- ");
+	seq_puts(m, "  ------                                 -------\n");
+	return 0;
+}
+
+static int ksym_tracer_stat_show(struct seq_file *m, void *v)
+{
+	struct hlist_node *stat = v;
+	struct trace_ksym *entry;
+	int access_type = 0;
+	char fn_name[KSYM_NAME_LEN];
+
+	entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+
+	access_type = entry->type;
+
+	switch (access_type) {
+	case HW_BREAKPOINT_R:
+		seq_puts(m, "  R           ");
+		break;
+	case HW_BREAKPOINT_W:
+		seq_puts(m, "  W           ");
+		break;
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+		seq_puts(m, "  RW          ");
+		break;
+	default:
+		seq_puts(m, "  NA          ");
+	}
+
+	if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
+		seq_printf(m, " %-36s", fn_name);
+	else
+		seq_printf(m, " %-36s", "<NA>");
+	seq_printf(m, " %15lu\n", entry->counter);
+
+	return 0;
+}
+
+static void *ksym_tracer_stat_start(struct tracer_stat *trace)
+{
+	return ksym_filter_head.first;
+}
+
+static void *
+ksym_tracer_stat_next(void *v, int idx)
+{
+	struct hlist_node *stat = v;
+
+	return stat->next;
+}
+
+static struct tracer_stat ksym_tracer_stats = {
+	.name		= "ksym_tracer",
+	.stat_start	= ksym_tracer_stat_start,
+	.stat_next	= ksym_tracer_stat_next,
+	.stat_headers	= ksym_tracer_stat_headers,
+	.stat_show	= ksym_tracer_stat_show
+};
+
+__init static int ksym_tracer_stat_init(void)
+{
+	int ret;
+
+	ret = register_stat_tracer(&ksym_tracer_stats);
+	if (ret) {
+		printk(KERN_WARNING "Warning: could not register "
+		       "ksym tracer stats\n");
+		return 1;
+	}
+
+	return 0;
+}
+fs_initcall(ksym_tracer_stat_init);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
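
The filter written by ksym_trace_filter_write() above takes requests of the
form <ksym_name>:<op>, with op one of rw-, -w- or --- (clear). A userspace
sketch, equivalent to echo "pid_max:rw-" > ksym_trace_filter; the usual
debugfs mount point and the chosen symbol are assumptions:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        const char *req = "pid_max:rw-";    /* <ksym_name>:<op> */
        int fd = open("/sys/kernel/debug/tracing/ksym_trace_filter",
                      O_WRONLY);

        if (fd < 0) {
            perror("open ksym_trace_filter");
            return 1;
        }
        if (write(fd, req, strlen(req)) < 0)
            perror("write");
        close(fd);
        return 0;
    }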
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..dc98309e839a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	case TRACE_GRAPH_ENT:
 	case TRACE_GRAPH_RET:
 	case TRACE_HW_BRANCHES:
+	case TRACE_KSYM:
 		return 1;
 	}
 	return 0;
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
 	return ret;
 }
 #endif /* CONFIG_HW_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+static int ksym_selftest_dummy;
+
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+	unsigned long count;
+	int ret;
+
+	/* start the tracing */
+	ret = tracer_init(trace, tr);
+	if (ret) {
+		warn_failed_init_tracer(trace, ret);
+		return ret;
+	}
+
+	ksym_selftest_dummy = 0;
+	/* Register the read-write tracing request */
+
+	ret = process_new_ksym_entry("ksym_selftest_dummy",
+				     HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+				     (unsigned long)(&ksym_selftest_dummy));
+
+	if (ret < 0) {
+		printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+		goto ret_path;
+	}
+	/* Perform a read and a write operation over the dummy variable to
+	 * trigger the tracer
+	 */
+	if (ksym_selftest_dummy == 0)
+		ksym_selftest_dummy++;
+
+	/* stop the tracing. */
+	tracing_stop();
+	/* check the trace buffer */
+	ret = trace_test_buffer(tr, &count);
+	trace->reset(tr);
+	tracing_start();
+
+	/* read & write operations - one each is performed on the dummy variable
+	 * triggering two entries in the trace buffer
+	 */
+	if (!ret && count != 2) {
+		printk(KERN_CONT "Ksym tracer startup test failed");
+		ret = -1;
+	}
+
+ret_path:
+	return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
+