author    Ingo Molnar <mingo@elte.hu>  2009-11-21 08:07:23 -0500
committer Ingo Molnar <mingo@elte.hu>  2009-11-21 08:07:23 -0500
commit    96200591a34f8ecb98481c626125df43a2463b55 (patch)
tree      314c376b01f254d04f9aaf449b1f9147ad177fa6 /kernel
parent    7031281e02bf951a2259849217193fb9d75a9762 (diff)
parent    68efa37df779b3e04280598e8b5b3a1919b65fee (diff)

Merge branch 'tracing/hw-breakpoints' into perf/core

Conflicts:
	arch/x86/kernel/kprobes.c
	kernel/trace/Makefile

Merge reason: hw-breakpoints perf integration is looking good in testing
and in reviews, plus conflicts are mounting up - so merge & resolve.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/Makefile                   1
-rw-r--r--   kernel/exit.c                     5
-rw-r--r--   kernel/hw_breakpoint.c          494
-rw-r--r--   kernel/kallsyms.c                 1
-rw-r--r--   kernel/perf_event.c             136
-rw-r--r--   kernel/trace/Kconfig             21
-rw-r--r--   kernel/trace/Makefile             1
-rw-r--r--   kernel/trace/trace.h              7
-rw-r--r--   kernel/trace/trace_entries.h     16
-rw-r--r--   kernel/trace/trace_ksym.c       554
-rw-r--r--   kernel/trace/trace_selftest.c    55
11 files changed, 1288 insertions, 3 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index b8d4cd8ac0b9..17b575ec7d07 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..3f45e3cf931d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
 #include <linux/init_task.h>
 #include <linux/perf_event.h>
 #include <trace/events/sched.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -978,6 +979,10 @@ NORET_TYPE void do_exit(long code)
 	proc_exit_connector(tsk);
 
 	/*
+	 * FIXME: do that only when needed, using sched_exit tracepoint
+	 */
+	flush_ptrace_hw_breakpoint(tsk);
+	/*
 	 * Flush inherited counters to the parent - before the parent
 	 * gets woken up by child-exit notifications.
 	 */
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..9ea9414e0e58
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,494 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) 2007 Alan Stern
17 * Copyright (C) IBM Corporation, 2009
18 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
19 *
20 * Thanks to Ingo Molnar for his many suggestions.
21 */
22
23/*
24 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
25 * using the CPU's debug registers.
26 * This file contains the arch-independent routines.
27 */
28
29#include <linux/irqflags.h>
30#include <linux/kallsyms.h>
31#include <linux/notifier.h>
32#include <linux/kprobes.h>
33#include <linux/kdebug.h>
34#include <linux/kernel.h>
35#include <linux/module.h>
36#include <linux/percpu.h>
37#include <linux/sched.h>
38#include <linux/init.h>
39#include <linux/smp.h>
40
41#include <linux/hw_breakpoint.h>
42
43#include <asm/processor.h>
44
45#ifdef CONFIG_X86
46#include <asm/debugreg.h>
47#endif
48
49/*
50 * Constraints data
51 */
52
53/* Number of pinned cpu breakpoints in a cpu */
54static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
55
56/* Number of pinned task breakpoints in a cpu */
57static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
58
59/* Number of non-pinned cpu/task breakpoints in a cpu */
60static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
61
62/* Gather the number of total pinned and un-pinned bp in a cpuset */
63struct bp_busy_slots {
64 unsigned int pinned;
65 unsigned int flexible;
66};
67
68/* Serialize accesses to the above constraints */
69static DEFINE_MUTEX(nr_bp_mutex);
70
71/*
72 * Report the maximum number of pinned breakpoints a task
73 * have in this cpu
74 */
75static unsigned int max_task_bp_pinned(int cpu)
76{
77 int i;
78 unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
79
80 for (i = HBP_NUM -1; i >= 0; i--) {
81 if (tsk_pinned[i] > 0)
82 return i + 1;
83 }
84
85 return 0;
86}
87
88/*
89 * Report the number of pinned/un-pinned breakpoints we have in
90 * a given cpu (cpu > -1) or in all of them (cpu = -1).
91 */
92static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
93{
94 if (cpu >= 0) {
95 slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
96 slots->pinned += max_task_bp_pinned(cpu);
97 slots->flexible = per_cpu(nr_bp_flexible, cpu);
98
99 return;
100 }
101
102 for_each_online_cpu(cpu) {
103 unsigned int nr;
104
105 nr = per_cpu(nr_cpu_bp_pinned, cpu);
106 nr += max_task_bp_pinned(cpu);
107
108 if (nr > slots->pinned)
109 slots->pinned = nr;
110
111 nr = per_cpu(nr_bp_flexible, cpu);
112
113 if (nr > slots->flexible)
114 slots->flexible = nr;
115 }
116}
117
118/*
119 * Add a pinned breakpoint for the given task in our constraint table
120 */
121static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
122{
123 int count = 0;
124 struct perf_event *bp;
125 struct perf_event_context *ctx = tsk->perf_event_ctxp;
126 unsigned int *task_bp_pinned;
127 struct list_head *list;
128 unsigned long flags;
129
130 if (WARN_ONCE(!ctx, "No perf context for this task"))
131 return;
132
133 list = &ctx->event_list;
134
135 spin_lock_irqsave(&ctx->lock, flags);
136
137 /*
138 * The current breakpoint counter is not included in the list
139 * at the open() callback time
140 */
141 list_for_each_entry(bp, list, event_entry) {
142 if (bp->attr.type == PERF_TYPE_BREAKPOINT)
143 count++;
144 }
145
146 spin_unlock_irqrestore(&ctx->lock, flags);
147
148 if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
149 return;
150
151 task_bp_pinned = per_cpu(task_bp_pinned, cpu);
152 if (enable) {
153 task_bp_pinned[count]++;
154 if (count > 0)
155 task_bp_pinned[count-1]--;
156 } else {
157 task_bp_pinned[count]--;
158 if (count > 0)
159 task_bp_pinned[count-1]++;
160 }
161}
162
163/*
164 * Add/remove the given breakpoint in our constraint table
165 */
166static void toggle_bp_slot(struct perf_event *bp, bool enable)
167{
168 int cpu = bp->cpu;
169 struct task_struct *tsk = bp->ctx->task;
170
171 /* Pinned counter task profiling */
172 if (tsk) {
173 if (cpu >= 0) {
174 toggle_bp_task_slot(tsk, cpu, enable);
175 return;
176 }
177
178 for_each_online_cpu(cpu)
179 toggle_bp_task_slot(tsk, cpu, enable);
180 return;
181 }
182
183 /* Pinned counter cpu profiling */
184 if (enable)
185 per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
186 else
187 per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
188}
189
190/*
191 * Contraints to check before allowing this new breakpoint counter:
192 *
193 * == Non-pinned counter == (Considered as pinned for now)
194 *
195 * - If attached to a single cpu, check:
196 *
197 * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
198 * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
199 *
200 * -> If there are already non-pinned counters in this cpu, it means
201 * there is already a free slot for them.
202 * Otherwise, we check that the maximum number of per task
203 * breakpoints (for this cpu) plus the number of per cpu breakpoint
204 * (for this cpu) doesn't cover every registers.
205 *
206 * - If attached to every cpus, check:
207 *
208 * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
209 * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
210 *
211 * -> This is roughly the same, except we check the number of per cpu
212 * bp for every cpu and we keep the max one. Same for the per tasks
213 * breakpoints.
214 *
215 *
216 * == Pinned counter ==
217 *
218 * - If attached to a single cpu, check:
219 *
220 * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
221 * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
222 *
223 * -> Same checks as before. But now the nr_bp_flexible, if any, must keep
224 * one register at least (or they will never be fed).
225 *
226 * - If attached to every cpus, check:
227 *
228 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
229 * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
230 */
231int reserve_bp_slot(struct perf_event *bp)
232{
233 struct bp_busy_slots slots = {0};
234 int ret = 0;
235
236 mutex_lock(&nr_bp_mutex);
237
238 fetch_bp_busy_slots(&slots, bp->cpu);
239
240 /* Flexible counters need to keep at least one slot */
241 if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
242 ret = -ENOSPC;
243 goto end;
244 }
245
246 toggle_bp_slot(bp, true);
247
248end:
249 mutex_unlock(&nr_bp_mutex);
250
251 return ret;
252}
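/*
 * Editorial illustration (not part of the patch): a worked example of the
 * check above, assuming HBP_NUM == 4 debug registers (as on x86).  Suppose
 * cpu 0 currently has:
 *
 *	per_cpu(nr_cpu_bp_pinned, 0) == 1   (one pinned cpu-wide breakpoint)
 *	max_task_bp_pinned(0)        == 2   (some task owns two pinned bps)
 *	per_cpu(nr_bp_flexible, 0)   == 1   (one non-pinned breakpoint)
 *
 * fetch_bp_busy_slots() then reports slots.pinned == 3 and slots.flexible == 1,
 * so reserve_bp_slot() sees 3 + !!1 == 4 == HBP_NUM and returns -ENOSPC:
 * the flexible counters must be left at least one register to rotate on.
 */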
253
254void release_bp_slot(struct perf_event *bp)
255{
256 mutex_lock(&nr_bp_mutex);
257
258 toggle_bp_slot(bp, false);
259
260 mutex_unlock(&nr_bp_mutex);
261}
262
263
264int __register_perf_hw_breakpoint(struct perf_event *bp)
265{
266 int ret;
267
268 ret = reserve_bp_slot(bp);
269 if (ret)
270 return ret;
271
272 if (!bp->attr.disabled)
273 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
274
275 return ret;
276}
277
278int register_perf_hw_breakpoint(struct perf_event *bp)
279{
280 bp->callback = perf_bp_event;
281
282 return __register_perf_hw_breakpoint(bp);
283}
284
285/*
286 * Register a breakpoint bound to a task and a given cpu.
287 * If cpu is -1, the breakpoint is active for the task in every cpu
288 * If the task is -1, the breakpoint is active for every tasks in the given
289 * cpu.
290 */
291static struct perf_event *
292register_user_hw_breakpoint_cpu(unsigned long addr,
293 int len,
294 int type,
295 perf_callback_t triggered,
296 pid_t pid,
297 int cpu,
298 bool active)
299{
300 struct perf_event_attr *attr;
301 struct perf_event *bp;
302
303 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
304 if (!attr)
305 return ERR_PTR(-ENOMEM);
306
307 attr->type = PERF_TYPE_BREAKPOINT;
308 attr->size = sizeof(*attr);
309 attr->bp_addr = addr;
310 attr->bp_len = len;
311 attr->bp_type = type;
312 /*
313 * Such breakpoints are used by debuggers to trigger signals when
314 * we hit the excepted memory op. We can't miss such events, they
315 * must be pinned.
316 */
317 attr->pinned = 1;
318
319 if (!active)
320 attr->disabled = 1;
321
322 bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
323 kfree(attr);
324
325 return bp;
326}
327
328/**
329 * register_user_hw_breakpoint - register a hardware breakpoint for user space
330 * @addr: is the memory address that triggers the breakpoint
331 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
332 * @type: the type of the access to the memory (read/write/exec)
333 * @triggered: callback to trigger when we hit the breakpoint
334 * @tsk: pointer to 'task_struct' of the process to which the address belongs
335 * @active: should we activate it while registering it
336 *
337 */
338struct perf_event *
339register_user_hw_breakpoint(unsigned long addr,
340 int len,
341 int type,
342 perf_callback_t triggered,
343 struct task_struct *tsk,
344 bool active)
345{
346 return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
347 tsk->pid, -1, active);
348}
349EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
350
351/**
352 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
353 * @bp: the breakpoint structure to modify
354 * @addr: is the memory address that triggers the breakpoint
355 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
356 * @type: the type of the access to the memory (read/write/exec)
357 * @triggered: callback to trigger when we hit the breakpoint
358 * @tsk: pointer to 'task_struct' of the process to which the address belongs
359 * @active: should we activate it while registering it
360 */
361struct perf_event *
362modify_user_hw_breakpoint(struct perf_event *bp,
363 unsigned long addr,
364 int len,
365 int type,
366 perf_callback_t triggered,
367 struct task_struct *tsk,
368 bool active)
369{
370 /*
371 * FIXME: do it without unregistering
372 * - We don't want to lose our slot
373 * - If the new bp is incorrect, don't lose the older one
374 */
375 unregister_hw_breakpoint(bp);
376
377 return register_user_hw_breakpoint(addr, len, type, triggered,
378 tsk, active);
379}
380EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
381
382/**
383 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
384 * @bp: the breakpoint structure to unregister
385 */
386void unregister_hw_breakpoint(struct perf_event *bp)
387{
388 if (!bp)
389 return;
390 perf_event_release_kernel(bp);
391}
392EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
393
394static struct perf_event *
395register_kernel_hw_breakpoint_cpu(unsigned long addr,
396 int len,
397 int type,
398 perf_callback_t triggered,
399 int cpu,
400 bool active)
401{
402 return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
403 -1, cpu, active);
404}
405
406/**
407 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
408 * @addr: is the memory address that triggers the breakpoint
409 * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
410 * @type: the type of the access to the memory (read/write/exec)
411 * @triggered: callback to trigger when we hit the breakpoint
412 * @active: should we activate it while registering it
413 *
414 * @return a set of per_cpu pointers to perf events
415 */
416struct perf_event **
417register_wide_hw_breakpoint(unsigned long addr,
418 int len,
419 int type,
420 perf_callback_t triggered,
421 bool active)
422{
423 struct perf_event **cpu_events, **pevent, *bp;
424 long err;
425 int cpu;
426
427 cpu_events = alloc_percpu(typeof(*cpu_events));
428 if (!cpu_events)
429 return ERR_PTR(-ENOMEM);
430
431 for_each_possible_cpu(cpu) {
432 pevent = per_cpu_ptr(cpu_events, cpu);
433 bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
434 triggered, cpu, active);
435
436 *pevent = bp;
437
438 if (IS_ERR(bp) || !bp) {
439 err = PTR_ERR(bp);
440 goto fail;
441 }
442 }
443
444 return cpu_events;
445
446fail:
447 for_each_possible_cpu(cpu) {
448 pevent = per_cpu_ptr(cpu_events, cpu);
449 if (IS_ERR(*pevent) || !*pevent)
450 break;
451 unregister_hw_breakpoint(*pevent);
452 }
453 free_percpu(cpu_events);
454 /* return the error if any */
455 return ERR_PTR(err);
456}
457EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
458
459/**
460 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
461 * @cpu_events: the per cpu set of events to unregister
462 */
463void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
464{
465 int cpu;
466 struct perf_event **pevent;
467
468 for_each_possible_cpu(cpu) {
469 pevent = per_cpu_ptr(cpu_events, cpu);
470 unregister_hw_breakpoint(*pevent);
471 }
472 free_percpu(cpu_events);
473}
474EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
475
476static struct notifier_block hw_breakpoint_exceptions_nb = {
477 .notifier_call = hw_breakpoint_exceptions_notify,
478 /* we need to be notified first */
479 .priority = 0x7fffffff
480};
481
482static int __init init_hw_breakpoint(void)
483{
484 return register_die_notifier(&hw_breakpoint_exceptions_nb);
485}
486core_initcall(init_hw_breakpoint);
487
488
489struct pmu perf_ops_bp = {
490 .enable = arch_install_hw_breakpoint,
491 .disable = arch_uninstall_hw_breakpoint,
492 .read = hw_breakpoint_pmu_read,
493 .unthrottle = hw_breakpoint_pmu_unthrottle
494};
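Editorial note: the file above is the whole arch-independent API for in-kernel
breakpoint users (register_user_hw_breakpoint(), register_wide_hw_breakpoint(),
the matching unregister calls and the perf_ops_bp pmu). The sketch below is not
part of the patch; the module name, the choice of the pid_max symbol and the
printed message are illustrative, and it assumes the perf_callback_t prototype
used by ksym_hbp_handler() later in this series. It also relies on
kallsyms_lookup_name(), which the next hunk exports.

/* hedged example module -- not part of the patch */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/err.h>
#include <linux/hw_breakpoint.h>

static struct perf_event **sample_wide_bp;

/* Runs from exception context whenever the watched word is written. */
static void sample_bp_handler(struct perf_event *bp, void *data)
{
	struct pt_regs *regs = data;

	pr_info("write to watched symbol from %pS\n",
		(void *)instruction_pointer(regs));
}

static int __init sample_bp_init(void)
{
	unsigned long addr = kallsyms_lookup_name("pid_max");

	if (!addr)
		return -ENXIO;

	/* One pinned breakpoint per cpu, armed immediately (active = true). */
	sample_wide_bp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
						     HW_BREAKPOINT_W,
						     sample_bp_handler, true);
	return IS_ERR(sample_wide_bp) ? (int)PTR_ERR(sample_wide_bp) : 0;
}

static void __exit sample_bp_exit(void)
{
	unregister_wide_hw_breakpoint(sample_wide_bp);
}

module_init(sample_bp_init);
module_exit(sample_bp_exit);
MODULE_LICENSE("GPL");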
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8b6b8b697c68..8e5288a8a355 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
 	}
 	return module_kallsyms_lookup_name(name);
 }
+EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
 
 int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
 				      unsigned long),
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3256e36ad251..3852e2656bb0 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -29,6 +29,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -1725,6 +1726,26 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+int perf_event_release_kernel(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_event_remove_from_context(event);
+	mutex_unlock(&ctx->mutex);
+
+	mutex_lock(&event->owner->perf_event_mutex);
+	list_del_init(&event->owner_entry);
+	mutex_unlock(&event->owner->perf_event_mutex);
+	put_task_struct(event->owner);
+
+	free_event(event);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
 static int perf_event_read_size(struct perf_event *event)
 {
 	int entry = sizeof(u64); /* value */
@@ -1750,7 +1771,7 @@ static int perf_event_read_size(struct perf_event *event)
 	return size;
 }
 
-static u64 perf_event_read_value(struct perf_event *event)
+u64 perf_event_read_value(struct perf_event *event)
 {
 	struct perf_event *child;
 	u64 total = 0;
@@ -1761,6 +1782,7 @@ static u64 perf_event_read_value(struct perf_event *event)
 
 	return total;
 }
+EXPORT_SYMBOL_GPL(perf_event_read_value);
 
 static int perf_event_read_entry(struct perf_event *event,
 				 u64 read_format, char __user *buf)
@@ -4231,6 +4253,51 @@ static void perf_event_free_filter(struct perf_event *event)
 
 #endif /* CONFIG_EVENT_PROFILE */
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+	release_bp_slot(event);
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	int err;
+	/*
+	 * The breakpoint is already filled if we haven't created the counter
+	 * through perf syscall
+	 * FIXME: manage to get trigerred to NULL if it comes from syscalls
+	 */
+	if (!bp->callback)
+		err = register_perf_hw_breakpoint(bp);
+	else
+		err = __register_perf_hw_breakpoint(bp);
+	if (err)
+		return ERR_PTR(err);
+
+	bp->destroy = bp_perf_event_destroy;
+
+	return &perf_ops_bp;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+	/* TODO */
+}
+#else
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+	return NULL;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+}
+#endif
+
 atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_event_destroy(struct perf_event *event)
@@ -4297,6 +4364,7 @@ perf_event_alloc(struct perf_event_attr *attr,
 		   struct perf_event_context *ctx,
 		   struct perf_event *group_leader,
 		   struct perf_event *parent_event,
+		   perf_callback_t callback,
 		   gfp_t gfpflags)
 {
 	const struct pmu *pmu;
@@ -4339,6 +4407,11 @@ perf_event_alloc(struct perf_event_attr *attr,
 
 	event->state = PERF_EVENT_STATE_INACTIVE;
 
+	if (!callback && parent_event)
+		callback = parent_event->callback;
+
+	event->callback = callback;
+
 	if (attr->disabled)
 		event->state = PERF_EVENT_STATE_OFF;
 
@@ -4373,6 +4446,11 @@ perf_event_alloc(struct perf_event_attr *attr,
 		pmu = tp_perf_event_init(event);
 		break;
 
+	case PERF_TYPE_BREAKPOINT:
+		pmu = bp_perf_event_init(event);
+		break;
+
+
 	default:
 		break;
 	}
@@ -4615,7 +4693,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	event = perf_event_alloc(&attr, cpu, ctx, group_leader,
-				 NULL, GFP_KERNEL);
+				 NULL, NULL, GFP_KERNEL);
 	err = PTR_ERR(event);
 	if (IS_ERR(event))
 		goto err_put_context;
@@ -4663,6 +4741,58 @@ err_put_context:
 	return err;
 }
 
+/**
+ * perf_event_create_kernel_counter
+ *
+ * @attr: attributes of the counter to create
+ * @cpu: cpu in which the counter is bound
+ * @pid: task to profile
+ */
+struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+				 pid_t pid, perf_callback_t callback)
+{
+	struct perf_event *event;
+	struct perf_event_context *ctx;
+	int err;
+
+	/*
+	 * Get the target context (task or percpu):
+	 */
+
+	ctx = find_get_context(pid, cpu);
+	if (IS_ERR(ctx))
+		return NULL;
+
+	event = perf_event_alloc(attr, cpu, ctx, NULL,
+				 NULL, callback, GFP_KERNEL);
+	err = PTR_ERR(event);
+	if (IS_ERR(event))
+		goto err_put_context;
+
+	event->filp = NULL;
+	WARN_ON_ONCE(ctx->parent_ctx);
+	mutex_lock(&ctx->mutex);
+	perf_install_in_context(ctx, event, cpu);
+	++ctx->generation;
+	mutex_unlock(&ctx->mutex);
+
+	event->owner = current;
+	get_task_struct(current);
+	mutex_lock(&current->perf_event_mutex);
+	list_add_tail(&event->owner_entry, &current->perf_event_list);
+	mutex_unlock(&current->perf_event_mutex);
+
+	return event;
+
+err_put_context:
+	if (err < 0)
+		put_ctx(ctx);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+
 /*
  * inherit a event from parent task to child task:
  */
@@ -4688,7 +4818,7 @@ inherit_event(struct perf_event *parent_event,
 	child_event = perf_event_alloc(&parent_event->attr,
 					   parent_event->cpu, child_ctx,
 					   group_leader, parent_event,
-					   GFP_KERNEL);
+					   NULL, GFP_KERNEL);
 	if (IS_ERR(child_event))
 		return child_event;
 	get_ctx(child_ctx);
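Editorial note: besides breakpoints, the exports added above
(perf_event_create_kernel_counter(), perf_event_release_kernel() and the now
non-static perf_event_read_value()) give kernel code a self-contained way to own
a counter. A minimal hedged sketch, not from the patch -- the helper names and
the choice of a CPU-cycles event are illustrative, and it assumes a NULL
callback is acceptable for a non-breakpoint event (perf_event_alloc() above only
stores the callback it is given):

/* hedged example -- not part of the patch */
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/err.h>

static struct perf_event *cycles_event;

static int cycles_counter_start(void)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HARDWARE,
		.config	= PERF_COUNT_HW_CPU_CYCLES,
		.size	= sizeof(attr),
		.pinned	= 1,
	};

	/* cpu 0, no target task (pid == -1), no overflow callback */
	cycles_event = perf_event_create_kernel_counter(&attr, 0, -1, NULL);

	/* note: in this version the function returns NULL (not ERR_PTR) on failure */
	return cycles_event ? 0 : -EINVAL;
}

static void cycles_counter_stop(void)
{
	pr_info("cpu0 cycles: %llu\n",
		(unsigned long long)perf_event_read_value(cycles_event));
	perf_event_release_kernel(cycles_event);
}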
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f05671609a89..d006554888dc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@ config POWER_TRACER
 	  power management decisions, specifically the C-state and P-state
 	  behavior.
 
+config KSYM_TRACER
+	bool "Trace read and write access on kernel memory locations"
+	depends on HAVE_HW_BREAKPOINT
+	select TRACING
+	help
+	  This tracer helps find read and write operations on any given kernel
+	  symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+	bool "Profile all kernel memory accesses on 'watched' variables"
+	depends on KSYM_TRACER
+	help
+	  This tracer profiles kernel accesses on variables watched through the
+	  ksym tracer ftrace plugin. Depending upon the hardware, all read
+	  and write operations on kernel variables can be monitored for
+	  accesses.
+
+	  The results will be displayed in:
+	  /debugfs/tracing/profile_ksym
+
+	  Say N if unsure.
 
 config STACK_TRACER
 	bool "Trace max stack"
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index edc3a3cca1a1..cd9ecd89ec77 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
 obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b4e4212e66d7..4da6ede74401 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
 #include <linux/ftrace.h>
 #include <trace/boot.h>
 #include <linux/kmemtrace.h>
+#include <linux/hw_breakpoint.h>
 
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
 	TRACE_KMEM_ALLOC,
 	TRACE_KMEM_FREE,
 	TRACE_BLK,
+	TRACE_KSYM,
 
 	__TRACE_LAST_TYPE,
 };
@@ -232,6 +234,7 @@ extern void __ftrace_bad_type(void);
 			  TRACE_KMEM_ALLOC);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
 			  TRACE_KMEM_FREE);	\
+		IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
 		__ftrace_bad_type();	\
 	} while (0)
 
@@ -387,6 +390,8 @@ int register_tracer(struct tracer *type);
 void unregister_tracer(struct tracer *type);
 int is_tracing_stopped(void);
 
+extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
+
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -461,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 					 struct trace_array *tr);
 extern int trace_selftest_startup_hw_branches(struct tracer *trace,
 					      struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+					 struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
 	F_printk("type:%u call_site:%lx ptr:%p",
 		 __entry->type_id, __entry->call_site, __entry->ptr)
 );
+
+FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
+
+	TRACE_KSYM,
+
+	F_STRUCT(
+		__field(	unsigned long,	ip			  )
+		__field(	unsigned char,	type			  )
+		__array(	char	,	cmd,	TASK_COMM_LEN	  )
+		__field(	unsigned long,	addr			  )
+	),
+
+	F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
+		(void *)__entry->ip, (unsigned int)__entry->type,
+		(void *)__entry->addr, __entry->cmd)
+);
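Editorial note: for readers unfamiliar with FTRACE_ENTRY(), the stanza above is
what generates the record that the ksym tracer (next file) writes into the ring
buffer. Roughly, and only as an illustration, it expands to a structure like the
following; the trailing comments are editorial:

/* hedged sketch of what FTRACE_ENTRY(ksym_trace, ...) expands to */
struct ksym_trace_entry {
	struct trace_entry	ent;	/* common header: type, flags, pid, ... */
	unsigned long		ip;	/* instruction that touched the symbol */
	unsigned char		type;	/* HW_BREAKPOINT_R / HW_BREAKPOINT_W */
	char			cmd[TASK_COMM_LEN]; /* comm of the task that hit it */
	unsigned long		addr;	/* address of the watched symbol */
};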
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..11935b53a6cb
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,554 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h"
31
32#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h>
34
35/*
36 * For now, let us restrict the no. of symbols traced simultaneously to number
37 * of available hardware breakpoint registers.
38 */
39#define KSYM_TRACER_MAX HBP_NUM
40
41#define KSYM_TRACER_OP_LEN 3 /* rw- */
42
43struct trace_ksym {
44 struct perf_event **ksym_hbp;
45 unsigned long ksym_addr;
46 int type;
47 int len;
48#ifdef CONFIG_PROFILE_KSYM_TRACER
49 unsigned long counter;
50#endif
51 struct hlist_node ksym_hlist;
52};
53
54static struct trace_array *ksym_trace_array;
55
56static unsigned int ksym_filter_entry_count;
57static unsigned int ksym_tracing_enabled;
58
59static HLIST_HEAD(ksym_filter_head);
60
61static DEFINE_MUTEX(ksym_tracer_mutex);
62
63#ifdef CONFIG_PROFILE_KSYM_TRACER
64
65#define MAX_UL_INT 0xffffffff
66
67void ksym_collect_stats(unsigned long hbp_hit_addr)
68{
69 struct hlist_node *node;
70 struct trace_ksym *entry;
71
72 rcu_read_lock();
73 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
74 if ((entry->ksym_addr == hbp_hit_addr) &&
75 (entry->counter <= MAX_UL_INT)) {
76 entry->counter++;
77 break;
78 }
79 }
80 rcu_read_unlock();
81}
82#endif /* CONFIG_PROFILE_KSYM_TRACER */
83
84void ksym_hbp_handler(struct perf_event *hbp, void *data)
85{
86 struct ring_buffer_event *event;
87 struct ksym_trace_entry *entry;
88 struct pt_regs *regs = data;
89 struct ring_buffer *buffer;
90 int pc;
91
92 if (!ksym_tracing_enabled)
93 return;
94
95 buffer = ksym_trace_array->buffer;
96
97 pc = preempt_count();
98
99 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
100 sizeof(*entry), 0, pc);
101 if (!event)
102 return;
103
104 entry = ring_buffer_event_data(event);
105 entry->ip = instruction_pointer(regs);
106 entry->type = hw_breakpoint_type(hbp);
107 entry->addr = hw_breakpoint_addr(hbp);
108 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
109
110#ifdef CONFIG_PROFILE_KSYM_TRACER
111 ksym_collect_stats(hw_breakpoint_addr(hbp));
112#endif /* CONFIG_PROFILE_KSYM_TRACER */
113
114 trace_buffer_unlock_commit(buffer, event, 0, pc);
115}
116
117/* Valid access types are represented as
118 *
119 * rw- : Set Read/Write Access Breakpoint
120 * -w- : Set Write Access Breakpoint
121 * --- : Clear Breakpoints
122 * --x : Set Execution Break points (Not available yet)
123 *
124 */
125static int ksym_trace_get_access_type(char *str)
126{
127 int access = 0;
128
129 if (str[0] == 'r')
130 access |= HW_BREAKPOINT_R;
131
132 if (str[1] == 'w')
133 access |= HW_BREAKPOINT_W;
134
135 if (str[2] == 'x')
136 access |= HW_BREAKPOINT_X;
137
138 switch (access) {
139 case HW_BREAKPOINT_R:
140 case HW_BREAKPOINT_W:
141 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
142 return access;
143 default:
144 return -EINVAL;
145 }
146}
147
148/*
149 * There can be several possible malformed requests and we attempt to capture
150 * all of them. We enumerate some of the rules
151 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
152 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
153 * <module>:<ksym_name>:<op>.
154 * 2. No delimiter symbol ':' in the input string
155 * 3. Spurious operator symbols or symbols not in their respective positions
156 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
157 * 5. Kernel symbol not a part of /proc/kallsyms
158 * 6. Duplicate requests
159 */
160static int parse_ksym_trace_str(char *input_string, char **ksymname,
161 unsigned long *addr)
162{
163 int ret;
164
165 *ksymname = strsep(&input_string, ":");
166 *addr = kallsyms_lookup_name(*ksymname);
167
168 /* Check for malformed request: (2), (1) and (5) */
169 if ((!input_string) ||
170 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
171 (*addr == 0))
172 return -EINVAL;;
173
174 ret = ksym_trace_get_access_type(input_string);
175
176 return ret;
177}
178
179int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
180{
181 struct trace_ksym *entry;
182 int ret = -ENOMEM;
183
184 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
185 printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
186 " new requests for tracing can be accepted now.\n",
187 KSYM_TRACER_MAX);
188 return -ENOSPC;
189 }
190
191 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
192 if (!entry)
193 return -ENOMEM;
194
195 entry->type = op;
196 entry->ksym_addr = addr;
197 entry->len = HW_BREAKPOINT_LEN_4;
198
199 ret = -EAGAIN;
200 entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
201 entry->len, entry->type,
202 ksym_hbp_handler, true);
203 if (IS_ERR(entry->ksym_hbp)) {
204 entry->ksym_hbp = NULL;
205 ret = PTR_ERR(entry->ksym_hbp);
206 }
207
208 if (!entry->ksym_hbp) {
209 printk(KERN_INFO "ksym_tracer request failed. Try again"
210 " later!!\n");
211 goto err;
212 }
213
214 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
215 ksym_filter_entry_count++;
216
217 return 0;
218
219err:
220 kfree(entry);
221
222 return ret;
223}
224
225static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
226 size_t count, loff_t *ppos)
227{
228 struct trace_ksym *entry;
229 struct hlist_node *node;
230 struct trace_seq *s;
231 ssize_t cnt = 0;
232 int ret;
233
234 s = kmalloc(sizeof(*s), GFP_KERNEL);
235 if (!s)
236 return -ENOMEM;
237 trace_seq_init(s);
238
239 mutex_lock(&ksym_tracer_mutex);
240
241 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
242 ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
243 if (entry->type == HW_BREAKPOINT_R)
244 ret = trace_seq_puts(s, "r--\n");
245 else if (entry->type == HW_BREAKPOINT_W)
246 ret = trace_seq_puts(s, "-w-\n");
247 else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
248 ret = trace_seq_puts(s, "rw-\n");
249 WARN_ON_ONCE(!ret);
250 }
251
252 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
253
254 mutex_unlock(&ksym_tracer_mutex);
255
256 kfree(s);
257
258 return cnt;
259}
260
261static void __ksym_trace_reset(void)
262{
263 struct trace_ksym *entry;
264 struct hlist_node *node, *node1;
265
266 mutex_lock(&ksym_tracer_mutex);
267 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
268 ksym_hlist) {
269 unregister_wide_hw_breakpoint(entry->ksym_hbp);
270 ksym_filter_entry_count--;
271 hlist_del_rcu(&(entry->ksym_hlist));
272 synchronize_rcu();
273 kfree(entry);
274 }
275 mutex_unlock(&ksym_tracer_mutex);
276}
277
278static ssize_t ksym_trace_filter_write(struct file *file,
279 const char __user *buffer,
280 size_t count, loff_t *ppos)
281{
282 struct trace_ksym *entry;
283 struct hlist_node *node;
284 char *input_string, *ksymname = NULL;
285 unsigned long ksym_addr = 0;
286 int ret, op, changed = 0;
287
288 input_string = kzalloc(count + 1, GFP_KERNEL);
289 if (!input_string)
290 return -ENOMEM;
291
292 if (copy_from_user(input_string, buffer, count)) {
293 kfree(input_string);
294 return -EFAULT;
295 }
296 input_string[count] = '\0';
297
298 strstrip(input_string);
299
300 /*
301 * Clear all breakpoints if:
302 * 1: echo > ksym_trace_filter
303 * 2: echo 0 > ksym_trace_filter
304 * 3: echo "*:---" > ksym_trace_filter
305 */
306 if (!input_string[0] || !strcmp(input_string, "0") ||
307 !strcmp(input_string, "*:---")) {
308 __ksym_trace_reset();
309 kfree(input_string);
310 return count;
311 }
312
313 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
314 if (ret < 0) {
315 kfree(input_string);
316 return ret;
317 }
318
319 mutex_lock(&ksym_tracer_mutex);
320
321 ret = -EINVAL;
322 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
323 if (entry->ksym_addr == ksym_addr) {
324 /* Check for malformed request: (6) */
325 if (entry->type != op)
326 changed = 1;
327 else
328 goto out;
329 break;
330 }
331 }
332 if (changed) {
333 unregister_wide_hw_breakpoint(entry->ksym_hbp);
334 entry->type = op;
335 if (op > 0) {
336 entry->ksym_hbp =
337 register_wide_hw_breakpoint(entry->ksym_addr,
338 entry->len, entry->type,
339 ksym_hbp_handler, true);
340 if (IS_ERR(entry->ksym_hbp))
341 entry->ksym_hbp = NULL;
342 if (!entry->ksym_hbp)
343 goto out;
344 }
345 ksym_filter_entry_count--;
346 hlist_del_rcu(&(entry->ksym_hlist));
347 synchronize_rcu();
348 kfree(entry);
349 ret = 0;
350 goto out;
351 } else {
352 /* Check for malformed request: (4) */
353 if (op == 0)
354 goto out;
355 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
356 }
357out:
358 mutex_unlock(&ksym_tracer_mutex);
359
360 kfree(input_string);
361
362 if (!ret)
363 ret = count;
364 return ret;
365}
366
367static const struct file_operations ksym_tracing_fops = {
368 .open = tracing_open_generic,
369 .read = ksym_trace_filter_read,
370 .write = ksym_trace_filter_write,
371};
372
373static void ksym_trace_reset(struct trace_array *tr)
374{
375 ksym_tracing_enabled = 0;
376 __ksym_trace_reset();
377}
378
379static int ksym_trace_init(struct trace_array *tr)
380{
381 int cpu, ret = 0;
382
383 for_each_online_cpu(cpu)
384 tracing_reset(tr, cpu);
385 ksym_tracing_enabled = 1;
386 ksym_trace_array = tr;
387
388 return ret;
389}
390
391static void ksym_trace_print_header(struct seq_file *m)
392{
393 seq_puts(m,
394 "# TASK-PID CPU# Symbol "
395 "Type Function\n");
396 seq_puts(m,
397 "# | | | "
398 " | |\n");
399}
400
401static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
402{
403 struct trace_entry *entry = iter->ent;
404 struct trace_seq *s = &iter->seq;
405 struct ksym_trace_entry *field;
406 char str[KSYM_SYMBOL_LEN];
407 int ret;
408
409 if (entry->type != TRACE_KSYM)
410 return TRACE_TYPE_UNHANDLED;
411
412 trace_assign_type(field, entry);
413
414 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
415 entry->pid, iter->cpu, (char *)field->addr);
416 if (!ret)
417 return TRACE_TYPE_PARTIAL_LINE;
418
419 switch (field->type) {
420 case HW_BREAKPOINT_R:
421 ret = trace_seq_printf(s, " R ");
422 break;
423 case HW_BREAKPOINT_W:
424 ret = trace_seq_printf(s, " W ");
425 break;
426 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
427 ret = trace_seq_printf(s, " RW ");
428 break;
429 default:
430 return TRACE_TYPE_PARTIAL_LINE;
431 }
432
433 if (!ret)
434 return TRACE_TYPE_PARTIAL_LINE;
435
436 sprint_symbol(str, field->ip);
437 ret = trace_seq_printf(s, "%s\n", str);
438 if (!ret)
439 return TRACE_TYPE_PARTIAL_LINE;
440
441 return TRACE_TYPE_HANDLED;
442}
443
444struct tracer ksym_tracer __read_mostly =
445{
446 .name = "ksym_tracer",
447 .init = ksym_trace_init,
448 .reset = ksym_trace_reset,
449#ifdef CONFIG_FTRACE_SELFTEST
450 .selftest = trace_selftest_startup_ksym,
451#endif
452 .print_header = ksym_trace_print_header,
453 .print_line = ksym_trace_output
454};
455
456__init static int init_ksym_trace(void)
457{
458 struct dentry *d_tracer;
459 struct dentry *entry;
460
461 d_tracer = tracing_init_dentry();
462 ksym_filter_entry_count = 0;
463
464 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
465 NULL, &ksym_tracing_fops);
466 if (!entry)
467 pr_warning("Could not create debugfs "
468 "'ksym_trace_filter' file\n");
469
470 return register_tracer(&ksym_tracer);
471}
472device_initcall(init_ksym_trace);
473
474
475#ifdef CONFIG_PROFILE_KSYM_TRACER
476static int ksym_tracer_stat_headers(struct seq_file *m)
477{
478 seq_puts(m, " Access Type ");
479 seq_puts(m, " Symbol Counter\n");
480 seq_puts(m, " ----------- ");
481 seq_puts(m, " ------ -------\n");
482 return 0;
483}
484
485static int ksym_tracer_stat_show(struct seq_file *m, void *v)
486{
487 struct hlist_node *stat = v;
488 struct trace_ksym *entry;
489 int access_type = 0;
490 char fn_name[KSYM_NAME_LEN];
491
492 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
493
494 access_type = entry->type;
495
496 switch (access_type) {
497 case HW_BREAKPOINT_R:
498 seq_puts(m, " R ");
499 break;
500 case HW_BREAKPOINT_W:
501 seq_puts(m, " W ");
502 break;
503 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
504 seq_puts(m, " RW ");
505 break;
506 default:
507 seq_puts(m, " NA ");
508 }
509
510 if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
511 seq_printf(m, " %-36s", fn_name);
512 else
513 seq_printf(m, " %-36s", "<NA>");
514 seq_printf(m, " %15lu\n", entry->counter);
515
516 return 0;
517}
518
519static void *ksym_tracer_stat_start(struct tracer_stat *trace)
520{
521 return ksym_filter_head.first;
522}
523
524static void *
525ksym_tracer_stat_next(void *v, int idx)
526{
527 struct hlist_node *stat = v;
528
529 return stat->next;
530}
531
532static struct tracer_stat ksym_tracer_stats = {
533 .name = "ksym_tracer",
534 .stat_start = ksym_tracer_stat_start,
535 .stat_next = ksym_tracer_stat_next,
536 .stat_headers = ksym_tracer_stat_headers,
537 .stat_show = ksym_tracer_stat_show
538};
539
540__init static int ksym_tracer_stat_init(void)
541{
542 int ret;
543
544 ret = register_stat_tracer(&ksym_tracer_stats);
545 if (ret) {
546 printk(KERN_WARNING "Warning: could not register "
547 "ksym tracer stats\n");
548 return 1;
549 }
550
551 return 0;
552}
553fs_initcall(ksym_tracer_stat_init);
554#endif /* CONFIG_PROFILE_KSYM_TRACER */
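Editorial note: from user space the tracer added above is driven through the
ksym_trace_filter debugfs file, using the "<ksym_name>:<op>" strings parsed by
parse_ksym_trace_str() and ksym_trace_get_access_type(). A small illustrative
helper follows; it is not part of the patch and assumes debugfs is mounted at
/sys/kernel/debug.

/* hedged userspace example -- watch writes to pid_max via the ksym tracer */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/tracing/ksym_trace_filter";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/*
	 * Format accepted by parse_ksym_trace_str(): "<ksym_name>:<op>" where
	 * <op> is one of "r--", "-w-" or "rw-".  Writing "", "0" or "*:---"
	 * clears all breakpoints.  Here: arm a write breakpoint on pid_max.
	 */
	fputs("pid_max:-w-", f);
	fclose(f);
	return 0;
}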
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..dc98309e839a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	case TRACE_GRAPH_ENT:
 	case TRACE_GRAPH_RET:
 	case TRACE_HW_BRANCHES:
+	case TRACE_KSYM:
 		return 1;
 	}
 	return 0;
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
 	return ret;
 }
 #endif /* CONFIG_HW_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+static int ksym_selftest_dummy;
+
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+	unsigned long count;
+	int ret;
+
+	/* start the tracing */
+	ret = tracer_init(trace, tr);
+	if (ret) {
+		warn_failed_init_tracer(trace, ret);
+		return ret;
+	}
+
+	ksym_selftest_dummy = 0;
+	/* Register the read-write tracing request */
+
+	ret = process_new_ksym_entry("ksym_selftest_dummy",
+				     HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+					(unsigned long)(&ksym_selftest_dummy));
+
+	if (ret < 0) {
+		printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+		goto ret_path;
+	}
+	/* Perform a read and a write operation over the dummy variable to
+	 * trigger the tracer
+	 */
+	if (ksym_selftest_dummy == 0)
+		ksym_selftest_dummy++;
+
+	/* stop the tracing. */
+	tracing_stop();
+	/* check the trace buffer */
+	ret = trace_test_buffer(tr, &count);
+	trace->reset(tr);
+	tracing_start();
+
+	/* read & write operations - one each is performed on the dummy variable
+	 * triggering two entries in the trace buffer
+	 */
+	if (!ret && count != 2) {
+		printk(KERN_CONT "Ksym tracer startup test failed");
+		ret = -1;
+	}
+
+ret_path:
+	return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
+