aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2010-05-17 22:26:53 -0400
committerSteven Rostedt <rostedt@goodmis.org>2010-05-18 00:35:23 -0400
commitf0218b3e9974f06014b61be8987159f4a20e011e (patch)
tree29a593c4d71ab18cb0c450a34e79bf6bea66877e /kernel
parent1eaa4787a774c4896518c81f24e8bccaa2244924 (diff)
parent9d192e118a094087494997ea1c8a2faf39af38c5 (diff)
Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip into trace/tip/tracing/core-6
Conflicts: include/trace/ftrace.h kernel/trace/trace_kprobe.c Acked-by: Masami Hiramatsu <mhiramat@redhat.com> Acked-by: Frederic Weisbecker <fweisbec@gmail.com> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup_freezer.c5
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/hw_breakpoint.c196
-rw-r--r--kernel/kprobes.c132
-rw-r--r--kernel/lockdep.c4
-rw-r--r--kernel/perf_event.c379
-rw-r--r--kernel/ptrace.c1
-rw-r--r--kernel/sched.c61
-rw-r--r--kernel/trace/Kconfig11
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/trace.h20
-rw-r--r--kernel/trace/trace_entries.h12
-rw-r--r--kernel/trace/trace_events_filter.c2
-rw-r--r--kernel/trace/trace_hw_branches.c312
-rw-r--r--kernel/trace/trace_kprobe.c535
-rw-r--r--kernel/trace/trace_ksym.c26
-rw-r--r--kernel/trace/trace_selftest.c57
-rw-r--r--kernel/workqueue.c2
18 files changed, 849 insertions, 910 deletions
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index da5e13975531..e5c0244962b0 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -205,9 +205,12 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
205 * No lock is needed, since the task isn't on tasklist yet, 205 * No lock is needed, since the task isn't on tasklist yet,
206 * so it can't be moved to another cgroup, which means the 206 * so it can't be moved to another cgroup, which means the
207 * freezer won't be removed and will be valid during this 207 * freezer won't be removed and will be valid during this
208 * function call. 208 * function call. Nevertheless, apply RCU read-side critical
209 * section to suppress RCU lockdep false positives.
209 */ 210 */
211 rcu_read_lock();
210 freezer = task_freezer(task); 212 freezer = task_freezer(task);
213 rcu_read_unlock();
211 214
212 /* 215 /*
213 * The root cgroup is non-freezable, so we can skip the 216 * The root cgroup is non-freezable, so we can skip the
diff --git a/kernel/fork.c b/kernel/fork.c
index 44b0791b0a2e..5d3592deaf71 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1111,9 +1111,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1111 p->memcg_batch.do_batch = 0; 1111 p->memcg_batch.do_batch = 0;
1112 p->memcg_batch.memcg = NULL; 1112 p->memcg_batch.memcg = NULL;
1113#endif 1113#endif
1114
1115 p->bts = NULL;
1116
1117 p->stack_start = stack_start; 1114 p->stack_start = stack_start;
1118 1115
1119 /* Perform scheduler related setup. Assign this task to a CPU. */ 1116 /* Perform scheduler related setup. Assign this task to a CPU. */
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 03808ed342a6..7a56b22e0602 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,23 +40,29 @@
40#include <linux/percpu.h> 40#include <linux/percpu.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/slab.h>
43#include <linux/cpu.h> 44#include <linux/cpu.h>
44#include <linux/smp.h> 45#include <linux/smp.h>
45 46
46#include <linux/hw_breakpoint.h> 47#include <linux/hw_breakpoint.h>
47 48
49
48/* 50/*
49 * Constraints data 51 * Constraints data
50 */ 52 */
51 53
52/* Number of pinned cpu breakpoints in a cpu */ 54/* Number of pinned cpu breakpoints in a cpu */
53static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); 55static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
54 56
55/* Number of pinned task breakpoints in a cpu */ 57/* Number of pinned task breakpoints in a cpu */
56static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]); 58static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);
57 59
58/* Number of non-pinned cpu/task breakpoints in a cpu */ 60/* Number of non-pinned cpu/task breakpoints in a cpu */
59static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); 61static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
62
63static int nr_slots[TYPE_MAX];
64
65static int constraints_initialized;
60 66
61/* Gather the number of total pinned and un-pinned bp in a cpuset */ 67/* Gather the number of total pinned and un-pinned bp in a cpuset */
62struct bp_busy_slots { 68struct bp_busy_slots {
@@ -67,16 +73,29 @@ struct bp_busy_slots {
67/* Serialize accesses to the above constraints */ 73/* Serialize accesses to the above constraints */
68static DEFINE_MUTEX(nr_bp_mutex); 74static DEFINE_MUTEX(nr_bp_mutex);
69 75
76__weak int hw_breakpoint_weight(struct perf_event *bp)
77{
78 return 1;
79}
80
81static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
82{
83 if (bp->attr.bp_type & HW_BREAKPOINT_RW)
84 return TYPE_DATA;
85
86 return TYPE_INST;
87}
88
70/* 89/*
71 * Report the maximum number of pinned breakpoints a task 90 * Report the maximum number of pinned breakpoints a task
72 * have in this cpu 91 * have in this cpu
73 */ 92 */
74static unsigned int max_task_bp_pinned(int cpu) 93static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
75{ 94{
76 int i; 95 int i;
77 unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); 96 unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
78 97
79 for (i = HBP_NUM -1; i >= 0; i--) { 98 for (i = nr_slots[type] - 1; i >= 0; i--) {
80 if (tsk_pinned[i] > 0) 99 if (tsk_pinned[i] > 0)
81 return i + 1; 100 return i + 1;
82 } 101 }
@@ -84,7 +103,7 @@ static unsigned int max_task_bp_pinned(int cpu)
84 return 0; 103 return 0;
85} 104}
86 105
87static int task_bp_pinned(struct task_struct *tsk) 106static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
88{ 107{
89 struct perf_event_context *ctx = tsk->perf_event_ctxp; 108 struct perf_event_context *ctx = tsk->perf_event_ctxp;
90 struct list_head *list; 109 struct list_head *list;
@@ -105,7 +124,8 @@ static int task_bp_pinned(struct task_struct *tsk)
105 */ 124 */
106 list_for_each_entry(bp, list, event_entry) { 125 list_for_each_entry(bp, list, event_entry) {
107 if (bp->attr.type == PERF_TYPE_BREAKPOINT) 126 if (bp->attr.type == PERF_TYPE_BREAKPOINT)
108 count++; 127 if (find_slot_idx(bp) == type)
128 count += hw_breakpoint_weight(bp);
109 } 129 }
110 130
111 raw_spin_unlock_irqrestore(&ctx->lock, flags); 131 raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -118,18 +138,19 @@ static int task_bp_pinned(struct task_struct *tsk)
118 * a given cpu (cpu > -1) or in all of them (cpu = -1). 138 * a given cpu (cpu > -1) or in all of them (cpu = -1).
119 */ 139 */
120static void 140static void
121fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp) 141fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
142 enum bp_type_idx type)
122{ 143{
123 int cpu = bp->cpu; 144 int cpu = bp->cpu;
124 struct task_struct *tsk = bp->ctx->task; 145 struct task_struct *tsk = bp->ctx->task;
125 146
126 if (cpu >= 0) { 147 if (cpu >= 0) {
127 slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); 148 slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
128 if (!tsk) 149 if (!tsk)
129 slots->pinned += max_task_bp_pinned(cpu); 150 slots->pinned += max_task_bp_pinned(cpu, type);
130 else 151 else
131 slots->pinned += task_bp_pinned(tsk); 152 slots->pinned += task_bp_pinned(tsk, type);
132 slots->flexible = per_cpu(nr_bp_flexible, cpu); 153 slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
133 154
134 return; 155 return;
135 } 156 }
@@ -137,16 +158,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
137 for_each_online_cpu(cpu) { 158 for_each_online_cpu(cpu) {
138 unsigned int nr; 159 unsigned int nr;
139 160
140 nr = per_cpu(nr_cpu_bp_pinned, cpu); 161 nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
141 if (!tsk) 162 if (!tsk)
142 nr += max_task_bp_pinned(cpu); 163 nr += max_task_bp_pinned(cpu, type);
143 else 164 else
144 nr += task_bp_pinned(tsk); 165 nr += task_bp_pinned(tsk, type);
145 166
146 if (nr > slots->pinned) 167 if (nr > slots->pinned)
147 slots->pinned = nr; 168 slots->pinned = nr;
148 169
149 nr = per_cpu(nr_bp_flexible, cpu); 170 nr = per_cpu(nr_bp_flexible[type], cpu);
150 171
151 if (nr > slots->flexible) 172 if (nr > slots->flexible)
152 slots->flexible = nr; 173 slots->flexible = nr;
@@ -154,31 +175,49 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
154} 175}
155 176
156/* 177/*
178 * For now, continue to consider flexible as pinned, until we can
179 * ensure no flexible event can ever be scheduled before a pinned event
180 * in a same cpu.
181 */
182static void
183fetch_this_slot(struct bp_busy_slots *slots, int weight)
184{
185 slots->pinned += weight;
186}
187
188/*
157 * Add a pinned breakpoint for the given task in our constraint table 189 * Add a pinned breakpoint for the given task in our constraint table
158 */ 190 */
159static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) 191static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
192 enum bp_type_idx type, int weight)
160{ 193{
161 unsigned int *tsk_pinned; 194 unsigned int *tsk_pinned;
162 int count = 0; 195 int old_count = 0;
196 int old_idx = 0;
197 int idx = 0;
163 198
164 count = task_bp_pinned(tsk); 199 old_count = task_bp_pinned(tsk, type);
200 old_idx = old_count - 1;
201 idx = old_idx + weight;
165 202
166 tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); 203 tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
167 if (enable) { 204 if (enable) {
168 tsk_pinned[count]++; 205 tsk_pinned[idx]++;
169 if (count > 0) 206 if (old_count > 0)
170 tsk_pinned[count-1]--; 207 tsk_pinned[old_idx]--;
171 } else { 208 } else {
172 tsk_pinned[count]--; 209 tsk_pinned[idx]--;
173 if (count > 0) 210 if (old_count > 0)
174 tsk_pinned[count-1]++; 211 tsk_pinned[old_idx]++;
175 } 212 }
176} 213}
177 214
178/* 215/*
179 * Add/remove the given breakpoint in our constraint table 216 * Add/remove the given breakpoint in our constraint table
180 */ 217 */
181static void toggle_bp_slot(struct perf_event *bp, bool enable) 218static void
219toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
220 int weight)
182{ 221{
183 int cpu = bp->cpu; 222 int cpu = bp->cpu;
184 struct task_struct *tsk = bp->ctx->task; 223 struct task_struct *tsk = bp->ctx->task;
@@ -186,20 +225,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
186 /* Pinned counter task profiling */ 225 /* Pinned counter task profiling */
187 if (tsk) { 226 if (tsk) {
188 if (cpu >= 0) { 227 if (cpu >= 0) {
189 toggle_bp_task_slot(tsk, cpu, enable); 228 toggle_bp_task_slot(tsk, cpu, enable, type, weight);
190 return; 229 return;
191 } 230 }
192 231
193 for_each_online_cpu(cpu) 232 for_each_online_cpu(cpu)
194 toggle_bp_task_slot(tsk, cpu, enable); 233 toggle_bp_task_slot(tsk, cpu, enable, type, weight);
195 return; 234 return;
196 } 235 }
197 236
198 /* Pinned counter cpu profiling */ 237 /* Pinned counter cpu profiling */
199 if (enable) 238 if (enable)
200 per_cpu(nr_cpu_bp_pinned, bp->cpu)++; 239 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
201 else 240 else
202 per_cpu(nr_cpu_bp_pinned, bp->cpu)--; 241 per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
203} 242}
204 243
205/* 244/*
@@ -246,14 +285,29 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
246static int __reserve_bp_slot(struct perf_event *bp) 285static int __reserve_bp_slot(struct perf_event *bp)
247{ 286{
248 struct bp_busy_slots slots = {0}; 287 struct bp_busy_slots slots = {0};
288 enum bp_type_idx type;
289 int weight;
249 290
250 fetch_bp_busy_slots(&slots, bp); 291 /* We couldn't initialize breakpoint constraints on boot */
292 if (!constraints_initialized)
293 return -ENOMEM;
294
295 /* Basic checks */
296 if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
297 bp->attr.bp_type == HW_BREAKPOINT_INVALID)
298 return -EINVAL;
299
300 type = find_slot_idx(bp);
301 weight = hw_breakpoint_weight(bp);
302
303 fetch_bp_busy_slots(&slots, bp, type);
304 fetch_this_slot(&slots, weight);
251 305
252 /* Flexible counters need to keep at least one slot */ 306 /* Flexible counters need to keep at least one slot */
253 if (slots.pinned + (!!slots.flexible) == HBP_NUM) 307 if (slots.pinned + (!!slots.flexible) > nr_slots[type])
254 return -ENOSPC; 308 return -ENOSPC;
255 309
256 toggle_bp_slot(bp, true); 310 toggle_bp_slot(bp, true, type, weight);
257 311
258 return 0; 312 return 0;
259} 313}
@@ -273,7 +327,12 @@ int reserve_bp_slot(struct perf_event *bp)
273 327
274static void __release_bp_slot(struct perf_event *bp) 328static void __release_bp_slot(struct perf_event *bp)
275{ 329{
276 toggle_bp_slot(bp, false); 330 enum bp_type_idx type;
331 int weight;
332
333 type = find_slot_idx(bp);
334 weight = hw_breakpoint_weight(bp);
335 toggle_bp_slot(bp, false, type, weight);
277} 336}
278 337
279void release_bp_slot(struct perf_event *bp) 338void release_bp_slot(struct perf_event *bp)
@@ -308,6 +367,28 @@ int dbg_release_bp_slot(struct perf_event *bp)
308 return 0; 367 return 0;
309} 368}
310 369
370static int validate_hw_breakpoint(struct perf_event *bp)
371{
372 int ret;
373
374 ret = arch_validate_hwbkpt_settings(bp);
375 if (ret)
376 return ret;
377
378 if (arch_check_bp_in_kernelspace(bp)) {
379 if (bp->attr.exclude_kernel)
380 return -EINVAL;
381 /*
382 * Don't let unprivileged users set a breakpoint in the trap
383 * path to avoid trap recursion attacks.
384 */
385 if (!capable(CAP_SYS_ADMIN))
386 return -EPERM;
387 }
388
389 return 0;
390}
391
311int register_perf_hw_breakpoint(struct perf_event *bp) 392int register_perf_hw_breakpoint(struct perf_event *bp)
312{ 393{
313 int ret; 394 int ret;
@@ -316,17 +397,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
316 if (ret) 397 if (ret)
317 return ret; 398 return ret;
318 399
319 /* 400 ret = validate_hw_breakpoint(bp);
320 * Ptrace breakpoints can be temporary perf events only
321 * meant to reserve a slot. In this case, it is created disabled and
322 * we don't want to check the params right now (as we put a null addr)
323 * But perf tools create events as disabled and we want to check
324 * the params for them.
325 * This is a quick hack that will be removed soon, once we remove
326 * the tmp breakpoints from ptrace
327 */
328 if (!bp->attr.disabled || !bp->overflow_handler)
329 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
330 401
331 /* if arch_validate_hwbkpt_settings() fails then release bp slot */ 402 /* if arch_validate_hwbkpt_settings() fails then release bp slot */
332 if (ret) 403 if (ret)
@@ -373,7 +444,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
373 if (attr->disabled) 444 if (attr->disabled)
374 goto end; 445 goto end;
375 446
376 err = arch_validate_hwbkpt_settings(bp, bp->ctx->task); 447 err = validate_hw_breakpoint(bp);
377 if (!err) 448 if (!err)
378 perf_event_enable(bp); 449 perf_event_enable(bp);
379 450
@@ -480,7 +551,36 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
480 551
481static int __init init_hw_breakpoint(void) 552static int __init init_hw_breakpoint(void)
482{ 553{
554 unsigned int **task_bp_pinned;
555 int cpu, err_cpu;
556 int i;
557
558 for (i = 0; i < TYPE_MAX; i++)
559 nr_slots[i] = hw_breakpoint_slots(i);
560
561 for_each_possible_cpu(cpu) {
562 for (i = 0; i < TYPE_MAX; i++) {
563 task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
564 *task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
565 GFP_KERNEL);
566 if (!*task_bp_pinned)
567 goto err_alloc;
568 }
569 }
570
571 constraints_initialized = 1;
572
483 return register_die_notifier(&hw_breakpoint_exceptions_nb); 573 return register_die_notifier(&hw_breakpoint_exceptions_nb);
574
575 err_alloc:
576 for_each_possible_cpu(err_cpu) {
577 if (err_cpu == cpu)
578 break;
579 for (i = 0; i < TYPE_MAX; i++)
580 kfree(per_cpu(nr_task_bp_pinned[i], cpu));
581 }
582
583 return -ENOMEM;
484} 584}
485core_initcall(init_hw_breakpoint); 585core_initcall(init_hw_breakpoint);
486 586
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 0ed46f3e51e9..282035f3ae96 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1588,6 +1588,72 @@ static void __kprobes kill_kprobe(struct kprobe *p)
1588 arch_remove_kprobe(p); 1588 arch_remove_kprobe(p);
1589} 1589}
1590 1590
1591/* Disable one kprobe */
1592int __kprobes disable_kprobe(struct kprobe *kp)
1593{
1594 int ret = 0;
1595 struct kprobe *p;
1596
1597 mutex_lock(&kprobe_mutex);
1598
1599 /* Check whether specified probe is valid. */
1600 p = __get_valid_kprobe(kp);
1601 if (unlikely(p == NULL)) {
1602 ret = -EINVAL;
1603 goto out;
1604 }
1605
1606 /* If the probe is already disabled (or gone), just return */
1607 if (kprobe_disabled(kp))
1608 goto out;
1609
1610 kp->flags |= KPROBE_FLAG_DISABLED;
1611 if (p != kp)
1612 /* When kp != p, p is always enabled. */
1613 try_to_disable_aggr_kprobe(p);
1614
1615 if (!kprobes_all_disarmed && kprobe_disabled(p))
1616 disarm_kprobe(p);
1617out:
1618 mutex_unlock(&kprobe_mutex);
1619 return ret;
1620}
1621EXPORT_SYMBOL_GPL(disable_kprobe);
1622
1623/* Enable one kprobe */
1624int __kprobes enable_kprobe(struct kprobe *kp)
1625{
1626 int ret = 0;
1627 struct kprobe *p;
1628
1629 mutex_lock(&kprobe_mutex);
1630
1631 /* Check whether specified probe is valid. */
1632 p = __get_valid_kprobe(kp);
1633 if (unlikely(p == NULL)) {
1634 ret = -EINVAL;
1635 goto out;
1636 }
1637
1638 if (kprobe_gone(kp)) {
1639 /* This kprobe has gone, we couldn't enable it. */
1640 ret = -EINVAL;
1641 goto out;
1642 }
1643
1644 if (p != kp)
1645 kp->flags &= ~KPROBE_FLAG_DISABLED;
1646
1647 if (!kprobes_all_disarmed && kprobe_disabled(p)) {
1648 p->flags &= ~KPROBE_FLAG_DISABLED;
1649 arm_kprobe(p);
1650 }
1651out:
1652 mutex_unlock(&kprobe_mutex);
1653 return ret;
1654}
1655EXPORT_SYMBOL_GPL(enable_kprobe);
1656
1591void __kprobes dump_kprobe(struct kprobe *kp) 1657void __kprobes dump_kprobe(struct kprobe *kp)
1592{ 1658{
1593 printk(KERN_WARNING "Dumping kprobe:\n"); 1659 printk(KERN_WARNING "Dumping kprobe:\n");
@@ -1805,72 +1871,6 @@ static const struct file_operations debugfs_kprobes_operations = {
1805 .release = seq_release, 1871 .release = seq_release,
1806}; 1872};
1807 1873
1808/* Disable one kprobe */
1809int __kprobes disable_kprobe(struct kprobe *kp)
1810{
1811 int ret = 0;
1812 struct kprobe *p;
1813
1814 mutex_lock(&kprobe_mutex);
1815
1816 /* Check whether specified probe is valid. */
1817 p = __get_valid_kprobe(kp);
1818 if (unlikely(p == NULL)) {
1819 ret = -EINVAL;
1820 goto out;
1821 }
1822
1823 /* If the probe is already disabled (or gone), just return */
1824 if (kprobe_disabled(kp))
1825 goto out;
1826
1827 kp->flags |= KPROBE_FLAG_DISABLED;
1828 if (p != kp)
1829 /* When kp != p, p is always enabled. */
1830 try_to_disable_aggr_kprobe(p);
1831
1832 if (!kprobes_all_disarmed && kprobe_disabled(p))
1833 disarm_kprobe(p);
1834out:
1835 mutex_unlock(&kprobe_mutex);
1836 return ret;
1837}
1838EXPORT_SYMBOL_GPL(disable_kprobe);
1839
1840/* Enable one kprobe */
1841int __kprobes enable_kprobe(struct kprobe *kp)
1842{
1843 int ret = 0;
1844 struct kprobe *p;
1845
1846 mutex_lock(&kprobe_mutex);
1847
1848 /* Check whether specified probe is valid. */
1849 p = __get_valid_kprobe(kp);
1850 if (unlikely(p == NULL)) {
1851 ret = -EINVAL;
1852 goto out;
1853 }
1854
1855 if (kprobe_gone(kp)) {
1856 /* This kprobe has gone, we couldn't enable it. */
1857 ret = -EINVAL;
1858 goto out;
1859 }
1860
1861 if (p != kp)
1862 kp->flags &= ~KPROBE_FLAG_DISABLED;
1863
1864 if (!kprobes_all_disarmed && kprobe_disabled(p)) {
1865 p->flags &= ~KPROBE_FLAG_DISABLED;
1866 arm_kprobe(p);
1867 }
1868out:
1869 mutex_unlock(&kprobe_mutex);
1870 return ret;
1871}
1872EXPORT_SYMBOL_GPL(enable_kprobe);
1873
1874static void __kprobes arm_all_kprobes(void) 1874static void __kprobes arm_all_kprobes(void)
1875{ 1875{
1876 struct hlist_head *head; 1876 struct hlist_head *head;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 2594e1ce41cb..e9c759f06c1d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3227,7 +3227,7 @@ void lock_release(struct lockdep_map *lock, int nested,
3227 raw_local_irq_save(flags); 3227 raw_local_irq_save(flags);
3228 check_flags(flags); 3228 check_flags(flags);
3229 current->lockdep_recursion = 1; 3229 current->lockdep_recursion = 1;
3230 trace_lock_release(lock, nested, ip); 3230 trace_lock_release(lock, ip);
3231 __lock_release(lock, nested, ip); 3231 __lock_release(lock, nested, ip);
3232 current->lockdep_recursion = 0; 3232 current->lockdep_recursion = 0;
3233 raw_local_irq_restore(flags); 3233 raw_local_irq_restore(flags);
@@ -3380,7 +3380,7 @@ found_it:
3380 hlock->holdtime_stamp = now; 3380 hlock->holdtime_stamp = now;
3381 } 3381 }
3382 3382
3383 trace_lock_acquired(lock, ip, waittime); 3383 trace_lock_acquired(lock, ip);
3384 3384
3385 stats = get_lock_stats(hlock_class(hlock)); 3385 stats = get_lock_stats(hlock_class(hlock));
3386 if (waittime) { 3386 if (waittime) {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2f3fbf84215a..a4fa381db3c2 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -16,6 +16,7 @@
16#include <linux/file.h> 16#include <linux/file.h>
17#include <linux/poll.h> 17#include <linux/poll.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/hash.h>
19#include <linux/sysfs.h> 20#include <linux/sysfs.h>
20#include <linux/dcache.h> 21#include <linux/dcache.h>
21#include <linux/percpu.h> 22#include <linux/percpu.h>
@@ -82,14 +83,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
82void __weak hw_perf_disable(void) { barrier(); } 83void __weak hw_perf_disable(void) { barrier(); }
83void __weak hw_perf_enable(void) { barrier(); } 84void __weak hw_perf_enable(void) { barrier(); }
84 85
85int __weak
86hw_perf_group_sched_in(struct perf_event *group_leader,
87 struct perf_cpu_context *cpuctx,
88 struct perf_event_context *ctx)
89{
90 return 0;
91}
92
93void __weak perf_event_print_debug(void) { } 86void __weak perf_event_print_debug(void) { }
94 87
95static DEFINE_PER_CPU(int, perf_disable_count); 88static DEFINE_PER_CPU(int, perf_disable_count);
@@ -262,6 +255,18 @@ static void update_event_times(struct perf_event *event)
262 event->total_time_running = run_end - event->tstamp_running; 255 event->total_time_running = run_end - event->tstamp_running;
263} 256}
264 257
258/*
259 * Update total_time_enabled and total_time_running for all events in a group.
260 */
261static void update_group_times(struct perf_event *leader)
262{
263 struct perf_event *event;
264
265 update_event_times(leader);
266 list_for_each_entry(event, &leader->sibling_list, group_entry)
267 update_event_times(event);
268}
269
265static struct list_head * 270static struct list_head *
266ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) 271ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
267{ 272{
@@ -315,8 +320,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
315static void 320static void
316list_del_event(struct perf_event *event, struct perf_event_context *ctx) 321list_del_event(struct perf_event *event, struct perf_event_context *ctx)
317{ 322{
318 struct perf_event *sibling, *tmp;
319
320 if (list_empty(&event->group_entry)) 323 if (list_empty(&event->group_entry))
321 return; 324 return;
322 ctx->nr_events--; 325 ctx->nr_events--;
@@ -329,7 +332,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
329 if (event->group_leader != event) 332 if (event->group_leader != event)
330 event->group_leader->nr_siblings--; 333 event->group_leader->nr_siblings--;
331 334
332 update_event_times(event); 335 update_group_times(event);
333 336
334 /* 337 /*
335 * If event was in error state, then keep it 338 * If event was in error state, then keep it
@@ -340,6 +343,12 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
340 */ 343 */
341 if (event->state > PERF_EVENT_STATE_OFF) 344 if (event->state > PERF_EVENT_STATE_OFF)
342 event->state = PERF_EVENT_STATE_OFF; 345 event->state = PERF_EVENT_STATE_OFF;
346}
347
348static void
349perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
350{
351 struct perf_event *sibling, *tmp;
343 352
344 /* 353 /*
345 * If this was a group event with sibling events then 354 * If this was a group event with sibling events then
@@ -505,18 +514,6 @@ retry:
505} 514}
506 515
507/* 516/*
508 * Update total_time_enabled and total_time_running for all events in a group.
509 */
510static void update_group_times(struct perf_event *leader)
511{
512 struct perf_event *event;
513
514 update_event_times(leader);
515 list_for_each_entry(event, &leader->sibling_list, group_entry)
516 update_event_times(event);
517}
518
519/*
520 * Cross CPU call to disable a performance event 517 * Cross CPU call to disable a performance event
521 */ 518 */
522static void __perf_event_disable(void *info) 519static void __perf_event_disable(void *info)
@@ -640,15 +637,20 @@ group_sched_in(struct perf_event *group_event,
640 struct perf_cpu_context *cpuctx, 637 struct perf_cpu_context *cpuctx,
641 struct perf_event_context *ctx) 638 struct perf_event_context *ctx)
642{ 639{
643 struct perf_event *event, *partial_group; 640 struct perf_event *event, *partial_group = NULL;
641 const struct pmu *pmu = group_event->pmu;
642 bool txn = false;
644 int ret; 643 int ret;
645 644
646 if (group_event->state == PERF_EVENT_STATE_OFF) 645 if (group_event->state == PERF_EVENT_STATE_OFF)
647 return 0; 646 return 0;
648 647
649 ret = hw_perf_group_sched_in(group_event, cpuctx, ctx); 648 /* Check if group transaction availabe */
650 if (ret) 649 if (pmu->start_txn)
651 return ret < 0 ? ret : 0; 650 txn = true;
651
652 if (txn)
653 pmu->start_txn(pmu);
652 654
653 if (event_sched_in(group_event, cpuctx, ctx)) 655 if (event_sched_in(group_event, cpuctx, ctx))
654 return -EAGAIN; 656 return -EAGAIN;
@@ -663,9 +665,19 @@ group_sched_in(struct perf_event *group_event,
663 } 665 }
664 } 666 }
665 667
666 return 0; 668 if (!txn)
669 return 0;
670
671 ret = pmu->commit_txn(pmu);
672 if (!ret) {
673 pmu->cancel_txn(pmu);
674 return 0;
675 }
667 676
668group_error: 677group_error:
678 if (txn)
679 pmu->cancel_txn(pmu);
680
669 /* 681 /*
670 * Groups can be scheduled in as one unit only, so undo any 682 * Groups can be scheduled in as one unit only, so undo any
671 * partial group before returning: 683 * partial group before returning:
@@ -1367,6 +1379,8 @@ void perf_event_task_sched_in(struct task_struct *task)
1367 if (cpuctx->task_ctx == ctx) 1379 if (cpuctx->task_ctx == ctx)
1368 return; 1380 return;
1369 1381
1382 perf_disable();
1383
1370 /* 1384 /*
1371 * We want to keep the following priority order: 1385 * We want to keep the following priority order:
1372 * cpu pinned (that don't need to move), task pinned, 1386 * cpu pinned (that don't need to move), task pinned,
@@ -1379,6 +1393,8 @@ void perf_event_task_sched_in(struct task_struct *task)
1379 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); 1393 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
1380 1394
1381 cpuctx->task_ctx = ctx; 1395 cpuctx->task_ctx = ctx;
1396
1397 perf_enable();
1382} 1398}
1383 1399
1384#define MAX_INTERRUPTS (~0ULL) 1400#define MAX_INTERRUPTS (~0ULL)
@@ -1856,9 +1872,30 @@ int perf_event_release_kernel(struct perf_event *event)
1856{ 1872{
1857 struct perf_event_context *ctx = event->ctx; 1873 struct perf_event_context *ctx = event->ctx;
1858 1874
1875 /*
1876 * Remove from the PMU, can't get re-enabled since we got
1877 * here because the last ref went.
1878 */
1879 perf_event_disable(event);
1880
1859 WARN_ON_ONCE(ctx->parent_ctx); 1881 WARN_ON_ONCE(ctx->parent_ctx);
1860 mutex_lock(&ctx->mutex); 1882 /*
1861 perf_event_remove_from_context(event); 1883 * There are two ways this annotation is useful:
1884 *
1885 * 1) there is a lock recursion from perf_event_exit_task
1886 * see the comment there.
1887 *
1888 * 2) there is a lock-inversion with mmap_sem through
1889 * perf_event_read_group(), which takes faults while
1890 * holding ctx->mutex, however this is called after
1891 * the last filedesc died, so there is no possibility
1892 * to trigger the AB-BA case.
1893 */
1894 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
1895 raw_spin_lock_irq(&ctx->lock);
1896 list_del_event(event, ctx);
1897 perf_destroy_group(event, ctx);
1898 raw_spin_unlock_irq(&ctx->lock);
1862 mutex_unlock(&ctx->mutex); 1899 mutex_unlock(&ctx->mutex);
1863 1900
1864 mutex_lock(&event->owner->perf_event_mutex); 1901 mutex_lock(&event->owner->perf_event_mutex);
@@ -2642,6 +2679,7 @@ static int perf_fasync(int fd, struct file *filp, int on)
2642} 2679}
2643 2680
2644static const struct file_operations perf_fops = { 2681static const struct file_operations perf_fops = {
2682 .llseek = no_llseek,
2645 .release = perf_release, 2683 .release = perf_release,
2646 .read = perf_read, 2684 .read = perf_read,
2647 .poll = perf_poll, 2685 .poll = perf_poll,
@@ -2792,6 +2830,27 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
2792 2830
2793 2831
2794/* 2832/*
2833 * We assume there is only KVM supporting the callbacks.
2834 * Later on, we might change it to a list if there is
2835 * another virtualization implementation supporting the callbacks.
2836 */
2837struct perf_guest_info_callbacks *perf_guest_cbs;
2838
2839int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
2840{
2841 perf_guest_cbs = cbs;
2842 return 0;
2843}
2844EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
2845
2846int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
2847{
2848 perf_guest_cbs = NULL;
2849 return 0;
2850}
2851EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
2852
2853/*
2795 * Output 2854 * Output
2796 */ 2855 */
2797static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, 2856static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
@@ -3743,7 +3802,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
3743 .event_id = { 3802 .event_id = {
3744 .header = { 3803 .header = {
3745 .type = PERF_RECORD_MMAP, 3804 .type = PERF_RECORD_MMAP,
3746 .misc = 0, 3805 .misc = PERF_RECORD_MISC_USER,
3747 /* .size */ 3806 /* .size */
3748 }, 3807 },
3749 /* .pid */ 3808 /* .pid */
@@ -3961,36 +4020,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
3961 perf_swevent_overflow(event, 0, nmi, data, regs); 4020 perf_swevent_overflow(event, 0, nmi, data, regs);
3962} 4021}
3963 4022
3964static int perf_swevent_is_counting(struct perf_event *event)
3965{
3966 /*
3967 * The event is active, we're good!
3968 */
3969 if (event->state == PERF_EVENT_STATE_ACTIVE)
3970 return 1;
3971
3972 /*
3973 * The event is off/error, not counting.
3974 */
3975 if (event->state != PERF_EVENT_STATE_INACTIVE)
3976 return 0;
3977
3978 /*
3979 * The event is inactive, if the context is active
3980 * we're part of a group that didn't make it on the 'pmu',
3981 * not counting.
3982 */
3983 if (event->ctx->is_active)
3984 return 0;
3985
3986 /*
3987 * We're inactive and the context is too, this means the
3988 * task is scheduled out, we're counting events that happen
3989 * to us, like migration events.
3990 */
3991 return 1;
3992}
3993
3994static int perf_tp_event_match(struct perf_event *event, 4023static int perf_tp_event_match(struct perf_event *event,
3995 struct perf_sample_data *data); 4024 struct perf_sample_data *data);
3996 4025
@@ -4014,12 +4043,6 @@ static int perf_swevent_match(struct perf_event *event,
4014 struct perf_sample_data *data, 4043 struct perf_sample_data *data,
4015 struct pt_regs *regs) 4044 struct pt_regs *regs)
4016{ 4045{
4017 if (event->cpu != -1 && event->cpu != smp_processor_id())
4018 return 0;
4019
4020 if (!perf_swevent_is_counting(event))
4021 return 0;
4022
4023 if (event->attr.type != type) 4046 if (event->attr.type != type)
4024 return 0; 4047 return 0;
4025 4048
@@ -4036,18 +4059,53 @@ static int perf_swevent_match(struct perf_event *event,
4036 return 1; 4059 return 1;
4037} 4060}
4038 4061
4039static void perf_swevent_ctx_event(struct perf_event_context *ctx, 4062static inline u64 swevent_hash(u64 type, u32 event_id)
4040 enum perf_type_id type, 4063{
4041 u32 event_id, u64 nr, int nmi, 4064 u64 val = event_id | (type << 32);
4042 struct perf_sample_data *data, 4065
4043 struct pt_regs *regs) 4066 return hash_64(val, SWEVENT_HLIST_BITS);
4067}
4068
4069static struct hlist_head *
4070find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id)
4071{
4072 u64 hash;
4073 struct swevent_hlist *hlist;
4074
4075 hash = swevent_hash(type, event_id);
4076
4077 hlist = rcu_dereference(ctx->swevent_hlist);
4078 if (!hlist)
4079 return NULL;
4080
4081 return &hlist->heads[hash];
4082}
4083
4084static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
4085 u64 nr, int nmi,
4086 struct perf_sample_data *data,
4087 struct pt_regs *regs)
4044{ 4088{
4089 struct perf_cpu_context *cpuctx;
4045 struct perf_event *event; 4090 struct perf_event *event;
4091 struct hlist_node *node;
4092 struct hlist_head *head;
4046 4093
4047 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 4094 cpuctx = &__get_cpu_var(perf_cpu_context);
4095
4096 rcu_read_lock();
4097
4098 head = find_swevent_head(cpuctx, type, event_id);
4099
4100 if (!head)
4101 goto end;
4102
4103 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4048 if (perf_swevent_match(event, type, event_id, data, regs)) 4104 if (perf_swevent_match(event, type, event_id, data, regs))
4049 perf_swevent_add(event, nr, nmi, data, regs); 4105 perf_swevent_add(event, nr, nmi, data, regs);
4050 } 4106 }
4107end:
4108 rcu_read_unlock();
4051} 4109}
4052 4110
4053int perf_swevent_get_recursion_context(void) 4111int perf_swevent_get_recursion_context(void)
@@ -4085,27 +4143,6 @@ void perf_swevent_put_recursion_context(int rctx)
4085} 4143}
4086EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); 4144EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
4087 4145
4088static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
4089 u64 nr, int nmi,
4090 struct perf_sample_data *data,
4091 struct pt_regs *regs)
4092{
4093 struct perf_cpu_context *cpuctx;
4094 struct perf_event_context *ctx;
4095
4096 cpuctx = &__get_cpu_var(perf_cpu_context);
4097 rcu_read_lock();
4098 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
4099 nr, nmi, data, regs);
4100 /*
4101 * doesn't really matter which of the child contexts the
4102 * events ends up in.
4103 */
4104 ctx = rcu_dereference(current->perf_event_ctxp);
4105 if (ctx)
4106 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
4107 rcu_read_unlock();
4108}
4109 4146
4110void __perf_sw_event(u32 event_id, u64 nr, int nmi, 4147void __perf_sw_event(u32 event_id, u64 nr, int nmi,
4111 struct pt_regs *regs, u64 addr) 4148 struct pt_regs *regs, u64 addr)
@@ -4131,16 +4168,28 @@ static void perf_swevent_read(struct perf_event *event)
4131static int perf_swevent_enable(struct perf_event *event) 4168static int perf_swevent_enable(struct perf_event *event)
4132{ 4169{
4133 struct hw_perf_event *hwc = &event->hw; 4170 struct hw_perf_event *hwc = &event->hw;
4171 struct perf_cpu_context *cpuctx;
4172 struct hlist_head *head;
4173
4174 cpuctx = &__get_cpu_var(perf_cpu_context);
4134 4175
4135 if (hwc->sample_period) { 4176 if (hwc->sample_period) {
4136 hwc->last_period = hwc->sample_period; 4177 hwc->last_period = hwc->sample_period;
4137 perf_swevent_set_period(event); 4178 perf_swevent_set_period(event);
4138 } 4179 }
4180
4181 head = find_swevent_head(cpuctx, event->attr.type, event->attr.config);
4182 if (WARN_ON_ONCE(!head))
4183 return -EINVAL;
4184
4185 hlist_add_head_rcu(&event->hlist_entry, head);
4186
4139 return 0; 4187 return 0;
4140} 4188}
4141 4189
4142static void perf_swevent_disable(struct perf_event *event) 4190static void perf_swevent_disable(struct perf_event *event)
4143{ 4191{
4192 hlist_del_rcu(&event->hlist_entry);
4144} 4193}
4145 4194
4146static const struct pmu perf_ops_generic = { 4195static const struct pmu perf_ops_generic = {
@@ -4168,15 +4217,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4168 perf_sample_data_init(&data, 0); 4217 perf_sample_data_init(&data, 0);
4169 data.period = event->hw.last_period; 4218 data.period = event->hw.last_period;
4170 regs = get_irq_regs(); 4219 regs = get_irq_regs();
4171 /*
4172 * In case we exclude kernel IPs or are somehow not in interrupt
4173 * context, provide the next best thing, the user IP.
4174 */
4175 if ((event->attr.exclude_kernel || !regs) &&
4176 !event->attr.exclude_user)
4177 regs = task_pt_regs(current);
4178 4220
4179 if (regs) { 4221 if (regs && !perf_exclude_event(event, regs)) {
4180 if (!(event->attr.exclude_idle && current->pid == 0)) 4222 if (!(event->attr.exclude_idle && current->pid == 0))
4181 if (perf_event_overflow(event, 0, &data, regs)) 4223 if (perf_event_overflow(event, 0, &data, regs))
4182 ret = HRTIMER_NORESTART; 4224 ret = HRTIMER_NORESTART;
@@ -4324,6 +4366,105 @@ static const struct pmu perf_ops_task_clock = {
4324 .read = task_clock_perf_event_read, 4366 .read = task_clock_perf_event_read,
4325}; 4367};
4326 4368
4369static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
4370{
4371 struct swevent_hlist *hlist;
4372
4373 hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
4374 kfree(hlist);
4375}
4376
4377static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
4378{
4379 struct swevent_hlist *hlist;
4380
4381 if (!cpuctx->swevent_hlist)
4382 return;
4383
4384 hlist = cpuctx->swevent_hlist;
4385 rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
4386 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
4387}
4388
4389static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
4390{
4391 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
4392
4393 mutex_lock(&cpuctx->hlist_mutex);
4394
4395 if (!--cpuctx->hlist_refcount)
4396 swevent_hlist_release(cpuctx);
4397
4398 mutex_unlock(&cpuctx->hlist_mutex);
4399}
4400
4401static void swevent_hlist_put(struct perf_event *event)
4402{
4403 int cpu;
4404
4405 if (event->cpu != -1) {
4406 swevent_hlist_put_cpu(event, event->cpu);
4407 return;
4408 }
4409
4410 for_each_possible_cpu(cpu)
4411 swevent_hlist_put_cpu(event, cpu);
4412}
4413
4414static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
4415{
4416 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
4417 int err = 0;
4418
4419 mutex_lock(&cpuctx->hlist_mutex);
4420
4421 if (!cpuctx->swevent_hlist && cpu_online(cpu)) {
4422 struct swevent_hlist *hlist;
4423
4424 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
4425 if (!hlist) {
4426 err = -ENOMEM;
4427 goto exit;
4428 }
4429 rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
4430 }
4431 cpuctx->hlist_refcount++;
4432 exit:
4433 mutex_unlock(&cpuctx->hlist_mutex);
4434
4435 return err;
4436}
4437
4438static int swevent_hlist_get(struct perf_event *event)
4439{
4440 int err;
4441 int cpu, failed_cpu;
4442
4443 if (event->cpu != -1)
4444 return swevent_hlist_get_cpu(event, event->cpu);
4445
4446 get_online_cpus();
4447 for_each_possible_cpu(cpu) {
4448 err = swevent_hlist_get_cpu(event, cpu);
4449 if (err) {
4450 failed_cpu = cpu;
4451 goto fail;
4452 }
4453 }
4454 put_online_cpus();
4455
4456 return 0;
4457 fail:
4458 for_each_possible_cpu(cpu) {
4459 if (cpu == failed_cpu)
4460 break;
4461 swevent_hlist_put_cpu(event, cpu);
4462 }
4463
4464 put_online_cpus();
4465 return err;
4466}
4467
4327#ifdef CONFIG_EVENT_TRACING 4468#ifdef CONFIG_EVENT_TRACING
4328 4469
4329void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4470void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
@@ -4357,10 +4498,13 @@ static int perf_tp_event_match(struct perf_event *event,
4357static void tp_perf_event_destroy(struct perf_event *event) 4498static void tp_perf_event_destroy(struct perf_event *event)
4358{ 4499{
4359 perf_trace_disable(event->attr.config); 4500 perf_trace_disable(event->attr.config);
4501 swevent_hlist_put(event);
4360} 4502}
4361 4503
4362static const struct pmu *tp_perf_event_init(struct perf_event *event) 4504static const struct pmu *tp_perf_event_init(struct perf_event *event)
4363{ 4505{
4506 int err;
4507
4364 /* 4508 /*
4365 * Raw tracepoint data is a severe data leak, only allow root to 4509 * Raw tracepoint data is a severe data leak, only allow root to
4366 * have these. 4510 * have these.
@@ -4374,6 +4518,11 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4374 return NULL; 4518 return NULL;
4375 4519
4376 event->destroy = tp_perf_event_destroy; 4520 event->destroy = tp_perf_event_destroy;
4521 err = swevent_hlist_get(event);
4522 if (err) {
4523 perf_trace_disable(event->attr.config);
4524 return ERR_PTR(err);
4525 }
4377 4526
4378 return &perf_ops_generic; 4527 return &perf_ops_generic;
4379} 4528}
@@ -4474,6 +4623,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
4474 WARN_ON(event->parent); 4623 WARN_ON(event->parent);
4475 4624
4476 atomic_dec(&perf_swevent_enabled[event_id]); 4625 atomic_dec(&perf_swevent_enabled[event_id]);
4626 swevent_hlist_put(event);
4477} 4627}
4478 4628
4479static const struct pmu *sw_perf_event_init(struct perf_event *event) 4629static const struct pmu *sw_perf_event_init(struct perf_event *event)
@@ -4512,6 +4662,12 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4512 case PERF_COUNT_SW_ALIGNMENT_FAULTS: 4662 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
4513 case PERF_COUNT_SW_EMULATION_FAULTS: 4663 case PERF_COUNT_SW_EMULATION_FAULTS:
4514 if (!event->parent) { 4664 if (!event->parent) {
4665 int err;
4666
4667 err = swevent_hlist_get(event);
4668 if (err)
4669 return ERR_PTR(err);
4670
4515 atomic_inc(&perf_swevent_enabled[event_id]); 4671 atomic_inc(&perf_swevent_enabled[event_id]);
4516 event->destroy = sw_perf_event_destroy; 4672 event->destroy = sw_perf_event_destroy;
4517 } 4673 }
@@ -4897,7 +5053,7 @@ err_fput_free_put_context:
4897 5053
4898err_free_put_context: 5054err_free_put_context:
4899 if (err < 0) 5055 if (err < 0)
4900 kfree(event); 5056 free_event(event);
4901 5057
4902err_put_context: 5058err_put_context:
4903 if (err < 0) 5059 if (err < 0)
@@ -5176,7 +5332,7 @@ void perf_event_exit_task(struct task_struct *child)
5176 * 5332 *
5177 * But since its the parent context it won't be the same instance. 5333 * But since its the parent context it won't be the same instance.
5178 */ 5334 */
5179 mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); 5335 mutex_lock(&child_ctx->mutex);
5180 5336
5181again: 5337again:
5182 list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups, 5338 list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
@@ -5384,6 +5540,7 @@ static void __init perf_event_init_all_cpus(void)
5384 5540
5385 for_each_possible_cpu(cpu) { 5541 for_each_possible_cpu(cpu) {
5386 cpuctx = &per_cpu(perf_cpu_context, cpu); 5542 cpuctx = &per_cpu(perf_cpu_context, cpu);
5543 mutex_init(&cpuctx->hlist_mutex);
5387 __perf_event_init_context(&cpuctx->ctx, NULL); 5544 __perf_event_init_context(&cpuctx->ctx, NULL);
5388 } 5545 }
5389} 5546}
@@ -5397,6 +5554,16 @@ static void __cpuinit perf_event_init_cpu(int cpu)
5397 spin_lock(&perf_resource_lock); 5554 spin_lock(&perf_resource_lock);
5398 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; 5555 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
5399 spin_unlock(&perf_resource_lock); 5556 spin_unlock(&perf_resource_lock);
5557
5558 mutex_lock(&cpuctx->hlist_mutex);
5559 if (cpuctx->hlist_refcount > 0) {
5560 struct swevent_hlist *hlist;
5561
5562 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
5563 WARN_ON_ONCE(!hlist);
5564 rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
5565 }
5566 mutex_unlock(&cpuctx->hlist_mutex);
5400} 5567}
5401 5568
5402#ifdef CONFIG_HOTPLUG_CPU 5569#ifdef CONFIG_HOTPLUG_CPU
@@ -5416,6 +5583,10 @@ static void perf_event_exit_cpu(int cpu)
5416 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 5583 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
5417 struct perf_event_context *ctx = &cpuctx->ctx; 5584 struct perf_event_context *ctx = &cpuctx->ctx;
5418 5585
5586 mutex_lock(&cpuctx->hlist_mutex);
5587 swevent_hlist_release(cpuctx);
5588 mutex_unlock(&cpuctx->hlist_mutex);
5589
5419 mutex_lock(&ctx->mutex); 5590 mutex_lock(&ctx->mutex);
5420 smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); 5591 smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
5421 mutex_unlock(&ctx->mutex); 5592 mutex_unlock(&ctx->mutex);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 42ad8ae729a0..9fb51237b18c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -76,7 +76,6 @@ void __ptrace_unlink(struct task_struct *child)
76 child->parent = child->real_parent; 76 child->parent = child->real_parent;
77 list_del_init(&child->ptrace_entry); 77 list_del_init(&child->ptrace_entry);
78 78
79 arch_ptrace_untrace(child);
80 if (task_is_traced(child)) 79 if (task_is_traced(child))
81 ptrace_untrace(child); 80 ptrace_untrace(child);
82} 81}
diff --git a/kernel/sched.c b/kernel/sched.c
index b531d7934083..78554dd0d1a4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -323,6 +323,15 @@ static inline struct task_group *task_group(struct task_struct *p)
323/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ 323/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
324static inline void set_task_rq(struct task_struct *p, unsigned int cpu) 324static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
325{ 325{
326 /*
327 * Strictly speaking this rcu_read_lock() is not needed since the
328 * task_group is tied to the cgroup, which in turn can never go away
329 * as long as there are tasks attached to it.
330 *
331 * However since task_group() uses task_subsys_state() which is an
332 * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
333 */
334 rcu_read_lock();
326#ifdef CONFIG_FAIR_GROUP_SCHED 335#ifdef CONFIG_FAIR_GROUP_SCHED
327 p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; 336 p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
328 p->se.parent = task_group(p)->se[cpu]; 337 p->se.parent = task_group(p)->se[cpu];
@@ -332,6 +341,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
332 p->rt.rt_rq = task_group(p)->rt_rq[cpu]; 341 p->rt.rt_rq = task_group(p)->rt_rq[cpu];
333 p->rt.parent = task_group(p)->rt_se[cpu]; 342 p->rt.parent = task_group(p)->rt_se[cpu];
334#endif 343#endif
344 rcu_read_unlock();
335} 345}
336 346
337#else 347#else
@@ -2058,49 +2068,6 @@ static bool migrate_task(struct task_struct *p, int dest_cpu)
2058} 2068}
2059 2069
2060/* 2070/*
2061 * wait_task_context_switch - wait for a thread to complete at least one
2062 * context switch.
2063 *
2064 * @p must not be current.
2065 */
2066void wait_task_context_switch(struct task_struct *p)
2067{
2068 unsigned long nvcsw, nivcsw, flags;
2069 int running;
2070 struct rq *rq;
2071
2072 nvcsw = p->nvcsw;
2073 nivcsw = p->nivcsw;
2074 for (;;) {
2075 /*
2076 * The runqueue is assigned before the actual context
2077 * switch. We need to take the runqueue lock.
2078 *
2079 * We could check initially without the lock but it is
2080 * very likely that we need to take the lock in every
2081 * iteration.
2082 */
2083 rq = task_rq_lock(p, &flags);
2084 running = task_running(rq, p);
2085 task_rq_unlock(rq, &flags);
2086
2087 if (likely(!running))
2088 break;
2089 /*
2090 * The switch count is incremented before the actual
2091 * context switch. We thus wait for two switches to be
2092 * sure at least one completed.
2093 */
2094 if ((p->nvcsw - nvcsw) > 1)
2095 break;
2096 if ((p->nivcsw - nivcsw) > 1)
2097 break;
2098
2099 cpu_relax();
2100 }
2101}
2102
2103/*
2104 * wait_task_inactive - wait for a thread to unschedule. 2071 * wait_task_inactive - wait for a thread to unschedule.
2105 * 2072 *
2106 * If @match_state is nonzero, it's the @p->state value just checked and 2073 * If @match_state is nonzero, it's the @p->state value just checked and
@@ -3724,7 +3691,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3724 * the mutex owner just released it and exited. 3691 * the mutex owner just released it and exited.
3725 */ 3692 */
3726 if (probe_kernel_address(&owner->cpu, cpu)) 3693 if (probe_kernel_address(&owner->cpu, cpu))
3727 goto out; 3694 return 0;
3728#else 3695#else
3729 cpu = owner->cpu; 3696 cpu = owner->cpu;
3730#endif 3697#endif
@@ -3734,14 +3701,14 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3734 * the cpu field may no longer be valid. 3701 * the cpu field may no longer be valid.
3735 */ 3702 */
3736 if (cpu >= nr_cpumask_bits) 3703 if (cpu >= nr_cpumask_bits)
3737 goto out; 3704 return 0;
3738 3705
3739 /* 3706 /*
3740 * We need to validate that we can do a 3707 * We need to validate that we can do a
3741 * get_cpu() and that we have the percpu area. 3708 * get_cpu() and that we have the percpu area.
3742 */ 3709 */
3743 if (!cpu_online(cpu)) 3710 if (!cpu_online(cpu))
3744 goto out; 3711 return 0;
3745 3712
3746 rq = cpu_rq(cpu); 3713 rq = cpu_rq(cpu);
3747 3714
@@ -3760,7 +3727,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3760 3727
3761 cpu_relax(); 3728 cpu_relax();
3762 } 3729 }
3763out: 3730
3764 return 1; 3731 return 1;
3765} 3732}
3766#endif 3733#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 13e13d428cd3..8b1797c4545b 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -44,9 +44,6 @@ config HAVE_FTRACE_MCOUNT_RECORD
44 help 44 help
45 See Documentation/trace/ftrace-design.txt 45 See Documentation/trace/ftrace-design.txt
46 46
47config HAVE_HW_BRANCH_TRACER
48 bool
49
50config HAVE_SYSCALL_TRACEPOINTS 47config HAVE_SYSCALL_TRACEPOINTS
51 bool 48 bool
52 help 49 help
@@ -374,14 +371,6 @@ config STACK_TRACER
374 371
375 Say N if unsure. 372 Say N if unsure.
376 373
377config HW_BRANCH_TRACER
378 depends on HAVE_HW_BRANCH_TRACER
379 bool "Trace hw branches"
380 select GENERIC_TRACER
381 help
382 This tracer records all branches on the system in a circular
383 buffer, giving access to the last N branches for each cpu.
384
385config KMEMTRACE 374config KMEMTRACE
386 bool "Trace SLAB allocations" 375 bool "Trace SLAB allocations"
387 select GENERIC_TRACER 376 select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 78edc6490038..ffb1a5b0550e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o 41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
45obj-$(CONFIG_KMEMTRACE) += kmemtrace.o 44obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
46obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 45obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
47obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 46obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 40cd1718fb1b..2cd96399463f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -34,7 +34,6 @@ enum trace_type {
34 TRACE_GRAPH_RET, 34 TRACE_GRAPH_RET,
35 TRACE_GRAPH_ENT, 35 TRACE_GRAPH_ENT,
36 TRACE_USER_STACK, 36 TRACE_USER_STACK,
37 TRACE_HW_BRANCHES,
38 TRACE_KMEM_ALLOC, 37 TRACE_KMEM_ALLOC,
39 TRACE_KMEM_FREE, 38 TRACE_KMEM_FREE,
40 TRACE_BLK, 39 TRACE_BLK,
@@ -103,29 +102,17 @@ struct syscall_trace_exit {
103 long ret; 102 long ret;
104}; 103};
105 104
106struct kprobe_trace_entry { 105struct kprobe_trace_entry_head {
107 struct trace_entry ent; 106 struct trace_entry ent;
108 unsigned long ip; 107 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111}; 108};
112 109
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \ 110struct kretprobe_trace_entry_head {
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent; 111 struct trace_entry ent;
119 unsigned long func; 112 unsigned long func;
120 unsigned long ret_ip; 113 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123}; 114};
124 115
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
129/* 116/*
130 * trace_flag_type is an enumeration that holds different 117 * trace_flag_type is an enumeration that holds different
131 * states when a trace occurs. These are: 118 * states when a trace occurs. These are:
@@ -229,7 +216,6 @@ extern void __ftrace_bad_type(void);
229 TRACE_GRAPH_ENT); \ 216 TRACE_GRAPH_ENT); \
230 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 217 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
231 TRACE_GRAPH_RET); \ 218 TRACE_GRAPH_RET); \
232 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
233 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ 219 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
234 TRACE_KMEM_ALLOC); \ 220 TRACE_KMEM_ALLOC); \
235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 221 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
@@ -470,8 +456,6 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
470 struct trace_array *tr); 456 struct trace_array *tr);
471extern int trace_selftest_startup_branch(struct tracer *trace, 457extern int trace_selftest_startup_branch(struct tracer *trace,
472 struct trace_array *tr); 458 struct trace_array *tr);
473extern int trace_selftest_startup_hw_branches(struct tracer *trace,
474 struct trace_array *tr);
475extern int trace_selftest_startup_ksym(struct tracer *trace, 459extern int trace_selftest_startup_ksym(struct tracer *trace,
476 struct trace_array *tr); 460 struct trace_array *tr);
477#endif /* CONFIG_FTRACE_STARTUP_TEST */ 461#endif /* CONFIG_FTRACE_STARTUP_TEST */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index c16a08f399df..dc008c1240da 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -318,18 +318,6 @@ FTRACE_ENTRY(branch, trace_branch,
318 __entry->func, __entry->file, __entry->correct) 318 __entry->func, __entry->file, __entry->correct)
319); 319);
320 320
321FTRACE_ENTRY(hw_branch, hw_branch_entry,
322
323 TRACE_HW_BRANCHES,
324
325 F_STRUCT(
326 __field( u64, from )
327 __field( u64, to )
328 ),
329
330 F_printk("from: %llx to: %llx", __entry->from, __entry->to)
331);
332
333FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, 321FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
334 322
335 TRACE_KMEM_ALLOC, 323 TRACE_KMEM_ALLOC,
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 239ea5d77d68..57bb1bb32999 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1400,7 +1400,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1400 } 1400 }
1401 1401
1402 err = -EINVAL; 1402 err = -EINVAL;
1403 if (!call) 1403 if (&call->list == &ftrace_events)
1404 goto out_unlock; 1404 goto out_unlock;
1405 1405
1406 err = -EEXIST; 1406 err = -EEXIST;
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
deleted file mode 100644
index 7b97000745f5..000000000000
--- a/kernel/trace/trace_hw_branches.c
+++ /dev/null
@@ -1,312 +0,0 @@
1/*
2 * h/w branch tracer for x86 based on BTS
3 *
4 * Copyright (C) 2008-2009 Intel Corporation.
5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
6 */
7#include <linux/kallsyms.h>
8#include <linux/debugfs.h>
9#include <linux/ftrace.h>
10#include <linux/module.h>
11#include <linux/cpu.h>
12#include <linux/smp.h>
13#include <linux/fs.h>
14
15#include <asm/ds.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20
21#define BTS_BUFFER_SIZE (1 << 13)
22
23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25
26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27
28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly;
30static struct trace_array *hw_branch_trace __read_mostly;
31
32
33static void bts_trace_init_cpu(int cpu)
34{
35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
39
40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
41 per_cpu(hwb_tracer, cpu) = NULL;
42}
43
44static int bts_trace_init(struct trace_array *tr)
45{
46 int cpu;
47
48 hw_branch_trace = tr;
49 trace_hw_branches_enabled = 0;
50
51 get_online_cpus();
52 for_each_online_cpu(cpu) {
53 bts_trace_init_cpu(cpu);
54
55 if (likely(per_cpu(hwb_tracer, cpu)))
56 trace_hw_branches_enabled = 1;
57 }
58 trace_hw_branches_suspended = 0;
59 put_online_cpus();
60
61 /* If we could not enable tracing on a single cpu, we fail. */
62 return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
63}
64
65static void bts_trace_reset(struct trace_array *tr)
66{
67 int cpu;
68
69 get_online_cpus();
70 for_each_online_cpu(cpu) {
71 if (likely(per_cpu(hwb_tracer, cpu))) {
72 ds_release_bts(per_cpu(hwb_tracer, cpu));
73 per_cpu(hwb_tracer, cpu) = NULL;
74 }
75 }
76 trace_hw_branches_enabled = 0;
77 trace_hw_branches_suspended = 0;
78 put_online_cpus();
79}
80
81static void bts_trace_start(struct trace_array *tr)
82{
83 int cpu;
84
85 get_online_cpus();
86 for_each_online_cpu(cpu)
87 if (likely(per_cpu(hwb_tracer, cpu)))
88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
89 trace_hw_branches_suspended = 0;
90 put_online_cpus();
91}
92
93static void bts_trace_stop(struct trace_array *tr)
94{
95 int cpu;
96
97 get_online_cpus();
98 for_each_online_cpu(cpu)
99 if (likely(per_cpu(hwb_tracer, cpu)))
100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
101 trace_hw_branches_suspended = 1;
102 put_online_cpus();
103}
104
105static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
106 unsigned long action, void *hcpu)
107{
108 int cpu = (long)hcpu;
109
110 switch (action) {
111 case CPU_ONLINE:
112 case CPU_DOWN_FAILED:
113 /* The notification is sent with interrupts enabled. */
114 if (trace_hw_branches_enabled) {
115 bts_trace_init_cpu(cpu);
116
117 if (trace_hw_branches_suspended &&
118 likely(per_cpu(hwb_tracer, cpu)))
119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
120 }
121 break;
122
123 case CPU_DOWN_PREPARE:
124 /* The notification is sent with interrupts enabled. */
125 if (likely(per_cpu(hwb_tracer, cpu))) {
126 ds_release_bts(per_cpu(hwb_tracer, cpu));
127 per_cpu(hwb_tracer, cpu) = NULL;
128 }
129 }
130
131 return NOTIFY_DONE;
132}
133
134static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
135 .notifier_call = bts_hotcpu_handler
136};
137
138static void bts_trace_print_header(struct seq_file *m)
139{
140 seq_puts(m, "# CPU# TO <- FROM\n");
141}
142
143static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
144{
145 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
146 struct trace_entry *entry = iter->ent;
147 struct trace_seq *seq = &iter->seq;
148 struct hw_branch_entry *it;
149
150 trace_assign_type(it, entry);
151
152 if (entry->type == TRACE_HW_BRANCHES) {
153 if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
154 seq_print_ip_sym(seq, it->to, symflags) &&
155 trace_seq_printf(seq, "\t <- ") &&
156 seq_print_ip_sym(seq, it->from, symflags) &&
157 trace_seq_printf(seq, "\n"))
158 return TRACE_TYPE_HANDLED;
159 return TRACE_TYPE_PARTIAL_LINE;
160 }
161 return TRACE_TYPE_UNHANDLED;
162}
163
164void trace_hw_branch(u64 from, u64 to)
165{
166 struct ftrace_event_call *call = &event_hw_branch;
167 struct trace_array *tr = hw_branch_trace;
168 struct ring_buffer_event *event;
169 struct ring_buffer *buf;
170 struct hw_branch_entry *entry;
171 unsigned long irq1;
172 int cpu;
173
174 if (unlikely(!tr))
175 return;
176
177 if (unlikely(!trace_hw_branches_enabled))
178 return;
179
180 local_irq_save(irq1);
181 cpu = raw_smp_processor_id();
182 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
183 goto out;
184
185 buf = tr->buffer;
186 event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
187 sizeof(*entry), 0, 0);
188 if (!event)
189 goto out;
190 entry = ring_buffer_event_data(event);
191 tracing_generic_entry_update(&entry->ent, 0, from);
192 entry->ent.type = TRACE_HW_BRANCHES;
193 entry->from = from;
194 entry->to = to;
195 if (!filter_check_discard(call, entry, buf, event))
196 trace_buffer_unlock_commit(buf, event, 0, 0);
197
198 out:
199 atomic_dec(&tr->data[cpu]->disabled);
200 local_irq_restore(irq1);
201}
202
203static void trace_bts_at(const struct bts_trace *trace, void *at)
204{
205 struct bts_struct bts;
206 int err = 0;
207
208 WARN_ON_ONCE(!trace->read);
209 if (!trace->read)
210 return;
211
212 err = trace->read(this_tracer, at, &bts);
213 if (err < 0)
214 return;
215
216 switch (bts.qualifier) {
217 case BTS_BRANCH:
218 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
219 break;
220 }
221}
222
223/*
224 * Collect the trace on the current cpu and write it into the ftrace buffer.
225 *
226 * pre: tracing must be suspended on the current cpu
227 */
228static void trace_bts_cpu(void *arg)
229{
230 struct trace_array *tr = (struct trace_array *)arg;
231 const struct bts_trace *trace;
232 unsigned char *at;
233
234 if (unlikely(!tr))
235 return;
236
237 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
238 return;
239
240 if (unlikely(!this_tracer))
241 return;
242
243 trace = ds_read_bts(this_tracer);
244 if (!trace)
245 return;
246
247 for (at = trace->ds.top; (void *)at < trace->ds.end;
248 at += trace->ds.size)
249 trace_bts_at(trace, at);
250
251 for (at = trace->ds.begin; (void *)at < trace->ds.top;
252 at += trace->ds.size)
253 trace_bts_at(trace, at);
254}
255
256static void trace_bts_prepare(struct trace_iterator *iter)
257{
258 int cpu;
259
260 get_online_cpus();
261 for_each_online_cpu(cpu)
262 if (likely(per_cpu(hwb_tracer, cpu)))
263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
264 /*
265 * We need to collect the trace on the respective cpu since ftrace
266 * implicitly adds the record for the current cpu.
267 * Once that is more flexible, we could collect the data from any cpu.
268 */
269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
270
271 for_each_online_cpu(cpu)
272 if (likely(per_cpu(hwb_tracer, cpu)))
273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
274 put_online_cpus();
275}
276
277static void trace_bts_close(struct trace_iterator *iter)
278{
279 tracing_reset_online_cpus(iter->tr);
280}
281
282void trace_hw_branch_oops(void)
283{
284 if (this_tracer) {
285 ds_suspend_bts_noirq(this_tracer);
286 trace_bts_cpu(hw_branch_trace);
287 ds_resume_bts_noirq(this_tracer);
288 }
289}
290
291struct tracer bts_tracer __read_mostly =
292{
293 .name = "hw-branch-tracer",
294 .init = bts_trace_init,
295 .reset = bts_trace_reset,
296 .print_header = bts_trace_print_header,
297 .print_line = bts_trace_print_line,
298 .start = bts_trace_start,
299 .stop = bts_trace_stop,
300 .open = trace_bts_prepare,
301 .close = trace_bts_close,
302#ifdef CONFIG_FTRACE_SELFTEST
303 .selftest = trace_selftest_startup_hw_branches,
304#endif /* CONFIG_FTRACE_SELFTEST */
305};
306
307__init static int init_bts_trace(void)
308{
309 register_hotcpu_notifier(&bts_hotcpu_notifier);
310 return register_tracer(&bts_tracer);
311}
312device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 0e3ded64cdb7..9a082bba9537 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -29,6 +29,8 @@
29#include <linux/ctype.h> 29#include <linux/ctype.h>
30#include <linux/ptrace.h> 30#include <linux/ptrace.h>
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <asm/bitsperlong.h>
32 34
33#include "trace.h" 35#include "trace.h"
34#include "trace_output.h" 36#include "trace_output.h"
@@ -40,7 +42,6 @@
40 42
41/* Reserved field names */ 43/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip" 44#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip" 45#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func" 46#define FIELD_STRING_FUNC "__probe_func"
46 47
@@ -52,56 +53,102 @@ const char *reserved_field_names[] = {
52 "common_tgid", 53 "common_tgid",
53 "common_lock_depth", 54 "common_lock_depth",
54 FIELD_STRING_IP, 55 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP, 56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC, 57 FIELD_STRING_FUNC,
58}; 58};
59 59
60struct fetch_func { 60/* Printing function type */
61 unsigned long (*func)(struct pt_regs *, void *); 61typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *);
62#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
63#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
64
65/* Printing in basic type function template */
66#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
67static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
68 const char *name, void *data)\
69{ \
70 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
71} \
72static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
73
74DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
75DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
76DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
77DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
78DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
80DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
82
83/* Data fetch function type */
84typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
85
86struct fetch_param {
87 fetch_func_t fn;
62 void *data; 88 void *data;
63}; 89};
64 90
65static __kprobes unsigned long call_fetch(struct fetch_func *f, 91static __kprobes void call_fetch(struct fetch_param *fprm,
66 struct pt_regs *regs) 92 struct pt_regs *regs, void *dest)
67{ 93{
68 return f->func(regs, f->data); 94 return fprm->fn(regs, fprm->data, dest);
69} 95}
70 96
71/* fetch handlers */ 97#define FETCH_FUNC_NAME(kind, type) fetch_##kind##_##type
72static __kprobes unsigned long fetch_register(struct pt_regs *regs, 98/*
73 void *offset) 99 * Define macro for basic types - we don't need to define s* types, because
74{ 100 * we have to care only about bitwidth at recording time.
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset)); 101 */
102#define DEFINE_BASIC_FETCH_FUNCS(kind) \
103DEFINE_FETCH_##kind(u8) \
104DEFINE_FETCH_##kind(u16) \
105DEFINE_FETCH_##kind(u32) \
106DEFINE_FETCH_##kind(u64)
107
108#define CHECK_BASIC_FETCH_FUNCS(kind, fn) \
109 ((FETCH_FUNC_NAME(kind, u8) == fn) || \
110 (FETCH_FUNC_NAME(kind, u16) == fn) || \
111 (FETCH_FUNC_NAME(kind, u32) == fn) || \
112 (FETCH_FUNC_NAME(kind, u64) == fn))
113
114/* Data fetch function templates */
115#define DEFINE_FETCH_reg(type) \
116static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
117 void *offset, void *dest) \
118{ \
119 *(type *)dest = (type)regs_get_register(regs, \
120 (unsigned int)((unsigned long)offset)); \
76} 121}
77 122DEFINE_BASIC_FETCH_FUNCS(reg)
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs, 123
79 void *num) 124#define DEFINE_FETCH_stack(type) \
80{ 125static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
81 return regs_get_kernel_stack_nth(regs, 126 void *offset, void *dest) \
82 (unsigned int)((unsigned long)num)); 127{ \
128 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
129 (unsigned int)((unsigned long)offset)); \
83} 130}
131DEFINE_BASIC_FETCH_FUNCS(stack)
84 132
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) 133#define DEFINE_FETCH_retval(type) \
86{ 134static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
87 unsigned long retval; 135 void *dummy, void *dest) \
88 136{ \
89 if (probe_kernel_address(addr, retval)) 137 *(type *)dest = (type)regs_return_value(regs); \
90 return 0;
91 return retval;
92} 138}
93 139DEFINE_BASIC_FETCH_FUNCS(retval)
94static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, 140
95 void *dummy) 141#define DEFINE_FETCH_memory(type) \
96{ 142static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
97 return regs_return_value(regs); 143 void *addr, void *dest) \
98} 144{ \
99 145 type retval; \
100static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, 146 if (probe_kernel_address(addr, retval)) \
101 void *dummy) 147 *(type *)dest = 0; \
102{ 148 else \
103 return kernel_stack_pointer(regs); 149 *(type *)dest = retval; \
104} 150}
151DEFINE_BASIC_FETCH_FUNCS(memory)
105 152
106/* Memory fetching by symbol */ 153/* Memory fetching by symbol */
107struct symbol_cache { 154struct symbol_cache {
@@ -145,51 +192,126 @@ static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
145 return sc; 192 return sc;
146} 193}
147 194
148static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) 195#define DEFINE_FETCH_symbol(type) \
149{ 196static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
150 struct symbol_cache *sc = data; 197 void *data, void *dest) \
151 198{ \
152 if (sc->addr) 199 struct symbol_cache *sc = data; \
153 return fetch_memory(regs, (void *)sc->addr); 200 if (sc->addr) \
154 else 201 fetch_memory_##type(regs, (void *)sc->addr, dest); \
155 return 0; 202 else \
203 *(type *)dest = 0; \
156} 204}
205DEFINE_BASIC_FETCH_FUNCS(symbol)
157 206
158/* Special indirect memory access interface */ 207/* Dereference memory access function */
159struct indirect_fetch_data { 208struct deref_fetch_param {
160 struct fetch_func orig; 209 struct fetch_param orig;
161 long offset; 210 long offset;
162}; 211};
163 212
164static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data) 213#define DEFINE_FETCH_deref(type) \
165{ 214static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
166 struct indirect_fetch_data *ind = data; 215 void *data, void *dest) \
167 unsigned long addr; 216{ \
168 217 struct deref_fetch_param *dprm = data; \
169 addr = call_fetch(&ind->orig, regs); 218 unsigned long addr; \
170 if (addr) { 219 call_fetch(&dprm->orig, regs, &addr); \
171 addr += ind->offset; 220 if (addr) { \
172 return fetch_memory(regs, (void *)addr); 221 addr += dprm->offset; \
173 } else 222 fetch_memory_##type(regs, (void *)addr, dest); \
174 return 0; 223 } else \
224 *(type *)dest = 0; \
175} 225}
226DEFINE_BASIC_FETCH_FUNCS(deref)
176 227
177static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) 228static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
178{ 229{
179 if (data->orig.func == fetch_indirect) 230 if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn))
180 free_indirect_fetch_data(data->orig.data); 231 free_deref_fetch_param(data->orig.data);
181 else if (data->orig.func == fetch_symbol) 232 else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn))
182 free_symbol_cache(data->orig.data); 233 free_symbol_cache(data->orig.data);
183 kfree(data); 234 kfree(data);
184} 235}
185 236
237/* Default (unsigned long) fetch type */
238#define __DEFAULT_FETCH_TYPE(t) u##t
239#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
240#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
241#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
242
243#define ASSIGN_FETCH_FUNC(kind, type) \
244 .kind = FETCH_FUNC_NAME(kind, type)
245
246#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
247 {.name = #ptype, \
248 .size = sizeof(ftype), \
249 .is_signed = sign, \
250 .print = PRINT_TYPE_FUNC_NAME(ptype), \
251 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
252ASSIGN_FETCH_FUNC(reg, ftype), \
253ASSIGN_FETCH_FUNC(stack, ftype), \
254ASSIGN_FETCH_FUNC(retval, ftype), \
255ASSIGN_FETCH_FUNC(memory, ftype), \
256ASSIGN_FETCH_FUNC(symbol, ftype), \
257ASSIGN_FETCH_FUNC(deref, ftype), \
258 }
259
260/* Fetch type information table */
261static const struct fetch_type {
262 const char *name; /* Name of type */
263 size_t size; /* Byte size of type */
264 int is_signed; /* Signed flag */
265 print_type_func_t print; /* Print functions */
266 const char *fmt; /* Fromat string */
267 /* Fetch functions */
268 fetch_func_t reg;
269 fetch_func_t stack;
270 fetch_func_t retval;
271 fetch_func_t memory;
272 fetch_func_t symbol;
273 fetch_func_t deref;
274} fetch_type_table[] = {
275 ASSIGN_FETCH_TYPE(u8, u8, 0),
276 ASSIGN_FETCH_TYPE(u16, u16, 0),
277 ASSIGN_FETCH_TYPE(u32, u32, 0),
278 ASSIGN_FETCH_TYPE(u64, u64, 0),
279 ASSIGN_FETCH_TYPE(s8, u8, 1),
280 ASSIGN_FETCH_TYPE(s16, u16, 1),
281 ASSIGN_FETCH_TYPE(s32, u32, 1),
282 ASSIGN_FETCH_TYPE(s64, u64, 1),
283};
284
285static const struct fetch_type *find_fetch_type(const char *type)
286{
287 int i;
288
289 if (!type)
290 type = DEFAULT_FETCH_TYPE_STR;
291
292 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
293 if (strcmp(type, fetch_type_table[i].name) == 0)
294 return &fetch_type_table[i];
295 return NULL;
296}
297
298/* Special function : only accept unsigned long */
299static __kprobes void fetch_stack_address(struct pt_regs *regs,
300 void *dummy, void *dest)
301{
302 *(unsigned long *)dest = kernel_stack_pointer(regs);
303}
304
186/** 305/**
187 * Kprobe event core functions 306 * Kprobe event core functions
188 */ 307 */
189 308
190struct probe_arg { 309struct probe_arg {
191 struct fetch_func fetch; 310 struct fetch_param fetch;
192 const char *name; 311 unsigned int offset; /* Offset from argument entry */
312 const char *name; /* Name of this argument */
313 const char *comm; /* Command of this argument */
314 const struct fetch_type *type; /* Type of this argument */
193}; 315};
194 316
195/* Flags for trace_probe */ 317/* Flags for trace_probe */
@@ -204,6 +326,7 @@ struct trace_probe {
204 const char *symbol; /* symbol name */ 326 const char *symbol; /* symbol name */
205 struct ftrace_event_class class; 327 struct ftrace_event_class class;
206 struct ftrace_event_call call; 328 struct ftrace_event_call call;
329 ssize_t size; /* trace entry size */
207 unsigned int nr_args; 330 unsigned int nr_args;
208 struct probe_arg args[]; 331 struct probe_arg args[];
209}; 332};
@@ -212,6 +335,7 @@ struct trace_probe {
212 (offsetof(struct trace_probe, args) + \ 335 (offsetof(struct trace_probe, args) + \
213 (sizeof(struct probe_arg) * (n))) 336 (sizeof(struct probe_arg) * (n)))
214 337
338
215static __kprobes int probe_is_return(struct trace_probe *tp) 339static __kprobes int probe_is_return(struct trace_probe *tp)
216{ 340{
217 return tp->rp.handler != NULL; 341 return tp->rp.handler != NULL;
@@ -222,49 +346,6 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp)
222 return tp->symbol ? tp->symbol : "unknown"; 346 return tp->symbol ? tp->symbol : "unknown";
223} 347}
224 348
225static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
226{
227 int ret = -EINVAL;
228
229 if (ff->func == fetch_register) {
230 const char *name;
231 name = regs_query_register_name((unsigned int)((long)ff->data));
232 ret = snprintf(buf, n, "%%%s", name);
233 } else if (ff->func == fetch_stack)
234 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
235 else if (ff->func == fetch_memory)
236 ret = snprintf(buf, n, "@0x%p", ff->data);
237 else if (ff->func == fetch_symbol) {
238 struct symbol_cache *sc = ff->data;
239 if (sc->offset)
240 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
241 sc->offset);
242 else
243 ret = snprintf(buf, n, "@%s", sc->symbol);
244 } else if (ff->func == fetch_retvalue)
245 ret = snprintf(buf, n, "$retval");
246 else if (ff->func == fetch_stack_address)
247 ret = snprintf(buf, n, "$stack");
248 else if (ff->func == fetch_indirect) {
249 struct indirect_fetch_data *id = ff->data;
250 size_t l = 0;
251 ret = snprintf(buf, n, "%+ld(", id->offset);
252 if (ret >= n)
253 goto end;
254 l += ret;
255 ret = probe_arg_string(buf + l, n - l, &id->orig);
256 if (ret < 0)
257 goto end;
258 l += ret;
259 ret = snprintf(buf + l, n - l, ")");
260 ret += l;
261 }
262end:
263 if (ret >= n)
264 return -ENOSPC;
265 return ret;
266}
267
268static int register_probe_event(struct trace_probe *tp); 349static int register_probe_event(struct trace_probe *tp);
269static void unregister_probe_event(struct trace_probe *tp); 350static void unregister_probe_event(struct trace_probe *tp);
270 351
@@ -348,11 +429,12 @@ error:
348 429
349static void free_probe_arg(struct probe_arg *arg) 430static void free_probe_arg(struct probe_arg *arg)
350{ 431{
351 if (arg->fetch.func == fetch_symbol) 432 if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn))
433 free_deref_fetch_param(arg->fetch.data);
434 else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn))
352 free_symbol_cache(arg->fetch.data); 435 free_symbol_cache(arg->fetch.data);
353 else if (arg->fetch.func == fetch_indirect)
354 free_indirect_fetch_data(arg->fetch.data);
355 kfree(arg->name); 436 kfree(arg->name);
437 kfree(arg->comm);
356} 438}
357 439
358static void free_trace_probe(struct trace_probe *tp) 440static void free_trace_probe(struct trace_probe *tp)
@@ -458,28 +540,30 @@ static int split_symbol_offset(char *symbol, unsigned long *offset)
458#define PARAM_MAX_ARGS 16 540#define PARAM_MAX_ARGS 16
459#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) 541#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
460 542
461static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) 543static int parse_probe_vars(char *arg, const struct fetch_type *t,
544 struct fetch_param *f, int is_return)
462{ 545{
463 int ret = 0; 546 int ret = 0;
464 unsigned long param; 547 unsigned long param;
465 548
466 if (strcmp(arg, "retval") == 0) { 549 if (strcmp(arg, "retval") == 0) {
467 if (is_return) { 550 if (is_return)
468 ff->func = fetch_retvalue; 551 f->fn = t->retval;
469 ff->data = NULL; 552 else
470 } else
471 ret = -EINVAL; 553 ret = -EINVAL;
472 } else if (strncmp(arg, "stack", 5) == 0) { 554 } else if (strncmp(arg, "stack", 5) == 0) {
473 if (arg[5] == '\0') { 555 if (arg[5] == '\0') {
474 ff->func = fetch_stack_address; 556 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
475 ff->data = NULL; 557 f->fn = fetch_stack_address;
558 else
559 ret = -EINVAL;
476 } else if (isdigit(arg[5])) { 560 } else if (isdigit(arg[5])) {
477 ret = strict_strtoul(arg + 5, 10, &param); 561 ret = strict_strtoul(arg + 5, 10, &param);
478 if (ret || param > PARAM_MAX_STACK) 562 if (ret || param > PARAM_MAX_STACK)
479 ret = -EINVAL; 563 ret = -EINVAL;
480 else { 564 else {
481 ff->func = fetch_stack; 565 f->fn = t->stack;
482 ff->data = (void *)param; 566 f->data = (void *)param;
483 } 567 }
484 } else 568 } else
485 ret = -EINVAL; 569 ret = -EINVAL;
@@ -489,7 +573,8 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
489} 573}
490 574
491/* Recursive argument parser */ 575/* Recursive argument parser */
492static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) 576static int __parse_probe_arg(char *arg, const struct fetch_type *t,
577 struct fetch_param *f, int is_return)
493{ 578{
494 int ret = 0; 579 int ret = 0;
495 unsigned long param; 580 unsigned long param;
@@ -498,13 +583,13 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
498 583
499 switch (arg[0]) { 584 switch (arg[0]) {
500 case '$': 585 case '$':
501 ret = parse_probe_vars(arg + 1, ff, is_return); 586 ret = parse_probe_vars(arg + 1, t, f, is_return);
502 break; 587 break;
503 case '%': /* named register */ 588 case '%': /* named register */
504 ret = regs_query_register_offset(arg + 1); 589 ret = regs_query_register_offset(arg + 1);
505 if (ret >= 0) { 590 if (ret >= 0) {
506 ff->func = fetch_register; 591 f->fn = t->reg;
507 ff->data = (void *)(unsigned long)ret; 592 f->data = (void *)(unsigned long)ret;
508 ret = 0; 593 ret = 0;
509 } 594 }
510 break; 595 break;
@@ -513,26 +598,22 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
513 ret = strict_strtoul(arg + 1, 0, &param); 598 ret = strict_strtoul(arg + 1, 0, &param);
514 if (ret) 599 if (ret)
515 break; 600 break;
516 ff->func = fetch_memory; 601 f->fn = t->memory;
517 ff->data = (void *)param; 602 f->data = (void *)param;
518 } else { 603 } else {
519 ret = split_symbol_offset(arg + 1, &offset); 604 ret = split_symbol_offset(arg + 1, &offset);
520 if (ret) 605 if (ret)
521 break; 606 break;
522 ff->data = alloc_symbol_cache(arg + 1, offset); 607 f->data = alloc_symbol_cache(arg + 1, offset);
523 if (ff->data) 608 if (f->data)
524 ff->func = fetch_symbol; 609 f->fn = t->symbol;
525 else
526 ret = -EINVAL;
527 } 610 }
528 break; 611 break;
529 case '+': /* indirect memory */ 612 case '+': /* deref memory */
530 case '-': 613 case '-':
531 tmp = strchr(arg, '('); 614 tmp = strchr(arg, '(');
532 if (!tmp) { 615 if (!tmp)
533 ret = -EINVAL;
534 break; 616 break;
535 }
536 *tmp = '\0'; 617 *tmp = '\0';
537 ret = strict_strtol(arg + 1, 0, &offset); 618 ret = strict_strtol(arg + 1, 0, &offset);
538 if (ret) 619 if (ret)
@@ -542,38 +623,58 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
542 arg = tmp + 1; 623 arg = tmp + 1;
543 tmp = strrchr(arg, ')'); 624 tmp = strrchr(arg, ')');
544 if (tmp) { 625 if (tmp) {
545 struct indirect_fetch_data *id; 626 struct deref_fetch_param *dprm;
627 const struct fetch_type *t2 = find_fetch_type(NULL);
546 *tmp = '\0'; 628 *tmp = '\0';
547 id = kzalloc(sizeof(struct indirect_fetch_data), 629 dprm = kzalloc(sizeof(struct deref_fetch_param),
548 GFP_KERNEL); 630 GFP_KERNEL);
549 if (!id) 631 if (!dprm)
550 return -ENOMEM; 632 return -ENOMEM;
551 id->offset = offset; 633 dprm->offset = offset;
552 ret = __parse_probe_arg(arg, &id->orig, is_return); 634 ret = __parse_probe_arg(arg, t2, &dprm->orig,
635 is_return);
553 if (ret) 636 if (ret)
554 kfree(id); 637 kfree(dprm);
555 else { 638 else {
556 ff->func = fetch_indirect; 639 f->fn = t->deref;
557 ff->data = (void *)id; 640 f->data = (void *)dprm;
558 } 641 }
559 } else 642 }
560 ret = -EINVAL;
561 break; 643 break;
562 default:
563 /* TODO: support custom handler */
564 ret = -EINVAL;
565 } 644 }
645 if (!ret && !f->fn)
646 ret = -EINVAL;
566 return ret; 647 return ret;
567} 648}
568 649
569/* String length checking wrapper */ 650/* String length checking wrapper */
570static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) 651static int parse_probe_arg(char *arg, struct trace_probe *tp,
652 struct probe_arg *parg, int is_return)
571{ 653{
654 const char *t;
655
572 if (strlen(arg) > MAX_ARGSTR_LEN) { 656 if (strlen(arg) > MAX_ARGSTR_LEN) {
573 pr_info("Argument is too long.: %s\n", arg); 657 pr_info("Argument is too long.: %s\n", arg);
574 return -ENOSPC; 658 return -ENOSPC;
575 } 659 }
576 return __parse_probe_arg(arg, ff, is_return); 660 parg->comm = kstrdup(arg, GFP_KERNEL);
661 if (!parg->comm) {
662 pr_info("Failed to allocate memory for command '%s'.\n", arg);
663 return -ENOMEM;
664 }
665 t = strchr(parg->comm, ':');
666 if (t) {
667 arg[t - parg->comm] = '\0';
668 t++;
669 }
670 parg->type = find_fetch_type(t);
671 if (!parg->type) {
672 pr_info("Unsupported type: %s\n", t);
673 return -EINVAL;
674 }
675 parg->offset = tp->size;
676 tp->size += parg->type->size;
677 return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
577} 678}
578 679
579/* Return 1 if name is reserved or already used by another argument */ 680/* Return 1 if name is reserved or already used by another argument */
@@ -603,15 +704,18 @@ static int create_trace_probe(int argc, char **argv)
603 * @ADDR : fetch memory at ADDR (ADDR should be in kernel) 704 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
604 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) 705 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
605 * %REG : fetch register REG 706 * %REG : fetch register REG
606 * Indirect memory fetch: 707 * Dereferencing memory fetch:
607 * +|-offs(ARG) : fetch memory at ARG +|- offs address. 708 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
608 * Alias name of args: 709 * Alias name of args:
609 * NAME=FETCHARG : set NAME as alias of FETCHARG. 710 * NAME=FETCHARG : set NAME as alias of FETCHARG.
711 * Type of args:
712 * FETCHARG:TYPE : use TYPE instead of unsigned long.
610 */ 713 */
611 struct trace_probe *tp; 714 struct trace_probe *tp;
612 int i, ret = 0; 715 int i, ret = 0;
613 int is_return = 0, is_delete = 0; 716 int is_return = 0, is_delete = 0;
614 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; 717 char *symbol = NULL, *event = NULL, *group = NULL;
718 char *arg, *tmp;
615 unsigned long offset = 0; 719 unsigned long offset = 0;
616 void *addr = NULL; 720 void *addr = NULL;
617 char buf[MAX_EVENT_NAME_LEN]; 721 char buf[MAX_EVENT_NAME_LEN];
@@ -724,13 +828,6 @@ static int create_trace_probe(int argc, char **argv)
724 else 828 else
725 arg = argv[i]; 829 arg = argv[i];
726 830
727 if (conflict_field_name(argv[i], tp->args, i)) {
728 pr_info("Argument%d name '%s' conflicts with "
729 "another field.\n", i, argv[i]);
730 ret = -EINVAL;
731 goto error;
732 }
733
734 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); 831 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
735 if (!tp->args[i].name) { 832 if (!tp->args[i].name) {
736 pr_info("Failed to allocate argument%d name '%s'.\n", 833 pr_info("Failed to allocate argument%d name '%s'.\n",
@@ -738,9 +835,19 @@ static int create_trace_probe(int argc, char **argv)
738 ret = -ENOMEM; 835 ret = -ENOMEM;
739 goto error; 836 goto error;
740 } 837 }
838 tmp = strchr(tp->args[i].name, ':');
839 if (tmp)
840 *tmp = '_'; /* convert : to _ */
841
842 if (conflict_field_name(tp->args[i].name, tp->args, i)) {
843 pr_info("Argument%d name '%s' conflicts with "
844 "another field.\n", i, argv[i]);
845 ret = -EINVAL;
846 goto error;
847 }
741 848
742 /* Parse fetch argument */ 849 /* Parse fetch argument */
743 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); 850 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
744 if (ret) { 851 if (ret) {
745 pr_info("Parse error at argument%d. (%d)\n", i, ret); 852 pr_info("Parse error at argument%d. (%d)\n", i, ret);
746 kfree(tp->args[i].name); 853 kfree(tp->args[i].name);
@@ -795,8 +902,7 @@ static void probes_seq_stop(struct seq_file *m, void *v)
795static int probes_seq_show(struct seq_file *m, void *v) 902static int probes_seq_show(struct seq_file *m, void *v)
796{ 903{
797 struct trace_probe *tp = v; 904 struct trace_probe *tp = v;
798 int i, ret; 905 int i;
799 char buf[MAX_ARGSTR_LEN + 1];
800 906
801 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); 907 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
802 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); 908 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
@@ -808,15 +914,10 @@ static int probes_seq_show(struct seq_file *m, void *v)
808 else 914 else
809 seq_printf(m, " %s", probe_symbol(tp)); 915 seq_printf(m, " %s", probe_symbol(tp));
810 916
811 for (i = 0; i < tp->nr_args; i++) { 917 for (i = 0; i < tp->nr_args; i++)
812 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); 918 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
813 if (ret < 0) {
814 pr_warning("Argument%d decoding error(%d).\n", i, ret);
815 return ret;
816 }
817 seq_printf(m, " %s=%s", tp->args[i].name, buf);
818 }
819 seq_printf(m, "\n"); 919 seq_printf(m, "\n");
920
820 return 0; 921 return 0;
821} 922}
822 923
@@ -946,9 +1047,10 @@ static const struct file_operations kprobe_profile_ops = {
946static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 1047static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
947{ 1048{
948 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1049 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
949 struct kprobe_trace_entry *entry; 1050 struct kprobe_trace_entry_head *entry;
950 struct ring_buffer_event *event; 1051 struct ring_buffer_event *event;
951 struct ring_buffer *buffer; 1052 struct ring_buffer *buffer;
1053 u8 *data;
952 int size, i, pc; 1054 int size, i, pc;
953 unsigned long irq_flags; 1055 unsigned long irq_flags;
954 struct ftrace_event_call *call = &tp->call; 1056 struct ftrace_event_call *call = &tp->call;
@@ -958,7 +1060,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
958 local_save_flags(irq_flags); 1060 local_save_flags(irq_flags);
959 pc = preempt_count(); 1061 pc = preempt_count();
960 1062
961 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1063 size = sizeof(*entry) + tp->size;
962 1064
963 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1065 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
964 size, irq_flags, pc); 1066 size, irq_flags, pc);
@@ -966,10 +1068,10 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
966 return; 1068 return;
967 1069
968 entry = ring_buffer_event_data(event); 1070 entry = ring_buffer_event_data(event);
969 entry->nargs = tp->nr_args;
970 entry->ip = (unsigned long)kp->addr; 1071 entry->ip = (unsigned long)kp->addr;
1072 data = (u8 *)&entry[1];
971 for (i = 0; i < tp->nr_args; i++) 1073 for (i = 0; i < tp->nr_args; i++)
972 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1074 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
973 1075
974 if (!filter_current_check_discard(buffer, call, entry, event)) 1076 if (!filter_current_check_discard(buffer, call, entry, event))
975 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1077 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -980,9 +1082,10 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
980 struct pt_regs *regs) 1082 struct pt_regs *regs)
981{ 1083{
982 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1084 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
983 struct kretprobe_trace_entry *entry; 1085 struct kretprobe_trace_entry_head *entry;
984 struct ring_buffer_event *event; 1086 struct ring_buffer_event *event;
985 struct ring_buffer *buffer; 1087 struct ring_buffer *buffer;
1088 u8 *data;
986 int size, i, pc; 1089 int size, i, pc;
987 unsigned long irq_flags; 1090 unsigned long irq_flags;
988 struct ftrace_event_call *call = &tp->call; 1091 struct ftrace_event_call *call = &tp->call;
@@ -990,7 +1093,7 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
990 local_save_flags(irq_flags); 1093 local_save_flags(irq_flags);
991 pc = preempt_count(); 1094 pc = preempt_count();
992 1095
993 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1096 size = sizeof(*entry) + tp->size;
994 1097
995 event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 1098 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
996 size, irq_flags, pc); 1099 size, irq_flags, pc);
@@ -998,11 +1101,11 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
998 return; 1101 return;
999 1102
1000 entry = ring_buffer_event_data(event); 1103 entry = ring_buffer_event_data(event);
1001 entry->nargs = tp->nr_args;
1002 entry->func = (unsigned long)tp->rp.kp.addr; 1104 entry->func = (unsigned long)tp->rp.kp.addr;
1003 entry->ret_ip = (unsigned long)ri->ret_addr; 1105 entry->ret_ip = (unsigned long)ri->ret_addr;
1106 data = (u8 *)&entry[1];
1004 for (i = 0; i < tp->nr_args; i++) 1107 for (i = 0; i < tp->nr_args; i++)
1005 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1108 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1006 1109
1007 if (!filter_current_check_discard(buffer, call, entry, event)) 1110 if (!filter_current_check_discard(buffer, call, entry, event))
1008 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1111 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1013,12 +1116,13 @@ enum print_line_t
1013print_kprobe_event(struct trace_iterator *iter, int flags, 1116print_kprobe_event(struct trace_iterator *iter, int flags,
1014 struct trace_event *event) 1117 struct trace_event *event)
1015{ 1118{
1016 struct kprobe_trace_entry *field; 1119 struct kprobe_trace_entry_head *field;
1017 struct trace_seq *s = &iter->seq; 1120 struct trace_seq *s = &iter->seq;
1018 struct trace_probe *tp; 1121 struct trace_probe *tp;
1122 u8 *data;
1019 int i; 1123 int i;
1020 1124
1021 field = (struct kprobe_trace_entry *)iter->ent; 1125 field = (struct kprobe_trace_entry_head *)iter->ent;
1022 tp = container_of(event, struct trace_probe, call.event); 1126 tp = container_of(event, struct trace_probe, call.event);
1023 1127
1024 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1128 if (!trace_seq_printf(s, "%s: (", tp->call.name))
@@ -1030,9 +1134,10 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
1030 if (!trace_seq_puts(s, ")")) 1134 if (!trace_seq_puts(s, ")"))
1031 goto partial; 1135 goto partial;
1032 1136
1033 for (i = 0; i < field->nargs; i++) 1137 data = (u8 *)&field[1];
1034 if (!trace_seq_printf(s, " %s=%lx", 1138 for (i = 0; i < tp->nr_args; i++)
1035 tp->args[i].name, field->args[i])) 1139 if (!tp->args[i].type->print(s, tp->args[i].name,
1140 data + tp->args[i].offset))
1036 goto partial; 1141 goto partial;
1037 1142
1038 if (!trace_seq_puts(s, "\n")) 1143 if (!trace_seq_puts(s, "\n"))
@@ -1047,12 +1152,13 @@ enum print_line_t
1047print_kretprobe_event(struct trace_iterator *iter, int flags, 1152print_kretprobe_event(struct trace_iterator *iter, int flags,
1048 struct trace_event *event) 1153 struct trace_event *event)
1049{ 1154{
1050 struct kretprobe_trace_entry *field; 1155 struct kretprobe_trace_entry_head *field;
1051 struct trace_seq *s = &iter->seq; 1156 struct trace_seq *s = &iter->seq;
1052 struct trace_probe *tp; 1157 struct trace_probe *tp;
1158 u8 *data;
1053 int i; 1159 int i;
1054 1160
1055 field = (struct kretprobe_trace_entry *)iter->ent; 1161 field = (struct kretprobe_trace_entry_head *)iter->ent;
1056 tp = container_of(event, struct trace_probe, call.event); 1162 tp = container_of(event, struct trace_probe, call.event);
1057 1163
1058 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1164 if (!trace_seq_printf(s, "%s: (", tp->call.name))
@@ -1070,9 +1176,10 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
1070 if (!trace_seq_puts(s, ")")) 1176 if (!trace_seq_puts(s, ")"))
1071 goto partial; 1177 goto partial;
1072 1178
1073 for (i = 0; i < field->nargs; i++) 1179 data = (u8 *)&field[1];
1074 if (!trace_seq_printf(s, " %s=%lx", 1180 for (i = 0; i < tp->nr_args; i++)
1075 tp->args[i].name, field->args[i])) 1181 if (!tp->args[i].type->print(s, tp->args[i].name,
1182 data + tp->args[i].offset))
1076 goto partial; 1183 goto partial;
1077 1184
1078 if (!trace_seq_puts(s, "\n")) 1185 if (!trace_seq_puts(s, "\n"))
@@ -1126,29 +1233,43 @@ static int probe_event_raw_init(struct ftrace_event_call *event_call)
1126static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 1233static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1127{ 1234{
1128 int ret, i; 1235 int ret, i;
1129 struct kprobe_trace_entry field; 1236 struct kprobe_trace_entry_head field;
1130 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1237 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1131 1238
1132 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1239 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1133 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1134 /* Set argument names as fields */ 1240 /* Set argument names as fields */
1135 for (i = 0; i < tp->nr_args; i++) 1241 for (i = 0; i < tp->nr_args; i++) {
1136 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); 1242 ret = trace_define_field(event_call, tp->args[i].type->name,
1243 tp->args[i].name,
1244 sizeof(field) + tp->args[i].offset,
1245 tp->args[i].type->size,
1246 tp->args[i].type->is_signed,
1247 FILTER_OTHER);
1248 if (ret)
1249 return ret;
1250 }
1137 return 0; 1251 return 0;
1138} 1252}
1139 1253
1140static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) 1254static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1141{ 1255{
1142 int ret, i; 1256 int ret, i;
1143 struct kretprobe_trace_entry field; 1257 struct kretprobe_trace_entry_head field;
1144 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1258 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1145 1259
1146 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1260 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1147 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1261 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1148 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1149 /* Set argument names as fields */ 1262 /* Set argument names as fields */
1150 for (i = 0; i < tp->nr_args; i++) 1263 for (i = 0; i < tp->nr_args; i++) {
1151 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); 1264 ret = trace_define_field(event_call, tp->args[i].type->name,
1265 tp->args[i].name,
1266 sizeof(field) + tp->args[i].offset,
1267 tp->args[i].type->size,
1268 tp->args[i].type->is_signed,
1269 FILTER_OTHER);
1270 if (ret)
1271 return ret;
1272 }
1152 return 0; 1273 return 0;
1153} 1274}
1154 1275
@@ -1173,8 +1294,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1173 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); 1294 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1174 1295
1175 for (i = 0; i < tp->nr_args; i++) { 1296 for (i = 0; i < tp->nr_args; i++) {
1176 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx", 1297 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
1177 tp->args[i].name); 1298 tp->args[i].name, tp->args[i].type->fmt);
1178 } 1299 }
1179 1300
1180 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); 1301 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
@@ -1216,12 +1337,13 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1216{ 1337{
1217 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1338 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1218 struct ftrace_event_call *call = &tp->call; 1339 struct ftrace_event_call *call = &tp->call;
1219 struct kprobe_trace_entry *entry; 1340 struct kprobe_trace_entry_head *entry;
1341 u8 *data;
1220 int size, __size, i; 1342 int size, __size, i;
1221 unsigned long irq_flags; 1343 unsigned long irq_flags;
1222 int rctx; 1344 int rctx;
1223 1345
1224 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1346 __size = sizeof(*entry) + tp->size;
1225 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1347 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1226 size -= sizeof(u32); 1348 size -= sizeof(u32);
1227 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1349 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1233,10 +1355,10 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1233 if (!entry) 1355 if (!entry)
1234 return; 1356 return;
1235 1357
1236 entry->nargs = tp->nr_args;
1237 entry->ip = (unsigned long)kp->addr; 1358 entry->ip = (unsigned long)kp->addr;
1359 data = (u8 *)&entry[1];
1238 for (i = 0; i < tp->nr_args; i++) 1360 for (i = 0; i < tp->nr_args; i++)
1239 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1361 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1240 1362
1241 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); 1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
1242} 1364}
@@ -1247,12 +1369,13 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1247{ 1369{
1248 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1370 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1249 struct ftrace_event_call *call = &tp->call; 1371 struct ftrace_event_call *call = &tp->call;
1250 struct kretprobe_trace_entry *entry; 1372 struct kretprobe_trace_entry_head *entry;
1373 u8 *data;
1251 int size, __size, i; 1374 int size, __size, i;
1252 unsigned long irq_flags; 1375 unsigned long irq_flags;
1253 int rctx; 1376 int rctx;
1254 1377
1255 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1378 __size = sizeof(*entry) + tp->size;
1256 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1379 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1257 size -= sizeof(u32); 1380 size -= sizeof(u32);
1258 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1381 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1264,11 +1387,11 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1264 if (!entry) 1387 if (!entry)
1265 return; 1388 return;
1266 1389
1267 entry->nargs = tp->nr_args;
1268 entry->func = (unsigned long)tp->rp.kp.addr; 1390 entry->func = (unsigned long)tp->rp.kp.addr;
1269 entry->ret_ip = (unsigned long)ri->ret_addr; 1391 entry->ret_ip = (unsigned long)ri->ret_addr;
1392 data = (u8 *)&entry[1];
1270 for (i = 0; i < tp->nr_args; i++) 1393 for (i = 0; i < tp->nr_args; i++)
1271 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1394 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1272 1395
1273 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, 1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
1274 irq_flags, regs); 1397 irq_flags, regs);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index d59cd6879477..8eaf00749b65 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -34,12 +34,6 @@
34 34
35#include <asm/atomic.h> 35#include <asm/atomic.h>
36 36
37/*
38 * For now, let us restrict the no. of symbols traced simultaneously to number
39 * of available hardware breakpoint registers.
40 */
41#define KSYM_TRACER_MAX HBP_NUM
42
43#define KSYM_TRACER_OP_LEN 3 /* rw- */ 37#define KSYM_TRACER_OP_LEN 3 /* rw- */
44 38
45struct trace_ksym { 39struct trace_ksym {
@@ -53,7 +47,6 @@ struct trace_ksym {
53 47
54static struct trace_array *ksym_trace_array; 48static struct trace_array *ksym_trace_array;
55 49
56static unsigned int ksym_filter_entry_count;
57static unsigned int ksym_tracing_enabled; 50static unsigned int ksym_tracing_enabled;
58 51
59static HLIST_HEAD(ksym_filter_head); 52static HLIST_HEAD(ksym_filter_head);
@@ -181,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
181 struct trace_ksym *entry; 174 struct trace_ksym *entry;
182 int ret = -ENOMEM; 175 int ret = -ENOMEM;
183 176
184 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
185 printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
186 " new requests for tracing can be accepted now.\n",
187 KSYM_TRACER_MAX);
188 return -ENOSPC;
189 }
190
191 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); 177 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
192 if (!entry) 178 if (!entry)
193 return -ENOMEM; 179 return -ENOMEM;
@@ -203,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
203 189
204 if (IS_ERR(entry->ksym_hbp)) { 190 if (IS_ERR(entry->ksym_hbp)) {
205 ret = PTR_ERR(entry->ksym_hbp); 191 ret = PTR_ERR(entry->ksym_hbp);
206 printk(KERN_INFO "ksym_tracer request failed. Try again" 192 if (ret == -ENOSPC) {
207 " later!!\n"); 193 printk(KERN_ERR "ksym_tracer: Maximum limit reached."
194 " No new requests for tracing can be accepted now.\n");
195 } else {
196 printk(KERN_INFO "ksym_tracer request failed. Try again"
197 " later!!\n");
198 }
208 goto err; 199 goto err;
209 } 200 }
210 201
211 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); 202 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
212 ksym_filter_entry_count++;
213 203
214 return 0; 204 return 0;
215 205
@@ -265,7 +255,6 @@ static void __ksym_trace_reset(void)
265 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, 255 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
266 ksym_hlist) { 256 ksym_hlist) {
267 unregister_wide_hw_breakpoint(entry->ksym_hbp); 257 unregister_wide_hw_breakpoint(entry->ksym_hbp);
268 ksym_filter_entry_count--;
269 hlist_del_rcu(&(entry->ksym_hlist)); 258 hlist_del_rcu(&(entry->ksym_hlist));
270 synchronize_rcu(); 259 synchronize_rcu();
271 kfree(entry); 260 kfree(entry);
@@ -338,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file,
338 goto out_unlock; 327 goto out_unlock;
339 } 328 }
340 /* Error or "symbol:---" case: drop it */ 329 /* Error or "symbol:---" case: drop it */
341 ksym_filter_entry_count--;
342 hlist_del_rcu(&(entry->ksym_hlist)); 330 hlist_del_rcu(&(entry->ksym_hlist));
343 synchronize_rcu(); 331 synchronize_rcu();
344 kfree(entry); 332 kfree(entry);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 6a9d36ddfcf2..250e7f9bd2f0 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,7 +17,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_BRANCH: 17 case TRACE_BRANCH:
18 case TRACE_GRAPH_ENT: 18 case TRACE_GRAPH_ENT:
19 case TRACE_GRAPH_RET: 19 case TRACE_GRAPH_RET:
20 case TRACE_HW_BRANCHES:
21 case TRACE_KSYM: 20 case TRACE_KSYM:
22 return 1; 21 return 1;
23 } 22 }
@@ -756,62 +755,6 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
756} 755}
757#endif /* CONFIG_BRANCH_TRACER */ 756#endif /* CONFIG_BRANCH_TRACER */
758 757
759#ifdef CONFIG_HW_BRANCH_TRACER
760int
761trace_selftest_startup_hw_branches(struct tracer *trace,
762 struct trace_array *tr)
763{
764 struct trace_iterator *iter;
765 struct tracer tracer;
766 unsigned long count;
767 int ret;
768
769 if (!trace->open) {
770 printk(KERN_CONT "missing open function...");
771 return -1;
772 }
773
774 ret = tracer_init(trace, tr);
775 if (ret) {
776 warn_failed_init_tracer(trace, ret);
777 return ret;
778 }
779
780 /*
781 * The hw-branch tracer needs to collect the trace from the various
782 * cpu trace buffers - before tracing is stopped.
783 */
784 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
785 if (!iter)
786 return -ENOMEM;
787
788 memcpy(&tracer, trace, sizeof(tracer));
789
790 iter->trace = &tracer;
791 iter->tr = tr;
792 iter->pos = -1;
793 mutex_init(&iter->mutex);
794
795 trace->open(iter);
796
797 mutex_destroy(&iter->mutex);
798 kfree(iter);
799
800 tracing_stop();
801
802 ret = trace_test_buffer(tr, &count);
803 trace->reset(tr);
804 tracing_start();
805
806 if (!ret && !count) {
807 printk(KERN_CONT "no entries found..");
808 ret = -1;
809 }
810
811 return ret;
812}
813#endif /* CONFIG_HW_BRANCH_TRACER */
814
815#ifdef CONFIG_KSYM_TRACER 758#ifdef CONFIG_KSYM_TRACER
816static int ksym_selftest_dummy; 759static int ksym_selftest_dummy;
817 760
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index dee48658805c..5bfb213984b2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -774,7 +774,7 @@ void flush_delayed_work(struct delayed_work *dwork)
774{ 774{
775 if (del_timer_sync(&dwork->timer)) { 775 if (del_timer_sync(&dwork->timer)) {
776 struct cpu_workqueue_struct *cwq; 776 struct cpu_workqueue_struct *cwq;
777 cwq = wq_per_cpu(keventd_wq, get_cpu()); 777 cwq = wq_per_cpu(get_wq_data(&dwork->work)->wq, get_cpu());
778 __queue_work(cwq, &dwork->work); 778 __queue_work(cwq, &dwork->work);
779 put_cpu(); 779 put_cpu();
780 } 780 }