author		Robert Richter <robert.richter@amd.com>	2010-10-15 06:45:00 -0400
committer	Robert Richter <robert.richter@amd.com>	2010-10-15 06:45:00 -0400
commit		6268464b370e234e0255330190f9bd5d19386ad7 (patch)
tree		5742641092ce64227dd2086d78baaede57da1f80 /kernel
parent		7df01d96b295e400167e78061b81d4c91630b12d (diff)
parent		0fdf13606b67f830559abdaad15980c7f4f05ec4 (diff)
Merge remote branch 'tip/perf/core' into oprofile/core
Conflicts:
	arch/arm/oprofile/common.c
	kernel/perf_event.c
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/Makefile                      |    2
-rw-r--r--	kernel/exit.c                        |    4
-rw-r--r--	kernel/fork.c                        |    2
-rw-r--r--	kernel/hw_breakpoint.c               |   67
-rw-r--r--	kernel/jump_label.c                  |  429
-rw-r--r--	kernel/kfifo.c                       |    2
-rw-r--r--	kernel/kprobes.c                     |   26
-rw-r--r--	kernel/module.c                      |   10
-rw-r--r--	kernel/perf_event.c                  | 2357
-rw-r--r--	kernel/sched.c                       |    2
-rw-r--r--	kernel/smp.c                         |   17
-rw-r--r--	kernel/test_kprobes.c                |   12
-rw-r--r--	kernel/trace/Kconfig                 |    5
-rw-r--r--	kernel/trace/ftrace.c                |  123
-rw-r--r--	kernel/trace/ring_buffer.c           |   21
-rw-r--r--	kernel/trace/trace_event_perf.c      |   28
-rw-r--r--	kernel/trace/trace_events.c          |   55
-rw-r--r--	kernel/trace/trace_functions_graph.c |  131
-rw-r--r--	kernel/trace/trace_workqueue.c       |   10
-rw-r--r--	kernel/tracepoint.c                  |   14
-rw-r--r--	kernel/watchdog.c                    |   41
21 files changed, 2243 insertions, 1115 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b72d1a74be0..d52b473c99a1 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
-	    async.o range.o
+	    async.o range.o jump_label.o
 obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o
 obj-y += groups.o
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 03120229db28..e2bdf37f9fde 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 {
 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
 
-#ifdef CONFIG_PERF_EVENTS
-	WARN_ON_ONCE(tsk->perf_event_ctxp);
-#endif
+	perf_event_delayed_put(tsk);
 	trace_sched_process_free(tsk);
 	put_task_struct(tsk);
 }
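
For reference, perf_event_delayed_put() is the perf-side helper this hunk switches to. A sketch of what it amounts to, assuming the per-task context array and the for_each_task_context_nr() iterator introduced in the kernel/perf_event.c hunks further down:

void perf_event_delayed_put(struct task_struct *task)
{
	int ctxn;

	/* by the time of the delayed free, every per-task context must be gone */
	for_each_task_context_nr(ctxn)
		WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
}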
diff --git a/kernel/fork.c b/kernel/fork.c
index b7e9d60a675d..c445f8cc408d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -356,10 +356,10 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
 		vma_set_policy(tmp, pol);
+		tmp->vm_mm = mm;
 		if (anon_vma_fork(tmp, mpnt))
 			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~VM_LOCKED;
-		tmp->vm_mm = mm;
 		tmp->vm_next = tmp->vm_prev = NULL;
 		file = tmp->vm_file;
 		if (file) {
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c7c2aed9e2dc..3b714e839c10 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -433,8 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered,
 			    struct task_struct *tsk)
 {
-	return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk),
-						triggered);
+	return perf_event_create_kernel_counter(attr, -1, tsk, triggered);
 }
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 
@@ -516,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
 		pevent = per_cpu_ptr(cpu_events, cpu);
-		bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
+		bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered);
 
 		*pevent = bp;
 
@@ -566,6 +565,61 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 	.priority = 0x7fffffff
 };
 
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+	release_bp_slot(event);
+}
+
+static int hw_breakpoint_event_init(struct perf_event *bp)
+{
+	int err;
+
+	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
+		return -ENOENT;
+
+	err = register_perf_hw_breakpoint(bp);
+	if (err)
+		return err;
+
+	bp->destroy = bp_perf_event_destroy;
+
+	return 0;
+}
+
+static int hw_breakpoint_add(struct perf_event *bp, int flags)
+{
+	if (!(flags & PERF_EF_START))
+		bp->hw.state = PERF_HES_STOPPED;
+
+	return arch_install_hw_breakpoint(bp);
+}
+
+static void hw_breakpoint_del(struct perf_event *bp, int flags)
+{
+	arch_uninstall_hw_breakpoint(bp);
+}
+
+static void hw_breakpoint_start(struct perf_event *bp, int flags)
+{
+	bp->hw.state = 0;
+}
+
+static void hw_breakpoint_stop(struct perf_event *bp, int flags)
+{
+	bp->hw.state = PERF_HES_STOPPED;
+}
+
+static struct pmu perf_breakpoint = {
+	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */
+
+	.event_init	= hw_breakpoint_event_init,
+	.add		= hw_breakpoint_add,
+	.del		= hw_breakpoint_del,
+	.start		= hw_breakpoint_start,
+	.stop		= hw_breakpoint_stop,
+	.read		= hw_breakpoint_pmu_read,
+};
+
 static int __init init_hw_breakpoint(void)
 {
 	unsigned int **task_bp_pinned;
@@ -587,6 +641,8 @@ static int __init init_hw_breakpoint(void)
 
 	constraints_initialized = 1;
 
+	perf_pmu_register(&perf_breakpoint);
+
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
 
  err_alloc:
@@ -602,8 +658,3 @@ static int __init init_hw_breakpoint(void)
 core_initcall(init_hw_breakpoint);
 
 
-struct pmu perf_ops_bp = {
-	.enable		= arch_install_hw_breakpoint,
-	.disable	= arch_uninstall_hw_breakpoint,
-	.read		= hw_breakpoint_pmu_read,
-};
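
With breakpoints now behind a regular struct pmu, the exported helpers keep their signatures and only the plumbing underneath changes. A minimal usage sketch (the address, handler body, and function names are hypothetical; hw_breakpoint_init() and the HW_BREAKPOINT_* constants are the existing <linux/hw_breakpoint.h> API):

static void sample_bp_handler(struct perf_event *bp, int nmi,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	/* runs whenever the watched word is written */
}

static struct perf_event *watch_word(struct task_struct *tsk, void *addr)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);	/* sets up a PERF_TYPE_BREAKPOINT attr */
	attr.bp_addr = (unsigned long)addr;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	/* ends up in hw_breakpoint_event_init() via the pmu added above */
	return register_user_hw_breakpoint(&attr, sample_bp_handler, tsk);
}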
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
new file mode 100644
index 000000000000..7be868bf25c6
--- /dev/null
+++ b/kernel/jump_label.c
@@ -0,0 +1,429 @@
+/*
+ * jump label support
+ *
+ * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
+ *
+ */
+#include <linux/jump_label.h>
+#include <linux/memory.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/err.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+#define JUMP_LABEL_HASH_BITS 6
+#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS)
+static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE];
+
+/* mutex to protect coming/going of the jump_label table */
+static DEFINE_MUTEX(jump_label_mutex);
+
+struct jump_label_entry {
+	struct hlist_node hlist;
+	struct jump_entry *table;
+	int nr_entries;
+	/* hang modules off here */
+	struct hlist_head modules;
+	unsigned long key;
+};
+
+struct jump_label_module_entry {
+	struct hlist_node hlist;
+	struct jump_entry *table;
+	int nr_entries;
+	struct module *mod;
+};
+
+static int jump_label_cmp(const void *a, const void *b)
+{
+	const struct jump_entry *jea = a;
+	const struct jump_entry *jeb = b;
+
+	if (jea->key < jeb->key)
+		return -1;
+
+	if (jea->key > jeb->key)
+		return 1;
+
+	return 0;
+}
+
+static void
+sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop)
+{
+	unsigned long size;
+
+	size = (((unsigned long)stop - (unsigned long)start)
+					/ sizeof(struct jump_entry));
+	sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
+}
+
+static struct jump_label_entry *get_jump_label_entry(jump_label_t key)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct jump_label_entry *e;
+	u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0);
+
+	head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (key == e->key)
+			return e;
+	}
+	return NULL;
+}
+
+static struct jump_label_entry *
+add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table)
+{
+	struct hlist_head *head;
+	struct jump_label_entry *e;
+	u32 hash;
+
+	e = get_jump_label_entry(key);
+	if (e)
+		return ERR_PTR(-EEXIST);
+
+	e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+
+	hash = jhash((void *)&key, sizeof(jump_label_t), 0);
+	head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
+	e->key = key;
+	e->table = table;
+	e->nr_entries = nr_entries;
+	INIT_HLIST_HEAD(&(e->modules));
+	hlist_add_head(&e->hlist, head);
+	return e;
+}
+
+static int
+build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop)
+{
+	struct jump_entry *iter, *iter_begin;
+	struct jump_label_entry *entry;
+	int count;
+
+	sort_jump_label_entries(start, stop);
+	iter = start;
+	while (iter < stop) {
+		entry = get_jump_label_entry(iter->key);
+		if (!entry) {
+			iter_begin = iter;
+			count = 0;
+			while ((iter < stop) &&
+				(iter->key == iter_begin->key)) {
+				iter++;
+				count++;
+			}
+			entry = add_jump_label_entry(iter_begin->key,
+							count, iter_begin);
+			if (IS_ERR(entry))
+				return PTR_ERR(entry);
+		} else {
+			WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/***
+ * jump_label_update - update jump label text
+ * @key - key value associated with a jump label
+ * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE
+ *
+ * Will enable/disable the jump for jump label @key, depending on the
+ * value of @type.
+ *
+ */
+
+void jump_label_update(unsigned long key, enum jump_label_type type)
+{
+	struct jump_entry *iter;
+	struct jump_label_entry *entry;
+	struct hlist_node *module_node;
+	struct jump_label_module_entry *e_module;
+	int count;
+
+	mutex_lock(&jump_label_mutex);
+	entry = get_jump_label_entry((jump_label_t)key);
+	if (entry) {
+		count = entry->nr_entries;
+		iter = entry->table;
+		while (count--) {
+			if (kernel_text_address(iter->code))
+				arch_jump_label_transform(iter, type);
+			iter++;
+		}
+		/* enable/disable jump labels in modules */
+		hlist_for_each_entry(e_module, module_node, &(entry->modules),
+							hlist) {
+			count = e_module->nr_entries;
+			iter = e_module->table;
+			while (count--) {
+				if (kernel_text_address(iter->code))
+					arch_jump_label_transform(iter, type);
+				iter++;
+			}
+		}
+	}
+	mutex_unlock(&jump_label_mutex);
+}
+
+static int addr_conflict(struct jump_entry *entry, void *start, void *end)
+{
+	if (entry->code <= (unsigned long)end &&
+		entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
+		return 1;
+
+	return 0;
+}
+
+#ifdef CONFIG_MODULES
+
+static int module_conflict(void *start, void *end)
+{
+	struct hlist_head *head;
+	struct hlist_node *node, *node_next, *module_node, *module_node_next;
+	struct jump_label_entry *e;
+	struct jump_label_module_entry *e_module;
+	struct jump_entry *iter;
+	int i, count;
+	int conflict = 0;
+
+	for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
+		head = &jump_label_table[i];
+		hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
+			hlist_for_each_entry_safe(e_module, module_node,
+							module_node_next,
+							&(e->modules), hlist) {
+				count = e_module->nr_entries;
+				iter = e_module->table;
+				while (count--) {
+					if (addr_conflict(iter, start, end)) {
+						conflict = 1;
+						goto out;
+					}
+					iter++;
+				}
+			}
+		}
+	}
+out:
+	return conflict;
+}
+
+#endif
+
+/***
+ * jump_label_text_reserved - check if addr range is reserved
+ * @start: start text addr
+ * @end: end text addr
+ *
+ * checks if the text addr located between @start and @end
+ * overlaps with any of the jump label patch addresses. Code
+ * that wants to modify kernel text should first verify that
+ * it does not overlap with any of the jump label addresses.
+ *
+ * returns 1 if there is an overlap, 0 otherwise
+ */
+int jump_label_text_reserved(void *start, void *end)
+{
+	struct jump_entry *iter;
+	struct jump_entry *iter_start = __start___jump_table;
+	struct jump_entry *iter_stop = __stop___jump_table;
+	int conflict = 0;
+
+	mutex_lock(&jump_label_mutex);
+	iter = iter_start;
+	while (iter < iter_stop) {
+		if (addr_conflict(iter, start, end)) {
+			conflict = 1;
+			goto out;
+		}
+		iter++;
+	}
+
+	/* now check modules */
+#ifdef CONFIG_MODULES
+	conflict = module_conflict(start, end);
+#endif
+out:
+	mutex_unlock(&jump_label_mutex);
+	return conflict;
+}
+
+static __init int init_jump_label(void)
+{
+	int ret;
+	struct jump_entry *iter_start = __start___jump_table;
+	struct jump_entry *iter_stop = __stop___jump_table;
+	struct jump_entry *iter;
+
+	mutex_lock(&jump_label_mutex);
+	ret = build_jump_label_hashtable(__start___jump_table,
+					 __stop___jump_table);
+	iter = iter_start;
+	while (iter < iter_stop) {
+		arch_jump_label_text_poke_early(iter->code);
+		iter++;
+	}
+	mutex_unlock(&jump_label_mutex);
+	return ret;
+}
+early_initcall(init_jump_label);
+
+#ifdef CONFIG_MODULES
+
+static struct jump_label_module_entry *
+add_jump_label_module_entry(struct jump_label_entry *entry,
+			    struct jump_entry *iter_begin,
+			    int count, struct module *mod)
+{
+	struct jump_label_module_entry *e;
+
+	e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+	e->mod = mod;
+	e->nr_entries = count;
+	e->table = iter_begin;
+	hlist_add_head(&e->hlist, &entry->modules);
+	return e;
+}
+
+static int add_jump_label_module(struct module *mod)
+{
+	struct jump_entry *iter, *iter_begin;
+	struct jump_label_entry *entry;
+	struct jump_label_module_entry *module_entry;
+	int count;
+
+	/* if the module doesn't have jump label entries, just return */
+	if (!mod->num_jump_entries)
+		return 0;
+
+	sort_jump_label_entries(mod->jump_entries,
+				mod->jump_entries + mod->num_jump_entries);
+	iter = mod->jump_entries;
+	while (iter < mod->jump_entries + mod->num_jump_entries) {
+		entry = get_jump_label_entry(iter->key);
+		iter_begin = iter;
+		count = 0;
+		while ((iter < mod->jump_entries + mod->num_jump_entries) &&
+			(iter->key == iter_begin->key)) {
+			iter++;
+			count++;
+		}
+		if (!entry) {
+			entry = add_jump_label_entry(iter_begin->key, 0, NULL);
+			if (IS_ERR(entry))
+				return PTR_ERR(entry);
+		}
+		module_entry = add_jump_label_module_entry(entry, iter_begin,
+							   count, mod);
+		if (IS_ERR(module_entry))
+			return PTR_ERR(module_entry);
+	}
+	return 0;
+}
+
+static void remove_jump_label_module(struct module *mod)
+{
+	struct hlist_head *head;
+	struct hlist_node *node, *node_next, *module_node, *module_node_next;
+	struct jump_label_entry *e;
+	struct jump_label_module_entry *e_module;
+	int i;
+
+	/* if the module doesn't have jump label entries, just return */
+	if (!mod->num_jump_entries)
+		return;
+
+	for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
+		head = &jump_label_table[i];
+		hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
+			hlist_for_each_entry_safe(e_module, module_node,
+						  module_node_next,
+						  &(e->modules), hlist) {
+				if (e_module->mod == mod) {
+					hlist_del(&e_module->hlist);
+					kfree(e_module);
+				}
+			}
+			if (hlist_empty(&e->modules) && (e->nr_entries == 0)) {
+				hlist_del(&e->hlist);
+				kfree(e);
+			}
+		}
+	}
+}
+
+static int
+jump_label_module_notify(struct notifier_block *self, unsigned long val,
+			 void *data)
+{
+	struct module *mod = data;
+	int ret = 0;
+
+	switch (val) {
+	case MODULE_STATE_COMING:
+		mutex_lock(&jump_label_mutex);
+		ret = add_jump_label_module(mod);
+		if (ret)
+			remove_jump_label_module(mod);
+		mutex_unlock(&jump_label_mutex);
+		break;
+	case MODULE_STATE_GOING:
+		mutex_lock(&jump_label_mutex);
+		remove_jump_label_module(mod);
+		mutex_unlock(&jump_label_mutex);
+		break;
+	}
+	return ret;
+}
+
+/***
+ * jump_label_apply_nops - patch module jump labels with arch_get_jump_label_nop()
+ * @mod: module to patch
+ *
+ * Allow for run-time selection of the optimal nops. Before the module
+ * loads patch these with arch_get_jump_label_nop(), which is specified by
+ * the arch specific jump label code.
+ */
+void jump_label_apply_nops(struct module *mod)
+{
+	struct jump_entry *iter;
+
+	/* if the module doesn't have jump label entries, just return */
+	if (!mod->num_jump_entries)
+		return;
+
+	iter = mod->jump_entries;
+	while (iter < mod->jump_entries + mod->num_jump_entries) {
+		arch_jump_label_text_poke_early(iter->code);
+		iter++;
+	}
+}
+
+struct notifier_block jump_label_module_nb = {
+	.notifier_call = jump_label_module_notify,
+	.priority = 0,
+};
+
+static __init int init_jump_label_module(void)
+{
+	return register_module_notifier(&jump_label_module_nb);
+}
+early_initcall(init_jump_label_module);
+
+#endif /* CONFIG_MODULES */
+
+#endif
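
To place the new file in context: a jump-label key is just an address recorded by the inline assembly that emits the __jump_table section, and jump_label_update() patches every site hashed under that key, in the core kernel and in modules. A hedged sketch of a caller (the key variable and wrapper are hypothetical; the enum values are the ones named in the kernel-doc above):

static int my_key;	/* hypothetical: its address is what jump_entry::key records */

static void my_feature_set(int on)
{
	/* rewrites every nop/jump site compiled against &my_key */
	jump_label_update((unsigned long)&my_key,
			  on ? JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
}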
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 6b5580c57644..01a0700e873f 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -365,8 +365,6 @@ static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl,
 	n  = setup_sgl_buf(sgl, fifo->data + off, nents, l);
 	n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l);
 
-	if (n)
-		sg_mark_end(sgl + n - 1);
 	return n;
 }
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 282035f3ae96..ec4210c6501e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -47,6 +47,7 @@
 #include <linux/memory.h>
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
+#include <linux/jump_label.h>
 
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
@@ -399,7 +400,7 @@ static inline int kprobe_optready(struct kprobe *p)
  * Return an optimized kprobe whose optimizing code replaces
  * instructions including addr (exclude breakpoint).
  */
-struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
+static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
 {
 	int i;
 	struct kprobe *p = NULL;
@@ -831,6 +832,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
 
 void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
			 struct hlist_head **head, unsigned long *flags)
+__acquires(hlist_lock)
 {
 	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
 	spinlock_t *hlist_lock;
@@ -842,6 +844,7 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
 
 static void __kprobes kretprobe_table_lock(unsigned long hash,
	 unsigned long *flags)
+__acquires(hlist_lock)
 {
 	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
 	spin_lock_irqsave(hlist_lock, *flags);
@@ -849,6 +852,7 @@ static void __kprobes kretprobe_table_lock(unsigned long hash,
 
 void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
			 unsigned long *flags)
+__releases(hlist_lock)
 {
 	unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
 	spinlock_t *hlist_lock;
@@ -857,7 +861,9 @@ void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
 	spin_unlock_irqrestore(hlist_lock, *flags);
 }
 
-void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
+static void __kprobes kretprobe_table_unlock(unsigned long hash,
+					     unsigned long *flags)
+__releases(hlist_lock)
 {
 	spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
 	spin_unlock_irqrestore(hlist_lock, *flags);
@@ -1141,7 +1147,8 @@ int __kprobes register_kprobe(struct kprobe *p)
 	preempt_disable();
 	if (!kernel_text_address((unsigned long) p->addr) ||
 	    in_kprobes_functions((unsigned long) p->addr) ||
-	    ftrace_text_reserved(p->addr, p->addr)) {
+	    ftrace_text_reserved(p->addr, p->addr) ||
+	    jump_label_text_reserved(p->addr, p->addr)) {
 		preempt_enable();
 		return -EINVAL;
 	}
@@ -1339,18 +1346,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
 	if (num <= 0)
 		return -EINVAL;
 	for (i = 0; i < num; i++) {
-		unsigned long addr;
+		unsigned long addr, offset;
 		jp = jps[i];
 		addr = arch_deref_entry_point(jp->entry);
 
-		if (!kernel_text_address(addr))
-			ret = -EINVAL;
-		else {
-			/* Todo: Verify probepoint is a function entry point */
+		/* Verify probepoint is a function entry point */
+		if (kallsyms_lookup_size_offset(addr, NULL, &offset) &&
+		    offset == 0) {
 			jp->kp.pre_handler = setjmp_pre_handler;
 			jp->kp.break_handler = longjmp_break_handler;
 			ret = register_kprobe(&jp->kp);
-		}
+		} else
+			ret = -EINVAL;
+
 		if (ret < 0) {
 			if (i > 0)
 				unregister_jprobes(jps, i);
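
The register_kprobe() hunk above shows the contract jump_label_text_reserved() creates for text patchers: refuse any address the jump-label (or ftrace) machinery may itself rewrite. Condensed into a hypothetical helper mirroring those checks:

static int text_addr_patchable(void *addr)
{
	if (!kernel_text_address((unsigned long)addr))
		return 0;
	if (ftrace_text_reserved(addr, addr) ||
	    jump_label_text_reserved(addr, addr))
		return 0;
	return 1;
}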
diff --git a/kernel/module.c b/kernel/module.c
index d0b5f8db11b4..2df46301a7a4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -55,6 +55,7 @@
 #include <linux/async.h>
 #include <linux/percpu.h>
 #include <linux/kmemleak.h>
+#include <linux/jump_label.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/module.h>
@@ -1537,6 +1538,7 @@ static int __unlink_module(void *_mod)
 {
 	struct module *mod = _mod;
 	list_del(&mod->list);
+	module_bug_cleanup(mod);
 	return 0;
 }
 
@@ -2308,6 +2310,11 @@ static void find_module_sections(struct module *mod, struct load_info *info)
 					 sizeof(*mod->tracepoints),
 					 &mod->num_tracepoints);
 #endif
+#ifdef HAVE_JUMP_LABEL
+	mod->jump_entries = section_objs(info, "__jump_table",
+					sizeof(*mod->jump_entries),
+					&mod->num_jump_entries);
+#endif
#ifdef CONFIG_EVENT_TRACING
 	mod->trace_events = section_objs(info, "_ftrace_events",
 					 sizeof(*mod->trace_events),
@@ -2625,6 +2632,7 @@ static struct module *load_module(void __user *umod,
 	if (err < 0)
 		goto ddebug;
 
+	module_bug_finalize(info.hdr, info.sechdrs, mod);
 	list_add_rcu(&mod->list, &modules);
 	mutex_unlock(&module_mutex);
 
@@ -2650,6 +2658,8 @@ static struct module *load_module(void __user *umod,
 	mutex_lock(&module_mutex);
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
+	module_bug_cleanup(mod);
+
  ddebug:
 	if (!mod->taints)
 		dynamic_debug_remove(info.debug);
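
For orientation, and as an assumption rather than part of this diff: the jump-label series adds matching members to struct module, roughly

#ifdef HAVE_JUMP_LABEL
	struct jump_entry *jump_entries;
	unsigned int num_jump_entries;
#endif

which find_module_sections() above points at the module's __jump_table section, so the notifier in kernel/jump_label.c can hash the entries when the module comes up.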
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fc512684423f..1ec3916ffef0 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,24 +31,18 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
-#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
-/*
- * Each CPU has a list of per CPU events:
- */
-static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
-
-int perf_max_events __read_mostly = 1;
-static int perf_reserved_percpu __read_mostly;
-static int perf_overcommit __read_mostly = 1;
-
 static atomic_t nr_events __read_mostly;
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 
+static LIST_HEAD(pmus);
+static DEFINE_MUTEX(pmus_lock);
+static struct srcu_struct pmus_srcu;
+
 /*
  * perf event paranoia level:
  *  -1 - not paranoid at all
@@ -67,22 +61,6 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
 
 static atomic64_t perf_event_id;
 
-/*
- * Lock for (sysadmin-configurable) event reservations:
- */
-static DEFINE_SPINLOCK(perf_resource_lock);
-
-/*
- * Architecture provided APIs - weak aliases:
- */
-extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
-{
-	return NULL;
-}
-
-void __weak hw_perf_disable(void)	{ barrier(); }
-void __weak hw_perf_enable(void)	{ barrier(); }
-
 void __weak perf_event_print_debug(void)	{ }
 
 extern __weak const char *perf_pmu_name(void)
@@ -90,18 +68,36 @@ extern __weak const char *perf_pmu_name(void)
 	return "pmu";
 }
 
-static DEFINE_PER_CPU(int, perf_disable_count);
+void perf_pmu_disable(struct pmu *pmu)
+{
+	int *count = this_cpu_ptr(pmu->pmu_disable_count);
+	if (!(*count)++)
+		pmu->pmu_disable(pmu);
+}
 
-void perf_disable(void)
+void perf_pmu_enable(struct pmu *pmu)
 {
-	if (!__get_cpu_var(perf_disable_count)++)
-		hw_perf_disable();
+	int *count = this_cpu_ptr(pmu->pmu_disable_count);
+	if (!--(*count))
+		pmu->pmu_enable(pmu);
 }
 
-void perf_enable(void)
+static DEFINE_PER_CPU(struct list_head, rotation_list);
+
+/*
+ * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
+ * because they're strictly cpu affine and rotate_start is called with IRQs
+ * disabled, while rotate_context is called from IRQ context.
+ */
+static void perf_pmu_rotate_start(struct pmu *pmu)
 {
-	if (!--__get_cpu_var(perf_disable_count))
-		hw_perf_enable();
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+	struct list_head *head = &__get_cpu_var(rotation_list);
+
+	WARN_ON(!irqs_disabled());
+
+	if (list_empty(&cpuctx->rotation_list))
+		list_add(&cpuctx->rotation_list, head);
 }
 
 static void get_ctx(struct perf_event_context *ctx)
@@ -156,13 +152,13 @@ static u64 primary_event_id(struct perf_event *event)
  * the context could get moved to another task.
  */
 static struct perf_event_context *
-perf_lock_task_context(struct task_struct *task, unsigned long *flags)
+perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
 {
 	struct perf_event_context *ctx;
 
 	rcu_read_lock();
- retry:
-	ctx = rcu_dereference(task->perf_event_ctxp);
+retry:
+	ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
 	if (ctx) {
 		/*
 		 * If this context is a clone of another, it might
@@ -175,7 +171,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
 	 * can't get swapped on us any more.
 	 */
 	raw_spin_lock_irqsave(&ctx->lock, *flags);
-	if (ctx != rcu_dereference(task->perf_event_ctxp)) {
+	if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
 		raw_spin_unlock_irqrestore(&ctx->lock, *flags);
 		goto retry;
 	}
@@ -194,12 +190,13 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
 * can't get swapped to another task. This also increments its
 * reference count so that the context can't get freed.
 */
-static struct perf_event_context *perf_pin_task_context(struct task_struct *task)
+static struct perf_event_context *
+perf_pin_task_context(struct task_struct *task, int ctxn)
 {
 	struct perf_event_context *ctx;
 	unsigned long flags;
 
-	ctx = perf_lock_task_context(task, &flags);
+	ctx = perf_lock_task_context(task, ctxn, &flags);
 	if (ctx) {
 		++ctx->pin_count;
 		raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -307,6 +304,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	}
 
 	list_add_rcu(&event->event_entry, &ctx->event_list);
+	if (!ctx->nr_events)
+		perf_pmu_rotate_start(ctx->pmu);
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat++;
@@ -441,7 +440,7 @@ event_sched_out(struct perf_event *event,
 		event->state = PERF_EVENT_STATE_OFF;
 	}
 	event->tstamp_stopped = ctx->time;
-	event->pmu->disable(event);
+	event->pmu->del(event, 0);
 	event->oncpu = -1;
 
 	if (!is_software_event(event))
@@ -471,6 +470,12 @@ group_sched_out(struct perf_event *group_event,
 		cpuctx->exclusive = 0;
 }
 
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+	return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
 /*
  * Cross CPU call to remove a performance event
  *
@@ -479,9 +484,9 @@ group_sched_out(struct perf_event *group_event,
 */
 static void __perf_event_remove_from_context(void *info)
 {
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -492,27 +497,11 @@ static void __perf_event_remove_from_context(void *info)
 		return;
 
 	raw_spin_lock(&ctx->lock);
-	/*
-	 * Protect the list operation against NMI by disabling the
-	 * events on a global level.
-	 */
-	perf_disable();
 
 	event_sched_out(event, cpuctx, ctx);
 
 	list_del_event(event, ctx);
 
-	if (!ctx->task) {
-		/*
-		 * Allow more per task events with respect to the
-		 * reservation:
-		 */
-		cpuctx->max_pertask =
-			min(perf_max_events - ctx->nr_events,
-			    perf_max_events - perf_reserved_percpu);
-	}
-
-	perf_enable();
 	raw_spin_unlock(&ctx->lock);
 }
 
@@ -577,8 +566,8 @@ retry:
 static void __perf_event_disable(void *info)
 {
 	struct perf_event *event = info;
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
 	/*
 	 * If this is a per-task event, need to check whether this
@@ -633,7 +622,7 @@ void perf_event_disable(struct perf_event *event)
 		return;
 	}
 
- retry:
+retry:
 	task_oncpu_function_call(task, __perf_event_disable, event);
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -672,7 +661,7 @@ event_sched_in(struct perf_event *event,
 	 */
 	smp_wmb();
 
-	if (event->pmu->enable(event)) {
+	if (event->pmu->add(event, PERF_EF_START)) {
 		event->state = PERF_EVENT_STATE_INACTIVE;
 		event->oncpu = -1;
 		return -EAGAIN;
@@ -696,22 +685,15 @@ group_sched_in(struct perf_event *group_event,
 	       struct perf_event_context *ctx)
 {
 	struct perf_event *event, *partial_group = NULL;
-	const struct pmu *pmu = group_event->pmu;
-	bool txn = false;
+	struct pmu *pmu = group_event->pmu;
 
 	if (group_event->state == PERF_EVENT_STATE_OFF)
 		return 0;
 
-	/* Check if group transaction availabe */
-	if (pmu->start_txn)
-		txn = true;
-
-	if (txn)
-		pmu->start_txn(pmu);
+	pmu->start_txn(pmu);
 
 	if (event_sched_in(group_event, cpuctx, ctx)) {
-		if (txn)
-			pmu->cancel_txn(pmu);
+		pmu->cancel_txn(pmu);
 		return -EAGAIN;
 	}
 
@@ -725,7 +707,7 @@ group_sched_in(struct perf_event *group_event,
 		}
 	}
 
-	if (!txn || !pmu->commit_txn(pmu))
+	if (!pmu->commit_txn(pmu))
 		return 0;
 
 group_error:
@@ -740,8 +722,7 @@ group_error:
 	}
 	event_sched_out(group_event, cpuctx, ctx);
 
-	if (txn)
-		pmu->cancel_txn(pmu);
+	pmu->cancel_txn(pmu);
 
 	return -EAGAIN;
 }
@@ -794,10 +775,10 @@
 */
 static void __perf_install_in_context(void *info)
 {
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *leader = event->group_leader;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	int err;
 
 	/*
@@ -817,12 +798,6 @@ static void __perf_install_in_context(void *info)
 	ctx->is_active = 1;
 	update_context_time(ctx);
 
-	/*
-	 * Protect the list operation against NMI by disabling the
-	 * events on a global level. NOP for non NMI based events.
-	 */
-	perf_disable();
-
 	add_event_to_ctx(event, ctx);
 
 	if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -860,12 +835,7 @@
 		}
 	}
 
-	if (!err && !ctx->task && cpuctx->max_pertask)
-		cpuctx->max_pertask--;
-
- unlock:
-	perf_enable();
-
+unlock:
 	raw_spin_unlock(&ctx->lock);
 }
 
@@ -888,6 +858,8 @@ perf_install_in_context(struct perf_event_context *ctx,
 {
 	struct task_struct *task = ctx->task;
 
+	event->ctx = ctx;
+
 	if (!task) {
 		/*
 		 * Per cpu events are installed via an smp call and
@@ -936,10 +908,12 @@ static void __perf_event_mark_enabled(struct perf_event *event,
 
 	event->state = PERF_EVENT_STATE_INACTIVE;
 	event->tstamp_enabled = ctx->time - event->total_time_enabled;
-	list_for_each_entry(sub, &event->sibling_list, group_entry)
-		if (sub->state >= PERF_EVENT_STATE_INACTIVE)
+	list_for_each_entry(sub, &event->sibling_list, group_entry) {
+		if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
 			sub->tstamp_enabled =
 				ctx->time - sub->total_time_enabled;
+		}
+	}
 }
 
 /*
@@ -948,9 +922,9 @@ static void __perf_event_mark_enabled(struct perf_event *event,
 static void __perf_event_enable(void *info)
 {
 	struct perf_event *event = info;
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *leader = event->group_leader;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	int err;
 
 	/*
@@ -984,12 +958,10 @@ static void __perf_event_enable(void *info)
 	if (!group_can_go_on(event, cpuctx, 1)) {
 		err = -EEXIST;
 	} else {
-		perf_disable();
 		if (event == leader)
 			err = group_sched_in(event, cpuctx, ctx);
 		else
 			err = event_sched_in(event, cpuctx, ctx);
-		perf_enable();
 	}
 
 	if (err) {
@@ -1005,7 +977,7 @@ static void __perf_event_enable(void *info)
 		}
 	}
 
- unlock:
+unlock:
 	raw_spin_unlock(&ctx->lock);
 }
 
@@ -1046,7 +1018,7 @@ void perf_event_enable(struct perf_event *event)
 	if (event->state == PERF_EVENT_STATE_ERROR)
 		event->state = PERF_EVENT_STATE_OFF;
 
- retry:
+retry:
 	raw_spin_unlock_irq(&ctx->lock);
 	task_oncpu_function_call(task, __perf_event_enable, event);
 
@@ -1066,7 +1038,7 @@ void perf_event_enable(struct perf_event *event)
 	if (event->state == PERF_EVENT_STATE_OFF)
 		__perf_event_mark_enabled(event, ctx);
 
- out:
+out:
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
@@ -1097,26 +1069,26 @@ static void ctx_sched_out(struct perf_event_context *ctx,
 	struct perf_event *event;
 
 	raw_spin_lock(&ctx->lock);
+	perf_pmu_disable(ctx->pmu);
 	ctx->is_active = 0;
 	if (likely(!ctx->nr_events))
 		goto out;
 	update_context_time(ctx);
 
-	perf_disable();
 	if (!ctx->nr_active)
-		goto out_enable;
+		goto out;
 
-	if (event_type & EVENT_PINNED)
+	if (event_type & EVENT_PINNED) {
 		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
+	}
 
-	if (event_type & EVENT_FLEXIBLE)
+	if (event_type & EVENT_FLEXIBLE) {
 		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
-
- out_enable:
-	perf_enable();
- out:
+	}
+out:
+	perf_pmu_enable(ctx->pmu);
 	raw_spin_unlock(&ctx->lock);
 }
 
@@ -1214,34 +1186,25 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
 	}
 }
 
-/*
- * Called from scheduler to remove the events of the current task,
- * with interrupts disabled.
- *
- * We stop each event and update the event value in event->count.
- *
- * This does not protect us against NMI, but disable()
- * sets the disabled bit in the control field of event _before_
- * accessing the event control register. If a NMI hits, then it will
- * not restart the event.
- */
-void perf_event_task_sched_out(struct task_struct *task,
-			       struct task_struct *next)
+void perf_event_context_sched_out(struct task_struct *task, int ctxn,
+				  struct task_struct *next)
 {
-	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-	struct perf_event_context *ctx = task->perf_event_ctxp;
+	struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
 	struct perf_event_context *next_ctx;
 	struct perf_event_context *parent;
+	struct perf_cpu_context *cpuctx;
 	int do_switch = 1;
 
-	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
+	if (likely(!ctx))
+		return;
 
-	if (likely(!ctx || !cpuctx->task_ctx))
+	cpuctx = __get_cpu_context(ctx);
+	if (!cpuctx->task_ctx)
 		return;
 
 	rcu_read_lock();
 	parent = rcu_dereference(ctx->parent_ctx);
-	next_ctx = next->perf_event_ctxp;
+	next_ctx = next->perf_event_ctxp[ctxn];
 	if (parent && next_ctx &&
 	    rcu_dereference(next_ctx->parent_ctx) == parent) {
 		/*
@@ -1260,8 +1223,8 @@ void perf_event_task_sched_out(struct task_struct *task,
 		 * XXX do we need a memory barrier of sorts
 		 * wrt to rcu_dereference() of perf_event_ctxp
 		 */
-		task->perf_event_ctxp = next_ctx;
-		next->perf_event_ctxp = ctx;
+		task->perf_event_ctxp[ctxn] = next_ctx;
+		next->perf_event_ctxp[ctxn] = ctx;
 		ctx->task = next;
 		next_ctx->task = task;
 		do_switch = 0;
@@ -1279,10 +1242,35 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1279 | } | 1242 | } |
1280 | } | 1243 | } |
1281 | 1244 | ||
1245 | #define for_each_task_context_nr(ctxn) \ | ||
1246 | for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) | ||
1247 | |||
1248 | /* | ||
1249 | * Called from scheduler to remove the events of the current task, | ||
1250 | * with interrupts disabled. | ||
1251 | * | ||
1252 | * We stop each event and update the event value in event->count. | ||
1253 | * | ||
1254 | * This does not protect us against NMI, but disable() | ||
1255 | * sets the disabled bit in the control field of event _before_ | ||
1256 | * accessing the event control register. If a NMI hits, then it will | ||
1257 | * not restart the event. | ||
1258 | */ | ||
1259 | void perf_event_task_sched_out(struct task_struct *task, | ||
1260 | struct task_struct *next) | ||
1261 | { | ||
1262 | int ctxn; | ||
1263 | |||
1264 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
1265 | |||
1266 | for_each_task_context_nr(ctxn) | ||
1267 | perf_event_context_sched_out(task, ctxn, next); | ||
1268 | } | ||
1269 | |||
1282 | static void task_ctx_sched_out(struct perf_event_context *ctx, | 1270 | static void task_ctx_sched_out(struct perf_event_context *ctx, |
1283 | enum event_type_t event_type) | 1271 | enum event_type_t event_type) |
1284 | { | 1272 | { |
1285 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1273 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
1286 | 1274 | ||
1287 | if (!cpuctx->task_ctx) | 1275 | if (!cpuctx->task_ctx) |
1288 | return; | 1276 | return; |
@@ -1355,9 +1343,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, | |||
1355 | if (event->cpu != -1 && event->cpu != smp_processor_id()) | 1343 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
1356 | continue; | 1344 | continue; |
1357 | 1345 | ||
1358 | if (group_can_go_on(event, cpuctx, can_add_hw)) | 1346 | if (group_can_go_on(event, cpuctx, can_add_hw)) { |
1359 | if (group_sched_in(event, cpuctx, ctx)) | 1347 | if (group_sched_in(event, cpuctx, ctx)) |
1360 | can_add_hw = 0; | 1348 | can_add_hw = 0; |
1349 | } | ||
1361 | } | 1350 | } |
1362 | } | 1351 | } |
1363 | 1352 | ||
@@ -1373,8 +1362,6 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
1373 | 1362 | ||
1374 | ctx->timestamp = perf_clock(); | 1363 | ctx->timestamp = perf_clock(); |
1375 | 1364 | ||
1376 | perf_disable(); | ||
1377 | |||
1378 | /* | 1365 | /* |
1379 | * First go through the list and put on any pinned groups | 1366 | * First go through the list and put on any pinned groups |
1380 | * in order to give them the best chance of going on. | 1367 | * in order to give them the best chance of going on. |
@@ -1386,8 +1373,7 @@ ctx_sched_in(struct perf_event_context *ctx, | |||
1386 | if (event_type & EVENT_FLEXIBLE) | 1373 | if (event_type & EVENT_FLEXIBLE) |
1387 | ctx_flexible_sched_in(ctx, cpuctx); | 1374 | ctx_flexible_sched_in(ctx, cpuctx); |
1388 | 1375 | ||
1389 | perf_enable(); | 1376 | out: |
1390 | out: | ||
1391 | raw_spin_unlock(&ctx->lock); | 1377 | raw_spin_unlock(&ctx->lock); |
1392 | } | 1378 | } |
1393 | 1379 | ||
@@ -1399,43 +1385,28 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | |||
1399 | ctx_sched_in(ctx, cpuctx, event_type); | 1385 | ctx_sched_in(ctx, cpuctx, event_type); |
1400 | } | 1386 | } |
1401 | 1387 | ||
1402 | static void task_ctx_sched_in(struct task_struct *task, | 1388 | static void task_ctx_sched_in(struct perf_event_context *ctx, |
1403 | enum event_type_t event_type) | 1389 | enum event_type_t event_type) |
1404 | { | 1390 | { |
1405 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1391 | struct perf_cpu_context *cpuctx; |
1406 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
1407 | 1392 | ||
1408 | if (likely(!ctx)) | 1393 | cpuctx = __get_cpu_context(ctx); |
1409 | return; | ||
1410 | if (cpuctx->task_ctx == ctx) | 1394 | if (cpuctx->task_ctx == ctx) |
1411 | return; | 1395 | return; |
1396 | |||
1412 | ctx_sched_in(ctx, cpuctx, event_type); | 1397 | ctx_sched_in(ctx, cpuctx, event_type); |
1413 | cpuctx->task_ctx = ctx; | 1398 | cpuctx->task_ctx = ctx; |
1414 | } | 1399 | } |
1415 | /* | ||
1416 | * Called from scheduler to add the events of the current task | ||
1417 | * with interrupts disabled. | ||
1418 | * | ||
1419 | * We restore the event value and then enable it. | ||
1420 | * | ||
1421 | * This does not protect us against NMI, but enable() | ||
1422 | * sets the enabled bit in the control field of event _before_ | ||
1423 | * accessing the event control register. If an NMI hits, then it will | ||
1424 | * keep the event running. | ||
1425 | */ | ||
1426 | void perf_event_task_sched_in(struct task_struct *task) | ||
1427 | { | ||
1428 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
1429 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
1430 | 1400 | ||
1431 | if (likely(!ctx)) | 1401 | void perf_event_context_sched_in(struct perf_event_context *ctx) |
1432 | return; | 1402 | { |
1403 | struct perf_cpu_context *cpuctx; | ||
1433 | 1404 | ||
1405 | cpuctx = __get_cpu_context(ctx); | ||
1434 | if (cpuctx->task_ctx == ctx) | 1406 | if (cpuctx->task_ctx == ctx) |
1435 | return; | 1407 | return; |
1436 | 1408 | ||
1437 | perf_disable(); | 1409 | perf_pmu_disable(ctx->pmu); |
1438 | |||
1439 | /* | 1410 | /* |
1440 | * We want to keep the following priority order: | 1411 | * We want to keep the following priority order: |
1441 | * cpu pinned (that don't need to move), task pinned, | 1412 | * cpu pinned (that don't need to move), task pinned, |
@@ -1449,7 +1420,37 @@ void perf_event_task_sched_in(struct task_struct *task) | |||
1449 | 1420 | ||
1450 | cpuctx->task_ctx = ctx; | 1421 | cpuctx->task_ctx = ctx; |
1451 | 1422 | ||
1452 | perf_enable(); | 1423 | /* |
1424 | * Since these rotations are per-cpu, we need to ensure the | ||
1425 | * cpu-context we got scheduled on is actually rotating. | ||
1426 | */ | ||
1427 | perf_pmu_rotate_start(ctx->pmu); | ||
1428 | perf_pmu_enable(ctx->pmu); | ||
1429 | } | ||
1430 | |||
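perf_event_context_sched_in() brackets the reschedule with perf_pmu_disable()/perf_pmu_enable() on the context's own PMU, where the deleted code used the global perf_disable()/perf_enable() pair. Those helpers nest by counting; a sketch of that pattern (the demo struct and hooks are assumptions, not the kernel's fields):

#include <stdio.h>

struct demo_pmu {
        int disable_count;                      /* nesting depth, like pmu_disable_count */
        void (*hw_disable)(struct demo_pmu *);
        void (*hw_enable)(struct demo_pmu *);
};

/* Only the outermost disable/enable actually touches the hardware. */
static void pmu_disable(struct demo_pmu *pmu)
{
        if (!pmu->disable_count++)
                pmu->hw_disable(pmu);
}

static void pmu_enable(struct demo_pmu *pmu)
{
        if (!--pmu->disable_count)
                pmu->hw_enable(pmu);
}

static void hw_off(struct demo_pmu *pmu) { printf("PMU stopped\n"); }
static void hw_on(struct demo_pmu *pmu)  { printf("PMU restarted\n"); }

int main(void)
{
        struct demo_pmu pmu = { 0, hw_off, hw_on };

        pmu_disable(&pmu);      /* outermost: stops the hardware    */
        pmu_disable(&pmu);      /* nested: only bumps the counter   */
        pmu_enable(&pmu);
        pmu_enable(&pmu);       /* outermost: restarts the hardware */
        return 0;
}

Scoping the disable to one PMU is what lets several PMUs coexist without a global hardware freeze.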
1431 | /* | ||
1432 | * Called from scheduler to add the events of the current task | ||
1433 | * with interrupts disabled. | ||
1434 | * | ||
1435 | * We restore the event value and then enable it. | ||
1436 | * | ||
1437 | * This does not protect us against NMI, but enable() | ||
1438 | * sets the enabled bit in the control field of event _before_ | ||
1439 | * accessing the event control register. If an NMI hits, then it will | ||
1440 | * keep the event running. | ||
1441 | */ | ||
1442 | void perf_event_task_sched_in(struct task_struct *task) | ||
1443 | { | ||
1444 | struct perf_event_context *ctx; | ||
1445 | int ctxn; | ||
1446 | |||
1447 | for_each_task_context_nr(ctxn) { | ||
1448 | ctx = task->perf_event_ctxp[ctxn]; | ||
1449 | if (likely(!ctx)) | ||
1450 | continue; | ||
1451 | |||
1452 | perf_event_context_sched_in(ctx); | ||
1453 | } | ||
1453 | } | 1454 | } |
1454 | 1455 | ||
1455 | #define MAX_INTERRUPTS (~0ULL) | 1456 | #define MAX_INTERRUPTS (~0ULL) |
@@ -1529,22 +1530,6 @@ do { \ | |||
1529 | return div64_u64(dividend, divisor); | 1530 | return div64_u64(dividend, divisor); |
1530 | } | 1531 | } |
1531 | 1532 | ||
1532 | static void perf_event_stop(struct perf_event *event) | ||
1533 | { | ||
1534 | if (!event->pmu->stop) | ||
1535 | return event->pmu->disable(event); | ||
1536 | |||
1537 | return event->pmu->stop(event); | ||
1538 | } | ||
1539 | |||
1540 | static int perf_event_start(struct perf_event *event) | ||
1541 | { | ||
1542 | if (!event->pmu->start) | ||
1543 | return event->pmu->enable(event); | ||
1544 | |||
1545 | return event->pmu->start(event); | ||
1546 | } | ||
1547 | |||
1548 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | 1533 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) |
1549 | { | 1534 | { |
1550 | struct hw_perf_event *hwc = &event->hw; | 1535 | struct hw_perf_event *hwc = &event->hw; |
@@ -1564,15 +1549,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | |||
1564 | hwc->sample_period = sample_period; | 1549 | hwc->sample_period = sample_period; |
1565 | 1550 | ||
1566 | if (local64_read(&hwc->period_left) > 8*sample_period) { | 1551 | if (local64_read(&hwc->period_left) > 8*sample_period) { |
1567 | perf_disable(); | 1552 | event->pmu->stop(event, PERF_EF_UPDATE); |
1568 | perf_event_stop(event); | ||
1569 | local64_set(&hwc->period_left, 0); | 1553 | local64_set(&hwc->period_left, 0); |
1570 | perf_event_start(event); | 1554 | event->pmu->start(event, PERF_EF_RELOAD); |
1571 | perf_enable(); | ||
1572 | } | 1555 | } |
1573 | } | 1556 | } |
1574 | 1557 | ||
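In perf_adjust_period() the deleted perf_event_stop()/perf_event_start() wrappers give way to direct ->stop(event, PERF_EF_UPDATE) and ->start(event, PERF_EF_RELOAD) calls: UPDATE asks the PMU to fold the live hardware count into the event before stopping, and RELOAD asks it to reprogram the counter from the just-reset period_left. A rough userspace model of those flag semantics (all names below are stand-ins):

#include <stdint.h>
#include <stdio.h>

#define DEMO_EF_RELOAD 0x01     /* mirrors PERF_EF_RELOAD */
#define DEMO_EF_UPDATE 0x02     /* mirrors PERF_EF_UPDATE */

struct demo_event {
        uint64_t count;         /* accumulated event count         */
        uint64_t sample_period; /* events between samples          */
        int64_t  period_left;   /* remainder of the current period */
        uint64_t hw_raw;        /* pretend hardware counter state  */
};

static void demo_stop(struct demo_event *e, int flags)
{
        if (flags & DEMO_EF_UPDATE) {   /* fold hardware state into count */
                e->count += e->hw_raw;
                e->hw_raw = 0;
        }
}

static void demo_start(struct demo_event *e, int flags)
{
        if (flags & DEMO_EF_RELOAD) {   /* reprogram from period_left */
                int64_t left = e->period_left ? e->period_left
                                              : (int64_t)e->sample_period;
                printf("arming counter for %lld events\n", (long long)left);
        }
}

int main(void)
{
        struct demo_event e = {
                .count = 0, .sample_period = 4096,
                .period_left = 1 << 20,         /* ran far ahead */
                .hw_raw = 17,
        };

        /* The sequence the hunk above uses when period_left exceeds
         * 8 * sample_period: */
        demo_stop(&e, DEMO_EF_UPDATE);
        e.period_left = 0;
        demo_start(&e, DEMO_EF_RELOAD);
        return 0;
}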
1575 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | 1558 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) |
1576 | { | 1559 | { |
1577 | struct perf_event *event; | 1560 | struct perf_event *event; |
1578 | struct hw_perf_event *hwc; | 1561 | struct hw_perf_event *hwc; |
@@ -1597,23 +1580,19 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1597 | */ | 1580 | */ |
1598 | if (interrupts == MAX_INTERRUPTS) { | 1581 | if (interrupts == MAX_INTERRUPTS) { |
1599 | perf_log_throttle(event, 1); | 1582 | perf_log_throttle(event, 1); |
1600 | perf_disable(); | 1583 | event->pmu->start(event, 0); |
1601 | event->pmu->unthrottle(event); | ||
1602 | perf_enable(); | ||
1603 | } | 1584 | } |
1604 | 1585 | ||
1605 | if (!event->attr.freq || !event->attr.sample_freq) | 1586 | if (!event->attr.freq || !event->attr.sample_freq) |
1606 | continue; | 1587 | continue; |
1607 | 1588 | ||
1608 | perf_disable(); | ||
1609 | event->pmu->read(event); | 1589 | event->pmu->read(event); |
1610 | now = local64_read(&event->count); | 1590 | now = local64_read(&event->count); |
1611 | delta = now - hwc->freq_count_stamp; | 1591 | delta = now - hwc->freq_count_stamp; |
1612 | hwc->freq_count_stamp = now; | 1592 | hwc->freq_count_stamp = now; |
1613 | 1593 | ||
1614 | if (delta > 0) | 1594 | if (delta > 0) |
1615 | perf_adjust_period(event, TICK_NSEC, delta); | 1595 | perf_adjust_period(event, period, delta); |
1616 | perf_enable(); | ||
1617 | } | 1596 | } |
1618 | raw_spin_unlock(&ctx->lock); | 1597 | raw_spin_unlock(&ctx->lock); |
1619 | } | 1598 | } |
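perf_ctx_adjust_freq() now receives the elapsed period explicitly, since with per-PMU rotation the interval is no longer always TICK_NSEC. The correction in perf_adjust_period() estimates the period that would have hit attr.sample_freq over the last interval and moves an eighth of the way there. A worked sketch of that low-pass update, as this editor reads perf_calculate_period() (constants picked for the example):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Compute the period that would have produced sample_freq samples/sec
 * over the last interval, then take one eighth of the step toward it. */
static uint64_t adjust_period(uint64_t sample_period, uint64_t sample_freq,
                              uint64_t nsec, uint64_t count)
{
        uint64_t target = count * NSEC_PER_SEC / (nsec * sample_freq);
        int64_t delta = ((int64_t)(target - sample_period) + 7) / 8;

        sample_period += delta;
        return sample_period ? sample_period : 1;  /* never drop to zero */
}

int main(void)
{
        /* 2,000,000 events in 4ms at a requested 1000 samples/sec:
         * the ideal period is 500,000 events per sample. */
        uint64_t p = adjust_period(100000, 1000, 4000000, 2000000);

        printf("new period: %llu\n", (unsigned long long)p);  /* 150000 */
        return 0;
}

The one-eighth step damps oscillation when the event rate is bursty rather than steady.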
@@ -1631,32 +1610,38 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
1631 | raw_spin_unlock(&ctx->lock); | 1610 | raw_spin_unlock(&ctx->lock); |
1632 | } | 1611 | } |
1633 | 1612 | ||
1634 | void perf_event_task_tick(struct task_struct *curr) | 1613 | /* |
1614 | * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized | ||
1615 | * because they're strictly cpu affine and rotate_start is called with IRQs | ||
1616 | * disabled, while rotate_context is called from IRQ context. | ||
1617 | */ | ||
1618 | static void perf_rotate_context(struct perf_cpu_context *cpuctx) | ||
1635 | { | 1619 | { |
1636 | struct perf_cpu_context *cpuctx; | 1620 | u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; |
1637 | struct perf_event_context *ctx; | 1621 | struct perf_event_context *ctx = NULL; |
1638 | int rotate = 0; | 1622 | int rotate = 0, remove = 1; |
1639 | |||
1640 | if (!atomic_read(&nr_events)) | ||
1641 | return; | ||
1642 | 1623 | ||
1643 | cpuctx = &__get_cpu_var(perf_cpu_context); | 1624 | if (cpuctx->ctx.nr_events) { |
1644 | if (cpuctx->ctx.nr_events && | 1625 | remove = 0; |
1645 | cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) | 1626 | if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) |
1646 | rotate = 1; | 1627 | rotate = 1; |
1628 | } | ||
1647 | 1629 | ||
1648 | ctx = curr->perf_event_ctxp; | 1630 | ctx = cpuctx->task_ctx; |
1649 | if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) | 1631 | if (ctx && ctx->nr_events) { |
1650 | rotate = 1; | 1632 | remove = 0; |
1633 | if (ctx->nr_events != ctx->nr_active) | ||
1634 | rotate = 1; | ||
1635 | } | ||
1651 | 1636 | ||
1652 | perf_ctx_adjust_freq(&cpuctx->ctx); | 1637 | perf_pmu_disable(cpuctx->ctx.pmu); |
1638 | perf_ctx_adjust_freq(&cpuctx->ctx, interval); | ||
1653 | if (ctx) | 1639 | if (ctx) |
1654 | perf_ctx_adjust_freq(ctx); | 1640 | perf_ctx_adjust_freq(ctx, interval); |
1655 | 1641 | ||
1656 | if (!rotate) | 1642 | if (!rotate) |
1657 | return; | 1643 | goto done; |
1658 | 1644 | ||
1659 | perf_disable(); | ||
1660 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | 1645 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
1661 | if (ctx) | 1646 | if (ctx) |
1662 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); | 1647 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); |
@@ -1667,8 +1652,27 @@ void perf_event_task_tick(struct task_struct *curr) | |||
1667 | 1652 | ||
1668 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | 1653 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); |
1669 | if (ctx) | 1654 | if (ctx) |
1670 | task_ctx_sched_in(curr, EVENT_FLEXIBLE); | 1655 | task_ctx_sched_in(ctx, EVENT_FLEXIBLE); |
1671 | perf_enable(); | 1656 | |
1657 | done: | ||
1658 | if (remove) | ||
1659 | list_del_init(&cpuctx->rotation_list); | ||
1660 | |||
1661 | perf_pmu_enable(cpuctx->ctx.pmu); | ||
1662 | } | ||
1663 | |||
1664 | void perf_event_task_tick(void) | ||
1665 | { | ||
1666 | struct list_head *head = &__get_cpu_var(rotation_list); | ||
1667 | struct perf_cpu_context *cpuctx, *tmp; | ||
1668 | |||
1669 | WARN_ON(!irqs_disabled()); | ||
1670 | |||
1671 | list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { | ||
1672 | if (cpuctx->jiffies_interval == 1 || | ||
1673 | !(jiffies % cpuctx->jiffies_interval)) | ||
1674 | perf_rotate_context(cpuctx); | ||
1675 | } | ||
1672 | } | 1676 | } |
1673 | 1677 | ||
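perf_event_task_tick() now walks the per-CPU rotation_list instead of taking the current task, and each cpu-context rotates only on ticks that match its jiffies_interval, so PMUs that want slower rotation can have it. The gate is a plain modulo test; a small illustration (demo values, not kernel state):

#include <stdio.h>

/* Rotate on every tick when interval == 1, otherwise only on ticks
 * that are an exact multiple of the interval: the same test as
 * "interval == 1 || !(jiffies % interval)" in the hunk above. */
static int should_rotate(unsigned long jiffies, unsigned long interval)
{
        return interval == 1 || !(jiffies % interval);
}

int main(void)
{
        unsigned long j;

        for (j = 1; j <= 8; j++)
                printf("tick %lu: %s\n", j,
                       should_rotate(j, 4) ? "rotate" : "skip");
        return 0;
}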
1674 | static int event_enable_on_exec(struct perf_event *event, | 1678 | static int event_enable_on_exec(struct perf_event *event, |
@@ -1690,20 +1694,18 @@ static int event_enable_on_exec(struct perf_event *event, | |||
1690 | * Enable all of a task's events that have been marked enable-on-exec. | 1694 | * Enable all of a task's events that have been marked enable-on-exec. |
1691 | * This expects task == current. | 1695 | * This expects task == current. |
1692 | */ | 1696 | */ |
1693 | static void perf_event_enable_on_exec(struct task_struct *task) | 1697 | static void perf_event_enable_on_exec(struct perf_event_context *ctx) |
1694 | { | 1698 | { |
1695 | struct perf_event_context *ctx; | ||
1696 | struct perf_event *event; | 1699 | struct perf_event *event; |
1697 | unsigned long flags; | 1700 | unsigned long flags; |
1698 | int enabled = 0; | 1701 | int enabled = 0; |
1699 | int ret; | 1702 | int ret; |
1700 | 1703 | ||
1701 | local_irq_save(flags); | 1704 | local_irq_save(flags); |
1702 | ctx = task->perf_event_ctxp; | ||
1703 | if (!ctx || !ctx->nr_events) | 1705 | if (!ctx || !ctx->nr_events) |
1704 | goto out; | 1706 | goto out; |
1705 | 1707 | ||
1706 | __perf_event_task_sched_out(ctx); | 1708 | task_ctx_sched_out(ctx, EVENT_ALL); |
1707 | 1709 | ||
1708 | raw_spin_lock(&ctx->lock); | 1710 | raw_spin_lock(&ctx->lock); |
1709 | 1711 | ||
@@ -1727,8 +1729,8 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1727 | 1729 | ||
1728 | raw_spin_unlock(&ctx->lock); | 1730 | raw_spin_unlock(&ctx->lock); |
1729 | 1731 | ||
1730 | perf_event_task_sched_in(task); | 1732 | perf_event_context_sched_in(ctx); |
1731 | out: | 1733 | out: |
1732 | local_irq_restore(flags); | 1734 | local_irq_restore(flags); |
1733 | } | 1735 | } |
1734 | 1736 | ||
@@ -1737,9 +1739,9 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1737 | */ | 1739 | */ |
1738 | static void __perf_event_read(void *info) | 1740 | static void __perf_event_read(void *info) |
1739 | { | 1741 | { |
1740 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
1741 | struct perf_event *event = info; | 1742 | struct perf_event *event = info; |
1742 | struct perf_event_context *ctx = event->ctx; | 1743 | struct perf_event_context *ctx = event->ctx; |
1744 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | ||
1743 | 1745 | ||
1744 | /* | 1746 | /* |
1745 | * If this is a task context, we need to check whether it is | 1747 | * If this is a task context, we need to check whether it is |
@@ -1787,11 +1789,219 @@ static u64 perf_event_read(struct perf_event *event) | |||
1787 | } | 1789 | } |
1788 | 1790 | ||
1789 | /* | 1791 | /* |
1790 | * Initialize the perf_event context in a task_struct: | 1792 | * Callchain support |
1791 | */ | 1793 | */ |
1794 | |||
1795 | struct callchain_cpus_entries { | ||
1796 | struct rcu_head rcu_head; | ||
1797 | struct perf_callchain_entry *cpu_entries[0]; | ||
1798 | }; | ||
1799 | |||
1800 | static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); | ||
1801 | static atomic_t nr_callchain_events; | ||
1802 | static DEFINE_MUTEX(callchain_mutex); | ||
1803 | struct callchain_cpus_entries *callchain_cpus_entries; | ||
1804 | |||
1805 | |||
1806 | __weak void perf_callchain_kernel(struct perf_callchain_entry *entry, | ||
1807 | struct pt_regs *regs) | ||
1808 | { | ||
1809 | } | ||
1810 | |||
1811 | __weak void perf_callchain_user(struct perf_callchain_entry *entry, | ||
1812 | struct pt_regs *regs) | ||
1813 | { | ||
1814 | } | ||
1815 | |||
1816 | static void release_callchain_buffers_rcu(struct rcu_head *head) | ||
1817 | { | ||
1818 | struct callchain_cpus_entries *entries; | ||
1819 | int cpu; | ||
1820 | |||
1821 | entries = container_of(head, struct callchain_cpus_entries, rcu_head); | ||
1822 | |||
1823 | for_each_possible_cpu(cpu) | ||
1824 | kfree(entries->cpu_entries[cpu]); | ||
1825 | |||
1826 | kfree(entries); | ||
1827 | } | ||
1828 | |||
1829 | static void release_callchain_buffers(void) | ||
1830 | { | ||
1831 | struct callchain_cpus_entries *entries; | ||
1832 | |||
1833 | entries = callchain_cpus_entries; | ||
1834 | rcu_assign_pointer(callchain_cpus_entries, NULL); | ||
1835 | call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); | ||
1836 | } | ||
1837 | |||
1838 | static int alloc_callchain_buffers(void) | ||
1839 | { | ||
1840 | int cpu; | ||
1841 | int size; | ||
1842 | struct callchain_cpus_entries *entries; | ||
1843 | |||
1844 | /* | ||
1845 | * We can't use the percpu allocation API for data that can be | ||
1846 | * accessed from NMI. Use a temporary manual per cpu allocation | ||
1847 | * until that gets sorted out. | ||
1848 | */ | ||
1849 | size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * | ||
1850 | num_possible_cpus(); | ||
1851 | |||
1852 | entries = kzalloc(size, GFP_KERNEL); | ||
1853 | if (!entries) | ||
1854 | return -ENOMEM; | ||
1855 | |||
1856 | size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; | ||
1857 | |||
1858 | for_each_possible_cpu(cpu) { | ||
1859 | entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, | ||
1860 | cpu_to_node(cpu)); | ||
1861 | if (!entries->cpu_entries[cpu]) | ||
1862 | goto fail; | ||
1863 | } | ||
1864 | |||
1865 | rcu_assign_pointer(callchain_cpus_entries, entries); | ||
1866 | |||
1867 | return 0; | ||
1868 | |||
1869 | fail: | ||
1870 | for_each_possible_cpu(cpu) | ||
1871 | kfree(entries->cpu_entries[cpu]); | ||
1872 | kfree(entries); | ||
1873 | |||
1874 | return -ENOMEM; | ||
1875 | } | ||
1876 | |||
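alloc_callchain_buffers() sizes its header as sizeof(*entries) plus one pointer per possible CPU (the flexible-array idiom) and then hands each CPU a node-local buffer via kmalloc_node(), because, per the comment above, the percpu allocator cannot yet serve NMI-safe data. The same layout in plain C, with malloc/calloc standing in for the kernel allocators and illustrative sizes:

#include <stdlib.h>

struct entry { unsigned long ip[64]; };   /* stand-in for perf_callchain_entry */

struct cpus_entries {
        int ncpus;                        /* the kernel keeps an rcu_head here */
        struct entry *cpu_entries[];      /* flexible array member */
};

static struct cpus_entries *alloc_buffers(int ncpus, int nctx)
{
        size_t size = sizeof(struct cpus_entries) +
                      sizeof(struct entry *) * ncpus;
        struct cpus_entries *e = calloc(1, size);   /* zeroed, like kzalloc */
        int cpu;

        if (!e)
                return NULL;
        e->ncpus = ncpus;

        for (cpu = 0; cpu < ncpus; cpu++) {
                /* one entry per recursion context, per CPU */
                e->cpu_entries[cpu] = malloc(sizeof(struct entry) * nctx);
                if (!e->cpu_entries[cpu])
                        goto fail;
        }
        return e;

fail:
        for (cpu = 0; cpu < ncpus; cpu++)
                free(e->cpu_entries[cpu]);          /* free(NULL) is a no-op */
        free(e);
        return NULL;
}

int main(void)
{
        struct cpus_entries *e = alloc_buffers(4, 4);  /* 4 CPUs, 4 contexts */

        return e ? 0 : 1;
}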
1877 | static int get_callchain_buffers(void) | ||
1878 | { | ||
1879 | int err = 0; | ||
1880 | int count; | ||
1881 | |||
1882 | mutex_lock(&callchain_mutex); | ||
1883 | |||
1884 | count = atomic_inc_return(&nr_callchain_events); | ||
1885 | if (WARN_ON_ONCE(count < 1)) { | ||
1886 | err = -EINVAL; | ||
1887 | goto exit; | ||
1888 | } | ||
1889 | |||
1890 | if (count > 1) { | ||
1891 | /* If the allocation failed, give up */ | ||
1892 | if (!callchain_cpus_entries) | ||
1893 | err = -ENOMEM; | ||
1894 | goto exit; | ||
1895 | } | ||
1896 | |||
1897 | err = alloc_callchain_buffers(); | ||
1898 | if (err) | ||
1899 | release_callchain_buffers(); | ||
1900 | exit: | ||
1901 | mutex_unlock(&callchain_mutex); | ||
1902 | |||
1903 | return err; | ||
1904 | } | ||
1905 | |||
1906 | static void put_callchain_buffers(void) | ||
1907 | { | ||
1908 | if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { | ||
1909 | release_callchain_buffers(); | ||
1910 | mutex_unlock(&callchain_mutex); | ||
1911 | } | ||
1912 | } | ||
1913 | |||
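get_callchain_buffers()/put_callchain_buffers() form a lazy-init refcount: the first callchain user allocates under callchain_mutex, later users only bump nr_callchain_events, and atomic_dec_and_mutex_lock() lets the last user free the buffers without racing a new first user. A userspace analogue of the pattern (pthread and C11 atomics stand in for the kernel primitives, and the put side is simplified to always take the mutex):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

static atomic_int nr_users;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *buffers;                   /* shared, lazily built resource */

static int get_buffers(void)
{
        int err = 0;

        pthread_mutex_lock(&lock);
        if (atomic_fetch_add(&nr_users, 1) == 0) {
                buffers = malloc(4096); /* first user allocates */
                if (!buffers)
                        err = -1;
        } else if (!buffers) {
                err = -1;               /* an earlier allocation failed: give up */
        }
        pthread_mutex_unlock(&lock);
        return err;
}

static void put_buffers(void)
{
        /* Analogue of atomic_dec_and_mutex_lock(); the kernel version
         * takes the mutex only when the count may be hitting zero. */
        pthread_mutex_lock(&lock);
        if (atomic_fetch_sub(&nr_users, 1) == 1) {
                free(buffers);          /* last user frees */
                buffers = NULL;
        }
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        if (get_buffers())
                return 1;
        put_buffers();
        return 0;
}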
1914 | static int get_recursion_context(int *recursion) | ||
1915 | { | ||
1916 | int rctx; | ||
1917 | |||
1918 | if (in_nmi()) | ||
1919 | rctx = 3; | ||
1920 | else if (in_irq()) | ||
1921 | rctx = 2; | ||
1922 | else if (in_softirq()) | ||
1923 | rctx = 1; | ||
1924 | else | ||
1925 | rctx = 0; | ||
1926 | |||
1927 | if (recursion[rctx]) | ||
1928 | return -1; | ||
1929 | |||
1930 | recursion[rctx]++; | ||
1931 | barrier(); | ||
1932 | |||
1933 | return rctx; | ||
1934 | } | ||
1935 | |||
1936 | static inline void put_recursion_context(int *recursion, int rctx) | ||
1937 | { | ||
1938 | barrier(); | ||
1939 | recursion[rctx]--; | ||
1940 | } | ||
1941 | |||
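get_recursion_context() classifies the current execution level (task, softirq, hardirq, or NMI) and claims a per-CPU flag for it, so an event firing inside another event's handler at the same level is dropped rather than recursing; the barrier() calls keep the flag updates from being reordered around the protected region. A userspace model with the same four slots:

#include <stdio.h>

enum { CTX_TASK, CTX_SOFTIRQ, CTX_HARDIRQ, CTX_NMI, NR_CTX };

static int recursion[NR_CTX];           /* a per-CPU array in the kernel */

static int get_ctx(int level)
{
        if (recursion[level])
                return -1;              /* already inside this level: drop */
        recursion[level]++;
        __asm__ __volatile__("" ::: "memory");  /* compiler barrier() */
        return level;
}

static void put_ctx(int level)
{
        __asm__ __volatile__("" ::: "memory");  /* compiler barrier() */
        recursion[level]--;
}

int main(void)
{
        int rctx = get_ctx(CTX_TASK);

        printf("first entry: %d\n", rctx);                /*  0: granted */
        printf("nested entry: %d\n", get_ctx(CTX_TASK));  /* -1: refused */
        put_ctx(rctx);
        return 0;
}

A hardirq interrupting a task-level event still gets its own slot, so only same-level recursion is suppressed.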
1942 | static struct perf_callchain_entry *get_callchain_entry(int *rctx) | ||
1943 | { | ||
1944 | int cpu; | ||
1945 | struct callchain_cpus_entries *entries; | ||
1946 | |||
1947 | *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); | ||
1948 | if (*rctx == -1) | ||
1949 | return NULL; | ||
1950 | |||
1951 | entries = rcu_dereference(callchain_cpus_entries); | ||
1952 | if (!entries) | ||
1953 | return NULL; | ||
1954 | |||
1955 | cpu = smp_processor_id(); | ||
1956 | |||
1957 | return &entries->cpu_entries[cpu][*rctx]; | ||
1958 | } | ||
1959 | |||
1792 | static void | 1960 | static void |
1793 | __perf_event_init_context(struct perf_event_context *ctx, | 1961 | put_callchain_entry(int rctx) |
1794 | struct task_struct *task) | 1962 | { |
1963 | put_recursion_context(__get_cpu_var(callchain_recursion), rctx); | ||
1964 | } | ||
1965 | |||
1966 | static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
1967 | { | ||
1968 | int rctx; | ||
1969 | struct perf_callchain_entry *entry; | ||
1970 | |||
1971 | |||
1972 | entry = get_callchain_entry(&rctx); | ||
1973 | if (rctx == -1) | ||
1974 | return NULL; | ||
1975 | |||
1976 | if (!entry) | ||
1977 | goto exit_put; | ||
1978 | |||
1979 | entry->nr = 0; | ||
1980 | |||
1981 | if (!user_mode(regs)) { | ||
1982 | perf_callchain_store(entry, PERF_CONTEXT_KERNEL); | ||
1983 | perf_callchain_kernel(entry, regs); | ||
1984 | if (current->mm) | ||
1985 | regs = task_pt_regs(current); | ||
1986 | else | ||
1987 | regs = NULL; | ||
1988 | } | ||
1989 | |||
1990 | if (regs) { | ||
1991 | perf_callchain_store(entry, PERF_CONTEXT_USER); | ||
1992 | perf_callchain_user(entry, regs); | ||
1993 | } | ||
1994 | |||
1995 | exit_put: | ||
1996 | put_callchain_entry(rctx); | ||
1997 | |||
1998 | return entry; | ||
1999 | } | ||
2000 | |||
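perf_callchain() is now generic code rather than a weak arch stub: it stores a PERF_CONTEXT_KERNEL marker and the kernel frames first, then, if the interrupted task has a user mm, switches regs to task_pt_regs() and appends PERF_CONTEXT_USER plus the user frames, leaving only perf_callchain_kernel()/perf_callchain_user() to each arch. A sketch of how the two markers delimit the halves of one flat entry (the capped store mirrors perf_callchain_store(); the marker values follow the perf ABI, the addresses are made up):

#include <stdint.h>
#include <stdio.h>

#define CTX_KERNEL ((uint64_t)-128)     /* PERF_CONTEXT_KERNEL */
#define CTX_USER   ((uint64_t)-512)     /* PERF_CONTEXT_USER   */
#define MAX_STACK  16

struct chain { uint64_t nr; uint64_t ip[MAX_STACK]; };

static void store(struct chain *c, uint64_t ip)
{
        if (c->nr < MAX_STACK)          /* bounded, like perf_callchain_store() */
                c->ip[c->nr++] = ip;
}

int main(void)
{
        struct chain c = { 0 };

        store(&c, CTX_KERNEL);          /* kernel half first */
        store(&c, 0xffffffff81000010ULL);
        store(&c, 0xffffffff81000042ULL);

        store(&c, CTX_USER);            /* then the user half */
        store(&c, 0x400512ULL);

        for (uint64_t i = 0; i < c.nr; i++)
                printf("%#llx\n", (unsigned long long)c.ip[i]);
        return 0;
}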
2001 | /* | ||
2002 | * Initialize the perf_event context in a task_struct: | ||
2003 | */ | ||
2004 | static void __perf_event_init_context(struct perf_event_context *ctx) | ||
1795 | { | 2005 | { |
1796 | raw_spin_lock_init(&ctx->lock); | 2006 | raw_spin_lock_init(&ctx->lock); |
1797 | mutex_init(&ctx->mutex); | 2007 | mutex_init(&ctx->mutex); |
@@ -1799,45 +2009,38 @@ __perf_event_init_context(struct perf_event_context *ctx, | |||
1799 | INIT_LIST_HEAD(&ctx->flexible_groups); | 2009 | INIT_LIST_HEAD(&ctx->flexible_groups); |
1800 | INIT_LIST_HEAD(&ctx->event_list); | 2010 | INIT_LIST_HEAD(&ctx->event_list); |
1801 | atomic_set(&ctx->refcount, 1); | 2011 | atomic_set(&ctx->refcount, 1); |
1802 | ctx->task = task; | ||
1803 | } | 2012 | } |
1804 | 2013 | ||
1805 | static struct perf_event_context *find_get_context(pid_t pid, int cpu) | 2014 | static struct perf_event_context * |
2015 | alloc_perf_context(struct pmu *pmu, struct task_struct *task) | ||
1806 | { | 2016 | { |
1807 | struct perf_event_context *ctx; | 2017 | struct perf_event_context *ctx; |
1808 | struct perf_cpu_context *cpuctx; | ||
1809 | struct task_struct *task; | ||
1810 | unsigned long flags; | ||
1811 | int err; | ||
1812 | |||
1813 | if (pid == -1 && cpu != -1) { | ||
1814 | /* Must be root to operate on a CPU event: */ | ||
1815 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
1816 | return ERR_PTR(-EACCES); | ||
1817 | 2018 | ||
1818 | if (cpu < 0 || cpu >= nr_cpumask_bits) | 2019 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); |
1819 | return ERR_PTR(-EINVAL); | 2020 | if (!ctx) |
2021 | return NULL; | ||
1820 | 2022 | ||
1821 | /* | 2023 | __perf_event_init_context(ctx); |
1822 | * We could be clever and allow attaching an event to an | 2024 | if (task) { |
1823 | * offline CPU and activate it when the CPU comes up, but | 2025 | ctx->task = task; |
1824 | * that's for later. | 2026 | get_task_struct(task); |
1825 | */ | 2027 | } |
1826 | if (!cpu_online(cpu)) | 2028 | ctx->pmu = pmu; |
1827 | return ERR_PTR(-ENODEV); | ||
1828 | 2029 | ||
1829 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 2030 | return ctx; |
1830 | ctx = &cpuctx->ctx; | 2031 | } |
1831 | get_ctx(ctx); | ||
1832 | 2032 | ||
1833 | return ctx; | 2033 | static struct task_struct * |
1834 | } | 2034 | find_lively_task_by_vpid(pid_t vpid) |
2035 | { | ||
2036 | struct task_struct *task; | ||
2037 | int err; | ||
1835 | 2038 | ||
1836 | rcu_read_lock(); | 2039 | rcu_read_lock(); |
1837 | if (!pid) | 2040 | if (!vpid) |
1838 | task = current; | 2041 | task = current; |
1839 | else | 2042 | else |
1840 | task = find_task_by_vpid(pid); | 2043 | task = find_task_by_vpid(vpid); |
1841 | if (task) | 2044 | if (task) |
1842 | get_task_struct(task); | 2045 | get_task_struct(task); |
1843 | rcu_read_unlock(); | 2046 | rcu_read_unlock(); |
@@ -1857,35 +2060,79 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) | |||
1857 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2060 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
1858 | goto errout; | 2061 | goto errout; |
1859 | 2062 | ||
1860 | retry: | 2063 | return task; |
1861 | ctx = perf_lock_task_context(task, &flags); | 2064 | errout: |
2065 | put_task_struct(task); | ||
2066 | return ERR_PTR(err); | ||
2067 | |||
2068 | } | ||
2069 | |||
2070 | static struct perf_event_context * | ||
2071 | find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | ||
2072 | { | ||
2073 | struct perf_event_context *ctx; | ||
2074 | struct perf_cpu_context *cpuctx; | ||
2075 | unsigned long flags; | ||
2076 | int ctxn, err; | ||
2077 | |||
2078 | if (!task && cpu != -1) { | ||
2079 | /* Must be root to operate on a CPU event: */ | ||
2080 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
2081 | return ERR_PTR(-EACCES); | ||
2082 | |||
2083 | if (cpu < 0 || cpu >= nr_cpumask_bits) | ||
2084 | return ERR_PTR(-EINVAL); | ||
2085 | |||
2086 | /* | ||
2087 | * We could be clever and allow attaching an event to an | ||
2088 | * offline CPU and activate it when the CPU comes up, but | ||
2089 | * that's for later. | ||
2090 | */ | ||
2091 | if (!cpu_online(cpu)) | ||
2092 | return ERR_PTR(-ENODEV); | ||
2093 | |||
2094 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
2095 | ctx = &cpuctx->ctx; | ||
2096 | get_ctx(ctx); | ||
2097 | |||
2098 | return ctx; | ||
2099 | } | ||
2100 | |||
2101 | err = -EINVAL; | ||
2102 | ctxn = pmu->task_ctx_nr; | ||
2103 | if (ctxn < 0) | ||
2104 | goto errout; | ||
2105 | |||
2106 | retry: | ||
2107 | ctx = perf_lock_task_context(task, ctxn, &flags); | ||
1862 | if (ctx) { | 2108 | if (ctx) { |
1863 | unclone_ctx(ctx); | 2109 | unclone_ctx(ctx); |
1864 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 2110 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
1865 | } | 2111 | } |
1866 | 2112 | ||
1867 | if (!ctx) { | 2113 | if (!ctx) { |
1868 | ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); | 2114 | ctx = alloc_perf_context(pmu, task); |
1869 | err = -ENOMEM; | 2115 | err = -ENOMEM; |
1870 | if (!ctx) | 2116 | if (!ctx) |
1871 | goto errout; | 2117 | goto errout; |
1872 | __perf_event_init_context(ctx, task); | 2118 | |
1873 | get_ctx(ctx); | 2119 | get_ctx(ctx); |
1874 | if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { | 2120 | |
2121 | if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { | ||
1875 | /* | 2122 | /* |
1876 | * We raced with some other task; use | 2123 | * We raced with some other task; use |
1877 | * the context they set. | 2124 | * the context they set. |
1878 | */ | 2125 | */ |
2126 | put_task_struct(task); | ||
1879 | kfree(ctx); | 2127 | kfree(ctx); |
1880 | goto retry; | 2128 | goto retry; |
1881 | } | 2129 | } |
1882 | get_task_struct(task); | ||
1883 | } | 2130 | } |
1884 | 2131 | ||
1885 | put_task_struct(task); | 2132 | put_task_struct(task); |
1886 | return ctx; | 2133 | return ctx; |
1887 | 2134 | ||
1888 | errout: | 2135 | errout: |
1889 | put_task_struct(task); | 2136 | put_task_struct(task); |
1890 | return ERR_PTR(err); | 2137 | return ERR_PTR(err); |
1891 | } | 2138 | } |
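find_get_context() keeps its lock-free install: a freshly allocated context is published with cmpxchg() on the task's slot, and a loser of that race frees its copy and retries, now also dropping the task reference taken by alloc_perf_context(). The same race pattern in portable C11 (the payload struct is a stand-in; the retry loop is the point):

#include <stdatomic.h>
#include <stdlib.h>

struct ctx { int dummy; };

static _Atomic(struct ctx *) task_slot;   /* task->perf_event_ctxp[ctxn] */

static struct ctx *find_get_context(void)
{
        struct ctx *ctx;

retry:
        ctx = atomic_load(&task_slot);
        if (ctx)
                return ctx;               /* someone already installed one */

        ctx = calloc(1, sizeof(*ctx));
        if (!ctx)
                return NULL;

        struct ctx *expected = NULL;
        if (!atomic_compare_exchange_strong(&task_slot, &expected, ctx)) {
                /* We raced with another thread; use the context they set. */
                free(ctx);
                goto retry;
        }
        return ctx;
}

int main(void)
{
        return find_get_context() ? 0 : 1;
}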
@@ -1918,6 +2165,8 @@ static void free_event(struct perf_event *event) | |||
1918 | atomic_dec(&nr_comm_events); | 2165 | atomic_dec(&nr_comm_events); |
1919 | if (event->attr.task) | 2166 | if (event->attr.task) |
1920 | atomic_dec(&nr_task_events); | 2167 | atomic_dec(&nr_task_events); |
2168 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) | ||
2169 | put_callchain_buffers(); | ||
1921 | } | 2170 | } |
1922 | 2171 | ||
1923 | if (event->buffer) { | 2172 | if (event->buffer) { |
@@ -1928,7 +2177,9 @@ static void free_event(struct perf_event *event) | |||
1928 | if (event->destroy) | 2177 | if (event->destroy) |
1929 | event->destroy(event); | 2178 | event->destroy(event); |
1930 | 2179 | ||
1931 | put_ctx(event->ctx); | 2180 | if (event->ctx) |
2181 | put_ctx(event->ctx); | ||
2182 | |||
1932 | call_rcu(&event->rcu_head, free_event_rcu); | 2183 | call_rcu(&event->rcu_head, free_event_rcu); |
1933 | } | 2184 | } |
1934 | 2185 | ||
@@ -2349,6 +2600,9 @@ int perf_event_task_disable(void) | |||
2349 | 2600 | ||
2350 | static int perf_event_index(struct perf_event *event) | 2601 | static int perf_event_index(struct perf_event *event) |
2351 | { | 2602 | { |
2603 | if (event->hw.state & PERF_HES_STOPPED) | ||
2604 | return 0; | ||
2605 | |||
2352 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 2606 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
2353 | return 0; | 2607 | return 0; |
2354 | 2608 | ||
@@ -2961,16 +3215,6 @@ void perf_event_do_pending(void) | |||
2961 | } | 3215 | } |
2962 | 3216 | ||
2963 | /* | 3217 | /* |
2964 | * Callchain support -- arch specific | ||
2965 | */ | ||
2966 | |||
2967 | __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
2968 | { | ||
2969 | return NULL; | ||
2970 | } | ||
2971 | |||
2972 | |||
2973 | /* | ||
2974 | * We assume there is only KVM supporting the callbacks. | 3218 | * We assume there is only KVM supporting the callbacks. |
2975 | * Later on, we might change it to a list if there is | 3219 | * Later on, we might change it to a list if there is |
2976 | * another virtualization implementation supporting the callbacks. | 3220 | * another virtualization implementation supporting the callbacks. |
@@ -3076,7 +3320,7 @@ again: | |||
3076 | if (handle->wakeup != local_read(&buffer->wakeup)) | 3320 | if (handle->wakeup != local_read(&buffer->wakeup)) |
3077 | perf_output_wakeup(handle); | 3321 | perf_output_wakeup(handle); |
3078 | 3322 | ||
3079 | out: | 3323 | out: |
3080 | preempt_enable(); | 3324 | preempt_enable(); |
3081 | } | 3325 | } |
3082 | 3326 | ||
@@ -3464,14 +3708,20 @@ static void perf_event_output(struct perf_event *event, int nmi, | |||
3464 | struct perf_output_handle handle; | 3708 | struct perf_output_handle handle; |
3465 | struct perf_event_header header; | 3709 | struct perf_event_header header; |
3466 | 3710 | ||
3711 | /* protect the callchain buffers */ | ||
3712 | rcu_read_lock(); | ||
3713 | |||
3467 | perf_prepare_sample(&header, data, event, regs); | 3714 | perf_prepare_sample(&header, data, event, regs); |
3468 | 3715 | ||
3469 | if (perf_output_begin(&handle, event, header.size, nmi, 1)) | 3716 | if (perf_output_begin(&handle, event, header.size, nmi, 1)) |
3470 | return; | 3717 | goto exit; |
3471 | 3718 | ||
3472 | perf_output_sample(&handle, &header, data, event); | 3719 | perf_output_sample(&handle, &header, data, event); |
3473 | 3720 | ||
3474 | perf_output_end(&handle); | 3721 | perf_output_end(&handle); |
3722 | |||
3723 | exit: | ||
3724 | rcu_read_unlock(); | ||
3475 | } | 3725 | } |
3476 | 3726 | ||
3477 | /* | 3727 | /* |
@@ -3585,16 +3835,27 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, | |||
3585 | static void perf_event_task_event(struct perf_task_event *task_event) | 3835 | static void perf_event_task_event(struct perf_task_event *task_event) |
3586 | { | 3836 | { |
3587 | struct perf_cpu_context *cpuctx; | 3837 | struct perf_cpu_context *cpuctx; |
3588 | struct perf_event_context *ctx = task_event->task_ctx; | 3838 | struct perf_event_context *ctx; |
3839 | struct pmu *pmu; | ||
3840 | int ctxn; | ||
3589 | 3841 | ||
3590 | rcu_read_lock(); | 3842 | rcu_read_lock(); |
3591 | cpuctx = &get_cpu_var(perf_cpu_context); | 3843 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3592 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3844 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3593 | if (!ctx) | 3845 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3594 | ctx = rcu_dereference(current->perf_event_ctxp); | 3846 | |
3595 | if (ctx) | 3847 | ctx = task_event->task_ctx; |
3596 | perf_event_task_ctx(ctx, task_event); | 3848 | if (!ctx) { |
3597 | put_cpu_var(perf_cpu_context); | 3849 | ctxn = pmu->task_ctx_nr; |
3850 | if (ctxn < 0) | ||
3851 | goto next; | ||
3852 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
3853 | } | ||
3854 | if (ctx) | ||
3855 | perf_event_task_ctx(ctx, task_event); | ||
3856 | next: | ||
3857 | put_cpu_ptr(pmu->pmu_cpu_context); | ||
3858 | } | ||
3598 | rcu_read_unlock(); | 3859 | rcu_read_unlock(); |
3599 | } | 3860 | } |
3600 | 3861 | ||
@@ -3699,8 +3960,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3699 | { | 3960 | { |
3700 | struct perf_cpu_context *cpuctx; | 3961 | struct perf_cpu_context *cpuctx; |
3701 | struct perf_event_context *ctx; | 3962 | struct perf_event_context *ctx; |
3702 | unsigned int size; | ||
3703 | char comm[TASK_COMM_LEN]; | 3963 | char comm[TASK_COMM_LEN]; |
3964 | unsigned int size; | ||
3965 | struct pmu *pmu; | ||
3966 | int ctxn; | ||
3704 | 3967 | ||
3705 | memset(comm, 0, sizeof(comm)); | 3968 | memset(comm, 0, sizeof(comm)); |
3706 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); | 3969 | strlcpy(comm, comm_event->task->comm, sizeof(comm)); |
@@ -3712,21 +3975,36 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3712 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; | 3975 | comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; |
3713 | 3976 | ||
3714 | rcu_read_lock(); | 3977 | rcu_read_lock(); |
3715 | cpuctx = &get_cpu_var(perf_cpu_context); | 3978 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3716 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3979 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3717 | ctx = rcu_dereference(current->perf_event_ctxp); | 3980 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3718 | if (ctx) | 3981 | |
3719 | perf_event_comm_ctx(ctx, comm_event); | 3982 | ctxn = pmu->task_ctx_nr; |
3720 | put_cpu_var(perf_cpu_context); | 3983 | if (ctxn < 0) |
3984 | goto next; | ||
3985 | |||
3986 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
3987 | if (ctx) | ||
3988 | perf_event_comm_ctx(ctx, comm_event); | ||
3989 | next: | ||
3990 | put_cpu_ptr(pmu->pmu_cpu_context); | ||
3991 | } | ||
3721 | rcu_read_unlock(); | 3992 | rcu_read_unlock(); |
3722 | } | 3993 | } |
3723 | 3994 | ||
3724 | void perf_event_comm(struct task_struct *task) | 3995 | void perf_event_comm(struct task_struct *task) |
3725 | { | 3996 | { |
3726 | struct perf_comm_event comm_event; | 3997 | struct perf_comm_event comm_event; |
3998 | struct perf_event_context *ctx; | ||
3999 | int ctxn; | ||
4000 | |||
4001 | for_each_task_context_nr(ctxn) { | ||
4002 | ctx = task->perf_event_ctxp[ctxn]; | ||
4003 | if (!ctx) | ||
4004 | continue; | ||
3727 | 4005 | ||
3728 | if (task->perf_event_ctxp) | 4006 | perf_event_enable_on_exec(ctx); |
3729 | perf_event_enable_on_exec(task); | 4007 | } |
3730 | 4008 | ||
3731 | if (!atomic_read(&nr_comm_events)) | 4009 | if (!atomic_read(&nr_comm_events)) |
3732 | return; | 4010 | return; |
@@ -3828,6 +4106,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) | |||
3828 | char tmp[16]; | 4106 | char tmp[16]; |
3829 | char *buf = NULL; | 4107 | char *buf = NULL; |
3830 | const char *name; | 4108 | const char *name; |
4109 | struct pmu *pmu; | ||
4110 | int ctxn; | ||
3831 | 4111 | ||
3832 | memset(tmp, 0, sizeof(tmp)); | 4112 | memset(tmp, 0, sizeof(tmp)); |
3833 | 4113 | ||
@@ -3880,12 +4160,23 @@ got_name: | |||
3880 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; | 4160 | mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; |
3881 | 4161 | ||
3882 | rcu_read_lock(); | 4162 | rcu_read_lock(); |
3883 | cpuctx = &get_cpu_var(perf_cpu_context); | 4163 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3884 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); | 4164 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3885 | ctx = rcu_dereference(current->perf_event_ctxp); | 4165 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
3886 | if (ctx) | 4166 | vma->vm_flags & VM_EXEC); |
3887 | perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); | 4167 | |
3888 | put_cpu_var(perf_cpu_context); | 4168 | ctxn = pmu->task_ctx_nr; |
4169 | if (ctxn < 0) | ||
4170 | goto next; | ||
4171 | |||
4172 | ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); | ||
4173 | if (ctx) { | ||
4174 | perf_event_mmap_ctx(ctx, mmap_event, | ||
4175 | vma->vm_flags & VM_EXEC); | ||
4176 | } | ||
4177 | next: | ||
4178 | put_cpu_ptr(pmu->pmu_cpu_context); | ||
4179 | } | ||
3889 | rcu_read_unlock(); | 4180 | rcu_read_unlock(); |
3890 | 4181 | ||
3891 | kfree(buf); | 4182 | kfree(buf); |
@@ -3967,8 +4258,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
3967 | struct hw_perf_event *hwc = &event->hw; | 4258 | struct hw_perf_event *hwc = &event->hw; |
3968 | int ret = 0; | 4259 | int ret = 0; |
3969 | 4260 | ||
3970 | throttle = (throttle && event->pmu->unthrottle != NULL); | ||
3971 | |||
3972 | if (!throttle) { | 4261 | if (!throttle) { |
3973 | hwc->interrupts++; | 4262 | hwc->interrupts++; |
3974 | } else { | 4263 | } else { |
@@ -4036,6 +4325,17 @@ int perf_event_overflow(struct perf_event *event, int nmi, | |||
4036 | * Generic software event infrastructure | 4325 | * Generic software event infrastructure |
4037 | */ | 4326 | */ |
4038 | 4327 | ||
4328 | struct swevent_htable { | ||
4329 | struct swevent_hlist *swevent_hlist; | ||
4330 | struct mutex hlist_mutex; | ||
4331 | int hlist_refcount; | ||
4332 | |||
4333 | /* Recursion avoidance in each contexts */ | ||
4334 | int recursion[PERF_NR_CONTEXTS]; | ||
4335 | }; | ||
4336 | |||
4337 | static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); | ||
4338 | |||
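The per-CPU software-event state moves out of perf_cpu_context into its own swevent_htable, pairing the hashed list of armed events with the recursion flags. Lookups fold (type, event_id) into one 64-bit key and hash it down to a bucket, roughly like this (the kernel hashes with hash_64(); a Fibonacci-style multiplier is substituted here):

#include <stdint.h>
#include <stdio.h>

#define HLIST_BITS 8                    /* SWEVENT_HLIST_BITS in the kernel */

/* Fold type and event id into one key, then hash it to a bucket index. */
static unsigned int swevent_bucket(uint64_t type, uint32_t event_id)
{
        uint64_t val = event_id | (type << 32);

        return (unsigned int)((val * 0x9E3779B97F4A7C15ULL) >> (64 - HLIST_BITS));
}

int main(void)
{
        /* e.g. a PERF_TYPE_SOFTWARE (1) page-fault style event id (2) */
        printf("bucket %u of %u\n", swevent_bucket(1, 2), 1U << HLIST_BITS);
        return 0;
}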
4039 | /* | 4339 | /* |
4040 | * We directly increment event->count and keep a second value in | 4340 | * We directly increment event->count and keep a second value in |
4041 | * event->hw.period_left to count intervals. This period event | 4341 | * event->hw.period_left to count intervals. This period event |
@@ -4093,7 +4393,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, | |||
4093 | } | 4393 | } |
4094 | } | 4394 | } |
4095 | 4395 | ||
4096 | static void perf_swevent_add(struct perf_event *event, u64 nr, | 4396 | static void perf_swevent_event(struct perf_event *event, u64 nr, |
4097 | int nmi, struct perf_sample_data *data, | 4397 | int nmi, struct perf_sample_data *data, |
4098 | struct pt_regs *regs) | 4398 | struct pt_regs *regs) |
4099 | { | 4399 | { |
@@ -4119,6 +4419,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, | |||
4119 | static int perf_exclude_event(struct perf_event *event, | 4419 | static int perf_exclude_event(struct perf_event *event, |
4120 | struct pt_regs *regs) | 4420 | struct pt_regs *regs) |
4121 | { | 4421 | { |
4422 | if (event->hw.state & PERF_HES_STOPPED) | ||
4423 | return 0; | ||
4424 | |||
4122 | if (regs) { | 4425 | if (regs) { |
4123 | if (event->attr.exclude_user && user_mode(regs)) | 4426 | if (event->attr.exclude_user && user_mode(regs)) |
4124 | return 1; | 4427 | return 1; |
@@ -4165,11 +4468,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id) | |||
4165 | 4468 | ||
4166 | /* For the read side: events when they trigger */ | 4469 | /* For the read side: events when they trigger */ |
4167 | static inline struct hlist_head * | 4470 | static inline struct hlist_head * |
4168 | find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) | 4471 | find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id) |
4169 | { | 4472 | { |
4170 | struct swevent_hlist *hlist; | 4473 | struct swevent_hlist *hlist; |
4171 | 4474 | ||
4172 | hlist = rcu_dereference(ctx->swevent_hlist); | 4475 | hlist = rcu_dereference(swhash->swevent_hlist); |
4173 | if (!hlist) | 4476 | if (!hlist) |
4174 | return NULL; | 4477 | return NULL; |
4175 | 4478 | ||
@@ -4178,7 +4481,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) | |||
4178 | 4481 | ||
4179 | /* For the event head insertion and removal in the hlist */ | 4482 | /* For the event head insertion and removal in the hlist */ |
4180 | static inline struct hlist_head * | 4483 | static inline struct hlist_head * |
4181 | find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) | 4484 | find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) |
4182 | { | 4485 | { |
4183 | struct swevent_hlist *hlist; | 4486 | struct swevent_hlist *hlist; |
4184 | u32 event_id = event->attr.config; | 4487 | u32 event_id = event->attr.config; |
@@ -4189,7 +4492,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) | |||
4189 | * and release, which makes the protected version suitable here. | 4492 | * and release, which makes the protected version suitable here. |
4190 | * The context lock guarantees that. | 4493 | * The context lock guarantees that. |
4191 | */ | 4494 | */ |
4192 | hlist = rcu_dereference_protected(ctx->swevent_hlist, | 4495 | hlist = rcu_dereference_protected(swhash->swevent_hlist, |
4193 | lockdep_is_held(&event->ctx->lock)); | 4496 | lockdep_is_held(&event->ctx->lock)); |
4194 | if (!hlist) | 4497 | if (!hlist) |
4195 | return NULL; | 4498 | return NULL; |
@@ -4202,23 +4505,19 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
4202 | struct perf_sample_data *data, | 4505 | struct perf_sample_data *data, |
4203 | struct pt_regs *regs) | 4506 | struct pt_regs *regs) |
4204 | { | 4507 | { |
4205 | struct perf_cpu_context *cpuctx; | 4508 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
4206 | struct perf_event *event; | 4509 | struct perf_event *event; |
4207 | struct hlist_node *node; | 4510 | struct hlist_node *node; |
4208 | struct hlist_head *head; | 4511 | struct hlist_head *head; |
4209 | 4512 | ||
4210 | cpuctx = &__get_cpu_var(perf_cpu_context); | ||
4211 | |||
4212 | rcu_read_lock(); | 4513 | rcu_read_lock(); |
4213 | 4514 | head = find_swevent_head_rcu(swhash, type, event_id); | |
4214 | head = find_swevent_head_rcu(cpuctx, type, event_id); | ||
4215 | |||
4216 | if (!head) | 4515 | if (!head) |
4217 | goto end; | 4516 | goto end; |
4218 | 4517 | ||
4219 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 4518 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
4220 | if (perf_swevent_match(event, type, event_id, data, regs)) | 4519 | if (perf_swevent_match(event, type, event_id, data, regs)) |
4221 | perf_swevent_add(event, nr, nmi, data, regs); | 4520 | perf_swevent_event(event, nr, nmi, data, regs); |
4222 | } | 4521 | } |
4223 | end: | 4522 | end: |
4224 | rcu_read_unlock(); | 4523 | rcu_read_unlock(); |
@@ -4226,33 +4525,17 @@ end: | |||
4226 | 4525 | ||
4227 | int perf_swevent_get_recursion_context(void) | 4526 | int perf_swevent_get_recursion_context(void) |
4228 | { | 4527 | { |
4229 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 4528 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
4230 | int rctx; | ||
4231 | 4529 | ||
4232 | if (in_nmi()) | 4530 | return get_recursion_context(swhash->recursion); |
4233 | rctx = 3; | ||
4234 | else if (in_irq()) | ||
4235 | rctx = 2; | ||
4236 | else if (in_softirq()) | ||
4237 | rctx = 1; | ||
4238 | else | ||
4239 | rctx = 0; | ||
4240 | |||
4241 | if (cpuctx->recursion[rctx]) | ||
4242 | return -1; | ||
4243 | |||
4244 | cpuctx->recursion[rctx]++; | ||
4245 | barrier(); | ||
4246 | |||
4247 | return rctx; | ||
4248 | } | 4531 | } |
4249 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); | 4532 | EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); |
4250 | 4533 | ||
4251 | inline void perf_swevent_put_recursion_context(int rctx) | 4534 | inline void perf_swevent_put_recursion_context(int rctx) |
4252 | { | 4535 | { |
4253 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 4536 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); |
4254 | barrier(); | 4537 | |
4255 | cpuctx->recursion[rctx]--; | 4538 | put_recursion_context(swhash->recursion, rctx); |
4256 | } | 4539 | } |
4257 | 4540 | ||
4258 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, | 4541 | void __perf_sw_event(u32 event_id, u64 nr, int nmi, |
@@ -4278,20 +4561,20 @@ static void perf_swevent_read(struct perf_event *event) | |||
4278 | { | 4561 | { |
4279 | } | 4562 | } |
4280 | 4563 | ||
4281 | static int perf_swevent_enable(struct perf_event *event) | 4564 | static int perf_swevent_add(struct perf_event *event, int flags) |
4282 | { | 4565 | { |
4566 | struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); | ||
4283 | struct hw_perf_event *hwc = &event->hw; | 4567 | struct hw_perf_event *hwc = &event->hw; |
4284 | struct perf_cpu_context *cpuctx; | ||
4285 | struct hlist_head *head; | 4568 | struct hlist_head *head; |
4286 | 4569 | ||
4287 | cpuctx = &__get_cpu_var(perf_cpu_context); | ||
4288 | |||
4289 | if (hwc->sample_period) { | 4570 | if (hwc->sample_period) { |
4290 | hwc->last_period = hwc->sample_period; | 4571 | hwc->last_period = hwc->sample_period; |
4291 | perf_swevent_set_period(event); | 4572 | perf_swevent_set_period(event); |
4292 | } | 4573 | } |
4293 | 4574 | ||
4294 | head = find_swevent_head(cpuctx, event); | 4575 | hwc->state = !(flags & PERF_EF_START); |
4576 | |||
4577 | head = find_swevent_head(swhash, event); | ||
4295 | if (WARN_ON_ONCE(!head)) | 4578 | if (WARN_ON_ONCE(!head)) |
4296 | return -EINVAL; | 4579 | return -EINVAL; |
4297 | 4580 | ||
@@ -4300,202 +4583,27 @@ static int perf_swevent_enable(struct perf_event *event) | |||
4300 | return 0; | 4583 | return 0; |
4301 | } | 4584 | } |
4302 | 4585 | ||
4303 | static void perf_swevent_disable(struct perf_event *event) | 4586 | static void perf_swevent_del(struct perf_event *event, int flags) |
4304 | { | 4587 | { |
4305 | hlist_del_rcu(&event->hlist_entry); | 4588 | hlist_del_rcu(&event->hlist_entry); |
4306 | } | 4589 | } |
4307 | 4590 | ||
4308 | static void perf_swevent_void(struct perf_event *event) | 4591 | static void perf_swevent_start(struct perf_event *event, int flags) |
4309 | { | 4592 | { |
4593 | event->hw.state = 0; | ||
4310 | } | 4594 | } |
4311 | 4595 | ||
4312 | static int perf_swevent_int(struct perf_event *event) | 4596 | static void perf_swevent_stop(struct perf_event *event, int flags) |
4313 | { | 4597 | { |
4314 | return 0; | 4598 | event->hw.state = PERF_HES_STOPPED; |
4315 | } | ||
4316 | |||
4317 | static const struct pmu perf_ops_generic = { | ||
4318 | .enable = perf_swevent_enable, | ||
4319 | .disable = perf_swevent_disable, | ||
4320 | .start = perf_swevent_int, | ||
4321 | .stop = perf_swevent_void, | ||
4322 | .read = perf_swevent_read, | ||
4323 | .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ | ||
4324 | }; | ||
4325 | |||
4326 | /* | ||
4327 | * hrtimer based swevent callback | ||
4328 | */ | ||
4329 | |||
4330 | static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | ||
4331 | { | ||
4332 | enum hrtimer_restart ret = HRTIMER_RESTART; | ||
4333 | struct perf_sample_data data; | ||
4334 | struct pt_regs *regs; | ||
4335 | struct perf_event *event; | ||
4336 | u64 period; | ||
4337 | |||
4338 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); | ||
4339 | event->pmu->read(event); | ||
4340 | |||
4341 | perf_sample_data_init(&data, 0); | ||
4342 | data.period = event->hw.last_period; | ||
4343 | regs = get_irq_regs(); | ||
4344 | |||
4345 | if (regs && !perf_exclude_event(event, regs)) { | ||
4346 | if (!(event->attr.exclude_idle && current->pid == 0)) | ||
4347 | if (perf_event_overflow(event, 0, &data, regs)) | ||
4348 | ret = HRTIMER_NORESTART; | ||
4349 | } | ||
4350 | |||
4351 | period = max_t(u64, 10000, event->hw.sample_period); | ||
4352 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
4353 | |||
4354 | return ret; | ||
4355 | } | 4599 | } |
4356 | 4600 | ||
4357 | static void perf_swevent_start_hrtimer(struct perf_event *event) | ||
4358 | { | ||
4359 | struct hw_perf_event *hwc = &event->hw; | ||
4360 | |||
4361 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
4362 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
4363 | if (hwc->sample_period) { | ||
4364 | u64 period; | ||
4365 | |||
4366 | if (hwc->remaining) { | ||
4367 | if (hwc->remaining < 0) | ||
4368 | period = 10000; | ||
4369 | else | ||
4370 | period = hwc->remaining; | ||
4371 | hwc->remaining = 0; | ||
4372 | } else { | ||
4373 | period = max_t(u64, 10000, hwc->sample_period); | ||
4374 | } | ||
4375 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
4376 | ns_to_ktime(period), 0, | ||
4377 | HRTIMER_MODE_REL, 0); | ||
4378 | } | ||
4379 | } | ||
4380 | |||
4381 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) | ||
4382 | { | ||
4383 | struct hw_perf_event *hwc = &event->hw; | ||
4384 | |||
4385 | if (hwc->sample_period) { | ||
4386 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
4387 | hwc->remaining = ktime_to_ns(remaining); | ||
4388 | |||
4389 | hrtimer_cancel(&hwc->hrtimer); | ||
4390 | } | ||
4391 | } | ||
4392 | |||
4393 | /* | ||
4394 | * Software event: cpu wall time clock | ||
4395 | */ | ||
4396 | |||
4397 | static void cpu_clock_perf_event_update(struct perf_event *event) | ||
4398 | { | ||
4399 | int cpu = raw_smp_processor_id(); | ||
4400 | s64 prev; | ||
4401 | u64 now; | ||
4402 | |||
4403 | now = cpu_clock(cpu); | ||
4404 | prev = local64_xchg(&event->hw.prev_count, now); | ||
4405 | local64_add(now - prev, &event->count); | ||
4406 | } | ||
4407 | |||
4408 | static int cpu_clock_perf_event_enable(struct perf_event *event) | ||
4409 | { | ||
4410 | struct hw_perf_event *hwc = &event->hw; | ||
4411 | int cpu = raw_smp_processor_id(); | ||
4412 | |||
4413 | local64_set(&hwc->prev_count, cpu_clock(cpu)); | ||
4414 | perf_swevent_start_hrtimer(event); | ||
4415 | |||
4416 | return 0; | ||
4417 | } | ||
4418 | |||
4419 | static void cpu_clock_perf_event_disable(struct perf_event *event) | ||
4420 | { | ||
4421 | perf_swevent_cancel_hrtimer(event); | ||
4422 | cpu_clock_perf_event_update(event); | ||
4423 | } | ||
4424 | |||
4425 | static void cpu_clock_perf_event_read(struct perf_event *event) | ||
4426 | { | ||
4427 | cpu_clock_perf_event_update(event); | ||
4428 | } | ||
4429 | |||
4430 | static const struct pmu perf_ops_cpu_clock = { | ||
4431 | .enable = cpu_clock_perf_event_enable, | ||
4432 | .disable = cpu_clock_perf_event_disable, | ||
4433 | .read = cpu_clock_perf_event_read, | ||
4434 | }; | ||
4435 | |||
4436 | /* | ||
4437 | * Software event: task time clock | ||
4438 | */ | ||
4439 | |||
4440 | static void task_clock_perf_event_update(struct perf_event *event, u64 now) | ||
4441 | { | ||
4442 | u64 prev; | ||
4443 | s64 delta; | ||
4444 | |||
4445 | prev = local64_xchg(&event->hw.prev_count, now); | ||
4446 | delta = now - prev; | ||
4447 | local64_add(delta, &event->count); | ||
4448 | } | ||
4449 | |||
4450 | static int task_clock_perf_event_enable(struct perf_event *event) | ||
4451 | { | ||
4452 | struct hw_perf_event *hwc = &event->hw; | ||
4453 | u64 now; | ||
4454 | |||
4455 | now = event->ctx->time; | ||
4456 | |||
4457 | local64_set(&hwc->prev_count, now); | ||
4458 | |||
4459 | perf_swevent_start_hrtimer(event); | ||
4460 | |||
4461 | return 0; | ||
4462 | } | ||
4463 | |||
4464 | static void task_clock_perf_event_disable(struct perf_event *event) | ||
4465 | { | ||
4466 | perf_swevent_cancel_hrtimer(event); | ||
4467 | task_clock_perf_event_update(event, event->ctx->time); | ||
4468 | |||
4469 | } | ||
4470 | |||
4471 | static void task_clock_perf_event_read(struct perf_event *event) | ||
4472 | { | ||
4473 | u64 time; | ||
4474 | |||
4475 | if (!in_nmi()) { | ||
4476 | update_context_time(event->ctx); | ||
4477 | time = event->ctx->time; | ||
4478 | } else { | ||
4479 | u64 now = perf_clock(); | ||
4480 | u64 delta = now - event->ctx->timestamp; | ||
4481 | time = event->ctx->time + delta; | ||
4482 | } | ||
4483 | |||
4484 | task_clock_perf_event_update(event, time); | ||
4485 | } | ||
4486 | |||
4487 | static const struct pmu perf_ops_task_clock = { | ||
4488 | .enable = task_clock_perf_event_enable, | ||
4489 | .disable = task_clock_perf_event_disable, | ||
4490 | .read = task_clock_perf_event_read, | ||
4491 | }; | ||
4492 | |||
4493 | /* Deref the hlist from the update side */ | 4601 | /* Deref the hlist from the update side */ |
4494 | static inline struct swevent_hlist * | 4602 | static inline struct swevent_hlist * |
4495 | swevent_hlist_deref(struct perf_cpu_context *cpuctx) | 4603 | swevent_hlist_deref(struct swevent_htable *swhash) |
4496 | { | 4604 | { |
4497 | return rcu_dereference_protected(cpuctx->swevent_hlist, | 4605 | return rcu_dereference_protected(swhash->swevent_hlist, |
4498 | lockdep_is_held(&cpuctx->hlist_mutex)); | 4606 | lockdep_is_held(&swhash->hlist_mutex)); |
4499 | } | 4607 | } |
4500 | 4608 | ||
4501 | static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) | 4609 | static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) |
@@ -4506,27 +4614,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) | |||
4506 | kfree(hlist); | 4614 | kfree(hlist); |
4507 | } | 4615 | } |
4508 | 4616 | ||
4509 | static void swevent_hlist_release(struct perf_cpu_context *cpuctx) | 4617 | static void swevent_hlist_release(struct swevent_htable *swhash) |
4510 | { | 4618 | { |
4511 | struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); | 4619 | struct swevent_hlist *hlist = swevent_hlist_deref(swhash); |
4512 | 4620 | ||
4513 | if (!hlist) | 4621 | if (!hlist) |
4514 | return; | 4622 | return; |
4515 | 4623 | ||
4516 | rcu_assign_pointer(cpuctx->swevent_hlist, NULL); | 4624 | rcu_assign_pointer(swhash->swevent_hlist, NULL); |
4517 | call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); | 4625 | call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); |
4518 | } | 4626 | } |
4519 | 4627 | ||
4520 | static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) | 4628 | static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) |
4521 | { | 4629 | { |
4522 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 4630 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
4523 | 4631 | ||
4524 | mutex_lock(&cpuctx->hlist_mutex); | 4632 | mutex_lock(&swhash->hlist_mutex); |
4525 | 4633 | ||
4526 | if (!--cpuctx->hlist_refcount) | 4634 | if (!--swhash->hlist_refcount) |
4527 | swevent_hlist_release(cpuctx); | 4635 | swevent_hlist_release(swhash); |
4528 | 4636 | ||
4529 | mutex_unlock(&cpuctx->hlist_mutex); | 4637 | mutex_unlock(&swhash->hlist_mutex); |
4530 | } | 4638 | } |
4531 | 4639 | ||
4532 | static void swevent_hlist_put(struct perf_event *event) | 4640 | static void swevent_hlist_put(struct perf_event *event) |
@@ -4544,12 +4652,12 @@ static void swevent_hlist_put(struct perf_event *event) | |||
4544 | 4652 | ||
4545 | static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) | 4653 | static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) |
4546 | { | 4654 | { |
4547 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 4655 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
4548 | int err = 0; | 4656 | int err = 0; |
4549 | 4657 | ||
4550 | mutex_lock(&cpuctx->hlist_mutex); | 4658 | mutex_lock(&swhash->hlist_mutex); |
4551 | 4659 | ||
4552 | if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { | 4660 | if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { |
4553 | struct swevent_hlist *hlist; | 4661 | struct swevent_hlist *hlist; |
4554 | 4662 | ||
4555 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); | 4663 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); |
@@ -4557,11 +4665,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) | |||
4557 | err = -ENOMEM; | 4665 | err = -ENOMEM; |
4558 | goto exit; | 4666 | goto exit; |
4559 | } | 4667 | } |
4560 | rcu_assign_pointer(cpuctx->swevent_hlist, hlist); | 4668 | rcu_assign_pointer(swhash->swevent_hlist, hlist); |
4561 | } | 4669 | } |
4562 | cpuctx->hlist_refcount++; | 4670 | swhash->hlist_refcount++; |
4563 | exit: | 4671 | exit: |
4564 | mutex_unlock(&cpuctx->hlist_mutex); | 4672 | mutex_unlock(&swhash->hlist_mutex); |
4565 | 4673 | ||
4566 | return err; | 4674 | return err; |
4567 | } | 4675 | } |
@@ -4585,7 +4693,7 @@ static int swevent_hlist_get(struct perf_event *event) | |||
4585 | put_online_cpus(); | 4693 | put_online_cpus(); |
4586 | 4694 | ||
4587 | return 0; | 4695 | return 0; |
4588 | fail: | 4696 | fail: |
4589 | for_each_possible_cpu(cpu) { | 4697 | for_each_possible_cpu(cpu) { |
4590 | if (cpu == failed_cpu) | 4698 | if (cpu == failed_cpu) |
4591 | break; | 4699 | break; |
@@ -4596,17 +4704,64 @@ static int swevent_hlist_get(struct perf_event *event) | |||
4596 | return err; | 4704 | return err; |
4597 | } | 4705 | } |
4598 | 4706 | ||
4599 | #ifdef CONFIG_EVENT_TRACING | 4707 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
4708 | |||
4709 | static void sw_perf_event_destroy(struct perf_event *event) | ||
4710 | { | ||
4711 | u64 event_id = event->attr.config; | ||
4712 | |||
4713 | WARN_ON(event->parent); | ||
4714 | |||
4715 | atomic_dec(&perf_swevent_enabled[event_id]); | ||
4716 | swevent_hlist_put(event); | ||
4717 | } | ||
4718 | |||
4719 | static int perf_swevent_init(struct perf_event *event) | ||
4720 | { | ||
4721 | int event_id = event->attr.config; | ||
4722 | |||
4723 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
4724 | return -ENOENT; | ||
4725 | |||
4726 | switch (event_id) { | ||
4727 | case PERF_COUNT_SW_CPU_CLOCK: | ||
4728 | case PERF_COUNT_SW_TASK_CLOCK: | ||
4729 | return -ENOENT; | ||
4730 | |||
4731 | default: | ||
4732 | break; | ||
4733 | } | ||
4734 | |||
4735 | if (event_id > PERF_COUNT_SW_MAX) | ||
4736 | return -ENOENT; | ||
4737 | |||
4738 | if (!event->parent) { | ||
4739 | int err; | ||
4740 | |||
4741 | err = swevent_hlist_get(event); | ||
4742 | if (err) | ||
4743 | return err; | ||
4744 | |||
4745 | atomic_inc(&perf_swevent_enabled[event_id]); | ||
4746 | event->destroy = sw_perf_event_destroy; | ||
4747 | } | ||
4748 | |||
4749 | return 0; | ||
4750 | } | ||
4751 | |||
4752 | static struct pmu perf_swevent = { | ||
4753 | .task_ctx_nr = perf_sw_context, | ||
4600 | 4754 | ||
4601 | static const struct pmu perf_ops_tracepoint = { | 4755 | .event_init = perf_swevent_init, |
4602 | .enable = perf_trace_enable, | 4756 | .add = perf_swevent_add, |
4603 | .disable = perf_trace_disable, | 4757 | .del = perf_swevent_del, |
4604 | .start = perf_swevent_int, | 4758 | .start = perf_swevent_start, |
4605 | .stop = perf_swevent_void, | 4759 | .stop = perf_swevent_stop, |
4606 | .read = perf_swevent_read, | 4760 | .read = perf_swevent_read, |
4607 | .unthrottle = perf_swevent_void, | ||
4608 | }; | 4761 | }; |
4609 | 4762 | ||
4763 | #ifdef CONFIG_EVENT_TRACING | ||
4764 | |||
4610 | static int perf_tp_filter_match(struct perf_event *event, | 4765 | static int perf_tp_filter_match(struct perf_event *event, |
4611 | struct perf_sample_data *data) | 4766 | struct perf_sample_data *data) |
4612 | { | 4767 | { |
@@ -4650,7 +4805,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | |||
4650 | 4805 | ||
4651 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { | 4806 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
4652 | if (perf_tp_event_match(event, &data, regs)) | 4807 | if (perf_tp_event_match(event, &data, regs)) |
4653 | perf_swevent_add(event, count, 1, &data, regs); | 4808 | perf_swevent_event(event, count, 1, &data, regs); |
4654 | } | 4809 | } |
4655 | 4810 | ||
4656 | perf_swevent_put_recursion_context(rctx); | 4811 | perf_swevent_put_recursion_context(rctx); |
@@ -4662,10 +4817,13 @@ static void tp_perf_event_destroy(struct perf_event *event) | |||
4662 | perf_trace_destroy(event); | 4817 | perf_trace_destroy(event); |
4663 | } | 4818 | } |
4664 | 4819 | ||
4665 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4820 | static int perf_tp_event_init(struct perf_event *event) |
4666 | { | 4821 | { |
4667 | int err; | 4822 | int err; |
4668 | 4823 | ||
4824 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||
4825 | return -ENOENT; | ||
4826 | |||
4669 | /* | 4827 | /* |
4670 | * Raw tracepoint data is a severe data leak, only allow root to | 4828 | * Raw tracepoint data is a severe data leak, only allow root to |
4671 | * have these. | 4829 | * have these. |
@@ -4673,15 +4831,31 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) | |||
4673 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && | 4831 | if ((event->attr.sample_type & PERF_SAMPLE_RAW) && |
4674 | perf_paranoid_tracepoint_raw() && | 4832 | perf_paranoid_tracepoint_raw() && |
4675 | !capable(CAP_SYS_ADMIN)) | 4833 | !capable(CAP_SYS_ADMIN)) |
4676 | return ERR_PTR(-EPERM); | 4834 | return -EPERM; |
4677 | 4835 | ||
4678 | err = perf_trace_init(event); | 4836 | err = perf_trace_init(event); |
4679 | if (err) | 4837 | if (err) |
4680 | return NULL; | 4838 | return err; |
4681 | 4839 | ||
4682 | event->destroy = tp_perf_event_destroy; | 4840 | event->destroy = tp_perf_event_destroy; |
4683 | 4841 | ||
4684 | return &perf_ops_tracepoint; | 4842 | return 0; |
4843 | } | ||
4844 | |||
4845 | static struct pmu perf_tracepoint = { | ||
4846 | .task_ctx_nr = perf_sw_context, | ||
4847 | |||
4848 | .event_init = perf_tp_event_init, | ||
4849 | .add = perf_trace_add, | ||
4850 | .del = perf_trace_del, | ||
4851 | .start = perf_swevent_start, | ||
4852 | .stop = perf_swevent_stop, | ||
4853 | .read = perf_swevent_read, | ||
4854 | }; | ||
4855 | |||
4856 | static inline void perf_tp_register(void) | ||
4857 | { | ||
4858 | perf_pmu_register(&perf_tracepoint); | ||
4685 | } | 4859 | } |
4686 | 4860 | ||
4687 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4861 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
@@ -4709,9 +4883,8 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4709 | 4883 | ||
4710 | #else | 4884 | #else |
4711 | 4885 | ||
4712 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4886 | static inline void perf_tp_register(void) |
4713 | { | 4887 | { |
4714 | return NULL; | ||
4715 | } | 4888 | } |
4716 | 4889 | ||
4717 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4890 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
@@ -4726,105 +4899,389 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4726 | #endif /* CONFIG_EVENT_TRACING */ | 4899 | #endif /* CONFIG_EVENT_TRACING */ |
4727 | 4900 | ||
4728 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 4901 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
4729 | static void bp_perf_event_destroy(struct perf_event *event) | 4902 | void perf_bp_event(struct perf_event *bp, void *data) |
4730 | { | 4903 | { |
4731 | release_bp_slot(event); | 4904 | struct perf_sample_data sample; |
4905 | struct pt_regs *regs = data; | ||
4906 | |||
4907 | perf_sample_data_init(&sample, bp->attr.bp_addr); | ||
4908 | |||
4909 | if (!bp->hw.state && !perf_exclude_event(bp, regs)) | ||
4910 | perf_swevent_event(bp, 1, 1, &sample, regs); | ||
4732 | } | 4911 | } |
4912 | #endif | ||
4733 | 4913 | ||
4734 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | 4914 | /* |
4915 | * hrtimer based swevent callback | ||
4916 | */ | ||
4917 | |||
4918 | static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) | ||
4735 | { | 4919 | { |
4736 | int err; | 4920 | enum hrtimer_restart ret = HRTIMER_RESTART; |
4921 | struct perf_sample_data data; | ||
4922 | struct pt_regs *regs; | ||
4923 | struct perf_event *event; | ||
4924 | u64 period; | ||
4737 | 4925 | ||
4738 | err = register_perf_hw_breakpoint(bp); | 4926 | event = container_of(hrtimer, struct perf_event, hw.hrtimer); |
4739 | if (err) | 4927 | event->pmu->read(event); |
4740 | return ERR_PTR(err); | ||
4741 | 4928 | ||
4742 | bp->destroy = bp_perf_event_destroy; | 4929 | perf_sample_data_init(&data, 0); |
4930 | data.period = event->hw.last_period; | ||
4931 | regs = get_irq_regs(); | ||
4932 | |||
4933 | if (regs && !perf_exclude_event(event, regs)) { | ||
4934 | if (!(event->attr.exclude_idle && current->pid == 0)) | ||
4935 | if (perf_event_overflow(event, 0, &data, regs)) | ||
4936 | ret = HRTIMER_NORESTART; | ||
4937 | } | ||
4743 | 4938 | ||
4744 | return &perf_ops_bp; | 4939 | period = max_t(u64, 10000, event->hw.sample_period); |
4940 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
4941 | |||
4942 | return ret; | ||
4745 | } | 4943 | } |
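Note how the callback always re-arms the timer with a floor of 10000 ns, so a pathologically small sample_period cannot pin the CPU in timer callbacks. A trivial standalone sketch of that clamp; MIN_PERIOD_NS is an illustrative name for the literal used above.

#include <stdint.h>
#include <stdio.h>

/* Floor from the code above: re-arm no more often than every 10 us. */
#define MIN_PERIOD_NS 10000ULL

static uint64_t clamp_period(uint64_t sample_period)
{
    return sample_period > MIN_PERIOD_NS ? sample_period : MIN_PERIOD_NS;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)clamp_period(1));      /* 10000 */
    printf("%llu\n", (unsigned long long)clamp_period(500000)); /* 500000 */
    return 0;
}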
4746 | 4944 | ||
4747 | void perf_bp_event(struct perf_event *bp, void *data) | 4945 | static void perf_swevent_start_hrtimer(struct perf_event *event) |
4748 | { | 4946 | { |
4749 | struct perf_sample_data sample; | 4947 | struct hw_perf_event *hwc = &event->hw; |
4750 | struct pt_regs *regs = data; | ||
4751 | 4948 | ||
4752 | perf_sample_data_init(&sample, bp->attr.bp_addr); | 4949 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
4950 | hwc->hrtimer.function = perf_swevent_hrtimer; | ||
4951 | if (hwc->sample_period) { | ||
4952 | s64 period = local64_read(&hwc->period_left); | ||
4953 | |||
4954 | if (period) { | ||
4955 | if (period < 0) | ||
4956 | period = 10000; | ||
4753 | 4957 | ||
4754 | if (!perf_exclude_event(bp, regs)) | 4958 | local64_set(&hwc->period_left, 0); |
4755 | perf_swevent_add(bp, 1, 1, &sample, regs); | 4959 | } else { |
4960 | period = max_t(u64, 10000, hwc->sample_period); | ||
4961 | } | ||
4962 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
4963 | ns_to_ktime(period), 0, | ||
4964 | HRTIMER_MODE_REL_PINNED, 0); | ||
4965 | } | ||
4756 | } | 4966 | } |
4757 | #else | 4967 | |
4758 | static const struct pmu *bp_perf_event_init(struct perf_event *bp) | 4968 | static void perf_swevent_cancel_hrtimer(struct perf_event *event) |
4759 | { | 4969 | { |
4760 | return NULL; | 4970 | struct hw_perf_event *hwc = &event->hw; |
4971 | |||
4972 | if (hwc->sample_period) { | ||
4973 | ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); | ||
4974 | local64_set(&hwc->period_left, ktime_to_ns(remaining)); | ||
4975 | |||
4976 | hrtimer_cancel(&hwc->hrtimer); | ||
4977 | } | ||
4761 | } | 4978 | } |
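start/cancel cooperate through period_left: cancelling stores the unexpired remainder, and the next start consumes that instead of a full period, falling back to the 10 us floor if the saved value shows the timer was already overdue. A small userspace model of that hand-off, with illustrative types and names rather than kernel API:

#include <stdint.h>

struct hw_state {
    int64_t period_left;        /* saved remainder in ns, or 0 */
    uint64_t sample_period;     /* configured period in ns */
};

/* Mirrors the period selection in perf_swevent_start_hrtimer(). */
static int64_t next_timeout_ns(struct hw_state *hwc)
{
    int64_t period = hwc->period_left;

    if (period) {
        if (period < 0)         /* timer was already overdue */
            period = 10000;
        hwc->period_left = 0;   /* remainder is consumed by this start */
    } else {
        period = hwc->sample_period > 10000 ?
                 (int64_t)hwc->sample_period : 10000;
    }
    return period;
}

/* Mirrors perf_swevent_cancel_hrtimer(): stash the unexpired time. */
static void save_remaining(struct hw_state *hwc, int64_t remaining_ns)
{
    hwc->period_left = remaining_ns;
}

int main(void)
{
    struct hw_state hwc = { 0, 500000 };

    save_remaining(&hwc, 123456);   /* event stopped mid-period */
    return next_timeout_ns(&hwc) == 123456 ? 0 : 1;
}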
4762 | 4979 | ||
4763 | void perf_bp_event(struct perf_event *bp, void *regs) | 4980 | /* |
4981 | * Software event: cpu wall time clock | ||
4982 | */ | ||
4983 | |||
4984 | static void cpu_clock_event_update(struct perf_event *event) | ||
4764 | { | 4985 | { |
4986 | s64 prev; | ||
4987 | u64 now; | ||
4988 | |||
4989 | now = local_clock(); | ||
4990 | prev = local64_xchg(&event->hw.prev_count, now); | ||
4991 | local64_add(now - prev, &event->count); | ||
4765 | } | 4992 | } |
4766 | #endif | ||
4767 | 4993 | ||
4768 | atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 4994 | static void cpu_clock_event_start(struct perf_event *event, int flags) |
4995 | { | ||
4996 | local64_set(&event->hw.prev_count, local_clock()); | ||
4997 | perf_swevent_start_hrtimer(event); | ||
4998 | } | ||
4769 | 4999 | ||
4770 | static void sw_perf_event_destroy(struct perf_event *event) | 5000 | static void cpu_clock_event_stop(struct perf_event *event, int flags) |
4771 | { | 5001 | { |
4772 | u64 event_id = event->attr.config; | 5002 | perf_swevent_cancel_hrtimer(event); |
5003 | cpu_clock_event_update(event); | ||
5004 | } | ||
4773 | 5005 | ||
4774 | WARN_ON(event->parent); | 5006 | static int cpu_clock_event_add(struct perf_event *event, int flags) |
5007 | { | ||
5008 | if (flags & PERF_EF_START) | ||
5009 | cpu_clock_event_start(event, flags); | ||
4775 | 5010 | ||
4776 | atomic_dec(&perf_swevent_enabled[event_id]); | 5011 | return 0; |
4777 | swevent_hlist_put(event); | ||
4778 | } | 5012 | } |
4779 | 5013 | ||
4780 | static const struct pmu *sw_perf_event_init(struct perf_event *event) | 5014 | static void cpu_clock_event_del(struct perf_event *event, int flags) |
4781 | { | 5015 | { |
4782 | const struct pmu *pmu = NULL; | 5016 | cpu_clock_event_stop(event, flags); |
4783 | u64 event_id = event->attr.config; | 5017 | } |
5018 | |||
5019 | static void cpu_clock_event_read(struct perf_event *event) | ||
5020 | { | ||
5021 | cpu_clock_event_update(event); | ||
5022 | } | ||
5023 | |||
5024 | static int cpu_clock_event_init(struct perf_event *event) | ||
5025 | { | ||
5026 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
5027 | return -ENOENT; | ||
5028 | |||
5029 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) | ||
5030 | return -ENOENT; | ||
4784 | 5031 | ||
5032 | return 0; | ||
5033 | } | ||
5034 | |||
5035 | static struct pmu perf_cpu_clock = { | ||
5036 | .task_ctx_nr = perf_sw_context, | ||
5037 | |||
5038 | .event_init = cpu_clock_event_init, | ||
5039 | .add = cpu_clock_event_add, | ||
5040 | .del = cpu_clock_event_del, | ||
5041 | .start = cpu_clock_event_start, | ||
5042 | .stop = cpu_clock_event_stop, | ||
5043 | .read = cpu_clock_event_read, | ||
5044 | }; | ||
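The update step exchanges prev_count for the current timestamp and accumulates the difference, so no interval is ever counted twice even when read and stop race. A self-contained userspace analogue, using C11 atomics and CLOCK_MONOTONIC in place of local64 and local_clock() (illustrative names throughout):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct clock_event {
    _Atomic int64_t prev_count; /* last timestamp accounted up to */
    _Atomic int64_t count;      /* accumulated ns */
};

static int64_t now_ns(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

/* Mirrors cpu_clock_event_update(): exchange prev, add the delta. */
static void clock_event_update(struct clock_event *e)
{
    int64_t now = now_ns();
    int64_t prev = atomic_exchange(&e->prev_count, now);

    atomic_fetch_add(&e->count, now - prev);
}

int main(void)
{
    struct clock_event e;

    atomic_init(&e.prev_count, now_ns());   /* like cpu_clock_event_start */
    atomic_init(&e.count, 0);
    clock_event_update(&e);                 /* like read/stop */
    printf("ran for %lld ns\n", (long long)atomic_load(&e.count));
    return 0;
}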
5045 | |||
5046 | /* | ||
5047 | * Software event: task time clock | ||
5048 | */ | ||
5049 | |||
5050 | static void task_clock_event_update(struct perf_event *event, u64 now) | ||
5051 | { | ||
5052 | u64 prev; | ||
5053 | s64 delta; | ||
5054 | |||
5055 | prev = local64_xchg(&event->hw.prev_count, now); | ||
5056 | delta = now - prev; | ||
5057 | local64_add(delta, &event->count); | ||
5058 | } | ||
5059 | |||
5060 | static void task_clock_event_start(struct perf_event *event, int flags) | ||
5061 | { | ||
5062 | local64_set(&event->hw.prev_count, event->ctx->time); | ||
5063 | perf_swevent_start_hrtimer(event); | ||
5064 | } | ||
5065 | |||
5066 | static void task_clock_event_stop(struct perf_event *event, int flags) | ||
5067 | { | ||
5068 | perf_swevent_cancel_hrtimer(event); | ||
5069 | task_clock_event_update(event, event->ctx->time); | ||
5070 | } | ||
5071 | |||
5072 | static int task_clock_event_add(struct perf_event *event, int flags) | ||
5073 | { | ||
5074 | if (flags & PERF_EF_START) | ||
5075 | task_clock_event_start(event, flags); | ||
5076 | |||
5077 | return 0; | ||
5078 | } | ||
5079 | |||
5080 | static void task_clock_event_del(struct perf_event *event, int flags) | ||
5081 | { | ||
5082 | task_clock_event_stop(event, PERF_EF_UPDATE); | ||
5083 | } | ||
5084 | |||
5085 | static void task_clock_event_read(struct perf_event *event) | ||
5086 | { | ||
5087 | u64 time; | ||
5088 | |||
5089 | if (!in_nmi()) { | ||
5090 | update_context_time(event->ctx); | ||
5091 | time = event->ctx->time; | ||
5092 | } else { | ||
5093 | u64 now = perf_clock(); | ||
5094 | u64 delta = now - event->ctx->timestamp; | ||
5095 | time = event->ctx->time + delta; | ||
5096 | } | ||
5097 | |||
5098 | task_clock_event_update(event, time); | ||
5099 | } | ||
5100 | |||
5101 | static int task_clock_event_init(struct perf_event *event) | ||
5102 | { | ||
5103 | if (event->attr.type != PERF_TYPE_SOFTWARE) | ||
5104 | return -ENOENT; | ||
5105 | |||
5106 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) | ||
5107 | return -ENOENT; | ||
5108 | |||
5109 | return 0; | ||
5110 | } | ||
5111 | |||
5112 | static struct pmu perf_task_clock = { | ||
5113 | .task_ctx_nr = perf_sw_context, | ||
5114 | |||
5115 | .event_init = task_clock_event_init, | ||
5116 | .add = task_clock_event_add, | ||
5117 | .del = task_clock_event_del, | ||
5118 | .start = task_clock_event_start, | ||
5119 | .stop = task_clock_event_stop, | ||
5120 | .read = task_clock_event_read, | ||
5121 | }; | ||
5122 | |||
5123 | static void perf_pmu_nop_void(struct pmu *pmu) | ||
5124 | { | ||
5125 | } | ||
5126 | |||
5127 | static int perf_pmu_nop_int(struct pmu *pmu) | ||
5128 | { | ||
5129 | return 0; | ||
5130 | } | ||
5131 | |||
5132 | static void perf_pmu_start_txn(struct pmu *pmu) | ||
5133 | { | ||
5134 | perf_pmu_disable(pmu); | ||
5135 | } | ||
5136 | |||
5137 | static int perf_pmu_commit_txn(struct pmu *pmu) | ||
5138 | { | ||
5139 | perf_pmu_enable(pmu); | ||
5140 | return 0; | ||
5141 | } | ||
5142 | |||
5143 | static void perf_pmu_cancel_txn(struct pmu *pmu) | ||
5144 | { | ||
5145 | perf_pmu_enable(pmu); | ||
5146 | } | ||
5147 | |||
5148 | /* | ||
5149 | * Ensures all contexts with the same task_ctx_nr have the same | ||
5150 | * pmu_cpu_context too. | ||
5151 | */ | ||
5152 | static void *find_pmu_context(int ctxn) | ||
5153 | { | ||
5154 | struct pmu *pmu; | ||
5155 | |||
5156 | if (ctxn < 0) | ||
5157 | return NULL; | ||
5158 | |||
5159 | list_for_each_entry(pmu, &pmus, entry) { | ||
5160 | if (pmu->task_ctx_nr == ctxn) | ||
5161 | return pmu->pmu_cpu_context; | ||
5162 | } | ||
5163 | |||
5164 | return NULL; | ||
5165 | } | ||
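Since registration is rare and serialized by pmus_lock, a linear scan is enough to find an already-allocated per-cpu context for a given context class. A userspace sketch of the lookup over a singly linked registry (an illustrative structure, not the kernel's list_head):

#include <stddef.h>

struct pmu_entry {
    struct pmu_entry *next;
    int task_ctx_nr;            /* context class this pmu schedules in */
    void *cpu_context;          /* shared per-class per-cpu state */
};

/* First pmu of a class allocates the context; later ones find it here. */
static void *find_pmu_context(struct pmu_entry *head, int ctxn)
{
    struct pmu_entry *p;

    if (ctxn < 0)
        return NULL;
    for (p = head; p; p = p->next)
        if (p->task_ctx_nr == ctxn)
            return p->cpu_context;
    return NULL;
}

int main(void)
{
    struct pmu_entry sw = { NULL, 1, (void *)0x1 };
    struct pmu_entry hw = { &sw, 0, (void *)0x2 };

    return find_pmu_context(&hw, 1) == (void *)0x1 ? 0 : 1;
}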
5166 | |||
5167 | static void free_pmu_context(void * __percpu cpu_context) | ||
5168 | { | ||
5169 | struct pmu *pmu; | ||
5170 | |||
5171 | mutex_lock(&pmus_lock); | ||
4785 | /* | 5172 | /* |
4786 | * Software events (currently) can't in general distinguish | 5173 | * Like a crude refcount: free only once no pmu still uses it.
4787 | * between user, kernel and hypervisor events. | ||
4788 | * However, context switches and cpu migrations are considered | ||
4789 | * to be kernel events, and page faults are never hypervisor | ||
4790 | * events. | ||
4791 | */ | 5174 | */ |
4792 | switch (event_id) { | 5175 | list_for_each_entry(pmu, &pmus, entry) { |
4793 | case PERF_COUNT_SW_CPU_CLOCK: | 5176 | if (pmu->pmu_cpu_context == cpu_context) |
4794 | pmu = &perf_ops_cpu_clock; | 5177 | goto out; |
5178 | } | ||
4795 | 5179 | ||
4796 | break; | 5180 | free_percpu(cpu_context); |
4797 | case PERF_COUNT_SW_TASK_CLOCK: | 5181 | out: |
4798 | /* | 5182 | mutex_unlock(&pmus_lock); |
4799 | * If the user instantiates this as a per-cpu event, | 5183 | } |
4800 | * use the cpu_clock event instead. | ||
4801 | */ | ||
4802 | if (event->ctx->task) | ||
4803 | pmu = &perf_ops_task_clock; | ||
4804 | else | ||
4805 | pmu = &perf_ops_cpu_clock; | ||
4806 | 5184 | ||
4807 | break; | 5185 | int perf_pmu_register(struct pmu *pmu) |
4808 | case PERF_COUNT_SW_PAGE_FAULTS: | 5186 | { |
4809 | case PERF_COUNT_SW_PAGE_FAULTS_MIN: | 5187 | int cpu, ret; |
4810 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: | ||
4811 | case PERF_COUNT_SW_CONTEXT_SWITCHES: | ||
4812 | case PERF_COUNT_SW_CPU_MIGRATIONS: | ||
4813 | case PERF_COUNT_SW_ALIGNMENT_FAULTS: | ||
4814 | case PERF_COUNT_SW_EMULATION_FAULTS: | ||
4815 | if (!event->parent) { | ||
4816 | int err; | ||
4817 | |||
4818 | err = swevent_hlist_get(event); | ||
4819 | if (err) | ||
4820 | return ERR_PTR(err); | ||
4821 | 5188 | ||
4822 | atomic_inc(&perf_swevent_enabled[event_id]); | 5189 | mutex_lock(&pmus_lock); |
4823 | event->destroy = sw_perf_event_destroy; | 5190 | ret = -ENOMEM; |
5191 | pmu->pmu_disable_count = alloc_percpu(int); | ||
5192 | if (!pmu->pmu_disable_count) | ||
5193 | goto unlock; | ||
5194 | |||
5195 | pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); | ||
5196 | if (pmu->pmu_cpu_context) | ||
5197 | goto got_cpu_context; | ||
5198 | |||
5199 | pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); | ||
5200 | if (!pmu->pmu_cpu_context) | ||
5201 | goto free_pdc; | ||
5202 | |||
5203 | for_each_possible_cpu(cpu) { | ||
5204 | struct perf_cpu_context *cpuctx; | ||
5205 | |||
5206 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
5207 | __perf_event_init_context(&cpuctx->ctx); | ||
5208 | cpuctx->ctx.type = cpu_context; | ||
5209 | cpuctx->ctx.pmu = pmu; | ||
5210 | cpuctx->jiffies_interval = 1; | ||
5211 | INIT_LIST_HEAD(&cpuctx->rotation_list); | ||
5212 | } | ||
5213 | |||
5214 | got_cpu_context: | ||
5215 | if (!pmu->start_txn) { | ||
5216 | if (pmu->pmu_enable) { | ||
5217 | /* | ||
5218 | * If we have pmu_enable/pmu_disable calls, install | ||
5219 | * transaction stubs that use that to try and batch | ||
5220 | * hardware accesses. | ||
5221 | */ | ||
5222 | pmu->start_txn = perf_pmu_start_txn; | ||
5223 | pmu->commit_txn = perf_pmu_commit_txn; | ||
5224 | pmu->cancel_txn = perf_pmu_cancel_txn; | ||
5225 | } else { | ||
5226 | pmu->start_txn = perf_pmu_nop_void; | ||
5227 | pmu->commit_txn = perf_pmu_nop_int; | ||
5228 | pmu->cancel_txn = perf_pmu_nop_void; | ||
5229 | } | ||
5230 | } | ||
5231 | |||
5232 | if (!pmu->pmu_enable) { | ||
5233 | pmu->pmu_enable = perf_pmu_nop_void; | ||
5234 | pmu->pmu_disable = perf_pmu_nop_void; | ||
5235 | } | ||
5236 | |||
5237 | list_add_rcu(&pmu->entry, &pmus); | ||
5238 | ret = 0; | ||
5239 | unlock: | ||
5240 | mutex_unlock(&pmus_lock); | ||
5241 | |||
5242 | return ret; | ||
5243 | |||
5244 | free_pdc: | ||
5245 | free_percpu(pmu->pmu_disable_count); | ||
5246 | goto unlock; | ||
5247 | } | ||
5248 | |||
5249 | void perf_pmu_unregister(struct pmu *pmu) | ||
5250 | { | ||
5251 | mutex_lock(&pmus_lock); | ||
5252 | list_del_rcu(&pmu->entry); | ||
5253 | mutex_unlock(&pmus_lock); | ||
5254 | |||
5255 | /* | ||
5256 | * We dereference the pmu list under both SRCU and regular RCU, so | ||
5257 | * synchronize against both of those. | ||
5258 | */ | ||
5259 | synchronize_srcu(&pmus_srcu); | ||
5260 | synchronize_rcu(); | ||
5261 | |||
5262 | free_percpu(pmu->pmu_disable_count); | ||
5263 | free_pmu_context(pmu->pmu_cpu_context); | ||
5264 | } | ||
5265 | |||
5266 | struct pmu *perf_init_event(struct perf_event *event) | ||
5267 | { | ||
5268 | struct pmu *pmu = NULL; | ||
5269 | int idx; | ||
5270 | |||
5271 | idx = srcu_read_lock(&pmus_srcu); | ||
5272 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
5273 | int ret = pmu->event_init(event); | ||
5274 | if (!ret) | ||
5275 | goto unlock; | ||
5276 | |||
5277 | if (ret != -ENOENT) { | ||
5278 | pmu = ERR_PTR(ret); | ||
5279 | goto unlock; | ||
4824 | } | 5280 | } |
4825 | pmu = &perf_ops_generic; | ||
4826 | break; | ||
4827 | } | 5281 | } |
5282 | pmu = ERR_PTR(-ENOENT); | ||
5283 | unlock: | ||
5284 | srcu_read_unlock(&pmus_srcu, idx); | ||
4828 | 5285 | ||
4829 | return pmu; | 5286 | return pmu; |
4830 | } | 5287 | } |
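perf_init_event() turns event-type dispatch into a probe loop: each pmu's event_init either claims the event (0), declines (-ENOENT, keep walking), or vetoes with a hard error that ends the walk. This is exactly what replaces the switch on attr->type removed further down. A standalone sketch of the protocol, with illustrative names:

#include <errno.h>
#include <stddef.h>

struct ev { int type; };

static int sw_init(struct ev *e) { return e->type == 1 ? 0 : -ENOENT; }
static int tp_init(struct ev *e) { return e->type == 2 ? 0 : -ENOENT; }

typedef int (*init_fn)(struct ev *);

/* Mirrors perf_init_event(): first pmu that doesn't say -ENOENT wins. */
static init_fn probe(init_fn *pmus, size_t n, struct ev *e, int *err)
{
    size_t i;

    *err = -ENOENT;
    for (i = 0; i < n; i++) {
        int ret = pmus[i](e);

        if (ret == 0) {
            *err = 0;
            return pmus[i];
        }
        if (ret != -ENOENT) {   /* hard error: stop probing */
            *err = ret;
            return NULL;
        }
    }
    return NULL;
}

int main(void)
{
    init_fn pmus[] = { sw_init, tp_init };
    struct ev e = { 2 };
    int err;

    return probe(pmus, 2, &e, &err) ? 0 : 1;
}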
@@ -4833,20 +5290,17 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) | |||
4833 | * Allocate and initialize an event structure | 5290 | * Allocate and initialize an event structure
4834 | */ | 5291 | */ |
4835 | static struct perf_event * | 5292 | static struct perf_event * |
4836 | perf_event_alloc(struct perf_event_attr *attr, | 5293 | perf_event_alloc(struct perf_event_attr *attr, int cpu, |
4837 | int cpu, | ||
4838 | struct perf_event_context *ctx, | ||
4839 | struct perf_event *group_leader, | 5294 | struct perf_event *group_leader, |
4840 | struct perf_event *parent_event, | 5295 | struct perf_event *parent_event, |
4841 | perf_overflow_handler_t overflow_handler, | 5296 | perf_overflow_handler_t overflow_handler) |
4842 | gfp_t gfpflags) | ||
4843 | { | 5297 | { |
4844 | const struct pmu *pmu; | 5298 | struct pmu *pmu; |
4845 | struct perf_event *event; | 5299 | struct perf_event *event; |
4846 | struct hw_perf_event *hwc; | 5300 | struct hw_perf_event *hwc; |
4847 | long err; | 5301 | long err; |
4848 | 5302 | ||
4849 | event = kzalloc(sizeof(*event), gfpflags); | 5303 | event = kzalloc(sizeof(*event), GFP_KERNEL); |
4850 | if (!event) | 5304 | if (!event) |
4851 | return ERR_PTR(-ENOMEM); | 5305 | return ERR_PTR(-ENOMEM); |
4852 | 5306 | ||
@@ -4871,7 +5325,6 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4871 | event->attr = *attr; | 5325 | event->attr = *attr; |
4872 | event->group_leader = group_leader; | 5326 | event->group_leader = group_leader; |
4873 | event->pmu = NULL; | 5327 | event->pmu = NULL; |
4874 | event->ctx = ctx; | ||
4875 | event->oncpu = -1; | 5328 | event->oncpu = -1; |
4876 | 5329 | ||
4877 | event->parent = parent_event; | 5330 | event->parent = parent_event; |
@@ -4905,29 +5358,8 @@ perf_event_alloc(struct perf_event_attr *attr, | |||
4905 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) | 5358 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) |
4906 | goto done; | 5359 | goto done; |
4907 | 5360 | ||
4908 | switch (attr->type) { | 5361 | pmu = perf_init_event(event); |
4909 | case PERF_TYPE_RAW: | ||
4910 | case PERF_TYPE_HARDWARE: | ||
4911 | case PERF_TYPE_HW_CACHE: | ||
4912 | pmu = hw_perf_event_init(event); | ||
4913 | break; | ||
4914 | 5362 | ||
4915 | case PERF_TYPE_SOFTWARE: | ||
4916 | pmu = sw_perf_event_init(event); | ||
4917 | break; | ||
4918 | |||
4919 | case PERF_TYPE_TRACEPOINT: | ||
4920 | pmu = tp_perf_event_init(event); | ||
4921 | break; | ||
4922 | |||
4923 | case PERF_TYPE_BREAKPOINT: | ||
4924 | pmu = bp_perf_event_init(event); | ||
4925 | break; | ||
4926 | |||
4927 | |||
4928 | default: | ||
4929 | break; | ||
4930 | } | ||
4931 | done: | 5363 | done: |
4932 | err = 0; | 5364 | err = 0; |
4933 | if (!pmu) | 5365 | if (!pmu) |
@@ -4952,6 +5384,13 @@ done: | |||
4952 | atomic_inc(&nr_comm_events); | 5384 | atomic_inc(&nr_comm_events); |
4953 | if (event->attr.task) | 5385 | if (event->attr.task) |
4954 | atomic_inc(&nr_task_events); | 5386 | atomic_inc(&nr_task_events); |
5387 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { | ||
5388 | err = get_callchain_buffers(); | ||
5389 | if (err) { | ||
5390 | free_event(event); | ||
5391 | return ERR_PTR(err); | ||
5392 | } | ||
5393 | } | ||
4955 | } | 5394 | } |
4956 | 5395 | ||
4957 | return event; | 5396 | return event; |
@@ -5099,12 +5538,16 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5099 | struct perf_event_attr __user *, attr_uptr, | 5538 | struct perf_event_attr __user *, attr_uptr, |
5100 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) | 5539 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) |
5101 | { | 5540 | { |
5102 | struct perf_event *event, *group_leader = NULL, *output_event = NULL; | 5541 | struct perf_event *group_leader = NULL, *output_event = NULL; |
5542 | struct perf_event *event, *sibling; | ||
5103 | struct perf_event_attr attr; | 5543 | struct perf_event_attr attr; |
5104 | struct perf_event_context *ctx; | 5544 | struct perf_event_context *ctx; |
5105 | struct file *event_file = NULL; | 5545 | struct file *event_file = NULL; |
5106 | struct file *group_file = NULL; | 5546 | struct file *group_file = NULL; |
5547 | struct task_struct *task = NULL; | ||
5548 | struct pmu *pmu; | ||
5107 | int event_fd; | 5549 | int event_fd; |
5550 | int move_group = 0; | ||
5108 | int fput_needed = 0; | 5551 | int fput_needed = 0; |
5109 | int err; | 5552 | int err; |
5110 | 5553 | ||
@@ -5130,20 +5573,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5130 | if (event_fd < 0) | 5573 | if (event_fd < 0) |
5131 | return event_fd; | 5574 | return event_fd; |
5132 | 5575 | ||
5133 | /* | ||
5134 | * Get the target context (task or percpu): | ||
5135 | */ | ||
5136 | ctx = find_get_context(pid, cpu); | ||
5137 | if (IS_ERR(ctx)) { | ||
5138 | err = PTR_ERR(ctx); | ||
5139 | goto err_fd; | ||
5140 | } | ||
5141 | |||
5142 | if (group_fd != -1) { | 5576 | if (group_fd != -1) { |
5143 | group_leader = perf_fget_light(group_fd, &fput_needed); | 5577 | group_leader = perf_fget_light(group_fd, &fput_needed); |
5144 | if (IS_ERR(group_leader)) { | 5578 | if (IS_ERR(group_leader)) { |
5145 | err = PTR_ERR(group_leader); | 5579 | err = PTR_ERR(group_leader); |
5146 | goto err_put_context; | 5580 | goto err_fd; |
5147 | } | 5581 | } |
5148 | group_file = group_leader->filp; | 5582 | group_file = group_leader->filp; |
5149 | if (flags & PERF_FLAG_FD_OUTPUT) | 5583 | if (flags & PERF_FLAG_FD_OUTPUT) |
@@ -5152,6 +5586,58 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5152 | group_leader = NULL; | 5586 | group_leader = NULL; |
5153 | } | 5587 | } |
5154 | 5588 | ||
5589 | event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL); | ||
5590 | if (IS_ERR(event)) { | ||
5591 | err = PTR_ERR(event); | ||
5592 | goto err_fd; | ||
5593 | } | ||
5594 | |||
5595 | /* | ||
5596 | * Special case software events and allow them to be part of | ||
5597 | * any hardware group. | ||
5598 | */ | ||
5599 | pmu = event->pmu; | ||
5600 | |||
5601 | if (group_leader && | ||
5602 | (is_software_event(event) != is_software_event(group_leader))) { | ||
5603 | if (is_software_event(event)) { | ||
5604 | /* | ||
5605 | * If event and group_leader are not both a software | ||
5606 | * event, and event is, then group leader is not. | ||
5607 | * | ||
5608 | * Allow the addition of software events to !software | ||
5609 | * groups, this is safe because software events never | ||
5610 | * fail to schedule. | ||
5611 | */ | ||
5612 | pmu = group_leader->pmu; | ||
5613 | } else if (is_software_event(group_leader) && | ||
5614 | (group_leader->group_flags & PERF_GROUP_SOFTWARE)) { | ||
5615 | /* | ||
5616 | * In case the group is a pure software group, and we | ||
5617 | * try to add a hardware event, move the whole group to | ||
5618 | * the hardware context. | ||
5619 | */ | ||
5620 | move_group = 1; | ||
5621 | } | ||
5622 | } | ||
5623 | |||
5624 | if (pid != -1) { | ||
5625 | task = find_lively_task_by_vpid(pid); | ||
5626 | if (IS_ERR(task)) { | ||
5627 | err = PTR_ERR(task); | ||
5628 | goto err_group_fd; | ||
5629 | } | ||
5630 | } | ||
5631 | |||
5632 | /* | ||
5633 | * Get the target context (task or percpu): | ||
5634 | */ | ||
5635 | ctx = find_get_context(pmu, task, cpu); | ||
5636 | if (IS_ERR(ctx)) { | ||
5637 | err = PTR_ERR(ctx); | ||
5638 | goto err_group_fd; | ||
5639 | } | ||
5640 | |||
5155 | /* | 5641 | /* |
5156 | * Look up the group leader (we will attach this event to it): | 5642 | * Look up the group leader (we will attach this event to it): |
5157 | */ | 5643 | */ |
@@ -5163,42 +5649,66 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5163 | * becoming part of another group-sibling): | 5649 | * becoming part of another group-sibling): |
5164 | */ | 5650 | */ |
5165 | if (group_leader->group_leader != group_leader) | 5651 | if (group_leader->group_leader != group_leader) |
5166 | goto err_put_context; | 5652 | goto err_context; |
5167 | /* | 5653 | /* |
5168 | * Do not allow to attach to a group in a different | 5654 | * Do not allow to attach to a group in a different |
5169 | * task or CPU context: | 5655 | * task or CPU context: |
5170 | */ | 5656 | */ |
5171 | if (group_leader->ctx != ctx) | 5657 | if (move_group) { |
5172 | goto err_put_context; | 5658 | if (group_leader->ctx->type != ctx->type) |
5659 | goto err_context; | ||
5660 | } else { | ||
5661 | if (group_leader->ctx != ctx) | ||
5662 | goto err_context; | ||
5663 | } | ||
5664 | |||
5173 | /* | 5665 | /* |
5174 | * Only a group leader can be exclusive or pinned | 5666 | * Only a group leader can be exclusive or pinned |
5175 | */ | 5667 | */ |
5176 | if (attr.exclusive || attr.pinned) | 5668 | if (attr.exclusive || attr.pinned) |
5177 | goto err_put_context; | 5669 | goto err_context; |
5178 | } | ||
5179 | |||
5180 | event = perf_event_alloc(&attr, cpu, ctx, group_leader, | ||
5181 | NULL, NULL, GFP_KERNEL); | ||
5182 | if (IS_ERR(event)) { | ||
5183 | err = PTR_ERR(event); | ||
5184 | goto err_put_context; | ||
5185 | } | 5670 | } |
5186 | 5671 | ||
5187 | if (output_event) { | 5672 | if (output_event) { |
5188 | err = perf_event_set_output(event, output_event); | 5673 | err = perf_event_set_output(event, output_event); |
5189 | if (err) | 5674 | if (err) |
5190 | goto err_free_put_context; | 5675 | goto err_context; |
5191 | } | 5676 | } |
5192 | 5677 | ||
5193 | event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); | 5678 | event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); |
5194 | if (IS_ERR(event_file)) { | 5679 | if (IS_ERR(event_file)) { |
5195 | err = PTR_ERR(event_file); | 5680 | err = PTR_ERR(event_file); |
5196 | goto err_free_put_context; | 5681 | goto err_context; |
5682 | } | ||
5683 | |||
5684 | if (move_group) { | ||
5685 | struct perf_event_context *gctx = group_leader->ctx; | ||
5686 | |||
5687 | mutex_lock(&gctx->mutex); | ||
5688 | perf_event_remove_from_context(group_leader); | ||
5689 | list_for_each_entry(sibling, &group_leader->sibling_list, | ||
5690 | group_entry) { | ||
5691 | perf_event_remove_from_context(sibling); | ||
5692 | put_ctx(gctx); | ||
5693 | } | ||
5694 | mutex_unlock(&gctx->mutex); | ||
5695 | put_ctx(gctx); | ||
5197 | } | 5696 | } |
5198 | 5697 | ||
5199 | event->filp = event_file; | 5698 | event->filp = event_file; |
5200 | WARN_ON_ONCE(ctx->parent_ctx); | 5699 | WARN_ON_ONCE(ctx->parent_ctx); |
5201 | mutex_lock(&ctx->mutex); | 5700 | mutex_lock(&ctx->mutex); |
5701 | |||
5702 | if (move_group) { | ||
5703 | perf_install_in_context(ctx, group_leader, cpu); | ||
5704 | get_ctx(ctx); | ||
5705 | list_for_each_entry(sibling, &group_leader->sibling_list, | ||
5706 | group_entry) { | ||
5707 | perf_install_in_context(ctx, sibling, cpu); | ||
5708 | get_ctx(ctx); | ||
5709 | } | ||
5710 | } | ||
5711 | |||
5202 | perf_install_in_context(ctx, event, cpu); | 5712 | perf_install_in_context(ctx, event, cpu); |
5203 | ++ctx->generation; | 5713 | ++ctx->generation; |
5204 | mutex_unlock(&ctx->mutex); | 5714 | mutex_unlock(&ctx->mutex); |
@@ -5219,11 +5729,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5219 | fd_install(event_fd, event_file); | 5729 | fd_install(event_fd, event_file); |
5220 | return event_fd; | 5730 | return event_fd; |
5221 | 5731 | ||
5222 | err_free_put_context: | 5732 | err_context: |
5223 | free_event(event); | ||
5224 | err_put_context: | ||
5225 | fput_light(group_file, fput_needed); | ||
5226 | put_ctx(ctx); | 5733 | put_ctx(ctx); |
5734 | err_group_fd: | ||
5735 | fput_light(group_file, fput_needed); | ||
5736 | free_event(event); | ||
5227 | err_fd: | 5737 | err_fd: |
5228 | put_unused_fd(event_fd); | 5738 | put_unused_fd(event_fd); |
5229 | return err; | 5739 | return err; |
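The move_group path in the hunk above detaches a pure-software group leader and all of its siblings from the software context, then reinstalls every member in the hardware context, dropping and retaking one context reference per event. A simplified userspace model of the net effect; it is illustrative only, as the kernel additionally holds both context mutexes and splits the puts and gets across the two loops shown above.

#include <stdio.h>

struct ctx { int refcount; int nr_events; };
struct event { struct ctx *ctx; };

static void remove_from(struct event *e)
{
    e->ctx->nr_events--;
    e->ctx->refcount--;         /* the kernel's put_ctx() per removed event */
}

static void install_in(struct ctx *c, struct event *e)
{
    e->ctx = c;
    c->nr_events++;
    c->refcount++;              /* the kernel's get_ctx() per installed event */
}

/* Move a leader and its siblings from their old context into dst. */
static void move_group(struct event **group, int n, struct ctx *dst)
{
    int i;

    for (i = 0; i < n; i++)
        remove_from(group[i]);
    for (i = 0; i < n; i++)
        install_in(dst, group[i]);
}

int main(void)
{
    struct ctx sw = { 3, 3 }, hw = { 0, 0 };
    struct event a = { &sw }, b = { &sw }, c = { &sw };
    struct event *group[3] = { &a, &b, &c };

    move_group(group, 3, &hw);
    printf("sw: ref=%d events=%d, hw: ref=%d events=%d\n",
           sw.refcount, sw.nr_events, hw.refcount, hw.nr_events);
    return 0;
}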
@@ -5234,32 +5744,31 @@ err_fd: | |||
5234 | * | 5744 | * |
5235 | * @attr: attributes of the counter to create | 5745 | * @attr: attributes of the counter to create |
5236 | * @cpu: cpu on which the counter is bound | 5746 | * @cpu: cpu on which the counter is bound
5237 | * @pid: task to profile | 5747 | * @task: task to profile (NULL for percpu) |
5238 | */ | 5748 | */ |
5239 | struct perf_event * | 5749 | struct perf_event * |
5240 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | 5750 | perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, |
5241 | pid_t pid, | 5751 | struct task_struct *task, |
5242 | perf_overflow_handler_t overflow_handler) | 5752 | perf_overflow_handler_t overflow_handler) |
5243 | { | 5753 | { |
5244 | struct perf_event *event; | ||
5245 | struct perf_event_context *ctx; | 5754 | struct perf_event_context *ctx; |
5755 | struct perf_event *event; | ||
5246 | int err; | 5756 | int err; |
5247 | 5757 | ||
5248 | /* | 5758 | /* |
5249 | * Get the target context (task or percpu): | 5759 | * Get the target context (task or percpu): |
5250 | */ | 5760 | */ |
5251 | 5761 | ||
5252 | ctx = find_get_context(pid, cpu); | 5762 | event = perf_event_alloc(attr, cpu, NULL, NULL, overflow_handler); |
5253 | if (IS_ERR(ctx)) { | ||
5254 | err = PTR_ERR(ctx); | ||
5255 | goto err_exit; | ||
5256 | } | ||
5257 | |||
5258 | event = perf_event_alloc(attr, cpu, ctx, NULL, | ||
5259 | NULL, overflow_handler, GFP_KERNEL); | ||
5260 | if (IS_ERR(event)) { | 5763 | if (IS_ERR(event)) { |
5261 | err = PTR_ERR(event); | 5764 | err = PTR_ERR(event); |
5262 | goto err_put_context; | 5765 | goto err; |
5766 | } | ||
5767 | |||
5768 | ctx = find_get_context(event->pmu, task, cpu); | ||
5769 | if (IS_ERR(ctx)) { | ||
5770 | err = PTR_ERR(ctx); | ||
5771 | goto err_free; | ||
5263 | } | 5772 | } |
5264 | 5773 | ||
5265 | event->filp = NULL; | 5774 | event->filp = NULL; |
@@ -5277,112 +5786,13 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
5277 | 5786 | ||
5278 | return event; | 5787 | return event; |
5279 | 5788 | ||
5280 | err_put_context: | 5789 | err_free: |
5281 | put_ctx(ctx); | 5790 | free_event(event); |
5282 | err_exit: | 5791 | err: |
5283 | return ERR_PTR(err); | 5792 | return ERR_PTR(err); |
5284 | } | 5793 | } |
5285 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | 5794 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); |
5286 | 5795 | ||
5287 | /* | ||
5288 | * inherit an event from parent task to child task: | ||
5289 | */ | ||
5290 | static struct perf_event * | ||
5291 | inherit_event(struct perf_event *parent_event, | ||
5292 | struct task_struct *parent, | ||
5293 | struct perf_event_context *parent_ctx, | ||
5294 | struct task_struct *child, | ||
5295 | struct perf_event *group_leader, | ||
5296 | struct perf_event_context *child_ctx) | ||
5297 | { | ||
5298 | struct perf_event *child_event; | ||
5299 | |||
5300 | /* | ||
5301 | * Instead of creating recursive hierarchies of events, | ||
5302 | * we link inherited events back to the original parent, | ||
5303 | * which has a filp for sure, which we use as the reference | ||
5304 | * count: | ||
5305 | */ | ||
5306 | if (parent_event->parent) | ||
5307 | parent_event = parent_event->parent; | ||
5308 | |||
5309 | child_event = perf_event_alloc(&parent_event->attr, | ||
5310 | parent_event->cpu, child_ctx, | ||
5311 | group_leader, parent_event, | ||
5312 | NULL, GFP_KERNEL); | ||
5313 | if (IS_ERR(child_event)) | ||
5314 | return child_event; | ||
5315 | get_ctx(child_ctx); | ||
5316 | |||
5317 | /* | ||
5318 | * Make the child state follow the state of the parent event, | ||
5319 | * not its attr.disabled bit. We hold the parent's mutex, | ||
5320 | * so we won't race with perf_event_{en, dis}able_family. | ||
5321 | */ | ||
5322 | if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) | ||
5323 | child_event->state = PERF_EVENT_STATE_INACTIVE; | ||
5324 | else | ||
5325 | child_event->state = PERF_EVENT_STATE_OFF; | ||
5326 | |||
5327 | if (parent_event->attr.freq) { | ||
5328 | u64 sample_period = parent_event->hw.sample_period; | ||
5329 | struct hw_perf_event *hwc = &child_event->hw; | ||
5330 | |||
5331 | hwc->sample_period = sample_period; | ||
5332 | hwc->last_period = sample_period; | ||
5333 | |||
5334 | local64_set(&hwc->period_left, sample_period); | ||
5335 | } | ||
5336 | |||
5337 | child_event->overflow_handler = parent_event->overflow_handler; | ||
5338 | |||
5339 | /* | ||
5340 | * Link it up in the child's context: | ||
5341 | */ | ||
5342 | add_event_to_ctx(child_event, child_ctx); | ||
5343 | |||
5344 | /* | ||
5345 | * Get a reference to the parent filp - we will fput it | ||
5346 | * when the child event exits. This is safe to do because | ||
5347 | * we are in the parent and we know that the filp still | ||
5348 | * exists and has a nonzero count: | ||
5349 | */ | ||
5350 | atomic_long_inc(&parent_event->filp->f_count); | ||
5351 | |||
5352 | /* | ||
5353 | * Link this into the parent event's child list | ||
5354 | */ | ||
5355 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); | ||
5356 | mutex_lock(&parent_event->child_mutex); | ||
5357 | list_add_tail(&child_event->child_list, &parent_event->child_list); | ||
5358 | mutex_unlock(&parent_event->child_mutex); | ||
5359 | |||
5360 | return child_event; | ||
5361 | } | ||
5362 | |||
5363 | static int inherit_group(struct perf_event *parent_event, | ||
5364 | struct task_struct *parent, | ||
5365 | struct perf_event_context *parent_ctx, | ||
5366 | struct task_struct *child, | ||
5367 | struct perf_event_context *child_ctx) | ||
5368 | { | ||
5369 | struct perf_event *leader; | ||
5370 | struct perf_event *sub; | ||
5371 | struct perf_event *child_ctr; | ||
5372 | |||
5373 | leader = inherit_event(parent_event, parent, parent_ctx, | ||
5374 | child, NULL, child_ctx); | ||
5375 | if (IS_ERR(leader)) | ||
5376 | return PTR_ERR(leader); | ||
5377 | list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { | ||
5378 | child_ctr = inherit_event(sub, parent, parent_ctx, | ||
5379 | child, leader, child_ctx); | ||
5380 | if (IS_ERR(child_ctr)) | ||
5381 | return PTR_ERR(child_ctr); | ||
5382 | } | ||
5383 | return 0; | ||
5384 | } | ||
5385 | |||
5386 | static void sync_child_event(struct perf_event *child_event, | 5796 | static void sync_child_event(struct perf_event *child_event, |
5387 | struct task_struct *child) | 5797 | struct task_struct *child) |
5388 | { | 5798 | { |
@@ -5439,16 +5849,13 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
5439 | } | 5849 | } |
5440 | } | 5850 | } |
5441 | 5851 | ||
5442 | /* | 5852 | static void perf_event_exit_task_context(struct task_struct *child, int ctxn) |
5443 | * When a child task exits, feed back event values to parent events. | ||
5444 | */ | ||
5445 | void perf_event_exit_task(struct task_struct *child) | ||
5446 | { | 5853 | { |
5447 | struct perf_event *child_event, *tmp; | 5854 | struct perf_event *child_event, *tmp; |
5448 | struct perf_event_context *child_ctx; | 5855 | struct perf_event_context *child_ctx; |
5449 | unsigned long flags; | 5856 | unsigned long flags; |
5450 | 5857 | ||
5451 | if (likely(!child->perf_event_ctxp)) { | 5858 | if (likely(!child->perf_event_ctxp[ctxn])) { |
5452 | perf_event_task(child, NULL, 0); | 5859 | perf_event_task(child, NULL, 0); |
5453 | return; | 5860 | return; |
5454 | } | 5861 | } |
@@ -5460,7 +5867,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
5460 | * scheduled, so we are now safe from rescheduling changing | 5867 | * scheduled, so we are now safe from rescheduling changing |
5461 | * our context. | 5868 | * our context. |
5462 | */ | 5869 | */ |
5463 | child_ctx = child->perf_event_ctxp; | 5870 | child_ctx = child->perf_event_ctxp[ctxn]; |
5464 | __perf_event_task_sched_out(child_ctx); | 5871 | __perf_event_task_sched_out(child_ctx); |
5465 | 5872 | ||
5466 | /* | 5873 | /* |
@@ -5469,7 +5876,7 @@ void perf_event_exit_task(struct task_struct *child) | |||
5469 | * incremented the context's refcount before we do put_ctx below. | 5876 | * incremented the context's refcount before we do put_ctx below. |
5470 | */ | 5877 | */ |
5471 | raw_spin_lock(&child_ctx->lock); | 5878 | raw_spin_lock(&child_ctx->lock); |
5472 | child->perf_event_ctxp = NULL; | 5879 | child->perf_event_ctxp[ctxn] = NULL; |
5473 | /* | 5880 | /* |
5474 | * If this context is a clone; unclone it so it can't get | 5881 | * If this context is a clone; unclone it so it can't get |
5475 | * swapped to another process while we're removing all | 5882 | * swapped to another process while we're removing all |
@@ -5522,6 +5929,17 @@ again: | |||
5522 | put_ctx(child_ctx); | 5929 | put_ctx(child_ctx); |
5523 | } | 5930 | } |
5524 | 5931 | ||
5932 | /* | ||
5933 | * When a child task exits, feed back event values to parent events. | ||
5934 | */ | ||
5935 | void perf_event_exit_task(struct task_struct *child) | ||
5936 | { | ||
5937 | int ctxn; | ||
5938 | |||
5939 | for_each_task_context_nr(ctxn) | ||
5940 | perf_event_exit_task_context(child, ctxn); | ||
5941 | } | ||
5942 | |||
5525 | static void perf_free_event(struct perf_event *event, | 5943 | static void perf_free_event(struct perf_event *event, |
5526 | struct perf_event_context *ctx) | 5944 | struct perf_event_context *ctx) |
5527 | { | 5945 | { |
@@ -5543,48 +5961,165 @@ static void perf_free_event(struct perf_event *event, | |||
5543 | 5961 | ||
5544 | /* | 5962 | /* |
5545 | * free an unexposed, unused context as created by inheritance by | 5963 | * free an unexposed, unused context as created by inheritance by |
5546 | * init_task below, used by fork() in case of failure. | 5964 | * perf_event_init_task below, used by fork() in case of failure.
5547 | */ | 5965 | */ |
5548 | void perf_event_free_task(struct task_struct *task) | 5966 | void perf_event_free_task(struct task_struct *task) |
5549 | { | 5967 | { |
5550 | struct perf_event_context *ctx = task->perf_event_ctxp; | 5968 | struct perf_event_context *ctx; |
5551 | struct perf_event *event, *tmp; | 5969 | struct perf_event *event, *tmp; |
5970 | int ctxn; | ||
5552 | 5971 | ||
5553 | if (!ctx) | 5972 | for_each_task_context_nr(ctxn) { |
5554 | return; | 5973 | ctx = task->perf_event_ctxp[ctxn]; |
5974 | if (!ctx) | ||
5975 | continue; | ||
5555 | 5976 | ||
5556 | mutex_lock(&ctx->mutex); | 5977 | mutex_lock(&ctx->mutex); |
5557 | again: | 5978 | again: |
5558 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) | 5979 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, |
5559 | perf_free_event(event, ctx); | 5980 | group_entry) |
5981 | perf_free_event(event, ctx); | ||
5560 | 5982 | ||
5561 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, | 5983 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, |
5562 | group_entry) | 5984 | group_entry) |
5563 | perf_free_event(event, ctx); | 5985 | perf_free_event(event, ctx); |
5564 | 5986 | ||
5565 | if (!list_empty(&ctx->pinned_groups) || | 5987 | if (!list_empty(&ctx->pinned_groups) || |
5566 | !list_empty(&ctx->flexible_groups)) | 5988 | !list_empty(&ctx->flexible_groups)) |
5567 | goto again; | 5989 | goto again; |
5568 | 5990 | ||
5569 | mutex_unlock(&ctx->mutex); | 5991 | mutex_unlock(&ctx->mutex); |
5570 | 5992 | ||
5571 | put_ctx(ctx); | 5993 | put_ctx(ctx); |
5994 | } | ||
5995 | } | ||
5996 | |||
5997 | void perf_event_delayed_put(struct task_struct *task) | ||
5998 | { | ||
5999 | int ctxn; | ||
6000 | |||
6001 | for_each_task_context_nr(ctxn) | ||
6002 | WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); | ||
6003 | } | ||
6004 | |||
6005 | /* | ||
6006 | * inherit an event from parent task to child task: | ||
6007 | */ | ||
6008 | static struct perf_event * | ||
6009 | inherit_event(struct perf_event *parent_event, | ||
6010 | struct task_struct *parent, | ||
6011 | struct perf_event_context *parent_ctx, | ||
6012 | struct task_struct *child, | ||
6013 | struct perf_event *group_leader, | ||
6014 | struct perf_event_context *child_ctx) | ||
6015 | { | ||
6016 | struct perf_event *child_event; | ||
6017 | unsigned long flags; | ||
6018 | |||
6019 | /* | ||
6020 | * Instead of creating recursive hierarchies of events, | ||
6021 | * we link inherited events back to the original parent, | ||
6022 | * which has a filp for sure, which we use as the reference | ||
6023 | * count: | ||
6024 | */ | ||
6025 | if (parent_event->parent) | ||
6026 | parent_event = parent_event->parent; | ||
6027 | |||
6028 | child_event = perf_event_alloc(&parent_event->attr, | ||
6029 | parent_event->cpu, | ||
6030 | group_leader, parent_event, | ||
6031 | NULL); | ||
6032 | if (IS_ERR(child_event)) | ||
6033 | return child_event; | ||
6034 | get_ctx(child_ctx); | ||
6035 | |||
6036 | /* | ||
6037 | * Make the child state follow the state of the parent event, | ||
6038 | * not its attr.disabled bit. We hold the parent's mutex, | ||
6039 | * so we won't race with perf_event_{en, dis}able_family. | ||
6040 | */ | ||
6041 | if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) | ||
6042 | child_event->state = PERF_EVENT_STATE_INACTIVE; | ||
6043 | else | ||
6044 | child_event->state = PERF_EVENT_STATE_OFF; | ||
6045 | |||
6046 | if (parent_event->attr.freq) { | ||
6047 | u64 sample_period = parent_event->hw.sample_period; | ||
6048 | struct hw_perf_event *hwc = &child_event->hw; | ||
6049 | |||
6050 | hwc->sample_period = sample_period; | ||
6051 | hwc->last_period = sample_period; | ||
6052 | |||
6053 | local64_set(&hwc->period_left, sample_period); | ||
6054 | } | ||
6055 | |||
6056 | child_event->ctx = child_ctx; | ||
6057 | child_event->overflow_handler = parent_event->overflow_handler; | ||
6058 | |||
6059 | /* | ||
6060 | * Link it up in the child's context: | ||
6061 | */ | ||
6062 | raw_spin_lock_irqsave(&child_ctx->lock, flags); | ||
6063 | add_event_to_ctx(child_event, child_ctx); | ||
6064 | raw_spin_unlock_irqrestore(&child_ctx->lock, flags); | ||
6065 | |||
6066 | /* | ||
6067 | * Get a reference to the parent filp - we will fput it | ||
6068 | * when the child event exits. This is safe to do because | ||
6069 | * we are in the parent and we know that the filp still | ||
6070 | * exists and has a nonzero count: | ||
6071 | */ | ||
6072 | atomic_long_inc(&parent_event->filp->f_count); | ||
6073 | |||
6074 | /* | ||
6075 | * Link this into the parent event's child list | ||
6076 | */ | ||
6077 | WARN_ON_ONCE(parent_event->ctx->parent_ctx); | ||
6078 | mutex_lock(&parent_event->child_mutex); | ||
6079 | list_add_tail(&child_event->child_list, &parent_event->child_list); | ||
6080 | mutex_unlock(&parent_event->child_mutex); | ||
6081 | |||
6082 | return child_event; | ||
6083 | } | ||
6084 | |||
6085 | static int inherit_group(struct perf_event *parent_event, | ||
6086 | struct task_struct *parent, | ||
6087 | struct perf_event_context *parent_ctx, | ||
6088 | struct task_struct *child, | ||
6089 | struct perf_event_context *child_ctx) | ||
6090 | { | ||
6091 | struct perf_event *leader; | ||
6092 | struct perf_event *sub; | ||
6093 | struct perf_event *child_ctr; | ||
6094 | |||
6095 | leader = inherit_event(parent_event, parent, parent_ctx, | ||
6096 | child, NULL, child_ctx); | ||
6097 | if (IS_ERR(leader)) | ||
6098 | return PTR_ERR(leader); | ||
6099 | list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { | ||
6100 | child_ctr = inherit_event(sub, parent, parent_ctx, | ||
6101 | child, leader, child_ctx); | ||
6102 | if (IS_ERR(child_ctr)) | ||
6103 | return PTR_ERR(child_ctr); | ||
6104 | } | ||
6105 | return 0; | ||
5572 | } | 6106 | } |
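Group inheritance clones the leader first, with no group leader of its own, then clones each sibling with the new leader attached; the first failure aborts, and the caller tears down the partial group. A minimal sketch, where clone_event is a stand-in for the perf_event_alloc()-based path in inherit_event() and the clones are deliberately leaked for brevity:

#include <stdlib.h>

struct ev {
    struct ev *leader;          /* NULL here marks a group leader */
    struct ev *sibling;         /* next group member, NULL-terminated */
};

/* Stand-in for the clone done by inherit_event(). */
static struct ev *clone_event(struct ev *parent, struct ev *leader)
{
    struct ev *c = calloc(1, sizeof(*c));

    (void)parent;               /* a real clone copies parent->attr etc. */
    if (c)
        c->leader = leader;
    return c;
}

/* Mirrors inherit_group(): leader first, then every sibling. */
static int inherit_group(struct ev *parent_leader)
{
    struct ev *leader, *sub, *child;

    leader = clone_event(parent_leader, NULL);
    if (!leader)
        return -1;
    for (sub = parent_leader->sibling; sub; sub = sub->sibling) {
        child = clone_event(sub, leader);
        if (!child)
            return -1;          /* caller unwinds the partial group */
    }
    return 0;
}

int main(void)
{
    struct ev sib = { NULL, NULL };
    struct ev lead = { NULL, &sib };

    return inherit_group(&lead);
}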
5573 | 6107 | ||
5574 | static int | 6108 | static int |
5575 | inherit_task_group(struct perf_event *event, struct task_struct *parent, | 6109 | inherit_task_group(struct perf_event *event, struct task_struct *parent, |
5576 | struct perf_event_context *parent_ctx, | 6110 | struct perf_event_context *parent_ctx, |
5577 | struct task_struct *child, | 6111 | struct task_struct *child, int ctxn, |
5578 | int *inherited_all) | 6112 | int *inherited_all) |
5579 | { | 6113 | { |
5580 | int ret; | 6114 | int ret; |
5581 | struct perf_event_context *child_ctx = child->perf_event_ctxp; | 6115 | struct perf_event_context *child_ctx; |
5582 | 6116 | ||
5583 | if (!event->attr.inherit) { | 6117 | if (!event->attr.inherit) { |
5584 | *inherited_all = 0; | 6118 | *inherited_all = 0; |
5585 | return 0; | 6119 | return 0; |
5586 | } | 6120 | } |
5587 | 6121 | ||
6122 | child_ctx = child->perf_event_ctxp[ctxn]; | ||
5588 | if (!child_ctx) { | 6123 | if (!child_ctx) { |
5589 | /* | 6124 | /* |
5590 | * This is executed from the parent task context, so | 6125 | * This is executed from the parent task context, so |
@@ -5593,14 +6128,11 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
5593 | * child. | 6128 | * child. |
5594 | */ | 6129 | */ |
5595 | 6130 | ||
5596 | child_ctx = kzalloc(sizeof(struct perf_event_context), | 6131 | child_ctx = alloc_perf_context(event->pmu, child); |
5597 | GFP_KERNEL); | ||
5598 | if (!child_ctx) | 6132 | if (!child_ctx) |
5599 | return -ENOMEM; | 6133 | return -ENOMEM; |
5600 | 6134 | ||
5601 | __perf_event_init_context(child_ctx, child); | 6135 | child->perf_event_ctxp[ctxn] = child_ctx; |
5602 | child->perf_event_ctxp = child_ctx; | ||
5603 | get_task_struct(child); | ||
5604 | } | 6136 | } |
5605 | 6137 | ||
5606 | ret = inherit_group(event, parent, parent_ctx, | 6138 | ret = inherit_group(event, parent, parent_ctx, |
@@ -5612,11 +6144,10 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, | |||
5612 | return ret; | 6144 | return ret; |
5613 | } | 6145 | } |
5614 | 6146 | ||
5615 | |||
5616 | /* | 6147 | /* |
5617 | * Initialize the perf_event context in task_struct | 6148 | * Initialize the perf_event context in task_struct |
5618 | */ | 6149 | */ |
5619 | int perf_event_init_task(struct task_struct *child) | 6150 | int perf_event_init_context(struct task_struct *child, int ctxn) |
5620 | { | 6151 | { |
5621 | struct perf_event_context *child_ctx, *parent_ctx; | 6152 | struct perf_event_context *child_ctx, *parent_ctx; |
5622 | struct perf_event_context *cloned_ctx; | 6153 | struct perf_event_context *cloned_ctx; |
@@ -5625,19 +6156,19 @@ int perf_event_init_task(struct task_struct *child) | |||
5625 | int inherited_all = 1; | 6156 | int inherited_all = 1; |
5626 | int ret = 0; | 6157 | int ret = 0; |
5627 | 6158 | ||
5628 | child->perf_event_ctxp = NULL; | 6159 | child->perf_event_ctxp[ctxn] = NULL; |
5629 | 6160 | ||
5630 | mutex_init(&child->perf_event_mutex); | 6161 | mutex_init(&child->perf_event_mutex); |
5631 | INIT_LIST_HEAD(&child->perf_event_list); | 6162 | INIT_LIST_HEAD(&child->perf_event_list); |
5632 | 6163 | ||
5633 | if (likely(!parent->perf_event_ctxp)) | 6164 | if (likely(!parent->perf_event_ctxp[ctxn])) |
5634 | return 0; | 6165 | return 0; |
5635 | 6166 | ||
5636 | /* | 6167 | /* |
5637 | * If the parent's context is a clone, pin it so it won't get | 6168 | * If the parent's context is a clone, pin it so it won't get |
5638 | * swapped under us. | 6169 | * swapped under us. |
5639 | */ | 6170 | */ |
5640 | parent_ctx = perf_pin_task_context(parent); | 6171 | parent_ctx = perf_pin_task_context(parent, ctxn); |
5641 | 6172 | ||
5642 | /* | 6173 | /* |
5643 | * No need to check if parent_ctx != NULL here; since we saw | 6174 | * No need to check if parent_ctx != NULL here; since we saw |
@@ -5657,20 +6188,20 @@ int perf_event_init_task(struct task_struct *child) | |||
5657 | * the list, not manipulating it: | 6188 | * the list, not manipulating it: |
5658 | */ | 6189 | */ |
5659 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { | 6190 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { |
5660 | ret = inherit_task_group(event, parent, parent_ctx, child, | 6191 | ret = inherit_task_group(event, parent, parent_ctx, |
5661 | &inherited_all); | 6192 | child, ctxn, &inherited_all); |
5662 | if (ret) | 6193 | if (ret) |
5663 | break; | 6194 | break; |
5664 | } | 6195 | } |
5665 | 6196 | ||
5666 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 6197 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
5667 | ret = inherit_task_group(event, parent, parent_ctx, child, | 6198 | ret = inherit_task_group(event, parent, parent_ctx, |
5668 | &inherited_all); | 6199 | child, ctxn, &inherited_all); |
5669 | if (ret) | 6200 | if (ret) |
5670 | break; | 6201 | break; |
5671 | } | 6202 | } |
5672 | 6203 | ||
5673 | child_ctx = child->perf_event_ctxp; | 6204 | child_ctx = child->perf_event_ctxp[ctxn]; |
5674 | 6205 | ||
5675 | if (child_ctx && inherited_all) { | 6206 | if (child_ctx && inherited_all) { |
5676 | /* | 6207 | /* |
@@ -5699,63 +6230,98 @@ int perf_event_init_task(struct task_struct *child) | |||
5699 | return ret; | 6230 | return ret; |
5700 | } | 6231 | } |
5701 | 6232 | ||
6233 | /* | ||
6234 | * Initialize the perf_event context in task_struct | ||
6235 | */ | ||
6236 | int perf_event_init_task(struct task_struct *child) | ||
6237 | { | ||
6238 | int ctxn, ret; | ||
6239 | |||
6240 | for_each_task_context_nr(ctxn) { | ||
6241 | ret = perf_event_init_context(child, ctxn); | ||
6242 | if (ret) | ||
6243 | return ret; | ||
6244 | } | ||
6245 | |||
6246 | return 0; | ||
6247 | } | ||
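Tasks now carry an array of context pointers indexed by the pmu's task_ctx_nr, and initialization simply walks every slot, bailing out on the first error. A sketch of that shape; NR_TASK_CONTEXTS and the lazy NULL init are illustrative stand-ins.

#include <stddef.h>

#define NR_TASK_CONTEXTS 2      /* e.g. one hardware and one software class */

struct task {
    void *ctxp[NR_TASK_CONTEXTS];
};

/* Stand-in for perf_event_init_context(): slot starts empty. */
static int init_context(struct task *t, int ctxn)
{
    t->ctxp[ctxn] = NULL;       /* a real context is inherited on demand */
    return 0;
}

/* Mirrors perf_event_init_task(): one init per context class. */
static int task_init(struct task *t)
{
    int ctxn, ret;

    for (ctxn = 0; ctxn < NR_TASK_CONTEXTS; ctxn++) {
        ret = init_context(t, ctxn);
        if (ret)
            return ret;
    }
    return 0;
}

int main(void)
{
    struct task t;

    return task_init(&t);
}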
6248 | |||
5702 | static void __init perf_event_init_all_cpus(void) | 6249 | static void __init perf_event_init_all_cpus(void) |
5703 | { | 6250 | { |
6251 | struct swevent_htable *swhash; | ||
5704 | int cpu; | 6252 | int cpu; |
5705 | struct perf_cpu_context *cpuctx; | ||
5706 | 6253 | ||
5707 | for_each_possible_cpu(cpu) { | 6254 | for_each_possible_cpu(cpu) { |
5708 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 6255 | swhash = &per_cpu(swevent_htable, cpu); |
5709 | mutex_init(&cpuctx->hlist_mutex); | 6256 | mutex_init(&swhash->hlist_mutex); |
5710 | __perf_event_init_context(&cpuctx->ctx, NULL); | 6257 | INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); |
5711 | } | 6258 | } |
5712 | } | 6259 | } |
5713 | 6260 | ||
5714 | static void __cpuinit perf_event_init_cpu(int cpu) | 6261 | static void __cpuinit perf_event_init_cpu(int cpu) |
5715 | { | 6262 | { |
5716 | struct perf_cpu_context *cpuctx; | 6263 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
5717 | |||
5718 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
5719 | |||
5720 | spin_lock(&perf_resource_lock); | ||
5721 | cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; | ||
5722 | spin_unlock(&perf_resource_lock); | ||
5723 | 6264 | ||
5724 | mutex_lock(&cpuctx->hlist_mutex); | 6265 | mutex_lock(&swhash->hlist_mutex); |
5725 | if (cpuctx->hlist_refcount > 0) { | 6266 | if (swhash->hlist_refcount > 0) { |
5726 | struct swevent_hlist *hlist; | 6267 | struct swevent_hlist *hlist; |
5727 | 6268 | ||
5728 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); | 6269 | hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); |
5729 | WARN_ON_ONCE(!hlist); | 6270 | WARN_ON(!hlist); |
5730 | rcu_assign_pointer(cpuctx->swevent_hlist, hlist); | 6271 | rcu_assign_pointer(swhash->swevent_hlist, hlist); |
5731 | } | 6272 | } |
5732 | mutex_unlock(&cpuctx->hlist_mutex); | 6273 | mutex_unlock(&swhash->hlist_mutex); |
5733 | } | 6274 | } |
5734 | 6275 | ||
5735 | #ifdef CONFIG_HOTPLUG_CPU | 6276 | #ifdef CONFIG_HOTPLUG_CPU |
5736 | static void __perf_event_exit_cpu(void *info) | 6277 | static void perf_pmu_rotate_stop(struct pmu *pmu) |
5737 | { | 6278 | { |
5738 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 6279 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
5739 | struct perf_event_context *ctx = &cpuctx->ctx; | 6280 | |
6281 | WARN_ON(!irqs_disabled()); | ||
6282 | |||
6283 | list_del_init(&cpuctx->rotation_list); | ||
6284 | } | ||
6285 | |||
6286 | static void __perf_event_exit_context(void *__info) | ||
6287 | { | ||
6288 | struct perf_event_context *ctx = __info; | ||
5740 | struct perf_event *event, *tmp; | 6289 | struct perf_event *event, *tmp; |
5741 | 6290 | ||
6291 | perf_pmu_rotate_stop(ctx->pmu); | ||
6292 | |||
5742 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) | 6293 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
5743 | __perf_event_remove_from_context(event); | 6294 | __perf_event_remove_from_context(event); |
5744 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) | 6295 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) |
5745 | __perf_event_remove_from_context(event); | 6296 | __perf_event_remove_from_context(event); |
5746 | } | 6297 | } |
6298 | |||
6299 | static void perf_event_exit_cpu_context(int cpu) | ||
6300 | { | ||
6301 | struct perf_event_context *ctx; | ||
6302 | struct pmu *pmu; | ||
6303 | int idx; | ||
6304 | |||
6305 | idx = srcu_read_lock(&pmus_srcu); | ||
6306 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
6307 | ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx; | ||
6308 | |||
6309 | mutex_lock(&ctx->mutex); | ||
6310 | smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1); | ||
6311 | mutex_unlock(&ctx->mutex); | ||
6312 | } | ||
6313 | srcu_read_unlock(&pmus_srcu, idx); | ||
6314 | } | ||
6315 | |||
5747 | static void perf_event_exit_cpu(int cpu) | 6316 | static void perf_event_exit_cpu(int cpu) |
5748 | { | 6317 | { |
5749 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 6318 | struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); |
5750 | struct perf_event_context *ctx = &cpuctx->ctx; | ||
5751 | 6319 | ||
5752 | mutex_lock(&cpuctx->hlist_mutex); | 6320 | mutex_lock(&swhash->hlist_mutex); |
5753 | swevent_hlist_release(cpuctx); | 6321 | swevent_hlist_release(swhash); |
5754 | mutex_unlock(&cpuctx->hlist_mutex); | 6322 | mutex_unlock(&swhash->hlist_mutex); |
5755 | 6323 | ||
5756 | mutex_lock(&ctx->mutex); | 6324 | perf_event_exit_cpu_context(cpu); |
5757 | smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); | ||
5758 | mutex_unlock(&ctx->mutex); | ||
5759 | } | 6325 | } |
5760 | #else | 6326 | #else |
5761 | static inline void perf_event_exit_cpu(int cpu) { } | 6327 | static inline void perf_event_exit_cpu(int cpu) { } |
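The hunks above replace the task's single perf_event_ctxp pointer with an array indexed by context type, and perf_event_init_task() walks every slot, stopping at the first failure. A minimal userspace sketch of that pattern follows; the two context types and all names are illustrative, not the kernel's.

    #include <stdio.h>
    #include <stdlib.h>

    enum { CTX_HW, CTX_SW, NR_CONTEXTS };      /* hypothetical context types */

    struct task {
        void *ctxp[NR_CONTEXTS];               /* one context slot per type */
    };

    static int init_context(struct task *t, int ctxn)
    {
        t->ctxp[ctxn] = calloc(1, 16);         /* stand-in for a real context */
        return t->ctxp[ctxn] ? 0 : -1;
    }

    static int task_init_all(struct task *t)
    {
        int ctxn, ret;

        for (ctxn = 0; ctxn < NR_CONTEXTS; ctxn++) {
            ret = init_context(t, ctxn);
            if (ret)
                return ret;                    /* first failure wins, as above */
        }
        return 0;
    }

    int main(void)
    {
        struct task t = { { NULL } };
        printf("init: %d\n", task_init_all(&t));
        return 0;
    }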
@@ -5785,118 +6351,13 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
5785 | return NOTIFY_OK; | 6351 | return NOTIFY_OK; |
5786 | } | 6352 | } |
5787 | 6353 | ||
5788 | /* | ||
5789 | * This has to have a higher priority than migration_notifier in sched.c. | ||
5790 | */ | ||
5791 | static struct notifier_block __cpuinitdata perf_cpu_nb = { | ||
5792 | .notifier_call = perf_cpu_notify, | ||
5793 | .priority = 20, | ||
5794 | }; | ||
5795 | |||
5796 | void __init perf_event_init(void) | 6354 | void __init perf_event_init(void) |
5797 | { | 6355 | { |
5798 | perf_event_init_all_cpus(); | 6356 | perf_event_init_all_cpus(); |
5799 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, | 6357 | init_srcu_struct(&pmus_srcu); |
5800 | (void *)(long)smp_processor_id()); | 6358 | perf_pmu_register(&perf_swevent); |
5801 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, | 6359 | perf_pmu_register(&perf_cpu_clock); |
5802 | (void *)(long)smp_processor_id()); | 6360 | perf_pmu_register(&perf_task_clock); |
5803 | register_cpu_notifier(&perf_cpu_nb); | 6361 | perf_tp_register(); |
5804 | } | 6362 | perf_cpu_notifier(perf_cpu_notify); |
5805 | |||
5806 | static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, | ||
5807 | struct sysdev_class_attribute *attr, | ||
5808 | char *buf) | ||
5809 | { | ||
5810 | return sprintf(buf, "%d\n", perf_reserved_percpu); | ||
5811 | } | ||
5812 | |||
5813 | static ssize_t | ||
5814 | perf_set_reserve_percpu(struct sysdev_class *class, | ||
5815 | struct sysdev_class_attribute *attr, | ||
5816 | const char *buf, | ||
5817 | size_t count) | ||
5818 | { | ||
5819 | struct perf_cpu_context *cpuctx; | ||
5820 | unsigned long val; | ||
5821 | int err, cpu, mpt; | ||
5822 | |||
5823 | err = strict_strtoul(buf, 10, &val); | ||
5824 | if (err) | ||
5825 | return err; | ||
5826 | if (val > perf_max_events) | ||
5827 | return -EINVAL; | ||
5828 | |||
5829 | spin_lock(&perf_resource_lock); | ||
5830 | perf_reserved_percpu = val; | ||
5831 | for_each_online_cpu(cpu) { | ||
5832 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
5833 | raw_spin_lock_irq(&cpuctx->ctx.lock); | ||
5834 | mpt = min(perf_max_events - cpuctx->ctx.nr_events, | ||
5835 | perf_max_events - perf_reserved_percpu); | ||
5836 | cpuctx->max_pertask = mpt; | ||
5837 | raw_spin_unlock_irq(&cpuctx->ctx.lock); | ||
5838 | } | ||
5839 | spin_unlock(&perf_resource_lock); | ||
5840 | |||
5841 | return count; | ||
5842 | } | ||
5843 | |||
5844 | static ssize_t perf_show_overcommit(struct sysdev_class *class, | ||
5845 | struct sysdev_class_attribute *attr, | ||
5846 | char *buf) | ||
5847 | { | ||
5848 | return sprintf(buf, "%d\n", perf_overcommit); | ||
5849 | } | ||
5850 | |||
5851 | static ssize_t | ||
5852 | perf_set_overcommit(struct sysdev_class *class, | ||
5853 | struct sysdev_class_attribute *attr, | ||
5854 | const char *buf, size_t count) | ||
5855 | { | ||
5856 | unsigned long val; | ||
5857 | int err; | ||
5858 | |||
5859 | err = strict_strtoul(buf, 10, &val); | ||
5860 | if (err) | ||
5861 | return err; | ||
5862 | if (val > 1) | ||
5863 | return -EINVAL; | ||
5864 | |||
5865 | spin_lock(&perf_resource_lock); | ||
5866 | perf_overcommit = val; | ||
5867 | spin_unlock(&perf_resource_lock); | ||
5868 | |||
5869 | return count; | ||
5870 | } | ||
5871 | |||
5872 | static SYSDEV_CLASS_ATTR( | ||
5873 | reserve_percpu, | ||
5874 | 0644, | ||
5875 | perf_show_reserve_percpu, | ||
5876 | perf_set_reserve_percpu | ||
5877 | ); | ||
5878 | |||
5879 | static SYSDEV_CLASS_ATTR( | ||
5880 | overcommit, | ||
5881 | 0644, | ||
5882 | perf_show_overcommit, | ||
5883 | perf_set_overcommit | ||
5884 | ); | ||
5885 | |||
5886 | static struct attribute *perfclass_attrs[] = { | ||
5887 | &attr_reserve_percpu.attr, | ||
5888 | &attr_overcommit.attr, | ||
5889 | NULL | ||
5890 | }; | ||
5891 | |||
5892 | static struct attribute_group perfclass_attr_group = { | ||
5893 | .attrs = perfclass_attrs, | ||
5894 | .name = "perf_events", | ||
5895 | }; | ||
5896 | |||
5897 | static int __init perf_event_sysfs_init(void) | ||
5898 | { | ||
5899 | return sysfs_create_group(&cpu_sysdev_class.kset.kobj, | ||
5900 | &perfclass_attr_group); | ||
5901 | } | 6363 | } |
5902 | device_initcall(perf_event_sysfs_init); | ||
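With the sysdev knobs gone, perf_event_init() now brings up the core PMUs through the same perf_pmu_register() path that architecture code uses. A toy registry in plain C shows the shape of the idea; the real list is SRCU-protected, and these names are made up.

    #include <stdio.h>

    struct pmu {
        const char *name;
        struct pmu *next;
    };

    static struct pmu *pmus;                    /* head of the registry */

    static void pmu_register(struct pmu *p)     /* hypothetical analogue */
    {
        p->next = pmus;
        pmus = p;
    }

    int main(void)
    {
        static struct pmu swevent    = { "software",   NULL };
        static struct pmu cpu_clock  = { "cpu-clock",  NULL };
        static struct pmu task_clock = { "task-clock", NULL };

        /* mirror the init order above: built-ins register themselves */
        pmu_register(&swevent);
        pmu_register(&cpu_clock);
        pmu_register(&task_clock);

        for (struct pmu *p = pmus; p; p = p->next)
            printf("registered: %s\n", p->name);
        return 0;
    }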
diff --git a/kernel/sched.c b/kernel/sched.c index dc85ceb90832..c0d2067f3e0d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3584,7 +3584,7 @@ void scheduler_tick(void) | |||
3584 | curr->sched_class->task_tick(rq, curr, 0); | 3584 | curr->sched_class->task_tick(rq, curr, 0); |
3585 | raw_spin_unlock(&rq->lock); | 3585 | raw_spin_unlock(&rq->lock); |
3586 | 3586 | ||
3587 | perf_event_task_tick(curr); | 3587 | perf_event_task_tick(); |
3588 | 3588 | ||
3589 | #ifdef CONFIG_SMP | 3589 | #ifdef CONFIG_SMP |
3590 | rq->idle_at_tick = idle_cpu(cpu); | 3590 | rq->idle_at_tick = idle_cpu(cpu); |
diff --git a/kernel/smp.c b/kernel/smp.c index 75c970c715d3..ed6aacfcb7ef 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -365,9 +365,10 @@ call: | |||
365 | EXPORT_SYMBOL_GPL(smp_call_function_any); | 365 | EXPORT_SYMBOL_GPL(smp_call_function_any); |
366 | 366 | ||
367 | /** | 367 | /** |
368 | * __smp_call_function_single(): Run a function on another CPU | 368 | * __smp_call_function_single(): Run a function on a specific CPU |
369 | * @cpu: The CPU to run on. | 369 | * @cpu: The CPU to run on. |
370 | * @data: Pre-allocated and setup data structure | 370 | * @data: Pre-allocated and setup data structure |
371 | * @wait: If true, wait until function has completed on specified CPU. | ||
371 | * | 372 | * |
372 | * Like smp_call_function_single(), but allow caller to pass in a | 373 | * Like smp_call_function_single(), but allow caller to pass in a |
373 | * pre-allocated data structure. Useful for embedding @data inside | 374 | * pre-allocated data structure. Useful for embedding @data inside |
@@ -376,8 +377,10 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); | |||
376 | void __smp_call_function_single(int cpu, struct call_single_data *data, | 377 | void __smp_call_function_single(int cpu, struct call_single_data *data, |
377 | int wait) | 378 | int wait) |
378 | { | 379 | { |
379 | csd_lock(data); | 380 | unsigned int this_cpu; |
381 | unsigned long flags; | ||
380 | 382 | ||
383 | this_cpu = get_cpu(); | ||
381 | /* | 384 | /* |
382 | * Can deadlock when called with interrupts disabled. | 385 | * Can deadlock when called with interrupts disabled. |
383 | * We allow cpu's that are not yet online though, as no one else can | 386 | * We allow cpu's that are not yet online though, as no one else can |
@@ -387,7 +390,15 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, | |||
387 | WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled() | 390 | WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled() |
388 | && !oops_in_progress); | 391 | && !oops_in_progress); |
389 | 392 | ||
390 | generic_exec_single(cpu, data, wait); | 393 | if (cpu == this_cpu) { |
394 | local_irq_save(flags); | ||
395 | data->func(data->info); | ||
396 | local_irq_restore(flags); | ||
397 | } else { | ||
398 | csd_lock(data); | ||
399 | generic_exec_single(cpu, data, wait); | ||
400 | } | ||
401 | put_cpu(); | ||
391 | } | 402 | } |
392 | 403 | ||
393 | /** | 404 | /** |
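The __smp_call_function_single() change adds a fast path: when the target CPU is the calling CPU, the function simply runs locally with interrupts disabled instead of being queued as if it were remote. A single-threaded sketch of just the dispatch decision; queue_ipi() is a stand-in for the cross-CPU machinery, not a kernel API.

    #include <stdio.h>

    struct call_single_data {
        void (*func)(void *info);
        void *info;
    };

    static int this_cpu = 0;                       /* assumed fixed for the demo */

    static void queue_ipi(int cpu, struct call_single_data *d)
    {
        (void)cpu;                                 /* pretend to send an IPI */
        d->func(d->info);
    }

    static void run_on_cpu(int cpu, struct call_single_data *d)
    {
        if (cpu == this_cpu)
            d->func(d->info);                      /* kernel wraps this in local_irq_save() */
        else
            queue_ipi(cpu, d);                     /* remote case: lock and queue */
    }

    static void hello(void *info)
    {
        printf("ran with info=%s\n", (char *)info);
    }

    int main(void)
    {
        struct call_single_data d = { hello, "self" };
        run_on_cpu(0, &d);
        return 0;
    }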
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index 4f104515a19b..f8b11a283171 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c | |||
@@ -115,7 +115,9 @@ static int test_kprobes(void) | |||
115 | int ret; | 115 | int ret; |
116 | struct kprobe *kps[2] = {&kp, &kp2}; | 116 | struct kprobe *kps[2] = {&kp, &kp2}; |
117 | 117 | ||
118 | kp.addr = 0; /* addr should be cleared for reusing kprobe. */ | 118 | /* addr and flags should be cleared for reusing kprobe. */
119 | kp.addr = NULL; | ||
120 | kp.flags = 0; | ||
119 | ret = register_kprobes(kps, 2); | 121 | ret = register_kprobes(kps, 2); |
120 | if (ret < 0) { | 122 | if (ret < 0) { |
121 | printk(KERN_ERR "Kprobe smoke test failed: " | 123 | printk(KERN_ERR "Kprobe smoke test failed: " |
@@ -210,7 +212,9 @@ static int test_jprobes(void) | |||
210 | int ret; | 212 | int ret; |
211 | struct jprobe *jps[2] = {&jp, &jp2}; | 213 | struct jprobe *jps[2] = {&jp, &jp2}; |
212 | 214 | ||
213 | jp.kp.addr = 0; /* addr should be cleared for reusing kprobe. */ | 215 | /* addr and flags should be cleared for reusing kprobe. */
216 | jp.kp.addr = NULL; | ||
217 | jp.kp.flags = 0; | ||
214 | ret = register_jprobes(jps, 2); | 218 | ret = register_jprobes(jps, 2); |
215 | if (ret < 0) { | 219 | if (ret < 0) { |
216 | printk(KERN_ERR "Kprobe smoke test failed: " | 220 | printk(KERN_ERR "Kprobe smoke test failed: " |
@@ -323,7 +327,9 @@ static int test_kretprobes(void) | |||
323 | int ret; | 327 | int ret; |
324 | struct kretprobe *rps[2] = {&rp, &rp2}; | 328 | struct kretprobe *rps[2] = {&rp, &rp2}; |
325 | 329 | ||
326 | rp.kp.addr = 0; /* addr should be cleared for reusing kprobe. */ | 330 | /* addr and flags should be cleared for reusing kprobe. */
331 | rp.kp.addr = NULL; | ||
332 | rp.kp.flags = 0; | ||
327 | ret = register_kretprobes(rps, 2); | 333 | ret = register_kretprobes(rps, 2); |
328 | if (ret < 0) { | 334 | if (ret < 0) { |
329 | printk(KERN_ERR "Kprobe smoke test failed: " | 335 | printk(KERN_ERR "Kprobe smoke test failed: " |
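The kprobes smoke tests now clear both addr and flags before reuse, because registration fills those fields in and a second register_*() call that sees stale values fails. A self-contained analogue; struct probe is hypothetical, loosely modeled on struct kprobe.

    #include <stdio.h>
    #include <stddef.h>

    struct probe {
        void *addr;                  /* filled in by registration */
        unsigned int flags;          /* internal state set by registration */
        const char *symbol_name;
    };

    static int probe_register(struct probe *p)
    {
        if (p->addr || p->flags)     /* stale state from a previous use */
            return -1;
        p->addr = (void *)0x1234;    /* pretend symbol_name was resolved */
        p->flags = 1;
        return 0;
    }

    int main(void)
    {
        struct probe p = { NULL, 0, "do_fork" };

        printf("first:  %d\n", probe_register(&p));   /* 0 */
        printf("reused: %d\n", probe_register(&p));   /* -1: not cleared */
        p.addr  = NULL;                               /* the reset shown above */
        p.flags = 0;
        printf("reset:  %d\n", probe_register(&p));   /* 0 again */
        return 0;
    }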
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 538501c6ea50..e550d2eda1df 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS | |||
49 | help | 49 | help |
50 | See Documentation/trace/ftrace-design.txt | 50 | See Documentation/trace/ftrace-design.txt |
51 | 51 | ||
52 | config HAVE_C_RECORDMCOUNT | ||
53 | bool | ||
54 | help | ||
55 | C version of recordmcount available? | ||
56 | |||
52 | config TRACER_MAX_TRACE | 57 | config TRACER_MAX_TRACE |
53 | bool | 58 | bool |
54 | 59 | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fa7ece649fe1..65fb077ea79c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -884,10 +884,8 @@ enum { | |||
884 | FTRACE_ENABLE_CALLS = (1 << 0), | 884 | FTRACE_ENABLE_CALLS = (1 << 0), |
885 | FTRACE_DISABLE_CALLS = (1 << 1), | 885 | FTRACE_DISABLE_CALLS = (1 << 1), |
886 | FTRACE_UPDATE_TRACE_FUNC = (1 << 2), | 886 | FTRACE_UPDATE_TRACE_FUNC = (1 << 2), |
887 | FTRACE_ENABLE_MCOUNT = (1 << 3), | 887 | FTRACE_START_FUNC_RET = (1 << 3), |
888 | FTRACE_DISABLE_MCOUNT = (1 << 4), | 888 | FTRACE_STOP_FUNC_RET = (1 << 4), |
889 | FTRACE_START_FUNC_RET = (1 << 5), | ||
890 | FTRACE_STOP_FUNC_RET = (1 << 6), | ||
891 | }; | 889 | }; |
892 | 890 | ||
893 | static int ftrace_filtered; | 891 | static int ftrace_filtered; |
@@ -1226,8 +1224,6 @@ static void ftrace_shutdown(int command) | |||
1226 | 1224 | ||
1227 | static void ftrace_startup_sysctl(void) | 1225 | static void ftrace_startup_sysctl(void) |
1228 | { | 1226 | { |
1229 | int command = FTRACE_ENABLE_MCOUNT; | ||
1230 | |||
1231 | if (unlikely(ftrace_disabled)) | 1227 | if (unlikely(ftrace_disabled)) |
1232 | return; | 1228 | return; |
1233 | 1229 | ||
@@ -1235,23 +1231,17 @@ static void ftrace_startup_sysctl(void) | |||
1235 | saved_ftrace_func = NULL; | 1231 | saved_ftrace_func = NULL; |
1236 | /* ftrace_start_up is true if we want ftrace running */ | 1232 | /* ftrace_start_up is true if we want ftrace running */ |
1237 | if (ftrace_start_up) | 1233 | if (ftrace_start_up) |
1238 | command |= FTRACE_ENABLE_CALLS; | 1234 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); |
1239 | |||
1240 | ftrace_run_update_code(command); | ||
1241 | } | 1235 | } |
1242 | 1236 | ||
1243 | static void ftrace_shutdown_sysctl(void) | 1237 | static void ftrace_shutdown_sysctl(void) |
1244 | { | 1238 | { |
1245 | int command = FTRACE_DISABLE_MCOUNT; | ||
1246 | |||
1247 | if (unlikely(ftrace_disabled)) | 1239 | if (unlikely(ftrace_disabled)) |
1248 | return; | 1240 | return; |
1249 | 1241 | ||
1250 | /* ftrace_start_up is true if ftrace is running */ | 1242 | /* ftrace_start_up is true if ftrace is running */ |
1251 | if (ftrace_start_up) | 1243 | if (ftrace_start_up) |
1252 | command |= FTRACE_DISABLE_CALLS; | 1244 | ftrace_run_update_code(FTRACE_DISABLE_CALLS); |
1253 | |||
1254 | ftrace_run_update_code(command); | ||
1255 | } | 1245 | } |
1256 | 1246 | ||
1257 | static cycle_t ftrace_update_time; | 1247 | static cycle_t ftrace_update_time; |
@@ -1368,24 +1358,29 @@ enum { | |||
1368 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ | 1358 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ |
1369 | 1359 | ||
1370 | struct ftrace_iterator { | 1360 | struct ftrace_iterator { |
1371 | struct ftrace_page *pg; | 1361 | loff_t pos; |
1372 | int hidx; | 1362 | loff_t func_pos; |
1373 | int idx; | 1363 | struct ftrace_page *pg; |
1374 | unsigned flags; | 1364 | struct dyn_ftrace *func; |
1375 | struct trace_parser parser; | 1365 | struct ftrace_func_probe *probe; |
1366 | struct trace_parser parser; | ||
1367 | int hidx; | ||
1368 | int idx; | ||
1369 | unsigned flags; | ||
1376 | }; | 1370 | }; |
1377 | 1371 | ||
1378 | static void * | 1372 | static void * |
1379 | t_hash_next(struct seq_file *m, void *v, loff_t *pos) | 1373 | t_hash_next(struct seq_file *m, loff_t *pos) |
1380 | { | 1374 | { |
1381 | struct ftrace_iterator *iter = m->private; | 1375 | struct ftrace_iterator *iter = m->private; |
1382 | struct hlist_node *hnd = v; | 1376 | struct hlist_node *hnd = NULL; |
1383 | struct hlist_head *hhd; | 1377 | struct hlist_head *hhd; |
1384 | 1378 | ||
1385 | WARN_ON(!(iter->flags & FTRACE_ITER_HASH)); | ||
1386 | |||
1387 | (*pos)++; | 1379 | (*pos)++; |
1380 | iter->pos = *pos; | ||
1388 | 1381 | ||
1382 | if (iter->probe) | ||
1383 | hnd = &iter->probe->node; | ||
1389 | retry: | 1384 | retry: |
1390 | if (iter->hidx >= FTRACE_FUNC_HASHSIZE) | 1385 | if (iter->hidx >= FTRACE_FUNC_HASHSIZE) |
1391 | return NULL; | 1386 | return NULL; |
@@ -1408,7 +1403,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos) | |||
1408 | } | 1403 | } |
1409 | } | 1404 | } |
1410 | 1405 | ||
1411 | return hnd; | 1406 | if (WARN_ON_ONCE(!hnd)) |
1407 | return NULL; | ||
1408 | |||
1409 | iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node); | ||
1410 | |||
1411 | return iter; | ||
1412 | } | 1412 | } |
1413 | 1413 | ||
1414 | static void *t_hash_start(struct seq_file *m, loff_t *pos) | 1414 | static void *t_hash_start(struct seq_file *m, loff_t *pos) |
@@ -1417,26 +1417,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos) | |||
1417 | void *p = NULL; | 1417 | void *p = NULL; |
1418 | loff_t l; | 1418 | loff_t l; |
1419 | 1419 | ||
1420 | if (!(iter->flags & FTRACE_ITER_HASH)) | 1420 | if (iter->func_pos > *pos) |
1421 | *pos = 0; | 1421 | return NULL; |
1422 | |||
1423 | iter->flags |= FTRACE_ITER_HASH; | ||
1424 | 1422 | ||
1425 | iter->hidx = 0; | 1423 | iter->hidx = 0; |
1426 | for (l = 0; l <= *pos; ) { | 1424 | for (l = 0; l <= (*pos - iter->func_pos); ) { |
1427 | p = t_hash_next(m, p, &l); | 1425 | p = t_hash_next(m, &l); |
1428 | if (!p) | 1426 | if (!p) |
1429 | break; | 1427 | break; |
1430 | } | 1428 | } |
1431 | return p; | 1429 | if (!p) |
1430 | return NULL; | ||
1431 | |||
1432 | /* Only set this if we have an item */ | ||
1433 | iter->flags |= FTRACE_ITER_HASH; | ||
1434 | |||
1435 | return iter; | ||
1432 | } | 1436 | } |
1433 | 1437 | ||
1434 | static int t_hash_show(struct seq_file *m, void *v) | 1438 | static int |
1439 | t_hash_show(struct seq_file *m, struct ftrace_iterator *iter) | ||
1435 | { | 1440 | { |
1436 | struct ftrace_func_probe *rec; | 1441 | struct ftrace_func_probe *rec; |
1437 | struct hlist_node *hnd = v; | ||
1438 | 1442 | ||
1439 | rec = hlist_entry(hnd, struct ftrace_func_probe, node); | 1443 | rec = iter->probe; |
1444 | if (WARN_ON_ONCE(!rec)) | ||
1445 | return -EIO; | ||
1440 | 1446 | ||
1441 | if (rec->ops->print) | 1447 | if (rec->ops->print) |
1442 | return rec->ops->print(m, rec->ip, rec->ops, rec->data); | 1448 | return rec->ops->print(m, rec->ip, rec->ops, rec->data); |
@@ -1457,12 +1463,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1457 | struct dyn_ftrace *rec = NULL; | 1463 | struct dyn_ftrace *rec = NULL; |
1458 | 1464 | ||
1459 | if (iter->flags & FTRACE_ITER_HASH) | 1465 | if (iter->flags & FTRACE_ITER_HASH) |
1460 | return t_hash_next(m, v, pos); | 1466 | return t_hash_next(m, pos); |
1461 | 1467 | ||
1462 | (*pos)++; | 1468 | (*pos)++; |
1469 | iter->pos = *pos; | ||
1463 | 1470 | ||
1464 | if (iter->flags & FTRACE_ITER_PRINTALL) | 1471 | if (iter->flags & FTRACE_ITER_PRINTALL) |
1465 | return NULL; | 1472 | return t_hash_start(m, pos); |
1466 | 1473 | ||
1467 | retry: | 1474 | retry: |
1468 | if (iter->idx >= iter->pg->index) { | 1475 | if (iter->idx >= iter->pg->index) { |
@@ -1491,7 +1498,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1491 | } | 1498 | } |
1492 | } | 1499 | } |
1493 | 1500 | ||
1494 | return rec; | 1501 | if (!rec) |
1502 | return t_hash_start(m, pos); | ||
1503 | |||
1504 | iter->func_pos = *pos; | ||
1505 | iter->func = rec; | ||
1506 | |||
1507 | return iter; | ||
1508 | } | ||
1509 | |||
1510 | static void reset_iter_read(struct ftrace_iterator *iter) | ||
1511 | { | ||
1512 | iter->pos = 0; | ||
1513 | iter->func_pos = 0; | ||
1514 | iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH); | ||
1495 | } | 1515 | } |
1496 | 1516 | ||
1497 | static void *t_start(struct seq_file *m, loff_t *pos) | 1517 | static void *t_start(struct seq_file *m, loff_t *pos) |
@@ -1502,6 +1522,12 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
1502 | 1522 | ||
1503 | mutex_lock(&ftrace_lock); | 1523 | mutex_lock(&ftrace_lock); |
1504 | /* | 1524 | /* |
1525 | * If an lseek was done, then reset and start from beginning. | ||
1526 | */ | ||
1527 | if (*pos < iter->pos) | ||
1528 | reset_iter_read(iter); | ||
1529 | |||
1530 | /* | ||
1505 | * For set_ftrace_filter reading, if we have the filter | 1531 | * For set_ftrace_filter reading, if we have the filter |
1506 | * off, we can short cut and just print out that all | 1532 | * off, we can short cut and just print out that all |
1507 | * functions are enabled. | 1533 | * functions are enabled. |
@@ -1518,6 +1544,11 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
1518 | if (iter->flags & FTRACE_ITER_HASH) | 1544 | if (iter->flags & FTRACE_ITER_HASH) |
1519 | return t_hash_start(m, pos); | 1545 | return t_hash_start(m, pos); |
1520 | 1546 | ||
1547 | /* | ||
1548 | * Unfortunately, we need to restart at ftrace_pages_start | ||
1549 | * every time we let go of the ftrace_mutex. This is because | ||
1550 | * those pointers can change without the lock. | ||
1551 | */ | ||
1521 | iter->pg = ftrace_pages_start; | 1552 | iter->pg = ftrace_pages_start; |
1522 | iter->idx = 0; | 1553 | iter->idx = 0; |
1523 | for (l = 0; l <= *pos; ) { | 1554 | for (l = 0; l <= *pos; ) { |
@@ -1526,10 +1557,14 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
1526 | break; | 1557 | break; |
1527 | } | 1558 | } |
1528 | 1559 | ||
1529 | if (!p && iter->flags & FTRACE_ITER_FILTER) | 1560 | if (!p) { |
1530 | return t_hash_start(m, pos); | 1561 | if (iter->flags & FTRACE_ITER_FILTER) |
1562 | return t_hash_start(m, pos); | ||
1531 | 1563 | ||
1532 | return p; | 1564 | return NULL; |
1565 | } | ||
1566 | |||
1567 | return iter; | ||
1533 | } | 1568 | } |
1534 | 1569 | ||
1535 | static void t_stop(struct seq_file *m, void *p) | 1570 | static void t_stop(struct seq_file *m, void *p) |
@@ -1540,16 +1575,18 @@ static void t_stop(struct seq_file *m, void *p) | |||
1540 | static int t_show(struct seq_file *m, void *v) | 1575 | static int t_show(struct seq_file *m, void *v) |
1541 | { | 1576 | { |
1542 | struct ftrace_iterator *iter = m->private; | 1577 | struct ftrace_iterator *iter = m->private; |
1543 | struct dyn_ftrace *rec = v; | 1578 | struct dyn_ftrace *rec; |
1544 | 1579 | ||
1545 | if (iter->flags & FTRACE_ITER_HASH) | 1580 | if (iter->flags & FTRACE_ITER_HASH) |
1546 | return t_hash_show(m, v); | 1581 | return t_hash_show(m, iter); |
1547 | 1582 | ||
1548 | if (iter->flags & FTRACE_ITER_PRINTALL) { | 1583 | if (iter->flags & FTRACE_ITER_PRINTALL) { |
1549 | seq_printf(m, "#### all functions enabled ####\n"); | 1584 | seq_printf(m, "#### all functions enabled ####\n"); |
1550 | return 0; | 1585 | return 0; |
1551 | } | 1586 | } |
1552 | 1587 | ||
1588 | rec = iter->func; | ||
1589 | |||
1553 | if (!rec) | 1590 | if (!rec) |
1554 | return 0; | 1591 | return 0; |
1555 | 1592 | ||
@@ -2418,7 +2455,7 @@ static const struct file_operations ftrace_filter_fops = { | |||
2418 | .open = ftrace_filter_open, | 2455 | .open = ftrace_filter_open, |
2419 | .read = seq_read, | 2456 | .read = seq_read, |
2420 | .write = ftrace_filter_write, | 2457 | .write = ftrace_filter_write, |
2421 | .llseek = no_llseek, | 2458 | .llseek = ftrace_regex_lseek, |
2422 | .release = ftrace_filter_release, | 2459 | .release = ftrace_filter_release, |
2423 | }; | 2460 | }; |
2424 | 2461 | ||
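The ftrace iterator rework caches pos and func_pos in the iterator so that set_ftrace_filter can support lseek: t_start() notices a backwards seek (*pos < iter->pos) and restarts from scratch. The same idea in a tiny resettable iterator; the array and names are invented for the demo.

    #include <stdio.h>

    #define N 4
    static const int data[N] = { 10, 20, 30, 40 };

    struct iter {
        long pos;        /* last position we walked to */
        int  idx;        /* cursor into the backing array */
    };

    static void reset_iter(struct iter *it)
    {
        it->pos = 0;
        it->idx = 0;
    }

    /* seq_file-style start: if the caller seeks backwards, start over */
    static const int *it_start(struct iter *it, long *ppos)
    {
        if (*ppos < it->pos)
            reset_iter(it);
        while (it->pos < *ppos && it->idx < N) {
            it->idx++;
            it->pos++;
        }
        return it->idx < N ? &data[it->idx] : NULL;
    }

    int main(void)
    {
        struct iter it = { 0, 0 };
        long pos = 2;

        printf("%d\n", *it_start(&it, &pos));   /* 30 */
        pos = 0;                                /* lseek back to the start */
        printf("%d\n", *it_start(&it, &pos));   /* 10 again after the reset */
        return 0;
    }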
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 492197e2f86c..4e2f03410377 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) | |||
2606 | } | 2606 | } |
2607 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | 2607 | EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); |
2608 | 2608 | ||
2609 | /* | ||
2610 | * The total number of entries in the ring buffer is the running | ||
2611 | * counter of entries written into the ring buffer, minus the sum of | ||
2612 | * the entries read from the ring buffer and the number of | ||
2613 | * entries that were overwritten. | ||
2614 | */ | ||
2615 | static inline unsigned long | ||
2616 | rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) | ||
2617 | { | ||
2618 | return local_read(&cpu_buffer->entries) - | ||
2619 | (local_read(&cpu_buffer->overrun) + cpu_buffer->read); | ||
2620 | } | ||
2621 | |||
2609 | /** | 2622 | /** |
2610 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer | 2623 | * ring_buffer_entries_cpu - get the number of entries in a cpu buffer |
2611 | * @buffer: The ring buffer | 2624 | * @buffer: The ring buffer |
@@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); | |||
2614 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) | 2627 | unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) |
2615 | { | 2628 | { |
2616 | struct ring_buffer_per_cpu *cpu_buffer; | 2629 | struct ring_buffer_per_cpu *cpu_buffer; |
2617 | unsigned long ret; | ||
2618 | 2630 | ||
2619 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | 2631 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) |
2620 | return 0; | 2632 | return 0; |
2621 | 2633 | ||
2622 | cpu_buffer = buffer->buffers[cpu]; | 2634 | cpu_buffer = buffer->buffers[cpu]; |
2623 | ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) | ||
2624 | - cpu_buffer->read; | ||
2625 | 2635 | ||
2626 | return ret; | 2636 | return rb_num_of_entries(cpu_buffer); |
2627 | } | 2637 | } |
2628 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); | 2638 | EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); |
2629 | 2639 | ||
@@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) | |||
2684 | /* if you care about this being correct, lock the buffer */ | 2694 | /* if you care about this being correct, lock the buffer */ |
2685 | for_each_buffer_cpu(buffer, cpu) { | 2695 | for_each_buffer_cpu(buffer, cpu) { |
2686 | cpu_buffer = buffer->buffers[cpu]; | 2696 | cpu_buffer = buffer->buffers[cpu]; |
2687 | entries += (local_read(&cpu_buffer->entries) - | 2697 | entries += rb_num_of_entries(cpu_buffer); |
2688 | local_read(&cpu_buffer->overrun)) - cpu_buffer->read; | ||
2689 | } | 2698 | } |
2690 | 2699 | ||
2691 | return entries; | 2700 | return entries; |
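rb_num_of_entries() centralizes the bookkeeping that two callers previously open-coded: entries remaining = entries written - (entries overwritten + entries read). The same formula as plain arithmetic, with made-up numbers:

    #include <stdio.h>

    static unsigned long num_entries(unsigned long written,
                                     unsigned long overrun,
                                     unsigned long read)
    {
        return written - (overrun + read);
    }

    int main(void)
    {
        /* 1000 events written, 100 lost to overwrite, 250 consumed */
        printf("%lu entries remain\n", num_entries(1000, 100, 250));  /* 650 */
        return 0;
    }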
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 31cc4cb0dbf2..39c059ca670e 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include <linux/kprobes.h> | 9 | #include <linux/kprobes.h> |
10 | #include "trace.h" | 10 | #include "trace.h" |
11 | 11 | ||
12 | static char *perf_trace_buf[4]; | 12 | static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * Force it to be aligned to unsigned long to avoid misaligned accesses | 15 | * Force it to be aligned to unsigned long to avoid misaligned accesses |
@@ -24,7 +24,7 @@ static int total_ref_count; | |||
24 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, | 24 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, |
25 | struct perf_event *p_event) | 25 | struct perf_event *p_event) |
26 | { | 26 | { |
27 | struct hlist_head *list; | 27 | struct hlist_head __percpu *list; |
28 | int ret = -ENOMEM; | 28 | int ret = -ENOMEM; |
29 | int cpu; | 29 | int cpu; |
30 | 30 | ||
@@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, | |||
42 | tp_event->perf_events = list; | 42 | tp_event->perf_events = list; |
43 | 43 | ||
44 | if (!total_ref_count) { | 44 | if (!total_ref_count) { |
45 | char *buf; | 45 | char __percpu *buf; |
46 | int i; | 46 | int i; |
47 | 47 | ||
48 | for (i = 0; i < 4; i++) { | 48 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { |
49 | buf = (char *)alloc_percpu(perf_trace_t); | 49 | buf = (char __percpu *)alloc_percpu(perf_trace_t); |
50 | if (!buf) | 50 | if (!buf) |
51 | goto fail; | 51 | goto fail; |
52 | 52 | ||
@@ -65,7 +65,7 @@ fail: | |||
65 | if (!total_ref_count) { | 65 | if (!total_ref_count) { |
66 | int i; | 66 | int i; |
67 | 67 | ||
68 | for (i = 0; i < 4; i++) { | 68 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { |
69 | free_percpu(perf_trace_buf[i]); | 69 | free_percpu(perf_trace_buf[i]); |
70 | perf_trace_buf[i] = NULL; | 70 | perf_trace_buf[i] = NULL; |
71 | } | 71 | } |
@@ -101,22 +101,26 @@ int perf_trace_init(struct perf_event *p_event) | |||
101 | return ret; | 101 | return ret; |
102 | } | 102 | } |
103 | 103 | ||
104 | int perf_trace_enable(struct perf_event *p_event) | 104 | int perf_trace_add(struct perf_event *p_event, int flags) |
105 | { | 105 | { |
106 | struct ftrace_event_call *tp_event = p_event->tp_event; | 106 | struct ftrace_event_call *tp_event = p_event->tp_event; |
107 | struct hlist_head __percpu *pcpu_list; | ||
107 | struct hlist_head *list; | 108 | struct hlist_head *list; |
108 | 109 | ||
109 | list = tp_event->perf_events; | 110 | pcpu_list = tp_event->perf_events; |
110 | if (WARN_ON_ONCE(!list)) | 111 | if (WARN_ON_ONCE(!pcpu_list)) |
111 | return -EINVAL; | 112 | return -EINVAL; |
112 | 113 | ||
113 | list = this_cpu_ptr(list); | 114 | if (!(flags & PERF_EF_START)) |
115 | p_event->hw.state = PERF_HES_STOPPED; | ||
116 | |||
117 | list = this_cpu_ptr(pcpu_list); | ||
114 | hlist_add_head_rcu(&p_event->hlist_entry, list); | 118 | hlist_add_head_rcu(&p_event->hlist_entry, list); |
115 | 119 | ||
116 | return 0; | 120 | return 0; |
117 | } | 121 | } |
118 | 122 | ||
119 | void perf_trace_disable(struct perf_event *p_event) | 123 | void perf_trace_del(struct perf_event *p_event, int flags) |
120 | { | 124 | { |
121 | hlist_del_rcu(&p_event->hlist_entry); | 125 | hlist_del_rcu(&p_event->hlist_entry); |
122 | } | 126 | } |
@@ -142,7 +146,7 @@ void perf_trace_destroy(struct perf_event *p_event) | |||
142 | tp_event->perf_events = NULL; | 146 | tp_event->perf_events = NULL; |
143 | 147 | ||
144 | if (!--total_ref_count) { | 148 | if (!--total_ref_count) { |
145 | for (i = 0; i < 4; i++) { | 149 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { |
146 | free_percpu(perf_trace_buf[i]); | 150 | free_percpu(perf_trace_buf[i]); |
147 | perf_trace_buf[i] = NULL; | 151 | perf_trace_buf[i] = NULL; |
148 | } | 152 | } |
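perf_trace_buf goes from a hardcoded 4 to PERF_NR_CONTEXTS (still 4 at this point, one per recursion context), and the buffers remain refcounted: the first user allocates the whole set, the last one frees it. A userspace analogue of that get/put pattern; sizes and names are illustrative.

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_CONTEXTS 4                /* mirrors PERF_NR_CONTEXTS */

    static char *bufs[NR_CONTEXTS];
    static int ref_count;

    static int buf_get(void)
    {
        if (!ref_count) {                /* first user allocates everything */
            for (int i = 0; i < NR_CONTEXTS; i++) {
                bufs[i] = malloc(4096);
                if (!bufs[i])
                    goto fail;
            }
        }
        ref_count++;
        return 0;
    fail:
        for (int i = 0; i < NR_CONTEXTS; i++) {
            free(bufs[i]);
            bufs[i] = NULL;
        }
        return -1;
    }

    static void buf_put(void)
    {
        if (!--ref_count)                /* last user frees everything */
            for (int i = 0; i < NR_CONTEXTS; i++) {
                free(bufs[i]);
                bufs[i] = NULL;
            }
    }

    int main(void)
    {
        buf_get();
        buf_get();                       /* two users share one buffer set */
        buf_put();
        buf_put();
        printf("refs now %d\n", ref_count);
        return 0;
    }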
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4c758f146328..398c0e8b332c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -600,21 +600,29 @@ out: | |||
600 | 600 | ||
601 | enum { | 601 | enum { |
602 | FORMAT_HEADER = 1, | 602 | FORMAT_HEADER = 1, |
603 | FORMAT_PRINTFMT = 2, | 603 | FORMAT_FIELD_SEPERATOR = 2, |
604 | FORMAT_PRINTFMT = 3, | ||
604 | }; | 605 | }; |
605 | 606 | ||
606 | static void *f_next(struct seq_file *m, void *v, loff_t *pos) | 607 | static void *f_next(struct seq_file *m, void *v, loff_t *pos) |
607 | { | 608 | { |
608 | struct ftrace_event_call *call = m->private; | 609 | struct ftrace_event_call *call = m->private; |
609 | struct ftrace_event_field *field; | 610 | struct ftrace_event_field *field; |
610 | struct list_head *head; | 611 | struct list_head *common_head = &ftrace_common_fields; |
612 | struct list_head *head = trace_get_fields(call); | ||
611 | 613 | ||
612 | (*pos)++; | 614 | (*pos)++; |
613 | 615 | ||
614 | switch ((unsigned long)v) { | 616 | switch ((unsigned long)v) { |
615 | case FORMAT_HEADER: | 617 | case FORMAT_HEADER: |
616 | head = &ftrace_common_fields; | 618 | if (unlikely(list_empty(common_head))) |
619 | return NULL; | ||
620 | |||
621 | field = list_entry(common_head->prev, | ||
622 | struct ftrace_event_field, link); | ||
623 | return field; | ||
617 | 624 | ||
625 | case FORMAT_FIELD_SEPERATOR: | ||
618 | if (unlikely(list_empty(head))) | 626 | if (unlikely(list_empty(head))) |
619 | return NULL; | 627 | return NULL; |
620 | 628 | ||
@@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) | |||
626 | return NULL; | 634 | return NULL; |
627 | } | 635 | } |
628 | 636 | ||
629 | head = trace_get_fields(call); | ||
630 | |||
631 | /* | ||
632 | * To separate common fields from event fields, the | ||
633 | * LSB is set on the first event field. Clear it in case. | ||
634 | */ | ||
635 | v = (void *)((unsigned long)v & ~1L); | ||
636 | |||
637 | field = v; | 637 | field = v; |
638 | /* | 638 | if (field->link.prev == common_head) |
639 | * If this is a common field, and at the end of the list, then | 639 | return (void *)FORMAT_FIELD_SEPERATOR; |
640 | * continue with main list. | 640 | else if (field->link.prev == head) |
641 | */ | ||
642 | if (field->link.prev == &ftrace_common_fields) { | ||
643 | if (unlikely(list_empty(head))) | ||
644 | return NULL; | ||
645 | field = list_entry(head->prev, struct ftrace_event_field, link); | ||
646 | /* Set the LSB to notify f_show to print an extra newline */ | ||
647 | field = (struct ftrace_event_field *) | ||
648 | ((unsigned long)field | 1); | ||
649 | return field; | ||
650 | } | ||
651 | |||
652 | /* If we are done tell f_show to print the format */ | ||
653 | if (field->link.prev == head) | ||
654 | return (void *)FORMAT_PRINTFMT; | 641 | return (void *)FORMAT_PRINTFMT; |
655 | 642 | ||
656 | field = list_entry(field->link.prev, struct ftrace_event_field, link); | 643 | field = list_entry(field->link.prev, struct ftrace_event_field, link); |
@@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v) | |||
688 | seq_printf(m, "format:\n"); | 675 | seq_printf(m, "format:\n"); |
689 | return 0; | 676 | return 0; |
690 | 677 | ||
678 | case FORMAT_FIELD_SEPERATOR: | ||
679 | seq_putc(m, '\n'); | ||
680 | return 0; | ||
681 | |||
691 | case FORMAT_PRINTFMT: | 682 | case FORMAT_PRINTFMT: |
692 | seq_printf(m, "\nprint fmt: %s\n", | 683 | seq_printf(m, "\nprint fmt: %s\n", |
693 | call->print_fmt); | 684 | call->print_fmt); |
694 | return 0; | 685 | return 0; |
695 | } | 686 | } |
696 | 687 | ||
697 | /* | ||
698 | * To separate common fields from event fields, the | ||
699 | * LSB is set on the first event field. Clear it and | ||
700 | * print a newline if it is set. | ||
701 | */ | ||
702 | if ((unsigned long)v & 1) { | ||
703 | seq_putc(m, '\n'); | ||
704 | v = (void *)((unsigned long)v & ~1L); | ||
705 | } | ||
706 | |||
707 | field = v; | 688 | field = v; |
708 | 689 | ||
709 | /* | 690 | /* |
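The format-file iterator replaces the old pointer-LSB tagging with an explicit FORMAT_FIELD_SEPERATOR state whose only job is to emit the blank line between common and event fields. The resulting output shape, sketched with hardcoded example fields:

    #include <stdio.h>

    int main(void)
    {
        const char *common[] = { "common_type", "common_pid" };
        const char *fields[] = { "ip", "parent_ip" };

        printf("format:\n");                      /* FORMAT_HEADER */
        for (int i = 0; i < 2; i++)
            printf("\tfield:%s;\n", common[i]);
        printf("\n");                             /* FORMAT_FIELD_SEPERATOR */
        for (int i = 0; i < 2; i++)
            printf("\tfield:%s;\n", fields[i]);
        printf("\nprint fmt: \"ip=0x%%lx\"\n");   /* FORMAT_PRINTFMT */
        return 0;
    }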
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 6f233698518e..ef49e9370b25 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -15,15 +15,19 @@ | |||
15 | #include "trace.h" | 15 | #include "trace.h" |
16 | #include "trace_output.h" | 16 | #include "trace_output.h" |
17 | 17 | ||
18 | /* When set, irq functions will be ignored */ | ||
19 | static int ftrace_graph_skip_irqs; | ||
20 | |||
18 | struct fgraph_cpu_data { | 21 | struct fgraph_cpu_data { |
19 | pid_t last_pid; | 22 | pid_t last_pid; |
20 | int depth; | 23 | int depth; |
24 | int depth_irq; | ||
21 | int ignore; | 25 | int ignore; |
22 | unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; | 26 | unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; |
23 | }; | 27 | }; |
24 | 28 | ||
25 | struct fgraph_data { | 29 | struct fgraph_data { |
26 | struct fgraph_cpu_data *cpu_data; | 30 | struct fgraph_cpu_data __percpu *cpu_data; |
27 | 31 | ||
28 | /* Place to preserve last processed entry. */ | 32 | /* Place to preserve last processed entry. */ |
29 | struct ftrace_graph_ent_entry ent; | 33 | struct ftrace_graph_ent_entry ent; |
@@ -41,6 +45,7 @@ struct fgraph_data { | |||
41 | #define TRACE_GRAPH_PRINT_PROC 0x8 | 45 | #define TRACE_GRAPH_PRINT_PROC 0x8 |
42 | #define TRACE_GRAPH_PRINT_DURATION 0x10 | 46 | #define TRACE_GRAPH_PRINT_DURATION 0x10 |
43 | #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 | 47 | #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 |
48 | #define TRACE_GRAPH_PRINT_IRQS 0x40 | ||
44 | 49 | ||
45 | static struct tracer_opt trace_opts[] = { | 50 | static struct tracer_opt trace_opts[] = { |
46 | /* Display overruns? (for self-debug purpose) */ | 51 | /* Display overruns? (for self-debug purpose) */ |
@@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = { | |||
55 | { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, | 60 | { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, |
56 | /* Display absolute time of an entry */ | 61 | /* Display absolute time of an entry */ |
57 | { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, | 62 | { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, |
63 | /* Display interrupts */ | ||
64 | { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) }, | ||
58 | { } /* Empty entry */ | 65 | { } /* Empty entry */ |
59 | }; | 66 | }; |
60 | 67 | ||
61 | static struct tracer_flags tracer_flags = { | 68 | static struct tracer_flags tracer_flags = { |
62 | /* Don't display overruns and proc by default */ | 69 | /* Don't display overruns and proc by default */ |
63 | .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | | 70 | .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | |
64 | TRACE_GRAPH_PRINT_DURATION, | 71 | TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS, |
65 | .opts = trace_opts | 72 | .opts = trace_opts |
66 | }; | 73 | }; |
67 | 74 | ||
@@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr, | |||
204 | return 1; | 211 | return 1; |
205 | } | 212 | } |
206 | 213 | ||
214 | static inline int ftrace_graph_ignore_irqs(void) | ||
215 | { | ||
216 | if (!ftrace_graph_skip_irqs) | ||
217 | return 0; | ||
218 | |||
219 | return in_irq(); | ||
220 | } | ||
221 | |||
207 | int trace_graph_entry(struct ftrace_graph_ent *trace) | 222 | int trace_graph_entry(struct ftrace_graph_ent *trace) |
208 | { | 223 | { |
209 | struct trace_array *tr = graph_array; | 224 | struct trace_array *tr = graph_array; |
@@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) | |||
218 | return 0; | 233 | return 0; |
219 | 234 | ||
220 | /* trace it when it is-nested-in or is a function enabled. */ | 235 | /* trace it when it is-nested-in or is a function enabled. */ |
221 | if (!(trace->depth || ftrace_graph_addr(trace->func))) | 236 | if (!(trace->depth || ftrace_graph_addr(trace->func)) || |
237 | ftrace_graph_ignore_irqs()) | ||
222 | return 0; | 238 | return 0; |
223 | 239 | ||
224 | local_irq_save(flags); | 240 | local_irq_save(flags); |
@@ -649,8 +665,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) | |||
649 | 665 | ||
650 | /* Print nsecs (we don't want to exceed 7 numbers) */ | 666 | /* Print nsecs (we don't want to exceed 7 numbers) */ |
651 | if (len < 7) { | 667 | if (len < 7) { |
652 | snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", | 668 | size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len); |
653 | nsecs_rem); | 669 | |
670 | snprintf(nsecs_str, slen, "%03lu", nsecs_rem); | ||
654 | ret = trace_seq_printf(s, ".%s", nsecs_str); | 671 | ret = trace_seq_printf(s, ".%s", nsecs_str); |
655 | if (!ret) | 672 | if (!ret) |
656 | return TRACE_TYPE_PARTIAL_LINE; | 673 | return TRACE_TYPE_PARTIAL_LINE; |
@@ -855,6 +872,92 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
855 | return 0; | 872 | return 0; |
856 | } | 873 | } |
857 | 874 | ||
875 | /* | ||
876 | * Entry check for irq code | ||
877 | * | ||
878 | * returns 1 if | ||
879 | * - we are inside irq code | ||
880 | * - we just entered irq code | ||
881 | * | ||
882 | * returns 0 if | ||
883 | * - the funcgraph-irqs option is set (irqs are being traced) | ||
884 | * - we are not inside irq code | ||
885 | */ | ||
886 | static int | ||
887 | check_irq_entry(struct trace_iterator *iter, u32 flags, | ||
888 | unsigned long addr, int depth) | ||
889 | { | ||
890 | int cpu = iter->cpu; | ||
891 | struct fgraph_data *data = iter->private; | ||
892 | int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
893 | |||
894 | if (flags & TRACE_GRAPH_PRINT_IRQS) | ||
895 | return 0; | ||
896 | |||
897 | /* | ||
898 | * We are inside the irq code | ||
899 | */ | ||
900 | if (*depth_irq >= 0) | ||
901 | return 1; | ||
902 | |||
903 | if ((addr < (unsigned long)__irqentry_text_start) || | ||
904 | (addr >= (unsigned long)__irqentry_text_end)) | ||
905 | return 0; | ||
906 | |||
907 | /* | ||
908 | * We are entering irq code. | ||
909 | */ | ||
910 | *depth_irq = depth; | ||
911 | return 1; | ||
912 | } | ||
913 | |||
914 | /* | ||
915 | * Return check for irq code | ||
916 | * | ||
917 | * returns 1 if | ||
918 | * - we are inside irq code | ||
919 | * - we just left irq code | ||
920 | * | ||
921 | * returns 0 if | ||
922 | * - the funcgraph-irqs option is set (irqs are being traced) | ||
923 | * - we are not inside irq code | ||
924 | */ | ||
925 | static int | ||
926 | check_irq_return(struct trace_iterator *iter, u32 flags, int depth) | ||
927 | { | ||
928 | int cpu = iter->cpu; | ||
929 | struct fgraph_data *data = iter->private; | ||
930 | int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
931 | |||
932 | if (flags & TRACE_GRAPH_PRINT_IRQS) | ||
933 | return 0; | ||
934 | |||
935 | /* | ||
936 | * We are not inside the irq code. | ||
937 | */ | ||
938 | if (*depth_irq == -1) | ||
939 | return 0; | ||
940 | |||
941 | /* | ||
942 | * We are inside the irq code, and this is returning entry. | ||
943 | * Let's not trace it and clear the entry depth, since | ||
944 | * we are out of irq code. | ||
945 | * | ||
946 | * This condition ensures that we 'leave the irq code' once | ||
947 | * we are out of the entry depth, protecting us from | ||
948 | * losing the RETURN entry. | ||
949 | */ | ||
950 | if (*depth_irq >= depth) { | ||
951 | *depth_irq = -1; | ||
952 | return 1; | ||
953 | } | ||
954 | |||
955 | /* | ||
956 | * We are inside the irq code, and this is not the entry. | ||
957 | */ | ||
958 | return 1; | ||
959 | } | ||
960 | |||
858 | static enum print_line_t | 961 | static enum print_line_t |
859 | print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, | 962 | print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, |
860 | struct trace_iterator *iter, u32 flags) | 963 | struct trace_iterator *iter, u32 flags) |
@@ -865,6 +968,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, | |||
865 | static enum print_line_t ret; | 968 | static enum print_line_t ret; |
866 | int cpu = iter->cpu; | 969 | int cpu = iter->cpu; |
867 | 970 | ||
971 | if (check_irq_entry(iter, flags, call->func, call->depth)) | ||
972 | return TRACE_TYPE_HANDLED; | ||
973 | |||
868 | if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) | 974 | if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) |
869 | return TRACE_TYPE_PARTIAL_LINE; | 975 | return TRACE_TYPE_PARTIAL_LINE; |
870 | 976 | ||
@@ -902,6 +1008,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
902 | int ret; | 1008 | int ret; |
903 | int i; | 1009 | int i; |
904 | 1010 | ||
1011 | if (check_irq_return(iter, flags, trace->depth)) | ||
1012 | return TRACE_TYPE_HANDLED; | ||
1013 | |||
905 | if (data) { | 1014 | if (data) { |
906 | struct fgraph_cpu_data *cpu_data; | 1015 | struct fgraph_cpu_data *cpu_data; |
907 | int cpu = iter->cpu; | 1016 | int cpu = iter->cpu; |
@@ -1210,9 +1319,12 @@ void graph_trace_open(struct trace_iterator *iter) | |||
1210 | pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); | 1319 | pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); |
1211 | int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); | 1320 | int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); |
1212 | int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); | 1321 | int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); |
1322 | int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); | ||
1323 | |||
1213 | *pid = -1; | 1324 | *pid = -1; |
1214 | *depth = 0; | 1325 | *depth = 0; |
1215 | *ignore = 0; | 1326 | *ignore = 0; |
1327 | *depth_irq = -1; | ||
1216 | } | 1328 | } |
1217 | 1329 | ||
1218 | iter->private = data; | 1330 | iter->private = data; |
@@ -1235,6 +1347,14 @@ void graph_trace_close(struct trace_iterator *iter) | |||
1235 | } | 1347 | } |
1236 | } | 1348 | } |
1237 | 1349 | ||
1350 | static int func_graph_set_flag(u32 old_flags, u32 bit, int set) | ||
1351 | { | ||
1352 | if (bit == TRACE_GRAPH_PRINT_IRQS) | ||
1353 | ftrace_graph_skip_irqs = !set; | ||
1354 | |||
1355 | return 0; | ||
1356 | } | ||
1357 | |||
1238 | static struct trace_event_functions graph_functions = { | 1358 | static struct trace_event_functions graph_functions = { |
1239 | .trace = print_graph_function_event, | 1359 | .trace = print_graph_function_event, |
1240 | }; | 1360 | }; |
@@ -1261,6 +1381,7 @@ static struct tracer graph_trace __read_mostly = { | |||
1261 | .print_line = print_graph_function, | 1381 | .print_line = print_graph_function, |
1262 | .print_header = print_graph_headers, | 1382 | .print_header = print_graph_headers, |
1263 | .flags = &tracer_flags, | 1383 | .flags = &tracer_flags, |
1384 | .set_flag = func_graph_set_flag, | ||
1264 | #ifdef CONFIG_FTRACE_SELFTEST | 1385 | #ifdef CONFIG_FTRACE_SELFTEST |
1265 | .selftest = trace_selftest_startup_function_graph, | 1386 | .selftest = trace_selftest_startup_function_graph, |
1266 | #endif | 1387 | #endif |
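The new funcgraph-irqs option skips whole interrupt subtrees by remembering the depth at which execution entered the __irqentry_text section and ignoring everything until a return pops back to (or above) that depth. A simulation over a synthetic trace; the event encoding is invented for the demo.

    #include <stdio.h>

    struct ev {
        char type;            /* 'E' = entry, 'R' = return */
        int depth;
        int in_irq_text;      /* entry address inside __irqentry_text? */
    };

    int main(void)
    {
        /* synthetic trace: main -> irq handler -> helper -> returns */
        struct ev trace[] = {
            { 'E', 0, 0 }, { 'E', 1, 1 }, { 'E', 2, 0 },
            { 'R', 2, 0 }, { 'R', 1, 0 }, { 'R', 0, 0 },
        };
        int depth_irq = -1;   /* -1: not inside irq code */

        for (int i = 0; i < 6; i++) {
            struct ev *e = &trace[i];
            int skip;

            if (e->type == 'E') {
                if (depth_irq < 0 && e->in_irq_text)
                    depth_irq = e->depth;        /* entering irq code */
                skip = depth_irq >= 0;
            } else {
                skip = depth_irq >= 0;           /* the exiting return is skipped too */
                if (depth_irq >= 0 && depth_irq >= e->depth)
                    depth_irq = -1;              /* left irq code */
            }
            printf("%c depth=%d %s\n", e->type, e->depth,
                   skip ? "(skipped: irq)" : "(printed)");
        }
        return 0;
    }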
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index a7cc3793baf6..209b379a4721 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c | |||
@@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void) | |||
263 | { | 263 | { |
264 | int ret, cpu; | 264 | int ret, cpu; |
265 | 265 | ||
266 | for_each_possible_cpu(cpu) { | ||
267 | spin_lock_init(&workqueue_cpu_stat(cpu)->lock); | ||
268 | INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); | ||
269 | } | ||
270 | |||
266 | ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); | 271 | ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); |
267 | if (ret) | 272 | if (ret) |
268 | goto out; | 273 | goto out; |
@@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void) | |||
279 | if (ret) | 284 | if (ret) |
280 | goto no_creation; | 285 | goto no_creation; |
281 | 286 | ||
282 | for_each_possible_cpu(cpu) { | ||
283 | spin_lock_init(&workqueue_cpu_stat(cpu)->lock); | ||
284 | INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); | ||
285 | } | ||
286 | |||
287 | return 0; | 287 | return 0; |
288 | 288 | ||
289 | no_creation: | 289 | no_creation: |
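This hunk moves the per-CPU lock and list initialization ahead of the tracepoint registrations, so a probe that fires the moment it is registered can never touch uninitialized state. The same initialize-then-publish rule, in miniature:

    #include <stdio.h>

    struct cpu_stat {
        int ready;               /* stands in for an initialized lock/list */
        int hits;
    };

    static struct cpu_stat stat_;
    static void (*probe)(void);  /* "registered" callback slot */

    static void on_event(void)
    {
        if (stat_.ready)         /* in the kernel, a cold lock here would be fatal */
            stat_.hits++;
    }

    int main(void)
    {
        stat_.ready = 1;         /* initialize state first... */
        probe = on_event;        /* ...then publish the callback */
        if (probe)
            probe();
        printf("hits=%d\n", stat_.hits);
        return 0;
    }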
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index c77f3eceea25..d6073a50a6ca 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/jump_label.h> | ||
28 | 29 | ||
29 | extern struct tracepoint __start___tracepoints[]; | 30 | extern struct tracepoint __start___tracepoints[]; |
30 | extern struct tracepoint __stop___tracepoints[]; | 31 | extern struct tracepoint __stop___tracepoints[]; |
@@ -263,7 +264,13 @@ static void set_tracepoint(struct tracepoint_entry **entry, | |||
263 | * is used. | 264 | * is used. |
264 | */ | 265 | */ |
265 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); | 266 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); |
266 | elem->state = active; | 267 | if (!elem->state && active) { |
268 | enable_jump_label(&elem->state); | ||
269 | elem->state = active; | ||
270 | } else if (elem->state && !active) { | ||
271 | disable_jump_label(&elem->state); | ||
272 | elem->state = active; | ||
273 | } | ||
267 | } | 274 | } |
268 | 275 | ||
269 | /* | 276 | /* |
@@ -277,7 +284,10 @@ static void disable_tracepoint(struct tracepoint *elem) | |||
277 | if (elem->unregfunc && elem->state) | 284 | if (elem->unregfunc && elem->state) |
278 | elem->unregfunc(); | 285 | elem->unregfunc(); |
279 | 286 | ||
280 | elem->state = 0; | 287 | if (elem->state) { |
288 | disable_jump_label(&elem->state); | ||
289 | elem->state = 0; | ||
290 | } | ||
281 | rcu_assign_pointer(elem->funcs, NULL); | 291 | rcu_assign_pointer(elem->funcs, NULL); |
282 | } | 292 | } |
283 | 293 | ||
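Tracepoints now drive a jump label alongside elem->state, patching the branch only on the 0 -> active and active -> 0 edges so that updating an already-enabled tracepoint never repatches code. A sketch of that edge-triggered pairing; enable_label()/disable_label() stand in for the real jump-label calls.

    #include <stdio.h>

    static int branch_enabled;          /* stand-in for the patched branch */

    static void enable_label(void)  { branch_enabled = 1; }
    static void disable_label(void) { branch_enabled = 0; }

    /* keep the state and the label in lockstep, toggling only on edges */
    static void set_state(int *state, int active)
    {
        if (!*state && active)
            enable_label();
        else if (*state && !active)
            disable_label();
        *state = active;
    }

    int main(void)
    {
        int state = 0;

        set_state(&state, 2);   /* two probes attached: patch once */
        set_state(&state, 1);   /* still active: no repatching */
        set_state(&state, 0);   /* last probe gone: unpatch */
        printf("branch_enabled=%d\n", branch_enabled);
        return 0;
    }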
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f9c3c52ecc1..dc8e16824b51 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -43,7 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | |||
43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | static int __read_mostly did_panic; | ||
47 | static int __initdata no_watchdog; | 46 | static int __initdata no_watchdog; |
48 | 47 | ||
49 | 48 | ||
@@ -187,18 +186,6 @@ static int is_softlockup(unsigned long touch_ts) | |||
187 | return 0; | 186 | return 0; |
188 | } | 187 | } |
189 | 188 | ||
190 | static int | ||
191 | watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr) | ||
192 | { | ||
193 | did_panic = 1; | ||
194 | |||
195 | return NOTIFY_DONE; | ||
196 | } | ||
197 | |||
198 | static struct notifier_block panic_block = { | ||
199 | .notifier_call = watchdog_panic, | ||
200 | }; | ||
201 | |||
202 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 189 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
203 | static struct perf_event_attr wd_hw_attr = { | 190 | static struct perf_event_attr wd_hw_attr = { |
204 | .type = PERF_TYPE_HARDWARE, | 191 | .type = PERF_TYPE_HARDWARE, |
@@ -371,14 +358,14 @@ static int watchdog_nmi_enable(int cpu) | |||
371 | /* Try to register using hardware perf events */ | 358 | /* Try to register using hardware perf events */ |
372 | wd_attr = &wd_hw_attr; | 359 | wd_attr = &wd_hw_attr; |
373 | wd_attr->sample_period = hw_nmi_get_sample_period(); | 360 | wd_attr->sample_period = hw_nmi_get_sample_period(); |
374 | event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback); | 361 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); |
375 | if (!IS_ERR(event)) { | 362 | if (!IS_ERR(event)) { |
376 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); | 363 | printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); |
377 | goto out_save; | 364 | goto out_save; |
378 | } | 365 | } |
379 | 366 | ||
380 | printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); | 367 | printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); |
381 | return -1; | 368 | return PTR_ERR(event); |
382 | 369 | ||
383 | /* success path */ | 370 | /* success path */ |
384 | out_save: | 371 | out_save: |
@@ -422,17 +409,19 @@ static int watchdog_prepare_cpu(int cpu) | |||
422 | static int watchdog_enable(int cpu) | 409 | static int watchdog_enable(int cpu) |
423 | { | 410 | { |
424 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); | 411 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); |
412 | int err; | ||
425 | 413 | ||
426 | /* enable the perf event */ | 414 | /* enable the perf event */ |
427 | if (watchdog_nmi_enable(cpu) != 0) | 415 | err = watchdog_nmi_enable(cpu); |
428 | return -1; | 416 | if (err) |
417 | return err; | ||
429 | 418 | ||
430 | /* create the watchdog thread */ | 419 | /* create the watchdog thread */ |
431 | if (!p) { | 420 | if (!p) { |
432 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); | 421 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); |
433 | if (IS_ERR(p)) { | 422 | if (IS_ERR(p)) { |
434 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); | 423 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); |
435 | return -1; | 424 | return PTR_ERR(p); |
436 | } | 425 | } |
437 | kthread_bind(p, cpu); | 426 | kthread_bind(p, cpu); |
438 | per_cpu(watchdog_touch_ts, cpu) = 0; | 427 | per_cpu(watchdog_touch_ts, cpu) = 0; |
@@ -484,6 +473,9 @@ static void watchdog_disable_all_cpus(void) | |||
484 | { | 473 | { |
485 | int cpu; | 474 | int cpu; |
486 | 475 | ||
476 | if (no_watchdog) | ||
477 | return; | ||
478 | |||
487 | for_each_online_cpu(cpu) | 479 | for_each_online_cpu(cpu) |
488 | watchdog_disable(cpu); | 480 | watchdog_disable(cpu); |
489 | 481 | ||
@@ -526,17 +518,16 @@ static int __cpuinit | |||
526 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | 518 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
527 | { | 519 | { |
528 | int hotcpu = (unsigned long)hcpu; | 520 | int hotcpu = (unsigned long)hcpu; |
521 | int err = 0; | ||
529 | 522 | ||
530 | switch (action) { | 523 | switch (action) { |
531 | case CPU_UP_PREPARE: | 524 | case CPU_UP_PREPARE: |
532 | case CPU_UP_PREPARE_FROZEN: | 525 | case CPU_UP_PREPARE_FROZEN: |
533 | if (watchdog_prepare_cpu(hotcpu)) | 526 | err = watchdog_prepare_cpu(hotcpu); |
534 | return NOTIFY_BAD; | ||
535 | break; | 527 | break; |
536 | case CPU_ONLINE: | 528 | case CPU_ONLINE: |
537 | case CPU_ONLINE_FROZEN: | 529 | case CPU_ONLINE_FROZEN: |
538 | if (watchdog_enable(hotcpu)) | 530 | err = watchdog_enable(hotcpu); |
539 | return NOTIFY_BAD; | ||
540 | break; | 531 | break; |
541 | #ifdef CONFIG_HOTPLUG_CPU | 532 | #ifdef CONFIG_HOTPLUG_CPU |
542 | case CPU_UP_CANCELED: | 533 | case CPU_UP_CANCELED: |
@@ -549,7 +540,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
549 | break; | 540 | break; |
550 | #endif /* CONFIG_HOTPLUG_CPU */ | 541 | #endif /* CONFIG_HOTPLUG_CPU */ |
551 | } | 542 | } |
552 | return NOTIFY_OK; | 543 | return notifier_from_errno(err); |
553 | } | 544 | } |
554 | 545 | ||
555 | static struct notifier_block __cpuinitdata cpu_nfb = { | 546 | static struct notifier_block __cpuinitdata cpu_nfb = { |
@@ -565,13 +556,11 @@ static int __init spawn_watchdog_task(void) | |||
565 | return 0; | 556 | return 0; |
566 | 557 | ||
567 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); | 558 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); |
568 | WARN_ON(err == NOTIFY_BAD); | 559 | WARN_ON(notifier_to_errno(err)); |
569 | 560 | ||
570 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); | 561 | cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); |
571 | register_cpu_notifier(&cpu_nfb); | 562 | register_cpu_notifier(&cpu_nfb); |
572 | 563 | ||
573 | atomic_notifier_chain_register(&panic_notifier_list, &panic_block); | ||
574 | |||
575 | return 0; | 564 | return 0; |
576 | } | 565 | } |
577 | early_initcall(spawn_watchdog_task); | 566 | early_initcall(spawn_watchdog_task); |
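The watchdog changes stop collapsing failures into -1/NOTIFY_BAD and instead carry real errno values through the notifier chain via notifier_from_errno()/notifier_to_errno(). Those helpers live in include/linux/notifier.h; the demo below uses its own simplified encoding (the demo_* names and bit layout are assumptions, not the kernel's) just to show the round trip:

    #include <stdio.h>

    #define DEMO_OK   0x0001
    #define DEMO_STOP 0x8000

    static int demo_from_errno(int err)
    {
        return err ? (DEMO_STOP | -err) : DEMO_OK;  /* carry -err in the low bits */
    }

    static int demo_to_errno(int ret)
    {
        return (ret & DEMO_STOP) ? -(ret & ~DEMO_STOP) : 0;
    }

    int main(void)
    {
        int ret = demo_from_errno(-12);             /* e.g. -ENOMEM */
        printf("ret=0x%x errno=%d\n", ret, demo_to_errno(ret));
        return 0;
    }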