Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c                      |   3
-rw-r--r--  kernel/cpu.c                         | 105
-rw-r--r--  kernel/cpuset.c                      |  20
-rw-r--r--  kernel/cred.c                        |  60
-rw-r--r--  kernel/debug/kdb/kdb_main.c          |  12
-rw-r--r--  kernel/exec_domain.c                 |  18
-rw-r--r--  kernel/exit.c                        |  40
-rw-r--r--  kernel/fork.c                        |  51
-rw-r--r--  kernel/futex.c                       |  17
-rw-r--r--  kernel/hrtimer.c                     |   2
-rw-r--r--  kernel/irq/manage.c                  |   3
-rw-r--r--  kernel/kexec.c                       |   7
-rw-r--r--  kernel/kmod.c                        | 193
-rw-r--r--  kernel/module.c                      | 348
-rw-r--r--  kernel/mutex.c                       |   7
-rw-r--r--  kernel/padata.c                      |   4
-rw-r--r--  kernel/panic.c                       |   1
-rw-r--r--  kernel/perf_event.c                  | 753
-rw-r--r--  kernel/pid.c                         |   7
-rw-r--r--  kernel/posix-cpu-timers.c            |  12
-rw-r--r--  kernel/posix-timers.c                |  11
-rw-r--r--  kernel/power/Kconfig                 |   9
-rw-r--r--  kernel/power/Makefile                |   2
-rw-r--r--  kernel/power/nvs.c (renamed from kernel/power/hibernate_nvs.c) | 24
-rw-r--r--  kernel/power/suspend.c               |   6
-rw-r--r--  kernel/profile.c                     |   8
-rw-r--r--  kernel/ptrace.c                      |  26
-rw-r--r--  kernel/relay.c                       |   2
-rw-r--r--  kernel/sched.c                       | 186
-rw-r--r--  kernel/sched_debug.c                 |  10
-rw-r--r--  kernel/sched_fair.c                  |  24
-rw-r--r--  kernel/signal.c                      |  23
-rw-r--r--  kernel/smp.c                         |   2
-rw-r--r--  kernel/softirq.c                     |   4
-rw-r--r--  kernel/stop_machine.c                |   2
-rw-r--r--  kernel/sys.c                         |   6
-rw-r--r--  kernel/sysctl.c                      |   8
-rw-r--r--  kernel/time/tick-sched.c             |  21
-rw-r--r--  kernel/timer.c                       |  19
-rw-r--r--  kernel/trace/blktrace.c              | 140
-rw-r--r--  kernel/trace/ftrace.c                |   7
-rw-r--r--  kernel/trace/kmemtrace.c             |  70
-rw-r--r--  kernel/trace/ring_buffer.c           |  19
-rw-r--r--  kernel/trace/trace.c                 |  15
-rw-r--r--  kernel/trace/trace.h                 |   9
-rw-r--r--  kernel/trace/trace_branch.c          |   8
-rw-r--r--  kernel/trace/trace_event_perf.c      | 192
-rw-r--r--  kernel/trace/trace_events.c          | 139
-rw-r--r--  kernel/trace/trace_events_filter.c   |  28
-rw-r--r--  kernel/trace/trace_export.c          |  16
-rw-r--r--  kernel/trace/trace_functions_graph.c |  13
-rw-r--r--  kernel/trace/trace_kprobe.c          | 113
-rw-r--r--  kernel/trace/trace_output.c          | 137
-rw-r--r--  kernel/trace/trace_output.h          |   2
-rw-r--r--  kernel/trace/trace_sched_switch.c    |  20
-rw-r--r--  kernel/trace/trace_sched_wakeup.c    |  28
-rw-r--r--  kernel/trace/trace_syscalls.c        | 146
-rw-r--r--  kernel/trace/trace_workqueue.c       |  26
-rw-r--r--  kernel/tracepoint.c                  |  91
-rw-r--r--  kernel/workqueue.c                   |   9
60 files changed, 1867 insertions(+), 1417 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 291775021b2e..3ac6f5b0a64b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2994,7 +2994,6 @@ static void cgroup_event_remove(struct work_struct *work)
2994 remove); 2994 remove);
2995 struct cgroup *cgrp = event->cgrp; 2995 struct cgroup *cgrp = event->cgrp;
2996 2996
2997 /* TODO: check return code */
2998 event->cft->unregister_event(cgrp, event->cft, event->eventfd); 2997 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
2999 2998
3000 eventfd_ctx_put(event->eventfd); 2999 eventfd_ctx_put(event->eventfd);
@@ -4599,7 +4598,7 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
4599 parent_css = parent->subsys[subsys_id]; 4598 parent_css = parent->subsys[subsys_id];
4600 child_css = child->subsys[subsys_id]; 4599 child_css = child->subsys[subsys_id];
4601 parent_id = parent_css->id; 4600 parent_id = parent_css->id;
4602 depth = parent_id->depth; 4601 depth = parent_id->depth + 1;
4603 4602
4604 child_id = get_new_cssid(ss, depth); 4603 child_id = get_new_cssid(ss, depth);
4605 if (IS_ERR(child_id)) 4604 if (IS_ERR(child_id))
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 124ad9d6be16..97d1b426a4ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -20,13 +20,29 @@
20/* Serializes the updates to cpu_online_mask, cpu_present_mask */ 20/* Serializes the updates to cpu_online_mask, cpu_present_mask */
21static DEFINE_MUTEX(cpu_add_remove_lock); 21static DEFINE_MUTEX(cpu_add_remove_lock);
22 22
23static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); 23/*
24 * The following two API's must be used when attempting
25 * to serialize the updates to cpu_online_mask, cpu_present_mask.
26 */
27void cpu_maps_update_begin(void)
28{
29 mutex_lock(&cpu_add_remove_lock);
30}
31
32void cpu_maps_update_done(void)
33{
34 mutex_unlock(&cpu_add_remove_lock);
35}
36
37static RAW_NOTIFIER_HEAD(cpu_chain);
24 38
25/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. 39/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
26 * Should always be manipulated under cpu_add_remove_lock 40 * Should always be manipulated under cpu_add_remove_lock
27 */ 41 */
28static int cpu_hotplug_disabled; 42static int cpu_hotplug_disabled;
29 43
44#ifdef CONFIG_HOTPLUG_CPU
45
30static struct { 46static struct {
31 struct task_struct *active_writer; 47 struct task_struct *active_writer;
32 struct mutex lock; /* Synchronizes accesses to refcount, */ 48 struct mutex lock; /* Synchronizes accesses to refcount, */
@@ -41,8 +57,6 @@ static struct {
41 .refcount = 0, 57 .refcount = 0,
42}; 58};
43 59
44#ifdef CONFIG_HOTPLUG_CPU
45
46void get_online_cpus(void) 60void get_online_cpus(void)
47{ 61{
48 might_sleep(); 62 might_sleep();
@@ -67,22 +81,6 @@ void put_online_cpus(void)
67} 81}
68EXPORT_SYMBOL_GPL(put_online_cpus); 82EXPORT_SYMBOL_GPL(put_online_cpus);
69 83
70#endif /* CONFIG_HOTPLUG_CPU */
71
72/*
73 * The following two API's must be used when attempting
74 * to serialize the updates to cpu_online_mask, cpu_present_mask.
75 */
76void cpu_maps_update_begin(void)
77{
78 mutex_lock(&cpu_add_remove_lock);
79}
80
81void cpu_maps_update_done(void)
82{
83 mutex_unlock(&cpu_add_remove_lock);
84}
85
86/* 84/*
87 * This ensures that the hotplug operation can begin only when the 85 * This ensures that the hotplug operation can begin only when the
88 * refcount goes to zero. 86 * refcount goes to zero.
@@ -124,6 +122,12 @@ static void cpu_hotplug_done(void)
124 cpu_hotplug.active_writer = NULL; 122 cpu_hotplug.active_writer = NULL;
125 mutex_unlock(&cpu_hotplug.lock); 123 mutex_unlock(&cpu_hotplug.lock);
126} 124}
125
126#else /* #if CONFIG_HOTPLUG_CPU */
127static void cpu_hotplug_begin(void) {}
128static void cpu_hotplug_done(void) {}
 129#endif /* #else #if CONFIG_HOTPLUG_CPU */
130
127/* Need to know about CPUs going up/down? */ 131/* Need to know about CPUs going up/down? */
128int __ref register_cpu_notifier(struct notifier_block *nb) 132int __ref register_cpu_notifier(struct notifier_block *nb)
129{ 133{
@@ -134,8 +138,29 @@ int __ref register_cpu_notifier(struct notifier_block *nb)
134 return ret; 138 return ret;
135} 139}
136 140
141static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
142 int *nr_calls)
143{
144 int ret;
145
146 ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
147 nr_calls);
148
149 return notifier_to_errno(ret);
150}
151
152static int cpu_notify(unsigned long val, void *v)
153{
154 return __cpu_notify(val, v, -1, NULL);
155}
156
137#ifdef CONFIG_HOTPLUG_CPU 157#ifdef CONFIG_HOTPLUG_CPU
138 158
159static void cpu_notify_nofail(unsigned long val, void *v)
160{
161 BUG_ON(cpu_notify(val, v));
162}
163
139EXPORT_SYMBOL(register_cpu_notifier); 164EXPORT_SYMBOL(register_cpu_notifier);
140 165
141void __ref unregister_cpu_notifier(struct notifier_block *nb) 166void __ref unregister_cpu_notifier(struct notifier_block *nb)
@@ -181,8 +206,7 @@ static int __ref take_cpu_down(void *_param)
181 if (err < 0) 206 if (err < 0)
182 return err; 207 return err;
183 208
184 raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, 209 cpu_notify(CPU_DYING | param->mod, param->hcpu);
185 param->hcpu);
186 210
187 if (task_cpu(param->caller) == cpu) 211 if (task_cpu(param->caller) == cpu)
188 move_task_off_dead_cpu(cpu, param->caller); 212 move_task_off_dead_cpu(cpu, param->caller);
@@ -212,17 +236,14 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
212 236
213 cpu_hotplug_begin(); 237 cpu_hotplug_begin();
214 set_cpu_active(cpu, false); 238 set_cpu_active(cpu, false);
215 err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, 239 err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
216 hcpu, -1, &nr_calls); 240 if (err) {
217 if (err == NOTIFY_BAD) {
218 set_cpu_active(cpu, true); 241 set_cpu_active(cpu, true);
219 242
220 nr_calls--; 243 nr_calls--;
221 __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, 244 __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
222 hcpu, nr_calls, NULL);
223 printk("%s: attempt to take down CPU %u failed\n", 245 printk("%s: attempt to take down CPU %u failed\n",
224 __func__, cpu); 246 __func__, cpu);
225 err = -EINVAL;
226 goto out_release; 247 goto out_release;
227 } 248 }
228 249
@@ -230,9 +251,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
230 if (err) { 251 if (err) {
231 set_cpu_active(cpu, true); 252 set_cpu_active(cpu, true);
232 /* CPU didn't die: tell everyone. Can't complain. */ 253 /* CPU didn't die: tell everyone. Can't complain. */
233 if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, 254 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
234 hcpu) == NOTIFY_BAD)
235 BUG();
236 255
237 goto out_release; 256 goto out_release;
238 } 257 }
@@ -246,19 +265,14 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
246 __cpu_die(cpu); 265 __cpu_die(cpu);
247 266
248 /* CPU is completely dead: tell everyone. Too late to complain. */ 267 /* CPU is completely dead: tell everyone. Too late to complain. */
249 if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod, 268 cpu_notify_nofail(CPU_DEAD | mod, hcpu);
250 hcpu) == NOTIFY_BAD)
251 BUG();
252 269
253 check_for_tasks(cpu); 270 check_for_tasks(cpu);
254 271
255out_release: 272out_release:
256 cpu_hotplug_done(); 273 cpu_hotplug_done();
257 if (!err) { 274 if (!err)
258 if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod, 275 cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
259 hcpu) == NOTIFY_BAD)
260 BUG();
261 }
262 return err; 276 return err;
263} 277}
264 278
@@ -293,13 +307,11 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
293 return -EINVAL; 307 return -EINVAL;
294 308
295 cpu_hotplug_begin(); 309 cpu_hotplug_begin();
296 ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu, 310 ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
297 -1, &nr_calls); 311 if (ret) {
298 if (ret == NOTIFY_BAD) {
299 nr_calls--; 312 nr_calls--;
300 printk("%s: attempt to bring up CPU %u failed\n", 313 printk("%s: attempt to bring up CPU %u failed\n",
301 __func__, cpu); 314 __func__, cpu);
302 ret = -EINVAL;
303 goto out_notify; 315 goto out_notify;
304 } 316 }
305 317
@@ -312,12 +324,11 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
312 set_cpu_active(cpu, true); 324 set_cpu_active(cpu, true);
313 325
314 /* Now call notifier in preparation. */ 326 /* Now call notifier in preparation. */
315 raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu); 327 cpu_notify(CPU_ONLINE | mod, hcpu);
316 328
317out_notify: 329out_notify:
318 if (ret != 0) 330 if (ret != 0)
319 __raw_notifier_call_chain(&cpu_chain, 331 __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
320 CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
321 cpu_hotplug_done(); 332 cpu_hotplug_done();
322 333
323 return ret; 334 return ret;
@@ -383,7 +394,7 @@ static cpumask_var_t frozen_cpus;
383 394
384int disable_nonboot_cpus(void) 395int disable_nonboot_cpus(void)
385{ 396{
386 int cpu, first_cpu, error; 397 int cpu, first_cpu, error = 0;
387 398
388 cpu_maps_update_begin(); 399 cpu_maps_update_begin();
389 first_cpu = cpumask_first(cpu_online_mask); 400 first_cpu = cpumask_first(cpu_online_mask);
@@ -481,7 +492,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
481 if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus)) 492 if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
482 val = CPU_STARTING_FROZEN; 493 val = CPU_STARTING_FROZEN;
483#endif /* CONFIG_PM_SLEEP_SMP */ 494#endif /* CONFIG_PM_SLEEP_SMP */
484 raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu); 495 cpu_notify(val, (void *)(long)cpu);
485} 496}
486 497
487#endif /* CONFIG_SMP */ 498#endif /* CONFIG_SMP */
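
The cpu_notify()/__cpu_notify() helpers introduced above only change how kernel/cpu.c invokes its notifier chain; subsystems still subscribe through register_cpu_notifier(). A minimal sketch of such a subscriber, using the notifier_block API of this kernel generation (the callback name, messages and per-cpu bookkeeping are illustrative, not part of this patch):

    #include <linux/cpu.h>
    #include <linux/notifier.h>
    #include <linux/kernel.h>
    #include <linux/init.h>

    static int my_cpu_callback(struct notifier_block *nb,
                               unsigned long action, void *hcpu)
    {
            unsigned int cpu = (unsigned long)hcpu;

            switch (action & ~CPU_TASKS_FROZEN) {
            case CPU_UP_PREPARE:
                    /* allocate per-cpu state here; returning
                     * notifier_from_errno(-ENOMEM) would make
                     * __cpu_notify() fail the bring-up with -ENOMEM */
                    break;
            case CPU_ONLINE:
                    pr_info("cpu %u came online\n", cpu);
                    break;
            case CPU_DEAD:
                    /* release per-cpu state after __cpu_die() */
                    break;
            }
            return NOTIFY_OK;
    }

    static struct notifier_block my_cpu_notifier = {
            .notifier_call = my_cpu_callback,
    };

    static int __init my_hotplug_init(void)
    {
            return register_cpu_notifier(&my_cpu_notifier);
    }

Since __cpu_notify() funnels the callback result through notifier_to_errno(), a callback failure now propagates its real error code out of _cpu_up()/_cpu_down(), which is why the blanket err = -EINVAL assignments could be dropped above.
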
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 61d6af7fa676..02b9611eadde 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2469,7 +2469,8 @@ void cpuset_unlock(void)
2469} 2469}
2470 2470
2471/** 2471/**
2472 * cpuset_mem_spread_node() - On which node to begin search for a page 2472 * cpuset_mem_spread_node() - On which node to begin search for a file page
2473 * cpuset_slab_spread_node() - On which node to begin search for a slab page
2473 * 2474 *
2474 * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for 2475 * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
2475 * tasks in a cpuset with is_spread_page or is_spread_slab set), 2476 * tasks in a cpuset with is_spread_page or is_spread_slab set),
@@ -2494,16 +2495,27 @@ void cpuset_unlock(void)
2494 * See kmem_cache_alloc_node(). 2495 * See kmem_cache_alloc_node().
2495 */ 2496 */
2496 2497
2497int cpuset_mem_spread_node(void) 2498static int cpuset_spread_node(int *rotor)
2498{ 2499{
2499 int node; 2500 int node;
2500 2501
2501 node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed); 2502 node = next_node(*rotor, current->mems_allowed);
2502 if (node == MAX_NUMNODES) 2503 if (node == MAX_NUMNODES)
2503 node = first_node(current->mems_allowed); 2504 node = first_node(current->mems_allowed);
2504 current->cpuset_mem_spread_rotor = node; 2505 *rotor = node;
2505 return node; 2506 return node;
2506} 2507}
2508
2509int cpuset_mem_spread_node(void)
2510{
2511 return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
2512}
2513
2514int cpuset_slab_spread_node(void)
2515{
2516 return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
2517}
2518
2507EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); 2519EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
2508 2520
2509/** 2521/**
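
For reference, the rotor advanced by cpuset_mem_spread_node() is consumed from page-cache style allocation paths when the task's cpuset sets memory_spread_page, and cpuset_slab_spread_node() now gives slab users the same behaviour with its own per-task rotor. A hedged sketch of the page-side calling pattern (my_spread_alloc() is a made-up wrapper; cpuset_do_page_mem_spread() and alloc_pages_exact_node() are this era's existing interfaces):

    #include <linux/cpuset.h>
    #include <linux/gfp.h>
    #include <linux/mm.h>

    /* Illustrative wrapper: pick the allocation node via the mem-spread
     * rotor when the current task has PF_SPREAD_PAGE set, otherwise
     * allocate locally. */
    static struct page *my_spread_alloc(gfp_t gfp)
    {
            if (cpuset_do_page_mem_spread()) {
                    /* advances current->cpuset_mem_spread_rotor */
                    int nid = cpuset_mem_spread_node();
                    return alloc_pages_exact_node(nid, gfp, 0);
            }
            return alloc_pages(gfp, 0);
    }
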
diff --git a/kernel/cred.c b/kernel/cred.c
index 2c24870c55d1..a2d5504fbcc2 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -347,66 +347,6 @@ struct cred *prepare_exec_creds(void)
347} 347}
348 348
349/* 349/*
350 * prepare new credentials for the usermode helper dispatcher
351 */
352struct cred *prepare_usermodehelper_creds(void)
353{
354#ifdef CONFIG_KEYS
355 struct thread_group_cred *tgcred = NULL;
356#endif
357 struct cred *new;
358
359#ifdef CONFIG_KEYS
360 tgcred = kzalloc(sizeof(*new->tgcred), GFP_ATOMIC);
361 if (!tgcred)
362 return NULL;
363#endif
364
365 new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
366 if (!new)
367 goto free_tgcred;
368
369 kdebug("prepare_usermodehelper_creds() alloc %p", new);
370
371 memcpy(new, &init_cred, sizeof(struct cred));
372
373 atomic_set(&new->usage, 1);
374 set_cred_subscribers(new, 0);
375 get_group_info(new->group_info);
376 get_uid(new->user);
377
378#ifdef CONFIG_KEYS
379 new->thread_keyring = NULL;
380 new->request_key_auth = NULL;
381 new->jit_keyring = KEY_REQKEY_DEFL_DEFAULT;
382
383 atomic_set(&tgcred->usage, 1);
384 spin_lock_init(&tgcred->lock);
385 new->tgcred = tgcred;
386#endif
387
388#ifdef CONFIG_SECURITY
389 new->security = NULL;
390#endif
391 if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
392 goto error;
393 validate_creds(new);
394
395 BUG_ON(atomic_read(&new->usage) != 1);
396 return new;
397
398error:
399 put_cred(new);
400 return NULL;
401
402free_tgcred:
403#ifdef CONFIG_KEYS
404 kfree(tgcred);
405#endif
406 return NULL;
407}
408
409/*
410 * Copy credentials for the new process created by fork() 350 * Copy credentials for the new process created by fork()
411 * 351 *
412 * We share if we can, but under some circumstances we have to generate a new 352 * We share if we can, but under some circumstances we have to generate a new
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index b724c791b6d4..184cd8209c36 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1857,12 +1857,6 @@ static int kdb_ef(int argc, const char **argv)
1857} 1857}
1858 1858
1859#if defined(CONFIG_MODULES) 1859#if defined(CONFIG_MODULES)
1860/* modules using other modules */
1861struct module_use {
1862 struct list_head list;
1863 struct module *module_which_uses;
1864};
1865
1866/* 1860/*
1867 * kdb_lsmod - This function implements the 'lsmod' command. Lists 1861 * kdb_lsmod - This function implements the 'lsmod' command. Lists
1868 * currently loaded kernel modules. 1862 * currently loaded kernel modules.
@@ -1894,9 +1888,9 @@ static int kdb_lsmod(int argc, const char **argv)
1894 { 1888 {
1895 struct module_use *use; 1889 struct module_use *use;
1896 kdb_printf(" [ "); 1890 kdb_printf(" [ ");
1897 list_for_each_entry(use, &mod->modules_which_use_me, 1891 list_for_each_entry(use, &mod->source_list,
1898 list) 1892 source_list)
1899 kdb_printf("%s ", use->module_which_uses->name); 1893 kdb_printf("%s ", use->target->name);
1900 kdb_printf("]\n"); 1894 kdb_printf("]\n");
1901 } 1895 }
1902#endif 1896#endif
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index c35452cadded..dd62f8e714ca 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -27,7 +27,7 @@ static struct exec_domain *exec_domains = &default_exec_domain;
27static DEFINE_RWLOCK(exec_domains_lock); 27static DEFINE_RWLOCK(exec_domains_lock);
28 28
29 29
30static u_long ident_map[32] = { 30static unsigned long ident_map[32] = {
31 0, 1, 2, 3, 4, 5, 6, 7, 31 0, 1, 2, 3, 4, 5, 6, 7,
32 8, 9, 10, 11, 12, 13, 14, 15, 32 8, 9, 10, 11, 12, 13, 14, 15,
33 16, 17, 18, 19, 20, 21, 22, 23, 33 16, 17, 18, 19, 20, 21, 22, 23,
@@ -56,10 +56,10 @@ default_handler(int segment, struct pt_regs *regp)
56} 56}
57 57
58static struct exec_domain * 58static struct exec_domain *
59lookup_exec_domain(u_long personality) 59lookup_exec_domain(unsigned int personality)
60{ 60{
61 struct exec_domain * ep; 61 unsigned int pers = personality(personality);
62 u_long pers = personality(personality); 62 struct exec_domain *ep;
63 63
64 read_lock(&exec_domains_lock); 64 read_lock(&exec_domains_lock);
65 for (ep = exec_domains; ep; ep = ep->next) { 65 for (ep = exec_domains; ep; ep = ep->next) {
@@ -70,7 +70,7 @@ lookup_exec_domain(u_long personality)
70 70
71#ifdef CONFIG_MODULES 71#ifdef CONFIG_MODULES
72 read_unlock(&exec_domains_lock); 72 read_unlock(&exec_domains_lock);
73 request_module("personality-%ld", pers); 73 request_module("personality-%d", pers);
74 read_lock(&exec_domains_lock); 74 read_lock(&exec_domains_lock);
75 75
76 for (ep = exec_domains; ep; ep = ep->next) { 76 for (ep = exec_domains; ep; ep = ep->next) {
@@ -135,7 +135,7 @@ unregister:
135} 135}
136 136
137int 137int
138__set_personality(u_long personality) 138__set_personality(unsigned int personality)
139{ 139{
140 struct exec_domain *ep, *oep; 140 struct exec_domain *ep, *oep;
141 141
@@ -188,9 +188,9 @@ static int __init proc_execdomains_init(void)
188module_init(proc_execdomains_init); 188module_init(proc_execdomains_init);
189#endif 189#endif
190 190
191SYSCALL_DEFINE1(personality, u_long, personality) 191SYSCALL_DEFINE1(personality, unsigned int, personality)
192{ 192{
193 u_long old = current->personality; 193 unsigned int old = current->personality;
194 194
195 if (personality != 0xffffffff) { 195 if (personality != 0xffffffff) {
196 set_personality(personality); 196 set_personality(personality);
@@ -198,7 +198,7 @@ SYSCALL_DEFINE1(personality, u_long, personality)
198 return -EINVAL; 198 return -EINVAL;
199 } 199 }
200 200
201 return (long)old; 201 return old;
202} 202}
203 203
204 204
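
As the syscall body shows, passing 0xffffffff queries the current personality without changing it, and with the return type now a plain unsigned int there is no (long) cast to reason about. A small illustrative userspace use, assuming the usual glibc personality(2) wrapper:

    #include <stdio.h>
    #include <sys/personality.h>

    int main(void)
    {
            unsigned int old = personality(0xffffffff);     /* query only */

            printf("current personality: 0x%08x\n", old);
            /* e.g. disable ASLR for subsequently exec'd children */
            personality(old | ADDR_NO_RANDOMIZE);
            return 0;
    }
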
diff --git a/kernel/exit.c b/kernel/exit.c
index 019a2843bf95..ceffc67b564a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -58,11 +58,11 @@
58 58
59static void exit_mm(struct task_struct * tsk); 59static void exit_mm(struct task_struct * tsk);
60 60
61static void __unhash_process(struct task_struct *p) 61static void __unhash_process(struct task_struct *p, bool group_dead)
62{ 62{
63 nr_threads--; 63 nr_threads--;
64 detach_pid(p, PIDTYPE_PID); 64 detach_pid(p, PIDTYPE_PID);
65 if (thread_group_leader(p)) { 65 if (group_dead) {
66 detach_pid(p, PIDTYPE_PGID); 66 detach_pid(p, PIDTYPE_PGID);
67 detach_pid(p, PIDTYPE_SID); 67 detach_pid(p, PIDTYPE_SID);
68 68
@@ -79,10 +79,9 @@ static void __unhash_process(struct task_struct *p)
79static void __exit_signal(struct task_struct *tsk) 79static void __exit_signal(struct task_struct *tsk)
80{ 80{
81 struct signal_struct *sig = tsk->signal; 81 struct signal_struct *sig = tsk->signal;
82 bool group_dead = thread_group_leader(tsk);
82 struct sighand_struct *sighand; 83 struct sighand_struct *sighand;
83 84 struct tty_struct *uninitialized_var(tty);
84 BUG_ON(!sig);
85 BUG_ON(!atomic_read(&sig->count));
86 85
87 sighand = rcu_dereference_check(tsk->sighand, 86 sighand = rcu_dereference_check(tsk->sighand,
88 rcu_read_lock_held() || 87 rcu_read_lock_held() ||
@@ -90,14 +89,16 @@ static void __exit_signal(struct task_struct *tsk)
90 spin_lock(&sighand->siglock); 89 spin_lock(&sighand->siglock);
91 90
92 posix_cpu_timers_exit(tsk); 91 posix_cpu_timers_exit(tsk);
93 if (atomic_dec_and_test(&sig->count)) 92 if (group_dead) {
94 posix_cpu_timers_exit_group(tsk); 93 posix_cpu_timers_exit_group(tsk);
95 else { 94 tty = sig->tty;
95 sig->tty = NULL;
96 } else {
96 /* 97 /*
97 * If there is any task waiting for the group exit 98 * If there is any task waiting for the group exit
98 * then notify it: 99 * then notify it:
99 */ 100 */
100 if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) 101 if (sig->notify_count > 0 && !--sig->notify_count)
101 wake_up_process(sig->group_exit_task); 102 wake_up_process(sig->group_exit_task);
102 103
103 if (tsk == sig->curr_target) 104 if (tsk == sig->curr_target)
@@ -123,32 +124,24 @@ static void __exit_signal(struct task_struct *tsk)
123 sig->oublock += task_io_get_oublock(tsk); 124 sig->oublock += task_io_get_oublock(tsk);
124 task_io_accounting_add(&sig->ioac, &tsk->ioac); 125 task_io_accounting_add(&sig->ioac, &tsk->ioac);
125 sig->sum_sched_runtime += tsk->se.sum_exec_runtime; 126 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
126 sig = NULL; /* Marker for below. */
127 } 127 }
128 128
129 __unhash_process(tsk); 129 sig->nr_threads--;
130 __unhash_process(tsk, group_dead);
130 131
131 /* 132 /*
132 * Do this under ->siglock, we can race with another thread 133 * Do this under ->siglock, we can race with another thread
133 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. 134 * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
134 */ 135 */
135 flush_sigqueue(&tsk->pending); 136 flush_sigqueue(&tsk->pending);
136
137 tsk->signal = NULL;
138 tsk->sighand = NULL; 137 tsk->sighand = NULL;
139 spin_unlock(&sighand->siglock); 138 spin_unlock(&sighand->siglock);
140 139
141 __cleanup_sighand(sighand); 140 __cleanup_sighand(sighand);
142 clear_tsk_thread_flag(tsk,TIF_SIGPENDING); 141 clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
143 if (sig) { 142 if (group_dead) {
144 flush_sigqueue(&sig->shared_pending); 143 flush_sigqueue(&sig->shared_pending);
145 taskstats_tgid_free(sig); 144 tty_kref_put(tty);
146 /*
147 * Make sure ->signal can't go away under rq->lock,
148 * see account_group_exec_runtime().
149 */
150 task_rq_unlock_wait(tsk);
151 __cleanup_signal(sig);
152 } 145 }
153} 146}
154 147
@@ -856,12 +849,9 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
856 849
857 tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE; 850 tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE;
858 851
859 /* mt-exec, de_thread() is waiting for us */ 852 /* mt-exec, de_thread() is waiting for group leader */
860 if (thread_group_leader(tsk) && 853 if (unlikely(tsk->signal->notify_count < 0))
861 tsk->signal->group_exit_task &&
862 tsk->signal->notify_count < 0)
863 wake_up_process(tsk->signal->group_exit_task); 854 wake_up_process(tsk->signal->group_exit_task);
864
865 write_unlock_irq(&tasklist_lock); 855 write_unlock_irq(&tasklist_lock);
866 856
867 tracehook_report_death(tsk, signal, cookie, group_dead); 857 tracehook_report_death(tsk, signal, cookie, group_dead);
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d57d9e3a6e9..b6cce14ba047 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -165,6 +165,18 @@ void free_task(struct task_struct *tsk)
165} 165}
166EXPORT_SYMBOL(free_task); 166EXPORT_SYMBOL(free_task);
167 167
168static inline void free_signal_struct(struct signal_struct *sig)
169{
170 taskstats_tgid_free(sig);
171 kmem_cache_free(signal_cachep, sig);
172}
173
174static inline void put_signal_struct(struct signal_struct *sig)
175{
176 if (atomic_dec_and_test(&sig->sigcnt))
177 free_signal_struct(sig);
178}
179
168void __put_task_struct(struct task_struct *tsk) 180void __put_task_struct(struct task_struct *tsk)
169{ 181{
170 WARN_ON(!tsk->exit_state); 182 WARN_ON(!tsk->exit_state);
@@ -173,6 +185,7 @@ void __put_task_struct(struct task_struct *tsk)
173 185
174 exit_creds(tsk); 186 exit_creds(tsk);
175 delayacct_tsk_free(tsk); 187 delayacct_tsk_free(tsk);
188 put_signal_struct(tsk->signal);
176 189
177 if (!profile_handoff_task(tsk)) 190 if (!profile_handoff_task(tsk))
178 free_task(tsk); 191 free_task(tsk);
@@ -864,8 +877,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
864 if (!sig) 877 if (!sig)
865 return -ENOMEM; 878 return -ENOMEM;
866 879
867 atomic_set(&sig->count, 1); 880 sig->nr_threads = 1;
868 atomic_set(&sig->live, 1); 881 atomic_set(&sig->live, 1);
882 atomic_set(&sig->sigcnt, 1);
869 init_waitqueue_head(&sig->wait_chldexit); 883 init_waitqueue_head(&sig->wait_chldexit);
870 if (clone_flags & CLONE_NEWPID) 884 if (clone_flags & CLONE_NEWPID)
871 sig->flags |= SIGNAL_UNKILLABLE; 885 sig->flags |= SIGNAL_UNKILLABLE;
@@ -889,13 +903,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
889 return 0; 903 return 0;
890} 904}
891 905
892void __cleanup_signal(struct signal_struct *sig)
893{
894 thread_group_cputime_free(sig);
895 tty_kref_put(sig->tty);
896 kmem_cache_free(signal_cachep, sig);
897}
898
899static void copy_flags(unsigned long clone_flags, struct task_struct *p) 906static void copy_flags(unsigned long clone_flags, struct task_struct *p)
900{ 907{
901 unsigned long new_flags = p->flags; 908 unsigned long new_flags = p->flags;
@@ -1245,8 +1252,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1245 } 1252 }
1246 1253
1247 if (clone_flags & CLONE_THREAD) { 1254 if (clone_flags & CLONE_THREAD) {
1248 atomic_inc(&current->signal->count); 1255 current->signal->nr_threads++;
1249 atomic_inc(&current->signal->live); 1256 atomic_inc(&current->signal->live);
1257 atomic_inc(&current->signal->sigcnt);
1250 p->group_leader = current->group_leader; 1258 p->group_leader = current->group_leader;
1251 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); 1259 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
1252 } 1260 }
@@ -1259,7 +1267,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1259 p->nsproxy->pid_ns->child_reaper = p; 1267 p->nsproxy->pid_ns->child_reaper = p;
1260 1268
1261 p->signal->leader_pid = pid; 1269 p->signal->leader_pid = pid;
1262 tty_kref_put(p->signal->tty);
1263 p->signal->tty = tty_kref_get(current->signal->tty); 1270 p->signal->tty = tty_kref_get(current->signal->tty);
1264 attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); 1271 attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
1265 attach_pid(p, PIDTYPE_SID, task_session(current)); 1272 attach_pid(p, PIDTYPE_SID, task_session(current));
@@ -1292,7 +1299,7 @@ bad_fork_cleanup_mm:
1292 mmput(p->mm); 1299 mmput(p->mm);
1293bad_fork_cleanup_signal: 1300bad_fork_cleanup_signal:
1294 if (!(clone_flags & CLONE_THREAD)) 1301 if (!(clone_flags & CLONE_THREAD))
1295 __cleanup_signal(p->signal); 1302 free_signal_struct(p->signal);
1296bad_fork_cleanup_sighand: 1303bad_fork_cleanup_sighand:
1297 __cleanup_sighand(p->sighand); 1304 __cleanup_sighand(p->sighand);
1298bad_fork_cleanup_fs: 1305bad_fork_cleanup_fs:
@@ -1327,6 +1334,16 @@ noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_re
1327 return regs; 1334 return regs;
1328} 1335}
1329 1336
1337static inline void init_idle_pids(struct pid_link *links)
1338{
1339 enum pid_type type;
1340
1341 for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
1342 INIT_HLIST_NODE(&links[type].node); /* not really needed */
1343 links[type].pid = &init_struct_pid;
1344 }
1345}
1346
1330struct task_struct * __cpuinit fork_idle(int cpu) 1347struct task_struct * __cpuinit fork_idle(int cpu)
1331{ 1348{
1332 struct task_struct *task; 1349 struct task_struct *task;
@@ -1334,8 +1351,10 @@ struct task_struct * __cpuinit fork_idle(int cpu)
1334 1351
1335 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, 1352 task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
1336 &init_struct_pid, 0); 1353 &init_struct_pid, 0);
1337 if (!IS_ERR(task)) 1354 if (!IS_ERR(task)) {
1355 init_idle_pids(task->pids);
1338 init_idle(task, cpu); 1356 init_idle(task, cpu);
1357 }
1339 1358
1340 return task; 1359 return task;
1341} 1360}
@@ -1507,14 +1526,6 @@ static void check_unshare_flags(unsigned long *flags_ptr)
1507 *flags_ptr |= CLONE_SIGHAND; 1526 *flags_ptr |= CLONE_SIGHAND;
1508 1527
1509 /* 1528 /*
1510 * If unsharing signal handlers and the task was created
1511 * using CLONE_THREAD, then must unshare the thread
1512 */
1513 if ((*flags_ptr & CLONE_SIGHAND) &&
1514 (atomic_read(&current->signal->count) > 1))
1515 *flags_ptr |= CLONE_THREAD;
1516
1517 /*
1518 * If unsharing namespace, must also unshare filesystem information. 1529 * If unsharing namespace, must also unshare filesystem information.
1519 */ 1530 */
1520 if (*flags_ptr & CLONE_NEWNS) 1531 if (*flags_ptr & CLONE_NEWNS)
diff --git a/kernel/futex.c b/kernel/futex.c
index e7a35f1039e7..6a3a5fa1526d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -429,20 +429,11 @@ static void free_pi_state(struct futex_pi_state *pi_state)
429static struct task_struct * futex_find_get_task(pid_t pid) 429static struct task_struct * futex_find_get_task(pid_t pid)
430{ 430{
431 struct task_struct *p; 431 struct task_struct *p;
432 const struct cred *cred = current_cred(), *pcred;
433 432
434 rcu_read_lock(); 433 rcu_read_lock();
435 p = find_task_by_vpid(pid); 434 p = find_task_by_vpid(pid);
436 if (!p) { 435 if (p)
437 p = ERR_PTR(-ESRCH); 436 get_task_struct(p);
438 } else {
439 pcred = __task_cred(p);
440 if (cred->euid != pcred->euid &&
441 cred->euid != pcred->uid)
442 p = ERR_PTR(-ESRCH);
443 else
444 get_task_struct(p);
445 }
446 437
447 rcu_read_unlock(); 438 rcu_read_unlock();
448 439
@@ -564,8 +555,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
564 if (!pid) 555 if (!pid)
565 return -ESRCH; 556 return -ESRCH;
566 p = futex_find_get_task(pid); 557 p = futex_find_get_task(pid);
567 if (IS_ERR(p)) 558 if (!p)
568 return PTR_ERR(p); 559 return -ESRCH;
569 560
570 /* 561 /*
571 * We need to look at the task state flags to figure out, 562 * We need to look at the task state flags to figure out,
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b9b134b35088..5c69e996bd0f 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -89,7 +89,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
89 89
90 do { 90 do {
91 seq = read_seqbegin(&xtime_lock); 91 seq = read_seqbegin(&xtime_lock);
92 xts = current_kernel_time(); 92 xts = __current_kernel_time();
93 tom = wall_to_monotonic; 93 tom = wall_to_monotonic;
94 } while (read_seqretry(&xtime_lock, seq)); 94 } while (read_seqretry(&xtime_lock, seq));
95 95
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3164ba7ce151..e1497481fe8a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -456,6 +456,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
456 /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */ 456 /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
457 desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); 457 desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK);
458 desc->status |= flags; 458 desc->status |= flags;
459
460 if (chip != desc->chip)
461 irq_chip_set_defaults(desc->chip);
459 } 462 }
460 463
461 return ret; 464 return ret;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 474a84715eac..131b1703936f 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1089,9 +1089,10 @@ void crash_kexec(struct pt_regs *regs)
1089 1089
1090size_t crash_get_memory_size(void) 1090size_t crash_get_memory_size(void)
1091{ 1091{
1092 size_t size; 1092 size_t size = 0;
1093 mutex_lock(&kexec_mutex); 1093 mutex_lock(&kexec_mutex);
1094 size = crashk_res.end - crashk_res.start + 1; 1094 if (crashk_res.end != crashk_res.start)
1095 size = crashk_res.end - crashk_res.start + 1;
1095 mutex_unlock(&kexec_mutex); 1096 mutex_unlock(&kexec_mutex);
1096 return size; 1097 return size;
1097} 1098}
@@ -1134,7 +1135,7 @@ int crash_shrink_memory(unsigned long new_size)
1134 1135
1135 free_reserved_phys_range(end, crashk_res.end); 1136 free_reserved_phys_range(end, crashk_res.end);
1136 1137
1137 if (start == end) 1138 if ((start == end) && (crashk_res.parent != NULL))
1138 release_resource(&crashk_res); 1139 release_resource(&crashk_res);
1139 crashk_res.end = end - 1; 1140 crashk_res.end = end - 1;
1140 1141
diff --git a/kernel/kmod.c b/kernel/kmod.c
index bf0e231d9702..6e9b19667a8d 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -116,27 +116,16 @@ int __request_module(bool wait, const char *fmt, ...)
116 116
117 trace_module_request(module_name, wait, _RET_IP_); 117 trace_module_request(module_name, wait, _RET_IP_);
118 118
119 ret = call_usermodehelper(modprobe_path, argv, envp, 119 ret = call_usermodehelper_fns(modprobe_path, argv, envp,
120 wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); 120 wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC,
121 NULL, NULL, NULL);
122
121 atomic_dec(&kmod_concurrent); 123 atomic_dec(&kmod_concurrent);
122 return ret; 124 return ret;
123} 125}
124EXPORT_SYMBOL(__request_module); 126EXPORT_SYMBOL(__request_module);
125#endif /* CONFIG_MODULES */ 127#endif /* CONFIG_MODULES */
126 128
127struct subprocess_info {
128 struct work_struct work;
129 struct completion *complete;
130 struct cred *cred;
131 char *path;
132 char **argv;
133 char **envp;
134 enum umh_wait wait;
135 int retval;
136 struct file *stdin;
137 void (*cleanup)(char **argv, char **envp);
138};
139
140/* 129/*
141 * This is the task which runs the usermode application 130 * This is the task which runs the usermode application
142 */ 131 */
@@ -145,36 +134,10 @@ static int ____call_usermodehelper(void *data)
145 struct subprocess_info *sub_info = data; 134 struct subprocess_info *sub_info = data;
146 int retval; 135 int retval;
147 136
148 BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
149
150 /* Unblock all signals */
151 spin_lock_irq(&current->sighand->siglock); 137 spin_lock_irq(&current->sighand->siglock);
152 flush_signal_handlers(current, 1); 138 flush_signal_handlers(current, 1);
153 sigemptyset(&current->blocked);
154 recalc_sigpending();
155 spin_unlock_irq(&current->sighand->siglock); 139 spin_unlock_irq(&current->sighand->siglock);
156 140
157 /* Install the credentials */
158 commit_creds(sub_info->cred);
159 sub_info->cred = NULL;
160
161 /* Install input pipe when needed */
162 if (sub_info->stdin) {
163 struct files_struct *f = current->files;
164 struct fdtable *fdt;
165 /* no races because files should be private here */
166 sys_close(0);
167 fd_install(0, sub_info->stdin);
168 spin_lock(&f->file_lock);
169 fdt = files_fdtable(f);
170 FD_SET(0, fdt->open_fds);
171 FD_CLR(0, fdt->close_on_exec);
172 spin_unlock(&f->file_lock);
173
174 /* and disallow core files too */
175 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){0, 0};
176 }
177
178 /* We can run anywhere, unlike our parent keventd(). */ 141 /* We can run anywhere, unlike our parent keventd(). */
179 set_cpus_allowed_ptr(current, cpu_all_mask); 142 set_cpus_allowed_ptr(current, cpu_all_mask);
180 143
@@ -184,9 +147,16 @@ static int ____call_usermodehelper(void *data)
184 */ 147 */
185 set_user_nice(current, 0); 148 set_user_nice(current, 0);
186 149
150 if (sub_info->init) {
151 retval = sub_info->init(sub_info);
152 if (retval)
153 goto fail;
154 }
155
187 retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); 156 retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
188 157
189 /* Exec failed? */ 158 /* Exec failed? */
159fail:
190 sub_info->retval = retval; 160 sub_info->retval = retval;
191 do_exit(0); 161 do_exit(0);
192} 162}
@@ -194,9 +164,7 @@ static int ____call_usermodehelper(void *data)
194void call_usermodehelper_freeinfo(struct subprocess_info *info) 164void call_usermodehelper_freeinfo(struct subprocess_info *info)
195{ 165{
196 if (info->cleanup) 166 if (info->cleanup)
197 (*info->cleanup)(info->argv, info->envp); 167 (*info->cleanup)(info);
198 if (info->cred)
199 put_cred(info->cred);
200 kfree(info); 168 kfree(info);
201} 169}
202EXPORT_SYMBOL(call_usermodehelper_freeinfo); 170EXPORT_SYMBOL(call_usermodehelper_freeinfo);
@@ -207,16 +175,16 @@ static int wait_for_helper(void *data)
207 struct subprocess_info *sub_info = data; 175 struct subprocess_info *sub_info = data;
208 pid_t pid; 176 pid_t pid;
209 177
210 /* Install a handler: if SIGCLD isn't handled sys_wait4 won't 178 /* If SIGCLD is ignored sys_wait4 won't populate the status. */
211 * populate the status, but will return -ECHILD. */ 179 spin_lock_irq(&current->sighand->siglock);
212 allow_signal(SIGCHLD); 180 current->sighand->action[SIGCHLD-1].sa.sa_handler = SIG_DFL;
181 spin_unlock_irq(&current->sighand->siglock);
213 182
214 pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD); 183 pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
215 if (pid < 0) { 184 if (pid < 0) {
216 sub_info->retval = pid; 185 sub_info->retval = pid;
217 } else { 186 } else {
218 int ret; 187 int ret = -ECHILD;
219
220 /* 188 /*
221 * Normally it is bogus to call wait4() from in-kernel because 189 * Normally it is bogus to call wait4() from in-kernel because
222 * wait4() wants to write the exit code to a userspace address. 190 * wait4() wants to write the exit code to a userspace address.
@@ -237,10 +205,7 @@ static int wait_for_helper(void *data)
237 sub_info->retval = ret; 205 sub_info->retval = ret;
238 } 206 }
239 207
240 if (sub_info->wait == UMH_NO_WAIT) 208 complete(sub_info->complete);
241 call_usermodehelper_freeinfo(sub_info);
242 else
243 complete(sub_info->complete);
244 return 0; 209 return 0;
245} 210}
246 211
@@ -249,15 +214,13 @@ static void __call_usermodehelper(struct work_struct *work)
249{ 214{
250 struct subprocess_info *sub_info = 215 struct subprocess_info *sub_info =
251 container_of(work, struct subprocess_info, work); 216 container_of(work, struct subprocess_info, work);
252 pid_t pid;
253 enum umh_wait wait = sub_info->wait; 217 enum umh_wait wait = sub_info->wait;
254 218 pid_t pid;
255 BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
256 219
257 /* CLONE_VFORK: wait until the usermode helper has execve'd 220 /* CLONE_VFORK: wait until the usermode helper has execve'd
258 * successfully We need the data structures to stay around 221 * successfully We need the data structures to stay around
259 * until that is done. */ 222 * until that is done. */
260 if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT) 223 if (wait == UMH_WAIT_PROC)
261 pid = kernel_thread(wait_for_helper, sub_info, 224 pid = kernel_thread(wait_for_helper, sub_info,
262 CLONE_FS | CLONE_FILES | SIGCHLD); 225 CLONE_FS | CLONE_FILES | SIGCHLD);
263 else 226 else
@@ -266,15 +229,16 @@ static void __call_usermodehelper(struct work_struct *work)
266 229
267 switch (wait) { 230 switch (wait) {
268 case UMH_NO_WAIT: 231 case UMH_NO_WAIT:
232 call_usermodehelper_freeinfo(sub_info);
269 break; 233 break;
270 234
271 case UMH_WAIT_PROC: 235 case UMH_WAIT_PROC:
272 if (pid > 0) 236 if (pid > 0)
273 break; 237 break;
274 sub_info->retval = pid;
275 /* FALLTHROUGH */ 238 /* FALLTHROUGH */
276
277 case UMH_WAIT_EXEC: 239 case UMH_WAIT_EXEC:
240 if (pid < 0)
241 sub_info->retval = pid;
278 complete(sub_info->complete); 242 complete(sub_info->complete);
279 } 243 }
280} 244}
@@ -376,80 +340,37 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
376 sub_info->path = path; 340 sub_info->path = path;
377 sub_info->argv = argv; 341 sub_info->argv = argv;
378 sub_info->envp = envp; 342 sub_info->envp = envp;
379 sub_info->cred = prepare_usermodehelper_creds();
380 if (!sub_info->cred) {
381 kfree(sub_info);
382 return NULL;
383 }
384
385 out: 343 out:
386 return sub_info; 344 return sub_info;
387} 345}
388EXPORT_SYMBOL(call_usermodehelper_setup); 346EXPORT_SYMBOL(call_usermodehelper_setup);
389 347
390/** 348/**
391 * call_usermodehelper_setkeys - set the session keys for usermode helper 349 * call_usermodehelper_setfns - set a cleanup/init function
392 * @info: a subprocess_info returned by call_usermodehelper_setup
393 * @session_keyring: the session keyring for the process
394 */
395void call_usermodehelper_setkeys(struct subprocess_info *info,
396 struct key *session_keyring)
397{
398#ifdef CONFIG_KEYS
399 struct thread_group_cred *tgcred = info->cred->tgcred;
400 key_put(tgcred->session_keyring);
401 tgcred->session_keyring = key_get(session_keyring);
402#else
403 BUG();
404#endif
405}
406EXPORT_SYMBOL(call_usermodehelper_setkeys);
407
408/**
409 * call_usermodehelper_setcleanup - set a cleanup function
410 * @info: a subprocess_info returned by call_usermodehelper_setup 350 * @info: a subprocess_info returned by call_usermodehelper_setup
411 * @cleanup: a cleanup function 351 * @cleanup: a cleanup function
352 * @init: an init function
353 * @data: arbitrary context sensitive data
412 * 354 *
413 * The cleanup function is just befor ethe subprocess_info is about to 355 * The init function is used to customize the helper process prior to
356 * exec. A non-zero return code causes the process to error out, exit,
357 * and return the failure to the calling process
358 *
 359 * The cleanup function is just before the subprocess_info is about to
414 * be freed. This can be used for freeing the argv and envp. The 360 * be freed. This can be used for freeing the argv and envp. The
415 * Function must be runnable in either a process context or the 361 * Function must be runnable in either a process context or the
416 * context in which call_usermodehelper_exec is called. 362 * context in which call_usermodehelper_exec is called.
417 */ 363 */
418void call_usermodehelper_setcleanup(struct subprocess_info *info, 364void call_usermodehelper_setfns(struct subprocess_info *info,
419 void (*cleanup)(char **argv, char **envp)) 365 int (*init)(struct subprocess_info *info),
366 void (*cleanup)(struct subprocess_info *info),
367 void *data)
420{ 368{
421 info->cleanup = cleanup; 369 info->cleanup = cleanup;
370 info->init = init;
371 info->data = data;
422} 372}
423EXPORT_SYMBOL(call_usermodehelper_setcleanup); 373EXPORT_SYMBOL(call_usermodehelper_setfns);
424
425/**
426 * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin
427 * @sub_info: a subprocess_info returned by call_usermodehelper_setup
428 * @filp: set to the write-end of a pipe
429 *
430 * This constructs a pipe, and sets the read end to be the stdin of the
431 * subprocess, and returns the write-end in *@filp.
432 */
433int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
434 struct file **filp)
435{
436 struct file *f;
437
438 f = create_write_pipe(0);
439 if (IS_ERR(f))
440 return PTR_ERR(f);
441 *filp = f;
442
443 f = create_read_pipe(f, 0);
444 if (IS_ERR(f)) {
445 free_write_pipe(*filp);
446 return PTR_ERR(f);
447 }
448 sub_info->stdin = f;
449
450 return 0;
451}
452EXPORT_SYMBOL(call_usermodehelper_stdinpipe);
453 374
454/** 375/**
455 * call_usermodehelper_exec - start a usermode application 376 * call_usermodehelper_exec - start a usermode application
@@ -469,9 +390,6 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
469 DECLARE_COMPLETION_ONSTACK(done); 390 DECLARE_COMPLETION_ONSTACK(done);
470 int retval = 0; 391 int retval = 0;
471 392
472 BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
473 validate_creds(sub_info->cred);
474
475 helper_lock(); 393 helper_lock();
476 if (sub_info->path[0] == '\0') 394 if (sub_info->path[0] == '\0')
477 goto out; 395 goto out;
@@ -498,41 +416,6 @@ unlock:
498} 416}
499EXPORT_SYMBOL(call_usermodehelper_exec); 417EXPORT_SYMBOL(call_usermodehelper_exec);
500 418
501/**
502 * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin
503 * @path: path to usermode executable
504 * @argv: arg vector for process
505 * @envp: environment for process
506 * @filp: set to the write-end of a pipe
507 *
508 * This is a simple wrapper which executes a usermode-helper function
509 * with a pipe as stdin. It is implemented entirely in terms of
510 * lower-level call_usermodehelper_* functions.
511 */
512int call_usermodehelper_pipe(char *path, char **argv, char **envp,
513 struct file **filp)
514{
515 struct subprocess_info *sub_info;
516 int ret;
517
518 sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
519 if (sub_info == NULL)
520 return -ENOMEM;
521
522 ret = call_usermodehelper_stdinpipe(sub_info, filp);
523 if (ret < 0) {
524 call_usermodehelper_freeinfo(sub_info);
525 return ret;
526 }
527
528 ret = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
529 if (ret < 0) /* Failed to execute helper, close pipe */
530 filp_close(*filp, NULL);
531
532 return ret;
533}
534EXPORT_SYMBOL(call_usermodehelper_pipe);
535
536void __init usermodehelper_init(void) 419void __init usermodehelper_init(void)
537{ 420{
538 khelper_wq = create_singlethread_workqueue("khelper"); 421 khelper_wq = create_singlethread_workqueue("khelper");
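
Taken together, the kmod changes replace the pipe, keyring and cleanup-only entry points with a single call_usermodehelper_setfns() hook pair. A minimal, hedged sketch of the resulting calling convention (the helper path, argv contents and callback bodies are placeholders; the setup/setfns/exec functions are the ones this patch keeps):

    #include <linux/kmod.h>
    #include <linux/gfp.h>
    #include <linux/errno.h>

    static int my_umh_init(struct subprocess_info *info)
    {
            /* runs in the helper child just before kernel_execve();
             * a non-zero return aborts the exec (see ____call_usermodehelper) */
            return 0;
    }

    static void my_umh_cleanup(struct subprocess_info *info)
    {
            /* called from call_usermodehelper_freeinfo(); free dynamically
             * allocated argv/envp here if needed */
    }

    static int my_run_helper(void)
    {
            /* illustrative helper path and environment */
            char *argv[] = { "/sbin/my-helper", "start", NULL };
            char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
            struct subprocess_info *info;

            info = call_usermodehelper_setup(argv[0], argv, envp, GFP_KERNEL);
            if (!info)
                    return -ENOMEM;

            call_usermodehelper_setfns(info, my_umh_init, my_umh_cleanup, NULL);
            return call_usermodehelper_exec(info, UMH_WAIT_EXEC);
    }

call_usermodehelper_fns(), which __request_module() now uses above, is simply a convenience wrapper that performs these three steps in one call.
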
diff --git a/kernel/module.c b/kernel/module.c
index 333fbcc96978..5d2d28197c82 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -72,7 +72,11 @@
72/* If this is set, the section belongs in the init part of the module */ 72/* If this is set, the section belongs in the init part of the module */
73#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) 73#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
74 74
75/* List of modules, protected by module_mutex or preempt_disable 75/*
76 * Mutex protects:
77 * 1) List of modules (also safely readable with preempt_disable),
78 * 2) module_use links,
79 * 3) module_addr_min/module_addr_max.
76 * (delete uses stop_machine/add uses RCU list operations). */ 80 * (delete uses stop_machine/add uses RCU list operations). */
77DEFINE_MUTEX(module_mutex); 81DEFINE_MUTEX(module_mutex);
78EXPORT_SYMBOL_GPL(module_mutex); 82EXPORT_SYMBOL_GPL(module_mutex);
@@ -90,7 +94,8 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
90 94
91static BLOCKING_NOTIFIER_HEAD(module_notify_list); 95static BLOCKING_NOTIFIER_HEAD(module_notify_list);
92 96
93/* Bounds of module allocation, for speeding __module_address */ 97/* Bounds of module allocation, for speeding __module_address.
98 * Protected by module_mutex. */
94static unsigned long module_addr_min = -1UL, module_addr_max = 0; 99static unsigned long module_addr_min = -1UL, module_addr_max = 0;
95 100
96int register_module_notifier(struct notifier_block * nb) 101int register_module_notifier(struct notifier_block * nb)
@@ -329,7 +334,7 @@ static bool find_symbol_in_section(const struct symsearch *syms,
329} 334}
330 335
331/* Find a symbol and return it, along with, (optional) crc and 336/* Find a symbol and return it, along with, (optional) crc and
332 * (optional) module which owns it */ 337 * (optional) module which owns it. Needs preempt disabled or module_mutex. */
333const struct kernel_symbol *find_symbol(const char *name, 338const struct kernel_symbol *find_symbol(const char *name,
334 struct module **owner, 339 struct module **owner,
335 const unsigned long **crc, 340 const unsigned long **crc,
@@ -403,7 +408,7 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
403 Elf_Shdr *sechdrs, 408 Elf_Shdr *sechdrs,
404 const char *secstrings) 409 const char *secstrings)
405{ 410{
406 return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); 411 return find_sec(hdr, sechdrs, secstrings, ".data..percpu");
407} 412}
408 413
409static void percpu_modcopy(struct module *mod, 414static void percpu_modcopy(struct module *mod,
@@ -523,7 +528,8 @@ static void module_unload_init(struct module *mod)
523{ 528{
524 int cpu; 529 int cpu;
525 530
526 INIT_LIST_HEAD(&mod->modules_which_use_me); 531 INIT_LIST_HEAD(&mod->source_list);
532 INIT_LIST_HEAD(&mod->target_list);
527 for_each_possible_cpu(cpu) { 533 for_each_possible_cpu(cpu) {
528 per_cpu_ptr(mod->refptr, cpu)->incs = 0; 534 per_cpu_ptr(mod->refptr, cpu)->incs = 0;
529 per_cpu_ptr(mod->refptr, cpu)->decs = 0; 535 per_cpu_ptr(mod->refptr, cpu)->decs = 0;
@@ -535,20 +541,13 @@ static void module_unload_init(struct module *mod)
535 mod->waiter = current; 541 mod->waiter = current;
536} 542}
537 543
538/* modules using other modules */
539struct module_use
540{
541 struct list_head list;
542 struct module *module_which_uses;
543};
544
545/* Does a already use b? */ 544/* Does a already use b? */
546static int already_uses(struct module *a, struct module *b) 545static int already_uses(struct module *a, struct module *b)
547{ 546{
548 struct module_use *use; 547 struct module_use *use;
549 548
550 list_for_each_entry(use, &b->modules_which_use_me, list) { 549 list_for_each_entry(use, &b->source_list, source_list) {
551 if (use->module_which_uses == a) { 550 if (use->source == a) {
552 DEBUGP("%s uses %s!\n", a->name, b->name); 551 DEBUGP("%s uses %s!\n", a->name, b->name);
553 return 1; 552 return 1;
554 } 553 }
@@ -557,62 +556,68 @@ static int already_uses(struct module *a, struct module *b)
557 return 0; 556 return 0;
558} 557}
559 558
560/* Module a uses b */ 559/*
561int use_module(struct module *a, struct module *b) 560 * Module a uses b
561 * - we add 'a' as a "source", 'b' as a "target" of module use
562 * - the module_use is added to the list of 'b' sources (so
563 * 'b' can walk the list to see who sourced them), and of 'a'
564 * targets (so 'a' can see what modules it targets).
565 */
566static int add_module_usage(struct module *a, struct module *b)
562{ 567{
563 struct module_use *use; 568 struct module_use *use;
564 int no_warn, err;
565 569
566 if (b == NULL || already_uses(a, b)) return 1; 570 DEBUGP("Allocating new usage for %s.\n", a->name);
571 use = kmalloc(sizeof(*use), GFP_ATOMIC);
572 if (!use) {
573 printk(KERN_WARNING "%s: out of memory loading\n", a->name);
574 return -ENOMEM;
575 }
576
577 use->source = a;
578 use->target = b;
579 list_add(&use->source_list, &b->source_list);
580 list_add(&use->target_list, &a->target_list);
581 return 0;
582}
567 583
568 /* If we're interrupted or time out, we fail. */ 584/* Module a uses b: caller needs module_mutex() */
569 if (wait_event_interruptible_timeout( 585int ref_module(struct module *a, struct module *b)
570 module_wq, (err = strong_try_module_get(b)) != -EBUSY, 586{
571 30 * HZ) <= 0) { 587 int err;
572 printk("%s: gave up waiting for init of module %s.\n", 588
573 a->name, b->name); 589 if (b == NULL || already_uses(a, b))
574 return 0; 590 return 0;
575 }
576 591
577 /* If strong_try_module_get() returned a different error, we fail. */ 592 /* If module isn't available, we fail. */
593 err = strong_try_module_get(b);
578 if (err) 594 if (err)
579 return 0; 595 return err;
580 596
581 DEBUGP("Allocating new usage for %s.\n", a->name); 597 err = add_module_usage(a, b);
582 use = kmalloc(sizeof(*use), GFP_ATOMIC); 598 if (err) {
583 if (!use) {
584 printk("%s: out of memory loading\n", a->name);
585 module_put(b); 599 module_put(b);
586 return 0; 600 return err;
587 } 601 }
588 602 return 0;
589 use->module_which_uses = a;
590 list_add(&use->list, &b->modules_which_use_me);
591 no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name);
592 return 1;
593} 603}
594EXPORT_SYMBOL_GPL(use_module); 604EXPORT_SYMBOL_GPL(ref_module);
595 605
596/* Clear the unload stuff of the module. */ 606/* Clear the unload stuff of the module. */
597static void module_unload_free(struct module *mod) 607static void module_unload_free(struct module *mod)
598{ 608{
599 struct module *i; 609 struct module_use *use, *tmp;
600
601 list_for_each_entry(i, &modules, list) {
602 struct module_use *use;
603 610
604 list_for_each_entry(use, &i->modules_which_use_me, list) { 611 mutex_lock(&module_mutex);
605 if (use->module_which_uses == mod) { 612 list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) {
606 DEBUGP("%s unusing %s\n", mod->name, i->name); 613 struct module *i = use->target;
607 module_put(i); 614 DEBUGP("%s unusing %s\n", mod->name, i->name);
608 list_del(&use->list); 615 module_put(i);
609 kfree(use); 616 list_del(&use->source_list);
610 sysfs_remove_link(i->holders_dir, mod->name); 617 list_del(&use->target_list);
611 /* There can be at most one match. */ 618 kfree(use);
612 break;
613 }
614 }
615 } 619 }
620 mutex_unlock(&module_mutex);
616} 621}
617 622
618#ifdef CONFIG_MODULE_FORCE_UNLOAD 623#ifdef CONFIG_MODULE_FORCE_UNLOAD
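
Because each module_use node is now threaded onto both the user's target_list and the used module's source_list, either direction of the dependency graph can be walked without scanning the global module list, which is what the kdb lsmod hunk earlier in this diff relies on. A hedged sketch of both walks, assuming struct module_use is now visible from linux/module.h (both local definitions are removed here) and that the caller holds module_mutex as the new locking comment requires:

    #include <linux/module.h>
    #include <linux/list.h>
    #include <linux/kernel.h>

    /* Illustrative only: print both sides of mod's dependency links. */
    static void dump_module_links(struct module *mod)
    {
            struct module_use *use;

            /* modules that pinned mod via ref_module() ("Used by" in lsmod) */
            list_for_each_entry(use, &mod->source_list, source_list)
                    printk(KERN_INFO "%s is used by %s\n",
                           mod->name, use->source->name);

            /* modules that mod itself depends on */
            list_for_each_entry(use, &mod->target_list, target_list)
                    printk(KERN_INFO "%s uses %s\n",
                           mod->name, use->target->name);
    }
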
@@ -735,7 +740,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
735 goto out; 740 goto out;
736 } 741 }
737 742
738 if (!list_empty(&mod->modules_which_use_me)) { 743 if (!list_empty(&mod->source_list)) {
739 /* Other modules depend on us: get rid of them first. */ 744 /* Other modules depend on us: get rid of them first. */
740 ret = -EWOULDBLOCK; 745 ret = -EWOULDBLOCK;
741 goto out; 746 goto out;
@@ -779,13 +784,14 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
779 blocking_notifier_call_chain(&module_notify_list, 784 blocking_notifier_call_chain(&module_notify_list,
780 MODULE_STATE_GOING, mod); 785 MODULE_STATE_GOING, mod);
781 async_synchronize_full(); 786 async_synchronize_full();
782 mutex_lock(&module_mutex); 787
783 /* Store the name of the last unloaded module for diagnostic purposes */ 788 /* Store the name of the last unloaded module for diagnostic purposes */
784 strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); 789 strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
785 ddebug_remove_module(mod->name); 790 ddebug_remove_module(mod->name);
786 free_module(mod);
787 791
788 out: 792 free_module(mod);
793 return 0;
794out:
789 mutex_unlock(&module_mutex); 795 mutex_unlock(&module_mutex);
790 return ret; 796 return ret;
791} 797}
@@ -799,9 +805,9 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
799 805
800 /* Always include a trailing , so userspace can differentiate 806 /* Always include a trailing , so userspace can differentiate
801 between this and the old multi-field proc format. */ 807 between this and the old multi-field proc format. */
802 list_for_each_entry(use, &mod->modules_which_use_me, list) { 808 list_for_each_entry(use, &mod->source_list, source_list) {
803 printed_something = 1; 809 printed_something = 1;
804 seq_printf(m, "%s,", use->module_which_uses->name); 810 seq_printf(m, "%s,", use->source->name);
805 } 811 }
806 812
807 if (mod->init != NULL && mod->exit == NULL) { 813 if (mod->init != NULL && mod->exit == NULL) {
@@ -880,11 +886,11 @@ static inline void module_unload_free(struct module *mod)
880{ 886{
881} 887}
882 888
883int use_module(struct module *a, struct module *b) 889int ref_module(struct module *a, struct module *b)
884{ 890{
885 return strong_try_module_get(b) == 0; 891 return strong_try_module_get(b);
886} 892}
887EXPORT_SYMBOL_GPL(use_module); 893EXPORT_SYMBOL_GPL(ref_module);
888 894
889static inline void module_unload_init(struct module *mod) 895static inline void module_unload_init(struct module *mod)
890{ 896{
@@ -1001,6 +1007,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
1001{ 1007{
1002 const unsigned long *crc; 1008 const unsigned long *crc;
1003 1009
1010 /* Since this should be found in kernel (which can't be removed),
1011 * no locking is necessary. */
1004 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, 1012 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
1005 &crc, true, false)) 1013 &crc, true, false))
1006 BUG(); 1014 BUG();
@@ -1043,29 +1051,62 @@ static inline int same_magic(const char *amagic, const char *bmagic,
1043} 1051}
1044#endif /* CONFIG_MODVERSIONS */ 1052#endif /* CONFIG_MODVERSIONS */
1045 1053
1046/* Resolve a symbol for this module. I.e. if we find one, record usage. 1054/* Resolve a symbol for this module. I.e. if we find one, record usage. */
1047 Must be holding module_mutex. */
1048static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs, 1055static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
1049 unsigned int versindex, 1056 unsigned int versindex,
1050 const char *name, 1057 const char *name,
1051 struct module *mod) 1058 struct module *mod,
1059 char ownername[])
1052{ 1060{
1053 struct module *owner; 1061 struct module *owner;
1054 const struct kernel_symbol *sym; 1062 const struct kernel_symbol *sym;
1055 const unsigned long *crc; 1063 const unsigned long *crc;
1064 int err;
1056 1065
1066 mutex_lock(&module_mutex);
1057 sym = find_symbol(name, &owner, &crc, 1067 sym = find_symbol(name, &owner, &crc,
1058 !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); 1068 !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
1059 /* use_module can fail due to OOM, 1069 if (!sym)
1060 or module initialization or unloading */ 1070 goto unlock;
1061 if (sym) { 1071
1062 if (!check_version(sechdrs, versindex, name, mod, crc, owner) 1072 if (!check_version(sechdrs, versindex, name, mod, crc, owner)) {
1063 || !use_module(mod, owner)) 1073 sym = ERR_PTR(-EINVAL);
1064 sym = NULL; 1074 goto getname;
1065 } 1075 }
1076
1077 err = ref_module(mod, owner);
1078 if (err) {
1079 sym = ERR_PTR(err);
1080 goto getname;
1081 }
1082
1083getname:
1084 /* We must make copy under the lock if we failed to get ref. */
1085 strncpy(ownername, module_name(owner), MODULE_NAME_LEN);
1086unlock:
1087 mutex_unlock(&module_mutex);
1066 return sym; 1088 return sym;
1067} 1089}
1068 1090
1091static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs,
1092 unsigned int versindex,
1093 const char *name,
1094 struct module *mod)
1095{
1096 const struct kernel_symbol *ksym;
1097 char ownername[MODULE_NAME_LEN];
1098
1099 if (wait_event_interruptible_timeout(module_wq,
1100 !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name,
1101 mod, ownername)) ||
1102 PTR_ERR(ksym) != -EBUSY,
1103 30 * HZ) <= 0) {
1104 printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
1105 mod->name, ownername);
1106 }
1107 return ksym;
1108}
1109
1069/* 1110/*
1070 * /sys/module/foo/sections stuff 1111 * /sys/module/foo/sections stuff
1071 * J. Corbet <corbet@lwn.net> 1112 * J. Corbet <corbet@lwn.net>
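
The reworked resolve_symbol() above distinguishes three outcomes: a valid symbol pointer, NULL for "not found" (tolerable for weak symbols), and an encoded error such as -EBUSY that resolve_symbol_wait() retries on for up to 30 seconds. A minimal userspace sketch of that three-way pointer convention, with made-up err_ptr()/ptr_err()/is_err() helpers standing in for the kernel's ERR_PTR() family:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *err_ptr(long err) { return (void *)err; }
static long ptr_err(const void *p) { return (long)p; }
static int is_err(const void *p)
{
    return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

/* Toy lookup: NULL = not found, err_ptr() = failure, otherwise = the symbol. */
static const char *lookup(const char *name)
{
    if (!name)
        return NULL;
    if (name[0] == '_')
        return err_ptr(-EBUSY);   /* owner still initialising; caller may retry */
    return name;
}

int main(void)
{
    const char *sym = lookup("_pending");

    if (is_err(sym))
        printf("lookup failed: %ld\n", ptr_err(sym));
    else if (!sym)
        printf("not found (ok if weak)\n");
    else
        printf("resolved %s\n", sym);
    return 0;
}
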
@@ -1295,7 +1336,34 @@ static inline void remove_notes_attrs(struct module *mod)
1295#endif 1336#endif
1296 1337
1297#ifdef CONFIG_SYSFS 1338#ifdef CONFIG_SYSFS
1298int module_add_modinfo_attrs(struct module *mod) 1339static void add_usage_links(struct module *mod)
1340{
1341#ifdef CONFIG_MODULE_UNLOAD
1342 struct module_use *use;
1343 int nowarn;
1344
1345 mutex_lock(&module_mutex);
1346 list_for_each_entry(use, &mod->target_list, target_list) {
1347 nowarn = sysfs_create_link(use->target->holders_dir,
1348 &mod->mkobj.kobj, mod->name);
1349 }
1350 mutex_unlock(&module_mutex);
1351#endif
1352}
1353
1354static void del_usage_links(struct module *mod)
1355{
1356#ifdef CONFIG_MODULE_UNLOAD
1357 struct module_use *use;
1358
1359 mutex_lock(&module_mutex);
1360 list_for_each_entry(use, &mod->target_list, target_list)
1361 sysfs_remove_link(use->target->holders_dir, mod->name);
1362 mutex_unlock(&module_mutex);
1363#endif
1364}
1365
1366static int module_add_modinfo_attrs(struct module *mod)
1299{ 1367{
1300 struct module_attribute *attr; 1368 struct module_attribute *attr;
1301 struct module_attribute *temp_attr; 1369 struct module_attribute *temp_attr;
@@ -1321,7 +1389,7 @@ int module_add_modinfo_attrs(struct module *mod)
1321 return error; 1389 return error;
1322} 1390}
1323 1391
1324void module_remove_modinfo_attrs(struct module *mod) 1392static void module_remove_modinfo_attrs(struct module *mod)
1325{ 1393{
1326 struct module_attribute *attr; 1394 struct module_attribute *attr;
1327 int i; 1395 int i;
@@ -1337,7 +1405,7 @@ void module_remove_modinfo_attrs(struct module *mod)
1337 kfree(mod->modinfo_attrs); 1405 kfree(mod->modinfo_attrs);
1338} 1406}
1339 1407
1340int mod_sysfs_init(struct module *mod) 1408static int mod_sysfs_init(struct module *mod)
1341{ 1409{
1342 int err; 1410 int err;
1343 struct kobject *kobj; 1411 struct kobject *kobj;
@@ -1371,12 +1439,16 @@ out:
1371 return err; 1439 return err;
1372} 1440}
1373 1441
1374int mod_sysfs_setup(struct module *mod, 1442static int mod_sysfs_setup(struct module *mod,
1375 struct kernel_param *kparam, 1443 struct kernel_param *kparam,
1376 unsigned int num_params) 1444 unsigned int num_params)
1377{ 1445{
1378 int err; 1446 int err;
1379 1447
1448 err = mod_sysfs_init(mod);
1449 if (err)
1450 goto out;
1451
1380 mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj); 1452 mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj);
1381 if (!mod->holders_dir) { 1453 if (!mod->holders_dir) {
1382 err = -ENOMEM; 1454 err = -ENOMEM;
@@ -1391,6 +1463,8 @@ int mod_sysfs_setup(struct module *mod,
1391 if (err) 1463 if (err)
1392 goto out_unreg_param; 1464 goto out_unreg_param;
1393 1465
1466 add_usage_links(mod);
1467
1394 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); 1468 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
1395 return 0; 1469 return 0;
1396 1470
@@ -1400,6 +1474,7 @@ out_unreg_holders:
1400 kobject_put(mod->holders_dir); 1474 kobject_put(mod->holders_dir);
1401out_unreg: 1475out_unreg:
1402 kobject_put(&mod->mkobj.kobj); 1476 kobject_put(&mod->mkobj.kobj);
1477out:
1403 return err; 1478 return err;
1404} 1479}
1405 1480
@@ -1410,14 +1485,40 @@ static void mod_sysfs_fini(struct module *mod)
1410 1485
1411#else /* CONFIG_SYSFS */ 1486#else /* CONFIG_SYSFS */
1412 1487
1488static inline int mod_sysfs_init(struct module *mod)
1489{
1490 return 0;
1491}
1492
1493static inline int mod_sysfs_setup(struct module *mod,
1494 struct kernel_param *kparam,
1495 unsigned int num_params)
1496{
1497 return 0;
1498}
1499
1500static inline int module_add_modinfo_attrs(struct module *mod)
1501{
1502 return 0;
1503}
1504
1505static inline void module_remove_modinfo_attrs(struct module *mod)
1506{
1507}
1508
1413static void mod_sysfs_fini(struct module *mod) 1509static void mod_sysfs_fini(struct module *mod)
1414{ 1510{
1415} 1511}
1416 1512
1513static void del_usage_links(struct module *mod)
1514{
1515}
1516
1417#endif /* CONFIG_SYSFS */ 1517#endif /* CONFIG_SYSFS */
1418 1518
1419static void mod_kobject_remove(struct module *mod) 1519static void mod_kobject_remove(struct module *mod)
1420{ 1520{
1521 del_usage_links(mod);
1421 module_remove_modinfo_attrs(mod); 1522 module_remove_modinfo_attrs(mod);
1422 module_param_sysfs_remove(mod); 1523 module_param_sysfs_remove(mod);
1423 kobject_put(mod->mkobj.drivers_dir); 1524 kobject_put(mod->mkobj.drivers_dir);
@@ -1436,13 +1537,15 @@ static int __unlink_module(void *_mod)
1436 return 0; 1537 return 0;
1437} 1538}
1438 1539
1439/* Free a module, remove from lists, etc (must hold module_mutex). */ 1540/* Free a module, remove from lists, etc. */
1440static void free_module(struct module *mod) 1541static void free_module(struct module *mod)
1441{ 1542{
1442 trace_module_free(mod); 1543 trace_module_free(mod);
1443 1544
1444 /* Delete from various lists */ 1545 /* Delete from various lists */
1546 mutex_lock(&module_mutex);
1445 stop_machine(__unlink_module, mod, NULL); 1547 stop_machine(__unlink_module, mod, NULL);
1548 mutex_unlock(&module_mutex);
1446 remove_notes_attrs(mod); 1549 remove_notes_attrs(mod);
1447 remove_sect_attrs(mod); 1550 remove_sect_attrs(mod);
1448 mod_kobject_remove(mod); 1551 mod_kobject_remove(mod);
@@ -1493,6 +1596,8 @@ EXPORT_SYMBOL_GPL(__symbol_get);
1493/* 1596/*
1494 * Ensure that an exported symbol [global namespace] does not already exist 1597 * Ensure that an exported symbol [global namespace] does not already exist
1495 * in the kernel or in some other module's exported symbol table. 1598 * in the kernel or in some other module's exported symbol table.
1599 *
1600 * You must hold the module_mutex.
1496 */ 1601 */
1497static int verify_export_symbols(struct module *mod) 1602static int verify_export_symbols(struct module *mod)
1498{ 1603{
@@ -1558,21 +1663,23 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
1558 break; 1663 break;
1559 1664
1560 case SHN_UNDEF: 1665 case SHN_UNDEF:
1561 ksym = resolve_symbol(sechdrs, versindex, 1666 ksym = resolve_symbol_wait(sechdrs, versindex,
1562 strtab + sym[i].st_name, mod); 1667 strtab + sym[i].st_name,
1668 mod);
1563 /* Ok if resolved. */ 1669 /* Ok if resolved. */
1564 if (ksym) { 1670 if (ksym && !IS_ERR(ksym)) {
1565 sym[i].st_value = ksym->value; 1671 sym[i].st_value = ksym->value;
1566 break; 1672 break;
1567 } 1673 }
1568 1674
1569 /* Ok if weak. */ 1675 /* Ok if weak. */
1570 if (ELF_ST_BIND(sym[i].st_info) == STB_WEAK) 1676 if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
1571 break; 1677 break;
1572 1678
1573 printk(KERN_WARNING "%s: Unknown symbol %s\n", 1679 printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
1574 mod->name, strtab + sym[i].st_name); 1680 mod->name, strtab + sym[i].st_name,
1575 ret = -ENOENT; 1681 PTR_ERR(ksym));
1682 ret = PTR_ERR(ksym) ?: -ENOENT;
1576 break; 1683 break;
1577 1684
1578 default: 1685 default:
@@ -1955,16 +2062,24 @@ static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num)
1955#endif 2062#endif
1956} 2063}
1957 2064
2065static void dynamic_debug_remove(struct _ddebug *debug)
2066{
2067 if (debug)
2068 ddebug_remove_module(debug->modname);
2069}
2070
1958static void *module_alloc_update_bounds(unsigned long size) 2071static void *module_alloc_update_bounds(unsigned long size)
1959{ 2072{
1960 void *ret = module_alloc(size); 2073 void *ret = module_alloc(size);
1961 2074
1962 if (ret) { 2075 if (ret) {
2076 mutex_lock(&module_mutex);
1963 /* Update module bounds. */ 2077 /* Update module bounds. */
1964 if ((unsigned long)ret < module_addr_min) 2078 if ((unsigned long)ret < module_addr_min)
1965 module_addr_min = (unsigned long)ret; 2079 module_addr_min = (unsigned long)ret;
1966 if ((unsigned long)ret + size > module_addr_max) 2080 if ((unsigned long)ret + size > module_addr_max)
1967 module_addr_max = (unsigned long)ret + size; 2081 module_addr_max = (unsigned long)ret + size;
2082 mutex_unlock(&module_mutex);
1968 } 2083 }
1969 return ret; 2084 return ret;
1970} 2085}
@@ -2014,6 +2129,9 @@ static noinline struct module *load_module(void __user *umod,
2014 long err = 0; 2129 long err = 0;
2015 void *ptr = NULL; /* Stops spurious gcc warning */ 2130 void *ptr = NULL; /* Stops spurious gcc warning */
2016 unsigned long symoffs, stroffs, *strmap; 2131 unsigned long symoffs, stroffs, *strmap;
2132 void __percpu *percpu;
2133 struct _ddebug *debug = NULL;
2134 unsigned int num_debug = 0;
2017 2135
2018 mm_segment_t old_fs; 2136 mm_segment_t old_fs;
2019 2137
@@ -2138,11 +2256,6 @@ static noinline struct module *load_module(void __user *umod,
2138 goto free_mod; 2256 goto free_mod;
2139 } 2257 }
2140 2258
2141 if (find_module(mod->name)) {
2142 err = -EEXIST;
2143 goto free_mod;
2144 }
2145
2146 mod->state = MODULE_STATE_COMING; 2259 mod->state = MODULE_STATE_COMING;
2147 2260
2148 /* Allow arches to frob section contents and sizes. */ 2261 /* Allow arches to frob section contents and sizes. */
@@ -2158,6 +2271,8 @@ static noinline struct module *load_module(void __user *umod,
2158 goto free_mod; 2271 goto free_mod;
2159 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 2272 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
2160 } 2273 }
2274 /* Keep this around for failure path. */
2275 percpu = mod_percpu(mod);
2161 2276
2162 /* Determine total sizes, and put offsets in sh_entsize. For now 2277 /* Determine total sizes, and put offsets in sh_entsize. For now
2163 this is done generically; there doesn't appear to be any 2278 this is done generically; there doesn't appear to be any
@@ -2231,11 +2346,6 @@ static noinline struct module *load_module(void __user *umod,
2231 /* Now we've moved module, initialize linked lists, etc. */ 2346 /* Now we've moved module, initialize linked lists, etc. */
2232 module_unload_init(mod); 2347 module_unload_init(mod);
2233 2348
2234 /* add kobject, so we can reference it. */
2235 err = mod_sysfs_init(mod);
2236 if (err)
2237 goto free_unload;
2238
2239 /* Set up license info based on the info section */ 2349 /* Set up license info based on the info section */
2240 set_license(mod, get_modinfo(sechdrs, infoindex, "license")); 2350 set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
2241 2351
@@ -2360,11 +2470,6 @@ static noinline struct module *load_module(void __user *umod,
2360 goto cleanup; 2470 goto cleanup;
2361 } 2471 }
2362 2472
2363 /* Find duplicate symbols */
2364 err = verify_export_symbols(mod);
2365 if (err < 0)
2366 goto cleanup;
2367
2368 /* Set up and sort exception table */ 2473 /* Set up and sort exception table */
2369 mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table", 2474 mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
2370 sizeof(*mod->extable), &mod->num_exentries); 2475 sizeof(*mod->extable), &mod->num_exentries);
@@ -2379,15 +2484,9 @@ static noinline struct module *load_module(void __user *umod,
2379 kfree(strmap); 2484 kfree(strmap);
2380 strmap = NULL; 2485 strmap = NULL;
2381 2486
2382 if (!mod->taints) { 2487 if (!mod->taints)
2383 struct _ddebug *debug;
2384 unsigned int num_debug;
2385
2386 debug = section_objs(hdr, sechdrs, secstrings, "__verbose", 2488 debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
2387 sizeof(*debug), &num_debug); 2489 sizeof(*debug), &num_debug);
2388 if (debug)
2389 dynamic_debug_setup(debug, num_debug);
2390 }
2391 2490
2392 err = module_finalize(hdr, sechdrs, mod); 2491 err = module_finalize(hdr, sechdrs, mod);
2393 if (err < 0) 2492 if (err < 0)
@@ -2423,7 +2522,22 @@ static noinline struct module *load_module(void __user *umod,
2423 * function to insert in a way safe to concurrent readers. 2522 * function to insert in a way safe to concurrent readers.
2424 * The mutex protects against concurrent writers. 2523 * The mutex protects against concurrent writers.
2425 */ 2524 */
2525 mutex_lock(&module_mutex);
2526 if (find_module(mod->name)) {
2527 err = -EEXIST;
2528 goto unlock;
2529 }
2530
2531 if (debug)
2532 dynamic_debug_setup(debug, num_debug);
2533
2534 /* Find duplicate symbols */
2535 err = verify_export_symbols(mod);
2536 if (err < 0)
2537 goto ddebug;
2538
2426 list_add_rcu(&mod->list, &modules); 2539 list_add_rcu(&mod->list, &modules);
2540 mutex_unlock(&module_mutex);
2427 2541
2428 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL); 2542 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
2429 if (err < 0) 2543 if (err < 0)
@@ -2432,6 +2546,7 @@ static noinline struct module *load_module(void __user *umod,
2432 err = mod_sysfs_setup(mod, mod->kp, mod->num_kp); 2546 err = mod_sysfs_setup(mod, mod->kp, mod->num_kp);
2433 if (err < 0) 2547 if (err < 0)
2434 goto unlink; 2548 goto unlink;
2549
2435 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2550 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
2436 add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2551 add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
2437 2552
@@ -2444,15 +2559,17 @@ static noinline struct module *load_module(void __user *umod,
2444 return mod; 2559 return mod;
2445 2560
2446 unlink: 2561 unlink:
2562 mutex_lock(&module_mutex);
2447 /* Unlink carefully: kallsyms could be walking list. */ 2563 /* Unlink carefully: kallsyms could be walking list. */
2448 list_del_rcu(&mod->list); 2564 list_del_rcu(&mod->list);
2565 ddebug:
2566 dynamic_debug_remove(debug);
2567 unlock:
2568 mutex_unlock(&module_mutex);
2449 synchronize_sched(); 2569 synchronize_sched();
2450 module_arch_cleanup(mod); 2570 module_arch_cleanup(mod);
2451 cleanup: 2571 cleanup:
2452 free_modinfo(mod); 2572 free_modinfo(mod);
2453 kobject_del(&mod->mkobj.kobj);
2454 kobject_put(&mod->mkobj.kobj);
2455 free_unload:
2456 module_unload_free(mod); 2573 module_unload_free(mod);
2457#if defined(CONFIG_MODULE_UNLOAD) 2574#if defined(CONFIG_MODULE_UNLOAD)
2458 free_percpu(mod->refptr); 2575 free_percpu(mod->refptr);
@@ -2463,7 +2580,7 @@ static noinline struct module *load_module(void __user *umod,
2463 module_free(mod, mod->module_core); 2580 module_free(mod, mod->module_core);
2464 /* mod will be freed with core. Don't access it beyond this line! */ 2581 /* mod will be freed with core. Don't access it beyond this line! */
2465 free_percpu: 2582 free_percpu:
2466 percpu_modfree(mod); 2583 free_percpu(percpu);
2467 free_mod: 2584 free_mod:
2468 kfree(args); 2585 kfree(args);
2469 kfree(strmap); 2586 kfree(strmap);
@@ -2499,19 +2616,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2499 if (!capable(CAP_SYS_MODULE) || modules_disabled) 2616 if (!capable(CAP_SYS_MODULE) || modules_disabled)
2500 return -EPERM; 2617 return -EPERM;
2501 2618
2502 /* Only one module load at a time, please */
2503 if (mutex_lock_interruptible(&module_mutex) != 0)
2504 return -EINTR;
2505
2506 /* Do all the hard work */ 2619 /* Do all the hard work */
2507 mod = load_module(umod, len, uargs); 2620 mod = load_module(umod, len, uargs);
2508 if (IS_ERR(mod)) { 2621 if (IS_ERR(mod))
2509 mutex_unlock(&module_mutex);
2510 return PTR_ERR(mod); 2622 return PTR_ERR(mod);
2511 }
2512
2513 /* Drop lock so they can recurse */
2514 mutex_unlock(&module_mutex);
2515 2623
2516 blocking_notifier_call_chain(&module_notify_list, 2624 blocking_notifier_call_chain(&module_notify_list,
2517 MODULE_STATE_COMING, mod); 2625 MODULE_STATE_COMING, mod);
@@ -2528,9 +2636,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2528 module_put(mod); 2636 module_put(mod);
2529 blocking_notifier_call_chain(&module_notify_list, 2637 blocking_notifier_call_chain(&module_notify_list,
2530 MODULE_STATE_GOING, mod); 2638 MODULE_STATE_GOING, mod);
2531 mutex_lock(&module_mutex);
2532 free_module(mod); 2639 free_module(mod);
2533 mutex_unlock(&module_mutex);
2534 wake_up(&module_wq); 2640 wake_up(&module_wq);
2535 return ret; 2641 return ret;
2536 } 2642 }
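
Taken together, the load_module() changes above move the slow work (allocation, relocation, dynamic debug setup) outside module_mutex and take the lock only around the duplicate-name check, verify_export_symbols() and the list insertion, with goto labels unwinding whatever was done under the lock. A rough userspace sketch of that "do the heavy work unlocked, take the lock only to publish" shape, using a pthread mutex and invented names:

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

struct object { int registered; };

static int already_present(struct object *o) { (void)o; return 0; }

static int publish(struct object *o)
{
    int err = 0;

    /* Heavy, slow setup happens before this point, with no lock held. */
    pthread_mutex_lock(&registry_lock);
    if (already_present(o)) {
        err = -EEXIST;
        goto unlock;
    }
    o->registered = 1;            /* cheap, race-free insertion */
unlock:
    pthread_mutex_unlock(&registry_lock);
    return err;
}

int main(void)
{
    struct object o = { 0 };

    return publish(&o) ? EXIT_FAILURE : EXIT_SUCCESS;
}
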
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 632f04c57d82..4c0b7b3e6d2e 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -172,6 +172,13 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
172 struct thread_info *owner; 172 struct thread_info *owner;
173 173
174 /* 174 /*
175 * If we own the BKL, then don't spin. The owner of
176 * the mutex might be waiting on us to release the BKL.
177 */
178 if (unlikely(current->lock_depth >= 0))
179 break;
180
181 /*
175 * If there's an owner, wait for it to either 182 * If there's an owner, wait for it to either
176 * release the lock or go to sleep. 183 * release the lock or go to sleep.
177 */ 184 */
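
The mutex.c hunk above stops the adaptive owner-spin when the caller holds the BKL, since the lock owner may itself be waiting on the BKL and spinning would then get nowhere. A loose userspace sketch of the "spin only while it is safe and the owner is running, otherwise sleep" idea; pthreads stand in for the kernel mutex, and may_spin() is a placeholder for the lock_depth test:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct spin_mutex {
    pthread_mutex_t lock;
    atomic_bool owner_running;   /* would be maintained by the owner's CPU */
};

/* Stand-in for the "current->lock_depth >= 0" test: if we hold another lock
 * the owner might need, optimistic spinning could get stuck, so give up. */
static bool may_spin(void)
{
    return true;
}

static void spin_then_lock(struct spin_mutex *m)
{
    for (int i = 0; i < 1000; i++) {
        if (!may_spin() || !atomic_load(&m->owner_running))
            break;                               /* fall back to sleeping */
        if (pthread_mutex_trylock(&m->lock) == 0)
            return;                              /* got it while spinning */
    }
    pthread_mutex_lock(&m->lock);                /* slow path */
}

int main(void)
{
    struct spin_mutex m = { .lock = PTHREAD_MUTEX_INITIALIZER };

    atomic_store(&m.owner_running, true);
    spin_then_lock(&m);
    pthread_mutex_unlock(&m.lock);
    return 0;
}
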
diff --git a/kernel/padata.c b/kernel/padata.c
index b1c9857f8402..fdd8ae609ce3 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -659,7 +659,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
659 err = __padata_add_cpu(pinst, cpu); 659 err = __padata_add_cpu(pinst, cpu);
660 mutex_unlock(&pinst->lock); 660 mutex_unlock(&pinst->lock);
661 if (err) 661 if (err)
662 return NOTIFY_BAD; 662 return notifier_from_errno(err);
663 break; 663 break;
664 664
665 case CPU_DOWN_PREPARE: 665 case CPU_DOWN_PREPARE:
@@ -670,7 +670,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
670 err = __padata_remove_cpu(pinst, cpu); 670 err = __padata_remove_cpu(pinst, cpu);
671 mutex_unlock(&pinst->lock); 671 mutex_unlock(&pinst->lock);
672 if (err) 672 if (err)
673 return NOTIFY_BAD; 673 return notifier_from_errno(err);
674 break; 674 break;
675 675
676 case CPU_UP_CANCELED: 676 case CPU_UP_CANCELED:
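
The padata changes above return notifier_from_errno(err) instead of the bare NOTIFY_BAD, so the actual errno travels back through the notifier chain. A small userspace sketch of the encode/decode round trip; the constants and helper names are illustrative, not copied from include/linux/notifier.h:

#include <assert.h>

#define NOTIFY_OK        0x0001
#define NOTIFY_STOP_MASK 0x8000
#define NOTIFY_BAD       (NOTIFY_STOP_MASK | 0x0002)

static int from_errno(int err)
{
    return err ? (NOTIFY_STOP_MASK | (NOTIFY_BAD - err)) : NOTIFY_OK;
}

static int to_errno(int ret)
{
    return (ret & NOTIFY_STOP_MASK) ? NOTIFY_BAD - ret : 0;
}

int main(void)
{
    assert(to_errno(from_errno(-12)) == -12);   /* -ENOMEM survives the round trip */
    assert(to_errno(from_errno(0)) == 0);
    return 0;
}
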
diff --git a/kernel/panic.c b/kernel/panic.c
index dbe13dbb057a..3b16cd93fa7d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -87,6 +87,7 @@ NORET_TYPE void panic(const char * fmt, ...)
87 */ 87 */
88 preempt_disable(); 88 preempt_disable();
89 89
90 console_verbose();
90 bust_spinlocks(1); 91 bust_spinlocks(1);
91 va_start(args, fmt); 92 va_start(args, fmt);
92 vsnprintf(buf, sizeof(buf), fmt, args); 93 vsnprintf(buf, sizeof(buf), fmt, args);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a4fa381db3c2..ff86c558af4c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
283static void 283static void
284list_add_event(struct perf_event *event, struct perf_event_context *ctx) 284list_add_event(struct perf_event *event, struct perf_event_context *ctx)
285{ 285{
286 struct perf_event *group_leader = event->group_leader; 286 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
287 event->attach_state |= PERF_ATTACH_CONTEXT;
287 288
288 /* 289 /*
289 * Depending on whether it is a standalone or sibling event, 290 * If we're a stand alone event or group leader, we go to the context
290 * add it straight to the context's event list, or to the group 291 * list, group events are kept attached to the group so that
291 * leader's sibling list: 292 * perf_group_detach can, at all times, locate all siblings.
292 */ 293 */
293 if (group_leader == event) { 294 if (event->group_leader == event) {
294 struct list_head *list; 295 struct list_head *list;
295 296
296 if (is_software_event(event)) 297 if (is_software_event(event))
@@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
298 299
299 list = ctx_group_list(event, ctx); 300 list = ctx_group_list(event, ctx);
300 list_add_tail(&event->group_entry, list); 301 list_add_tail(&event->group_entry, list);
301 } else {
302 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
303 !is_software_event(event))
304 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
305
306 list_add_tail(&event->group_entry, &group_leader->sibling_list);
307 group_leader->nr_siblings++;
308 } 302 }
309 303
310 list_add_rcu(&event->event_entry, &ctx->event_list); 304 list_add_rcu(&event->event_entry, &ctx->event_list);
@@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
313 ctx->nr_stat++; 307 ctx->nr_stat++;
314} 308}
315 309
310static void perf_group_attach(struct perf_event *event)
311{
312 struct perf_event *group_leader = event->group_leader;
313
314 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP);
315 event->attach_state |= PERF_ATTACH_GROUP;
316
317 if (group_leader == event)
318 return;
319
320 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
321 !is_software_event(event))
322 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
323
324 list_add_tail(&event->group_entry, &group_leader->sibling_list);
325 group_leader->nr_siblings++;
326}
327
316/* 328/*
317 * Remove a event from the lists for its context. 329 * Remove a event from the lists for its context.
318 * Must be called with ctx->mutex and ctx->lock held. 330 * Must be called with ctx->mutex and ctx->lock held.
@@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
320static void 332static void
321list_del_event(struct perf_event *event, struct perf_event_context *ctx) 333list_del_event(struct perf_event *event, struct perf_event_context *ctx)
322{ 334{
323 if (list_empty(&event->group_entry)) 335 /*
336 * We can have double detach due to exit/hot-unplug + close.
337 */
338 if (!(event->attach_state & PERF_ATTACH_CONTEXT))
324 return; 339 return;
340
341 event->attach_state &= ~PERF_ATTACH_CONTEXT;
342
325 ctx->nr_events--; 343 ctx->nr_events--;
326 if (event->attr.inherit_stat) 344 if (event->attr.inherit_stat)
327 ctx->nr_stat--; 345 ctx->nr_stat--;
328 346
329 list_del_init(&event->group_entry);
330 list_del_rcu(&event->event_entry); 347 list_del_rcu(&event->event_entry);
331 348
332 if (event->group_leader != event) 349 if (event->group_leader == event)
333 event->group_leader->nr_siblings--; 350 list_del_init(&event->group_entry);
334 351
335 update_group_times(event); 352 update_group_times(event);
336 353
@@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
345 event->state = PERF_EVENT_STATE_OFF; 362 event->state = PERF_EVENT_STATE_OFF;
346} 363}
347 364
348static void 365static void perf_group_detach(struct perf_event *event)
349perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
350{ 366{
351 struct perf_event *sibling, *tmp; 367 struct perf_event *sibling, *tmp;
368 struct list_head *list = NULL;
369
370 /*
371 * We can have double detach due to exit/hot-unplug + close.
372 */
373 if (!(event->attach_state & PERF_ATTACH_GROUP))
374 return;
375
376 event->attach_state &= ~PERF_ATTACH_GROUP;
377
378 /*
379 * If this is a sibling, remove it from its group.
380 */
381 if (event->group_leader != event) {
382 list_del_init(&event->group_entry);
383 event->group_leader->nr_siblings--;
384 return;
385 }
386
387 if (!list_empty(&event->group_entry))
388 list = &event->group_entry;
352 389
353 /* 390 /*
354 * If this was a group event with sibling events then 391 * If this was a group event with sibling events then
355 * upgrade the siblings to singleton events by adding them 392 * upgrade the siblings to singleton events by adding them
356 * to the context list directly: 393 * to whatever list we are on.
357 */ 394 */
358 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { 395 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
359 struct list_head *list; 396 if (list)
360 397 list_move_tail(&sibling->group_entry, list);
361 list = ctx_group_list(event, ctx);
362 list_move_tail(&sibling->group_entry, list);
363 sibling->group_leader = sibling; 398 sibling->group_leader = sibling;
364 399
365 /* Inherit group flags from the previous leader */ 400 /* Inherit group flags from the previous leader */
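
The new perf_group_attach()/perf_group_detach() pair above guards against the double detach that exit/hot-unplug plus close can produce by tracking attachment in a flag word rather than inferring it from list emptiness. A compact sketch of that idempotent attach/detach guard (userspace illustration; the flag names and struct are invented):

#include <assert.h>

#define ATTACH_CONTEXT 0x01
#define ATTACH_GROUP   0x02

struct evt { unsigned int attach_state; int in_list; };

static void ctx_attach(struct evt *e)
{
    assert(!(e->attach_state & ATTACH_CONTEXT));   /* attaching twice is a bug */
    e->attach_state |= ATTACH_CONTEXT;
    e->in_list = 1;
}

static void ctx_detach(struct evt *e)
{
    if (!(e->attach_state & ATTACH_CONTEXT))       /* double detach: exit + close */
        return;
    e->attach_state &= ~ATTACH_CONTEXT;
    e->in_list = 0;
}

int main(void)
{
    struct evt e = { 0, 0 };

    ctx_attach(&e);
    ctx_detach(&e);
    ctx_detach(&e);          /* harmless second detach */
    return e.in_list;
}
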
@@ -652,8 +687,11 @@ group_sched_in(struct perf_event *group_event,
652 if (txn) 687 if (txn)
653 pmu->start_txn(pmu); 688 pmu->start_txn(pmu);
654 689
655 if (event_sched_in(group_event, cpuctx, ctx)) 690 if (event_sched_in(group_event, cpuctx, ctx)) {
691 if (txn)
692 pmu->cancel_txn(pmu);
656 return -EAGAIN; 693 return -EAGAIN;
694 }
657 695
658 /* 696 /*
659 * Schedule in siblings as one group (if any): 697 * Schedule in siblings as one group (if any):
@@ -675,9 +713,6 @@ group_sched_in(struct perf_event *group_event,
675 } 713 }
676 714
677group_error: 715group_error:
678 if (txn)
679 pmu->cancel_txn(pmu);
680
681 /* 716 /*
682 * Groups can be scheduled in as one unit only, so undo any 717 * Groups can be scheduled in as one unit only, so undo any
683 * partial group before returning: 718 * partial group before returning:
@@ -689,6 +724,9 @@ group_error:
689 } 724 }
690 event_sched_out(group_event, cpuctx, ctx); 725 event_sched_out(group_event, cpuctx, ctx);
691 726
727 if (txn)
728 pmu->cancel_txn(pmu);
729
692 return -EAGAIN; 730 return -EAGAIN;
693} 731}
694 732
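
The group_sched_in() fix above makes sure a transaction opened with start_txn() is cancelled on every failure path, including the case where the group leader itself fails to schedule. A toy sketch of that pairing (invented txn helpers, not the PMU API):

#include <stdbool.h>
#include <stdio.h>

static bool txn_open;

static void start_txn(void)  { txn_open = true; }
static void cancel_txn(void) { txn_open = false; }
static int  commit_txn(void) { txn_open = false; return 0; }

static int schedule_group(int nmembers, int fail_at)
{
    start_txn();
    for (int i = 0; i < nmembers; i++) {
        if (i == fail_at) {
            cancel_txn();        /* undo the reservation on *every* error path */
            return -1;
        }
    }
    return commit_txn();
}

int main(void)
{
    schedule_group(4, 0);        /* even "leader" failure cancels the txn */
    printf("txn still open after failure: %d\n", txn_open);   /* prints 0 */
    return 0;
}
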
@@ -727,6 +765,7 @@ static void add_event_to_ctx(struct perf_event *event,
727 struct perf_event_context *ctx) 765 struct perf_event_context *ctx)
728{ 766{
729 list_add_event(event, ctx); 767 list_add_event(event, ctx);
768 perf_group_attach(event);
730 event->tstamp_enabled = ctx->time; 769 event->tstamp_enabled = ctx->time;
731 event->tstamp_running = ctx->time; 770 event->tstamp_running = ctx->time;
732 event->tstamp_stopped = ctx->time; 771 event->tstamp_stopped = ctx->time;
@@ -1468,6 +1507,9 @@ do { \
1468 divisor = nsec * frequency; 1507 divisor = nsec * frequency;
1469 } 1508 }
1470 1509
1510 if (!divisor)
1511 return dividend;
1512
1471 return div64_u64(dividend, divisor); 1513 return div64_u64(dividend, divisor);
1472} 1514}
1473 1515
@@ -1490,7 +1532,7 @@ static int perf_event_start(struct perf_event *event)
1490static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) 1532static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1491{ 1533{
1492 struct hw_perf_event *hwc = &event->hw; 1534 struct hw_perf_event *hwc = &event->hw;
1493 u64 period, sample_period; 1535 s64 period, sample_period;
1494 s64 delta; 1536 s64 delta;
1495 1537
1496 period = perf_calculate_period(event, nsec, count); 1538 period = perf_calculate_period(event, nsec, count);
@@ -1841,6 +1883,7 @@ static void free_event_rcu(struct rcu_head *head)
1841} 1883}
1842 1884
1843static void perf_pending_sync(struct perf_event *event); 1885static void perf_pending_sync(struct perf_event *event);
1886static void perf_mmap_data_put(struct perf_mmap_data *data);
1844 1887
1845static void free_event(struct perf_event *event) 1888static void free_event(struct perf_event *event)
1846{ 1889{
@@ -1856,9 +1899,9 @@ static void free_event(struct perf_event *event)
1856 atomic_dec(&nr_task_events); 1899 atomic_dec(&nr_task_events);
1857 } 1900 }
1858 1901
1859 if (event->output) { 1902 if (event->data) {
1860 fput(event->output->filp); 1903 perf_mmap_data_put(event->data);
1861 event->output = NULL; 1904 event->data = NULL;
1862 } 1905 }
1863 1906
1864 if (event->destroy) 1907 if (event->destroy)
@@ -1893,8 +1936,8 @@ int perf_event_release_kernel(struct perf_event *event)
1893 */ 1936 */
1894 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); 1937 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
1895 raw_spin_lock_irq(&ctx->lock); 1938 raw_spin_lock_irq(&ctx->lock);
1939 perf_group_detach(event);
1896 list_del_event(event, ctx); 1940 list_del_event(event, ctx);
1897 perf_destroy_group(event, ctx);
1898 raw_spin_unlock_irq(&ctx->lock); 1941 raw_spin_unlock_irq(&ctx->lock);
1899 mutex_unlock(&ctx->mutex); 1942 mutex_unlock(&ctx->mutex);
1900 1943
@@ -2175,7 +2218,27 @@ unlock:
2175 return ret; 2218 return ret;
2176} 2219}
2177 2220
2178static int perf_event_set_output(struct perf_event *event, int output_fd); 2221static const struct file_operations perf_fops;
2222
2223static struct perf_event *perf_fget_light(int fd, int *fput_needed)
2224{
2225 struct file *file;
2226
2227 file = fget_light(fd, fput_needed);
2228 if (!file)
2229 return ERR_PTR(-EBADF);
2230
2231 if (file->f_op != &perf_fops) {
2232 fput_light(file, *fput_needed);
2233 *fput_needed = 0;
2234 return ERR_PTR(-EBADF);
2235 }
2236
2237 return file->private_data;
2238}
2239
2240static int perf_event_set_output(struct perf_event *event,
2241 struct perf_event *output_event);
2179static int perf_event_set_filter(struct perf_event *event, void __user *arg); 2242static int perf_event_set_filter(struct perf_event *event, void __user *arg);
2180 2243
2181static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2244static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -2202,7 +2265,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2202 return perf_event_period(event, (u64 __user *)arg); 2265 return perf_event_period(event, (u64 __user *)arg);
2203 2266
2204 case PERF_EVENT_IOC_SET_OUTPUT: 2267 case PERF_EVENT_IOC_SET_OUTPUT:
2205 return perf_event_set_output(event, arg); 2268 {
2269 struct perf_event *output_event = NULL;
2270 int fput_needed = 0;
2271 int ret;
2272
2273 if (arg != -1) {
2274 output_event = perf_fget_light(arg, &fput_needed);
2275 if (IS_ERR(output_event))
2276 return PTR_ERR(output_event);
2277 }
2278
2279 ret = perf_event_set_output(event, output_event);
2280 if (output_event)
2281 fput_light(output_event->filp, fput_needed);
2282
2283 return ret;
2284 }
2206 2285
2207 case PERF_EVENT_IOC_SET_FILTER: 2286 case PERF_EVENT_IOC_SET_FILTER:
2208 return perf_event_set_filter(event, (void __user *)arg); 2287 return perf_event_set_filter(event, (void __user *)arg);
@@ -2297,11 +2376,6 @@ unlock:
2297 rcu_read_unlock(); 2376 rcu_read_unlock();
2298} 2377}
2299 2378
2300static unsigned long perf_data_size(struct perf_mmap_data *data)
2301{
2302 return data->nr_pages << (PAGE_SHIFT + data->data_order);
2303}
2304
2305#ifndef CONFIG_PERF_USE_VMALLOC 2379#ifndef CONFIG_PERF_USE_VMALLOC
2306 2380
2307/* 2381/*
@@ -2320,6 +2394,19 @@ perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
2320 return virt_to_page(data->data_pages[pgoff - 1]); 2394 return virt_to_page(data->data_pages[pgoff - 1]);
2321} 2395}
2322 2396
2397static void *perf_mmap_alloc_page(int cpu)
2398{
2399 struct page *page;
2400 int node;
2401
2402 node = (cpu == -1) ? cpu : cpu_to_node(cpu);
2403 page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
2404 if (!page)
2405 return NULL;
2406
2407 return page_address(page);
2408}
2409
2323static struct perf_mmap_data * 2410static struct perf_mmap_data *
2324perf_mmap_data_alloc(struct perf_event *event, int nr_pages) 2411perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2325{ 2412{
@@ -2327,8 +2414,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2327 unsigned long size; 2414 unsigned long size;
2328 int i; 2415 int i;
2329 2416
2330 WARN_ON(atomic_read(&event->mmap_count));
2331
2332 size = sizeof(struct perf_mmap_data); 2417 size = sizeof(struct perf_mmap_data);
2333 size += nr_pages * sizeof(void *); 2418 size += nr_pages * sizeof(void *);
2334 2419
@@ -2336,17 +2421,16 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2336 if (!data) 2421 if (!data)
2337 goto fail; 2422 goto fail;
2338 2423
2339 data->user_page = (void *)get_zeroed_page(GFP_KERNEL); 2424 data->user_page = perf_mmap_alloc_page(event->cpu);
2340 if (!data->user_page) 2425 if (!data->user_page)
2341 goto fail_user_page; 2426 goto fail_user_page;
2342 2427
2343 for (i = 0; i < nr_pages; i++) { 2428 for (i = 0; i < nr_pages; i++) {
2344 data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); 2429 data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
2345 if (!data->data_pages[i]) 2430 if (!data->data_pages[i])
2346 goto fail_data_pages; 2431 goto fail_data_pages;
2347 } 2432 }
2348 2433
2349 data->data_order = 0;
2350 data->nr_pages = nr_pages; 2434 data->nr_pages = nr_pages;
2351 2435
2352 return data; 2436 return data;
@@ -2382,6 +2466,11 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
2382 kfree(data); 2466 kfree(data);
2383} 2467}
2384 2468
2469static inline int page_order(struct perf_mmap_data *data)
2470{
2471 return 0;
2472}
2473
2385#else 2474#else
2386 2475
2387/* 2476/*
@@ -2390,10 +2479,15 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
2390 * Required for architectures that have d-cache aliasing issues. 2479 * Required for architectures that have d-cache aliasing issues.
2391 */ 2480 */
2392 2481
2482static inline int page_order(struct perf_mmap_data *data)
2483{
2484 return data->page_order;
2485}
2486
2393static struct page * 2487static struct page *
2394perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) 2488perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
2395{ 2489{
2396 if (pgoff > (1UL << data->data_order)) 2490 if (pgoff > (1UL << page_order(data)))
2397 return NULL; 2491 return NULL;
2398 2492
2399 return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE); 2493 return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
@@ -2413,7 +2507,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
2413 int i, nr; 2507 int i, nr;
2414 2508
2415 data = container_of(work, struct perf_mmap_data, work); 2509 data = container_of(work, struct perf_mmap_data, work);
2416 nr = 1 << data->data_order; 2510 nr = 1 << page_order(data);
2417 2511
2418 base = data->user_page; 2512 base = data->user_page;
2419 for (i = 0; i < nr + 1; i++) 2513 for (i = 0; i < nr + 1; i++)
@@ -2435,8 +2529,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2435 unsigned long size; 2529 unsigned long size;
2436 void *all_buf; 2530 void *all_buf;
2437 2531
2438 WARN_ON(atomic_read(&event->mmap_count));
2439
2440 size = sizeof(struct perf_mmap_data); 2532 size = sizeof(struct perf_mmap_data);
2441 size += sizeof(void *); 2533 size += sizeof(void *);
2442 2534
@@ -2452,7 +2544,7 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2452 2544
2453 data->user_page = all_buf; 2545 data->user_page = all_buf;
2454 data->data_pages[0] = all_buf + PAGE_SIZE; 2546 data->data_pages[0] = all_buf + PAGE_SIZE;
2455 data->data_order = ilog2(nr_pages); 2547 data->page_order = ilog2(nr_pages);
2456 data->nr_pages = 1; 2548 data->nr_pages = 1;
2457 2549
2458 return data; 2550 return data;
@@ -2466,6 +2558,11 @@ fail:
2466 2558
2467#endif 2559#endif
2468 2560
2561static unsigned long perf_data_size(struct perf_mmap_data *data)
2562{
2563 return data->nr_pages << (PAGE_SHIFT + page_order(data));
2564}
2565
2469static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2566static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2470{ 2567{
2471 struct perf_event *event = vma->vm_file->private_data; 2568 struct perf_event *event = vma->vm_file->private_data;
@@ -2506,8 +2603,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2506{ 2603{
2507 long max_size = perf_data_size(data); 2604 long max_size = perf_data_size(data);
2508 2605
2509 atomic_set(&data->lock, -1);
2510
2511 if (event->attr.watermark) { 2606 if (event->attr.watermark) {
2512 data->watermark = min_t(long, max_size, 2607 data->watermark = min_t(long, max_size,
2513 event->attr.wakeup_watermark); 2608 event->attr.wakeup_watermark);
@@ -2516,7 +2611,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2516 if (!data->watermark) 2611 if (!data->watermark)
2517 data->watermark = max_size / 2; 2612 data->watermark = max_size / 2;
2518 2613
2519 2614 atomic_set(&data->refcount, 1);
2520 rcu_assign_pointer(event->data, data); 2615 rcu_assign_pointer(event->data, data);
2521} 2616}
2522 2617
@@ -2528,13 +2623,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2528 perf_mmap_data_free(data); 2623 perf_mmap_data_free(data);
2529} 2624}
2530 2625
2531static void perf_mmap_data_release(struct perf_event *event) 2626static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
2532{ 2627{
2533 struct perf_mmap_data *data = event->data; 2628 struct perf_mmap_data *data;
2534 2629
2535 WARN_ON(atomic_read(&event->mmap_count)); 2630 rcu_read_lock();
2631 data = rcu_dereference(event->data);
2632 if (data) {
2633 if (!atomic_inc_not_zero(&data->refcount))
2634 data = NULL;
2635 }
2636 rcu_read_unlock();
2637
2638 return data;
2639}
2640
2641static void perf_mmap_data_put(struct perf_mmap_data *data)
2642{
2643 if (!atomic_dec_and_test(&data->refcount))
2644 return;
2536 2645
2537 rcu_assign_pointer(event->data, NULL);
2538 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); 2646 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
2539} 2647}
2540 2648
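
perf_mmap_data_get()/perf_mmap_data_put() above replace the old output-redirection special case with a plain reference count: a lookup only succeeds while the count is still non-zero, and the last put frees the buffer. A userspace sketch of that idiom with C11 atomics (the RCU protection around the lookup is omitted, and the names are invented):

#include <stdatomic.h>
#include <stdlib.h>

struct buf { atomic_int refcount; };

static int inc_not_zero(atomic_int *v)
{
    int old = atomic_load(v);

    while (old != 0)
        if (atomic_compare_exchange_weak(v, &old, old + 1))
            return 1;
    return 0;                        /* already on its way to being freed */
}

static struct buf *buf_get(struct buf *b)
{
    return (b && inc_not_zero(&b->refcount)) ? b : NULL;
}

static void buf_put(struct buf *b)
{
    if (atomic_fetch_sub(&b->refcount, 1) == 1)
        free(b);                     /* last reference dropped */
}

int main(void)
{
    struct buf *b = malloc(sizeof(*b));
    struct buf *ref;

    atomic_init(&b->refcount, 1);
    ref = buf_get(b);                /* second reference */
    buf_put(ref);
    buf_put(b);
    return 0;
}
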
@@ -2549,15 +2657,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
2549{ 2657{
2550 struct perf_event *event = vma->vm_file->private_data; 2658 struct perf_event *event = vma->vm_file->private_data;
2551 2659
2552 WARN_ON_ONCE(event->ctx->parent_ctx);
2553 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { 2660 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
2554 unsigned long size = perf_data_size(event->data); 2661 unsigned long size = perf_data_size(event->data);
2555 struct user_struct *user = current_user(); 2662 struct user_struct *user = event->mmap_user;
2663 struct perf_mmap_data *data = event->data;
2556 2664
2557 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 2665 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
2558 vma->vm_mm->locked_vm -= event->data->nr_locked; 2666 vma->vm_mm->locked_vm -= event->mmap_locked;
2559 perf_mmap_data_release(event); 2667 rcu_assign_pointer(event->data, NULL);
2560 mutex_unlock(&event->mmap_mutex); 2668 mutex_unlock(&event->mmap_mutex);
2669
2670 perf_mmap_data_put(data);
2671 free_uid(user);
2561 } 2672 }
2562} 2673}
2563 2674
@@ -2580,6 +2691,14 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2580 long user_extra, extra; 2691 long user_extra, extra;
2581 int ret = 0; 2692 int ret = 0;
2582 2693
2694 /*
2695 * Don't allow mmap() of inherited per-task counters. This would
2696 * create a performance issue due to all children writing to the
2697 * same buffer.
2698 */
2699 if (event->cpu == -1 && event->attr.inherit)
2700 return -EINVAL;
2701
2583 if (!(vma->vm_flags & VM_SHARED)) 2702 if (!(vma->vm_flags & VM_SHARED))
2584 return -EINVAL; 2703 return -EINVAL;
2585 2704
@@ -2601,13 +2720,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2601 2720
2602 WARN_ON_ONCE(event->ctx->parent_ctx); 2721 WARN_ON_ONCE(event->ctx->parent_ctx);
2603 mutex_lock(&event->mmap_mutex); 2722 mutex_lock(&event->mmap_mutex);
2604 if (event->output) { 2723 if (event->data) {
2605 ret = -EINVAL; 2724 if (event->data->nr_pages == nr_pages)
2606 goto unlock; 2725 atomic_inc(&event->data->refcount);
2607 } 2726 else
2608
2609 if (atomic_inc_not_zero(&event->mmap_count)) {
2610 if (nr_pages != event->data->nr_pages)
2611 ret = -EINVAL; 2727 ret = -EINVAL;
2612 goto unlock; 2728 goto unlock;
2613 } 2729 }
@@ -2639,21 +2755,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2639 WARN_ON(event->data); 2755 WARN_ON(event->data);
2640 2756
2641 data = perf_mmap_data_alloc(event, nr_pages); 2757 data = perf_mmap_data_alloc(event, nr_pages);
2642 ret = -ENOMEM; 2758 if (!data) {
2643 if (!data) 2759 ret = -ENOMEM;
2644 goto unlock; 2760 goto unlock;
2761 }
2645 2762
2646 ret = 0;
2647 perf_mmap_data_init(event, data); 2763 perf_mmap_data_init(event, data);
2648
2649 atomic_set(&event->mmap_count, 1);
2650 atomic_long_add(user_extra, &user->locked_vm);
2651 vma->vm_mm->locked_vm += extra;
2652 event->data->nr_locked = extra;
2653 if (vma->vm_flags & VM_WRITE) 2764 if (vma->vm_flags & VM_WRITE)
2654 event->data->writable = 1; 2765 event->data->writable = 1;
2655 2766
2767 atomic_long_add(user_extra, &user->locked_vm);
2768 event->mmap_locked = extra;
2769 event->mmap_user = get_current_user();
2770 vma->vm_mm->locked_vm += event->mmap_locked;
2771
2656unlock: 2772unlock:
2773 if (!ret)
2774 atomic_inc(&event->mmap_count);
2657 mutex_unlock(&event->mmap_mutex); 2775 mutex_unlock(&event->mmap_mutex);
2658 2776
2659 vma->vm_flags |= VM_RESERVED; 2777 vma->vm_flags |= VM_RESERVED;
@@ -2885,127 +3003,87 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
2885} 3003}
2886 3004
2887/* 3005/*
2888 * Curious locking construct.
2889 *
2890 * We need to ensure a later event_id doesn't publish a head when a former 3006 * We need to ensure a later event_id doesn't publish a head when a former
2891 * event_id isn't done writing. However since we need to deal with NMIs we 3007 * event isn't done writing. However since we need to deal with NMIs we
2892 * cannot fully serialize things. 3008 * cannot fully serialize things.
2893 * 3009 *
2894 * What we do is serialize between CPUs so we only have to deal with NMI
2895 * nesting on a single CPU.
2896 *
2897 * We only publish the head (and generate a wakeup) when the outer-most 3010 * We only publish the head (and generate a wakeup) when the outer-most
2898 * event_id completes. 3011 * event completes.
2899 */ 3012 */
2900static void perf_output_lock(struct perf_output_handle *handle) 3013static void perf_output_get_handle(struct perf_output_handle *handle)
2901{ 3014{
2902 struct perf_mmap_data *data = handle->data; 3015 struct perf_mmap_data *data = handle->data;
2903 int cur, cpu = get_cpu();
2904
2905 handle->locked = 0;
2906 3016
2907 for (;;) { 3017 preempt_disable();
2908 cur = atomic_cmpxchg(&data->lock, -1, cpu); 3018 local_inc(&data->nest);
2909 if (cur == -1) { 3019 handle->wakeup = local_read(&data->wakeup);
2910 handle->locked = 1;
2911 break;
2912 }
2913 if (cur == cpu)
2914 break;
2915
2916 cpu_relax();
2917 }
2918} 3020}
2919 3021
2920static void perf_output_unlock(struct perf_output_handle *handle) 3022static void perf_output_put_handle(struct perf_output_handle *handle)
2921{ 3023{
2922 struct perf_mmap_data *data = handle->data; 3024 struct perf_mmap_data *data = handle->data;
2923 unsigned long head; 3025 unsigned long head;
2924 int cpu;
2925
2926 data->done_head = data->head;
2927
2928 if (!handle->locked)
2929 goto out;
2930 3026
2931again: 3027again:
2932 /* 3028 head = local_read(&data->head);
2933 * The xchg implies a full barrier that ensures all writes are done
2934 * before we publish the new head, matched by a rmb() in userspace when
2935 * reading this position.
2936 */
2937 while ((head = atomic_long_xchg(&data->done_head, 0)))
2938 data->user_page->data_head = head;
2939 3029
2940 /* 3030 /*
2941 * NMI can happen here, which means we can miss a done_head update. 3031 * IRQ/NMI can happen here, which means we can miss a head update.
2942 */ 3032 */
2943 3033
2944 cpu = atomic_xchg(&data->lock, -1); 3034 if (!local_dec_and_test(&data->nest))
2945 WARN_ON_ONCE(cpu != smp_processor_id()); 3035 goto out;
2946 3036
2947 /* 3037 /*
2948 * Therefore we have to validate we did not indeed do so. 3038 * Publish the known good head. Rely on the full barrier implied
3039 * by atomic_dec_and_test() order the data->head read and this
3040 * write.
2949 */ 3041 */
2950 if (unlikely(atomic_long_read(&data->done_head))) { 3042 data->user_page->data_head = head;
2951 /*
2952 * Since we had it locked, we can lock it again.
2953 */
2954 while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
2955 cpu_relax();
2956 3043
3044 /*
3045 * Now check if we missed an update, rely on the (compiler)
3046 * barrier in atomic_dec_and_test() to re-read data->head.
3047 */
3048 if (unlikely(head != local_read(&data->head))) {
3049 local_inc(&data->nest);
2957 goto again; 3050 goto again;
2958 } 3051 }
2959 3052
2960 if (atomic_xchg(&data->wakeup, 0)) 3053 if (handle->wakeup != local_read(&data->wakeup))
2961 perf_output_wakeup(handle); 3054 perf_output_wakeup(handle);
2962out: 3055
2963 put_cpu(); 3056 out:
3057 preempt_enable();
2964} 3058}
2965 3059
2966void perf_output_copy(struct perf_output_handle *handle, 3060__always_inline void perf_output_copy(struct perf_output_handle *handle,
2967 const void *buf, unsigned int len) 3061 const void *buf, unsigned int len)
2968{ 3062{
2969 unsigned int pages_mask;
2970 unsigned long offset;
2971 unsigned int size;
2972 void **pages;
2973
2974 offset = handle->offset;
2975 pages_mask = handle->data->nr_pages - 1;
2976 pages = handle->data->data_pages;
2977
2978 do { 3063 do {
2979 unsigned long page_offset; 3064 unsigned long size = min_t(unsigned long, handle->size, len);
2980 unsigned long page_size;
2981 int nr;
2982 3065
2983 nr = (offset >> PAGE_SHIFT) & pages_mask; 3066 memcpy(handle->addr, buf, size);
2984 page_size = 1UL << (handle->data->data_order + PAGE_SHIFT);
2985 page_offset = offset & (page_size - 1);
2986 size = min_t(unsigned int, page_size - page_offset, len);
2987 3067
2988 memcpy(pages[nr] + page_offset, buf, size); 3068 len -= size;
3069 handle->addr += size;
3070 buf += size;
3071 handle->size -= size;
3072 if (!handle->size) {
3073 struct perf_mmap_data *data = handle->data;
2989 3074
2990 len -= size; 3075 handle->page++;
2991 buf += size; 3076 handle->page &= data->nr_pages - 1;
2992 offset += size; 3077 handle->addr = data->data_pages[handle->page];
3078 handle->size = PAGE_SIZE << page_order(data);
3079 }
2993 } while (len); 3080 } while (len);
2994
2995 handle->offset = offset;
2996
2997 /*
2998 * Check we didn't copy past our reservation window, taking the
2999 * possible unsigned int wrap into account.
3000 */
3001 WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
3002} 3081}
3003 3082
3004int perf_output_begin(struct perf_output_handle *handle, 3083int perf_output_begin(struct perf_output_handle *handle,
3005 struct perf_event *event, unsigned int size, 3084 struct perf_event *event, unsigned int size,
3006 int nmi, int sample) 3085 int nmi, int sample)
3007{ 3086{
3008 struct perf_event *output_event;
3009 struct perf_mmap_data *data; 3087 struct perf_mmap_data *data;
3010 unsigned long tail, offset, head; 3088 unsigned long tail, offset, head;
3011 int have_lost; 3089 int have_lost;
@@ -3022,10 +3100,6 @@ int perf_output_begin(struct perf_output_handle *handle,
3022 if (event->parent) 3100 if (event->parent)
3023 event = event->parent; 3101 event = event->parent;
3024 3102
3025 output_event = rcu_dereference(event->output);
3026 if (output_event)
3027 event = output_event;
3028
3029 data = rcu_dereference(event->data); 3103 data = rcu_dereference(event->data);
3030 if (!data) 3104 if (!data)
3031 goto out; 3105 goto out;
@@ -3036,13 +3110,13 @@ int perf_output_begin(struct perf_output_handle *handle,
3036 handle->sample = sample; 3110 handle->sample = sample;
3037 3111
3038 if (!data->nr_pages) 3112 if (!data->nr_pages)
3039 goto fail; 3113 goto out;
3040 3114
3041 have_lost = atomic_read(&data->lost); 3115 have_lost = local_read(&data->lost);
3042 if (have_lost) 3116 if (have_lost)
3043 size += sizeof(lost_event); 3117 size += sizeof(lost_event);
3044 3118
3045 perf_output_lock(handle); 3119 perf_output_get_handle(handle);
3046 3120
3047 do { 3121 do {
3048 /* 3122 /*
@@ -3052,24 +3126,28 @@ int perf_output_begin(struct perf_output_handle *handle,
3052 */ 3126 */
3053 tail = ACCESS_ONCE(data->user_page->data_tail); 3127 tail = ACCESS_ONCE(data->user_page->data_tail);
3054 smp_rmb(); 3128 smp_rmb();
3055 offset = head = atomic_long_read(&data->head); 3129 offset = head = local_read(&data->head);
3056 head += size; 3130 head += size;
3057 if (unlikely(!perf_output_space(data, tail, offset, head))) 3131 if (unlikely(!perf_output_space(data, tail, offset, head)))
3058 goto fail; 3132 goto fail;
3059 } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); 3133 } while (local_cmpxchg(&data->head, offset, head) != offset);
3060 3134
3061 handle->offset = offset; 3135 if (head - local_read(&data->wakeup) > data->watermark)
3062 handle->head = head; 3136 local_add(data->watermark, &data->wakeup);
3063 3137
3064 if (head - tail > data->watermark) 3138 handle->page = offset >> (PAGE_SHIFT + page_order(data));
3065 atomic_set(&data->wakeup, 1); 3139 handle->page &= data->nr_pages - 1;
3140 handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
3141 handle->addr = data->data_pages[handle->page];
3142 handle->addr += handle->size;
3143 handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
3066 3144
3067 if (have_lost) { 3145 if (have_lost) {
3068 lost_event.header.type = PERF_RECORD_LOST; 3146 lost_event.header.type = PERF_RECORD_LOST;
3069 lost_event.header.misc = 0; 3147 lost_event.header.misc = 0;
3070 lost_event.header.size = sizeof(lost_event); 3148 lost_event.header.size = sizeof(lost_event);
3071 lost_event.id = event->id; 3149 lost_event.id = event->id;
3072 lost_event.lost = atomic_xchg(&data->lost, 0); 3150 lost_event.lost = local_xchg(&data->lost, 0);
3073 3151
3074 perf_output_put(handle, lost_event); 3152 perf_output_put(handle, lost_event);
3075 } 3153 }
@@ -3077,8 +3155,8 @@ int perf_output_begin(struct perf_output_handle *handle,
3077 return 0; 3155 return 0;
3078 3156
3079fail: 3157fail:
3080 atomic_inc(&data->lost); 3158 local_inc(&data->lost);
3081 perf_output_unlock(handle); 3159 perf_output_put_handle(handle);
3082out: 3160out:
3083 rcu_read_unlock(); 3161 rcu_read_unlock();
3084 3162
@@ -3093,14 +3171,14 @@ void perf_output_end(struct perf_output_handle *handle)
3093 int wakeup_events = event->attr.wakeup_events; 3171 int wakeup_events = event->attr.wakeup_events;
3094 3172
3095 if (handle->sample && wakeup_events) { 3173 if (handle->sample && wakeup_events) {
3096 int events = atomic_inc_return(&data->events); 3174 int events = local_inc_return(&data->events);
3097 if (events >= wakeup_events) { 3175 if (events >= wakeup_events) {
3098 atomic_sub(wakeup_events, &data->events); 3176 local_sub(wakeup_events, &data->events);
3099 atomic_set(&data->wakeup, 1); 3177 local_inc(&data->wakeup);
3100 } 3178 }
3101 } 3179 }
3102 3180
3103 perf_output_unlock(handle); 3181 perf_output_put_handle(handle);
3104 rcu_read_unlock(); 3182 rcu_read_unlock();
3105} 3183}
3106 3184
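
perf_output_get_handle()/perf_output_put_handle() above drop the per-CPU spinlock in favour of a nesting counter: nested (IRQ/NMI) writers only bump the counter, and the outermost writer publishes data_head, re-checking afterwards in case a nested write slipped in while it was publishing. A single-threaded userspace sketch of that publish-and-recheck loop (plain C11 atomics, no NMIs involved, names invented):

#include <stdatomic.h>
#include <stdio.h>

static atomic_long head, nest, published;

static void output_begin(long bytes)
{
    atomic_fetch_add(&nest, 1);
    atomic_fetch_add(&head, bytes);
}

static void output_end(void)
{
again:;
    long h = atomic_load(&head);

    if (atomic_fetch_sub(&nest, 1) != 1)
        return;                          /* inner writer: outer one will publish */
    atomic_store(&published, h);         /* outermost writer publishes the head */
    if (h != atomic_load(&head)) {       /* a nested write raced in: redo */
        atomic_fetch_add(&nest, 1);
        goto again;
    }
}

int main(void)
{
    output_begin(64);
    output_end();
    printf("published %ld\n", atomic_load(&published));
    return 0;
}
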
@@ -3436,22 +3514,13 @@ static void perf_event_task_output(struct perf_event *event,
3436{ 3514{
3437 struct perf_output_handle handle; 3515 struct perf_output_handle handle;
3438 struct task_struct *task = task_event->task; 3516 struct task_struct *task = task_event->task;
3439 unsigned long flags;
3440 int size, ret; 3517 int size, ret;
3441 3518
3442 /*
3443 * If this CPU attempts to acquire an rq lock held by a CPU spinning
3444 * in perf_output_lock() from interrupt context, it's game over.
3445 */
3446 local_irq_save(flags);
3447
3448 size = task_event->event_id.header.size; 3519 size = task_event->event_id.header.size;
3449 ret = perf_output_begin(&handle, event, size, 0, 0); 3520 ret = perf_output_begin(&handle, event, size, 0, 0);
3450 3521
3451 if (ret) { 3522 if (ret)
3452 local_irq_restore(flags);
3453 return; 3523 return;
3454 }
3455 3524
3456 task_event->event_id.pid = perf_event_pid(event, task); 3525 task_event->event_id.pid = perf_event_pid(event, task);
3457 task_event->event_id.ppid = perf_event_pid(event, current); 3526 task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3462,7 +3531,6 @@ static void perf_event_task_output(struct perf_event *event,
3462 perf_output_put(&handle, task_event->event_id); 3531 perf_output_put(&handle, task_event->event_id);
3463 3532
3464 perf_output_end(&handle); 3533 perf_output_end(&handle);
3465 local_irq_restore(flags);
3466} 3534}
3467 3535
3468static int perf_event_task_match(struct perf_event *event) 3536static int perf_event_task_match(struct perf_event *event)
@@ -3990,13 +4058,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3990 } 4058 }
3991} 4059}
3992 4060
3993static void perf_swevent_unthrottle(struct perf_event *event)
3994{
3995 /*
3996 * Nothing to do, we already reset hwc->interrupts.
3997 */
3998}
3999
4000static void perf_swevent_add(struct perf_event *event, u64 nr, 4061static void perf_swevent_add(struct perf_event *event, u64 nr,
4001 int nmi, struct perf_sample_data *data, 4062 int nmi, struct perf_sample_data *data,
4002 struct pt_regs *regs) 4063 struct pt_regs *regs)
@@ -4020,9 +4081,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4020 perf_swevent_overflow(event, 0, nmi, data, regs); 4081 perf_swevent_overflow(event, 0, nmi, data, regs);
4021} 4082}
4022 4083
4023static int perf_tp_event_match(struct perf_event *event,
4024 struct perf_sample_data *data);
4025
4026static int perf_exclude_event(struct perf_event *event, 4084static int perf_exclude_event(struct perf_event *event,
4027 struct pt_regs *regs) 4085 struct pt_regs *regs)
4028{ 4086{
@@ -4052,10 +4110,6 @@ static int perf_swevent_match(struct perf_event *event,
4052 if (perf_exclude_event(event, regs)) 4110 if (perf_exclude_event(event, regs))
4053 return 0; 4111 return 0;
4054 4112
4055 if (event->attr.type == PERF_TYPE_TRACEPOINT &&
4056 !perf_tp_event_match(event, data))
4057 return 0;
4058
4059 return 1; 4113 return 1;
4060} 4114}
4061 4115
@@ -4066,19 +4120,46 @@ static inline u64 swevent_hash(u64 type, u32 event_id)
4066 return hash_64(val, SWEVENT_HLIST_BITS); 4120 return hash_64(val, SWEVENT_HLIST_BITS);
4067} 4121}
4068 4122
4069static struct hlist_head * 4123static inline struct hlist_head *
4070find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id) 4124__find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
4071{ 4125{
4072 u64 hash; 4126 u64 hash = swevent_hash(type, event_id);
4073 struct swevent_hlist *hlist;
4074 4127
4075 hash = swevent_hash(type, event_id); 4128 return &hlist->heads[hash];
4129}
4130
4131/* For the read side: events when they trigger */
4132static inline struct hlist_head *
4133find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id)
4134{
4135 struct swevent_hlist *hlist;
4076 4136
4077 hlist = rcu_dereference(ctx->swevent_hlist); 4137 hlist = rcu_dereference(ctx->swevent_hlist);
4078 if (!hlist) 4138 if (!hlist)
4079 return NULL; 4139 return NULL;
4080 4140
4081 return &hlist->heads[hash]; 4141 return __find_swevent_head(hlist, type, event_id);
4142}
4143
4144/* For the event head insertion and removal in the hlist */
4145static inline struct hlist_head *
4146find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event)
4147{
4148 struct swevent_hlist *hlist;
4149 u32 event_id = event->attr.config;
4150 u64 type = event->attr.type;
4151
4152 /*
4153 * Event scheduling is always serialized against hlist allocation
4154 * and release. Which makes the protected version suitable here.
4155 * The context lock guarantees that.
4156 */
4157 hlist = rcu_dereference_protected(ctx->swevent_hlist,
4158 lockdep_is_held(&event->ctx->lock));
4159 if (!hlist)
4160 return NULL;
4161
4162 return __find_swevent_head(hlist, type, event_id);
4082} 4163}
4083 4164
4084static void do_perf_sw_event(enum perf_type_id type, u32 event_id, 4165static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
@@ -4095,7 +4176,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
4095 4176
4096 rcu_read_lock(); 4177 rcu_read_lock();
4097 4178
4098 head = find_swevent_head(cpuctx, type, event_id); 4179 head = find_swevent_head_rcu(cpuctx, type, event_id);
4099 4180
4100 if (!head) 4181 if (!head)
4101 goto end; 4182 goto end;
@@ -4110,7 +4191,7 @@ end:
4110 4191
4111int perf_swevent_get_recursion_context(void) 4192int perf_swevent_get_recursion_context(void)
4112{ 4193{
4113 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 4194 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
4114 int rctx; 4195 int rctx;
4115 4196
4116 if (in_nmi()) 4197 if (in_nmi())
@@ -4122,10 +4203,8 @@ int perf_swevent_get_recursion_context(void)
4122 else 4203 else
4123 rctx = 0; 4204 rctx = 0;
4124 4205
4125 if (cpuctx->recursion[rctx]) { 4206 if (cpuctx->recursion[rctx])
4126 put_cpu_var(perf_cpu_context);
4127 return -1; 4207 return -1;
4128 }
4129 4208
4130 cpuctx->recursion[rctx]++; 4209 cpuctx->recursion[rctx]++;
4131 barrier(); 4210 barrier();
@@ -4139,7 +4218,6 @@ void perf_swevent_put_recursion_context(int rctx)
4139 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4218 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
4140 barrier(); 4219 barrier();
4141 cpuctx->recursion[rctx]--; 4220 cpuctx->recursion[rctx]--;
4142 put_cpu_var(perf_cpu_context);
4143} 4221}
4144EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); 4222EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
4145 4223
@@ -4150,6 +4228,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
4150 struct perf_sample_data data; 4228 struct perf_sample_data data;
4151 int rctx; 4229 int rctx;
4152 4230
4231 preempt_disable_notrace();
4153 rctx = perf_swevent_get_recursion_context(); 4232 rctx = perf_swevent_get_recursion_context();
4154 if (rctx < 0) 4233 if (rctx < 0)
4155 return; 4234 return;
@@ -4159,6 +4238,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
4159 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); 4238 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
4160 4239
4161 perf_swevent_put_recursion_context(rctx); 4240 perf_swevent_put_recursion_context(rctx);
4241 preempt_enable_notrace();
4162} 4242}
4163 4243
4164static void perf_swevent_read(struct perf_event *event) 4244static void perf_swevent_read(struct perf_event *event)
@@ -4178,7 +4258,7 @@ static int perf_swevent_enable(struct perf_event *event)
4178 perf_swevent_set_period(event); 4258 perf_swevent_set_period(event);
4179 } 4259 }
4180 4260
4181 head = find_swevent_head(cpuctx, event->attr.type, event->attr.config); 4261 head = find_swevent_head(cpuctx, event);
4182 if (WARN_ON_ONCE(!head)) 4262 if (WARN_ON_ONCE(!head))
4183 return -EINVAL; 4263 return -EINVAL;
4184 4264
@@ -4192,11 +4272,22 @@ static void perf_swevent_disable(struct perf_event *event)
4192 hlist_del_rcu(&event->hlist_entry); 4272 hlist_del_rcu(&event->hlist_entry);
4193} 4273}
4194 4274
4275static void perf_swevent_void(struct perf_event *event)
4276{
4277}
4278
4279static int perf_swevent_int(struct perf_event *event)
4280{
4281 return 0;
4282}
4283
4195static const struct pmu perf_ops_generic = { 4284static const struct pmu perf_ops_generic = {
4196 .enable = perf_swevent_enable, 4285 .enable = perf_swevent_enable,
4197 .disable = perf_swevent_disable, 4286 .disable = perf_swevent_disable,
4287 .start = perf_swevent_int,
4288 .stop = perf_swevent_void,
4198 .read = perf_swevent_read, 4289 .read = perf_swevent_read,
4199 .unthrottle = perf_swevent_unthrottle, 4290 .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
4200}; 4291};
4201 4292
4202/* 4293/*
@@ -4366,6 +4457,14 @@ static const struct pmu perf_ops_task_clock = {
4366 .read = task_clock_perf_event_read, 4457 .read = task_clock_perf_event_read,
4367}; 4458};
4368 4459
4460/* Deref the hlist from the update side */
4461static inline struct swevent_hlist *
4462swevent_hlist_deref(struct perf_cpu_context *cpuctx)
4463{
4464 return rcu_dereference_protected(cpuctx->swevent_hlist,
4465 lockdep_is_held(&cpuctx->hlist_mutex));
4466}
4467
4369static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) 4468static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
4370{ 4469{
4371 struct swevent_hlist *hlist; 4470 struct swevent_hlist *hlist;
@@ -4376,12 +4475,11 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
4376 4475
4377static void swevent_hlist_release(struct perf_cpu_context *cpuctx) 4476static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
4378{ 4477{
4379 struct swevent_hlist *hlist; 4478 struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx);
4380 4479
4381 if (!cpuctx->swevent_hlist) 4480 if (!hlist)
4382 return; 4481 return;
4383 4482
4384 hlist = cpuctx->swevent_hlist;
4385 rcu_assign_pointer(cpuctx->swevent_hlist, NULL); 4483 rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
4386 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); 4484 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
4387} 4485}
@@ -4418,7 +4516,7 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
4418 4516
4419 mutex_lock(&cpuctx->hlist_mutex); 4517 mutex_lock(&cpuctx->hlist_mutex);
4420 4518
4421 if (!cpuctx->swevent_hlist && cpu_online(cpu)) { 4519 if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) {
4422 struct swevent_hlist *hlist; 4520 struct swevent_hlist *hlist;
4423 4521
4424 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); 4522 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -4467,10 +4565,48 @@ static int swevent_hlist_get(struct perf_event *event)
4467 4565
4468#ifdef CONFIG_EVENT_TRACING 4566#ifdef CONFIG_EVENT_TRACING
4469 4567
4470void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4568static const struct pmu perf_ops_tracepoint = {
4471 int entry_size, struct pt_regs *regs) 4569 .enable = perf_trace_enable,
4570 .disable = perf_trace_disable,
4571 .start = perf_swevent_int,
4572 .stop = perf_swevent_void,
4573 .read = perf_swevent_read,
4574 .unthrottle = perf_swevent_void,
4575};
4576
4577static int perf_tp_filter_match(struct perf_event *event,
4578 struct perf_sample_data *data)
4579{
4580 void *record = data->raw->data;
4581
4582 if (likely(!event->filter) || filter_match_preds(event->filter, record))
4583 return 1;
4584 return 0;
4585}
4586
4587static int perf_tp_event_match(struct perf_event *event,
4588 struct perf_sample_data *data,
4589 struct pt_regs *regs)
4590{
4591 /*
4592 * All tracepoints are from kernel-space.
4593 */
4594 if (event->attr.exclude_kernel)
4595 return 0;
4596
4597 if (!perf_tp_filter_match(event, data))
4598 return 0;
4599
4600 return 1;
4601}
4602
4603void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4604 struct pt_regs *regs, struct hlist_head *head)
4472{ 4605{
4473 struct perf_sample_data data; 4606 struct perf_sample_data data;
4607 struct perf_event *event;
4608 struct hlist_node *node;
4609
4474 struct perf_raw_record raw = { 4610 struct perf_raw_record raw = {
4475 .size = entry_size, 4611 .size = entry_size,
4476 .data = record, 4612 .data = record,
@@ -4479,26 +4615,18 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4479 perf_sample_data_init(&data, addr); 4615 perf_sample_data_init(&data, addr);
4480 data.raw = &raw; 4616 data.raw = &raw;
4481 4617
4482 /* Trace events already protected against recursion */ 4618 rcu_read_lock();
4483 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4619 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4484 &data, regs); 4620 if (perf_tp_event_match(event, &data, regs))
4621 perf_swevent_add(event, count, 1, &data, regs);
4622 }
4623 rcu_read_unlock();
4485} 4624}
4486EXPORT_SYMBOL_GPL(perf_tp_event); 4625EXPORT_SYMBOL_GPL(perf_tp_event);
4487 4626
4488static int perf_tp_event_match(struct perf_event *event,
4489 struct perf_sample_data *data)
4490{
4491 void *record = data->raw->data;
4492
4493 if (likely(!event->filter) || filter_match_preds(event->filter, record))
4494 return 1;
4495 return 0;
4496}
4497
4498static void tp_perf_event_destroy(struct perf_event *event) 4627static void tp_perf_event_destroy(struct perf_event *event)
4499{ 4628{
4500 perf_trace_disable(event->attr.config); 4629 perf_trace_destroy(event);
4501 swevent_hlist_put(event);
4502} 4630}
4503 4631
4504static const struct pmu *tp_perf_event_init(struct perf_event *event) 4632static const struct pmu *tp_perf_event_init(struct perf_event *event)
@@ -4514,17 +4642,13 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4514 !capable(CAP_SYS_ADMIN)) 4642 !capable(CAP_SYS_ADMIN))
4515 return ERR_PTR(-EPERM); 4643 return ERR_PTR(-EPERM);
4516 4644
4517 if (perf_trace_enable(event->attr.config)) 4645 err = perf_trace_init(event);
4646 if (err)
4518 return NULL; 4647 return NULL;
4519 4648
4520 event->destroy = tp_perf_event_destroy; 4649 event->destroy = tp_perf_event_destroy;
4521 err = swevent_hlist_get(event);
4522 if (err) {
4523 perf_trace_disable(event->attr.config);
4524 return ERR_PTR(err);
4525 }
4526 4650
4527 return &perf_ops_generic; 4651 return &perf_ops_tracepoint;
4528} 4652}
4529 4653
4530static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4654static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4552,12 +4676,6 @@ static void perf_event_free_filter(struct perf_event *event)
4552 4676
4553#else 4677#else
4554 4678
4555static int perf_tp_event_match(struct perf_event *event,
4556 struct perf_sample_data *data)
4557{
4558 return 1;
4559}
4560
4561static const struct pmu *tp_perf_event_init(struct perf_event *event) 4679static const struct pmu *tp_perf_event_init(struct perf_event *event)
4562{ 4680{
4563 return NULL; 4681 return NULL;
@@ -4886,54 +5004,53 @@ err_size:
4886 goto out; 5004 goto out;
4887} 5005}
4888 5006
4889static int perf_event_set_output(struct perf_event *event, int output_fd) 5007static int
5008perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
4890{ 5009{
4891 struct perf_event *output_event = NULL; 5010 struct perf_mmap_data *data = NULL, *old_data = NULL;
4892 struct file *output_file = NULL;
4893 struct perf_event *old_output;
4894 int fput_needed = 0;
4895 int ret = -EINVAL; 5011 int ret = -EINVAL;
4896 5012
4897 if (!output_fd) 5013 if (!output_event)
4898 goto set; 5014 goto set;
4899 5015
4900 output_file = fget_light(output_fd, &fput_needed); 5016 /* don't allow circular references */
4901 if (!output_file) 5017 if (event == output_event)
4902 return -EBADF;
4903
4904 if (output_file->f_op != &perf_fops)
4905 goto out; 5018 goto out;
4906 5019
4907 output_event = output_file->private_data; 5020 /*
4908 5021 * Don't allow cross-cpu buffers
4909 /* Don't chain output fds */ 5022 */
4910 if (output_event->output) 5023 if (output_event->cpu != event->cpu)
4911 goto out; 5024 goto out;
4912 5025
4913 /* Don't set an output fd when we already have an output channel */ 5026 /*
 4914 if (event->data) 5027 * If it's not a per-cpu buffer, it must be the same task.
5028 */
5029 if (output_event->cpu == -1 && output_event->ctx != event->ctx)
4915 goto out; 5030 goto out;
4916 5031
4917 atomic_long_inc(&output_file->f_count);
4918
4919set: 5032set:
4920 mutex_lock(&event->mmap_mutex); 5033 mutex_lock(&event->mmap_mutex);
4921 old_output = event->output; 5034 /* Can't redirect output if we've got an active mmap() */
4922 rcu_assign_pointer(event->output, output_event); 5035 if (atomic_read(&event->mmap_count))
4923 mutex_unlock(&event->mmap_mutex); 5036 goto unlock;
4924 5037
4925 if (old_output) { 5038 if (output_event) {
4926 /* 5039 /* get the buffer we want to redirect to */
4927 * we need to make sure no existing perf_output_*() 5040 data = perf_mmap_data_get(output_event);
4928 * is still referencing this event. 5041 if (!data)
4929 */ 5042 goto unlock;
4930 synchronize_rcu();
4931 fput(old_output->filp);
4932 } 5043 }
4933 5044
5045 old_data = event->data;
5046 rcu_assign_pointer(event->data, data);
4934 ret = 0; 5047 ret = 0;
5048unlock:
5049 mutex_unlock(&event->mmap_mutex);
5050
5051 if (old_data)
5052 perf_mmap_data_put(old_data);
4935out: 5053out:
4936 fput_light(output_file, fput_needed);
4937 return ret; 5054 return ret;
4938} 5055}
4939 5056
@@ -4949,13 +5066,13 @@ SYSCALL_DEFINE5(perf_event_open,
4949 struct perf_event_attr __user *, attr_uptr, 5066 struct perf_event_attr __user *, attr_uptr,
4950 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) 5067 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
4951{ 5068{
4952 struct perf_event *event, *group_leader; 5069 struct perf_event *event, *group_leader = NULL, *output_event = NULL;
4953 struct perf_event_attr attr; 5070 struct perf_event_attr attr;
4954 struct perf_event_context *ctx; 5071 struct perf_event_context *ctx;
4955 struct file *event_file = NULL; 5072 struct file *event_file = NULL;
4956 struct file *group_file = NULL; 5073 struct file *group_file = NULL;
5074 int event_fd;
4957 int fput_needed = 0; 5075 int fput_needed = 0;
4958 int fput_needed2 = 0;
4959 int err; 5076 int err;
4960 5077
4961 /* for future expandability... */ 5078 /* for future expandability... */
@@ -4976,26 +5093,38 @@ SYSCALL_DEFINE5(perf_event_open,
4976 return -EINVAL; 5093 return -EINVAL;
4977 } 5094 }
4978 5095
5096 event_fd = get_unused_fd_flags(O_RDWR);
5097 if (event_fd < 0)
5098 return event_fd;
5099
4979 /* 5100 /*
4980 * Get the target context (task or percpu): 5101 * Get the target context (task or percpu):
4981 */ 5102 */
4982 ctx = find_get_context(pid, cpu); 5103 ctx = find_get_context(pid, cpu);
4983 if (IS_ERR(ctx)) 5104 if (IS_ERR(ctx)) {
4984 return PTR_ERR(ctx); 5105 err = PTR_ERR(ctx);
5106 goto err_fd;
5107 }
5108
5109 if (group_fd != -1) {
5110 group_leader = perf_fget_light(group_fd, &fput_needed);
5111 if (IS_ERR(group_leader)) {
5112 err = PTR_ERR(group_leader);
5113 goto err_put_context;
5114 }
5115 group_file = group_leader->filp;
5116 if (flags & PERF_FLAG_FD_OUTPUT)
5117 output_event = group_leader;
5118 if (flags & PERF_FLAG_FD_NO_GROUP)
5119 group_leader = NULL;
5120 }
4985 5121
4986 /* 5122 /*
4987 * Look up the group leader (we will attach this event to it): 5123 * Look up the group leader (we will attach this event to it):
4988 */ 5124 */
4989 group_leader = NULL; 5125 if (group_leader) {
4990 if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
4991 err = -EINVAL; 5126 err = -EINVAL;
4992 group_file = fget_light(group_fd, &fput_needed);
4993 if (!group_file)
4994 goto err_put_context;
4995 if (group_file->f_op != &perf_fops)
4996 goto err_put_context;
4997 5127
4998 group_leader = group_file->private_data;
4999 /* 5128 /*
5000 * Do not allow a recursive hierarchy (this new sibling 5129 * Do not allow a recursive hierarchy (this new sibling
5001 * becoming part of another group-sibling): 5130 * becoming part of another group-sibling):
@@ -5017,22 +5146,21 @@ SYSCALL_DEFINE5(perf_event_open,
5017 5146
5018 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 5147 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
5019 NULL, NULL, GFP_KERNEL); 5148 NULL, NULL, GFP_KERNEL);
5020 err = PTR_ERR(event); 5149 if (IS_ERR(event)) {
5021 if (IS_ERR(event)) 5150 err = PTR_ERR(event);
5022 goto err_put_context; 5151 goto err_put_context;
5152 }
5023 5153
5024 err = anon_inode_getfd("[perf_event]", &perf_fops, event, O_RDWR); 5154 if (output_event) {
5025 if (err < 0) 5155 err = perf_event_set_output(event, output_event);
5026 goto err_free_put_context; 5156 if (err)
5157 goto err_free_put_context;
5158 }
5027 5159
5028 event_file = fget_light(err, &fput_needed2); 5160 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
5029 if (!event_file) 5161 if (IS_ERR(event_file)) {
5162 err = PTR_ERR(event_file);
5030 goto err_free_put_context; 5163 goto err_free_put_context;
5031
5032 if (flags & PERF_FLAG_FD_OUTPUT) {
5033 err = perf_event_set_output(event, group_fd);
5034 if (err)
5035 goto err_fput_free_put_context;
5036 } 5164 }
5037 5165
5038 event->filp = event_file; 5166 event->filp = event_file;
@@ -5048,19 +5176,23 @@ SYSCALL_DEFINE5(perf_event_open,
5048 list_add_tail(&event->owner_entry, &current->perf_event_list); 5176 list_add_tail(&event->owner_entry, &current->perf_event_list);
5049 mutex_unlock(&current->perf_event_mutex); 5177 mutex_unlock(&current->perf_event_mutex);
5050 5178
5051err_fput_free_put_context: 5179 /*
5052 fput_light(event_file, fput_needed2); 5180 * Drop the reference on the group_event after placing the
5181 * new event on the sibling_list. This ensures destruction
5182 * of the group leader will find the pointer to itself in
5183 * perf_group_detach().
5184 */
5185 fput_light(group_file, fput_needed);
5186 fd_install(event_fd, event_file);
5187 return event_fd;
5053 5188
5054err_free_put_context: 5189err_free_put_context:
5055 if (err < 0) 5190 free_event(event);
5056 free_event(event);
5057
5058err_put_context: 5191err_put_context:
5059 if (err < 0)
5060 put_ctx(ctx);
5061
5062 fput_light(group_file, fput_needed); 5192 fput_light(group_file, fput_needed);
5063 5193 put_ctx(ctx);
5194err_fd:
5195 put_unused_fd(event_fd);
5064 return err; 5196 return err;
5065} 5197}
5066 5198
@@ -5371,6 +5503,7 @@ static void perf_free_event(struct perf_event *event,
5371 5503
5372 fput(parent->filp); 5504 fput(parent->filp);
5373 5505
5506 perf_group_detach(event);
5374 list_del_event(event, ctx); 5507 list_del_event(event, ctx);
5375 free_event(event); 5508 free_event(event);
5376} 5509}
diff --git a/kernel/pid.c b/kernel/pid.c
index aebb30d9c233..e9fd8c132d26 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -513,6 +513,13 @@ void __init pidhash_init(void)
513 513
514void __init pidmap_init(void) 514void __init pidmap_init(void)
515{ 515{
516 /* bump default and minimum pid_max based on number of cpus */
517 pid_max = min(pid_max_max, max_t(int, pid_max,
518 PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
519 pid_max_min = max_t(int, pid_max_min,
520 PIDS_PER_CPU_MIN * num_possible_cpus());
521 pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
522
516 init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); 523 init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
517 /* Reserve PID 0. We never call free_pidmap(0) */ 524 /* Reserve PID 0. We never call free_pidmap(0) */
518 set_bit(0, init_pid_ns.pidmap[0].page); 525 set_bit(0, init_pid_ns.pidmap[0].page);
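
The new pidmap_init() lines scale both limits with the number of possible CPUs instead of hard-coding them. A worked example of the arithmetic, assuming PIDS_PER_CPU_DEFAULT of 1024 and PIDS_PER_CPU_MIN of 8 (defined by the companion header change, which is not part of this hunk), on a box with 64 possible CPUs:

/*
 *	pid_max     = min(pid_max_max, max(32768, 64 * 1024)) = 65536
 *	pid_max_min = max(301,         64 * 8)                = 512
 *
 * Anything with 32 or fewer possible CPUs keeps the historical 32768
 * default, because 32 * 1024 does not exceed it.
 */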
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 00bb252f29a2..9829646d399c 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -363,7 +363,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
363 } 363 }
364 } else { 364 } else {
365 read_lock(&tasklist_lock); 365 read_lock(&tasklist_lock);
366 if (thread_group_leader(p) && p->signal) { 366 if (thread_group_leader(p) && p->sighand) {
367 error = 367 error =
368 cpu_clock_sample_group(which_clock, 368 cpu_clock_sample_group(which_clock,
369 p, &rtn); 369 p, &rtn);
@@ -439,7 +439,7 @@ int posix_cpu_timer_del(struct k_itimer *timer)
439 439
440 if (likely(p != NULL)) { 440 if (likely(p != NULL)) {
441 read_lock(&tasklist_lock); 441 read_lock(&tasklist_lock);
442 if (unlikely(p->signal == NULL)) { 442 if (unlikely(p->sighand == NULL)) {
443 /* 443 /*
444 * We raced with the reaping of the task. 444 * We raced with the reaping of the task.
445 * The deletion should have cleared us off the list. 445 * The deletion should have cleared us off the list.
@@ -691,10 +691,10 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
691 read_lock(&tasklist_lock); 691 read_lock(&tasklist_lock);
692 /* 692 /*
693 * We need the tasklist_lock to protect against reaping that 693 * We need the tasklist_lock to protect against reaping that
694 * clears p->signal. If p has just been reaped, we can no 694 * clears p->sighand. If p has just been reaped, we can no
695 * longer get any information about it at all. 695 * longer get any information about it at all.
696 */ 696 */
697 if (unlikely(p->signal == NULL)) { 697 if (unlikely(p->sighand == NULL)) {
698 read_unlock(&tasklist_lock); 698 read_unlock(&tasklist_lock);
699 put_task_struct(p); 699 put_task_struct(p);
700 timer->it.cpu.task = NULL; 700 timer->it.cpu.task = NULL;
@@ -863,7 +863,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
863 clear_dead = p->exit_state; 863 clear_dead = p->exit_state;
864 } else { 864 } else {
865 read_lock(&tasklist_lock); 865 read_lock(&tasklist_lock);
866 if (unlikely(p->signal == NULL)) { 866 if (unlikely(p->sighand == NULL)) {
867 /* 867 /*
868 * The process has been reaped. 868 * The process has been reaped.
869 * We can't even collect a sample any more. 869 * We can't even collect a sample any more.
@@ -1199,7 +1199,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1199 spin_lock(&p->sighand->siglock); 1199 spin_lock(&p->sighand->siglock);
1200 } else { 1200 } else {
1201 read_lock(&tasklist_lock); 1201 read_lock(&tasklist_lock);
1202 if (unlikely(p->signal == NULL)) { 1202 if (unlikely(p->sighand == NULL)) {
1203 /* 1203 /*
1204 * The process has been reaped. 1204 * The process has been reaped.
1205 * We can't even collect a sample any more. 1205 * We can't even collect a sample any more.
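
Every hunk in this file swaps the same liveness test: ->sighand, not ->signal, is what gets cleared when a task is reaped, so it is the field to check under tasklist_lock. An illustrative helper showing the shape of that check (not a function these hunks add):

#include <linux/sched.h>

static int timer_target_alive(struct task_struct *p)
{
	int alive;

	read_lock(&tasklist_lock);
	alive = (p->sighand != NULL);	/* cleared by release_task() */
	read_unlock(&tasklist_lock);

	return alive;
}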
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 00d1fda58ab6..ad723420acc3 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -559,14 +559,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
559 new_timer->it_id = (timer_t) new_timer_id; 559 new_timer->it_id = (timer_t) new_timer_id;
560 new_timer->it_clock = which_clock; 560 new_timer->it_clock = which_clock;
561 new_timer->it_overrun = -1; 561 new_timer->it_overrun = -1;
562 error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
563 if (error)
564 goto out;
565 562
566 /*
567 * return the timer_id now. The next step is hard to
568 * back out if there is an error.
569 */
570 if (copy_to_user(created_timer_id, 563 if (copy_to_user(created_timer_id,
571 &new_timer_id, sizeof (new_timer_id))) { 564 &new_timer_id, sizeof (new_timer_id))) {
572 error = -EFAULT; 565 error = -EFAULT;
@@ -597,6 +590,10 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
597 new_timer->sigq->info.si_tid = new_timer->it_id; 590 new_timer->sigq->info.si_tid = new_timer->it_id;
598 new_timer->sigq->info.si_code = SI_TIMER; 591 new_timer->sigq->info.si_code = SI_TIMER;
599 592
593 error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
594 if (error)
595 goto out;
596
600 spin_lock_irq(&current->sighand->siglock); 597 spin_lock_irq(&current->sighand->siglock);
601 new_timer->it_signal = current->signal; 598 new_timer->it_signal = current->signal;
602 list_add(&new_timer->list, &current->signal->posix_timers); 599 list_add(&new_timer->list, &current->signal->posix_timers);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 5c36ea9d55d2..ca6066a6952e 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -99,9 +99,13 @@ config PM_SLEEP_ADVANCED_DEBUG
99 depends on PM_ADVANCED_DEBUG 99 depends on PM_ADVANCED_DEBUG
100 default n 100 default n
101 101
102config SUSPEND_NVS
103 bool
104
102config SUSPEND 105config SUSPEND
103 bool "Suspend to RAM and standby" 106 bool "Suspend to RAM and standby"
104 depends on PM && ARCH_SUSPEND_POSSIBLE 107 depends on PM && ARCH_SUSPEND_POSSIBLE
108 select SUSPEND_NVS if HAS_IOMEM
105 default y 109 default y
106 ---help--- 110 ---help---
107 Allow the system to enter sleep states in which main memory is 111 Allow the system to enter sleep states in which main memory is
@@ -130,13 +134,10 @@ config SUSPEND_FREEZER
130 134
131 Turning OFF this setting is NOT recommended! If in doubt, say Y. 135 Turning OFF this setting is NOT recommended! If in doubt, say Y.
132 136
133config HIBERNATION_NVS
134 bool
135
136config HIBERNATION 137config HIBERNATION
137 bool "Hibernation (aka 'suspend to disk')" 138 bool "Hibernation (aka 'suspend to disk')"
138 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE 139 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
139 select HIBERNATION_NVS if HAS_IOMEM 140 select SUSPEND_NVS if HAS_IOMEM
140 ---help--- 141 ---help---
141 Enable the suspend to disk (STD) functionality, which is usually 142 Enable the suspend to disk (STD) functionality, which is usually
142 called "hibernation" in user interfaces. STD checkpoints the 143 called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 524e058dcf06..f9063c6b185d 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -10,6 +10,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o
10obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o 10obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
11obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ 11obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
12 block_io.o 12 block_io.o
13obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o 13obj-$(CONFIG_SUSPEND_NVS) += nvs.o
14 14
15obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o 15obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/nvs.c
index fdcad9ed5a7b..1836db60bbb6 100644
--- a/kernel/power/hibernate_nvs.c
+++ b/kernel/power/nvs.c
@@ -15,7 +15,7 @@
15 15
16/* 16/*
17 * Platforms, like ACPI, may want us to save some memory used by them during 17 * Platforms, like ACPI, may want us to save some memory used by them during
18 * hibernation and to restore the contents of this memory during the subsequent 18 * suspend and to restore the contents of this memory during the subsequent
19 * resume. The code below implements a mechanism allowing us to do that. 19 * resume. The code below implements a mechanism allowing us to do that.
20 */ 20 */
21 21
@@ -30,7 +30,7 @@ struct nvs_page {
30static LIST_HEAD(nvs_list); 30static LIST_HEAD(nvs_list);
31 31
32/** 32/**
33 * hibernate_nvs_register - register platform NVS memory region to save 33 * suspend_nvs_register - register platform NVS memory region to save
34 * @start - physical address of the region 34 * @start - physical address of the region
35 * @size - size of the region 35 * @size - size of the region
36 * 36 *
@@ -38,7 +38,7 @@ static LIST_HEAD(nvs_list);
38 * things so that the data from page-aligned addresses in this region will 38 * things so that the data from page-aligned addresses in this region will
39 * be copied into separate RAM pages. 39 * be copied into separate RAM pages.
40 */ 40 */
41int hibernate_nvs_register(unsigned long start, unsigned long size) 41int suspend_nvs_register(unsigned long start, unsigned long size)
42{ 42{
43 struct nvs_page *entry, *next; 43 struct nvs_page *entry, *next;
44 44
@@ -68,9 +68,9 @@ int hibernate_nvs_register(unsigned long start, unsigned long size)
68} 68}
69 69
70/** 70/**
71 * hibernate_nvs_free - free data pages allocated for saving NVS regions 71 * suspend_nvs_free - free data pages allocated for saving NVS regions
72 */ 72 */
73void hibernate_nvs_free(void) 73void suspend_nvs_free(void)
74{ 74{
75 struct nvs_page *entry; 75 struct nvs_page *entry;
76 76
@@ -86,16 +86,16 @@ void hibernate_nvs_free(void)
86} 86}
87 87
88/** 88/**
89 * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions 89 * suspend_nvs_alloc - allocate memory necessary for saving NVS regions
90 */ 90 */
91int hibernate_nvs_alloc(void) 91int suspend_nvs_alloc(void)
92{ 92{
93 struct nvs_page *entry; 93 struct nvs_page *entry;
94 94
95 list_for_each_entry(entry, &nvs_list, node) { 95 list_for_each_entry(entry, &nvs_list, node) {
96 entry->data = (void *)__get_free_page(GFP_KERNEL); 96 entry->data = (void *)__get_free_page(GFP_KERNEL);
97 if (!entry->data) { 97 if (!entry->data) {
98 hibernate_nvs_free(); 98 suspend_nvs_free();
99 return -ENOMEM; 99 return -ENOMEM;
100 } 100 }
101 } 101 }
@@ -103,9 +103,9 @@ int hibernate_nvs_alloc(void)
103} 103}
104 104
105/** 105/**
106 * hibernate_nvs_save - save NVS memory regions 106 * suspend_nvs_save - save NVS memory regions
107 */ 107 */
108void hibernate_nvs_save(void) 108void suspend_nvs_save(void)
109{ 109{
110 struct nvs_page *entry; 110 struct nvs_page *entry;
111 111
@@ -119,12 +119,12 @@ void hibernate_nvs_save(void)
119} 119}
120 120
121/** 121/**
122 * hibernate_nvs_restore - restore NVS memory regions 122 * suspend_nvs_restore - restore NVS memory regions
123 * 123 *
124 * This function is going to be called with interrupts disabled, so it 124 * This function is going to be called with interrupts disabled, so it
125 * cannot iounmap the virtual addresses used to access the NVS region. 125 * cannot iounmap the virtual addresses used to access the NVS region.
126 */ 126 */
127void hibernate_nvs_restore(void) 127void suspend_nvs_restore(void)
128{ 128{
129 struct nvs_page *entry; 129 struct nvs_page *entry;
130 130
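
Apart from the rename, the API keeps the same register/alloc/save/restore life cycle. A hypothetical platform-side caller, with made-up region values and abbreviated error handling:

#include <linux/suspend.h>

static int example_nvs_prepare(unsigned long nvs_start, unsigned long nvs_size)
{
	int err;

	err = suspend_nvs_register(nvs_start, nvs_size);  /* once, at boot   */
	if (err)
		return err;

	err = suspend_nvs_alloc();	/* before each suspend               */
	if (err)
		return err;

	suspend_nvs_save();		/* just before entering the state    */
	/* ... the platform sleeps and wakes ... */
	suspend_nvs_restore();		/* early resume, interrupts still off */

	suspend_nvs_free();
	return 0;
}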
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 56e7dbb8b996..f37cb7dd4402 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -16,6 +16,12 @@
16#include <linux/cpu.h> 16#include <linux/cpu.h>
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/gfp.h> 18#include <linux/gfp.h>
19#include <linux/io.h>
20#include <linux/kernel.h>
21#include <linux/list.h>
22#include <linux/mm.h>
23#include <linux/slab.h>
24#include <linux/suspend.h>
19 25
20#include "power.h" 26#include "power.h"
21 27
diff --git a/kernel/profile.c b/kernel/profile.c
index dfadc5b729f1..b22a899934cc 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -365,14 +365,14 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info,
365 switch (action) { 365 switch (action) {
366 case CPU_UP_PREPARE: 366 case CPU_UP_PREPARE:
367 case CPU_UP_PREPARE_FROZEN: 367 case CPU_UP_PREPARE_FROZEN:
368 node = cpu_to_node(cpu); 368 node = cpu_to_mem(cpu);
369 per_cpu(cpu_profile_flip, cpu) = 0; 369 per_cpu(cpu_profile_flip, cpu) = 0;
370 if (!per_cpu(cpu_profile_hits, cpu)[1]) { 370 if (!per_cpu(cpu_profile_hits, cpu)[1]) {
371 page = alloc_pages_exact_node(node, 371 page = alloc_pages_exact_node(node,
372 GFP_KERNEL | __GFP_ZERO, 372 GFP_KERNEL | __GFP_ZERO,
373 0); 373 0);
374 if (!page) 374 if (!page)
375 return NOTIFY_BAD; 375 return notifier_from_errno(-ENOMEM);
376 per_cpu(cpu_profile_hits, cpu)[1] = page_address(page); 376 per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
377 } 377 }
378 if (!per_cpu(cpu_profile_hits, cpu)[0]) { 378 if (!per_cpu(cpu_profile_hits, cpu)[0]) {
@@ -388,7 +388,7 @@ out_free:
388 page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]); 388 page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
389 per_cpu(cpu_profile_hits, cpu)[1] = NULL; 389 per_cpu(cpu_profile_hits, cpu)[1] = NULL;
390 __free_page(page); 390 __free_page(page);
391 return NOTIFY_BAD; 391 return notifier_from_errno(-ENOMEM);
392 case CPU_ONLINE: 392 case CPU_ONLINE:
393 case CPU_ONLINE_FROZEN: 393 case CPU_ONLINE_FROZEN:
394 if (prof_cpu_mask != NULL) 394 if (prof_cpu_mask != NULL)
@@ -567,7 +567,7 @@ static int create_hash_tables(void)
567 int cpu; 567 int cpu;
568 568
569 for_each_online_cpu(cpu) { 569 for_each_online_cpu(cpu) {
570 int node = cpu_to_node(cpu); 570 int node = cpu_to_mem(cpu);
571 struct page *page; 571 struct page *page;
572 572
573 page = alloc_pages_exact_node(node, 573 page = alloc_pages_exact_node(node,
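
This file, like several later hunks (relay, smp, softirq, timer), stops returning the opaque NOTIFY_BAD and encodes the real errno instead, so whoever runs the notifier chain can recover it. The pattern, reduced to a sketch with an illustrative callback name:

#include <linux/errno.h>
#include <linux/notifier.h>
#include <linux/slab.h>

static int example_cpu_callback(struct notifier_block *nb,
				unsigned long action, void *hcpu)
{
	void *buf = kzalloc(64, GFP_KERNEL);

	if (!buf)
		return notifier_from_errno(-ENOMEM);	/* was NOTIFY_BAD */

	kfree(buf);
	return NOTIFY_OK;
}

The chain runner can then turn the status back into an errno with notifier_to_errno().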
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 6af9cdd558b7..74a3d693c196 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -594,6 +594,32 @@ int ptrace_request(struct task_struct *child, long request,
594 ret = ptrace_detach(child, data); 594 ret = ptrace_detach(child, data);
595 break; 595 break;
596 596
597#ifdef CONFIG_BINFMT_ELF_FDPIC
598 case PTRACE_GETFDPIC: {
599 struct mm_struct *mm = get_task_mm(child);
600 unsigned long tmp = 0;
601
602 ret = -ESRCH;
603 if (!mm)
604 break;
605
606 switch (addr) {
607 case PTRACE_GETFDPIC_EXEC:
608 tmp = mm->context.exec_fdpic_loadmap;
609 break;
610 case PTRACE_GETFDPIC_INTERP:
611 tmp = mm->context.interp_fdpic_loadmap;
612 break;
613 default:
614 break;
615 }
616 mmput(mm);
617
618 ret = put_user(tmp, (unsigned long __user *) data);
619 break;
620 }
621#endif
622
597#ifdef PTRACE_SINGLESTEP 623#ifdef PTRACE_SINGLESTEP
598 case PTRACE_SINGLESTEP: 624 case PTRACE_SINGLESTEP:
599#endif 625#endif
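
From user space the new request reads one unsigned long through the data pointer. A hedged sketch of a tracer using it on an already-attached, stopped FDPIC tracee; the PTRACE_GETFDPIC_* constants come from the architecture's ptrace.h rather than <sys/ptrace.h>:

#include <sys/ptrace.h>
#include <sys/types.h>
#include <stdio.h>

static void dump_fdpic_loadmaps(pid_t pid)
{
	unsigned long exec_map = 0, interp_map = 0;

	ptrace(PTRACE_GETFDPIC, pid, PTRACE_GETFDPIC_EXEC, &exec_map);
	ptrace(PTRACE_GETFDPIC, pid, PTRACE_GETFDPIC_INTERP, &interp_map);

	printf("exec loadmap at %#lx, interp loadmap at %#lx\n",
	       exec_map, interp_map);
}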
diff --git a/kernel/relay.c b/kernel/relay.c
index 4268287148c1..c7cf397fb929 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -539,7 +539,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
539 "relay_hotcpu_callback: cpu %d buffer " 539 "relay_hotcpu_callback: cpu %d buffer "
540 "creation failed\n", hotcpu); 540 "creation failed\n", hotcpu);
541 mutex_unlock(&relay_channels_mutex); 541 mutex_unlock(&relay_channels_mutex);
542 return NOTIFY_BAD; 542 return notifier_from_errno(-ENOMEM);
543 } 543 }
544 } 544 }
545 mutex_unlock(&relay_channels_mutex); 545 mutex_unlock(&relay_channels_mutex);
diff --git a/kernel/sched.c b/kernel/sched.c
index 054a6012de99..f52a8801b7a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
306 */ 306 */
307struct task_group init_task_group; 307struct task_group init_task_group;
308 308
309/* return group to which a task belongs */
310static inline struct task_group *task_group(struct task_struct *p)
311{
312 struct task_group *tg;
313
314#ifdef CONFIG_CGROUP_SCHED
315 tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
316 struct task_group, css);
317#else
318 tg = &init_task_group;
319#endif
320 return tg;
321}
322
323/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
324static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
325{
326 /*
327 * Strictly speaking this rcu_read_lock() is not needed since the
328 * task_group is tied to the cgroup, which in turn can never go away
329 * as long as there are tasks attached to it.
330 *
331 * However since task_group() uses task_subsys_state() which is an
332 * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
333 */
334 rcu_read_lock();
335#ifdef CONFIG_FAIR_GROUP_SCHED
336 p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
337 p->se.parent = task_group(p)->se[cpu];
338#endif
339
340#ifdef CONFIG_RT_GROUP_SCHED
341 p->rt.rt_rq = task_group(p)->rt_rq[cpu];
342 p->rt.parent = task_group(p)->rt_se[cpu];
343#endif
344 rcu_read_unlock();
345}
346
347#else
348
349static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
350static inline struct task_group *task_group(struct task_struct *p)
351{
352 return NULL;
353}
354
355#endif /* CONFIG_CGROUP_SCHED */ 309#endif /* CONFIG_CGROUP_SCHED */
356 310
357/* CFS-related fields in a runqueue */ 311/* CFS-related fields in a runqueue */
@@ -544,6 +498,8 @@ struct rq {
544 struct root_domain *rd; 498 struct root_domain *rd;
545 struct sched_domain *sd; 499 struct sched_domain *sd;
546 500
501 unsigned long cpu_power;
502
547 unsigned char idle_at_tick; 503 unsigned char idle_at_tick;
548 /* For active balancing */ 504 /* For active balancing */
549 int post_schedule; 505 int post_schedule;
@@ -642,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
642#define cpu_curr(cpu) (cpu_rq(cpu)->curr) 598#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
643#define raw_rq() (&__raw_get_cpu_var(runqueues)) 599#define raw_rq() (&__raw_get_cpu_var(runqueues))
644 600
601#ifdef CONFIG_CGROUP_SCHED
602
603/*
 604 * Return the group to which this task belongs.
605 *
606 * We use task_subsys_state_check() and extend the RCU verification
607 * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
608 * holds that lock for each task it moves into the cgroup. Therefore
609 * by holding that lock, we pin the task to the current cgroup.
610 */
611static inline struct task_group *task_group(struct task_struct *p)
612{
613 struct cgroup_subsys_state *css;
614
615 css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
616 lockdep_is_held(&task_rq(p)->lock));
617 return container_of(css, struct task_group, css);
618}
619
620/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
621static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
622{
623#ifdef CONFIG_FAIR_GROUP_SCHED
624 p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
625 p->se.parent = task_group(p)->se[cpu];
626#endif
627
628#ifdef CONFIG_RT_GROUP_SCHED
629 p->rt.rt_rq = task_group(p)->rt_rq[cpu];
630 p->rt.parent = task_group(p)->rt_se[cpu];
631#endif
632}
633
634#else /* CONFIG_CGROUP_SCHED */
635
636static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
637static inline struct task_group *task_group(struct task_struct *p)
638{
639 return NULL;
640}
641
642#endif /* CONFIG_CGROUP_SCHED */
643
645inline void update_rq_clock(struct rq *rq) 644inline void update_rq_clock(struct rq *rq)
646{ 645{
647 if (!rq->skip_clock_update) 646 if (!rq->skip_clock_update)
@@ -969,14 +968,6 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
969 } 968 }
970} 969}
971 970
972void task_rq_unlock_wait(struct task_struct *p)
973{
974 struct rq *rq = task_rq(p);
975
976 smp_mb(); /* spin-unlock-wait is not a full memory barrier */
977 raw_spin_unlock_wait(&rq->lock);
978}
979
980static void __task_rq_unlock(struct rq *rq) 971static void __task_rq_unlock(struct rq *rq)
981 __releases(rq->lock) 972 __releases(rq->lock)
982{ 973{
@@ -1263,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
1263 s64 period = sched_avg_period(); 1254 s64 period = sched_avg_period();
1264 1255
1265 while ((s64)(rq->clock - rq->age_stamp) > period) { 1256 while ((s64)(rq->clock - rq->age_stamp) > period) {
1257 /*
1258 * Inline assembly required to prevent the compiler
1259 * optimising this loop into a divmod call.
1260 * See __iter_div_u64_rem() for another example of this.
1261 */
1262 asm("" : "+rm" (rq->age_stamp));
1266 rq->age_stamp += period; 1263 rq->age_stamp += period;
1267 rq->rt_avg /= 2; 1264 rq->rt_avg /= 2;
1268 } 1265 }
@@ -1507,24 +1504,9 @@ static unsigned long target_load(int cpu, int type)
1507 return max(rq->cpu_load[type-1], total); 1504 return max(rq->cpu_load[type-1], total);
1508} 1505}
1509 1506
1510static struct sched_group *group_of(int cpu)
1511{
1512 struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
1513
1514 if (!sd)
1515 return NULL;
1516
1517 return sd->groups;
1518}
1519
1520static unsigned long power_of(int cpu) 1507static unsigned long power_of(int cpu)
1521{ 1508{
1522 struct sched_group *group = group_of(cpu); 1509 return cpu_rq(cpu)->cpu_power;
1523
1524 if (!group)
1525 return SCHED_LOAD_SCALE;
1526
1527 return group->cpu_power;
1528} 1510}
1529 1511
1530static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); 1512static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
@@ -1681,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)
1681 1663
1682static void update_h_load(long cpu) 1664static void update_h_load(long cpu)
1683{ 1665{
1684 if (root_task_group_empty())
1685 return;
1686
1687 walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); 1666 walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
1688} 1667}
1689 1668
@@ -1862,8 +1841,8 @@ static void dec_nr_running(struct rq *rq)
1862static void set_load_weight(struct task_struct *p) 1841static void set_load_weight(struct task_struct *p)
1863{ 1842{
1864 if (task_has_rt_policy(p)) { 1843 if (task_has_rt_policy(p)) {
1865 p->se.load.weight = prio_to_weight[0] * 2; 1844 p->se.load.weight = 0;
1866 p->se.load.inv_weight = prio_to_wmult[0] >> 1; 1845 p->se.load.inv_weight = WMULT_CONST;
1867 return; 1846 return;
1868 } 1847 }
1869 1848
@@ -2515,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
2515 if (p->sched_class->task_fork) 2494 if (p->sched_class->task_fork)
2516 p->sched_class->task_fork(p); 2495 p->sched_class->task_fork(p);
2517 2496
2497 /*
2498 * The child is not yet in the pid-hash so no cgroup attach races,
 2499 * and the cgroup is pinned to this child because cgroup_fork()
 2500 * runs before sched_fork().
2501 *
2502 * Silence PROVE_RCU.
2503 */
2504 rcu_read_lock();
2518 set_task_cpu(p, cpu); 2505 set_task_cpu(p, cpu);
2506 rcu_read_unlock();
2519 2507
2520#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2508#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2521 if (likely(sched_info_on())) 2509 if (likely(sched_info_on()))
@@ -2885,9 +2873,9 @@ unsigned long nr_iowait(void)
2885 return sum; 2873 return sum;
2886} 2874}
2887 2875
2888unsigned long nr_iowait_cpu(void) 2876unsigned long nr_iowait_cpu(int cpu)
2889{ 2877{
2890 struct rq *this = this_rq(); 2878 struct rq *this = cpu_rq(cpu);
2891 return atomic_read(&this->nr_iowait); 2879 return atomic_read(&this->nr_iowait);
2892} 2880}
2893 2881
@@ -4062,6 +4050,23 @@ int __sched wait_for_completion_killable(struct completion *x)
4062EXPORT_SYMBOL(wait_for_completion_killable); 4050EXPORT_SYMBOL(wait_for_completion_killable);
4063 4051
4064/** 4052/**
4053 * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
4054 * @x: holds the state of this particular completion
4055 * @timeout: timeout value in jiffies
4056 *
4057 * This waits for either a completion of a specific task to be
4058 * signaled or for a specified timeout to expire. It can be
4059 * interrupted by a kill signal. The timeout is in jiffies.
4060 */
4061unsigned long __sched
4062wait_for_completion_killable_timeout(struct completion *x,
4063 unsigned long timeout)
4064{
4065 return wait_for_common(x, timeout, TASK_KILLABLE);
4066}
4067EXPORT_SYMBOL(wait_for_completion_killable_timeout);
4068
4069/**
4065 * try_wait_for_completion - try to decrement a completion without blocking 4070 * try_wait_for_completion - try to decrement a completion without blocking
4066 * @x: completion structure 4071 * @x: completion structure
4067 * 4072 *
@@ -4469,16 +4474,6 @@ recheck:
4469 } 4474 }
4470 4475
4471 if (user) { 4476 if (user) {
4472#ifdef CONFIG_RT_GROUP_SCHED
4473 /*
4474 * Do not allow realtime tasks into groups that have no runtime
4475 * assigned.
4476 */
4477 if (rt_bandwidth_enabled() && rt_policy(policy) &&
4478 task_group(p)->rt_bandwidth.rt_runtime == 0)
4479 return -EPERM;
4480#endif
4481
4482 retval = security_task_setscheduler(p, policy, param); 4477 retval = security_task_setscheduler(p, policy, param);
4483 if (retval) 4478 if (retval)
4484 return retval; 4479 return retval;
@@ -4494,6 +4489,22 @@ recheck:
4494 * runqueue lock must be held. 4489 * runqueue lock must be held.
4495 */ 4490 */
4496 rq = __task_rq_lock(p); 4491 rq = __task_rq_lock(p);
4492
4493#ifdef CONFIG_RT_GROUP_SCHED
4494 if (user) {
4495 /*
4496 * Do not allow realtime tasks into groups that have no runtime
4497 * assigned.
4498 */
4499 if (rt_bandwidth_enabled() && rt_policy(policy) &&
4500 task_group(p)->rt_bandwidth.rt_runtime == 0) {
4501 __task_rq_unlock(rq);
4502 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
4503 return -EPERM;
4504 }
4505 }
4506#endif
4507
4497 /* recheck policy now with rq lock held */ 4508 /* recheck policy now with rq lock held */
4498 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { 4509 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
4499 policy = oldpolicy = -1; 4510 policy = oldpolicy = -1;
@@ -7596,6 +7607,7 @@ void __init sched_init(void)
7596#ifdef CONFIG_SMP 7607#ifdef CONFIG_SMP
7597 rq->sd = NULL; 7608 rq->sd = NULL;
7598 rq->rd = NULL; 7609 rq->rd = NULL;
7610 rq->cpu_power = SCHED_LOAD_SCALE;
7599 rq->post_schedule = 0; 7611 rq->post_schedule = 0;
7600 rq->active_balance = 0; 7612 rq->active_balance = 0;
7601 rq->next_balance = jiffies; 7613 rq->next_balance = jiffies;
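
Of the additions above, wait_for_completion_killable_timeout() is the one aimed at ordinary callers. A sketch of its use with an illustrative completion and a five-second timeout; the helper returns 0 on timeout, a negative error if the waiter receives a fatal signal, and the remaining jiffies on success:

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

static int wait_for_device_ready(struct completion *done)
{
	long left;

	left = wait_for_completion_killable_timeout(done, 5 * HZ);
	if (left < 0)
		return left;		/* fatal signal, typically -ERESTARTSYS */
	if (left == 0)
		return -ETIMEDOUT;	/* five seconds elapsed                 */

	return 0;			/* completed with 'left' jiffies spare  */
}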
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 87a330a7185f..35565395d00d 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -381,15 +381,9 @@ __initcall(init_sched_debug_procfs);
381void proc_sched_show_task(struct task_struct *p, struct seq_file *m) 381void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
382{ 382{
383 unsigned long nr_switches; 383 unsigned long nr_switches;
384 unsigned long flags;
385 int num_threads = 1;
386
387 if (lock_task_sighand(p, &flags)) {
388 num_threads = atomic_read(&p->signal->count);
389 unlock_task_sighand(p, &flags);
390 }
391 384
392 SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads); 385 SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid,
386 get_nr_threads(p));
393 SEQ_printf(m, 387 SEQ_printf(m,
394 "---------------------------------------------------------\n"); 388 "---------------------------------------------------------\n");
395#define __P(F) \ 389#define __P(F) \
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 217e4a9393e4..a878b5332daa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1225,7 +1225,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1225 unsigned long this_load, load; 1225 unsigned long this_load, load;
1226 int idx, this_cpu, prev_cpu; 1226 int idx, this_cpu, prev_cpu;
1227 unsigned long tl_per_task; 1227 unsigned long tl_per_task;
1228 unsigned int imbalance;
1229 struct task_group *tg; 1228 struct task_group *tg;
1230 unsigned long weight; 1229 unsigned long weight;
1231 int balanced; 1230 int balanced;
@@ -1241,6 +1240,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1241 * effect of the currently running task from the load 1240 * effect of the currently running task from the load
1242 * of the current CPU: 1241 * of the current CPU:
1243 */ 1242 */
1243 rcu_read_lock();
1244 if (sync) { 1244 if (sync) {
1245 tg = task_group(current); 1245 tg = task_group(current);
1246 weight = current->se.load.weight; 1246 weight = current->se.load.weight;
@@ -1252,8 +1252,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1252 tg = task_group(p); 1252 tg = task_group(p);
1253 weight = p->se.load.weight; 1253 weight = p->se.load.weight;
1254 1254
1255 imbalance = 100 + (sd->imbalance_pct - 100) / 2;
1256
1257 /* 1255 /*
1258 * In low-load situations, where prev_cpu is idle and this_cpu is idle 1256 * In low-load situations, where prev_cpu is idle and this_cpu is idle
1259 * due to the sync cause above having dropped this_load to 0, we'll 1257 * due to the sync cause above having dropped this_load to 0, we'll
@@ -1263,9 +1261,22 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1263 * Otherwise check if either cpus are near enough in load to allow this 1261 * Otherwise check if either cpus are near enough in load to allow this
1264 * task to be woken on this_cpu. 1262 * task to be woken on this_cpu.
1265 */ 1263 */
1266 balanced = !this_load || 1264 if (this_load) {
1267 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= 1265 unsigned long this_eff_load, prev_eff_load;
1268 imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); 1266
1267 this_eff_load = 100;
1268 this_eff_load *= power_of(prev_cpu);
1269 this_eff_load *= this_load +
1270 effective_load(tg, this_cpu, weight, weight);
1271
1272 prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
1273 prev_eff_load *= power_of(this_cpu);
1274 prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
1275
1276 balanced = this_eff_load <= prev_eff_load;
1277 } else
1278 balanced = true;
1279 rcu_read_unlock();
1269 1280
1270 /* 1281 /*
1271 * If the currently running task will sleep within 1282 * If the currently running task will sleep within
@@ -2298,6 +2309,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
2298 if (!power) 2309 if (!power)
2299 power = 1; 2310 power = 1;
2300 2311
2312 cpu_rq(cpu)->cpu_power = power;
2301 sdg->cpu_power = power; 2313 sdg->cpu_power = power;
2302} 2314}
2303 2315
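
The rewritten wake_affine() test cross-multiplies by the peer CPU's capacity, so it effectively compares load per unit of cpu_power rather than raw load. With a typical sd->imbalance_pct of 125 (an assumption; the value is per-domain) the comparison works out as:

/*
 *   this_eff_load = 100 * power_of(prev_cpu) *
 *                   (this_load + effective_load(tg, this_cpu, w, w))
 *   prev_eff_load = 112 * power_of(this_cpu) *          (100 + 25/2)
 *                   (load      + effective_load(tg, prev_cpu, 0, w))
 *
 *   balanced  <=>  this_eff_load <= prev_eff_load
 *             <=>  this_side_load / power(this_cpu)
 *                    <= 1.12 * prev_side_load / power(prev_cpu)
 *
 * With equal cpu_power on both sides this is the old "within ~12%" rule;
 * with asymmetric capacities the faster CPU is allowed proportionally more
 * absolute load before the wakeup stops being treated as balanced.
 */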
diff --git a/kernel/signal.c b/kernel/signal.c
index 825a3f24ad76..906ae5a1779c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -642,7 +642,7 @@ static inline bool si_fromuser(const struct siginfo *info)
642static int check_kill_permission(int sig, struct siginfo *info, 642static int check_kill_permission(int sig, struct siginfo *info,
643 struct task_struct *t) 643 struct task_struct *t)
644{ 644{
645 const struct cred *cred = current_cred(), *tcred; 645 const struct cred *cred, *tcred;
646 struct pid *sid; 646 struct pid *sid;
647 int error; 647 int error;
648 648
@@ -656,8 +656,10 @@ static int check_kill_permission(int sig, struct siginfo *info,
656 if (error) 656 if (error)
657 return error; 657 return error;
658 658
659 cred = current_cred();
659 tcred = __task_cred(t); 660 tcred = __task_cred(t);
660 if ((cred->euid ^ tcred->suid) && 661 if (!same_thread_group(current, t) &&
662 (cred->euid ^ tcred->suid) &&
661 (cred->euid ^ tcred->uid) && 663 (cred->euid ^ tcred->uid) &&
662 (cred->uid ^ tcred->suid) && 664 (cred->uid ^ tcred->suid) &&
663 (cred->uid ^ tcred->uid) && 665 (cred->uid ^ tcred->uid) &&
@@ -1083,23 +1085,24 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
1083/* 1085/*
1084 * Nuke all other threads in the group. 1086 * Nuke all other threads in the group.
1085 */ 1087 */
1086void zap_other_threads(struct task_struct *p) 1088int zap_other_threads(struct task_struct *p)
1087{ 1089{
1088 struct task_struct *t; 1090 struct task_struct *t = p;
1091 int count = 0;
1089 1092
1090 p->signal->group_stop_count = 0; 1093 p->signal->group_stop_count = 0;
1091 1094
1092 for (t = next_thread(p); t != p; t = next_thread(t)) { 1095 while_each_thread(p, t) {
1093 /* 1096 count++;
1094 * Don't bother with already dead threads 1097
1095 */ 1098 /* Don't bother with already dead threads */
1096 if (t->exit_state) 1099 if (t->exit_state)
1097 continue; 1100 continue;
1098
1099 /* SIGKILL will be handled before any pending SIGSTOP */
1100 sigaddset(&t->pending.signal, SIGKILL); 1101 sigaddset(&t->pending.signal, SIGKILL);
1101 signal_wake_up(t, 1); 1102 signal_wake_up(t, 1);
1102 } 1103 }
1104
1105 return count;
1103} 1106}
1104 1107
1105struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) 1108struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
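
zap_other_threads() now reports how many sibling threads it signalled, and it walks them with the standard while_each_thread() iterator. In isolation the iterator idiom looks like this; an illustrative helper (not something these hunks add), to be called with tasklist_lock or rcu_read_lock() held:

#include <linux/sched.h>

static int count_other_threads(struct task_struct *p)
{
	struct task_struct *t = p;
	int count = 0;

	while_each_thread(p, t)		/* visits every thread except p */
		count++;

	return count;
}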
diff --git a/kernel/smp.c b/kernel/smp.c
index 3fc697336183..75c970c715d3 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
52 case CPU_UP_PREPARE_FROZEN: 52 case CPU_UP_PREPARE_FROZEN:
53 if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, 53 if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
54 cpu_to_node(cpu))) 54 cpu_to_node(cpu)))
55 return NOTIFY_BAD; 55 return notifier_from_errno(-ENOMEM);
56 break; 56 break;
57 57
58#ifdef CONFIG_HOTPLUG_CPU 58#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0db913a5c60f..07b4f1b1a73a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -808,7 +808,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
808 p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); 808 p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
809 if (IS_ERR(p)) { 809 if (IS_ERR(p)) {
810 printk("ksoftirqd for %i failed\n", hotcpu); 810 printk("ksoftirqd for %i failed\n", hotcpu);
811 return NOTIFY_BAD; 811 return notifier_from_errno(PTR_ERR(p));
812 } 812 }
813 kthread_bind(p, hotcpu); 813 kthread_bind(p, hotcpu);
814 per_cpu(ksoftirqd, hotcpu) = p; 814 per_cpu(ksoftirqd, hotcpu) = p;
@@ -850,7 +850,7 @@ static __init int spawn_ksoftirqd(void)
850 void *cpu = (void *)(long)smp_processor_id(); 850 void *cpu = (void *)(long)smp_processor_id();
851 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 851 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
852 852
853 BUG_ON(err == NOTIFY_BAD); 853 BUG_ON(err != NOTIFY_OK);
854 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 854 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
855 register_cpu_notifier(&cpu_nfb); 855 register_cpu_notifier(&cpu_nfb);
856 return 0; 856 return 0;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b4e7431e7c78..70f8d90331e9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -321,7 +321,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
321 321
322#ifdef CONFIG_HOTPLUG_CPU 322#ifdef CONFIG_HOTPLUG_CPU
323 case CPU_UP_CANCELED: 323 case CPU_UP_CANCELED:
324 case CPU_DEAD: 324 case CPU_POST_DEAD:
325 { 325 {
326 struct cpu_stop_work *work; 326 struct cpu_stop_work *work;
327 327
diff --git a/kernel/sys.c b/kernel/sys.c
index 0d36d889c74d..e83ddbbaf89d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1632,9 +1632,9 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
1632 1632
1633char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; 1633char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
1634 1634
1635static void argv_cleanup(char **argv, char **envp) 1635static void argv_cleanup(struct subprocess_info *info)
1636{ 1636{
1637 argv_free(argv); 1637 argv_free(info->argv);
1638} 1638}
1639 1639
1640/** 1640/**
@@ -1668,7 +1668,7 @@ int orderly_poweroff(bool force)
1668 goto out; 1668 goto out;
1669 } 1669 }
1670 1670
1671 call_usermodehelper_setcleanup(info, argv_cleanup); 1671 call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);
1672 1672
1673 ret = call_usermodehelper_exec(info, UMH_NO_WAIT); 1673 ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
1674 1674
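
call_usermodehelper_setfns() takes an optional init hook as well as the cleanup used here; the init hook's signature below follows the companion kmod.c change in this series, which is not reproduced on this page, so treat it as an assumption. A sketch of a caller wiring up both:

#include <linux/kmod.h>
#include <linux/string.h>

static int example_umh_init(struct subprocess_info *info)
{
	return 0;			/* e.g. install stdin/stdout pipes */
}

static void example_umh_cleanup(struct subprocess_info *info)
{
	argv_free(info->argv);
}

static int run_example_helper(char **argv, char **envp)
{
	struct subprocess_info *info;

	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	call_usermodehelper_setfns(info, example_umh_init,
				   example_umh_cleanup, NULL);
	return call_usermodehelper_exec(info, UMH_NO_WAIT);
}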
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 997080f00e0b..d24f761f4876 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = {
1471 }, 1471 },
1472#endif 1472#endif
1473 { 1473 {
1474 .procname = "pipe-max-pages", 1474 .procname = "pipe-max-size",
1475 .data = &pipe_max_pages, 1475 .data = &pipe_max_size,
1476 .maxlen = sizeof(int), 1476 .maxlen = sizeof(int),
1477 .mode = 0644, 1477 .mode = 0644,
1478 .proc_handler = &proc_dointvec_minmax, 1478 .proc_handler = &pipe_proc_fn,
1479 .extra1 = &two, 1479 .extra1 = &pipe_min_size,
1480 }, 1480 },
1481/* 1481/*
1482 * NOTE: do not add new entries to this table unless you have read 1482 * NOTE: do not add new entries to this table unless you have read
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1d7b9bc1c034..813993b5fb61 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -154,14 +154,14 @@ static void tick_nohz_update_jiffies(ktime_t now)
154 * Updates the per cpu time idle statistics counters 154 * Updates the per cpu time idle statistics counters
155 */ 155 */
156static void 156static void
157update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time) 157update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
158{ 158{
159 ktime_t delta; 159 ktime_t delta;
160 160
161 if (ts->idle_active) { 161 if (ts->idle_active) {
162 delta = ktime_sub(now, ts->idle_entrytime); 162 delta = ktime_sub(now, ts->idle_entrytime);
163 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); 163 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
164 if (nr_iowait_cpu() > 0) 164 if (nr_iowait_cpu(cpu) > 0)
165 ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); 165 ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
166 ts->idle_entrytime = now; 166 ts->idle_entrytime = now;
167 } 167 }
@@ -175,19 +175,19 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now)
175{ 175{
176 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 176 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
177 177
178 update_ts_time_stats(ts, now, NULL); 178 update_ts_time_stats(cpu, ts, now, NULL);
179 ts->idle_active = 0; 179 ts->idle_active = 0;
180 180
181 sched_clock_idle_wakeup_event(0); 181 sched_clock_idle_wakeup_event(0);
182} 182}
183 183
184static ktime_t tick_nohz_start_idle(struct tick_sched *ts) 184static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
185{ 185{
186 ktime_t now; 186 ktime_t now;
187 187
188 now = ktime_get(); 188 now = ktime_get();
189 189
190 update_ts_time_stats(ts, now, NULL); 190 update_ts_time_stats(cpu, ts, now, NULL);
191 191
192 ts->idle_entrytime = now; 192 ts->idle_entrytime = now;
193 ts->idle_active = 1; 193 ts->idle_active = 1;
@@ -216,7 +216,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
216 if (!tick_nohz_enabled) 216 if (!tick_nohz_enabled)
217 return -1; 217 return -1;
218 218
219 update_ts_time_stats(ts, ktime_get(), last_update_time); 219 update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
220 220
221 return ktime_to_us(ts->idle_sleeptime); 221 return ktime_to_us(ts->idle_sleeptime);
222} 222}
@@ -242,7 +242,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
242 if (!tick_nohz_enabled) 242 if (!tick_nohz_enabled)
243 return -1; 243 return -1;
244 244
245 update_ts_time_stats(ts, ktime_get(), last_update_time); 245 update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
246 246
247 return ktime_to_us(ts->iowait_sleeptime); 247 return ktime_to_us(ts->iowait_sleeptime);
248} 248}
@@ -284,7 +284,7 @@ void tick_nohz_stop_sched_tick(int inidle)
284 */ 284 */
285 ts->inidle = 1; 285 ts->inidle = 1;
286 286
287 now = tick_nohz_start_idle(ts); 287 now = tick_nohz_start_idle(cpu, ts);
288 288
289 /* 289 /*
290 * If this cpu is offline and it is the one which updates 290 * If this cpu is offline and it is the one which updates
@@ -315,9 +315,6 @@ void tick_nohz_stop_sched_tick(int inidle)
315 goto end; 315 goto end;
316 } 316 }
317 317
318 if (nohz_ratelimit(cpu))
319 goto end;
320
321 ts->idle_calls++; 318 ts->idle_calls++;
322 /* Read jiffies and the time when jiffies were updated last */ 319 /* Read jiffies and the time when jiffies were updated last */
323 do { 320 do {
@@ -328,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle)
328 } while (read_seqretry(&xtime_lock, seq)); 325 } while (read_seqretry(&xtime_lock, seq));
329 326
330 if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || 327 if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
331 arch_needs_cpu(cpu)) { 328 arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) {
332 next_jiffies = last_jiffies + 1; 329 next_jiffies = last_jiffies + 1;
333 delta_jiffies = 1; 330 delta_jiffies = 1;
334 } else { 331 } else {
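With the cpu argument plumbed through, get_cpu_idle_time_us() and get_cpu_iowait_time_us() account iowait against the right CPU. A small sketch of how a consumer (for example a cpufreq governor) might use these accessors to derive an idle ratio over an interval; the structure and helper names are illustrative only.

	#include <linux/tick.h>
	#include <linux/math64.h>
	#include <linux/types.h>

	struct idle_sample {
		u64 idle_us;	/* cumulative idle time in microseconds */
		u64 wall_us;	/* wall clock at the time of the sample */
	};

	static void sample_cpu_idle(int cpu, struct idle_sample *s)
	{
		u64 wall;

		/* returns (u64)-1 when NO_HZ is disabled; ignored in this sketch */
		s->idle_us = get_cpu_idle_time_us(cpu, &wall);
		s->wall_us = wall;
	}

	static unsigned int idle_permille(const struct idle_sample *prev,
					  const struct idle_sample *cur)
	{
		u64 didle = cur->idle_us - prev->idle_us;
		u64 dwall = cur->wall_us - prev->wall_us;

		return dwall ? div64_u64(didle * 1000, dwall) : 0;
	}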
diff --git a/kernel/timer.c b/kernel/timer.c
index be394af5bc22..ee305c8d4e18 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -752,11 +752,15 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
752 752
753 expires_limit = expires; 753 expires_limit = expires;
754 754
755 if (timer->slack > -1) 755 if (timer->slack >= 0) {
756 expires_limit = expires + timer->slack; 756 expires_limit = expires + timer->slack;
757 else if (time_after(expires, jiffies)) /* auto slack: use 0.4% */ 757 } else {
758 expires_limit = expires + (expires - jiffies)/256; 758 unsigned long now = jiffies;
759 759
760 /* No slack, if already expired else auto slack 0.4% */
761 if (time_after(expires, now))
762 expires_limit = expires + (expires - now)/256;
763 }
760 mask = expires ^ expires_limit; 764 mask = expires ^ expires_limit;
761 if (mask == 0) 765 if (mask == 0)
762 return expires; 766 return expires;
@@ -1680,11 +1684,14 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
1680 unsigned long action, void *hcpu) 1684 unsigned long action, void *hcpu)
1681{ 1685{
1682 long cpu = (long)hcpu; 1686 long cpu = (long)hcpu;
1687 int err;
1688
1683 switch(action) { 1689 switch(action) {
1684 case CPU_UP_PREPARE: 1690 case CPU_UP_PREPARE:
1685 case CPU_UP_PREPARE_FROZEN: 1691 case CPU_UP_PREPARE_FROZEN:
1686 if (init_timers_cpu(cpu) < 0) 1692 err = init_timers_cpu(cpu);
1687 return NOTIFY_BAD; 1693 if (err < 0)
1694 return notifier_from_errno(err);
1688 break; 1695 break;
1689#ifdef CONFIG_HOTPLUG_CPU 1696#ifdef CONFIG_HOTPLUG_CPU
1690 case CPU_DEAD: 1697 case CPU_DEAD:
@@ -1710,7 +1717,7 @@ void __init init_timers(void)
1710 1717
1711 init_timer_stats(); 1718 init_timer_stats();
1712 1719
1713 BUG_ON(err == NOTIFY_BAD); 1720 BUG_ON(err != NOTIFY_OK);
1714 register_cpu_notifier(&timers_nb); 1721 register_cpu_notifier(&timers_nb);
1715 open_softirq(TIMER_SOFTIRQ, run_timer_softirq); 1722 open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1716} 1723}
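Two notes on the timer.c hunks. The auto-slack of (expires - now)/256 is roughly 0.39% of the remaining delay, which is where the "0.4%" in the comment comes from. And the notifier change switches from a bare NOTIFY_BAD to notifier_from_errno(), so the real -Exxx value can be propagated to the caller. A minimal sketch of that error-propagation pattern, with a made-up per-cpu setup function:

	#include <linux/notifier.h>
	#include <linux/cpu.h>

	static int my_prepare_cpu(long cpu)
	{
		/* pretend per-cpu setup that may fail with -ENOMEM etc. */
		return 0;
	}

	static int my_cpu_notify(struct notifier_block *nb,
				 unsigned long action, void *hcpu)
	{
		long cpu = (long)hcpu;
		int err;

		switch (action) {
		case CPU_UP_PREPARE:
		case CPU_UP_PREPARE_FROZEN:
			err = my_prepare_cpu(cpu);
			if (err < 0)
				return notifier_from_errno(err);	/* carries the errno */
			break;
		}
		return NOTIFY_OK;
	}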
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b3bc91a3f510..638711c17504 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -675,28 +675,33 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
675 } 675 }
676} 676}
677 677
678static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) 678static void blk_add_trace_rq_abort(void *ignore,
679 struct request_queue *q, struct request *rq)
679{ 680{
680 blk_add_trace_rq(q, rq, BLK_TA_ABORT); 681 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
681} 682}
682 683
683static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) 684static void blk_add_trace_rq_insert(void *ignore,
685 struct request_queue *q, struct request *rq)
684{ 686{
685 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 687 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
686} 688}
687 689
688static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) 690static void blk_add_trace_rq_issue(void *ignore,
691 struct request_queue *q, struct request *rq)
689{ 692{
690 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 693 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
691} 694}
692 695
693static void blk_add_trace_rq_requeue(struct request_queue *q, 696static void blk_add_trace_rq_requeue(void *ignore,
697 struct request_queue *q,
694 struct request *rq) 698 struct request *rq)
695{ 699{
696 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 700 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
697} 701}
698 702
699static void blk_add_trace_rq_complete(struct request_queue *q, 703static void blk_add_trace_rq_complete(void *ignore,
704 struct request_queue *q,
700 struct request *rq) 705 struct request *rq)
701{ 706{
702 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); 707 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
@@ -724,34 +729,40 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
724 !bio_flagged(bio, BIO_UPTODATE), 0, NULL); 729 !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
725} 730}
726 731
727static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) 732static void blk_add_trace_bio_bounce(void *ignore,
733 struct request_queue *q, struct bio *bio)
728{ 734{
729 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); 735 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
730} 736}
731 737
732static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) 738static void blk_add_trace_bio_complete(void *ignore,
739 struct request_queue *q, struct bio *bio)
733{ 740{
734 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 741 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
735} 742}
736 743
737static void blk_add_trace_bio_backmerge(struct request_queue *q, 744static void blk_add_trace_bio_backmerge(void *ignore,
745 struct request_queue *q,
738 struct bio *bio) 746 struct bio *bio)
739{ 747{
740 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 748 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
741} 749}
742 750
743static void blk_add_trace_bio_frontmerge(struct request_queue *q, 751static void blk_add_trace_bio_frontmerge(void *ignore,
752 struct request_queue *q,
744 struct bio *bio) 753 struct bio *bio)
745{ 754{
746 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 755 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
747} 756}
748 757
749static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) 758static void blk_add_trace_bio_queue(void *ignore,
759 struct request_queue *q, struct bio *bio)
750{ 760{
751 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 761 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
752} 762}
753 763
754static void blk_add_trace_getrq(struct request_queue *q, 764static void blk_add_trace_getrq(void *ignore,
765 struct request_queue *q,
755 struct bio *bio, int rw) 766 struct bio *bio, int rw)
756{ 767{
757 if (bio) 768 if (bio)
@@ -765,7 +776,8 @@ static void blk_add_trace_getrq(struct request_queue *q,
765} 776}
766 777
767 778
768static void blk_add_trace_sleeprq(struct request_queue *q, 779static void blk_add_trace_sleeprq(void *ignore,
780 struct request_queue *q,
769 struct bio *bio, int rw) 781 struct bio *bio, int rw)
770{ 782{
771 if (bio) 783 if (bio)
@@ -779,7 +791,7 @@ static void blk_add_trace_sleeprq(struct request_queue *q,
779 } 791 }
780} 792}
781 793
782static void blk_add_trace_plug(struct request_queue *q) 794static void blk_add_trace_plug(void *ignore, struct request_queue *q)
783{ 795{
784 struct blk_trace *bt = q->blk_trace; 796 struct blk_trace *bt = q->blk_trace;
785 797
@@ -787,7 +799,7 @@ static void blk_add_trace_plug(struct request_queue *q)
787 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); 799 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
788} 800}
789 801
790static void blk_add_trace_unplug_io(struct request_queue *q) 802static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q)
791{ 803{
792 struct blk_trace *bt = q->blk_trace; 804 struct blk_trace *bt = q->blk_trace;
793 805
@@ -800,7 +812,7 @@ static void blk_add_trace_unplug_io(struct request_queue *q)
800 } 812 }
801} 813}
802 814
803static void blk_add_trace_unplug_timer(struct request_queue *q) 815static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q)
804{ 816{
805 struct blk_trace *bt = q->blk_trace; 817 struct blk_trace *bt = q->blk_trace;
806 818
@@ -813,7 +825,8 @@ static void blk_add_trace_unplug_timer(struct request_queue *q)
813 } 825 }
814} 826}
815 827
816static void blk_add_trace_split(struct request_queue *q, struct bio *bio, 828static void blk_add_trace_split(void *ignore,
829 struct request_queue *q, struct bio *bio,
817 unsigned int pdu) 830 unsigned int pdu)
818{ 831{
819 struct blk_trace *bt = q->blk_trace; 832 struct blk_trace *bt = q->blk_trace;
@@ -829,6 +842,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
829 842
830/** 843/**
831 * blk_add_trace_remap - Add a trace for a remap operation 844 * blk_add_trace_remap - Add a trace for a remap operation
845 * @ignore: trace callback data parameter (not used)
832 * @q: queue the io is for 846 * @q: queue the io is for
833 * @bio: the source bio 847 * @bio: the source bio
834 * @dev: target device 848 * @dev: target device
@@ -839,8 +853,9 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
839 * it spans a stripe (or similar). Add a trace for that action. 853 * it spans a stripe (or similar). Add a trace for that action.
840 * 854 *
841 **/ 855 **/
842static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, 856static void blk_add_trace_remap(void *ignore,
843 dev_t dev, sector_t from) 857 struct request_queue *q, struct bio *bio,
858 dev_t dev, sector_t from)
844{ 859{
845 struct blk_trace *bt = q->blk_trace; 860 struct blk_trace *bt = q->blk_trace;
846 struct blk_io_trace_remap r; 861 struct blk_io_trace_remap r;
@@ -859,6 +874,7 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
859 874
860/** 875/**
861 * blk_add_trace_rq_remap - Add a trace for a request-remap operation 876 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
877 * @ignore: trace callback data parameter (not used)
862 * @q: queue the io is for 878 * @q: queue the io is for
863 * @rq: the source request 879 * @rq: the source request
864 * @dev: target device 880 * @dev: target device
@@ -869,7 +885,8 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
869 * Add a trace for that action. 885 * Add a trace for that action.
870 * 886 *
871 **/ 887 **/
872static void blk_add_trace_rq_remap(struct request_queue *q, 888static void blk_add_trace_rq_remap(void *ignore,
889 struct request_queue *q,
873 struct request *rq, dev_t dev, 890 struct request *rq, dev_t dev,
874 sector_t from) 891 sector_t from)
875{ 892{
@@ -921,64 +938,64 @@ static void blk_register_tracepoints(void)
921{ 938{
922 int ret; 939 int ret;
923 940
924 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); 941 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
925 WARN_ON(ret); 942 WARN_ON(ret);
926 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); 943 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
927 WARN_ON(ret); 944 WARN_ON(ret);
928 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); 945 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
929 WARN_ON(ret); 946 WARN_ON(ret);
930 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); 947 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
931 WARN_ON(ret); 948 WARN_ON(ret);
932 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); 949 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
933 WARN_ON(ret); 950 WARN_ON(ret);
934 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); 951 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
935 WARN_ON(ret); 952 WARN_ON(ret);
936 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); 953 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
937 WARN_ON(ret); 954 WARN_ON(ret);
938 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); 955 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
939 WARN_ON(ret); 956 WARN_ON(ret);
940 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); 957 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
941 WARN_ON(ret); 958 WARN_ON(ret);
942 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); 959 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
943 WARN_ON(ret); 960 WARN_ON(ret);
944 ret = register_trace_block_getrq(blk_add_trace_getrq); 961 ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
945 WARN_ON(ret); 962 WARN_ON(ret);
946 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); 963 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
947 WARN_ON(ret); 964 WARN_ON(ret);
948 ret = register_trace_block_plug(blk_add_trace_plug); 965 ret = register_trace_block_plug(blk_add_trace_plug, NULL);
949 WARN_ON(ret); 966 WARN_ON(ret);
950 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); 967 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
951 WARN_ON(ret); 968 WARN_ON(ret);
952 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); 969 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
953 WARN_ON(ret); 970 WARN_ON(ret);
954 ret = register_trace_block_split(blk_add_trace_split); 971 ret = register_trace_block_split(blk_add_trace_split, NULL);
955 WARN_ON(ret); 972 WARN_ON(ret);
956 ret = register_trace_block_remap(blk_add_trace_remap); 973 ret = register_trace_block_remap(blk_add_trace_remap, NULL);
957 WARN_ON(ret); 974 WARN_ON(ret);
958 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap); 975 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
959 WARN_ON(ret); 976 WARN_ON(ret);
960} 977}
961 978
962static void blk_unregister_tracepoints(void) 979static void blk_unregister_tracepoints(void)
963{ 980{
964 unregister_trace_block_rq_remap(blk_add_trace_rq_remap); 981 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
965 unregister_trace_block_remap(blk_add_trace_remap); 982 unregister_trace_block_remap(blk_add_trace_remap, NULL);
966 unregister_trace_block_split(blk_add_trace_split); 983 unregister_trace_block_split(blk_add_trace_split, NULL);
967 unregister_trace_block_unplug_io(blk_add_trace_unplug_io); 984 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
968 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); 985 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
969 unregister_trace_block_plug(blk_add_trace_plug); 986 unregister_trace_block_plug(blk_add_trace_plug, NULL);
970 unregister_trace_block_sleeprq(blk_add_trace_sleeprq); 987 unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
971 unregister_trace_block_getrq(blk_add_trace_getrq); 988 unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
972 unregister_trace_block_bio_queue(blk_add_trace_bio_queue); 989 unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
973 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); 990 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
974 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); 991 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
975 unregister_trace_block_bio_complete(blk_add_trace_bio_complete); 992 unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
976 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); 993 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
977 unregister_trace_block_rq_complete(blk_add_trace_rq_complete); 994 unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
978 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); 995 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
979 unregister_trace_block_rq_issue(blk_add_trace_rq_issue); 996 unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
980 unregister_trace_block_rq_insert(blk_add_trace_rq_insert); 997 unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
981 unregister_trace_block_rq_abort(blk_add_trace_rq_abort); 998 unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
982 999
983 tracepoint_synchronize_unregister(); 1000 tracepoint_synchronize_unregister();
984} 1001}
@@ -1321,7 +1338,7 @@ out:
1321} 1338}
1322 1339
1323static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, 1340static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1324 int flags) 1341 int flags, struct trace_event *event)
1325{ 1342{
1326 return print_one_line(iter, false); 1343 return print_one_line(iter, false);
1327} 1344}
@@ -1343,7 +1360,8 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1343} 1360}
1344 1361
1345static enum print_line_t 1362static enum print_line_t
1346blk_trace_event_print_binary(struct trace_iterator *iter, int flags) 1363blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
1364 struct trace_event *event)
1347{ 1365{
1348 return blk_trace_synthesize_old_trace(iter) ? 1366 return blk_trace_synthesize_old_trace(iter) ?
1349 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 1367 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
@@ -1381,12 +1399,16 @@ static struct tracer blk_tracer __read_mostly = {
1381 .set_flag = blk_tracer_set_flag, 1399 .set_flag = blk_tracer_set_flag,
1382}; 1400};
1383 1401
1384static struct trace_event trace_blk_event = { 1402static struct trace_event_functions trace_blk_event_funcs = {
1385 .type = TRACE_BLK,
1386 .trace = blk_trace_event_print, 1403 .trace = blk_trace_event_print,
1387 .binary = blk_trace_event_print_binary, 1404 .binary = blk_trace_event_print_binary,
1388}; 1405};
1389 1406
1407static struct trace_event trace_blk_event = {
1408 .type = TRACE_BLK,
1409 .funcs = &trace_blk_event_funcs,
1410};
1411
1390static int __init init_blk_tracer(void) 1412static int __init init_blk_tracer(void)
1391{ 1413{
1392 if (!register_ftrace_event(&trace_blk_event)) { 1414 if (!register_ftrace_event(&trace_blk_event)) {
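The blktrace changes follow the new tracepoint API: register/unregister take a private data pointer, and every probe receives that pointer back as its first argument (blktrace simply passes NULL). An illustrative sketch, not from the commit, of a hypothetical probe that uses the data pointer as a per-registration cookie:

	#include <linux/blkdev.h>
	#include <trace/events/block.h>

	static unsigned long insert_count;	/* toy statistic, not concurrency-safe */

	static void my_rq_insert_probe(void *data, struct request_queue *q,
				       struct request *rq)
	{
		unsigned long *counter = data;	/* the cookie passed at registration */

		(*counter)++;
	}

	static int my_probe_attach(void)
	{
		/* the second argument is handed back to the probe as "data" */
		return register_trace_block_rq_insert(my_rq_insert_probe, &insert_count);
	}

	static void my_probe_detach(void)
	{
		unregister_trace_block_rq_insert(my_rq_insert_probe, &insert_count);
		tracepoint_synchronize_unregister();	/* wait for in-flight probes */
	}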
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 32837e19e3bd..6d2cb14f9449 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3234,7 +3234,8 @@ free:
3234} 3234}
3235 3235
3236static void 3236static void
3237ftrace_graph_probe_sched_switch(struct task_struct *prev, struct task_struct *next) 3237ftrace_graph_probe_sched_switch(void *ignore,
3238 struct task_struct *prev, struct task_struct *next)
3238{ 3239{
3239 unsigned long long timestamp; 3240 unsigned long long timestamp;
3240 int index; 3241 int index;
@@ -3288,7 +3289,7 @@ static int start_graph_tracing(void)
3288 } while (ret == -EAGAIN); 3289 } while (ret == -EAGAIN);
3289 3290
3290 if (!ret) { 3291 if (!ret) {
3291 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); 3292 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
3292 if (ret) 3293 if (ret)
3293 pr_info("ftrace_graph: Couldn't activate tracepoint" 3294 pr_info("ftrace_graph: Couldn't activate tracepoint"
3294 " probe to kernel_sched_switch\n"); 3295 " probe to kernel_sched_switch\n");
@@ -3364,7 +3365,7 @@ void unregister_ftrace_graph(void)
3364 ftrace_graph_entry = ftrace_graph_entry_stub; 3365 ftrace_graph_entry = ftrace_graph_entry_stub;
3365 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 3366 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
3366 unregister_pm_notifier(&ftrace_suspend_notifier); 3367 unregister_pm_notifier(&ftrace_suspend_notifier);
3367 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); 3368 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
3368 3369
3369 out: 3370 out:
3370 mutex_unlock(&ftrace_lock); 3371 mutex_unlock(&ftrace_lock);
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index a91da69f153a..bbfc1bb1660b 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -95,7 +95,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
95 trace_wake_up(); 95 trace_wake_up();
96} 96}
97 97
98static void kmemtrace_kmalloc(unsigned long call_site, 98static void kmemtrace_kmalloc(void *ignore,
99 unsigned long call_site,
99 const void *ptr, 100 const void *ptr,
100 size_t bytes_req, 101 size_t bytes_req,
101 size_t bytes_alloc, 102 size_t bytes_alloc,
@@ -105,7 +106,8 @@ static void kmemtrace_kmalloc(unsigned long call_site,
105 bytes_req, bytes_alloc, gfp_flags, -1); 106 bytes_req, bytes_alloc, gfp_flags, -1);
106} 107}
107 108
108static void kmemtrace_kmem_cache_alloc(unsigned long call_site, 109static void kmemtrace_kmem_cache_alloc(void *ignore,
110 unsigned long call_site,
109 const void *ptr, 111 const void *ptr,
110 size_t bytes_req, 112 size_t bytes_req,
111 size_t bytes_alloc, 113 size_t bytes_alloc,
@@ -115,7 +117,8 @@ static void kmemtrace_kmem_cache_alloc(unsigned long call_site,
115 bytes_req, bytes_alloc, gfp_flags, -1); 117 bytes_req, bytes_alloc, gfp_flags, -1);
116} 118}
117 119
118static void kmemtrace_kmalloc_node(unsigned long call_site, 120static void kmemtrace_kmalloc_node(void *ignore,
121 unsigned long call_site,
119 const void *ptr, 122 const void *ptr,
120 size_t bytes_req, 123 size_t bytes_req,
121 size_t bytes_alloc, 124 size_t bytes_alloc,
@@ -126,7 +129,8 @@ static void kmemtrace_kmalloc_node(unsigned long call_site,
126 bytes_req, bytes_alloc, gfp_flags, node); 129 bytes_req, bytes_alloc, gfp_flags, node);
127} 130}
128 131
129static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site, 132static void kmemtrace_kmem_cache_alloc_node(void *ignore,
133 unsigned long call_site,
130 const void *ptr, 134 const void *ptr,
131 size_t bytes_req, 135 size_t bytes_req,
132 size_t bytes_alloc, 136 size_t bytes_alloc,
@@ -137,12 +141,14 @@ static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site,
137 bytes_req, bytes_alloc, gfp_flags, node); 141 bytes_req, bytes_alloc, gfp_flags, node);
138} 142}
139 143
140static void kmemtrace_kfree(unsigned long call_site, const void *ptr) 144static void
145kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
141{ 146{
142 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr); 147 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
143} 148}
144 149
145static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr) 150static void kmemtrace_kmem_cache_free(void *ignore,
151 unsigned long call_site, const void *ptr)
146{ 152{
147 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr); 153 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
148} 154}
@@ -151,34 +157,34 @@ static int kmemtrace_start_probes(void)
151{ 157{
152 int err; 158 int err;
153 159
154 err = register_trace_kmalloc(kmemtrace_kmalloc); 160 err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
155 if (err) 161 if (err)
156 return err; 162 return err;
157 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); 163 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
158 if (err) 164 if (err)
159 return err; 165 return err;
160 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node); 166 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
161 if (err) 167 if (err)
162 return err; 168 return err;
163 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); 169 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
164 if (err) 170 if (err)
165 return err; 171 return err;
166 err = register_trace_kfree(kmemtrace_kfree); 172 err = register_trace_kfree(kmemtrace_kfree, NULL);
167 if (err) 173 if (err)
168 return err; 174 return err;
169 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free); 175 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
170 176
171 return err; 177 return err;
172} 178}
173 179
174static void kmemtrace_stop_probes(void) 180static void kmemtrace_stop_probes(void)
175{ 181{
176 unregister_trace_kmalloc(kmemtrace_kmalloc); 182 unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
177 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); 183 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
178 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node); 184 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
179 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); 185 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
180 unregister_trace_kfree(kmemtrace_kfree); 186 unregister_trace_kfree(kmemtrace_kfree, NULL);
181 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free); 187 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
182} 188}
183 189
184static int kmem_trace_init(struct trace_array *tr) 190static int kmem_trace_init(struct trace_array *tr)
@@ -237,7 +243,8 @@ struct kmemtrace_user_event_alloc {
237}; 243};
238 244
239static enum print_line_t 245static enum print_line_t
240kmemtrace_print_alloc(struct trace_iterator *iter, int flags) 246kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
247 struct trace_event *event)
241{ 248{
242 struct trace_seq *s = &iter->seq; 249 struct trace_seq *s = &iter->seq;
243 struct kmemtrace_alloc_entry *entry; 250 struct kmemtrace_alloc_entry *entry;
@@ -257,7 +264,8 @@ kmemtrace_print_alloc(struct trace_iterator *iter, int flags)
257} 264}
258 265
259static enum print_line_t 266static enum print_line_t
260kmemtrace_print_free(struct trace_iterator *iter, int flags) 267kmemtrace_print_free(struct trace_iterator *iter, int flags,
268 struct trace_event *event)
261{ 269{
262 struct trace_seq *s = &iter->seq; 270 struct trace_seq *s = &iter->seq;
263 struct kmemtrace_free_entry *entry; 271 struct kmemtrace_free_entry *entry;
@@ -275,7 +283,8 @@ kmemtrace_print_free(struct trace_iterator *iter, int flags)
275} 283}
276 284
277static enum print_line_t 285static enum print_line_t
278kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags) 286kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
287 struct trace_event *event)
279{ 288{
280 struct trace_seq *s = &iter->seq; 289 struct trace_seq *s = &iter->seq;
281 struct kmemtrace_alloc_entry *entry; 290 struct kmemtrace_alloc_entry *entry;
@@ -309,7 +318,8 @@ kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags)
309} 318}
310 319
311static enum print_line_t 320static enum print_line_t
312kmemtrace_print_free_user(struct trace_iterator *iter, int flags) 321kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
322 struct trace_event *event)
313{ 323{
314 struct trace_seq *s = &iter->seq; 324 struct trace_seq *s = &iter->seq;
315 struct kmemtrace_free_entry *entry; 325 struct kmemtrace_free_entry *entry;
@@ -463,18 +473,26 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
463 } 473 }
464} 474}
465 475
466static struct trace_event kmem_trace_alloc = { 476static struct trace_event_functions kmem_trace_alloc_funcs = {
467 .type = TRACE_KMEM_ALLOC,
468 .trace = kmemtrace_print_alloc, 477 .trace = kmemtrace_print_alloc,
469 .binary = kmemtrace_print_alloc_user, 478 .binary = kmemtrace_print_alloc_user,
470}; 479};
471 480
472static struct trace_event kmem_trace_free = { 481static struct trace_event kmem_trace_alloc = {
473 .type = TRACE_KMEM_FREE, 482 .type = TRACE_KMEM_ALLOC,
483 .funcs = &kmem_trace_alloc_funcs,
484};
485
486static struct trace_event_functions kmem_trace_free_funcs = {
474 .trace = kmemtrace_print_free, 487 .trace = kmemtrace_print_free,
475 .binary = kmemtrace_print_free_user, 488 .binary = kmemtrace_print_free_user,
476}; 489};
477 490
491static struct trace_event kmem_trace_free = {
492 .type = TRACE_KMEM_FREE,
493 .funcs = &kmem_trace_free_funcs,
494};
495
478static struct tracer kmem_tracer __read_mostly = { 496static struct tracer kmem_tracer __read_mostly = {
479 .name = "kmemtrace", 497 .name = "kmemtrace",
480 .init = kmem_trace_init, 498 .init = kmem_trace_init,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7f6059c5aa94..1da7b6ea8b85 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1768,6 +1768,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1768 * must fill the old tail_page with padding. 1768 * must fill the old tail_page with padding.
1769 */ 1769 */
1770 if (tail >= BUF_PAGE_SIZE) { 1770 if (tail >= BUF_PAGE_SIZE) {
1771 /*
1772 * If the page was filled, then we still need
1773 * to update the real_end. Reset it to zero
1774 * and the reader will ignore it.
1775 */
1776 if (tail == BUF_PAGE_SIZE)
1777 tail_page->real_end = 0;
1778
1771 local_sub(length, &tail_page->write); 1779 local_sub(length, &tail_page->write);
1772 return; 1780 return;
1773 } 1781 }
@@ -3894,12 +3902,12 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3894 ret = read; 3902 ret = read;
3895 3903
3896 cpu_buffer->lost_events = 0; 3904 cpu_buffer->lost_events = 0;
3905
3906 commit = local_read(&bpage->commit);
3897 /* 3907 /*
3898 * Set a flag in the commit field if we lost events 3908 * Set a flag in the commit field if we lost events
3899 */ 3909 */
3900 if (missed_events) { 3910 if (missed_events) {
3901 commit = local_read(&bpage->commit);
3902
3903 /* If there is room at the end of the page to save the 3911 /* If there is room at the end of the page to save the
3904 * missed events, then record it there. 3912 * missed events, then record it there.
3905 */ 3913 */
@@ -3907,10 +3915,17 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3907 memcpy(&bpage->data[commit], &missed_events, 3915 memcpy(&bpage->data[commit], &missed_events,
3908 sizeof(missed_events)); 3916 sizeof(missed_events));
3909 local_add(RB_MISSED_STORED, &bpage->commit); 3917 local_add(RB_MISSED_STORED, &bpage->commit);
3918 commit += sizeof(missed_events);
3910 } 3919 }
3911 local_add(RB_MISSED_EVENTS, &bpage->commit); 3920 local_add(RB_MISSED_EVENTS, &bpage->commit);
3912 } 3921 }
3913 3922
3923 /*
3924 * This page may be off to user land. Zero it out here.
3925 */
3926 if (commit < BUF_PAGE_SIZE)
3927 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
3928
3914 out_unlock: 3929 out_unlock:
3915 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3930 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3916 3931
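The ring_buffer.c hunks enforce one rule: before a partially filled buffer page is handed to user space, the unused tail must be zeroed so stale kernel data cannot leak (the equivalent memset in trace.c is dropped below because the scrubbing now happens here). A generic sketch of that rule, with hypothetical names:

	#include <linux/uaccess.h>
	#include <linux/string.h>
	#include <linux/errno.h>

	static int export_partial_page(void __user *ubuf, char *page, size_t used,
				       size_t page_size)
	{
		if (used > page_size)
			return -EINVAL;

		/* scrub everything past the last valid byte */
		memset(page + used, 0, page_size - used);

		return copy_to_user(ubuf, page, page_size) ? -EFAULT : 0;
	}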
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a76339a9e65..086d36316805 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1936,7 +1936,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1936 } 1936 }
1937 1937
1938 if (event) 1938 if (event)
1939 return event->trace(iter, sym_flags); 1939 return event->funcs->trace(iter, sym_flags, event);
1940 1940
1941 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) 1941 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1942 goto partial; 1942 goto partial;
@@ -1962,7 +1962,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1962 1962
1963 event = ftrace_find_event(entry->type); 1963 event = ftrace_find_event(entry->type);
1964 if (event) 1964 if (event)
1965 return event->raw(iter, 0); 1965 return event->funcs->raw(iter, 0, event);
1966 1966
1967 if (!trace_seq_printf(s, "%d ?\n", entry->type)) 1967 if (!trace_seq_printf(s, "%d ?\n", entry->type))
1968 goto partial; 1968 goto partial;
@@ -1989,7 +1989,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1989 1989
1990 event = ftrace_find_event(entry->type); 1990 event = ftrace_find_event(entry->type);
1991 if (event) { 1991 if (event) {
1992 enum print_line_t ret = event->hex(iter, 0); 1992 enum print_line_t ret = event->funcs->hex(iter, 0, event);
1993 if (ret != TRACE_TYPE_HANDLED) 1993 if (ret != TRACE_TYPE_HANDLED)
1994 return ret; 1994 return ret;
1995 } 1995 }
@@ -2014,7 +2014,8 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2014 } 2014 }
2015 2015
2016 event = ftrace_find_event(entry->type); 2016 event = ftrace_find_event(entry->type);
2017 return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; 2017 return event ? event->funcs->binary(iter, 0, event) :
2018 TRACE_TYPE_HANDLED;
2018} 2019}
2019 2020
2020int trace_empty(struct trace_iterator *iter) 2021int trace_empty(struct trace_iterator *iter)
@@ -3665,7 +3666,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3665 size_t count, loff_t *ppos) 3666 size_t count, loff_t *ppos)
3666{ 3667{
3667 struct ftrace_buffer_info *info = filp->private_data; 3668 struct ftrace_buffer_info *info = filp->private_data;
3668 unsigned int pos;
3669 ssize_t ret; 3669 ssize_t ret;
3670 size_t size; 3670 size_t size;
3671 3671
@@ -3692,11 +3692,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3692 if (ret < 0) 3692 if (ret < 0)
3693 return 0; 3693 return 0;
3694 3694
3695 pos = ring_buffer_page_len(info->spare);
3696
3697 if (pos < PAGE_SIZE)
3698 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3699
3700read: 3695read:
3701 size = PAGE_SIZE - info->read; 3696 size = PAGE_SIZE - info->read;
3702 if (size > count) 3697 if (size > count)
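The output callbacks are now invoked as event->funcs->trace(iter, flags, event), passing the struct trace_event itself. A sketch (hypothetical types and names) of why that matters: a handler embedded in a larger private structure can recover its container with container_of(), which the old (iter, flags)-only signature did not allow.

	#include <linux/ftrace_event.h>
	#include <linux/kernel.h>

	struct my_event {
		int			payload;
		struct trace_event	event;		/* registered with ftrace */
	};

	static enum print_line_t my_trace_print(struct trace_iterator *iter,
						int flags, struct trace_event *event)
	{
		struct my_event *me = container_of(event, struct my_event, event);

		if (!trace_seq_printf(&iter->seq, "payload=%d\n", me->payload))
			return TRACE_TYPE_PARTIAL_LINE;
		return TRACE_TYPE_HANDLED;
	}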
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d1ce0bec1b3f..2cd96399463f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -405,12 +405,12 @@ void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
405void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, 405void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
406 int pc); 406 int pc);
407#else 407#else
408static inline void ftrace_trace_stack(struct trace_array *tr, 408static inline void ftrace_trace_stack(struct ring_buffer *buffer,
409 unsigned long flags, int skip, int pc) 409 unsigned long flags, int skip, int pc)
410{ 410{
411} 411}
412 412
413static inline void ftrace_trace_userstack(struct trace_array *tr, 413static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
414 unsigned long flags, int pc) 414 unsigned long flags, int pc)
415{ 415{
416} 416}
@@ -778,12 +778,15 @@ extern void print_subsystem_event_filter(struct event_subsystem *system,
778 struct trace_seq *s); 778 struct trace_seq *s);
779extern int filter_assign_type(const char *type); 779extern int filter_assign_type(const char *type);
780 780
781struct list_head *
782trace_get_fields(struct ftrace_event_call *event_call);
783
781static inline int 784static inline int
782filter_check_discard(struct ftrace_event_call *call, void *rec, 785filter_check_discard(struct ftrace_event_call *call, void *rec,
783 struct ring_buffer *buffer, 786 struct ring_buffer *buffer,
784 struct ring_buffer_event *event) 787 struct ring_buffer_event *event)
785{ 788{
786 if (unlikely(call->filter_active) && 789 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
787 !filter_match_preds(call->filter, rec)) { 790 !filter_match_preds(call->filter, rec)) {
788 ring_buffer_discard_commit(buffer, event); 791 ring_buffer_discard_commit(buffer, event);
789 return 1; 792 return 1;
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index b9bc4d470177..8d3538b4ea5f 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -143,7 +143,7 @@ static void branch_trace_reset(struct trace_array *tr)
143} 143}
144 144
145static enum print_line_t trace_branch_print(struct trace_iterator *iter, 145static enum print_line_t trace_branch_print(struct trace_iterator *iter,
146 int flags) 146 int flags, struct trace_event *event)
147{ 147{
148 struct trace_branch *field; 148 struct trace_branch *field;
149 149
@@ -167,9 +167,13 @@ static void branch_print_header(struct seq_file *s)
167 " |\n"); 167 " |\n");
168} 168}
169 169
170static struct trace_event_functions trace_branch_funcs = {
171 .trace = trace_branch_print,
172};
173
170static struct trace_event trace_branch_event = { 174static struct trace_event trace_branch_event = {
171 .type = TRACE_BRANCH, 175 .type = TRACE_BRANCH,
172 .trace = trace_branch_print, 176 .funcs = &trace_branch_funcs,
173}; 177};
174 178
175static struct tracer branch_trace __read_mostly = 179static struct tracer branch_trace __read_mostly =
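The trace_branch.c hunk shows the new two-level layout in its simplest form: the callbacks move into a struct trace_event_functions that can be shared, and each struct trace_event carries only its type plus a pointer to those functions. A minimal sketch under that assumption, with an invented event:

	#include <linux/ftrace_event.h>

	static enum print_line_t my_print(struct trace_iterator *iter,
					  int flags, struct trace_event *event)
	{
		return trace_seq_puts(&iter->seq, "my event\n") ?
			TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
	}

	static struct trace_event_functions my_event_funcs = {
		.trace	= my_print,		/* shared by every event of this shape */
	};

	static struct trace_event my_event = {
		.type	= 0,			/* 0 asks register_ftrace_event() to assign one */
		.funcs	= &my_event_funcs,
	};

	/* registration, as in init_blk_tracer() above:
	 *	if (!register_ftrace_event(&my_event))
	 *		... handle failure ...
	 */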
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 0565bb42566f..8a2b73f7c068 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,13 +9,9 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
13EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
14
15EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); 12EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
16 13
17static char *perf_trace_buf; 14static char *perf_trace_buf[4];
18static char *perf_trace_buf_nmi;
19 15
20/* 16/*
21 * Force it to be aligned to unsigned long to avoid misaligned accesses 17 * Force it to be aligned to unsigned long to avoid misaligned accesses
@@ -27,57 +23,84 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
27/* Count the events in use (per event id, not per instance) */ 23/* Count the events in use (per event id, not per instance) */
28static int total_ref_count; 24static int total_ref_count;
29 25
30static int perf_trace_event_enable(struct ftrace_event_call *event) 26static int perf_trace_event_init(struct ftrace_event_call *tp_event,
27 struct perf_event *p_event)
31{ 28{
32 char *buf; 29 struct hlist_head *list;
33 int ret = -ENOMEM; 30 int ret = -ENOMEM;
31 int cpu;
34 32
35 if (event->perf_refcount++ > 0) 33 p_event->tp_event = tp_event;
34 if (tp_event->perf_refcount++ > 0)
36 return 0; 35 return 0;
37 36
38 if (!total_ref_count) { 37 list = alloc_percpu(struct hlist_head);
39 buf = (char *)alloc_percpu(perf_trace_t); 38 if (!list)
40 if (!buf) 39 goto fail;
41 goto fail_buf;
42 40
43 rcu_assign_pointer(perf_trace_buf, buf); 41 for_each_possible_cpu(cpu)
42 INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
44 43
45 buf = (char *)alloc_percpu(perf_trace_t); 44 tp_event->perf_events = list;
46 if (!buf)
47 goto fail_buf_nmi;
48 45
49 rcu_assign_pointer(perf_trace_buf_nmi, buf); 46 if (!total_ref_count) {
50 } 47 char *buf;
48 int i;
51 49
52 ret = event->perf_event_enable(event); 50 for (i = 0; i < 4; i++) {
53 if (!ret) { 51 buf = (char *)alloc_percpu(perf_trace_t);
54 total_ref_count++; 52 if (!buf)
55 return 0; 53 goto fail;
54
55 perf_trace_buf[i] = buf;
56 }
56 } 57 }
57 58
58fail_buf_nmi: 59 if (tp_event->class->reg)
60 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
61 else
62 ret = tracepoint_probe_register(tp_event->name,
63 tp_event->class->perf_probe,
64 tp_event);
65
66 if (ret)
67 goto fail;
68
69 total_ref_count++;
70 return 0;
71
72fail:
59 if (!total_ref_count) { 73 if (!total_ref_count) {
60 free_percpu(perf_trace_buf_nmi); 74 int i;
61 free_percpu(perf_trace_buf); 75
62 perf_trace_buf_nmi = NULL; 76 for (i = 0; i < 4; i++) {
63 perf_trace_buf = NULL; 77 free_percpu(perf_trace_buf[i]);
78 perf_trace_buf[i] = NULL;
79 }
80 }
81
82 if (!--tp_event->perf_refcount) {
83 free_percpu(tp_event->perf_events);
84 tp_event->perf_events = NULL;
64 } 85 }
65fail_buf:
66 event->perf_refcount--;
67 86
68 return ret; 87 return ret;
69} 88}
70 89
71int perf_trace_enable(int event_id) 90int perf_trace_init(struct perf_event *p_event)
72{ 91{
73 struct ftrace_event_call *event; 92 struct ftrace_event_call *tp_event;
93 int event_id = p_event->attr.config;
74 int ret = -EINVAL; 94 int ret = -EINVAL;
75 95
76 mutex_lock(&event_mutex); 96 mutex_lock(&event_mutex);
77 list_for_each_entry(event, &ftrace_events, list) { 97 list_for_each_entry(tp_event, &ftrace_events, list) {
78 if (event->id == event_id && event->perf_event_enable && 98 if (tp_event->event.type == event_id &&
79 try_module_get(event->mod)) { 99 tp_event->class &&
80 ret = perf_trace_event_enable(event); 100 (tp_event->class->perf_probe ||
101 tp_event->class->reg) &&
102 try_module_get(tp_event->mod)) {
103 ret = perf_trace_event_init(tp_event, p_event);
81 break; 104 break;
82 } 105 }
83 } 106 }
@@ -86,90 +109,87 @@ int perf_trace_enable(int event_id)
86 return ret; 109 return ret;
87} 110}
88 111
89static void perf_trace_event_disable(struct ftrace_event_call *event) 112int perf_trace_enable(struct perf_event *p_event)
90{ 113{
91 char *buf, *nmi_buf; 114 struct ftrace_event_call *tp_event = p_event->tp_event;
115 struct hlist_head *list;
92 116
93 if (--event->perf_refcount > 0) 117 list = tp_event->perf_events;
94 return; 118 if (WARN_ON_ONCE(!list))
119 return -EINVAL;
95 120
96 event->perf_event_disable(event); 121 list = this_cpu_ptr(list);
97 122 hlist_add_head_rcu(&p_event->hlist_entry, list);
98 if (!--total_ref_count) {
99 buf = perf_trace_buf;
100 rcu_assign_pointer(perf_trace_buf, NULL);
101 123
102 nmi_buf = perf_trace_buf_nmi; 124 return 0;
103 rcu_assign_pointer(perf_trace_buf_nmi, NULL); 125}
104
105 /*
106 * Ensure every events in profiling have finished before
107 * releasing the buffers
108 */
109 synchronize_sched();
110 126
111 free_percpu(buf); 127void perf_trace_disable(struct perf_event *p_event)
112 free_percpu(nmi_buf); 128{
113 } 129 hlist_del_rcu(&p_event->hlist_entry);
114} 130}
115 131
116void perf_trace_disable(int event_id) 132void perf_trace_destroy(struct perf_event *p_event)
117{ 133{
118 struct ftrace_event_call *event; 134 struct ftrace_event_call *tp_event = p_event->tp_event;
135 int i;
119 136
120 mutex_lock(&event_mutex); 137 mutex_lock(&event_mutex);
121 list_for_each_entry(event, &ftrace_events, list) { 138 if (--tp_event->perf_refcount > 0)
122 if (event->id == event_id) { 139 goto out;
123 perf_trace_event_disable(event); 140
124 module_put(event->mod); 141 if (tp_event->class->reg)
125 break; 142 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
143 else
144 tracepoint_probe_unregister(tp_event->name,
145 tp_event->class->perf_probe,
146 tp_event);
147
148 /*
149 * Ensure our callback won't be called anymore. See
150 * tracepoint_probe_unregister() and __DO_TRACE().
151 */
152 synchronize_sched();
153
154 free_percpu(tp_event->perf_events);
155 tp_event->perf_events = NULL;
156
157 if (!--total_ref_count) {
158 for (i = 0; i < 4; i++) {
159 free_percpu(perf_trace_buf[i]);
160 perf_trace_buf[i] = NULL;
126 } 161 }
127 } 162 }
163out:
128 mutex_unlock(&event_mutex); 164 mutex_unlock(&event_mutex);
129} 165}
130 166
131__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, 167__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
132 int *rctxp, unsigned long *irq_flags) 168 struct pt_regs *regs, int *rctxp)
133{ 169{
134 struct trace_entry *entry; 170 struct trace_entry *entry;
135 char *trace_buf, *raw_data; 171 unsigned long flags;
136 int pc, cpu; 172 char *raw_data;
173 int pc;
137 174
138 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); 175 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
139 176
140 pc = preempt_count(); 177 pc = preempt_count();
141 178
142 /* Protect the per cpu buffer, begin the rcu read side */
143 local_irq_save(*irq_flags);
144
145 *rctxp = perf_swevent_get_recursion_context(); 179 *rctxp = perf_swevent_get_recursion_context();
146 if (*rctxp < 0) 180 if (*rctxp < 0)
147 goto err_recursion; 181 return NULL;
148
149 cpu = smp_processor_id();
150
151 if (in_nmi())
152 trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
153 else
154 trace_buf = rcu_dereference_sched(perf_trace_buf);
155
156 if (!trace_buf)
157 goto err;
158 182
159 raw_data = per_cpu_ptr(trace_buf, cpu); 183 raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
160 184
161 /* zero the dead bytes from align to not leak stack to user */ 185 /* zero the dead bytes from align to not leak stack to user */
162 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); 186 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
163 187
164 entry = (struct trace_entry *)raw_data; 188 entry = (struct trace_entry *)raw_data;
165 tracing_generic_entry_update(entry, *irq_flags, pc); 189 local_save_flags(flags);
190 tracing_generic_entry_update(entry, flags, pc);
166 entry->type = type; 191 entry->type = type;
167 192
168 return raw_data; 193 return raw_data;
169err:
170 perf_swevent_put_recursion_context(*rctxp);
171err_recursion:
172 local_irq_restore(*irq_flags);
173 return NULL;
174} 194}
175EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); 195EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
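Two data-structure choices stand out in the trace_event_perf.c rewrite: the four perf_trace_buf buffers appear to map to the recursion contexts returned by perf_swevent_get_recursion_context(), and each event class gets one hlist_head per CPU so a probe only walks the local CPU's list of active perf events. A sketch of that per-cpu list pattern, with hypothetical names:

	#include <linux/percpu.h>
	#include <linux/cpumask.h>
	#include <linux/list.h>
	#include <linux/rculist.h>
	#include <linux/errno.h>

	static struct hlist_head __percpu *my_lists;

	static int my_lists_alloc(void)
	{
		int cpu;

		my_lists = alloc_percpu(struct hlist_head);
		if (!my_lists)
			return -ENOMEM;

		for_each_possible_cpu(cpu)
			INIT_HLIST_HEAD(per_cpu_ptr(my_lists, cpu));
		return 0;
	}

	static void my_lists_add(struct hlist_node *node)
	{
		/* add to the local CPU's list; readers traverse under RCU */
		hlist_add_head_rcu(node, this_cpu_ptr(my_lists));
	}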
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c697c7043349..53cffc0b0801 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -29,11 +29,23 @@ DEFINE_MUTEX(event_mutex);
29 29
30LIST_HEAD(ftrace_events); 30LIST_HEAD(ftrace_events);
31 31
32struct list_head *
33trace_get_fields(struct ftrace_event_call *event_call)
34{
35 if (!event_call->class->get_fields)
36 return &event_call->class->fields;
37 return event_call->class->get_fields(event_call);
38}
39
32int trace_define_field(struct ftrace_event_call *call, const char *type, 40int trace_define_field(struct ftrace_event_call *call, const char *type,
33 const char *name, int offset, int size, int is_signed, 41 const char *name, int offset, int size, int is_signed,
34 int filter_type) 42 int filter_type)
35{ 43{
36 struct ftrace_event_field *field; 44 struct ftrace_event_field *field;
45 struct list_head *head;
46
47 if (WARN_ON(!call->class))
48 return 0;
37 49
38 field = kzalloc(sizeof(*field), GFP_KERNEL); 50 field = kzalloc(sizeof(*field), GFP_KERNEL);
39 if (!field) 51 if (!field)
@@ -56,7 +68,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
56 field->size = size; 68 field->size = size;
57 field->is_signed = is_signed; 69 field->is_signed = is_signed;
58 70
59 list_add(&field->link, &call->fields); 71 head = trace_get_fields(call);
72 list_add(&field->link, head);
60 73
61 return 0; 74 return 0;
62 75
@@ -94,8 +107,10 @@ static int trace_define_common_fields(struct ftrace_event_call *call)
94void trace_destroy_fields(struct ftrace_event_call *call) 107void trace_destroy_fields(struct ftrace_event_call *call)
95{ 108{
96 struct ftrace_event_field *field, *next; 109 struct ftrace_event_field *field, *next;
110 struct list_head *head;
97 111
98 list_for_each_entry_safe(field, next, &call->fields, link) { 112 head = trace_get_fields(call);
113 list_for_each_entry_safe(field, next, head, link) {
99 list_del(&field->link); 114 list_del(&field->link);
100 kfree(field->type); 115 kfree(field->type);
101 kfree(field->name); 116 kfree(field->name);
@@ -107,11 +122,9 @@ int trace_event_raw_init(struct ftrace_event_call *call)
107{ 122{
108 int id; 123 int id;
109 124
110 id = register_ftrace_event(call->event); 125 id = register_ftrace_event(&call->event);
111 if (!id) 126 if (!id)
112 return -ENODEV; 127 return -ENODEV;
113 call->id = id;
114 INIT_LIST_HEAD(&call->fields);
115 128
116 return 0; 129 return 0;
117} 130}
@@ -124,23 +137,33 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
124 137
125 switch (enable) { 138 switch (enable) {
126 case 0: 139 case 0:
127 if (call->enabled) { 140 if (call->flags & TRACE_EVENT_FL_ENABLED) {
128 call->enabled = 0; 141 call->flags &= ~TRACE_EVENT_FL_ENABLED;
129 tracing_stop_cmdline_record(); 142 tracing_stop_cmdline_record();
130 call->unregfunc(call); 143 if (call->class->reg)
144 call->class->reg(call, TRACE_REG_UNREGISTER);
145 else
146 tracepoint_probe_unregister(call->name,
147 call->class->probe,
148 call);
131 } 149 }
132 break; 150 break;
133 case 1: 151 case 1:
134 if (!call->enabled) { 152 if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
135 tracing_start_cmdline_record(); 153 tracing_start_cmdline_record();
136 ret = call->regfunc(call); 154 if (call->class->reg)
155 ret = call->class->reg(call, TRACE_REG_REGISTER);
156 else
157 ret = tracepoint_probe_register(call->name,
158 call->class->probe,
159 call);
137 if (ret) { 160 if (ret) {
138 tracing_stop_cmdline_record(); 161 tracing_stop_cmdline_record();
139 pr_info("event trace: Could not enable event " 162 pr_info("event trace: Could not enable event "
140 "%s\n", call->name); 163 "%s\n", call->name);
141 break; 164 break;
142 } 165 }
143 call->enabled = 1; 166 call->flags |= TRACE_EVENT_FL_ENABLED;
144 } 167 }
145 break; 168 break;
146 } 169 }
@@ -171,15 +194,16 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
171 mutex_lock(&event_mutex); 194 mutex_lock(&event_mutex);
172 list_for_each_entry(call, &ftrace_events, list) { 195 list_for_each_entry(call, &ftrace_events, list) {
173 196
174 if (!call->name || !call->regfunc) 197 if (!call->name || !call->class ||
198 (!call->class->probe && !call->class->reg))
175 continue; 199 continue;
176 200
177 if (match && 201 if (match &&
178 strcmp(match, call->name) != 0 && 202 strcmp(match, call->name) != 0 &&
179 strcmp(match, call->system) != 0) 203 strcmp(match, call->class->system) != 0)
180 continue; 204 continue;
181 205
182 if (sub && strcmp(sub, call->system) != 0) 206 if (sub && strcmp(sub, call->class->system) != 0)
183 continue; 207 continue;
184 208
185 if (event && strcmp(event, call->name) != 0) 209 if (event && strcmp(event, call->name) != 0)
@@ -297,7 +321,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
297 * The ftrace subsystem is for showing formats only. 321 * The ftrace subsystem is for showing formats only.
298 * They can not be enabled or disabled via the event files. 322 * They can not be enabled or disabled via the event files.
299 */ 323 */
300 if (call->regfunc) 324 if (call->class && (call->class->probe || call->class->reg))
301 return call; 325 return call;
302 } 326 }
303 327
@@ -328,7 +352,7 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
328 (*pos)++; 352 (*pos)++;
329 353
330 list_for_each_entry_continue(call, &ftrace_events, list) { 354 list_for_each_entry_continue(call, &ftrace_events, list) {
331 if (call->enabled) 355 if (call->flags & TRACE_EVENT_FL_ENABLED)
332 return call; 356 return call;
333 } 357 }
334 358
@@ -355,8 +379,8 @@ static int t_show(struct seq_file *m, void *v)
355{ 379{
356 struct ftrace_event_call *call = v; 380 struct ftrace_event_call *call = v;
357 381
358 if (strcmp(call->system, TRACE_SYSTEM) != 0) 382 if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
359 seq_printf(m, "%s:", call->system); 383 seq_printf(m, "%s:", call->class->system);
360 seq_printf(m, "%s\n", call->name); 384 seq_printf(m, "%s\n", call->name);
361 385
362 return 0; 386 return 0;
@@ -387,7 +411,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
387 struct ftrace_event_call *call = filp->private_data; 411 struct ftrace_event_call *call = filp->private_data;
388 char *buf; 412 char *buf;
389 413
390 if (call->enabled) 414 if (call->flags & TRACE_EVENT_FL_ENABLED)
391 buf = "1\n"; 415 buf = "1\n";
392 else 416 else
393 buf = "0\n"; 417 buf = "0\n";
@@ -450,10 +474,11 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
450 474
451 mutex_lock(&event_mutex); 475 mutex_lock(&event_mutex);
452 list_for_each_entry(call, &ftrace_events, list) { 476 list_for_each_entry(call, &ftrace_events, list) {
453 if (!call->name || !call->regfunc) 477 if (!call->name || !call->class ||
478 (!call->class->probe && !call->class->reg))
454 continue; 479 continue;
455 480
456 if (system && strcmp(call->system, system) != 0) 481 if (system && strcmp(call->class->system, system) != 0)
457 continue; 482 continue;
458 483
459 /* 484 /*
@@ -461,7 +486,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
461 * or if all events or cleared, or if we have 486 * or if all events or cleared, or if we have
462 * a mixture. 487 * a mixture.
463 */ 488 */
464 set |= (1 << !!call->enabled); 489 set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED));
465 490
466 /* 491 /*
467 * If we have a mixture, no need to look further. 492 * If we have a mixture, no need to look further.
@@ -525,6 +550,7 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
525{ 550{
526 struct ftrace_event_call *call = filp->private_data; 551 struct ftrace_event_call *call = filp->private_data;
527 struct ftrace_event_field *field; 552 struct ftrace_event_field *field;
553 struct list_head *head;
528 struct trace_seq *s; 554 struct trace_seq *s;
529 int common_field_count = 5; 555 int common_field_count = 5;
530 char *buf; 556 char *buf;
@@ -540,10 +566,11 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
540 trace_seq_init(s); 566 trace_seq_init(s);
541 567
542 trace_seq_printf(s, "name: %s\n", call->name); 568 trace_seq_printf(s, "name: %s\n", call->name);
543 trace_seq_printf(s, "ID: %d\n", call->id); 569 trace_seq_printf(s, "ID: %d\n", call->event.type);
544 trace_seq_printf(s, "format:\n"); 570 trace_seq_printf(s, "format:\n");
545 571
546 list_for_each_entry_reverse(field, &call->fields, link) { 572 head = trace_get_fields(call);
573 list_for_each_entry_reverse(field, head, link) {
547 /* 574 /*
548 * Smartly shows the array type(except dynamic array). 575 * Smartly shows the array type(except dynamic array).
549 * Normal: 576 * Normal:
@@ -613,7 +640,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
613 return -ENOMEM; 640 return -ENOMEM;
614 641
615 trace_seq_init(s); 642 trace_seq_init(s);
616 trace_seq_printf(s, "%d\n", call->id); 643 trace_seq_printf(s, "%d\n", call->event.type);
617 644
618 r = simple_read_from_buffer(ubuf, cnt, ppos, 645 r = simple_read_from_buffer(ubuf, cnt, ppos,
619 s->buffer, s->len); 646 s->buffer, s->len);
@@ -919,14 +946,15 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
919 const struct file_operations *filter, 946 const struct file_operations *filter,
920 const struct file_operations *format) 947 const struct file_operations *format)
921{ 948{
949 struct list_head *head;
922 int ret; 950 int ret;
923 951
924 /* 952 /*
925 * If the trace point header did not define TRACE_SYSTEM 953 * If the trace point header did not define TRACE_SYSTEM
926 * then the system would be called "TRACE_SYSTEM". 954 * then the system would be called "TRACE_SYSTEM".
927 */ 955 */
928 if (strcmp(call->system, TRACE_SYSTEM) != 0) 956 if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
929 d_events = event_subsystem_dir(call->system, d_events); 957 d_events = event_subsystem_dir(call->class->system, d_events);
930 958
931 call->dir = debugfs_create_dir(call->name, d_events); 959 call->dir = debugfs_create_dir(call->name, d_events);
932 if (!call->dir) { 960 if (!call->dir) {
@@ -935,22 +963,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
935 return -1; 963 return -1;
936 } 964 }
937 965
938 if (call->regfunc) 966 if (call->class->probe || call->class->reg)
939 trace_create_file("enable", 0644, call->dir, call, 967 trace_create_file("enable", 0644, call->dir, call,
940 enable); 968 enable);
941 969
942 if (call->id && call->perf_event_enable) 970#ifdef CONFIG_PERF_EVENTS
971 if (call->event.type && (call->class->perf_probe || call->class->reg))
943 trace_create_file("id", 0444, call->dir, call, 972 trace_create_file("id", 0444, call->dir, call,
944 id); 973 id);
974#endif
945 975
946 if (call->define_fields) { 976 if (call->class->define_fields) {
947 ret = trace_define_common_fields(call); 977 /*
948 if (!ret) 978 * Other events may have the same class. Only update
949 ret = call->define_fields(call); 979 * the fields if they are not already defined.
950 if (ret < 0) { 980 */
951 pr_warning("Could not initialize trace point" 981 head = trace_get_fields(call);
952 " events/%s\n", call->name); 982 if (list_empty(head)) {
953 return ret; 983 ret = trace_define_common_fields(call);
984 if (!ret)
985 ret = call->class->define_fields(call);
986 if (ret < 0) {
987 pr_warning("Could not initialize trace point"
988 " events/%s\n", call->name);
989 return ret;
990 }
954 } 991 }
955 trace_create_file("filter", 0644, call->dir, call, 992 trace_create_file("filter", 0644, call->dir, call,
956 filter); 993 filter);
@@ -970,8 +1007,8 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
970 if (!call->name) 1007 if (!call->name)
971 return -EINVAL; 1008 return -EINVAL;
972 1009
973 if (call->raw_init) { 1010 if (call->class->raw_init) {
974 ret = call->raw_init(call); 1011 ret = call->class->raw_init(call);
975 if (ret < 0) { 1012 if (ret < 0) {
976 if (ret != -ENOSYS) 1013 if (ret != -ENOSYS)
977 pr_warning("Could not initialize trace " 1014 pr_warning("Could not initialize trace "
@@ -1035,13 +1072,13 @@ static void remove_subsystem_dir(const char *name)
1035static void __trace_remove_event_call(struct ftrace_event_call *call) 1072static void __trace_remove_event_call(struct ftrace_event_call *call)
1036{ 1073{
1037 ftrace_event_enable_disable(call, 0); 1074 ftrace_event_enable_disable(call, 0);
1038 if (call->event) 1075 if (call->event.funcs)
1039 __unregister_ftrace_event(call->event); 1076 __unregister_ftrace_event(&call->event);
1040 debugfs_remove_recursive(call->dir); 1077 debugfs_remove_recursive(call->dir);
1041 list_del(&call->list); 1078 list_del(&call->list);
1042 trace_destroy_fields(call); 1079 trace_destroy_fields(call);
1043 destroy_preds(call); 1080 destroy_preds(call);
1044 remove_subsystem_dir(call->system); 1081 remove_subsystem_dir(call->class->system);
1045} 1082}
1046 1083
1047/* Remove an event_call */ 1084/* Remove an event_call */
@@ -1132,8 +1169,8 @@ static void trace_module_add_events(struct module *mod)
1132 /* The linker may leave blanks */ 1169 /* The linker may leave blanks */
1133 if (!call->name) 1170 if (!call->name)
1134 continue; 1171 continue;
1135 if (call->raw_init) { 1172 if (call->class->raw_init) {
1136 ret = call->raw_init(call); 1173 ret = call->class->raw_init(call);
1137 if (ret < 0) { 1174 if (ret < 0) {
1138 if (ret != -ENOSYS) 1175 if (ret != -ENOSYS)
1139 pr_warning("Could not initialize trace " 1176 pr_warning("Could not initialize trace "
@@ -1286,8 +1323,8 @@ static __init int event_trace_init(void)
1286 /* The linker may leave blanks */ 1323 /* The linker may leave blanks */
1287 if (!call->name) 1324 if (!call->name)
1288 continue; 1325 continue;
1289 if (call->raw_init) { 1326 if (call->class->raw_init) {
1290 ret = call->raw_init(call); 1327 ret = call->class->raw_init(call);
1291 if (ret < 0) { 1328 if (ret < 0) {
1292 if (ret != -ENOSYS) 1329 if (ret != -ENOSYS)
1293 pr_warning("Could not initialize trace " 1330 pr_warning("Could not initialize trace "
@@ -1388,8 +1425,8 @@ static __init void event_trace_self_tests(void)
1388 1425
1389 list_for_each_entry(call, &ftrace_events, list) { 1426 list_for_each_entry(call, &ftrace_events, list) {
1390 1427
1391 /* Only test those that have a regfunc */ 1428 /* Only test those that have a probe */
1392 if (!call->regfunc) 1429 if (!call->class || !call->class->probe)
1393 continue; 1430 continue;
1394 1431
1395/* 1432/*
@@ -1399,8 +1436,8 @@ static __init void event_trace_self_tests(void)
1399 * syscalls as we test. 1436 * syscalls as we test.
1400 */ 1437 */
1401#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS 1438#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1402 if (call->system && 1439 if (call->class->system &&
1403 strcmp(call->system, "syscalls") == 0) 1440 strcmp(call->class->system, "syscalls") == 0)
1404 continue; 1441 continue;
1405#endif 1442#endif
1406 1443
@@ -1410,7 +1447,7 @@ static __init void event_trace_self_tests(void)
1410 * If an event is already enabled, someone is using 1447 * If an event is already enabled, someone is using
1411 * it and the self test should not be on. 1448 * it and the self test should not be on.
1412 */ 1449 */
1413 if (call->enabled) { 1450 if (call->flags & TRACE_EVENT_FL_ENABLED) {
1414 pr_warning("Enabled event during self test!\n"); 1451 pr_warning("Enabled event during self test!\n");
1415 WARN_ON_ONCE(1); 1452 WARN_ON_ONCE(1);
1416 continue; 1453 continue;
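
Taken together, the trace_events.c hunks above move what used to be flat ftrace_event_call members (id, system, raw_init, define_fields, regfunc, enabled, filter_active) behind a shared class pointer, an embedded event carrying the type number, and a flags word. A minimal stand-alone C sketch of that shape, using simplified illustration-only structs rather than the kernel's real declarations:

#include <stdio.h>

/* Simplified stand-ins for the structures touched above; these are
 * illustration-only types, not the kernel's real declarations. */
struct event_class {                  /* models struct ftrace_event_class */
	const char *system;           /* was ftrace_event_call::system     */
	int (*define_fields)(void *call);
	int (*raw_init)(void *call);
};

struct trace_event_stub {             /* models the embedded call->event  */
	int type;                     /* was ftrace_event_call::id         */
};

#define EVENT_FL_ENABLED  (1 << 0)    /* stands in for TRACE_EVENT_FL_ENABLED  */
#define EVENT_FL_FILTERED (1 << 1)    /* stands in for TRACE_EVENT_FL_FILTERED */

struct event_call {                   /* models struct ftrace_event_call  */
	const char *name;
	struct event_class *class;    /* shared by related events          */
	struct trace_event_stub event;
	unsigned int flags;           /* replaces enabled/filter_active    */
};

static int demo_define_fields(void *call) { (void)call; return 0; }
static int demo_raw_init(void *call)      { (void)call; return 0; }

static struct event_class demo_class = {
	.system        = "demo",
	.define_fields = demo_define_fields,
	.raw_init      = demo_raw_init,
};

int main(void)
{
	/* Two events sharing one class, which is why event_create_dir()
	 * above only defines fields while the class list is still empty. */
	struct event_call a = { "demo_a", &demo_class, { 1 }, 0 };
	struct event_call b = { "demo_b", &demo_class, { 2 }, 0 };

	a.flags |= EVENT_FL_ENABLED;

	printf("%s: system=%s type=%d enabled=%d\n", a.name,
	       a.class->system, a.event.type, !!(a.flags & EVENT_FL_ENABLED));
	printf("%s: system=%s type=%d enabled=%d\n", b.name,
	       b.class->system, b.event.type, !!(b.flags & EVENT_FL_ENABLED));
	return 0;
}

The payoff of sharing one class object is visible in event_create_dir() above: common and per-event fields are defined once per class, the first time any event of that class is set up.
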
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 58092d844a1f..57bb1bb32999 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -500,8 +500,10 @@ static struct ftrace_event_field *
500find_event_field(struct ftrace_event_call *call, char *name) 500find_event_field(struct ftrace_event_call *call, char *name)
501{ 501{
502 struct ftrace_event_field *field; 502 struct ftrace_event_field *field;
503 struct list_head *head;
503 504
504 list_for_each_entry(field, &call->fields, link) { 505 head = trace_get_fields(call);
506 list_for_each_entry(field, head, link) {
505 if (!strcmp(field->name, name)) 507 if (!strcmp(field->name, name))
506 return field; 508 return field;
507 } 509 }
@@ -545,7 +547,7 @@ static void filter_disable_preds(struct ftrace_event_call *call)
545 struct event_filter *filter = call->filter; 547 struct event_filter *filter = call->filter;
546 int i; 548 int i;
547 549
548 call->filter_active = 0; 550 call->flags &= ~TRACE_EVENT_FL_FILTERED;
549 filter->n_preds = 0; 551 filter->n_preds = 0;
550 552
551 for (i = 0; i < MAX_FILTER_PRED; i++) 553 for (i = 0; i < MAX_FILTER_PRED; i++)
@@ -572,7 +574,7 @@ void destroy_preds(struct ftrace_event_call *call)
572{ 574{
573 __free_preds(call->filter); 575 __free_preds(call->filter);
574 call->filter = NULL; 576 call->filter = NULL;
575 call->filter_active = 0; 577 call->flags &= ~TRACE_EVENT_FL_FILTERED;
576} 578}
577 579
578static struct event_filter *__alloc_preds(void) 580static struct event_filter *__alloc_preds(void)
@@ -611,7 +613,7 @@ static int init_preds(struct ftrace_event_call *call)
611 if (call->filter) 613 if (call->filter)
612 return 0; 614 return 0;
613 615
614 call->filter_active = 0; 616 call->flags &= ~TRACE_EVENT_FL_FILTERED;
615 call->filter = __alloc_preds(); 617 call->filter = __alloc_preds();
616 if (IS_ERR(call->filter)) 618 if (IS_ERR(call->filter))
617 return PTR_ERR(call->filter); 619 return PTR_ERR(call->filter);
@@ -625,10 +627,10 @@ static int init_subsystem_preds(struct event_subsystem *system)
625 int err; 627 int err;
626 628
627 list_for_each_entry(call, &ftrace_events, list) { 629 list_for_each_entry(call, &ftrace_events, list) {
628 if (!call->define_fields) 630 if (!call->class || !call->class->define_fields)
629 continue; 631 continue;
630 632
631 if (strcmp(call->system, system->name) != 0) 633 if (strcmp(call->class->system, system->name) != 0)
632 continue; 634 continue;
633 635
634 err = init_preds(call); 636 err = init_preds(call);
@@ -644,10 +646,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
644 struct ftrace_event_call *call; 646 struct ftrace_event_call *call;
645 647
646 list_for_each_entry(call, &ftrace_events, list) { 648 list_for_each_entry(call, &ftrace_events, list) {
647 if (!call->define_fields) 649 if (!call->class || !call->class->define_fields)
648 continue; 650 continue;
649 651
650 if (strcmp(call->system, system->name) != 0) 652 if (strcmp(call->class->system, system->name) != 0)
651 continue; 653 continue;
652 654
653 filter_disable_preds(call); 655 filter_disable_preds(call);
@@ -1249,10 +1251,10 @@ static int replace_system_preds(struct event_subsystem *system,
1249 list_for_each_entry(call, &ftrace_events, list) { 1251 list_for_each_entry(call, &ftrace_events, list) {
1250 struct event_filter *filter = call->filter; 1252 struct event_filter *filter = call->filter;
1251 1253
1252 if (!call->define_fields) 1254 if (!call->class || !call->class->define_fields)
1253 continue; 1255 continue;
1254 1256
1255 if (strcmp(call->system, system->name) != 0) 1257 if (strcmp(call->class->system, system->name) != 0)
1256 continue; 1258 continue;
1257 1259
1258 /* try to see if the filter can be applied */ 1260 /* try to see if the filter can be applied */
@@ -1266,7 +1268,7 @@ static int replace_system_preds(struct event_subsystem *system,
1266 if (err) 1268 if (err)
1267 filter_disable_preds(call); 1269 filter_disable_preds(call);
1268 else { 1270 else {
1269 call->filter_active = 1; 1271 call->flags |= TRACE_EVENT_FL_FILTERED;
1270 replace_filter_string(filter, filter_string); 1272 replace_filter_string(filter, filter_string);
1271 } 1273 }
1272 fail = false; 1274 fail = false;
@@ -1315,7 +1317,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1315 if (err) 1317 if (err)
1316 append_filter_err(ps, call->filter); 1318 append_filter_err(ps, call->filter);
1317 else 1319 else
1318 call->filter_active = 1; 1320 call->flags |= TRACE_EVENT_FL_FILTERED;
1319out: 1321out:
1320 filter_opstack_clear(ps); 1322 filter_opstack_clear(ps);
1321 postfix_clear(ps); 1323 postfix_clear(ps);
@@ -1393,7 +1395,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1393 mutex_lock(&event_mutex); 1395 mutex_lock(&event_mutex);
1394 1396
1395 list_for_each_entry(call, &ftrace_events, list) { 1397 list_for_each_entry(call, &ftrace_events, list) {
1396 if (call->id == event_id) 1398 if (call->event.type == event_id)
1397 break; 1399 break;
1398 } 1400 }
1399 1401
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index e091f64ba6ce..8536e2a65969 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -127,7 +127,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
127 127
128static int ftrace_raw_init_event(struct ftrace_event_call *call) 128static int ftrace_raw_init_event(struct ftrace_event_call *call)
129{ 129{
130 INIT_LIST_HEAD(&call->fields); 130 INIT_LIST_HEAD(&call->class->fields);
131 return 0; 131 return 0;
132} 132}
133 133
@@ -153,17 +153,21 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
153#define F_printk(fmt, args...) #fmt ", " __stringify(args) 153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154 154
155#undef FTRACE_ENTRY 155#undef FTRACE_ENTRY
156#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 156#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \
157 \
158struct ftrace_event_class event_class_ftrace_##call = { \
159 .system = __stringify(TRACE_SYSTEM), \
160 .define_fields = ftrace_define_fields_##call, \
161 .raw_init = ftrace_raw_init_event, \
162}; \
157 \ 163 \
158struct ftrace_event_call __used \ 164struct ftrace_event_call __used \
159__attribute__((__aligned__(4))) \ 165__attribute__((__aligned__(4))) \
160__attribute__((section("_ftrace_events"))) event_##call = { \ 166__attribute__((section("_ftrace_events"))) event_##call = { \
161 .name = #call, \ 167 .name = #call, \
162 .id = type, \ 168 .event.type = etype, \
163 .system = __stringify(TRACE_SYSTEM), \ 169 .class = &event_class_ftrace_##call, \
164 .raw_init = ftrace_raw_init_event, \
165 .print_fmt = print, \ 170 .print_fmt = print, \
166 .define_fields = ftrace_define_fields_##call, \
167}; \ 171}; \
168 172
169#include "trace_entries.h" 173#include "trace_entries.h"
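
The reworked FTRACE_ENTRY above now emits a per-call event_class_ftrace_##call carrying system/define_fields/raw_init, plus a slimmer event_##call that keeps only its name, its type number and a pointer to that class. A compilable sketch of the same macro shape, built on hypothetical stand-in structs rather than the kernel's:

#include <stdio.h>

/* Hypothetical stand-ins for the generated structures. */
struct event_class { const char *system; };
struct event_call  {
	const char *name;
	int type;
	struct event_class *class;
};

/* Mirrors the reworked FTRACE_ENTRY(): one class object plus one
 * call object pointing at it, both produced per invocation. */
#define DEFINE_ENTRY(call, etype)                          \
	static struct event_class event_class_##call = {   \
		.system = "ftrace",                         \
	};                                                  \
	static struct event_call event_##call = {           \
		.name  = #call,                             \
		.type  = etype,                             \
		.class = &event_class_##call,               \
	}

DEFINE_ENTRY(function, 1);
DEFINE_ENTRY(wakeup, 2);

int main(void)
{
	printf("%s -> system %s, type %d\n", event_function.name,
	       event_function.class->system, event_function.type);
	printf("%s -> system %s, type %d\n", event_wakeup.name,
	       event_wakeup.class->system, event_wakeup.type);
	return 0;
}
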
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index dd11c830eb84..79f4bac99a94 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1025,7 +1025,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1025 if (!event) 1025 if (!event)
1026 return TRACE_TYPE_UNHANDLED; 1026 return TRACE_TYPE_UNHANDLED;
1027 1027
1028 ret = event->trace(iter, sym_flags); 1028 ret = event->funcs->trace(iter, sym_flags, event);
1029 if (ret != TRACE_TYPE_HANDLED) 1029 if (ret != TRACE_TYPE_HANDLED)
1030 return ret; 1030 return ret;
1031 } 1031 }
@@ -1112,7 +1112,8 @@ print_graph_function(struct trace_iterator *iter)
1112} 1112}
1113 1113
1114static enum print_line_t 1114static enum print_line_t
1115print_graph_function_event(struct trace_iterator *iter, int flags) 1115print_graph_function_event(struct trace_iterator *iter, int flags,
1116 struct trace_event *event)
1116{ 1117{
1117 return print_graph_function(iter); 1118 return print_graph_function(iter);
1118} 1119}
@@ -1225,14 +1226,18 @@ void graph_trace_close(struct trace_iterator *iter)
1225 } 1226 }
1226} 1227}
1227 1228
1229static struct trace_event_functions graph_functions = {
1230 .trace = print_graph_function_event,
1231};
1232
1228static struct trace_event graph_trace_entry_event = { 1233static struct trace_event graph_trace_entry_event = {
1229 .type = TRACE_GRAPH_ENT, 1234 .type = TRACE_GRAPH_ENT,
1230 .trace = print_graph_function_event, 1235 .funcs = &graph_functions,
1231}; 1236};
1232 1237
1233static struct trace_event graph_trace_ret_event = { 1238static struct trace_event graph_trace_ret_event = {
1234 .type = TRACE_GRAPH_RET, 1239 .type = TRACE_GRAPH_RET,
1235 .trace = print_graph_function_event, 1240 .funcs = &graph_functions
1236}; 1241};
1237 1242
1238static struct tracer graph_trace __read_mostly = { 1243static struct tracer graph_trace __read_mostly = {
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a7514326052b..f52b5f50299d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -324,8 +324,8 @@ struct trace_probe {
324 unsigned long nhit; 324 unsigned long nhit;
325 unsigned int flags; /* For TP_FLAG_* */ 325 unsigned int flags; /* For TP_FLAG_* */
326 const char *symbol; /* symbol name */ 326 const char *symbol; /* symbol name */
327 struct ftrace_event_class class;
327 struct ftrace_event_call call; 328 struct ftrace_event_call call;
328 struct trace_event event;
329 ssize_t size; /* trace entry size */ 329 ssize_t size; /* trace entry size */
330 unsigned int nr_args; 330 unsigned int nr_args;
331 struct probe_arg args[]; 331 struct probe_arg args[];
@@ -404,6 +404,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
404 goto error; 404 goto error;
405 } 405 }
406 406
407 tp->call.class = &tp->class;
407 tp->call.name = kstrdup(event, GFP_KERNEL); 408 tp->call.name = kstrdup(event, GFP_KERNEL);
408 if (!tp->call.name) 409 if (!tp->call.name)
409 goto error; 410 goto error;
@@ -413,8 +414,8 @@ static struct trace_probe *alloc_trace_probe(const char *group,
413 goto error; 414 goto error;
414 } 415 }
415 416
416 tp->call.system = kstrdup(group, GFP_KERNEL); 417 tp->class.system = kstrdup(group, GFP_KERNEL);
417 if (!tp->call.system) 418 if (!tp->class.system)
418 goto error; 419 goto error;
419 420
420 INIT_LIST_HEAD(&tp->list); 421 INIT_LIST_HEAD(&tp->list);
@@ -443,7 +444,7 @@ static void free_trace_probe(struct trace_probe *tp)
443 for (i = 0; i < tp->nr_args; i++) 444 for (i = 0; i < tp->nr_args; i++)
444 free_probe_arg(&tp->args[i]); 445 free_probe_arg(&tp->args[i]);
445 446
446 kfree(tp->call.system); 447 kfree(tp->call.class->system);
447 kfree(tp->call.name); 448 kfree(tp->call.name);
448 kfree(tp->symbol); 449 kfree(tp->symbol);
449 kfree(tp); 450 kfree(tp);
@@ -456,7 +457,7 @@ static struct trace_probe *find_probe_event(const char *event,
456 457
457 list_for_each_entry(tp, &probe_list, list) 458 list_for_each_entry(tp, &probe_list, list)
458 if (strcmp(tp->call.name, event) == 0 && 459 if (strcmp(tp->call.name, event) == 0 &&
459 strcmp(tp->call.system, group) == 0) 460 strcmp(tp->call.class->system, group) == 0)
460 return tp; 461 return tp;
461 return NULL; 462 return NULL;
462} 463}
@@ -481,7 +482,7 @@ static int register_trace_probe(struct trace_probe *tp)
481 mutex_lock(&probe_lock); 482 mutex_lock(&probe_lock);
482 483
483 /* register as an event */ 484 /* register as an event */
484 old_tp = find_probe_event(tp->call.name, tp->call.system); 485 old_tp = find_probe_event(tp->call.name, tp->call.class->system);
485 if (old_tp) { 486 if (old_tp) {
486 /* delete old event */ 487 /* delete old event */
487 unregister_trace_probe(old_tp); 488 unregister_trace_probe(old_tp);
@@ -904,7 +905,7 @@ static int probes_seq_show(struct seq_file *m, void *v)
904 int i; 905 int i;
905 906
906 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); 907 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
907 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name); 908 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
908 909
909 if (!tp->symbol) 910 if (!tp->symbol)
910 seq_printf(m, " 0x%p", tp->rp.kp.addr); 911 seq_printf(m, " 0x%p", tp->rp.kp.addr);
@@ -1061,8 +1062,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1061 1062
1062 size = sizeof(*entry) + tp->size; 1063 size = sizeof(*entry) + tp->size;
1063 1064
1064 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 1065 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1065 irq_flags, pc); 1066 size, irq_flags, pc);
1066 if (!event) 1067 if (!event)
1067 return; 1068 return;
1068 1069
@@ -1094,8 +1095,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1094 1095
1095 size = sizeof(*entry) + tp->size; 1096 size = sizeof(*entry) + tp->size;
1096 1097
1097 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 1098 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1098 irq_flags, pc); 1099 size, irq_flags, pc);
1099 if (!event) 1100 if (!event)
1100 return; 1101 return;
1101 1102
@@ -1112,18 +1113,17 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1112 1113
1113/* Event entry printers */ 1114/* Event entry printers */
1114enum print_line_t 1115enum print_line_t
1115print_kprobe_event(struct trace_iterator *iter, int flags) 1116print_kprobe_event(struct trace_iterator *iter, int flags,
1117 struct trace_event *event)
1116{ 1118{
1117 struct kprobe_trace_entry_head *field; 1119 struct kprobe_trace_entry_head *field;
1118 struct trace_seq *s = &iter->seq; 1120 struct trace_seq *s = &iter->seq;
1119 struct trace_event *event;
1120 struct trace_probe *tp; 1121 struct trace_probe *tp;
1121 u8 *data; 1122 u8 *data;
1122 int i; 1123 int i;
1123 1124
1124 field = (struct kprobe_trace_entry_head *)iter->ent; 1125 field = (struct kprobe_trace_entry_head *)iter->ent;
1125 event = ftrace_find_event(field->ent.type); 1126 tp = container_of(event, struct trace_probe, call.event);
1126 tp = container_of(event, struct trace_probe, event);
1127 1127
1128 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1128 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1129 goto partial; 1129 goto partial;
@@ -1149,18 +1149,17 @@ partial:
1149} 1149}
1150 1150
1151enum print_line_t 1151enum print_line_t
1152print_kretprobe_event(struct trace_iterator *iter, int flags) 1152print_kretprobe_event(struct trace_iterator *iter, int flags,
1153 struct trace_event *event)
1153{ 1154{
1154 struct kretprobe_trace_entry_head *field; 1155 struct kretprobe_trace_entry_head *field;
1155 struct trace_seq *s = &iter->seq; 1156 struct trace_seq *s = &iter->seq;
1156 struct trace_event *event;
1157 struct trace_probe *tp; 1157 struct trace_probe *tp;
1158 u8 *data; 1158 u8 *data;
1159 int i; 1159 int i;
1160 1160
1161 field = (struct kretprobe_trace_entry_head *)iter->ent; 1161 field = (struct kretprobe_trace_entry_head *)iter->ent;
1162 event = ftrace_find_event(field->ent.type); 1162 tp = container_of(event, struct trace_probe, call.event);
1163 tp = container_of(event, struct trace_probe, event);
1164 1163
1165 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1164 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1166 goto partial; 1165 goto partial;
@@ -1217,8 +1216,6 @@ static void probe_event_disable(struct ftrace_event_call *call)
1217 1216
1218static int probe_event_raw_init(struct ftrace_event_call *event_call) 1217static int probe_event_raw_init(struct ftrace_event_call *event_call)
1219{ 1218{
1220 INIT_LIST_HEAD(&event_call->fields);
1221
1222 return 0; 1219 return 0;
1223} 1220}
1224 1221
@@ -1341,9 +1338,9 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1341 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1338 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1342 struct ftrace_event_call *call = &tp->call; 1339 struct ftrace_event_call *call = &tp->call;
1343 struct kprobe_trace_entry_head *entry; 1340 struct kprobe_trace_entry_head *entry;
1341 struct hlist_head *head;
1344 u8 *data; 1342 u8 *data;
1345 int size, __size, i; 1343 int size, __size, i;
1346 unsigned long irq_flags;
1347 int rctx; 1344 int rctx;
1348 1345
1349 __size = sizeof(*entry) + tp->size; 1346 __size = sizeof(*entry) + tp->size;
@@ -1353,7 +1350,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1353 "profile buffer not large enough")) 1350 "profile buffer not large enough"))
1354 return; 1351 return;
1355 1352
1356 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); 1353 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1357 if (!entry) 1354 if (!entry)
1358 return; 1355 return;
1359 1356
@@ -1362,7 +1359,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1362 for (i = 0; i < tp->nr_args; i++) 1359 for (i = 0; i < tp->nr_args; i++)
1363 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); 1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1364 1361
1365 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); 1362 head = this_cpu_ptr(call->perf_events);
1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1366} 1364}
1367 1365
1368/* Kretprobe profile handler */ 1366/* Kretprobe profile handler */
@@ -1372,9 +1370,9 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1372 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1370 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1373 struct ftrace_event_call *call = &tp->call; 1371 struct ftrace_event_call *call = &tp->call;
1374 struct kretprobe_trace_entry_head *entry; 1372 struct kretprobe_trace_entry_head *entry;
1373 struct hlist_head *head;
1375 u8 *data; 1374 u8 *data;
1376 int size, __size, i; 1375 int size, __size, i;
1377 unsigned long irq_flags;
1378 int rctx; 1376 int rctx;
1379 1377
1380 __size = sizeof(*entry) + tp->size; 1378 __size = sizeof(*entry) + tp->size;
@@ -1384,7 +1382,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1384 "profile buffer not large enough")) 1382 "profile buffer not large enough"))
1385 return; 1383 return;
1386 1384
1387 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); 1385 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1388 if (!entry) 1386 if (!entry)
1389 return; 1387 return;
1390 1388
@@ -1394,8 +1392,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1394 for (i = 0; i < tp->nr_args; i++) 1392 for (i = 0; i < tp->nr_args; i++)
1395 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); 1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1396 1394
1397 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, 1395 head = this_cpu_ptr(call->perf_events);
1398 irq_flags, regs); 1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1399} 1397}
1400 1398
1401static int probe_perf_enable(struct ftrace_event_call *call) 1399static int probe_perf_enable(struct ftrace_event_call *call)
@@ -1425,6 +1423,26 @@ static void probe_perf_disable(struct ftrace_event_call *call)
1425} 1423}
1426#endif /* CONFIG_PERF_EVENTS */ 1424#endif /* CONFIG_PERF_EVENTS */
1427 1425
1426static __kprobes
1427int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1428{
1429 switch (type) {
1430 case TRACE_REG_REGISTER:
1431 return probe_event_enable(event);
1432 case TRACE_REG_UNREGISTER:
1433 probe_event_disable(event);
1434 return 0;
1435
1436#ifdef CONFIG_PERF_EVENTS
1437 case TRACE_REG_PERF_REGISTER:
1438 return probe_perf_enable(event);
1439 case TRACE_REG_PERF_UNREGISTER:
1440 probe_perf_disable(event);
1441 return 0;
1442#endif
1443 }
1444 return 0;
1445}
1428 1446
1429static __kprobes 1447static __kprobes
1430int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) 1448int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
@@ -1454,6 +1472,14 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1454 return 0; /* We don't tweek kernel, so just return 0 */ 1472 return 0; /* We don't tweek kernel, so just return 0 */
1455} 1473}
1456 1474
1475static struct trace_event_functions kretprobe_funcs = {
1476 .trace = print_kretprobe_event
1477};
1478
1479static struct trace_event_functions kprobe_funcs = {
1480 .trace = print_kprobe_event
1481};
1482
1457static int register_probe_event(struct trace_probe *tp) 1483static int register_probe_event(struct trace_probe *tp)
1458{ 1484{
1459 struct ftrace_event_call *call = &tp->call; 1485 struct ftrace_event_call *call = &tp->call;
@@ -1461,36 +1487,31 @@ static int register_probe_event(struct trace_probe *tp)
1461 1487
1462 /* Initialize ftrace_event_call */ 1488 /* Initialize ftrace_event_call */
1463 if (probe_is_return(tp)) { 1489 if (probe_is_return(tp)) {
1464 tp->event.trace = print_kretprobe_event; 1490 INIT_LIST_HEAD(&call->class->fields);
1465 call->raw_init = probe_event_raw_init; 1491 call->event.funcs = &kretprobe_funcs;
1466 call->define_fields = kretprobe_event_define_fields; 1492 call->class->raw_init = probe_event_raw_init;
1493 call->class->define_fields = kretprobe_event_define_fields;
1467 } else { 1494 } else {
1468 tp->event.trace = print_kprobe_event; 1495 INIT_LIST_HEAD(&call->class->fields);
1469 call->raw_init = probe_event_raw_init; 1496 call->event.funcs = &kprobe_funcs;
1470 call->define_fields = kprobe_event_define_fields; 1497 call->class->raw_init = probe_event_raw_init;
1498 call->class->define_fields = kprobe_event_define_fields;
1471 } 1499 }
1472 if (set_print_fmt(tp) < 0) 1500 if (set_print_fmt(tp) < 0)
1473 return -ENOMEM; 1501 return -ENOMEM;
1474 call->event = &tp->event; 1502 ret = register_ftrace_event(&call->event);
1475 call->id = register_ftrace_event(&tp->event); 1503 if (!ret) {
1476 if (!call->id) {
1477 kfree(call->print_fmt); 1504 kfree(call->print_fmt);
1478 return -ENODEV; 1505 return -ENODEV;
1479 } 1506 }
1480 call->enabled = 0; 1507 call->flags = 0;
1481 call->regfunc = probe_event_enable; 1508 call->class->reg = kprobe_register;
1482 call->unregfunc = probe_event_disable;
1483
1484#ifdef CONFIG_PERF_EVENTS
1485 call->perf_event_enable = probe_perf_enable;
1486 call->perf_event_disable = probe_perf_disable;
1487#endif
1488 call->data = tp; 1509 call->data = tp;
1489 ret = trace_add_event_call(call); 1510 ret = trace_add_event_call(call);
1490 if (ret) { 1511 if (ret) {
1491 pr_info("Failed to register kprobe event: %s\n", call->name); 1512 pr_info("Failed to register kprobe event: %s\n", call->name);
1492 kfree(call->print_fmt); 1513 kfree(call->print_fmt);
1493 unregister_ftrace_event(&tp->event); 1514 unregister_ftrace_event(&call->event);
1494 } 1515 }
1495 return ret; 1516 return ret;
1496} 1517}
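
The kprobe_register() added above collapses the former regfunc/unregfunc and perf_event_enable/perf_event_disable pairs into a single class->reg callback switched on enum trace_reg. A stand-alone model of that dispatch, with invented stubs in place of probe_event_enable() and friends:

#include <stdio.h>

/* Mirrors enum trace_reg as used by the new ->reg() callback. */
enum trace_reg {
	TRACE_REG_REGISTER,
	TRACE_REG_UNREGISTER,
	TRACE_REG_PERF_REGISTER,
	TRACE_REG_PERF_UNREGISTER,
};

struct event_call { const char *name; };

/* Invented stubs standing in for probe_event_enable()/disable()
 * and probe_perf_enable()/disable() from the diff above. */
static int  event_enable(struct event_call *c)  { printf("enable %s\n", c->name);  return 0; }
static void event_disable(struct event_call *c) { printf("disable %s\n", c->name); }
static int  perf_enable(struct event_call *c)   { printf("perf on %s\n", c->name); return 0; }
static void perf_disable(struct event_call *c)  { printf("perf off %s\n", c->name); }

/* Single registration entry point, shaped like kprobe_register(). */
static int demo_register(struct event_call *call, enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return event_enable(call);
	case TRACE_REG_UNREGISTER:
		event_disable(call);
		return 0;
	case TRACE_REG_PERF_REGISTER:
		return perf_enable(call);
	case TRACE_REG_PERF_UNREGISTER:
		perf_disable(call);
		return 0;
	}
	return 0;
}

int main(void)
{
	struct event_call call = { "demo_kprobe" };

	demo_register(&call, TRACE_REG_REGISTER);
	demo_register(&call, TRACE_REG_PERF_REGISTER);
	demo_register(&call, TRACE_REG_PERF_UNREGISTER);
	demo_register(&call, TRACE_REG_UNREGISTER);
	return 0;
}
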
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index ab13d7008061..57c1b4596470 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -742,6 +742,9 @@ int register_ftrace_event(struct trace_event *event)
742 if (WARN_ON(!event)) 742 if (WARN_ON(!event))
743 goto out; 743 goto out;
744 744
745 if (WARN_ON(!event->funcs))
746 goto out;
747
745 INIT_LIST_HEAD(&event->list); 748 INIT_LIST_HEAD(&event->list);
746 749
747 if (!event->type) { 750 if (!event->type) {
@@ -774,14 +777,14 @@ int register_ftrace_event(struct trace_event *event)
774 goto out; 777 goto out;
775 } 778 }
776 779
777 if (event->trace == NULL) 780 if (event->funcs->trace == NULL)
778 event->trace = trace_nop_print; 781 event->funcs->trace = trace_nop_print;
779 if (event->raw == NULL) 782 if (event->funcs->raw == NULL)
780 event->raw = trace_nop_print; 783 event->funcs->raw = trace_nop_print;
781 if (event->hex == NULL) 784 if (event->funcs->hex == NULL)
782 event->hex = trace_nop_print; 785 event->funcs->hex = trace_nop_print;
783 if (event->binary == NULL) 786 if (event->funcs->binary == NULL)
784 event->binary = trace_nop_print; 787 event->funcs->binary = trace_nop_print;
785 788
786 key = event->type & (EVENT_HASHSIZE - 1); 789 key = event->type & (EVENT_HASHSIZE - 1);
787 790
@@ -823,13 +826,15 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event);
823 * Standard events 826 * Standard events
824 */ 827 */
825 828
826enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags) 829enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
830 struct trace_event *event)
827{ 831{
828 return TRACE_TYPE_HANDLED; 832 return TRACE_TYPE_HANDLED;
829} 833}
830 834
831/* TRACE_FN */ 835/* TRACE_FN */
832static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) 836static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
837 struct trace_event *event)
833{ 838{
834 struct ftrace_entry *field; 839 struct ftrace_entry *field;
835 struct trace_seq *s = &iter->seq; 840 struct trace_seq *s = &iter->seq;
@@ -856,7 +861,8 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
856 return TRACE_TYPE_PARTIAL_LINE; 861 return TRACE_TYPE_PARTIAL_LINE;
857} 862}
858 863
859static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags) 864static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags,
865 struct trace_event *event)
860{ 866{
861 struct ftrace_entry *field; 867 struct ftrace_entry *field;
862 868
@@ -870,7 +876,8 @@ static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
870 return TRACE_TYPE_HANDLED; 876 return TRACE_TYPE_HANDLED;
871} 877}
872 878
873static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags) 879static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags,
880 struct trace_event *event)
874{ 881{
875 struct ftrace_entry *field; 882 struct ftrace_entry *field;
876 struct trace_seq *s = &iter->seq; 883 struct trace_seq *s = &iter->seq;
@@ -883,7 +890,8 @@ static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
883 return TRACE_TYPE_HANDLED; 890 return TRACE_TYPE_HANDLED;
884} 891}
885 892
886static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) 893static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags,
894 struct trace_event *event)
887{ 895{
888 struct ftrace_entry *field; 896 struct ftrace_entry *field;
889 struct trace_seq *s = &iter->seq; 897 struct trace_seq *s = &iter->seq;
@@ -896,14 +904,18 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
896 return TRACE_TYPE_HANDLED; 904 return TRACE_TYPE_HANDLED;
897} 905}
898 906
899static struct trace_event trace_fn_event = { 907static struct trace_event_functions trace_fn_funcs = {
900 .type = TRACE_FN,
901 .trace = trace_fn_trace, 908 .trace = trace_fn_trace,
902 .raw = trace_fn_raw, 909 .raw = trace_fn_raw,
903 .hex = trace_fn_hex, 910 .hex = trace_fn_hex,
904 .binary = trace_fn_bin, 911 .binary = trace_fn_bin,
905}; 912};
906 913
914static struct trace_event trace_fn_event = {
915 .type = TRACE_FN,
916 .funcs = &trace_fn_funcs,
917};
918
907/* TRACE_CTX an TRACE_WAKE */ 919/* TRACE_CTX an TRACE_WAKE */
908static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, 920static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
909 char *delim) 921 char *delim)
@@ -932,13 +944,14 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
932 return TRACE_TYPE_HANDLED; 944 return TRACE_TYPE_HANDLED;
933} 945}
934 946
935static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags) 947static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags,
948 struct trace_event *event)
936{ 949{
937 return trace_ctxwake_print(iter, "==>"); 950 return trace_ctxwake_print(iter, "==>");
938} 951}
939 952
940static enum print_line_t trace_wake_print(struct trace_iterator *iter, 953static enum print_line_t trace_wake_print(struct trace_iterator *iter,
941 int flags) 954 int flags, struct trace_event *event)
942{ 955{
943 return trace_ctxwake_print(iter, " +"); 956 return trace_ctxwake_print(iter, " +");
944} 957}
@@ -966,12 +979,14 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
966 return TRACE_TYPE_HANDLED; 979 return TRACE_TYPE_HANDLED;
967} 980}
968 981
969static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags) 982static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags,
983 struct trace_event *event)
970{ 984{
971 return trace_ctxwake_raw(iter, 0); 985 return trace_ctxwake_raw(iter, 0);
972} 986}
973 987
974static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags) 988static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags,
989 struct trace_event *event)
975{ 990{
976 return trace_ctxwake_raw(iter, '+'); 991 return trace_ctxwake_raw(iter, '+');
977} 992}
@@ -1000,18 +1015,20 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
1000 return TRACE_TYPE_HANDLED; 1015 return TRACE_TYPE_HANDLED;
1001} 1016}
1002 1017
1003static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags) 1018static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags,
1019 struct trace_event *event)
1004{ 1020{
1005 return trace_ctxwake_hex(iter, 0); 1021 return trace_ctxwake_hex(iter, 0);
1006} 1022}
1007 1023
1008static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags) 1024static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags,
1025 struct trace_event *event)
1009{ 1026{
1010 return trace_ctxwake_hex(iter, '+'); 1027 return trace_ctxwake_hex(iter, '+');
1011} 1028}
1012 1029
1013static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, 1030static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
1014 int flags) 1031 int flags, struct trace_event *event)
1015{ 1032{
1016 struct ctx_switch_entry *field; 1033 struct ctx_switch_entry *field;
1017 struct trace_seq *s = &iter->seq; 1034 struct trace_seq *s = &iter->seq;
@@ -1028,25 +1045,33 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
1028 return TRACE_TYPE_HANDLED; 1045 return TRACE_TYPE_HANDLED;
1029} 1046}
1030 1047
1031static struct trace_event trace_ctx_event = { 1048static struct trace_event_functions trace_ctx_funcs = {
1032 .type = TRACE_CTX,
1033 .trace = trace_ctx_print, 1049 .trace = trace_ctx_print,
1034 .raw = trace_ctx_raw, 1050 .raw = trace_ctx_raw,
1035 .hex = trace_ctx_hex, 1051 .hex = trace_ctx_hex,
1036 .binary = trace_ctxwake_bin, 1052 .binary = trace_ctxwake_bin,
1037}; 1053};
1038 1054
1039static struct trace_event trace_wake_event = { 1055static struct trace_event trace_ctx_event = {
1040 .type = TRACE_WAKE, 1056 .type = TRACE_CTX,
1057 .funcs = &trace_ctx_funcs,
1058};
1059
1060static struct trace_event_functions trace_wake_funcs = {
1041 .trace = trace_wake_print, 1061 .trace = trace_wake_print,
1042 .raw = trace_wake_raw, 1062 .raw = trace_wake_raw,
1043 .hex = trace_wake_hex, 1063 .hex = trace_wake_hex,
1044 .binary = trace_ctxwake_bin, 1064 .binary = trace_ctxwake_bin,
1045}; 1065};
1046 1066
1067static struct trace_event trace_wake_event = {
1068 .type = TRACE_WAKE,
1069 .funcs = &trace_wake_funcs,
1070};
1071
1047/* TRACE_SPECIAL */ 1072/* TRACE_SPECIAL */
1048static enum print_line_t trace_special_print(struct trace_iterator *iter, 1073static enum print_line_t trace_special_print(struct trace_iterator *iter,
1049 int flags) 1074 int flags, struct trace_event *event)
1050{ 1075{
1051 struct special_entry *field; 1076 struct special_entry *field;
1052 1077
@@ -1062,7 +1087,7 @@ static enum print_line_t trace_special_print(struct trace_iterator *iter,
1062} 1087}
1063 1088
1064static enum print_line_t trace_special_hex(struct trace_iterator *iter, 1089static enum print_line_t trace_special_hex(struct trace_iterator *iter,
1065 int flags) 1090 int flags, struct trace_event *event)
1066{ 1091{
1067 struct special_entry *field; 1092 struct special_entry *field;
1068 struct trace_seq *s = &iter->seq; 1093 struct trace_seq *s = &iter->seq;
@@ -1077,7 +1102,7 @@ static enum print_line_t trace_special_hex(struct trace_iterator *iter,
1077} 1102}
1078 1103
1079static enum print_line_t trace_special_bin(struct trace_iterator *iter, 1104static enum print_line_t trace_special_bin(struct trace_iterator *iter,
1080 int flags) 1105 int flags, struct trace_event *event)
1081{ 1106{
1082 struct special_entry *field; 1107 struct special_entry *field;
1083 struct trace_seq *s = &iter->seq; 1108 struct trace_seq *s = &iter->seq;
@@ -1091,18 +1116,22 @@ static enum print_line_t trace_special_bin(struct trace_iterator *iter,
1091 return TRACE_TYPE_HANDLED; 1116 return TRACE_TYPE_HANDLED;
1092} 1117}
1093 1118
1094static struct trace_event trace_special_event = { 1119static struct trace_event_functions trace_special_funcs = {
1095 .type = TRACE_SPECIAL,
1096 .trace = trace_special_print, 1120 .trace = trace_special_print,
1097 .raw = trace_special_print, 1121 .raw = trace_special_print,
1098 .hex = trace_special_hex, 1122 .hex = trace_special_hex,
1099 .binary = trace_special_bin, 1123 .binary = trace_special_bin,
1100}; 1124};
1101 1125
1126static struct trace_event trace_special_event = {
1127 .type = TRACE_SPECIAL,
1128 .funcs = &trace_special_funcs,
1129};
1130
1102/* TRACE_STACK */ 1131/* TRACE_STACK */
1103 1132
1104static enum print_line_t trace_stack_print(struct trace_iterator *iter, 1133static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1105 int flags) 1134 int flags, struct trace_event *event)
1106{ 1135{
1107 struct stack_entry *field; 1136 struct stack_entry *field;
1108 struct trace_seq *s = &iter->seq; 1137 struct trace_seq *s = &iter->seq;
@@ -1130,17 +1159,21 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1130 return TRACE_TYPE_PARTIAL_LINE; 1159 return TRACE_TYPE_PARTIAL_LINE;
1131} 1160}
1132 1161
1133static struct trace_event trace_stack_event = { 1162static struct trace_event_functions trace_stack_funcs = {
1134 .type = TRACE_STACK,
1135 .trace = trace_stack_print, 1163 .trace = trace_stack_print,
1136 .raw = trace_special_print, 1164 .raw = trace_special_print,
1137 .hex = trace_special_hex, 1165 .hex = trace_special_hex,
1138 .binary = trace_special_bin, 1166 .binary = trace_special_bin,
1139}; 1167};
1140 1168
1169static struct trace_event trace_stack_event = {
1170 .type = TRACE_STACK,
1171 .funcs = &trace_stack_funcs,
1172};
1173
1141/* TRACE_USER_STACK */ 1174/* TRACE_USER_STACK */
1142static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, 1175static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
1143 int flags) 1176 int flags, struct trace_event *event)
1144{ 1177{
1145 struct userstack_entry *field; 1178 struct userstack_entry *field;
1146 struct trace_seq *s = &iter->seq; 1179 struct trace_seq *s = &iter->seq;
@@ -1159,17 +1192,22 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
1159 return TRACE_TYPE_PARTIAL_LINE; 1192 return TRACE_TYPE_PARTIAL_LINE;
1160} 1193}
1161 1194
1162static struct trace_event trace_user_stack_event = { 1195static struct trace_event_functions trace_user_stack_funcs = {
1163 .type = TRACE_USER_STACK,
1164 .trace = trace_user_stack_print, 1196 .trace = trace_user_stack_print,
1165 .raw = trace_special_print, 1197 .raw = trace_special_print,
1166 .hex = trace_special_hex, 1198 .hex = trace_special_hex,
1167 .binary = trace_special_bin, 1199 .binary = trace_special_bin,
1168}; 1200};
1169 1201
1202static struct trace_event trace_user_stack_event = {
1203 .type = TRACE_USER_STACK,
1204 .funcs = &trace_user_stack_funcs,
1205};
1206
1170/* TRACE_BPRINT */ 1207/* TRACE_BPRINT */
1171static enum print_line_t 1208static enum print_line_t
1172trace_bprint_print(struct trace_iterator *iter, int flags) 1209trace_bprint_print(struct trace_iterator *iter, int flags,
1210 struct trace_event *event)
1173{ 1211{
1174 struct trace_entry *entry = iter->ent; 1212 struct trace_entry *entry = iter->ent;
1175 struct trace_seq *s = &iter->seq; 1213 struct trace_seq *s = &iter->seq;
@@ -1194,7 +1232,8 @@ trace_bprint_print(struct trace_iterator *iter, int flags)
1194 1232
1195 1233
1196static enum print_line_t 1234static enum print_line_t
1197trace_bprint_raw(struct trace_iterator *iter, int flags) 1235trace_bprint_raw(struct trace_iterator *iter, int flags,
1236 struct trace_event *event)
1198{ 1237{
1199 struct bprint_entry *field; 1238 struct bprint_entry *field;
1200 struct trace_seq *s = &iter->seq; 1239 struct trace_seq *s = &iter->seq;
@@ -1213,16 +1252,19 @@ trace_bprint_raw(struct trace_iterator *iter, int flags)
1213 return TRACE_TYPE_PARTIAL_LINE; 1252 return TRACE_TYPE_PARTIAL_LINE;
1214} 1253}
1215 1254
1255static struct trace_event_functions trace_bprint_funcs = {
1256 .trace = trace_bprint_print,
1257 .raw = trace_bprint_raw,
1258};
1216 1259
1217static struct trace_event trace_bprint_event = { 1260static struct trace_event trace_bprint_event = {
1218 .type = TRACE_BPRINT, 1261 .type = TRACE_BPRINT,
1219 .trace = trace_bprint_print, 1262 .funcs = &trace_bprint_funcs,
1220 .raw = trace_bprint_raw,
1221}; 1263};
1222 1264
1223/* TRACE_PRINT */ 1265/* TRACE_PRINT */
1224static enum print_line_t trace_print_print(struct trace_iterator *iter, 1266static enum print_line_t trace_print_print(struct trace_iterator *iter,
1225 int flags) 1267 int flags, struct trace_event *event)
1226{ 1268{
1227 struct print_entry *field; 1269 struct print_entry *field;
1228 struct trace_seq *s = &iter->seq; 1270 struct trace_seq *s = &iter->seq;
@@ -1241,7 +1283,8 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
1241 return TRACE_TYPE_PARTIAL_LINE; 1283 return TRACE_TYPE_PARTIAL_LINE;
1242} 1284}
1243 1285
1244static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) 1286static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags,
1287 struct trace_event *event)
1245{ 1288{
1246 struct print_entry *field; 1289 struct print_entry *field;
1247 1290
@@ -1256,12 +1299,16 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
1256 return TRACE_TYPE_PARTIAL_LINE; 1299 return TRACE_TYPE_PARTIAL_LINE;
1257} 1300}
1258 1301
1259static struct trace_event trace_print_event = { 1302static struct trace_event_functions trace_print_funcs = {
1260 .type = TRACE_PRINT,
1261 .trace = trace_print_print, 1303 .trace = trace_print_print,
1262 .raw = trace_print_raw, 1304 .raw = trace_print_raw,
1263}; 1305};
1264 1306
1307static struct trace_event trace_print_event = {
1308 .type = TRACE_PRINT,
1309 .funcs = &trace_print_funcs,
1310};
1311
1265 1312
1266static struct trace_event *events[] __initdata = { 1313static struct trace_event *events[] __initdata = {
1267 &trace_fn_event, 1314 &trace_fn_event,
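
Throughout trace_output.c the per-type print callbacks move into a shared struct trace_event_functions, and every callback now also receives the struct trace_event pointer; that extra argument is what lets print_kprobe_event()/print_kretprobe_event() above drop ftrace_find_event() and simply container_of() back to their enclosing trace_probe. A compact user-space model of that pattern, with made-up miniature types:

#include <stdio.h>
#include <stddef.h>

/* Made-up miniature versions of the reworked output structures. */
struct trace_event;
typedef int (*print_fn)(int flags, struct trace_event *event);

struct event_functions { print_fn trace; };   /* shared per "kind"       */
struct trace_event {                          /* one per registered type */
	int type;
	struct event_functions *funcs;
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* A probe-like object that embeds its trace_event, the way
 * struct trace_probe embeds call.event in the kprobe hunks. */
struct demo_probe {
	const char *name;
	struct trace_event event;
};

static int demo_print(int flags, struct trace_event *event)
{
	/* The extra argument lets the callback recover its owner. */
	struct demo_probe *p = container_of(event, struct demo_probe, event);

	printf("printing %s (type %d, flags %d)\n", p->name, event->type, flags);
	return 0;
}

static struct event_functions demo_funcs = { .trace = demo_print };

int main(void)
{
	struct demo_probe probe = {
		.name  = "demo_probe",
		.event = { .type = 42, .funcs = &demo_funcs },
	};

	/* What register_ftrace_event() now insists on: funcs != NULL. */
	if (!probe.event.funcs)
		return 1;

	return probe.event.funcs->trace(0, &probe.event);
}
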
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 9d91c72ba38b..c038eba0492b 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -25,7 +25,7 @@ extern void trace_event_read_unlock(void);
25extern struct trace_event *ftrace_find_event(int type); 25extern struct trace_event *ftrace_find_event(int type);
26 26
27extern enum print_line_t trace_nop_print(struct trace_iterator *iter, 27extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
28 int flags); 28 int flags, struct trace_event *event);
29extern int 29extern int
30trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); 30trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
31 31
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index a55fccfede5d..8f758d070c43 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -50,7 +50,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
50} 50}
51 51
52static void 52static void
53probe_sched_switch(struct task_struct *prev, struct task_struct *next) 53probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
54{ 54{
55 struct trace_array_cpu *data; 55 struct trace_array_cpu *data;
56 unsigned long flags; 56 unsigned long flags;
@@ -108,7 +108,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
108} 108}
109 109
110static void 110static void
111probe_sched_wakeup(struct task_struct *wakee, int success) 111probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
112{ 112{
113 struct trace_array_cpu *data; 113 struct trace_array_cpu *data;
114 unsigned long flags; 114 unsigned long flags;
@@ -138,21 +138,21 @@ static int tracing_sched_register(void)
138{ 138{
139 int ret; 139 int ret;
140 140
141 ret = register_trace_sched_wakeup(probe_sched_wakeup); 141 ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
142 if (ret) { 142 if (ret) {
143 pr_info("wakeup trace: Couldn't activate tracepoint" 143 pr_info("wakeup trace: Couldn't activate tracepoint"
144 " probe to kernel_sched_wakeup\n"); 144 " probe to kernel_sched_wakeup\n");
145 return ret; 145 return ret;
146 } 146 }
147 147
148 ret = register_trace_sched_wakeup_new(probe_sched_wakeup); 148 ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
149 if (ret) { 149 if (ret) {
150 pr_info("wakeup trace: Couldn't activate tracepoint" 150 pr_info("wakeup trace: Couldn't activate tracepoint"
151 " probe to kernel_sched_wakeup_new\n"); 151 " probe to kernel_sched_wakeup_new\n");
152 goto fail_deprobe; 152 goto fail_deprobe;
153 } 153 }
154 154
155 ret = register_trace_sched_switch(probe_sched_switch); 155 ret = register_trace_sched_switch(probe_sched_switch, NULL);
156 if (ret) { 156 if (ret) {
157 pr_info("sched trace: Couldn't activate tracepoint" 157 pr_info("sched trace: Couldn't activate tracepoint"
158 " probe to kernel_sched_switch\n"); 158 " probe to kernel_sched_switch\n");
@@ -161,17 +161,17 @@ static int tracing_sched_register(void)
161 161
162 return ret; 162 return ret;
163fail_deprobe_wake_new: 163fail_deprobe_wake_new:
164 unregister_trace_sched_wakeup_new(probe_sched_wakeup); 164 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
165fail_deprobe: 165fail_deprobe:
166 unregister_trace_sched_wakeup(probe_sched_wakeup); 166 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
167 return ret; 167 return ret;
168} 168}
169 169
170static void tracing_sched_unregister(void) 170static void tracing_sched_unregister(void)
171{ 171{
172 unregister_trace_sched_switch(probe_sched_switch); 172 unregister_trace_sched_switch(probe_sched_switch, NULL);
173 unregister_trace_sched_wakeup_new(probe_sched_wakeup); 173 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
174 unregister_trace_sched_wakeup(probe_sched_wakeup); 174 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
175} 175}
176 176
177static void tracing_start_sched_switch(void) 177static void tracing_start_sched_switch(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 8052446ceeaa..0e73bc2ef8c5 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -98,7 +98,8 @@ static int report_latency(cycle_t delta)
98 return 1; 98 return 1;
99} 99}
100 100
101static void probe_wakeup_migrate_task(struct task_struct *task, int cpu) 101static void
102probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
102{ 103{
103 if (task != wakeup_task) 104 if (task != wakeup_task)
104 return; 105 return;
@@ -107,7 +108,8 @@ static void probe_wakeup_migrate_task(struct task_struct *task, int cpu)
107} 108}
108 109
109static void notrace 110static void notrace
110probe_wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) 111probe_wakeup_sched_switch(void *ignore,
112 struct task_struct *prev, struct task_struct *next)
111{ 113{
112 struct trace_array_cpu *data; 114 struct trace_array_cpu *data;
113 cycle_t T0, T1, delta; 115 cycle_t T0, T1, delta;
@@ -199,7 +201,7 @@ static void wakeup_reset(struct trace_array *tr)
199} 201}
200 202
201static void 203static void
202probe_wakeup(struct task_struct *p, int success) 204probe_wakeup(void *ignore, struct task_struct *p, int success)
203{ 205{
204 struct trace_array_cpu *data; 206 struct trace_array_cpu *data;
205 int cpu = smp_processor_id(); 207 int cpu = smp_processor_id();
@@ -263,28 +265,28 @@ static void start_wakeup_tracer(struct trace_array *tr)
263{ 265{
264 int ret; 266 int ret;
265 267
266 ret = register_trace_sched_wakeup(probe_wakeup); 268 ret = register_trace_sched_wakeup(probe_wakeup, NULL);
267 if (ret) { 269 if (ret) {
268 pr_info("wakeup trace: Couldn't activate tracepoint" 270 pr_info("wakeup trace: Couldn't activate tracepoint"
269 " probe to kernel_sched_wakeup\n"); 271 " probe to kernel_sched_wakeup\n");
270 return; 272 return;
271 } 273 }
272 274
273 ret = register_trace_sched_wakeup_new(probe_wakeup); 275 ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
274 if (ret) { 276 if (ret) {
275 pr_info("wakeup trace: Couldn't activate tracepoint" 277 pr_info("wakeup trace: Couldn't activate tracepoint"
276 " probe to kernel_sched_wakeup_new\n"); 278 " probe to kernel_sched_wakeup_new\n");
277 goto fail_deprobe; 279 goto fail_deprobe;
278 } 280 }
279 281
280 ret = register_trace_sched_switch(probe_wakeup_sched_switch); 282 ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
281 if (ret) { 283 if (ret) {
282 pr_info("sched trace: Couldn't activate tracepoint" 284 pr_info("sched trace: Couldn't activate tracepoint"
283 " probe to kernel_sched_switch\n"); 285 " probe to kernel_sched_switch\n");
284 goto fail_deprobe_wake_new; 286 goto fail_deprobe_wake_new;
285 } 287 }
286 288
287 ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task); 289 ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
288 if (ret) { 290 if (ret) {
289 pr_info("wakeup trace: Couldn't activate tracepoint" 291 pr_info("wakeup trace: Couldn't activate tracepoint"
290 " probe to kernel_sched_migrate_task\n"); 292 " probe to kernel_sched_migrate_task\n");
@@ -311,19 +313,19 @@ static void start_wakeup_tracer(struct trace_array *tr)
311 313
312 return; 314 return;
313fail_deprobe_wake_new: 315fail_deprobe_wake_new:
314 unregister_trace_sched_wakeup_new(probe_wakeup); 316 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
315fail_deprobe: 317fail_deprobe:
316 unregister_trace_sched_wakeup(probe_wakeup); 318 unregister_trace_sched_wakeup(probe_wakeup, NULL);
317} 319}
318 320
319static void stop_wakeup_tracer(struct trace_array *tr) 321static void stop_wakeup_tracer(struct trace_array *tr)
320{ 322{
321 tracer_enabled = 0; 323 tracer_enabled = 0;
322 unregister_ftrace_function(&trace_ops); 324 unregister_ftrace_function(&trace_ops);
323 unregister_trace_sched_switch(probe_wakeup_sched_switch); 325 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
324 unregister_trace_sched_wakeup_new(probe_wakeup); 326 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
325 unregister_trace_sched_wakeup(probe_wakeup); 327 unregister_trace_sched_wakeup(probe_wakeup, NULL);
326 unregister_trace_sched_migrate_task(probe_wakeup_migrate_task); 328 unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
327} 329}
328 330
329static int __wakeup_tracer_init(struct trace_array *tr) 331static int __wakeup_tracer_init(struct trace_array *tr)
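
Both sched tracers are converted to the new tracepoint probe convention: every probe takes a leading void * of private data (unused here, hence the "ignore" parameter), and the register_/unregister_trace_*() calls now pass that data pointer, NULL in these files. A small sketch of the calling convention, using an invented registration helper rather than the real tracepoint API:

#include <stdio.h>

/* Probe signature after the change: private data comes first. */
typedef void (*wakeup_probe_t)(void *data, const char *task, int success);

/* Invented stand-in for register_trace_sched_wakeup(probe, data). */
struct probe_slot { wakeup_probe_t fn; void *data; };
static struct probe_slot slot;

static int register_wakeup_probe(wakeup_probe_t fn, void *data)
{
	slot.fn = fn;
	slot.data = data;
	return 0;
}

static void fire_wakeup(const char *task, int success)
{
	if (slot.fn)
		slot.fn(slot.data, task, success);   /* data threaded through */
}

/* A probe that, like probe_sched_wakeup() above, ignores its data. */
static void probe_wakeup(void *ignore, const char *task, int success)
{
	(void)ignore;
	printf("wakeup: %s (success=%d)\n", task, success);
}

int main(void)
{
	register_wakeup_probe(probe_wakeup, NULL);   /* NULL data, as in the diff */
	fire_wakeup("swapper/0", 1);
	return 0;
}
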
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 4d6d711717f2..34e35804304b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -15,6 +15,54 @@ static int sys_refcount_exit;
15static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 15static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
16static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 16static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
17 17
18static int syscall_enter_register(struct ftrace_event_call *event,
19 enum trace_reg type);
20static int syscall_exit_register(struct ftrace_event_call *event,
21 enum trace_reg type);
22
23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call);
25
26static struct list_head *
27syscall_get_enter_fields(struct ftrace_event_call *call)
28{
29 struct syscall_metadata *entry = call->data;
30
31 return &entry->enter_fields;
32}
33
34static struct list_head *
35syscall_get_exit_fields(struct ftrace_event_call *call)
36{
37 struct syscall_metadata *entry = call->data;
38
39 return &entry->exit_fields;
40}
41
42struct trace_event_functions enter_syscall_print_funcs = {
43 .trace = print_syscall_enter,
44};
45
46struct trace_event_functions exit_syscall_print_funcs = {
47 .trace = print_syscall_exit,
48};
49
50struct ftrace_event_class event_class_syscall_enter = {
51 .system = "syscalls",
52 .reg = syscall_enter_register,
53 .define_fields = syscall_enter_define_fields,
54 .get_fields = syscall_get_enter_fields,
55 .raw_init = init_syscall_trace,
56};
57
58struct ftrace_event_class event_class_syscall_exit = {
59 .system = "syscalls",
60 .reg = syscall_exit_register,
61 .define_fields = syscall_exit_define_fields,
62 .get_fields = syscall_get_exit_fields,
63 .raw_init = init_syscall_trace,
64};
65
18extern unsigned long __start_syscalls_metadata[]; 66extern unsigned long __start_syscalls_metadata[];
19extern unsigned long __stop_syscalls_metadata[]; 67extern unsigned long __stop_syscalls_metadata[];
20 68
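
The two classes added above introduce a get_fields() hook: every sys_enter_* (or sys_exit_*) event shares one class, so the per-event field list has to come from the event's own syscall_metadata rather than from the class. A simplified model of that lookup, shaped like trace_get_fields() used in the trace_events.c hunks earlier, with stand-in types:

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins; the real structures live in the ftrace headers. */
struct list_head { struct list_head *next, *prev; };

struct event_call;
struct event_class {
	const char *system;
	/* Optional hook: lets a shared class return per-call field lists. */
	struct list_head *(*get_fields)(struct event_call *call);
	struct list_head fields;            /* fallback shared list */
};

struct event_call {
	const char *name;
	struct event_class *class;
	void *data;                         /* e.g. syscall_metadata */
};

struct syscall_meta {                       /* models struct syscall_metadata */
	struct list_head enter_fields;
	struct list_head exit_fields;
};

/* Shaped like trace_get_fields(): prefer the class hook when present. */
static struct list_head *get_fields(struct event_call *call)
{
	if (call->class->get_fields)
		return call->class->get_fields(call);
	return &call->class->fields;
}

static struct list_head *syscall_enter_fields(struct event_call *call)
{
	struct syscall_meta *meta = call->data;
	return &meta->enter_fields;
}

static struct event_class syscall_enter_class = {
	.system     = "syscalls",
	.get_fields = syscall_enter_fields,
};

int main(void)
{
	struct syscall_meta meta_open = { { NULL, NULL }, { NULL, NULL } };
	struct event_call sys_enter_open = {
		.name  = "sys_enter_open",
		.class = &syscall_enter_class,
		.data  = &meta_open,
	};

	printf("fields list for %s at %p\n", sys_enter_open.name,
	       (void *)get_fields(&sys_enter_open));
	return 0;
}
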
@@ -53,7 +101,8 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
53} 101}
54 102
55enum print_line_t 103enum print_line_t
56print_syscall_enter(struct trace_iterator *iter, int flags) 104print_syscall_enter(struct trace_iterator *iter, int flags,
105 struct trace_event *event)
57{ 106{
58 struct trace_seq *s = &iter->seq; 107 struct trace_seq *s = &iter->seq;
59 struct trace_entry *ent = iter->ent; 108 struct trace_entry *ent = iter->ent;
@@ -68,7 +117,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
68 if (!entry) 117 if (!entry)
69 goto end; 118 goto end;
70 119
71 if (entry->enter_event->id != ent->type) { 120 if (entry->enter_event->event.type != ent->type) {
72 WARN_ON_ONCE(1); 121 WARN_ON_ONCE(1);
73 goto end; 122 goto end;
74 } 123 }
@@ -105,7 +154,8 @@ end:
105} 154}
106 155
107enum print_line_t 156enum print_line_t
108print_syscall_exit(struct trace_iterator *iter, int flags) 157print_syscall_exit(struct trace_iterator *iter, int flags,
158 struct trace_event *event)
109{ 159{
110 struct trace_seq *s = &iter->seq; 160 struct trace_seq *s = &iter->seq;
111 struct trace_entry *ent = iter->ent; 161 struct trace_entry *ent = iter->ent;
@@ -123,7 +173,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
123 return TRACE_TYPE_HANDLED; 173 return TRACE_TYPE_HANDLED;
124 } 174 }
125 175
126 if (entry->exit_event->id != ent->type) { 176 if (entry->exit_event->event.type != ent->type) {
127 WARN_ON_ONCE(1); 177 WARN_ON_ONCE(1);
128 return TRACE_TYPE_UNHANDLED; 178 return TRACE_TYPE_UNHANDLED;
129 } 179 }
@@ -205,7 +255,7 @@ static void free_syscall_print_fmt(struct ftrace_event_call *call)
205 kfree(call->print_fmt); 255 kfree(call->print_fmt);
206} 256}
207 257
208int syscall_enter_define_fields(struct ftrace_event_call *call) 258static int syscall_enter_define_fields(struct ftrace_event_call *call)
209{ 259{
210 struct syscall_trace_enter trace; 260 struct syscall_trace_enter trace;
211 struct syscall_metadata *meta = call->data; 261 struct syscall_metadata *meta = call->data;
@@ -228,7 +278,7 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
228 return ret; 278 return ret;
229} 279}
230 280
231int syscall_exit_define_fields(struct ftrace_event_call *call) 281static int syscall_exit_define_fields(struct ftrace_event_call *call)
232{ 282{
233 struct syscall_trace_exit trace; 283 struct syscall_trace_exit trace;
234 int ret; 284 int ret;
@@ -243,7 +293,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
243 return ret; 293 return ret;
244} 294}
245 295
246void ftrace_syscall_enter(struct pt_regs *regs, long id) 296void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
247{ 297{
248 struct syscall_trace_enter *entry; 298 struct syscall_trace_enter *entry;
249 struct syscall_metadata *sys_data; 299 struct syscall_metadata *sys_data;
@@ -265,7 +315,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
265 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 315 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
266 316
267 event = trace_current_buffer_lock_reserve(&buffer, 317 event = trace_current_buffer_lock_reserve(&buffer,
268 sys_data->enter_event->id, size, 0, 0); 318 sys_data->enter_event->event.type, size, 0, 0);
269 if (!event) 319 if (!event)
270 return; 320 return;
271 321
@@ -278,7 +328,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
278 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 328 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
279} 329}
280 330
281void ftrace_syscall_exit(struct pt_regs *regs, long ret) 331void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
282{ 332{
283 struct syscall_trace_exit *entry; 333 struct syscall_trace_exit *entry;
284 struct syscall_metadata *sys_data; 334 struct syscall_metadata *sys_data;
@@ -297,7 +347,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
297 return; 347 return;
298 348
299 event = trace_current_buffer_lock_reserve(&buffer, 349 event = trace_current_buffer_lock_reserve(&buffer,
300 sys_data->exit_event->id, sizeof(*entry), 0, 0); 350 sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
301 if (!event) 351 if (!event)
302 return; 352 return;
303 353
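Both hunks above swap the old event->id for the type carried by the struct trace_event now embedded in the call. A condensed sketch of that reserve/fill/commit pattern, assuming 'call' is the event's struct ftrace_event_call and 'struct my_entry' is a placeholder record layout (not part of the patch):

static void reserve_and_commit_sketch(struct ftrace_event_call *call)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct my_entry *entry;          /* hypothetical record layout */

	event = trace_current_buffer_lock_reserve(&buffer,
			call->event.type, sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	/* ... fill in *entry from the traced context ... */

	trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}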
@@ -320,7 +370,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
320 return -ENOSYS; 370 return -ENOSYS;
321 mutex_lock(&syscall_trace_lock); 371 mutex_lock(&syscall_trace_lock);
322 if (!sys_refcount_enter) 372 if (!sys_refcount_enter)
323 ret = register_trace_sys_enter(ftrace_syscall_enter); 373 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
324 if (!ret) { 374 if (!ret) {
325 set_bit(num, enabled_enter_syscalls); 375 set_bit(num, enabled_enter_syscalls);
326 sys_refcount_enter++; 376 sys_refcount_enter++;
@@ -340,7 +390,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
340 sys_refcount_enter--; 390 sys_refcount_enter--;
341 clear_bit(num, enabled_enter_syscalls); 391 clear_bit(num, enabled_enter_syscalls);
342 if (!sys_refcount_enter) 392 if (!sys_refcount_enter)
343 unregister_trace_sys_enter(ftrace_syscall_enter); 393 unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
344 mutex_unlock(&syscall_trace_lock); 394 mutex_unlock(&syscall_trace_lock);
345} 395}
346 396
@@ -354,7 +404,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
354 return -ENOSYS; 404 return -ENOSYS;
355 mutex_lock(&syscall_trace_lock); 405 mutex_lock(&syscall_trace_lock);
356 if (!sys_refcount_exit) 406 if (!sys_refcount_exit)
357 ret = register_trace_sys_exit(ftrace_syscall_exit); 407 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
358 if (!ret) { 408 if (!ret) {
359 set_bit(num, enabled_exit_syscalls); 409 set_bit(num, enabled_exit_syscalls);
360 sys_refcount_exit++; 410 sys_refcount_exit++;
@@ -374,7 +424,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
374 sys_refcount_exit--; 424 sys_refcount_exit--;
375 clear_bit(num, enabled_exit_syscalls); 425 clear_bit(num, enabled_exit_syscalls);
376 if (!sys_refcount_exit) 426 if (!sys_refcount_exit)
377 unregister_trace_sys_exit(ftrace_syscall_exit); 427 unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
378 mutex_unlock(&syscall_trace_lock); 428 mutex_unlock(&syscall_trace_lock);
379} 429}
380 430
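The registration changes above follow the new tracepoint calling convention: a probe now receives the per-registration data pointer as its first argument, and the register/unregister wrappers take that pointer as a second parameter (NULL in the core tracers). A minimal sketch of a probe written against this convention; the probe name and the trace_printk() body are illustrative only:

static void my_sys_enter_probe(void *ignore, struct pt_regs *regs, long id)
{
	/* 'ignore' is whatever was passed as 'data' at registration time */
	trace_printk("sys_enter: nr=%ld\n", id);
}

static int my_probe_attach(void)
{
	/* (probe, data) pair; NULL data mirrors the hunks above */
	return register_trace_sys_enter(my_sys_enter_probe, NULL);
}

static void my_probe_detach(void)
{
	unregister_trace_sys_enter(my_sys_enter_probe, NULL);
}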
@@ -434,11 +484,11 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
434static int sys_perf_refcount_enter; 484static int sys_perf_refcount_enter;
435static int sys_perf_refcount_exit; 485static int sys_perf_refcount_exit;
436 486
437static void perf_syscall_enter(struct pt_regs *regs, long id) 487static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
438{ 488{
439 struct syscall_metadata *sys_data; 489 struct syscall_metadata *sys_data;
440 struct syscall_trace_enter *rec; 490 struct syscall_trace_enter *rec;
441 unsigned long flags; 491 struct hlist_head *head;
442 int syscall_nr; 492 int syscall_nr;
443 int rctx; 493 int rctx;
444 int size; 494 int size;
@@ -461,14 +511,16 @@ static void perf_syscall_enter(struct pt_regs *regs, long id)
461 return; 511 return;
462 512
463 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, 513 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
464 sys_data->enter_event->id, &rctx, &flags); 514 sys_data->enter_event->event.type, regs, &rctx);
465 if (!rec) 515 if (!rec)
466 return; 516 return;
467 517
468 rec->nr = syscall_nr; 518 rec->nr = syscall_nr;
469 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 519 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
470 (unsigned long *)&rec->args); 520 (unsigned long *)&rec->args);
471 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); 521
522 head = this_cpu_ptr(sys_data->enter_event->perf_events);
523 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
472} 524}
473 525
474int perf_sysenter_enable(struct ftrace_event_call *call) 526int perf_sysenter_enable(struct ftrace_event_call *call)
@@ -480,7 +532,7 @@ int perf_sysenter_enable(struct ftrace_event_call *call)
480 532
481 mutex_lock(&syscall_trace_lock); 533 mutex_lock(&syscall_trace_lock);
482 if (!sys_perf_refcount_enter) 534 if (!sys_perf_refcount_enter)
483 ret = register_trace_sys_enter(perf_syscall_enter); 535 ret = register_trace_sys_enter(perf_syscall_enter, NULL);
484 if (ret) { 536 if (ret) {
485 pr_info("event trace: Could not activate " 537
486 "syscall entry trace point"); 538 "syscall entry trace point");
@@ -502,15 +554,15 @@ void perf_sysenter_disable(struct ftrace_event_call *call)
502 sys_perf_refcount_enter--; 554 sys_perf_refcount_enter--;
503 clear_bit(num, enabled_perf_enter_syscalls); 555 clear_bit(num, enabled_perf_enter_syscalls);
504 if (!sys_perf_refcount_enter) 556 if (!sys_perf_refcount_enter)
505 unregister_trace_sys_enter(perf_syscall_enter); 557 unregister_trace_sys_enter(perf_syscall_enter, NULL);
506 mutex_unlock(&syscall_trace_lock); 558 mutex_unlock(&syscall_trace_lock);
507} 559}
508 560
509static void perf_syscall_exit(struct pt_regs *regs, long ret) 561static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
510{ 562{
511 struct syscall_metadata *sys_data; 563 struct syscall_metadata *sys_data;
512 struct syscall_trace_exit *rec; 564 struct syscall_trace_exit *rec;
513 unsigned long flags; 565 struct hlist_head *head;
514 int syscall_nr; 566 int syscall_nr;
515 int rctx; 567 int rctx;
516 int size; 568 int size;
@@ -536,14 +588,15 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret)
536 return; 588 return;
537 589
538 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, 590 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
539 sys_data->exit_event->id, &rctx, &flags); 591 sys_data->exit_event->event.type, regs, &rctx);
540 if (!rec) 592 if (!rec)
541 return; 593 return;
542 594
543 rec->nr = syscall_nr; 595 rec->nr = syscall_nr;
544 rec->ret = syscall_get_return_value(current, regs); 596 rec->ret = syscall_get_return_value(current, regs);
545 597
546 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); 598 head = this_cpu_ptr(sys_data->exit_event->perf_events);
599 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
547} 600}
548 601
549int perf_sysexit_enable(struct ftrace_event_call *call) 602int perf_sysexit_enable(struct ftrace_event_call *call)
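Both perf probes now share the same shape: the buffer is prepared with the event type and the captured regs, and the finished record is submitted to the per-cpu hlist of perf events hanging off the ftrace_event_call. A condensed sketch of that path, assuming 'call' is the event's ftrace_event_call and 'struct my_rec' is a placeholder record type:

static void perf_submit_sketch(struct ftrace_event_call *call,
			       struct pt_regs *regs)
{
	struct my_rec *rec;              /* hypothetical record layout */
	struct hlist_head *head;
	int size = sizeof(*rec);
	int rctx;

	rec = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!rec)
		return;

	/* ... fill in *rec ... */

	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}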
@@ -555,7 +608,7 @@ int perf_sysexit_enable(struct ftrace_event_call *call)
555 608
556 mutex_lock(&syscall_trace_lock); 609 mutex_lock(&syscall_trace_lock);
557 if (!sys_perf_refcount_exit) 610 if (!sys_perf_refcount_exit)
558 ret = register_trace_sys_exit(perf_syscall_exit); 611 ret = register_trace_sys_exit(perf_syscall_exit, NULL);
559 if (ret) { 612 if (ret) {
560 pr_info("event trace: Could not activate " 613
561 "syscall exit trace point"); 614 "syscall exit trace point");
@@ -577,9 +630,50 @@ void perf_sysexit_disable(struct ftrace_event_call *call)
577 sys_perf_refcount_exit--; 630 sys_perf_refcount_exit--;
578 clear_bit(num, enabled_perf_exit_syscalls); 631 clear_bit(num, enabled_perf_exit_syscalls);
579 if (!sys_perf_refcount_exit) 632 if (!sys_perf_refcount_exit)
580 unregister_trace_sys_exit(perf_syscall_exit); 633 unregister_trace_sys_exit(perf_syscall_exit, NULL);
581 mutex_unlock(&syscall_trace_lock); 634 mutex_unlock(&syscall_trace_lock);
582} 635}
583 636
584#endif /* CONFIG_PERF_EVENTS */ 637#endif /* CONFIG_PERF_EVENTS */
585 638
639static int syscall_enter_register(struct ftrace_event_call *event,
640 enum trace_reg type)
641{
642 switch (type) {
643 case TRACE_REG_REGISTER:
644 return reg_event_syscall_enter(event);
645 case TRACE_REG_UNREGISTER:
646 unreg_event_syscall_enter(event);
647 return 0;
648
649#ifdef CONFIG_PERF_EVENTS
650 case TRACE_REG_PERF_REGISTER:
651 return perf_sysenter_enable(event);
652 case TRACE_REG_PERF_UNREGISTER:
653 perf_sysenter_disable(event);
654 return 0;
655#endif
656 }
657 return 0;
658}
659
660static int syscall_exit_register(struct ftrace_event_call *event,
661 enum trace_reg type)
662{
663 switch (type) {
664 case TRACE_REG_REGISTER:
665 return reg_event_syscall_exit(event);
666 case TRACE_REG_UNREGISTER:
667 unreg_event_syscall_exit(event);
668 return 0;
669
670#ifdef CONFIG_PERF_EVENTS
671 case TRACE_REG_PERF_REGISTER:
672 return perf_sysexit_enable(event);
673 case TRACE_REG_PERF_UNREGISTER:
674 perf_sysexit_disable(event);
675 return 0;
676#endif
677 }
678 return 0;
679}
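These two additions are the syscall events' implementation of the ->reg() hook: the event core asks the event to (un)register itself for either the ftrace or the perf path by passing one of the enum trace_reg values. The same dispatch skeleton applies to any event providing its own ->reg(); the four my_event_* helpers below are hypothetical stand-ins for an event's own enable/disable routines:

static int my_event_register(struct ftrace_event_call *event,
			     enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return my_event_trace_enable(event);
	case TRACE_REG_UNREGISTER:
		my_event_trace_disable(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return my_event_perf_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		my_event_perf_disable(event);
		return 0;
#endif
	}
	return 0;
}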
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index cc2d2faa7d9e..a7cc3793baf6 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -49,7 +49,8 @@ static void cpu_workqueue_stat_free(struct kref *kref)
49 49
50/* Insertion of a work */ 50/* Insertion of a work */
51static void 51static void
52probe_workqueue_insertion(struct task_struct *wq_thread, 52probe_workqueue_insertion(void *ignore,
53 struct task_struct *wq_thread,
53 struct work_struct *work) 54 struct work_struct *work)
54{ 55{
55 int cpu = cpumask_first(&wq_thread->cpus_allowed); 56 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -70,7 +71,8 @@ found:
70 71
71/* Execution of a work */ 72/* Execution of a work */
72static void 73static void
73probe_workqueue_execution(struct task_struct *wq_thread, 74probe_workqueue_execution(void *ignore,
75 struct task_struct *wq_thread,
74 struct work_struct *work) 76 struct work_struct *work)
75{ 77{
76 int cpu = cpumask_first(&wq_thread->cpus_allowed); 78 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -90,7 +92,8 @@ found:
90} 92}
91 93
92/* Creation of a cpu workqueue thread */ 94/* Creation of a cpu workqueue thread */
93static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) 95static void probe_workqueue_creation(void *ignore,
96 struct task_struct *wq_thread, int cpu)
94{ 97{
95 struct cpu_workqueue_stats *cws; 98 struct cpu_workqueue_stats *cws;
96 unsigned long flags; 99 unsigned long flags;
@@ -114,7 +117,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
114} 117}
115 118
116/* Destruction of a cpu workqueue thread */ 119/* Destruction of a cpu workqueue thread */
117static void probe_workqueue_destruction(struct task_struct *wq_thread) 120static void
121probe_workqueue_destruction(void *ignore, struct task_struct *wq_thread)
118{ 122{
119 /* Workqueues only execute on one cpu */ 123
120 int cpu = cpumask_first(&wq_thread->cpus_allowed); 124 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -259,19 +263,19 @@ int __init trace_workqueue_early_init(void)
259{ 263{
260 int ret, cpu; 264 int ret, cpu;
261 265
262 ret = register_trace_workqueue_insertion(probe_workqueue_insertion); 266 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
263 if (ret) 267 if (ret)
264 goto out; 268 goto out;
265 269
266 ret = register_trace_workqueue_execution(probe_workqueue_execution); 270 ret = register_trace_workqueue_execution(probe_workqueue_execution, NULL);
267 if (ret) 271 if (ret)
268 goto no_insertion; 272 goto no_insertion;
269 273
270 ret = register_trace_workqueue_creation(probe_workqueue_creation); 274 ret = register_trace_workqueue_creation(probe_workqueue_creation, NULL);
271 if (ret) 275 if (ret)
272 goto no_execution; 276 goto no_execution;
273 277
274 ret = register_trace_workqueue_destruction(probe_workqueue_destruction); 278 ret = register_trace_workqueue_destruction(probe_workqueue_destruction, NULL);
275 if (ret) 279 if (ret)
276 goto no_creation; 280 goto no_creation;
277 281
@@ -283,11 +287,11 @@ int __init trace_workqueue_early_init(void)
283 return 0; 287 return 0;
284 288
285no_creation: 289no_creation:
286 unregister_trace_workqueue_creation(probe_workqueue_creation); 290 unregister_trace_workqueue_creation(probe_workqueue_creation, NULL);
287no_execution: 291no_execution:
288 unregister_trace_workqueue_execution(probe_workqueue_execution); 292 unregister_trace_workqueue_execution(probe_workqueue_execution, NULL);
289no_insertion: 293no_insertion:
290 unregister_trace_workqueue_insertion(probe_workqueue_insertion); 294 unregister_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
291out: 295out:
292 pr_warning("trace_workqueue: unable to trace workqueues\n"); 296 pr_warning("trace_workqueue: unable to trace workqueues\n");
293 297
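trace_workqueue_early_init() keeps its register-or-unwind ladder, only extended with the new NULL data argument. The same idiom for a pair of hypothetical tracepoints foo and bar looks like this (probe_foo/probe_bar and the register_trace_* wrappers are illustrative names):

static int __init my_probes_init(void)
{
	int ret;

	ret = register_trace_foo(probe_foo, NULL);
	if (ret)
		goto out;

	ret = register_trace_bar(probe_bar, NULL);
	if (ret)
		goto no_foo;

	return 0;

no_foo:
	unregister_trace_foo(probe_foo, NULL);
out:
	pr_warning("my_probes: unable to register tracepoints\n");
	return ret;
}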
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index cc89be5bc0f8..c77f3eceea25 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -54,7 +54,7 @@ static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
54 */ 54 */
55struct tracepoint_entry { 55struct tracepoint_entry {
56 struct hlist_node hlist; 56 struct hlist_node hlist;
57 void **funcs; 57 struct tracepoint_func *funcs;
58 int refcount; /* Number of times armed. 0 if disarmed. */ 58 int refcount; /* Number of times armed. 0 if disarmed. */
59 char name[0]; 59 char name[0];
60}; 60};
@@ -64,12 +64,12 @@ struct tp_probes {
64 struct rcu_head rcu; 64 struct rcu_head rcu;
65 struct list_head list; 65 struct list_head list;
66 } u; 66 } u;
67 void *probes[0]; 67 struct tracepoint_func probes[0];
68}; 68};
69 69
70static inline void *allocate_probes(int count) 70static inline void *allocate_probes(int count)
71{ 71{
72 struct tp_probes *p = kmalloc(count * sizeof(void *) 72 struct tp_probes *p = kmalloc(count * sizeof(struct tracepoint_func)
73 + sizeof(struct tp_probes), GFP_KERNEL); 73 + sizeof(struct tp_probes), GFP_KERNEL);
74 return p == NULL ? NULL : p->probes; 74 return p == NULL ? NULL : p->probes;
75} 75}
@@ -79,7 +79,7 @@ static void rcu_free_old_probes(struct rcu_head *head)
79 kfree(container_of(head, struct tp_probes, u.rcu)); 79 kfree(container_of(head, struct tp_probes, u.rcu));
80} 80}
81 81
82static inline void release_probes(void *old) 82static inline void release_probes(struct tracepoint_func *old)
83{ 83{
84 if (old) { 84 if (old) {
85 struct tp_probes *tp_probes = container_of(old, 85 struct tp_probes *tp_probes = container_of(old,
@@ -95,15 +95,16 @@ static void debug_print_probes(struct tracepoint_entry *entry)
95 if (!tracepoint_debug || !entry->funcs) 95 if (!tracepoint_debug || !entry->funcs)
96 return; 96 return;
97 97
98 for (i = 0; entry->funcs[i]; i++) 98 for (i = 0; entry->funcs[i].func; i++)
99 printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]); 99 printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i].func);
100} 100}
101 101
102static void * 102static struct tracepoint_func *
103tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) 103tracepoint_entry_add_probe(struct tracepoint_entry *entry,
104 void *probe, void *data)
104{ 105{
105 int nr_probes = 0; 106 int nr_probes = 0;
106 void **old, **new; 107 struct tracepoint_func *old, *new;
107 108
108 WARN_ON(!probe); 109 WARN_ON(!probe);
109 110
@@ -111,8 +112,9 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
111 old = entry->funcs; 112 old = entry->funcs;
112 if (old) { 113 if (old) {
113 /* (N -> N+1), (N != 0, 1) probes */ 114 /* (N -> N+1), (N != 0, 1) probes */
114 for (nr_probes = 0; old[nr_probes]; nr_probes++) 115 for (nr_probes = 0; old[nr_probes].func; nr_probes++)
115 if (old[nr_probes] == probe) 116 if (old[nr_probes].func == probe &&
117 old[nr_probes].data == data)
116 return ERR_PTR(-EEXIST); 118 return ERR_PTR(-EEXIST);
117 } 119 }
118 /* + 2 : one for new probe, one for NULL func */ 120 /* + 2 : one for new probe, one for NULL func */
@@ -120,9 +122,10 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
120 if (new == NULL) 122 if (new == NULL)
121 return ERR_PTR(-ENOMEM); 123 return ERR_PTR(-ENOMEM);
122 if (old) 124 if (old)
123 memcpy(new, old, nr_probes * sizeof(void *)); 125 memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
124 new[nr_probes] = probe; 126 new[nr_probes].func = probe;
125 new[nr_probes + 1] = NULL; 127 new[nr_probes].data = data;
128 new[nr_probes + 1].func = NULL;
126 entry->refcount = nr_probes + 1; 129 entry->refcount = nr_probes + 1;
127 entry->funcs = new; 130 entry->funcs = new;
128 debug_print_probes(entry); 131 debug_print_probes(entry);
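With this hunk the per-tracepoint probe table stops being a bare NULL-terminated array of function pointers and becomes an array of (func, data) pairs, still terminated by an entry whose .func is NULL. Assuming struct tracepoint_func is the { void *func; void *data; } pair used throughout these hunks, walking such a table looks like:

static void walk_probe_table(struct tracepoint_func *funcs)
{
	int i;

	if (!funcs)
		return;

	/* the terminator is the first entry with a NULL .func */
	for (i = 0; funcs[i].func; i++)
		printk(KERN_DEBUG "probe %d: %p (data %p)\n",
		       i, funcs[i].func, funcs[i].data);
}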
@@ -130,10 +133,11 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
130} 133}
131 134
132static void * 135static void *
133tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) 136tracepoint_entry_remove_probe(struct tracepoint_entry *entry,
137 void *probe, void *data)
134{ 138{
135 int nr_probes = 0, nr_del = 0, i; 139 int nr_probes = 0, nr_del = 0, i;
136 void **old, **new; 140 struct tracepoint_func *old, *new;
137 141
138 old = entry->funcs; 142 old = entry->funcs;
139 143
@@ -142,8 +146,10 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
142 146
143 debug_print_probes(entry); 147 debug_print_probes(entry);
144 /* (N -> M), (N > 1, M >= 0) probes */ 148 /* (N -> M), (N > 1, M >= 0) probes */
145 for (nr_probes = 0; old[nr_probes]; nr_probes++) { 149 for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
146 if ((!probe || old[nr_probes] == probe)) 150 if (!probe ||
151 (old[nr_probes].func == probe &&
152 old[nr_probes].data == data))
147 nr_del++; 153 nr_del++;
148 } 154 }
149 155
@@ -160,10 +166,11 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
160 new = allocate_probes(nr_probes - nr_del + 1); 166 new = allocate_probes(nr_probes - nr_del + 1);
161 if (new == NULL) 167 if (new == NULL)
162 return ERR_PTR(-ENOMEM); 168 return ERR_PTR(-ENOMEM);
163 for (i = 0; old[i]; i++) 169 for (i = 0; old[i].func; i++)
164 if ((probe && old[i] != probe)) 170 if (probe &&
171 (old[i].func != probe || old[i].data != data))
165 new[j++] = old[i]; 172 new[j++] = old[i];
166 new[nr_probes - nr_del] = NULL; 173 new[nr_probes - nr_del].func = NULL;
167 entry->refcount = nr_probes - nr_del; 174 entry->refcount = nr_probes - nr_del;
168 entry->funcs = new; 175 entry->funcs = new;
169 } 176 }
@@ -315,18 +322,19 @@ static void tracepoint_update_probes(void)
315 module_update_tracepoints(); 322 module_update_tracepoints();
316} 323}
317 324
318static void *tracepoint_add_probe(const char *name, void *probe) 325static struct tracepoint_func *
326tracepoint_add_probe(const char *name, void *probe, void *data)
319{ 327{
320 struct tracepoint_entry *entry; 328 struct tracepoint_entry *entry;
321 void *old; 329 struct tracepoint_func *old;
322 330
323 entry = get_tracepoint(name); 331 entry = get_tracepoint(name);
324 if (!entry) { 332 if (!entry) {
325 entry = add_tracepoint(name); 333 entry = add_tracepoint(name);
326 if (IS_ERR(entry)) 334 if (IS_ERR(entry))
327 return entry; 335 return (struct tracepoint_func *)entry;
328 } 336 }
329 old = tracepoint_entry_add_probe(entry, probe); 337 old = tracepoint_entry_add_probe(entry, probe, data);
330 if (IS_ERR(old) && !entry->refcount) 338 if (IS_ERR(old) && !entry->refcount)
331 remove_tracepoint(entry); 339 remove_tracepoint(entry);
332 return old; 340 return old;
@@ -340,12 +348,12 @@ static void *tracepoint_add_probe(const char *name, void *probe)
340 * Returns 0 if ok, error value on error. 348 * Returns 0 if ok, error value on error.
341 * The probe address must at least be aligned on the architecture pointer size. 349 * The probe address must at least be aligned on the architecture pointer size.
342 */ 350 */
343int tracepoint_probe_register(const char *name, void *probe) 351int tracepoint_probe_register(const char *name, void *probe, void *data)
344{ 352{
345 void *old; 353 struct tracepoint_func *old;
346 354
347 mutex_lock(&tracepoints_mutex); 355 mutex_lock(&tracepoints_mutex);
348 old = tracepoint_add_probe(name, probe); 356 old = tracepoint_add_probe(name, probe, data);
349 mutex_unlock(&tracepoints_mutex); 357 mutex_unlock(&tracepoints_mutex);
350 if (IS_ERR(old)) 358 if (IS_ERR(old))
351 return PTR_ERR(old); 359 return PTR_ERR(old);
@@ -356,15 +364,16 @@ int tracepoint_probe_register(const char *name, void *probe)
356} 364}
357EXPORT_SYMBOL_GPL(tracepoint_probe_register); 365EXPORT_SYMBOL_GPL(tracepoint_probe_register);
358 366
359static void *tracepoint_remove_probe(const char *name, void *probe) 367static struct tracepoint_func *
368tracepoint_remove_probe(const char *name, void *probe, void *data)
360{ 369{
361 struct tracepoint_entry *entry; 370 struct tracepoint_entry *entry;
362 void *old; 371 struct tracepoint_func *old;
363 372
364 entry = get_tracepoint(name); 373 entry = get_tracepoint(name);
365 if (!entry) 374 if (!entry)
366 return ERR_PTR(-ENOENT); 375 return ERR_PTR(-ENOENT);
367 old = tracepoint_entry_remove_probe(entry, probe); 376 old = tracepoint_entry_remove_probe(entry, probe, data);
368 if (IS_ERR(old)) 377 if (IS_ERR(old))
369 return old; 378 return old;
370 if (!entry->refcount) 379 if (!entry->refcount)
@@ -382,12 +391,12 @@ static void *tracepoint_remove_probe(const char *name, void *probe)
382 * itself uses stop_machine(), which ensures that every preempt-disabled section 391
383 * has finished. 392
384 */ 393 */
385int tracepoint_probe_unregister(const char *name, void *probe) 394int tracepoint_probe_unregister(const char *name, void *probe, void *data)
386{ 395{
387 void *old; 396 struct tracepoint_func *old;
388 397
389 mutex_lock(&tracepoints_mutex); 398 mutex_lock(&tracepoints_mutex);
390 old = tracepoint_remove_probe(name, probe); 399 old = tracepoint_remove_probe(name, probe, data);
391 mutex_unlock(&tracepoints_mutex); 400 mutex_unlock(&tracepoints_mutex);
392 if (IS_ERR(old)) 401 if (IS_ERR(old))
393 return PTR_ERR(old); 402 return PTR_ERR(old);
@@ -418,12 +427,13 @@ static void tracepoint_add_old_probes(void *old)
418 * 427 *
419 * caller must call tracepoint_probe_update_all() 428 * caller must call tracepoint_probe_update_all()
420 */ 429 */
421int tracepoint_probe_register_noupdate(const char *name, void *probe) 430int tracepoint_probe_register_noupdate(const char *name, void *probe,
431 void *data)
422{ 432{
423 void *old; 433 struct tracepoint_func *old;
424 434
425 mutex_lock(&tracepoints_mutex); 435 mutex_lock(&tracepoints_mutex);
426 old = tracepoint_add_probe(name, probe); 436 old = tracepoint_add_probe(name, probe, data);
427 if (IS_ERR(old)) { 437 if (IS_ERR(old)) {
428 mutex_unlock(&tracepoints_mutex); 438 mutex_unlock(&tracepoints_mutex);
429 return PTR_ERR(old); 439 return PTR_ERR(old);
@@ -441,12 +451,13 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
441 * 451 *
442 * caller must call tracepoint_probe_update_all() 452 * caller must call tracepoint_probe_update_all()
443 */ 453 */
444int tracepoint_probe_unregister_noupdate(const char *name, void *probe) 454int tracepoint_probe_unregister_noupdate(const char *name, void *probe,
455 void *data)
445{ 456{
446 void *old; 457 struct tracepoint_func *old;
447 458
448 mutex_lock(&tracepoints_mutex); 459 mutex_lock(&tracepoints_mutex);
449 old = tracepoint_remove_probe(name, probe); 460 old = tracepoint_remove_probe(name, probe, data);
450 if (IS_ERR(old)) { 461 if (IS_ERR(old)) {
451 mutex_unlock(&tracepoints_mutex); 462 mutex_unlock(&tracepoints_mutex);
452 return PTR_ERR(old); 463 return PTR_ERR(old);
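For callers, the visible change is that tracepoint_probe_register()/unregister() (and their _noupdate variants) take a data pointer that is matched together with the probe: the same function may be attached several times with different data, and only an identical (probe, data) pair is rejected with -EEXIST. A hedged usage sketch; the tracepoint name, my_probe() and struct my_ctx are illustrative only:

struct my_ctx { int counter; };
static struct my_ctx ctx_a, ctx_b;

static void my_probe(void *data, int arg)      /* data comes first */
{
	struct my_ctx *ctx = data;

	ctx->counter += arg;
}

static int my_attach(void)
{
	int ret;

	ret = tracepoint_probe_register("my_tracepoint",
					(void *)my_probe, &ctx_a);
	if (ret)
		return ret;

	/* a second registration with different data is accepted */
	ret = tracepoint_probe_register("my_tracepoint",
					(void *)my_probe, &ctx_b);
	if (ret)
		tracepoint_probe_unregister("my_tracepoint",
					    (void *)my_probe, &ctx_a);
	return ret;
}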
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 77dabbf64b8f..327d2deb4451 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1110,7 +1110,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
1110 unsigned int cpu = (unsigned long)hcpu; 1110 unsigned int cpu = (unsigned long)hcpu;
1111 struct cpu_workqueue_struct *cwq; 1111 struct cpu_workqueue_struct *cwq;
1112 struct workqueue_struct *wq; 1112 struct workqueue_struct *wq;
1113 int ret = NOTIFY_OK; 1113 int err = 0;
1114 1114
1115 action &= ~CPU_TASKS_FROZEN; 1115 action &= ~CPU_TASKS_FROZEN;
1116 1116
@@ -1124,12 +1124,13 @@ undo:
1124 1124
1125 switch (action) { 1125 switch (action) {
1126 case CPU_UP_PREPARE: 1126 case CPU_UP_PREPARE:
1127 if (!create_workqueue_thread(cwq, cpu)) 1127 err = create_workqueue_thread(cwq, cpu);
1128 if (!err)
1128 break; 1129 break;
1129 printk(KERN_ERR "workqueue [%s] for %i failed\n", 1130 printk(KERN_ERR "workqueue [%s] for %i failed\n",
1130 wq->name, cpu); 1131 wq->name, cpu);
1131 action = CPU_UP_CANCELED; 1132 action = CPU_UP_CANCELED;
1132 ret = NOTIFY_BAD; 1133 err = -ENOMEM;
1133 goto undo; 1134 goto undo;
1134 1135
1135 case CPU_ONLINE: 1136 case CPU_ONLINE:
@@ -1150,7 +1151,7 @@ undo:
1150 cpumask_clear_cpu(cpu, cpu_populated_map); 1151 cpumask_clear_cpu(cpu, cpu_populated_map);
1151 } 1152 }
1152 1153
1153 return ret; 1154 return notifier_from_errno(err);
1154} 1155}
1155 1156
1156#ifdef CONFIG_SMP 1157#ifdef CONFIG_SMP
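The workqueue hotplug callback now tracks a plain errno and converts it once at return time with notifier_from_errno(), instead of hand-managing NOTIFY_OK/NOTIFY_BAD itself. A minimal sketch of that pattern for a hypothetical hotplug callback (my_prepare_cpu() is a stand-in helper):

static int my_cpu_callback(struct notifier_block *nfb,
			   unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	int err = 0;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		err = my_prepare_cpu(cpu);   /* hypothetical helper */
		break;
	default:
		break;
	}

	/* 0 maps to NOTIFY_OK, a negative errno to a NOTIFY_BAD-style value */
	return notifier_from_errno(err);
}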