aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile | 3
-rw-r--r-- kernel/cgroup.c | 35
-rw-r--r-- kernel/cpuset.c | 20
-rw-r--r-- kernel/fork.c | 6
-rw-r--r-- kernel/freezer.c | 2
-rw-r--r-- kernel/mutex.c | 4
-rw-r--r-- kernel/power/process.c | 11
-rw-r--r-- kernel/power/qos.c | 20
-rw-r--r-- kernel/printk/Makefile | 2
-rw-r--r-- kernel/printk/braille.c | 49
-rw-r--r-- kernel/printk/braille.h | 48
-rw-r--r-- kernel/printk/console_cmdline.h | 14
-rw-r--r-- kernel/printk/printk.c (renamed from kernel/printk.c) | 183
-rw-r--r-- kernel/ptrace.c | 1
-rw-r--r-- kernel/sched/core.c | 96
-rw-r--r-- kernel/sched/cpupri.c | 4
-rw-r--r-- kernel/sched/fair.c | 14
-rw-r--r-- kernel/sysctl.c | 6
-rw-r--r-- kernel/time/sched_clock.c | 2
-rw-r--r-- kernel/time/tick-sched.c | 14
-rw-r--r-- kernel/trace/ftrace.c | 105
-rw-r--r-- kernel/trace/ring_buffer.c | 26
-rw-r--r-- kernel/trace/trace.c | 254
-rw-r--r-- kernel/trace/trace.h | 18
-rw-r--r-- kernel/trace/trace_event_perf.c | 10
-rw-r--r-- kernel/trace/trace_events.c | 292
-rw-r--r-- kernel/trace/trace_events_filter.c | 21
-rw-r--r-- kernel/trace/trace_functions.c | 2
-rw-r--r-- kernel/trace/trace_functions_graph.c | 54
-rw-r--r-- kernel/trace/trace_kprobe.c | 50
-rw-r--r-- kernel/trace/trace_mmiotrace.c | 8
-rw-r--r-- kernel/trace/trace_output.c | 14
-rw-r--r-- kernel/trace/trace_syscalls.c | 26
-rw-r--r-- kernel/trace/trace_uprobe.c | 53
-rw-r--r-- kernel/user_namespace.c | 17
-rw-r--r-- kernel/wait.c | 6
-rw-r--r-- kernel/workqueue.c | 44
37 files changed, 938 insertions, 596 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 470839d1a30e..35ef1185e359 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the linux kernel. 2# Makefile for the linux kernel.
3# 3#
4 4
5obj-y = fork.o exec_domain.o panic.o printk.o \ 5obj-y = fork.o exec_domain.o panic.o \
6 cpu.o exit.o itimer.o time.o softirq.o resource.o \ 6 cpu.o exit.o itimer.o time.o softirq.o resource.o \
7 sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ 7 sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
8 signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
@@ -24,6 +24,7 @@ endif
24 24
25obj-y += sched/ 25obj-y += sched/
26obj-y += power/ 26obj-y += power/
27obj-y += printk/
27obj-y += cpu/ 28obj-y += cpu/
28 29
29obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o 30obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0e0b20b8c5db..781845a013ab 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1845,36 +1845,43 @@ out:
1845EXPORT_SYMBOL_GPL(cgroup_path); 1845EXPORT_SYMBOL_GPL(cgroup_path);
1846 1846
1847/** 1847/**
1848 * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy 1848 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
1849 * @task: target task 1849 * @task: target task
1850 * @hierarchy_id: the hierarchy to look up @task's cgroup from
1851 * @buf: the buffer to write the path into 1850 * @buf: the buffer to write the path into
1852 * @buflen: the length of the buffer 1851 * @buflen: the length of the buffer
1853 * 1852 *
1854 * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and 1853 * Determine @task's cgroup on the first (the one with the lowest non-zero
1855 * copy its path into @buf. This function grabs cgroup_mutex and shouldn't 1854 * hierarchy_id) cgroup hierarchy and copy its path into @buf. This
1856 * be used inside locks used by cgroup controller callbacks. 1855 * function grabs cgroup_mutex and shouldn't be used inside locks used by
1856 * cgroup controller callbacks.
1857 *
1858 * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
1857 */ 1859 */
1858int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, 1860int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
1859 char *buf, size_t buflen)
1860{ 1861{
1861 struct cgroupfs_root *root; 1862 struct cgroupfs_root *root;
1862 struct cgroup *cgrp = NULL; 1863 struct cgroup *cgrp;
1863 int ret = -ENOENT; 1864 int hierarchy_id = 1, ret = 0;
1865
1866 if (buflen < 2)
1867 return -ENAMETOOLONG;
1864 1868
1865 mutex_lock(&cgroup_mutex); 1869 mutex_lock(&cgroup_mutex);
1866 1870
1867 root = idr_find(&cgroup_hierarchy_idr, hierarchy_id); 1871 root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
1872
1868 if (root) { 1873 if (root) {
1869 cgrp = task_cgroup_from_root(task, root); 1874 cgrp = task_cgroup_from_root(task, root);
1870 ret = cgroup_path(cgrp, buf, buflen); 1875 ret = cgroup_path(cgrp, buf, buflen);
1876 } else {
1877 /* if no hierarchy exists, everyone is in "/" */
1878 memcpy(buf, "/", 2);
1871 } 1879 }
1872 1880
1873 mutex_unlock(&cgroup_mutex); 1881 mutex_unlock(&cgroup_mutex);
1874
1875 return ret; 1882 return ret;
1876} 1883}
1877EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy); 1884EXPORT_SYMBOL_GPL(task_cgroup_path);
1878 1885
1879/* 1886/*
1880 * Control Group taskset 1887 * Control Group taskset
@@ -4328,8 +4335,10 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4328 } 4335 }
4329 4336
4330 err = percpu_ref_init(&css->refcnt, css_release); 4337 err = percpu_ref_init(&css->refcnt, css_release);
4331 if (err) 4338 if (err) {
4339 ss->css_free(cgrp);
4332 goto err_free_all; 4340 goto err_free_all;
4341 }
4333 4342
4334 init_cgroup_css(css, ss, cgrp); 4343 init_cgroup_css(css, ss, cgrp);
4335 4344
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e5657788fedd..ea1966db34f2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -475,13 +475,17 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
475 475
476 /* 476 /*
477 * Cpusets with tasks - existing or newly being attached - can't 477 * Cpusets with tasks - existing or newly being attached - can't
478 * have empty cpus_allowed or mems_allowed. 478 * be changed to have empty cpus_allowed or mems_allowed.
479 */ 479 */
480 ret = -ENOSPC; 480 ret = -ENOSPC;
481 if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) && 481 if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress)) {
482 (cpumask_empty(trial->cpus_allowed) && 482 if (!cpumask_empty(cur->cpus_allowed) &&
483 nodes_empty(trial->mems_allowed))) 483 cpumask_empty(trial->cpus_allowed))
484 goto out; 484 goto out;
485 if (!nodes_empty(cur->mems_allowed) &&
486 nodes_empty(trial->mems_allowed))
487 goto out;
488 }
485 489
486 ret = 0; 490 ret = 0;
487out: 491out:
@@ -1608,11 +1612,13 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1608{ 1612{
1609 struct cpuset *cs = cgroup_cs(cgrp); 1613 struct cpuset *cs = cgroup_cs(cgrp);
1610 cpuset_filetype_t type = cft->private; 1614 cpuset_filetype_t type = cft->private;
1611 int retval = -ENODEV; 1615 int retval = 0;
1612 1616
1613 mutex_lock(&cpuset_mutex); 1617 mutex_lock(&cpuset_mutex);
1614 if (!is_cpuset_online(cs)) 1618 if (!is_cpuset_online(cs)) {
1619 retval = -ENODEV;
1615 goto out_unlock; 1620 goto out_unlock;
1621 }
1616 1622
1617 switch (type) { 1623 switch (type) {
1618 case FILE_CPU_EXCLUSIVE: 1624 case FILE_CPU_EXCLUSIVE:
diff --git a/kernel/fork.c b/kernel/fork.c
index 403d2bb8a968..e23bb19e2a3e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1679,6 +1679,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
1679 int __user *, parent_tidptr, 1679 int __user *, parent_tidptr,
1680 int __user *, child_tidptr, 1680 int __user *, child_tidptr,
1681 int, tls_val) 1681 int, tls_val)
1682#elif defined(CONFIG_CLONE_BACKWARDS3)
1683SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
1684 int, stack_size,
1685 int __user *, parent_tidptr,
1686 int __user *, child_tidptr,
1687 int, tls_val)
1682#else 1688#else
1683SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, 1689SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1684 int __user *, parent_tidptr, 1690 int __user *, parent_tidptr,
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 8b2afc1c9df0..b462fa197517 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -33,7 +33,7 @@ static DEFINE_SPINLOCK(freezer_lock);
33 */ 33 */
34bool freezing_slow_path(struct task_struct *p) 34bool freezing_slow_path(struct task_struct *p)
35{ 35{
36 if (p->flags & PF_NOFREEZE) 36 if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
37 return false; 37 return false;
38 38
39 if (pm_nosig_freezing || cgroup_freezing(p)) 39 if (pm_nosig_freezing || cgroup_freezing(p))
diff --git a/kernel/mutex.c b/kernel/mutex.c
index ff05f4bd86eb..a52ee7bb830d 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -686,7 +686,7 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
686 might_sleep(); 686 might_sleep();
687 ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 687 ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE,
688 0, &ctx->dep_map, _RET_IP_, ctx); 688 0, &ctx->dep_map, _RET_IP_, ctx);
689 if (!ret && ctx->acquired > 0) 689 if (!ret && ctx->acquired > 1)
690 return ww_mutex_deadlock_injection(lock, ctx); 690 return ww_mutex_deadlock_injection(lock, ctx);
691 691
692 return ret; 692 return ret;
@@ -702,7 +702,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
702 ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 702 ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE,
703 0, &ctx->dep_map, _RET_IP_, ctx); 703 0, &ctx->dep_map, _RET_IP_, ctx);
704 704
705 if (!ret && ctx->acquired > 0) 705 if (!ret && ctx->acquired > 1)
706 return ww_mutex_deadlock_injection(lock, ctx); 706 return ww_mutex_deadlock_injection(lock, ctx);
707 707
708 return ret; 708 return ret;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index fc0df8486449..06ec8869dbf1 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -109,6 +109,8 @@ static int try_to_freeze_tasks(bool user_only)
109 109
110/** 110/**
111 * freeze_processes - Signal user space processes to enter the refrigerator. 111 * freeze_processes - Signal user space processes to enter the refrigerator.
112 * The current thread will not be frozen. The same process that calls
113 * freeze_processes must later call thaw_processes.
112 * 114 *
113 * On success, returns 0. On failure, -errno and system is fully thawed. 115 * On success, returns 0. On failure, -errno and system is fully thawed.
114 */ 116 */
@@ -120,6 +122,9 @@ int freeze_processes(void)
120 if (error) 122 if (error)
121 return error; 123 return error;
122 124
125 /* Make sure this task doesn't get frozen */
126 current->flags |= PF_SUSPEND_TASK;
127
123 if (!pm_freezing) 128 if (!pm_freezing)
124 atomic_inc(&system_freezing_cnt); 129 atomic_inc(&system_freezing_cnt);
125 130
@@ -168,6 +173,7 @@ int freeze_kernel_threads(void)
168void thaw_processes(void) 173void thaw_processes(void)
169{ 174{
170 struct task_struct *g, *p; 175 struct task_struct *g, *p;
176 struct task_struct *curr = current;
171 177
172 if (pm_freezing) 178 if (pm_freezing)
173 atomic_dec(&system_freezing_cnt); 179 atomic_dec(&system_freezing_cnt);
@@ -182,10 +188,15 @@ void thaw_processes(void)
182 188
183 read_lock(&tasklist_lock); 189 read_lock(&tasklist_lock);
184 do_each_thread(g, p) { 190 do_each_thread(g, p) {
191 /* No other threads should have PF_SUSPEND_TASK set */
192 WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
185 __thaw_task(p); 193 __thaw_task(p);
186 } while_each_thread(g, p); 194 } while_each_thread(g, p);
187 read_unlock(&tasklist_lock); 195 read_unlock(&tasklist_lock);
188 196
197 WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
198 curr->flags &= ~PF_SUSPEND_TASK;
199
189 usermodehelper_enable(); 200 usermodehelper_enable();
190 201
191 schedule(); 202 schedule();
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 06fe28589e9c..a394297f8b2f 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -296,6 +296,17 @@ int pm_qos_request_active(struct pm_qos_request *req)
296} 296}
297EXPORT_SYMBOL_GPL(pm_qos_request_active); 297EXPORT_SYMBOL_GPL(pm_qos_request_active);
298 298
299static void __pm_qos_update_request(struct pm_qos_request *req,
300 s32 new_value)
301{
302 trace_pm_qos_update_request(req->pm_qos_class, new_value);
303
304 if (new_value != req->node.prio)
305 pm_qos_update_target(
306 pm_qos_array[req->pm_qos_class]->constraints,
307 &req->node, PM_QOS_UPDATE_REQ, new_value);
308}
309
299/** 310/**
300 * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout 311 * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout
301 * @work: work struct for the delayed work (timeout) 312 * @work: work struct for the delayed work (timeout)
@@ -308,7 +319,7 @@ static void pm_qos_work_fn(struct work_struct *work)
308 struct pm_qos_request, 319 struct pm_qos_request,
309 work); 320 work);
310 321
311 pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE); 322 __pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE);
312} 323}
313 324
314/** 325/**
@@ -364,12 +375,7 @@ void pm_qos_update_request(struct pm_qos_request *req,
364 } 375 }
365 376
366 cancel_delayed_work_sync(&req->work); 377 cancel_delayed_work_sync(&req->work);
367 378 __pm_qos_update_request(req, new_value);
368 trace_pm_qos_update_request(req->pm_qos_class, new_value);
369 if (new_value != req->node.prio)
370 pm_qos_update_target(
371 pm_qos_array[req->pm_qos_class]->constraints,
372 &req->node, PM_QOS_UPDATE_REQ, new_value);
373} 379}
374EXPORT_SYMBOL_GPL(pm_qos_update_request); 380EXPORT_SYMBOL_GPL(pm_qos_update_request);
375 381
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
new file mode 100644
index 000000000000..85405bdcf2b3
--- /dev/null
+++ b/kernel/printk/Makefile
@@ -0,0 +1,2 @@
1obj-y = printk.o
2obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
new file mode 100644
index 000000000000..276762f3a460
--- /dev/null
+++ b/kernel/printk/braille.c
@@ -0,0 +1,49 @@
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
3#include <linux/kernel.h>
4#include <linux/console.h>
5#include <linux/string.h>
6
7#include "console_cmdline.h"
8#include "braille.h"
9
/*
 * _braille_console_setup - strip a braille prefix from a console= argument
 * @str:         in/out cursor into the writable console= argument
 * @brl_options: out, braille driver options; written only on success
 *
 * Recognised forms:
 *   "brl,<port>"        - *brl_options = "" and *str is moved to <port>
 *   "brl=<opts>,<port>" - <opts> is NUL-terminated in place, *brl_options
 *                         points at it and *str is moved to <port>
 * Any other argument leaves both *str and *brl_options untouched.
 *
 * Returns NULL when the caller should carry on parsing *str (either there
 * was no braille prefix, or it was consumed successfully).  Returns a
 * non-NULL pointer when "brl=" was given without a port name, in which
 * case the argument must be rejected; *str is left unchanged so it never
 * ends up NULL.
 */
char *_braille_console_setup(char **str, char **brl_options)
{
	char *opts, *comma;

	/* strncmp() stops at the terminating NUL, so short arguments are safe. */
	if (!strncmp(*str, "brl,", 4)) {
		*brl_options = "";
		*str += 4;
		return NULL;
	}

	/* Compare the string itself (*str), not the bytes of the pointer. */
	if (strncmp(*str, "brl=", 4))
		return NULL;

	opts = *str + 4;
	comma = strchr(opts, ',');
	if (!comma) {
		pr_err("need port name after brl=\n");
		/* Non-NULL tells the caller to give up on this argument. */
		return opts;
	}

	/* Split "<opts>,<port>" in place and hand both halves back. */
	*comma = 0;
	*brl_options = opts;
	*str = comma + 1;
	return NULL;
}
27
28int
29_braille_register_console(struct console *console, struct console_cmdline *c)
30{
31 int rtn = 0;
32
33 if (c->brl_options) {
34 console->flags |= CON_BRL;
35 rtn = braille_register_console(console, c->index, c->options,
36 c->brl_options);
37 }
38
39 return rtn;
40}
41
42int
43_braille_unregister_console(struct console *console)
44{
45 if (console->flags & CON_BRL)
46 return braille_unregister_console(console);
47
48 return 0;
49}
diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h
new file mode 100644
index 000000000000..769d771145c8
--- /dev/null
+++ b/kernel/printk/braille.h
@@ -0,0 +1,48 @@
/*
 * Braille console hooks used by the printk console code.
 *
 * With CONFIG_A11Y_BRAILLE_CONSOLE set, the _braille_*() helpers are real
 * functions declared here; without it every hook collapses to an inline
 * stub so callers need no #ifdef of their own.
 */
#ifndef _PRINTK_BRAILLE_H
#define _PRINTK_BRAILLE_H

#ifdef CONFIG_A11Y_BRAILLE_CONSOLE

/* Store the braille driver options in the command-line console entry. */
static inline void braille_set_options(struct console_cmdline *c,
				       char *brl_options)
{
	c->brl_options = brl_options;
}

/* Out-of-line helpers; only declared in this header. */
char *_braille_console_setup(char **str, char **brl_options);
int _braille_register_console(struct console *console,
			      struct console_cmdline *c);
int _braille_unregister_console(struct console *console);

#else /* !CONFIG_A11Y_BRAILLE_CONSOLE */

/* Braille support compiled out: there are no options to store. */
static inline void braille_set_options(struct console_cmdline *c,
				       char *brl_options)
{
}

/* Braille support compiled out: always returns NULL, arguments untouched. */
static inline char *_braille_console_setup(char **str, char **brl_options)
{
	return NULL;
}

/* Braille support compiled out: nothing to register, always returns 0. */
static inline int _braille_register_console(struct console *console,
					    struct console_cmdline *c)
{
	return 0;
}

/* Braille support compiled out: nothing to unregister, always returns 0. */
static inline int _braille_unregister_console(struct console *console)
{
	return 0;
}

#endif /* CONFIG_A11Y_BRAILLE_CONSOLE */

#endif /* _PRINTK_BRAILLE_H */
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
new file mode 100644
index 000000000000..cbd69d842341
--- /dev/null
+++ b/kernel/printk/console_cmdline.h
@@ -0,0 +1,14 @@
#ifndef _CONSOLE_CMDLINE_H
#define _CONSOLE_CMDLINE_H

/* One console entry as described on the command line. */
struct console_cmdline {
	char	name[8];		/* driver name */
	int	index;			/* minor device to use */
	char	*options;		/* options handed to the driver */
#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
	char	*brl_options;		/* options handed to the braille driver */
#endif
};

#endif /* _CONSOLE_CMDLINE_H */
diff --git a/kernel/printk.c b/kernel/printk/printk.c
index 69b0890ed7e5..5b5a7080e2a5 100644
--- a/kernel/printk.c
+++ b/kernel/printk/printk.c
@@ -51,6 +51,9 @@
51#define CREATE_TRACE_POINTS 51#define CREATE_TRACE_POINTS
52#include <trace/events/printk.h> 52#include <trace/events/printk.h>
53 53
54#include "console_cmdline.h"
55#include "braille.h"
56
54/* printk's without a loglevel use this.. */ 57/* printk's without a loglevel use this.. */
55#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL 58#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
56 59
@@ -105,19 +108,11 @@ static struct console *exclusive_console;
105/* 108/*
106 * Array of consoles built from command line options (console=) 109 * Array of consoles built from command line options (console=)
107 */ 110 */
108struct console_cmdline
109{
110 char name[8]; /* Name of the driver */
111 int index; /* Minor dev. to use */
112 char *options; /* Options for the driver */
113#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
114 char *brl_options; /* Options for braille driver */
115#endif
116};
117 111
118#define MAX_CMDLINECONSOLES 8 112#define MAX_CMDLINECONSOLES 8
119 113
120static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; 114static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
115
121static int selected_console = -1; 116static int selected_console = -1;
122static int preferred_console = -1; 117static int preferred_console = -1;
123int console_set_on_cmdline; 118int console_set_on_cmdline;
@@ -178,7 +173,7 @@ static int console_may_schedule;
178 * 67 "g" 173 * 67 "g"
179 * 0032 00 00 00 padding to next message header 174 * 0032 00 00 00 padding to next message header
180 * 175 *
181 * The 'struct log' buffer header must never be directly exported to 176 * The 'struct printk_log' buffer header must never be directly exported to
182 * userspace, it is a kernel-private implementation detail that might 177 * userspace, it is a kernel-private implementation detail that might
183 * need to be changed in the future, when the requirements change. 178 * need to be changed in the future, when the requirements change.
184 * 179 *
@@ -200,7 +195,7 @@ enum log_flags {
200 LOG_CONT = 8, /* text is a fragment of a continuation line */ 195 LOG_CONT = 8, /* text is a fragment of a continuation line */
201}; 196};
202 197
203struct log { 198struct printk_log {
204 u64 ts_nsec; /* timestamp in nanoseconds */ 199 u64 ts_nsec; /* timestamp in nanoseconds */
205 u16 len; /* length of entire record */ 200 u16 len; /* length of entire record */
206 u16 text_len; /* length of text buffer */ 201 u16 text_len; /* length of text buffer */
@@ -248,7 +243,7 @@ static u32 clear_idx;
248#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 243#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
249#define LOG_ALIGN 4 244#define LOG_ALIGN 4
250#else 245#else
251#define LOG_ALIGN __alignof__(struct log) 246#define LOG_ALIGN __alignof__(struct printk_log)
252#endif 247#endif
253#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 248#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
254static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 249static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -259,35 +254,35 @@ static u32 log_buf_len = __LOG_BUF_LEN;
259static volatile unsigned int logbuf_cpu = UINT_MAX; 254static volatile unsigned int logbuf_cpu = UINT_MAX;
260 255
261/* human readable text of the record */ 256/* human readable text of the record */
262static char *log_text(const struct log *msg) 257static char *log_text(const struct printk_log *msg)
263{ 258{
264 return (char *)msg + sizeof(struct log); 259 return (char *)msg + sizeof(struct printk_log);
265} 260}
266 261
267/* optional key/value pair dictionary attached to the record */ 262/* optional key/value pair dictionary attached to the record */
268static char *log_dict(const struct log *msg) 263static char *log_dict(const struct printk_log *msg)
269{ 264{
270 return (char *)msg + sizeof(struct log) + msg->text_len; 265 return (char *)msg + sizeof(struct printk_log) + msg->text_len;
271} 266}
272 267
273/* get record by index; idx must point to valid msg */ 268/* get record by index; idx must point to valid msg */
274static struct log *log_from_idx(u32 idx) 269static struct printk_log *log_from_idx(u32 idx)
275{ 270{
276 struct log *msg = (struct log *)(log_buf + idx); 271 struct printk_log *msg = (struct printk_log *)(log_buf + idx);
277 272
278 /* 273 /*
279 * A length == 0 record is the end of buffer marker. Wrap around and 274 * A length == 0 record is the end of buffer marker. Wrap around and
280 * read the message at the start of the buffer. 275 * read the message at the start of the buffer.
281 */ 276 */
282 if (!msg->len) 277 if (!msg->len)
283 return (struct log *)log_buf; 278 return (struct printk_log *)log_buf;
284 return msg; 279 return msg;
285} 280}
286 281
287/* get next record; idx must point to valid msg */ 282/* get next record; idx must point to valid msg */
288static u32 log_next(u32 idx) 283static u32 log_next(u32 idx)
289{ 284{
290 struct log *msg = (struct log *)(log_buf + idx); 285 struct printk_log *msg = (struct printk_log *)(log_buf + idx);
291 286
292 /* length == 0 indicates the end of the buffer; wrap */ 287 /* length == 0 indicates the end of the buffer; wrap */
293 /* 288 /*
@@ -296,7 +291,7 @@ static u32 log_next(u32 idx)
296 * return the one after that. 291 * return the one after that.
297 */ 292 */
298 if (!msg->len) { 293 if (!msg->len) {
299 msg = (struct log *)log_buf; 294 msg = (struct printk_log *)log_buf;
300 return msg->len; 295 return msg->len;
301 } 296 }
302 return idx + msg->len; 297 return idx + msg->len;
@@ -308,11 +303,11 @@ static void log_store(int facility, int level,
308 const char *dict, u16 dict_len, 303 const char *dict, u16 dict_len,
309 const char *text, u16 text_len) 304 const char *text, u16 text_len)
310{ 305{
311 struct log *msg; 306 struct printk_log *msg;
312 u32 size, pad_len; 307 u32 size, pad_len;
313 308
314 /* number of '\0' padding bytes to next message */ 309 /* number of '\0' padding bytes to next message */
315 size = sizeof(struct log) + text_len + dict_len; 310 size = sizeof(struct printk_log) + text_len + dict_len;
316 pad_len = (-size) & (LOG_ALIGN - 1); 311 pad_len = (-size) & (LOG_ALIGN - 1);
317 size += pad_len; 312 size += pad_len;
318 313
@@ -324,7 +319,7 @@ static void log_store(int facility, int level,
324 else 319 else
325 free = log_first_idx - log_next_idx; 320 free = log_first_idx - log_next_idx;
326 321
327 if (free > size + sizeof(struct log)) 322 if (free > size + sizeof(struct printk_log))
328 break; 323 break;
329 324
330 /* drop old messages until we have enough contiguous space */ 325 /* drop old messages until we have enough contiguous space */
@@ -332,18 +327,18 @@ static void log_store(int facility, int level,
332 log_first_seq++; 327 log_first_seq++;
333 } 328 }
334 329
335 if (log_next_idx + size + sizeof(struct log) >= log_buf_len) { 330 if (log_next_idx + size + sizeof(struct printk_log) >= log_buf_len) {
336 /* 331 /*
337 * This message + an additional empty header does not fit 332 * This message + an additional empty header does not fit
338 * at the end of the buffer. Add an empty header with len == 0 333 * at the end of the buffer. Add an empty header with len == 0
339 * to signify a wrap around. 334 * to signify a wrap around.
340 */ 335 */
341 memset(log_buf + log_next_idx, 0, sizeof(struct log)); 336 memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
342 log_next_idx = 0; 337 log_next_idx = 0;
343 } 338 }
344 339
345 /* fill message */ 340 /* fill message */
346 msg = (struct log *)(log_buf + log_next_idx); 341 msg = (struct printk_log *)(log_buf + log_next_idx);
347 memcpy(log_text(msg), text, text_len); 342 memcpy(log_text(msg), text, text_len);
348 msg->text_len = text_len; 343 msg->text_len = text_len;
349 memcpy(log_dict(msg), dict, dict_len); 344 memcpy(log_dict(msg), dict, dict_len);
@@ -356,7 +351,7 @@ static void log_store(int facility, int level,
356 else 351 else
357 msg->ts_nsec = local_clock(); 352 msg->ts_nsec = local_clock();
358 memset(log_dict(msg) + dict_len, 0, pad_len); 353 memset(log_dict(msg) + dict_len, 0, pad_len);
359 msg->len = sizeof(struct log) + text_len + dict_len + pad_len; 354 msg->len = sizeof(struct printk_log) + text_len + dict_len + pad_len;
360 355
361 /* insert message */ 356 /* insert message */
362 log_next_idx += msg->len; 357 log_next_idx += msg->len;
@@ -479,7 +474,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
479 size_t count, loff_t *ppos) 474 size_t count, loff_t *ppos)
480{ 475{
481 struct devkmsg_user *user = file->private_data; 476 struct devkmsg_user *user = file->private_data;
482 struct log *msg; 477 struct printk_log *msg;
483 u64 ts_usec; 478 u64 ts_usec;
484 size_t i; 479 size_t i;
485 char cont = '-'; 480 char cont = '-';
@@ -724,14 +719,14 @@ void log_buf_kexec_setup(void)
724 VMCOREINFO_SYMBOL(log_first_idx); 719 VMCOREINFO_SYMBOL(log_first_idx);
725 VMCOREINFO_SYMBOL(log_next_idx); 720 VMCOREINFO_SYMBOL(log_next_idx);
726 /* 721 /*
727 * Export struct log size and field offsets. User space tools can 722 * Export struct printk_log size and field offsets. User space tools can
728 * parse it and detect any changes to structure down the line. 723 * parse it and detect any changes to structure down the line.
729 */ 724 */
730 VMCOREINFO_STRUCT_SIZE(log); 725 VMCOREINFO_STRUCT_SIZE(printk_log);
731 VMCOREINFO_OFFSET(log, ts_nsec); 726 VMCOREINFO_OFFSET(printk_log, ts_nsec);
732 VMCOREINFO_OFFSET(log, len); 727 VMCOREINFO_OFFSET(printk_log, len);
733 VMCOREINFO_OFFSET(log, text_len); 728 VMCOREINFO_OFFSET(printk_log, text_len);
734 VMCOREINFO_OFFSET(log, dict_len); 729 VMCOREINFO_OFFSET(printk_log, dict_len);
735} 730}
736#endif 731#endif
737 732
@@ -884,7 +879,7 @@ static size_t print_time(u64 ts, char *buf)
884 (unsigned long)ts, rem_nsec / 1000); 879 (unsigned long)ts, rem_nsec / 1000);
885} 880}
886 881
887static size_t print_prefix(const struct log *msg, bool syslog, char *buf) 882static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf)
888{ 883{
889 size_t len = 0; 884 size_t len = 0;
890 unsigned int prefix = (msg->facility << 3) | msg->level; 885 unsigned int prefix = (msg->facility << 3) | msg->level;
@@ -907,7 +902,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
907 return len; 902 return len;
908} 903}
909 904
910static size_t msg_print_text(const struct log *msg, enum log_flags prev, 905static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
911 bool syslog, char *buf, size_t size) 906 bool syslog, char *buf, size_t size)
912{ 907{
913 const char *text = log_text(msg); 908 const char *text = log_text(msg);
@@ -969,7 +964,7 @@ static size_t msg_print_text(const struct log *msg, enum log_flags prev,
969static int syslog_print(char __user *buf, int size) 964static int syslog_print(char __user *buf, int size)
970{ 965{
971 char *text; 966 char *text;
972 struct log *msg; 967 struct printk_log *msg;
973 int len = 0; 968 int len = 0;
974 969
975 text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); 970 text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
@@ -1060,7 +1055,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
1060 idx = clear_idx; 1055 idx = clear_idx;
1061 prev = 0; 1056 prev = 0;
1062 while (seq < log_next_seq) { 1057 while (seq < log_next_seq) {
1063 struct log *msg = log_from_idx(idx); 1058 struct printk_log *msg = log_from_idx(idx);
1064 1059
1065 len += msg_print_text(msg, prev, true, NULL, 0); 1060 len += msg_print_text(msg, prev, true, NULL, 0);
1066 prev = msg->flags; 1061 prev = msg->flags;
@@ -1073,7 +1068,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
1073 idx = clear_idx; 1068 idx = clear_idx;
1074 prev = 0; 1069 prev = 0;
1075 while (len > size && seq < log_next_seq) { 1070 while (len > size && seq < log_next_seq) {
1076 struct log *msg = log_from_idx(idx); 1071 struct printk_log *msg = log_from_idx(idx);
1077 1072
1078 len -= msg_print_text(msg, prev, true, NULL, 0); 1073 len -= msg_print_text(msg, prev, true, NULL, 0);
1079 prev = msg->flags; 1074 prev = msg->flags;
@@ -1087,7 +1082,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
1087 len = 0; 1082 len = 0;
1088 prev = 0; 1083 prev = 0;
1089 while (len >= 0 && seq < next_seq) { 1084 while (len >= 0 && seq < next_seq) {
1090 struct log *msg = log_from_idx(idx); 1085 struct printk_log *msg = log_from_idx(idx);
1091 int textlen; 1086 int textlen;
1092 1087
1093 textlen = msg_print_text(msg, prev, true, text, 1088 textlen = msg_print_text(msg, prev, true, text,
@@ -1233,7 +1228,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
1233 1228
1234 error = 0; 1229 error = 0;
1235 while (seq < log_next_seq) { 1230 while (seq < log_next_seq) {
1236 struct log *msg = log_from_idx(idx); 1231 struct printk_log *msg = log_from_idx(idx);
1237 1232
1238 error += msg_print_text(msg, prev, true, NULL, 0); 1233 error += msg_print_text(msg, prev, true, NULL, 0);
1239 idx = log_next(idx); 1234 idx = log_next(idx);
@@ -1719,10 +1714,10 @@ static struct cont {
1719 u8 level; 1714 u8 level;
1720 bool flushed:1; 1715 bool flushed:1;
1721} cont; 1716} cont;
1722static struct log *log_from_idx(u32 idx) { return NULL; } 1717static struct printk_log *log_from_idx(u32 idx) { return NULL; }
1723static u32 log_next(u32 idx) { return 0; } 1718static u32 log_next(u32 idx) { return 0; }
1724static void call_console_drivers(int level, const char *text, size_t len) {} 1719static void call_console_drivers(int level, const char *text, size_t len) {}
1725static size_t msg_print_text(const struct log *msg, enum log_flags prev, 1720static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
1726 bool syslog, char *buf, size_t size) { return 0; } 1721 bool syslog, char *buf, size_t size) { return 0; }
1727static size_t cont_print_text(char *text, size_t size) { return 0; } 1722static size_t cont_print_text(char *text, size_t size) { return 0; }
1728 1723
@@ -1761,23 +1756,23 @@ static int __add_preferred_console(char *name, int idx, char *options,
1761 * See if this tty is not yet registered, and 1756 * See if this tty is not yet registered, and
1762 * if we have a slot free. 1757 * if we have a slot free.
1763 */ 1758 */
1764 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) 1759 for (i = 0, c = console_cmdline;
1765 if (strcmp(console_cmdline[i].name, name) == 0 && 1760 i < MAX_CMDLINECONSOLES && c->name[0];
1766 console_cmdline[i].index == idx) { 1761 i++, c++) {
1767 if (!brl_options) 1762 if (strcmp(c->name, name) == 0 && c->index == idx) {
1768 selected_console = i; 1763 if (!brl_options)
1769 return 0; 1764 selected_console = i;
1765 return 0;
1770 } 1766 }
1767 }
1771 if (i == MAX_CMDLINECONSOLES) 1768 if (i == MAX_CMDLINECONSOLES)
1772 return -E2BIG; 1769 return -E2BIG;
1773 if (!brl_options) 1770 if (!brl_options)
1774 selected_console = i; 1771 selected_console = i;
1775 c = &console_cmdline[i];
1776 strlcpy(c->name, name, sizeof(c->name)); 1772 strlcpy(c->name, name, sizeof(c->name));
1777 c->options = options; 1773 c->options = options;
1778#ifdef CONFIG_A11Y_BRAILLE_CONSOLE 1774 braille_set_options(c, brl_options);
1779 c->brl_options = brl_options; 1775
1780#endif
1781 c->index = idx; 1776 c->index = idx;
1782 return 0; 1777 return 0;
1783} 1778}
@@ -1790,20 +1785,8 @@ static int __init console_setup(char *str)
1790 char *s, *options, *brl_options = NULL; 1785 char *s, *options, *brl_options = NULL;
1791 int idx; 1786 int idx;
1792 1787
1793#ifdef CONFIG_A11Y_BRAILLE_CONSOLE 1788 if (_braille_console_setup(&str, &brl_options))
1794 if (!memcmp(str, "brl,", 4)) { 1789 return 1;
1795 brl_options = "";
1796 str += 4;
1797 } else if (!memcmp(str, "brl=", 4)) {
1798 brl_options = str + 4;
1799 str = strchr(brl_options, ',');
1800 if (!str) {
1801 printk(KERN_ERR "need port name after brl=\n");
1802 return 1;
1803 }
1804 *(str++) = 0;
1805 }
1806#endif
1807 1790
1808 /* 1791 /*
1809 * Decode str into name, index, options. 1792 * Decode str into name, index, options.
@@ -1858,15 +1841,15 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
1858 struct console_cmdline *c; 1841 struct console_cmdline *c;
1859 int i; 1842 int i;
1860 1843
1861 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) 1844 for (i = 0, c = console_cmdline;
1862 if (strcmp(console_cmdline[i].name, name) == 0 && 1845 i < MAX_CMDLINECONSOLES && c->name[0];
1863 console_cmdline[i].index == idx) { 1846 i++, c++)
1864 c = &console_cmdline[i]; 1847 if (strcmp(c->name, name) == 0 && c->index == idx) {
1865 strlcpy(c->name, name_new, sizeof(c->name)); 1848 strlcpy(c->name, name_new, sizeof(c->name));
1866 c->name[sizeof(c->name) - 1] = 0; 1849 c->name[sizeof(c->name) - 1] = 0;
1867 c->options = options; 1850 c->options = options;
1868 c->index = idx_new; 1851 c->index = idx_new;
1869 return i; 1852 return i;
1870 } 1853 }
1871 /* not found */ 1854 /* not found */
1872 return -1; 1855 return -1;
@@ -2046,7 +2029,7 @@ void console_unlock(void)
2046 console_cont_flush(text, sizeof(text)); 2029 console_cont_flush(text, sizeof(text));
2047again: 2030again:
2048 for (;;) { 2031 for (;;) {
2049 struct log *msg; 2032 struct printk_log *msg;
2050 size_t len; 2033 size_t len;
2051 int level; 2034 int level;
2052 2035
@@ -2241,6 +2224,7 @@ void register_console(struct console *newcon)
2241 int i; 2224 int i;
2242 unsigned long flags; 2225 unsigned long flags;
2243 struct console *bcon = NULL; 2226 struct console *bcon = NULL;
2227 struct console_cmdline *c;
2244 2228
2245 /* 2229 /*
2246 * before we register a new CON_BOOT console, make sure we don't 2230 * before we register a new CON_BOOT console, make sure we don't
@@ -2288,30 +2272,25 @@ void register_console(struct console *newcon)
2288 * See if this console matches one we selected on 2272 * See if this console matches one we selected on
2289 * the command line. 2273 * the command line.
2290 */ 2274 */
2291 for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; 2275 for (i = 0, c = console_cmdline;
2292 i++) { 2276 i < MAX_CMDLINECONSOLES && c->name[0];
2293 if (strcmp(console_cmdline[i].name, newcon->name) != 0) 2277 i++, c++) {
2278 if (strcmp(c->name, newcon->name) != 0)
2294 continue; 2279 continue;
2295 if (newcon->index >= 0 && 2280 if (newcon->index >= 0 &&
2296 newcon->index != console_cmdline[i].index) 2281 newcon->index != c->index)
2297 continue; 2282 continue;
2298 if (newcon->index < 0) 2283 if (newcon->index < 0)
2299 newcon->index = console_cmdline[i].index; 2284 newcon->index = c->index;
2300#ifdef CONFIG_A11Y_BRAILLE_CONSOLE 2285
2301 if (console_cmdline[i].brl_options) { 2286 if (_braille_register_console(newcon, c))
2302 newcon->flags |= CON_BRL;
2303 braille_register_console(newcon,
2304 console_cmdline[i].index,
2305 console_cmdline[i].options,
2306 console_cmdline[i].brl_options);
2307 return; 2287 return;
2308 } 2288
2309#endif
2310 if (newcon->setup && 2289 if (newcon->setup &&
2311 newcon->setup(newcon, console_cmdline[i].options) != 0) 2290 newcon->setup(newcon, console_cmdline[i].options) != 0)
2312 break; 2291 break;
2313 newcon->flags |= CON_ENABLED; 2292 newcon->flags |= CON_ENABLED;
2314 newcon->index = console_cmdline[i].index; 2293 newcon->index = c->index;
2315 if (i == selected_console) { 2294 if (i == selected_console) {
2316 newcon->flags |= CON_CONSDEV; 2295 newcon->flags |= CON_CONSDEV;
2317 preferred_console = selected_console; 2296 preferred_console = selected_console;
@@ -2394,13 +2373,13 @@ EXPORT_SYMBOL(register_console);
2394int unregister_console(struct console *console) 2373int unregister_console(struct console *console)
2395{ 2374{
2396 struct console *a, *b; 2375 struct console *a, *b;
2397 int res = 1; 2376 int res;
2398 2377
2399#ifdef CONFIG_A11Y_BRAILLE_CONSOLE 2378 res = _braille_unregister_console(console);
2400 if (console->flags & CON_BRL) 2379 if (res)
2401 return braille_unregister_console(console); 2380 return res;
2402#endif
2403 2381
2382 res = 1;
2404 console_lock(); 2383 console_lock();
2405 if (console_drivers == console) { 2384 if (console_drivers == console) {
2406 console_drivers=console->next; 2385 console_drivers=console->next;
@@ -2666,7 +2645,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
2666bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, 2645bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
2667 char *line, size_t size, size_t *len) 2646 char *line, size_t size, size_t *len)
2668{ 2647{
2669 struct log *msg; 2648 struct printk_log *msg;
2670 size_t l = 0; 2649 size_t l = 0;
2671 bool ret = false; 2650 bool ret = false;
2672 2651
@@ -2778,7 +2757,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
2778 idx = dumper->cur_idx; 2757 idx = dumper->cur_idx;
2779 prev = 0; 2758 prev = 0;
2780 while (seq < dumper->next_seq) { 2759 while (seq < dumper->next_seq) {
2781 struct log *msg = log_from_idx(idx); 2760 struct printk_log *msg = log_from_idx(idx);
2782 2761
2783 l += msg_print_text(msg, prev, true, NULL, 0); 2762 l += msg_print_text(msg, prev, true, NULL, 0);
2784 idx = log_next(idx); 2763 idx = log_next(idx);
@@ -2791,7 +2770,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
2791 idx = dumper->cur_idx; 2770 idx = dumper->cur_idx;
2792 prev = 0; 2771 prev = 0;
2793 while (l > size && seq < dumper->next_seq) { 2772 while (l > size && seq < dumper->next_seq) {
2794 struct log *msg = log_from_idx(idx); 2773 struct printk_log *msg = log_from_idx(idx);
2795 2774
2796 l -= msg_print_text(msg, prev, true, NULL, 0); 2775 l -= msg_print_text(msg, prev, true, NULL, 0);
2797 idx = log_next(idx); 2776 idx = log_next(idx);
@@ -2806,7 +2785,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
2806 l = 0; 2785 l = 0;
2807 prev = 0; 2786 prev = 0;
2808 while (seq < dumper->next_seq) { 2787 while (seq < dumper->next_seq) {
2809 struct log *msg = log_from_idx(idx); 2788 struct printk_log *msg = log_from_idx(idx);
2810 2789
2811 l += msg_print_text(msg, prev, syslog, buf + l, size - l); 2790 l += msg_print_text(msg, prev, syslog, buf + l, size - l);
2812 idx = log_next(idx); 2791 idx = log_next(idx);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4041f5747e73..a146ee327f6a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -469,7 +469,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
469 /* Architecture-specific hardware disable .. */ 469 /* Architecture-specific hardware disable .. */
470 ptrace_disable(child); 470 ptrace_disable(child);
471 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); 471 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
472 flush_ptrace_hw_breakpoint(child);
473 472
474 write_lock_irq(&tasklist_lock); 473 write_lock_irq(&tasklist_lock);
475 /* 474 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7c32cb7bfeb..05c39f030314 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -933,6 +933,8 @@ static int effective_prio(struct task_struct *p)
933/** 933/**
934 * task_curr - is this task currently executing on a CPU? 934 * task_curr - is this task currently executing on a CPU?
935 * @p: the task in question. 935 * @p: the task in question.
936 *
937 * Return: 1 if the task is currently executing. 0 otherwise.
936 */ 938 */
937inline int task_curr(const struct task_struct *p) 939inline int task_curr(const struct task_struct *p)
938{ 940{
@@ -1482,7 +1484,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
1482 * the simpler "current->state = TASK_RUNNING" to mark yourself 1484 * the simpler "current->state = TASK_RUNNING" to mark yourself
1483 * runnable without the overhead of this. 1485 * runnable without the overhead of this.
1484 * 1486 *
1485 * Returns %true if @p was woken up, %false if it was already running 1487 * Return: %true if @p was woken up, %false if it was already running
1486 * or @state didn't match @p's state. 1488 * or @state didn't match @p's state.
1487 */ 1489 */
1488static int 1490static int
@@ -1491,7 +1493,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
1491 unsigned long flags; 1493 unsigned long flags;
1492 int cpu, success = 0; 1494 int cpu, success = 0;
1493 1495
1494 smp_wmb(); 1496 /*
1497 * If we are going to wake up a thread waiting for CONDITION we
1498 * need to ensure that CONDITION=1 done by the caller can not be
1499 * reordered with p->state check below. This pairs with mb() in
1500 * set_current_state() the waiting thread does.
1501 */
1502 smp_mb__before_spinlock();
1495 raw_spin_lock_irqsave(&p->pi_lock, flags); 1503 raw_spin_lock_irqsave(&p->pi_lock, flags);
1496 if (!(p->state & state)) 1504 if (!(p->state & state))
1497 goto out; 1505 goto out;
@@ -1577,8 +1585,9 @@ out:
1577 * @p: The process to be woken up. 1585 * @p: The process to be woken up.
1578 * 1586 *
1579 * Attempt to wake up the nominated process and move it to the set of runnable 1587 * Attempt to wake up the nominated process and move it to the set of runnable
1580 * processes. Returns 1 if the process was woken up, 0 if it was already 1588 * processes.
1581 * running. 1589 *
1590 * Return: 1 if the process was woken up, 0 if it was already running.
1582 * 1591 *
1583 * It may be assumed that this function implies a write memory barrier before 1592 * It may be assumed that this function implies a write memory barrier before
1584 * changing the task state if and only if any tasks are woken up. 1593 * changing the task state if and only if any tasks are woken up.
@@ -2191,6 +2200,8 @@ void scheduler_tick(void)
2191 * This makes sure that uptime, CFS vruntime, load 2200 * This makes sure that uptime, CFS vruntime, load
2192 * balancing, etc... continue to move forward, even 2201 * balancing, etc... continue to move forward, even
2193 * with a very low granularity. 2202 * with a very low granularity.
2203 *
2204 * Return: Maximum deferment in nanoseconds.
2194 */ 2205 */
2195u64 scheduler_tick_max_deferment(void) 2206u64 scheduler_tick_max_deferment(void)
2196{ 2207{
@@ -2394,6 +2405,12 @@ need_resched:
2394 if (sched_feat(HRTICK)) 2405 if (sched_feat(HRTICK))
2395 hrtick_clear(rq); 2406 hrtick_clear(rq);
2396 2407
2408 /*
2409 * Make sure that signal_pending_state()->signal_pending() below
2410 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
2411 * done by the caller to avoid the race with signal_wake_up().
2412 */
2413 smp_mb__before_spinlock();
2397 raw_spin_lock_irq(&rq->lock); 2414 raw_spin_lock_irq(&rq->lock);
2398 2415
2399 switch_count = &prev->nivcsw; 2416 switch_count = &prev->nivcsw;
@@ -2796,8 +2813,8 @@ EXPORT_SYMBOL(wait_for_completion);
2796 * specified timeout to expire. The timeout is in jiffies. It is not 2813 * specified timeout to expire. The timeout is in jiffies. It is not
2797 * interruptible. 2814 * interruptible.
2798 * 2815 *
2799 * The return value is 0 if timed out, and positive (at least 1, or number of 2816 * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
2800 * jiffies left till timeout) if completed. 2817 * till timeout) if completed.
2801 */ 2818 */
2802unsigned long __sched 2819unsigned long __sched
2803wait_for_completion_timeout(struct completion *x, unsigned long timeout) 2820wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@ -2829,8 +2846,8 @@ EXPORT_SYMBOL(wait_for_completion_io);
2829 * specified timeout to expire. The timeout is in jiffies. It is not 2846 * specified timeout to expire. The timeout is in jiffies. It is not
2830 * interruptible. The caller is accounted as waiting for IO. 2847 * interruptible. The caller is accounted as waiting for IO.
2831 * 2848 *
2832 * The return value is 0 if timed out, and positive (at least 1, or number of 2849 * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
2833 * jiffies left till timeout) if completed. 2850 * till timeout) if completed.
2834 */ 2851 */
2835unsigned long __sched 2852unsigned long __sched
2836wait_for_completion_io_timeout(struct completion *x, unsigned long timeout) 2853wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
@@ -2846,7 +2863,7 @@ EXPORT_SYMBOL(wait_for_completion_io_timeout);
2846 * This waits for completion of a specific task to be signaled. It is 2863 * This waits for completion of a specific task to be signaled. It is
2847 * interruptible. 2864 * interruptible.
2848 * 2865 *
2849 * The return value is -ERESTARTSYS if interrupted, 0 if completed. 2866 * Return: -ERESTARTSYS if interrupted, 0 if completed.
2850 */ 2867 */
2851int __sched wait_for_completion_interruptible(struct completion *x) 2868int __sched wait_for_completion_interruptible(struct completion *x)
2852{ 2869{
@@ -2865,8 +2882,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
2865 * This waits for either a completion of a specific task to be signaled or for a 2882 * This waits for either a completion of a specific task to be signaled or for a
2866 * specified timeout to expire. It is interruptible. The timeout is in jiffies. 2883 * specified timeout to expire. It is interruptible. The timeout is in jiffies.
2867 * 2884 *
2868 * The return value is -ERESTARTSYS if interrupted, 0 if timed out, 2885 * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
2869 * positive (at least 1, or number of jiffies left till timeout) if completed. 2886 * or number of jiffies left till timeout) if completed.
2870 */ 2887 */
2871long __sched 2888long __sched
2872wait_for_completion_interruptible_timeout(struct completion *x, 2889wait_for_completion_interruptible_timeout(struct completion *x,
@@ -2883,7 +2900,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
2883 * This waits to be signaled for completion of a specific task. It can be 2900 * This waits to be signaled for completion of a specific task. It can be
2884 * interrupted by a kill signal. 2901 * interrupted by a kill signal.
2885 * 2902 *
2886 * The return value is -ERESTARTSYS if interrupted, 0 if completed. 2903 * Return: -ERESTARTSYS if interrupted, 0 if completed.
2887 */ 2904 */
2888int __sched wait_for_completion_killable(struct completion *x) 2905int __sched wait_for_completion_killable(struct completion *x)
2889{ 2906{
@@ -2903,8 +2920,8 @@ EXPORT_SYMBOL(wait_for_completion_killable);
2903 * signaled or for a specified timeout to expire. It can be 2920 * signaled or for a specified timeout to expire. It can be
2904 * interrupted by a kill signal. The timeout is in jiffies. 2921 * interrupted by a kill signal. The timeout is in jiffies.
2905 * 2922 *
2906 * The return value is -ERESTARTSYS if interrupted, 0 if timed out, 2923 * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
2907 * positive (at least 1, or number of jiffies left till timeout) if completed. 2924 * or number of jiffies left till timeout) if completed.
2908 */ 2925 */
2909long __sched 2926long __sched
2910wait_for_completion_killable_timeout(struct completion *x, 2927wait_for_completion_killable_timeout(struct completion *x,
@@ -2918,7 +2935,7 @@ EXPORT_SYMBOL(wait_for_completion_killable_timeout);
2918 * try_wait_for_completion - try to decrement a completion without blocking 2935 * try_wait_for_completion - try to decrement a completion without blocking
2919 * @x: completion structure 2936 * @x: completion structure
2920 * 2937 *
2921 * Returns: 0 if a decrement cannot be done without blocking 2938 * Return: 0 if a decrement cannot be done without blocking
2922 * 1 if a decrement succeeded. 2939 * 1 if a decrement succeeded.
2923 * 2940 *
2924 * If a completion is being used as a counting completion, 2941 * If a completion is being used as a counting completion,
@@ -2945,7 +2962,7 @@ EXPORT_SYMBOL(try_wait_for_completion);
2945 * completion_done - Test to see if a completion has any waiters 2962 * completion_done - Test to see if a completion has any waiters
2946 * @x: completion structure 2963 * @x: completion structure
2947 * 2964 *
2948 * Returns: 0 if there are waiters (wait_for_completion() in progress) 2965 * Return: 0 if there are waiters (wait_for_completion() in progress)
2949 * 1 if there are no waiters. 2966 * 1 if there are no waiters.
2950 * 2967 *
2951 */ 2968 */
@@ -3182,7 +3199,7 @@ SYSCALL_DEFINE1(nice, int, increment)
3182 * task_prio - return the priority value of a given task. 3199 * task_prio - return the priority value of a given task.
3183 * @p: the task in question. 3200 * @p: the task in question.
3184 * 3201 *
3185 * This is the priority value as seen by users in /proc. 3202 * Return: The priority value as seen by users in /proc.
3186 * RT tasks are offset by -200. Normal tasks are centered 3203 * RT tasks are offset by -200. Normal tasks are centered
3187 * around 0, value goes from -16 to +15. 3204 * around 0, value goes from -16 to +15.
3188 */ 3205 */
@@ -3194,6 +3211,8 @@ int task_prio(const struct task_struct *p)
3194/** 3211/**
3195 * task_nice - return the nice value of a given task. 3212 * task_nice - return the nice value of a given task.
3196 * @p: the task in question. 3213 * @p: the task in question.
3214 *
3215 * Return: The nice value [ -20 ... 0 ... 19 ].
3197 */ 3216 */
3198int task_nice(const struct task_struct *p) 3217int task_nice(const struct task_struct *p)
3199{ 3218{
@@ -3204,6 +3223,8 @@ EXPORT_SYMBOL(task_nice);
3204/** 3223/**
3205 * idle_cpu - is a given cpu idle currently? 3224 * idle_cpu - is a given cpu idle currently?
3206 * @cpu: the processor in question. 3225 * @cpu: the processor in question.
3226 *
3227 * Return: 1 if the CPU is currently idle. 0 otherwise.
3207 */ 3228 */
3208int idle_cpu(int cpu) 3229int idle_cpu(int cpu)
3209{ 3230{
@@ -3226,6 +3247,8 @@ int idle_cpu(int cpu)
3226/** 3247/**
3227 * idle_task - return the idle task for a given cpu. 3248 * idle_task - return the idle task for a given cpu.
3228 * @cpu: the processor in question. 3249 * @cpu: the processor in question.
3250 *
3251 * Return: The idle task for the cpu @cpu.
3229 */ 3252 */
3230struct task_struct *idle_task(int cpu) 3253struct task_struct *idle_task(int cpu)
3231{ 3254{
@@ -3235,6 +3258,8 @@ struct task_struct *idle_task(int cpu)
3235/** 3258/**
3236 * find_process_by_pid - find a process with a matching PID value. 3259 * find_process_by_pid - find a process with a matching PID value.
3237 * @pid: the pid in question. 3260 * @pid: the pid in question.
3261 *
3262 * Return: The task of @pid, if found. %NULL otherwise.
3238 */ 3263 */
3239static struct task_struct *find_process_by_pid(pid_t pid) 3264static struct task_struct *find_process_by_pid(pid_t pid)
3240{ 3265{
@@ -3432,6 +3457,8 @@ recheck:
3432 * @policy: new policy. 3457 * @policy: new policy.
3433 * @param: structure containing the new RT priority. 3458 * @param: structure containing the new RT priority.
3434 * 3459 *
3460 * Return: 0 on success. An error code otherwise.
3461 *
3435 * NOTE that the task may be already dead. 3462 * NOTE that the task may be already dead.
3436 */ 3463 */
3437int sched_setscheduler(struct task_struct *p, int policy, 3464int sched_setscheduler(struct task_struct *p, int policy,
@@ -3451,6 +3478,8 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
3451 * current context has permission. For example, this is needed in 3478 * current context has permission. For example, this is needed in
3452 * stop_machine(): we create temporary high priority worker threads, 3479 * stop_machine(): we create temporary high priority worker threads,
3453 * but our caller might not have that capability. 3480 * but our caller might not have that capability.
3481 *
3482 * Return: 0 on success. An error code otherwise.
3454 */ 3483 */
3455int sched_setscheduler_nocheck(struct task_struct *p, int policy, 3484int sched_setscheduler_nocheck(struct task_struct *p, int policy,
3456 const struct sched_param *param) 3485 const struct sched_param *param)
@@ -3485,6 +3514,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
3485 * @pid: the pid in question. 3514 * @pid: the pid in question.
3486 * @policy: new policy. 3515 * @policy: new policy.
3487 * @param: structure containing the new RT priority. 3516 * @param: structure containing the new RT priority.
3517 *
3518 * Return: 0 on success. An error code otherwise.
3488 */ 3519 */
3489SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, 3520SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
3490 struct sched_param __user *, param) 3521 struct sched_param __user *, param)
@@ -3500,6 +3531,8 @@ SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
3500 * sys_sched_setparam - set/change the RT priority of a thread 3531 * sys_sched_setparam - set/change the RT priority of a thread
3501 * @pid: the pid in question. 3532 * @pid: the pid in question.
3502 * @param: structure containing the new RT priority. 3533 * @param: structure containing the new RT priority.
3534 *
3535 * Return: 0 on success. An error code otherwise.
3503 */ 3536 */
3504SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) 3537SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
3505{ 3538{
@@ -3509,6 +3542,9 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
3509/** 3542/**
3510 * sys_sched_getscheduler - get the policy (scheduling class) of a thread 3543 * sys_sched_getscheduler - get the policy (scheduling class) of a thread
3511 * @pid: the pid in question. 3544 * @pid: the pid in question.
3545 *
3546 * Return: On success, the policy of the thread. Otherwise, a negative error
3547 * code.
3512 */ 3548 */
3513SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) 3549SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
3514{ 3550{
@@ -3535,6 +3571,9 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
3535 * sys_sched_getparam - get the RT priority of a thread 3571 * sys_sched_getparam - get the RT priority of a thread
3536 * @pid: the pid in question. 3572 * @pid: the pid in question.
3537 * @param: structure containing the RT priority. 3573 * @param: structure containing the RT priority.
3574 *
3575 * Return: On success, 0 and the RT priority is in @param. Otherwise, an error
3576 * code.
3538 */ 3577 */
3539SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) 3578SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
3540{ 3579{
@@ -3659,6 +3698,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
3659 * @pid: pid of the process 3698 * @pid: pid of the process
3660 * @len: length in bytes of the bitmask pointed to by user_mask_ptr 3699 * @len: length in bytes of the bitmask pointed to by user_mask_ptr
3661 * @user_mask_ptr: user-space pointer to the new cpu mask 3700 * @user_mask_ptr: user-space pointer to the new cpu mask
3701 *
3702 * Return: 0 on success. An error code otherwise.
3662 */ 3703 */
3663SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, 3704SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
3664 unsigned long __user *, user_mask_ptr) 3705 unsigned long __user *, user_mask_ptr)
@@ -3710,6 +3751,8 @@ out_unlock:
3710 * @pid: pid of the process 3751 * @pid: pid of the process
3711 * @len: length in bytes of the bitmask pointed to by user_mask_ptr 3752 * @len: length in bytes of the bitmask pointed to by user_mask_ptr
3712 * @user_mask_ptr: user-space pointer to hold the current cpu mask 3753 * @user_mask_ptr: user-space pointer to hold the current cpu mask
3754 *
3755 * Return: 0 on success. An error code otherwise.
3713 */ 3756 */
3714SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, 3757SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
3715 unsigned long __user *, user_mask_ptr) 3758 unsigned long __user *, user_mask_ptr)
@@ -3744,6 +3787,8 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
3744 * 3787 *
3745 * This function yields the current CPU to other tasks. If there are no 3788 * This function yields the current CPU to other tasks. If there are no
3746 * other threads running on this CPU then this function will return. 3789 * other threads running on this CPU then this function will return.
3790 *
3791 * Return: 0.
3747 */ 3792 */
3748SYSCALL_DEFINE0(sched_yield) 3793SYSCALL_DEFINE0(sched_yield)
3749{ 3794{
@@ -3869,7 +3914,7 @@ EXPORT_SYMBOL(yield);
3869 * It's the caller's job to ensure that the target task struct 3914 * It's the caller's job to ensure that the target task struct
3870 * can't go away on us before we can do any checks. 3915 * can't go away on us before we can do any checks.
3871 * 3916 *
3872 * Returns: 3917 * Return:
3873 * true (>0) if we indeed boosted the target task. 3918 * true (>0) if we indeed boosted the target task.
3874 * false (0) if we failed to boost the target. 3919 * false (0) if we failed to boost the target.
3875 * -ESRCH if there's no task to yield to. 3920 * -ESRCH if there's no task to yield to.
@@ -3972,8 +4017,9 @@ long __sched io_schedule_timeout(long timeout)
3972 * sys_sched_get_priority_max - return maximum RT priority. 4017 * sys_sched_get_priority_max - return maximum RT priority.
3973 * @policy: scheduling class. 4018 * @policy: scheduling class.
3974 * 4019 *
3975 * this syscall returns the maximum rt_priority that can be used 4020 * Return: On success, this syscall returns the maximum
3976 * by a given scheduling class. 4021 * rt_priority that can be used by a given scheduling class.
4022 * On failure, a negative error code is returned.
3977 */ 4023 */
3978SYSCALL_DEFINE1(sched_get_priority_max, int, policy) 4024SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
3979{ 4025{
@@ -3997,8 +4043,9 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
3997 * sys_sched_get_priority_min - return minimum RT priority. 4043 * sys_sched_get_priority_min - return minimum RT priority.
3998 * @policy: scheduling class. 4044 * @policy: scheduling class.
3999 * 4045 *
4000 * this syscall returns the minimum rt_priority that can be used 4046 * Return: On success, this syscall returns the minimum
4001 * by a given scheduling class. 4047 * rt_priority that can be used by a given scheduling class.
4048 * On failure, a negative error code is returned.
4002 */ 4049 */
4003SYSCALL_DEFINE1(sched_get_priority_min, int, policy) 4050SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
4004{ 4051{
@@ -4024,6 +4071,9 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
4024 * 4071 *
4025 * this syscall writes the default timeslice value of a given process 4072 * this syscall writes the default timeslice value of a given process
4026 * into the user-space timespec buffer. A value of '0' means infinity. 4073 * into the user-space timespec buffer. A value of '0' means infinity.
4074 *
4075 * Return: On success, 0 and the timeslice is in @interval. Otherwise,
4076 * an error code.
4027 */ 4077 */
4028SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, 4078SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
4029 struct timespec __user *, interval) 4079 struct timespec __user *, interval)
@@ -6632,6 +6682,8 @@ void normalize_rt_tasks(void)
6632 * @cpu: the processor in question. 6682 * @cpu: the processor in question.
6633 * 6683 *
6634 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6684 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6685 *
6686 * Return: The current task for @cpu.
6635 */ 6687 */
6636struct task_struct *curr_task(int cpu) 6688struct task_struct *curr_task(int cpu)
6637{ 6689{
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 1095e878a46f..8b836b376d91 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -62,7 +62,7 @@ static int convert_prio(int prio)
62 * any discrepancies created by racing against the uncertainty of the current 62 * any discrepancies created by racing against the uncertainty of the current
63 * priority configuration. 63 * priority configuration.
64 * 64 *
65 * Returns: (int)bool - CPUs were found 65 * Return: (int)bool - CPUs were found
66 */ 66 */
67int cpupri_find(struct cpupri *cp, struct task_struct *p, 67int cpupri_find(struct cpupri *cp, struct task_struct *p,
68 struct cpumask *lowest_mask) 68 struct cpumask *lowest_mask)
@@ -203,7 +203,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
203 * cpupri_init - initialize the cpupri structure 203 * cpupri_init - initialize the cpupri structure
204 * @cp: The cpupri context 204 * @cp: The cpupri context
205 * 205 *
206 * Returns: -ENOMEM if memory fails. 206 * Return: -ENOMEM on memory allocation failure.
207 */ 207 */
208int cpupri_init(struct cpupri *cp) 208int cpupri_init(struct cpupri *cp)
209{ 209{
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bb456f44b7b1..68f1609ca149 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -851,7 +851,7 @@ void task_numa_fault(int node, int pages, bool migrated)
851{ 851{
852 struct task_struct *p = current; 852 struct task_struct *p = current;
853 853
854 if (!sched_feat_numa(NUMA)) 854 if (!numabalancing_enabled)
855 return; 855 return;
856 856
857 /* FIXME: Allocate task-specific structure for placement policy here */ 857 /* FIXME: Allocate task-specific structure for placement policy here */
@@ -2032,6 +2032,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
2032 */ 2032 */
2033 update_entity_load_avg(curr, 1); 2033 update_entity_load_avg(curr, 1);
2034 update_cfs_rq_blocked_load(cfs_rq, 1); 2034 update_cfs_rq_blocked_load(cfs_rq, 1);
2035 update_cfs_shares(cfs_rq);
2035 2036
2036#ifdef CONFIG_SCHED_HRTICK 2037#ifdef CONFIG_SCHED_HRTICK
2037 /* 2038 /*
@@ -4280,6 +4281,8 @@ struct sg_lb_stats {
4280 * get_sd_load_idx - Obtain the load index for a given sched domain. 4281 * get_sd_load_idx - Obtain the load index for a given sched domain.
4281 * @sd: The sched_domain whose load_idx is to be obtained. 4282 * @sd: The sched_domain whose load_idx is to be obtained.
4282 * @idle: The Idle status of the CPU for whose sd load_idx is obtained. 4283 * @idle: The Idle status of the CPU for whose sd load_idx is obtained.
4284 *
4285 * Return: The load index.
4283 */ 4286 */
4284static inline int get_sd_load_idx(struct sched_domain *sd, 4287static inline int get_sd_load_idx(struct sched_domain *sd,
4285 enum cpu_idle_type idle) 4288 enum cpu_idle_type idle)
@@ -4574,6 +4577,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
4574 * 4577 *
4575 * Determine if @sg is a busier group than the previously selected 4578 * Determine if @sg is a busier group than the previously selected
4576 * busiest group. 4579 * busiest group.
4580 *
4581 * Return: %true if @sg is a busier group than the previously selected
4582 * busiest group. %false otherwise.
4577 */ 4583 */
4578static bool update_sd_pick_busiest(struct lb_env *env, 4584static bool update_sd_pick_busiest(struct lb_env *env,
4579 struct sd_lb_stats *sds, 4585 struct sd_lb_stats *sds,
@@ -4691,7 +4697,7 @@ static inline void update_sd_lb_stats(struct lb_env *env,
4691 * assuming lower CPU number will be equivalent to a lower SMT thread 4697 * assuming lower CPU number will be equivalent to a lower SMT thread
4692 * number. 4698 * number.
4693 * 4699 *
4694 * Returns 1 when packing is required and a task should be moved to 4700 * Return: 1 when packing is required and a task should be moved to
4695 * this CPU. The amount of the imbalance is returned in *imbalance. 4701 * this CPU. The amount of the imbalance is returned in *imbalance.
4696 * 4702 *
4697 * @env: The load balancing environment. 4703 * @env: The load balancing environment.
@@ -4869,7 +4875,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
4869 * @balance: Pointer to a variable indicating if this_cpu 4875 * @balance: Pointer to a variable indicating if this_cpu
4870 * is the appropriate cpu to perform load balancing at this_level. 4876 * is the appropriate cpu to perform load balancing at this_level.
4871 * 4877 *
4872 * Returns: - the busiest group if imbalance exists. 4878 * Return: - The busiest group if imbalance exists.
4873 * - If no imbalance and user has opted for power-savings balance, 4879 * - If no imbalance and user has opted for power-savings balance,
4874 * return the least loaded group whose CPUs can be 4880 * return the least loaded group whose CPUs can be
4875 * put to idle by rebalancing its tasks onto our group. 4881 * put to idle by rebalancing its tasks onto our group.
@@ -5786,7 +5792,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
5786 entity_tick(cfs_rq, se, queued); 5792 entity_tick(cfs_rq, se, queued);
5787 } 5793 }
5788 5794
5789 if (sched_feat_numa(NUMA)) 5795 if (numabalancing_enabled)
5790 task_tick_numa(rq, curr); 5796 task_tick_numa(rq, curr);
5791 5797
5792 update_rq_runnable_avg(rq, 1); 5798 update_rq_runnable_avg(rq, 1);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ac09d98490aa..07f6fc468e17 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2346,7 +2346,11 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2346 int write, void *data) 2346 int write, void *data)
2347{ 2347{
2348 if (write) { 2348 if (write) {
2349 *valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp); 2349 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
2350
2351 if (jif > INT_MAX)
2352 return 1;
2353 *valp = (int)jif;
2350 } else { 2354 } else {
2351 int val = *valp; 2355 int val = *valp;
2352 unsigned long lval; 2356 unsigned long lval;
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index a326f27d7f09..0b479a6a22bb 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -121,7 +121,7 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
121 BUG_ON(bits > 32); 121 BUG_ON(bits > 32);
122 WARN_ON(!irqs_disabled()); 122 WARN_ON(!irqs_disabled());
123 read_sched_clock = read; 123 read_sched_clock = read;
124 sched_clock_mask = (1 << bits) - 1; 124 sched_clock_mask = (1ULL << bits) - 1;
125 cd.rate = rate; 125 cd.rate = rate;
126 126
127 /* calculate the mult/shift to convert counter ticks to ns. */ 127 /* calculate the mult/shift to convert counter ticks to ns. */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e80183f4a6c4..e8a1516cc0a3 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -182,7 +182,8 @@ static bool can_stop_full_tick(void)
182 * Don't allow the user to think they can get 182 * Don't allow the user to think they can get
183 * full NO_HZ with this machine. 183 * full NO_HZ with this machine.
184 */ 184 */
185 WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock"); 185 WARN_ONCE(have_nohz_full_mask,
186 "NO_HZ FULL will not work with unstable sched clock");
186 return false; 187 return false;
187 } 188 }
188#endif 189#endif
@@ -343,8 +344,6 @@ static int tick_nohz_init_all(void)
343 344
344void __init tick_nohz_init(void) 345void __init tick_nohz_init(void)
345{ 346{
346 int cpu;
347
348 if (!have_nohz_full_mask) { 347 if (!have_nohz_full_mask) {
349 if (tick_nohz_init_all() < 0) 348 if (tick_nohz_init_all() < 0)
350 return; 349 return;
@@ -827,13 +826,10 @@ void tick_nohz_irq_exit(void)
827{ 826{
828 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 827 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
829 828
830 if (ts->inidle) { 829 if (ts->inidle)
831 /* Cancel the timer because CPU already waken up from the C-states*/
832 menu_hrtimer_cancel();
833 __tick_nohz_idle_enter(ts); 830 __tick_nohz_idle_enter(ts);
834 } else { 831 else
835 tick_nohz_full_stop_tick(ts); 832 tick_nohz_full_stop_tick(ts);
836 }
837} 833}
838 834
839/** 835/**
@@ -931,8 +927,6 @@ void tick_nohz_idle_exit(void)
931 927
932 ts->inidle = 0; 928 ts->inidle = 0;
933 929
934 /* Cancel the timer because CPU already waken up from the C-states*/
935 menu_hrtimer_cancel();
936 if (ts->idle_active || ts->tick_stopped) 930 if (ts->idle_active || ts->tick_stopped)
937 now = ktime_get(); 931 now = ktime_get();
938 932
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 67708f46baae..a6d098c6df3f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1441,12 +1441,22 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
1441 * the hashes are freed with call_rcu_sched(). 1441 * the hashes are freed with call_rcu_sched().
1442 */ 1442 */
1443static int 1443static int
1444ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) 1444ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
1445{ 1445{
1446 struct ftrace_hash *filter_hash; 1446 struct ftrace_hash *filter_hash;
1447 struct ftrace_hash *notrace_hash; 1447 struct ftrace_hash *notrace_hash;
1448 int ret; 1448 int ret;
1449 1449
1450#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
1451 /*
1452 * There's a small race when adding ops that the ftrace handler
1453 * that wants regs, may be called without them. We can not
1454 * allow that handler to be called if regs is NULL.
1455 */
1456 if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS))
1457 return 0;
1458#endif
1459
1450 filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); 1460 filter_hash = rcu_dereference_raw_notrace(ops->filter_hash);
1451 notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); 1461 notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash);
1452 1462
@@ -2159,12 +2169,57 @@ static cycle_t ftrace_update_time;
2159static unsigned long ftrace_update_cnt; 2169static unsigned long ftrace_update_cnt;
2160unsigned long ftrace_update_tot_cnt; 2170unsigned long ftrace_update_tot_cnt;
2161 2171
2162static int ops_traces_mod(struct ftrace_ops *ops) 2172static inline int ops_traces_mod(struct ftrace_ops *ops)
2163{ 2173{
2164 struct ftrace_hash *hash; 2174 /*
2175 * Filter_hash being empty will default to trace module.
2176 * But notrace hash requires a test of individual module functions.
2177 */
2178 return ftrace_hash_empty(ops->filter_hash) &&
2179 ftrace_hash_empty(ops->notrace_hash);
2180}
2181
2182/*
2183 * Check if the current ops references the record.
2184 *
2185 * If the ops traces all functions, then it was already accounted for.
2186 * If the ops does not trace the current record function, skip it.
2187 * If the ops ignores the function via notrace filter, skip it.
2188 */
2189static inline bool
2190ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
2191{
2192 /* If ops isn't enabled, ignore it */
2193 if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
2194 return 0;
2195
2196 /* If ops traces all mods, we already accounted for it */
2197 if (ops_traces_mod(ops))
2198 return 0;
2199
2200 /* The function must be in the filter */
2201 if (!ftrace_hash_empty(ops->filter_hash) &&
2202 !ftrace_lookup_ip(ops->filter_hash, rec->ip))
2203 return 0;
2204
2205 /* If in notrace hash, we ignore it too */
2206 if (ftrace_lookup_ip(ops->notrace_hash, rec->ip))
2207 return 0;
2165 2208
2166 hash = ops->filter_hash; 2209 return 1;
2167 return ftrace_hash_empty(hash); 2210}
2211
2212static int referenced_filters(struct dyn_ftrace *rec)
2213{
2214 struct ftrace_ops *ops;
2215 int cnt = 0;
2216
2217 for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
2218 if (ops_references_rec(ops, rec))
2219 cnt++;
2220 }
2221
2222 return cnt;
2168} 2223}
2169 2224
2170static int ftrace_update_code(struct module *mod) 2225static int ftrace_update_code(struct module *mod)
@@ -2173,6 +2228,7 @@ static int ftrace_update_code(struct module *mod)
2173 struct dyn_ftrace *p; 2228 struct dyn_ftrace *p;
2174 cycle_t start, stop; 2229 cycle_t start, stop;
2175 unsigned long ref = 0; 2230 unsigned long ref = 0;
2231 bool test = false;
2176 int i; 2232 int i;
2177 2233
2178 /* 2234 /*
@@ -2186,9 +2242,12 @@ static int ftrace_update_code(struct module *mod)
2186 2242
2187 for (ops = ftrace_ops_list; 2243 for (ops = ftrace_ops_list;
2188 ops != &ftrace_list_end; ops = ops->next) { 2244 ops != &ftrace_list_end; ops = ops->next) {
2189 if (ops->flags & FTRACE_OPS_FL_ENABLED && 2245 if (ops->flags & FTRACE_OPS_FL_ENABLED) {
2190 ops_traces_mod(ops)) 2246 if (ops_traces_mod(ops))
2191 ref++; 2247 ref++;
2248 else
2249 test = true;
2250 }
2192 } 2251 }
2193 } 2252 }
2194 2253
@@ -2198,12 +2257,16 @@ static int ftrace_update_code(struct module *mod)
2198 for (pg = ftrace_new_pgs; pg; pg = pg->next) { 2257 for (pg = ftrace_new_pgs; pg; pg = pg->next) {
2199 2258
2200 for (i = 0; i < pg->index; i++) { 2259 for (i = 0; i < pg->index; i++) {
2260 int cnt = ref;
2261
2201 /* If something went wrong, bail without enabling anything */ 2262 /* If something went wrong, bail without enabling anything */
2202 if (unlikely(ftrace_disabled)) 2263 if (unlikely(ftrace_disabled))
2203 return -1; 2264 return -1;
2204 2265
2205 p = &pg->records[i]; 2266 p = &pg->records[i];
2206 p->flags = ref; 2267 if (test)
2268 cnt += referenced_filters(p);
2269 p->flags = cnt;
2207 2270
2208 /* 2271 /*
2209 * Do the initial record conversion from mcount jump 2272 * Do the initial record conversion from mcount jump
@@ -2223,7 +2286,7 @@ static int ftrace_update_code(struct module *mod)
2223 * conversion puts the module to the correct state, thus 2286 * conversion puts the module to the correct state, thus
2224 * passing the ftrace_make_call check. 2287 * passing the ftrace_make_call check.
2225 */ 2288 */
2226 if (ftrace_start_up && ref) { 2289 if (ftrace_start_up && cnt) {
2227 int failed = __ftrace_replace_code(p, 1); 2290 int failed = __ftrace_replace_code(p, 1);
2228 if (failed) 2291 if (failed)
2229 ftrace_bug(failed, p->ip); 2292 ftrace_bug(failed, p->ip);
@@ -3374,6 +3437,12 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
3374 return add_hash_entry(hash, ip); 3437 return add_hash_entry(hash, ip);
3375} 3438}
3376 3439
3440static void ftrace_ops_update_code(struct ftrace_ops *ops)
3441{
3442 if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled)
3443 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
3444}
3445
3377static int 3446static int
3378ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, 3447ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
3379 unsigned long ip, int remove, int reset, int enable) 3448 unsigned long ip, int remove, int reset, int enable)
@@ -3416,9 +3485,8 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
3416 3485
3417 mutex_lock(&ftrace_lock); 3486 mutex_lock(&ftrace_lock);
3418 ret = ftrace_hash_move(ops, enable, orig_hash, hash); 3487 ret = ftrace_hash_move(ops, enable, orig_hash, hash);
3419 if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED 3488 if (!ret)
3420 && ftrace_enabled) 3489 ftrace_ops_update_code(ops);
3421 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
3422 3490
3423 mutex_unlock(&ftrace_lock); 3491 mutex_unlock(&ftrace_lock);
3424 3492
@@ -3645,9 +3713,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
3645 mutex_lock(&ftrace_lock); 3713 mutex_lock(&ftrace_lock);
3646 ret = ftrace_hash_move(iter->ops, filter_hash, 3714 ret = ftrace_hash_move(iter->ops, filter_hash,
3647 orig_hash, iter->hash); 3715 orig_hash, iter->hash);
3648 if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) 3716 if (!ret)
3649 && ftrace_enabled) 3717 ftrace_ops_update_code(iter->ops);
3650 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
3651 3718
3652 mutex_unlock(&ftrace_lock); 3719 mutex_unlock(&ftrace_lock);
3653 } 3720 }
@@ -4218,7 +4285,7 @@ static inline void ftrace_startup_enable(int command) { }
4218# define ftrace_shutdown_sysctl() do { } while (0) 4285# define ftrace_shutdown_sysctl() do { } while (0)
4219 4286
4220static inline int 4287static inline int
4221ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) 4288ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
4222{ 4289{
4223 return 1; 4290 return 1;
4224} 4291}
@@ -4241,7 +4308,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
4241 do_for_each_ftrace_op(op, ftrace_control_list) { 4308 do_for_each_ftrace_op(op, ftrace_control_list) {
4242 if (!(op->flags & FTRACE_OPS_FL_STUB) && 4309 if (!(op->flags & FTRACE_OPS_FL_STUB) &&
4243 !ftrace_function_local_disabled(op) && 4310 !ftrace_function_local_disabled(op) &&
4244 ftrace_ops_test(op, ip)) 4311 ftrace_ops_test(op, ip, regs))
4245 op->func(ip, parent_ip, op, regs); 4312 op->func(ip, parent_ip, op, regs);
4246 } while_for_each_ftrace_op(op); 4313 } while_for_each_ftrace_op(op);
4247 trace_recursion_clear(TRACE_CONTROL_BIT); 4314 trace_recursion_clear(TRACE_CONTROL_BIT);
@@ -4274,7 +4341,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
4274 */ 4341 */
4275 preempt_disable_notrace(); 4342 preempt_disable_notrace();
4276 do_for_each_ftrace_op(op, ftrace_ops_list) { 4343 do_for_each_ftrace_op(op, ftrace_ops_list) {
4277 if (ftrace_ops_test(op, ip)) 4344 if (ftrace_ops_test(op, ip, regs))
4278 op->func(ip, parent_ip, op, regs); 4345 op->func(ip, parent_ip, op, regs);
4279 } while_for_each_ftrace_op(op); 4346 } while_for_each_ftrace_op(op);
4280 preempt_enable_notrace(); 4347 preempt_enable_notrace();
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e444ff88f0a4..cc2f66f68dc5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -36,11 +36,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
36{ 36{
37 int ret; 37 int ret;
38 38
39 ret = trace_seq_printf(s, "# compressed entry header\n"); 39 ret = trace_seq_puts(s, "# compressed entry header\n");
40 ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); 40 ret = trace_seq_puts(s, "\ttype_len : 5 bits\n");
41 ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); 41 ret = trace_seq_puts(s, "\ttime_delta : 27 bits\n");
42 ret = trace_seq_printf(s, "\tarray : 32 bits\n"); 42 ret = trace_seq_puts(s, "\tarray : 32 bits\n");
43 ret = trace_seq_printf(s, "\n"); 43 ret = trace_seq_putc(s, '\n');
44 ret = trace_seq_printf(s, "\tpadding : type == %d\n", 44 ret = trace_seq_printf(s, "\tpadding : type == %d\n",
45 RINGBUF_TYPE_PADDING); 45 RINGBUF_TYPE_PADDING);
46 ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", 46 ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
@@ -1066,7 +1066,7 @@ static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
1066} 1066}
1067 1067
1068/** 1068/**
1069 * check_pages - integrity check of buffer pages 1069 * rb_check_pages - integrity check of buffer pages
1070 * @cpu_buffer: CPU buffer with pages to test 1070 * @cpu_buffer: CPU buffer with pages to test
1071 * 1071 *
1072 * As a safety measure we check to make sure the data pages have not 1072 * As a safety measure we check to make sure the data pages have not
@@ -1258,7 +1258,7 @@ static int rb_cpu_notify(struct notifier_block *self,
1258#endif 1258#endif
1259 1259
1260/** 1260/**
1261 * ring_buffer_alloc - allocate a new ring_buffer 1261 * __ring_buffer_alloc - allocate a new ring_buffer
1262 * @size: the size in bytes per cpu that is needed. 1262 * @size: the size in bytes per cpu that is needed.
1263 * @flags: attributes to set for the ring buffer. 1263 * @flags: attributes to set for the ring buffer.
1264 * 1264 *
@@ -1607,6 +1607,7 @@ static void update_pages_handler(struct work_struct *work)
1607 * ring_buffer_resize - resize the ring buffer 1607 * ring_buffer_resize - resize the ring buffer
1608 * @buffer: the buffer to resize. 1608 * @buffer: the buffer to resize.
1609 * @size: the new size. 1609 * @size: the new size.
1610 * @cpu_id: the cpu buffer to resize
1610 * 1611 *
1611 * Minimum size is 2 * BUF_PAGE_SIZE. 1612 * Minimum size is 2 * BUF_PAGE_SIZE.
1612 * 1613 *
@@ -3956,11 +3957,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume);
3956 * expected. 3957 * expected.
3957 * 3958 *
3958 * After a sequence of ring_buffer_read_prepare calls, the user is 3959 * After a sequence of ring_buffer_read_prepare calls, the user is
3959 * expected to make at least one call to ring_buffer_prepare_sync. 3960 * expected to make at least one call to ring_buffer_read_prepare_sync.
3960 * Afterwards, ring_buffer_read_start is invoked to get things going 3961 * Afterwards, ring_buffer_read_start is invoked to get things going
3961 * for real. 3962 * for real.
3962 * 3963 *
3963 * This overall must be paired with ring_buffer_finish. 3964 * This overall must be paired with ring_buffer_read_finish.
3964 */ 3965 */
3965struct ring_buffer_iter * 3966struct ring_buffer_iter *
3966ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu) 3967ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
@@ -4009,7 +4010,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
4009 * an intervening ring_buffer_read_prepare_sync must have been 4010 * an intervening ring_buffer_read_prepare_sync must have been
4010 * performed. 4011 * performed.
4011 * 4012 *
4012 * Must be paired with ring_buffer_finish. 4013 * Must be paired with ring_buffer_read_finish.
4013 */ 4014 */
4014void 4015void
4015ring_buffer_read_start(struct ring_buffer_iter *iter) 4016ring_buffer_read_start(struct ring_buffer_iter *iter)
@@ -4031,7 +4032,7 @@ ring_buffer_read_start(struct ring_buffer_iter *iter)
4031EXPORT_SYMBOL_GPL(ring_buffer_read_start); 4032EXPORT_SYMBOL_GPL(ring_buffer_read_start);
4032 4033
4033/** 4034/**
4034 * ring_buffer_finish - finish reading the iterator of the buffer 4035 * ring_buffer_read_finish - finish reading the iterator of the buffer
4035 * @iter: The iterator retrieved by ring_buffer_start 4036 * @iter: The iterator retrieved by ring_buffer_start
4036 * 4037 *
4037 * This re-enables the recording to the buffer, and frees the 4038 * This re-enables the recording to the buffer, and frees the
@@ -4346,6 +4347,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
4346/** 4347/**
4347 * ring_buffer_alloc_read_page - allocate a page to read from buffer 4348 * ring_buffer_alloc_read_page - allocate a page to read from buffer
4348 * @buffer: the buffer to allocate for. 4349 * @buffer: the buffer to allocate for.
4350 * @cpu: the cpu buffer to allocate.
4349 * 4351 *
4350 * This function is used in conjunction with ring_buffer_read_page. 4352 * This function is used in conjunction with ring_buffer_read_page.
4351 * When reading a full page from the ring buffer, these functions 4353 * When reading a full page from the ring buffer, these functions
@@ -4403,7 +4405,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
4403 * to swap with a page in the ring buffer. 4405 * to swap with a page in the ring buffer.
4404 * 4406 *
4405 * for example: 4407 * for example:
4406 * rpage = ring_buffer_alloc_read_page(buffer); 4408 * rpage = ring_buffer_alloc_read_page(buffer, cpu);
4407 * if (!rpage) 4409 * if (!rpage)
4408 * return error; 4410 * return error;
4409 * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); 4411 * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0cd500bffd9b..496f94d57698 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -243,20 +243,25 @@ int filter_current_check_discard(struct ring_buffer *buffer,
243} 243}
244EXPORT_SYMBOL_GPL(filter_current_check_discard); 244EXPORT_SYMBOL_GPL(filter_current_check_discard);
245 245
246cycle_t ftrace_now(int cpu) 246cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
247{ 247{
248 u64 ts; 248 u64 ts;
249 249
250 /* Early boot up does not have a buffer yet */ 250 /* Early boot up does not have a buffer yet */
251 if (!global_trace.trace_buffer.buffer) 251 if (!buf->buffer)
252 return trace_clock_local(); 252 return trace_clock_local();
253 253
254 ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu); 254 ts = ring_buffer_time_stamp(buf->buffer, cpu);
255 ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts); 255 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
256 256
257 return ts; 257 return ts;
258} 258}
259 259
260cycle_t ftrace_now(int cpu)
261{
262 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
263}
264
260/** 265/**
261 * tracing_is_enabled - Show if global_trace has been disabled 266 * tracing_is_enabled - Show if global_trace has been disabled
262 * 267 *
@@ -1211,7 +1216,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf)
1211 /* Make sure all commits have finished */ 1216 /* Make sure all commits have finished */
1212 synchronize_sched(); 1217 synchronize_sched();
1213 1218
1214 buf->time_start = ftrace_now(buf->cpu); 1219 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1215 1220
1216 for_each_online_cpu(cpu) 1221 for_each_online_cpu(cpu)
1217 ring_buffer_reset_cpu(buffer, cpu); 1222 ring_buffer_reset_cpu(buffer, cpu);
@@ -1219,23 +1224,17 @@ void tracing_reset_online_cpus(struct trace_buffer *buf)
1219 ring_buffer_record_enable(buffer); 1224 ring_buffer_record_enable(buffer);
1220} 1225}
1221 1226
1222void tracing_reset_current(int cpu) 1227/* Must have trace_types_lock held */
1223{
1224 tracing_reset(&global_trace.trace_buffer, cpu);
1225}
1226
1227void tracing_reset_all_online_cpus(void) 1228void tracing_reset_all_online_cpus(void)
1228{ 1229{
1229 struct trace_array *tr; 1230 struct trace_array *tr;
1230 1231
1231 mutex_lock(&trace_types_lock);
1232 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 1232 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1233 tracing_reset_online_cpus(&tr->trace_buffer); 1233 tracing_reset_online_cpus(&tr->trace_buffer);
1234#ifdef CONFIG_TRACER_MAX_TRACE 1234#ifdef CONFIG_TRACER_MAX_TRACE
1235 tracing_reset_online_cpus(&tr->max_buffer); 1235 tracing_reset_online_cpus(&tr->max_buffer);
1236#endif 1236#endif
1237 } 1237 }
1238 mutex_unlock(&trace_types_lock);
1239} 1238}
1240 1239
1241#define SAVED_CMDLINES 128 1240#define SAVED_CMDLINES 128
@@ -2843,6 +2842,17 @@ static int s_show(struct seq_file *m, void *v)
2843 return 0; 2842 return 0;
2844} 2843}
2845 2844
2845/*
2846 * Should be used after trace_array_get(), trace_types_lock
2847 * ensures that i_cdev was already initialized.
2848 */
2849static inline int tracing_get_cpu(struct inode *inode)
2850{
2851 if (inode->i_cdev) /* See trace_create_cpu_file() */
2852 return (long)inode->i_cdev - 1;
2853 return RING_BUFFER_ALL_CPUS;
2854}
2855
2846static const struct seq_operations tracer_seq_ops = { 2856static const struct seq_operations tracer_seq_ops = {
2847 .start = s_start, 2857 .start = s_start,
2848 .next = s_next, 2858 .next = s_next,
@@ -2851,9 +2861,9 @@ static const struct seq_operations tracer_seq_ops = {
2851}; 2861};
2852 2862
2853static struct trace_iterator * 2863static struct trace_iterator *
2854__tracing_open(struct trace_array *tr, struct trace_cpu *tc, 2864__tracing_open(struct inode *inode, struct file *file, bool snapshot)
2855 struct inode *inode, struct file *file, bool snapshot)
2856{ 2865{
2866 struct trace_array *tr = inode->i_private;
2857 struct trace_iterator *iter; 2867 struct trace_iterator *iter;
2858 int cpu; 2868 int cpu;
2859 2869
@@ -2894,8 +2904,8 @@ __tracing_open(struct trace_array *tr, struct trace_cpu *tc,
2894 iter->trace_buffer = &tr->trace_buffer; 2904 iter->trace_buffer = &tr->trace_buffer;
2895 iter->snapshot = snapshot; 2905 iter->snapshot = snapshot;
2896 iter->pos = -1; 2906 iter->pos = -1;
2907 iter->cpu_file = tracing_get_cpu(inode);
2897 mutex_init(&iter->mutex); 2908 mutex_init(&iter->mutex);
2898 iter->cpu_file = tc->cpu;
2899 2909
2900 /* Notify the tracer early; before we stop tracing. */ 2910 /* Notify the tracer early; before we stop tracing. */
2901 if (iter->trace && iter->trace->open) 2911 if (iter->trace && iter->trace->open)
@@ -2971,45 +2981,22 @@ static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
2971 filp->private_data = inode->i_private; 2981 filp->private_data = inode->i_private;
2972 2982
2973 return 0; 2983 return 0;
2974
2975}
2976
2977static int tracing_open_generic_tc(struct inode *inode, struct file *filp)
2978{
2979 struct trace_cpu *tc = inode->i_private;
2980 struct trace_array *tr = tc->tr;
2981
2982 if (tracing_disabled)
2983 return -ENODEV;
2984
2985 if (trace_array_get(tr) < 0)
2986 return -ENODEV;
2987
2988 filp->private_data = inode->i_private;
2989
2990 return 0;
2991
2992} 2984}
2993 2985
2994static int tracing_release(struct inode *inode, struct file *file) 2986static int tracing_release(struct inode *inode, struct file *file)
2995{ 2987{
2988 struct trace_array *tr = inode->i_private;
2996 struct seq_file *m = file->private_data; 2989 struct seq_file *m = file->private_data;
2997 struct trace_iterator *iter; 2990 struct trace_iterator *iter;
2998 struct trace_array *tr;
2999 int cpu; 2991 int cpu;
3000 2992
3001 /* Writes do not use seq_file, need to grab tr from inode */
3002 if (!(file->f_mode & FMODE_READ)) { 2993 if (!(file->f_mode & FMODE_READ)) {
3003 struct trace_cpu *tc = inode->i_private; 2994 trace_array_put(tr);
3004
3005 trace_array_put(tc->tr);
3006 return 0; 2995 return 0;
3007 } 2996 }
3008 2997
2998 /* Writes do not use seq_file */
3009 iter = m->private; 2999 iter = m->private;
3010 tr = iter->tr;
3011 trace_array_put(tr);
3012
3013 mutex_lock(&trace_types_lock); 3000 mutex_lock(&trace_types_lock);
3014 3001
3015 for_each_tracing_cpu(cpu) { 3002 for_each_tracing_cpu(cpu) {
@@ -3023,6 +3010,9 @@ static int tracing_release(struct inode *inode, struct file *file)
3023 if (!iter->snapshot) 3010 if (!iter->snapshot)
3024 /* reenable tracing if it was previously enabled */ 3011 /* reenable tracing if it was previously enabled */
3025 tracing_start_tr(tr); 3012 tracing_start_tr(tr);
3013
3014 __trace_array_put(tr);
3015
3026 mutex_unlock(&trace_types_lock); 3016 mutex_unlock(&trace_types_lock);
3027 3017
3028 mutex_destroy(&iter->mutex); 3018 mutex_destroy(&iter->mutex);
@@ -3042,15 +3032,6 @@ static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3042 return 0; 3032 return 0;
3043} 3033}
3044 3034
3045static int tracing_release_generic_tc(struct inode *inode, struct file *file)
3046{
3047 struct trace_cpu *tc = inode->i_private;
3048 struct trace_array *tr = tc->tr;
3049
3050 trace_array_put(tr);
3051 return 0;
3052}
3053
3054static int tracing_single_release_tr(struct inode *inode, struct file *file) 3035static int tracing_single_release_tr(struct inode *inode, struct file *file)
3055{ 3036{
3056 struct trace_array *tr = inode->i_private; 3037 struct trace_array *tr = inode->i_private;
@@ -3062,8 +3043,7 @@ static int tracing_single_release_tr(struct inode *inode, struct file *file)
3062 3043
3063static int tracing_open(struct inode *inode, struct file *file) 3044static int tracing_open(struct inode *inode, struct file *file)
3064{ 3045{
3065 struct trace_cpu *tc = inode->i_private; 3046 struct trace_array *tr = inode->i_private;
3066 struct trace_array *tr = tc->tr;
3067 struct trace_iterator *iter; 3047 struct trace_iterator *iter;
3068 int ret = 0; 3048 int ret = 0;
3069 3049
@@ -3071,16 +3051,17 @@ static int tracing_open(struct inode *inode, struct file *file)
3071 return -ENODEV; 3051 return -ENODEV;
3072 3052
3073 /* If this file was open for write, then erase contents */ 3053 /* If this file was open for write, then erase contents */
3074 if ((file->f_mode & FMODE_WRITE) && 3054 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3075 (file->f_flags & O_TRUNC)) { 3055 int cpu = tracing_get_cpu(inode);
3076 if (tc->cpu == RING_BUFFER_ALL_CPUS) 3056
3057 if (cpu == RING_BUFFER_ALL_CPUS)
3077 tracing_reset_online_cpus(&tr->trace_buffer); 3058 tracing_reset_online_cpus(&tr->trace_buffer);
3078 else 3059 else
3079 tracing_reset(&tr->trace_buffer, tc->cpu); 3060 tracing_reset(&tr->trace_buffer, cpu);
3080 } 3061 }
3081 3062
3082 if (file->f_mode & FMODE_READ) { 3063 if (file->f_mode & FMODE_READ) {
3083 iter = __tracing_open(tr, tc, inode, file, false); 3064 iter = __tracing_open(inode, file, false);
3084 if (IS_ERR(iter)) 3065 if (IS_ERR(iter))
3085 ret = PTR_ERR(iter); 3066 ret = PTR_ERR(iter);
3086 else if (trace_flags & TRACE_ITER_LATENCY_FMT) 3067 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -3447,6 +3428,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3447static int tracing_trace_options_open(struct inode *inode, struct file *file) 3428static int tracing_trace_options_open(struct inode *inode, struct file *file)
3448{ 3429{
3449 struct trace_array *tr = inode->i_private; 3430 struct trace_array *tr = inode->i_private;
3431 int ret;
3450 3432
3451 if (tracing_disabled) 3433 if (tracing_disabled)
3452 return -ENODEV; 3434 return -ENODEV;
@@ -3454,7 +3436,11 @@ static int tracing_trace_options_open(struct inode *inode, struct file *file)
3454 if (trace_array_get(tr) < 0) 3436 if (trace_array_get(tr) < 0)
3455 return -ENODEV; 3437 return -ENODEV;
3456 3438
3457 return single_open(file, tracing_trace_options_show, inode->i_private); 3439 ret = single_open(file, tracing_trace_options_show, inode->i_private);
3440 if (ret < 0)
3441 trace_array_put(tr);
3442
3443 return ret;
3458} 3444}
3459 3445
3460static const struct file_operations tracing_iter_fops = { 3446static const struct file_operations tracing_iter_fops = {
@@ -3537,14 +3523,14 @@ static const char readme_msg[] =
3537 "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n" 3523 "\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
3538 "\t\t\t Read the contents for more information\n" 3524 "\t\t\t Read the contents for more information\n"
3539#endif 3525#endif
3540#ifdef CONFIG_STACKTRACE 3526#ifdef CONFIG_STACK_TRACER
3541 " stack_trace\t\t- Shows the max stack trace when active\n" 3527 " stack_trace\t\t- Shows the max stack trace when active\n"
3542 " stack_max_size\t- Shows current max stack size that was traced\n" 3528 " stack_max_size\t- Shows current max stack size that was traced\n"
3543 "\t\t\t Write into this file to reset the max size (trigger a new trace)\n" 3529 "\t\t\t Write into this file to reset the max size (trigger a new trace)\n"
3544#ifdef CONFIG_DYNAMIC_FTRACE 3530#ifdef CONFIG_DYNAMIC_FTRACE
3545 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n" 3531 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
3546#endif 3532#endif
3547#endif /* CONFIG_STACKTRACE */ 3533#endif /* CONFIG_STACK_TRACER */
3548; 3534;
3549 3535
3550static ssize_t 3536static ssize_t
@@ -3941,8 +3927,7 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
3941 3927
3942static int tracing_open_pipe(struct inode *inode, struct file *filp) 3928static int tracing_open_pipe(struct inode *inode, struct file *filp)
3943{ 3929{
3944 struct trace_cpu *tc = inode->i_private; 3930 struct trace_array *tr = inode->i_private;
3945 struct trace_array *tr = tc->tr;
3946 struct trace_iterator *iter; 3931 struct trace_iterator *iter;
3947 int ret = 0; 3932 int ret = 0;
3948 3933
@@ -3958,6 +3943,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
3958 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 3943 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
3959 if (!iter) { 3944 if (!iter) {
3960 ret = -ENOMEM; 3945 ret = -ENOMEM;
3946 __trace_array_put(tr);
3961 goto out; 3947 goto out;
3962 } 3948 }
3963 3949
@@ -3987,9 +3973,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
3987 if (trace_clocks[tr->clock_id].in_ns) 3973 if (trace_clocks[tr->clock_id].in_ns)
3988 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 3974 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3989 3975
3990 iter->cpu_file = tc->cpu; 3976 iter->tr = tr;
3991 iter->tr = tc->tr; 3977 iter->trace_buffer = &tr->trace_buffer;
3992 iter->trace_buffer = &tc->tr->trace_buffer; 3978 iter->cpu_file = tracing_get_cpu(inode);
3993 mutex_init(&iter->mutex); 3979 mutex_init(&iter->mutex);
3994 filp->private_data = iter; 3980 filp->private_data = iter;
3995 3981
@@ -4012,8 +3998,7 @@ fail:
4012static int tracing_release_pipe(struct inode *inode, struct file *file) 3998static int tracing_release_pipe(struct inode *inode, struct file *file)
4013{ 3999{
4014 struct trace_iterator *iter = file->private_data; 4000 struct trace_iterator *iter = file->private_data;
4015 struct trace_cpu *tc = inode->i_private; 4001 struct trace_array *tr = inode->i_private;
4016 struct trace_array *tr = tc->tr;
4017 4002
4018 mutex_lock(&trace_types_lock); 4003 mutex_lock(&trace_types_lock);
4019 4004
@@ -4166,6 +4151,7 @@ waitagain:
4166 memset(&iter->seq, 0, 4151 memset(&iter->seq, 0,
4167 sizeof(struct trace_iterator) - 4152 sizeof(struct trace_iterator) -
4168 offsetof(struct trace_iterator, seq)); 4153 offsetof(struct trace_iterator, seq));
4154 cpumask_clear(iter->started);
4169 iter->pos = -1; 4155 iter->pos = -1;
4170 4156
4171 trace_event_read_lock(); 4157 trace_event_read_lock();
@@ -4366,15 +4352,16 @@ static ssize_t
4366tracing_entries_read(struct file *filp, char __user *ubuf, 4352tracing_entries_read(struct file *filp, char __user *ubuf,
4367 size_t cnt, loff_t *ppos) 4353 size_t cnt, loff_t *ppos)
4368{ 4354{
4369 struct trace_cpu *tc = filp->private_data; 4355 struct inode *inode = file_inode(filp);
4370 struct trace_array *tr = tc->tr; 4356 struct trace_array *tr = inode->i_private;
4357 int cpu = tracing_get_cpu(inode);
4371 char buf[64]; 4358 char buf[64];
4372 int r = 0; 4359 int r = 0;
4373 ssize_t ret; 4360 ssize_t ret;
4374 4361
4375 mutex_lock(&trace_types_lock); 4362 mutex_lock(&trace_types_lock);
4376 4363
4377 if (tc->cpu == RING_BUFFER_ALL_CPUS) { 4364 if (cpu == RING_BUFFER_ALL_CPUS) {
4378 int cpu, buf_size_same; 4365 int cpu, buf_size_same;
4379 unsigned long size; 4366 unsigned long size;
4380 4367
@@ -4401,7 +4388,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
4401 } else 4388 } else
4402 r = sprintf(buf, "X\n"); 4389 r = sprintf(buf, "X\n");
4403 } else 4390 } else
4404 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10); 4391 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4405 4392
4406 mutex_unlock(&trace_types_lock); 4393 mutex_unlock(&trace_types_lock);
4407 4394
@@ -4413,7 +4400,8 @@ static ssize_t
4413tracing_entries_write(struct file *filp, const char __user *ubuf, 4400tracing_entries_write(struct file *filp, const char __user *ubuf,
4414 size_t cnt, loff_t *ppos) 4401 size_t cnt, loff_t *ppos)
4415{ 4402{
4416 struct trace_cpu *tc = filp->private_data; 4403 struct inode *inode = file_inode(filp);
4404 struct trace_array *tr = inode->i_private;
4417 unsigned long val; 4405 unsigned long val;
4418 int ret; 4406 int ret;
4419 4407
@@ -4427,8 +4415,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
4427 4415
4428 /* value is in KB */ 4416 /* value is in KB */
4429 val <<= 10; 4417 val <<= 10;
4430 4418 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4431 ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu);
4432 if (ret < 0) 4419 if (ret < 0)
4433 return ret; 4420 return ret;
4434 4421
@@ -4482,7 +4469,7 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
4482 4469
4483 /* disable tracing ? */ 4470 /* disable tracing ? */
4484 if (trace_flags & TRACE_ITER_STOP_ON_FREE) 4471 if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4485 tracing_off(); 4472 tracer_tracing_off(tr);
4486 /* resize the ring buffer to 0 */ 4473 /* resize the ring buffer to 0 */
4487 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); 4474 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4488 4475
@@ -4647,12 +4634,12 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4647 * New clock may not be consistent with the previous clock. 4634 * New clock may not be consistent with the previous clock.
4648 * Reset the buffer so that it doesn't have incomparable timestamps. 4635 * Reset the buffer so that it doesn't have incomparable timestamps.
4649 */ 4636 */
4650 tracing_reset_online_cpus(&global_trace.trace_buffer); 4637 tracing_reset_online_cpus(&tr->trace_buffer);
4651 4638
4652#ifdef CONFIG_TRACER_MAX_TRACE 4639#ifdef CONFIG_TRACER_MAX_TRACE
4653 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer) 4640 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4654 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); 4641 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4655 tracing_reset_online_cpus(&global_trace.max_buffer); 4642 tracing_reset_online_cpus(&tr->max_buffer);
4656#endif 4643#endif
4657 4644
4658 mutex_unlock(&trace_types_lock); 4645 mutex_unlock(&trace_types_lock);
@@ -4689,8 +4676,7 @@ struct ftrace_buffer_info {
4689#ifdef CONFIG_TRACER_SNAPSHOT 4676#ifdef CONFIG_TRACER_SNAPSHOT
4690static int tracing_snapshot_open(struct inode *inode, struct file *file) 4677static int tracing_snapshot_open(struct inode *inode, struct file *file)
4691{ 4678{
4692 struct trace_cpu *tc = inode->i_private; 4679 struct trace_array *tr = inode->i_private;
4693 struct trace_array *tr = tc->tr;
4694 struct trace_iterator *iter; 4680 struct trace_iterator *iter;
4695 struct seq_file *m; 4681 struct seq_file *m;
4696 int ret = 0; 4682 int ret = 0;
@@ -4699,26 +4685,29 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file)
4699 return -ENODEV; 4685 return -ENODEV;
4700 4686
4701 if (file->f_mode & FMODE_READ) { 4687 if (file->f_mode & FMODE_READ) {
4702 iter = __tracing_open(tr, tc, inode, file, true); 4688 iter = __tracing_open(inode, file, true);
4703 if (IS_ERR(iter)) 4689 if (IS_ERR(iter))
4704 ret = PTR_ERR(iter); 4690 ret = PTR_ERR(iter);
4705 } else { 4691 } else {
4706 /* Writes still need the seq_file to hold the private data */ 4692 /* Writes still need the seq_file to hold the private data */
4693 ret = -ENOMEM;
4707 m = kzalloc(sizeof(*m), GFP_KERNEL); 4694 m = kzalloc(sizeof(*m), GFP_KERNEL);
4708 if (!m) 4695 if (!m)
4709 return -ENOMEM; 4696 goto out;
4710 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 4697 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4711 if (!iter) { 4698 if (!iter) {
4712 kfree(m); 4699 kfree(m);
4713 return -ENOMEM; 4700 goto out;
4714 } 4701 }
4702 ret = 0;
4703
4715 iter->tr = tr; 4704 iter->tr = tr;
4716 iter->trace_buffer = &tc->tr->max_buffer; 4705 iter->trace_buffer = &tr->max_buffer;
4717 iter->cpu_file = tc->cpu; 4706 iter->cpu_file = tracing_get_cpu(inode);
4718 m->private = iter; 4707 m->private = iter;
4719 file->private_data = m; 4708 file->private_data = m;
4720 } 4709 }
4721 4710out:
4722 if (ret < 0) 4711 if (ret < 0)
4723 trace_array_put(tr); 4712 trace_array_put(tr);
4724 4713
@@ -4873,11 +4862,11 @@ static const struct file_operations tracing_pipe_fops = {
4873}; 4862};
4874 4863
4875static const struct file_operations tracing_entries_fops = { 4864static const struct file_operations tracing_entries_fops = {
4876 .open = tracing_open_generic_tc, 4865 .open = tracing_open_generic_tr,
4877 .read = tracing_entries_read, 4866 .read = tracing_entries_read,
4878 .write = tracing_entries_write, 4867 .write = tracing_entries_write,
4879 .llseek = generic_file_llseek, 4868 .llseek = generic_file_llseek,
4880 .release = tracing_release_generic_tc, 4869 .release = tracing_release_generic_tr,
4881}; 4870};
4882 4871
4883static const struct file_operations tracing_total_entries_fops = { 4872static const struct file_operations tracing_total_entries_fops = {
@@ -4929,8 +4918,7 @@ static const struct file_operations snapshot_raw_fops = {
4929 4918
4930static int tracing_buffers_open(struct inode *inode, struct file *filp) 4919static int tracing_buffers_open(struct inode *inode, struct file *filp)
4931{ 4920{
4932 struct trace_cpu *tc = inode->i_private; 4921 struct trace_array *tr = inode->i_private;
4933 struct trace_array *tr = tc->tr;
4934 struct ftrace_buffer_info *info; 4922 struct ftrace_buffer_info *info;
4935 int ret; 4923 int ret;
4936 4924
@@ -4948,10 +4936,8 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
4948 4936
4949 mutex_lock(&trace_types_lock); 4937 mutex_lock(&trace_types_lock);
4950 4938
4951 tr->ref++;
4952
4953 info->iter.tr = tr; 4939 info->iter.tr = tr;
4954 info->iter.cpu_file = tc->cpu; 4940 info->iter.cpu_file = tracing_get_cpu(inode);
4955 info->iter.trace = tr->current_trace; 4941 info->iter.trace = tr->current_trace;
4956 info->iter.trace_buffer = &tr->trace_buffer; 4942 info->iter.trace_buffer = &tr->trace_buffer;
4957 info->spare = NULL; 4943 info->spare = NULL;
@@ -5268,14 +5254,14 @@ static ssize_t
5268tracing_stats_read(struct file *filp, char __user *ubuf, 5254tracing_stats_read(struct file *filp, char __user *ubuf,
5269 size_t count, loff_t *ppos) 5255 size_t count, loff_t *ppos)
5270{ 5256{
5271 struct trace_cpu *tc = filp->private_data; 5257 struct inode *inode = file_inode(filp);
5272 struct trace_array *tr = tc->tr; 5258 struct trace_array *tr = inode->i_private;
5273 struct trace_buffer *trace_buf = &tr->trace_buffer; 5259 struct trace_buffer *trace_buf = &tr->trace_buffer;
5260 int cpu = tracing_get_cpu(inode);
5274 struct trace_seq *s; 5261 struct trace_seq *s;
5275 unsigned long cnt; 5262 unsigned long cnt;
5276 unsigned long long t; 5263 unsigned long long t;
5277 unsigned long usec_rem; 5264 unsigned long usec_rem;
5278 int cpu = tc->cpu;
5279 5265
5280 s = kmalloc(sizeof(*s), GFP_KERNEL); 5266 s = kmalloc(sizeof(*s), GFP_KERNEL);
5281 if (!s) 5267 if (!s)
@@ -5328,9 +5314,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
5328} 5314}
5329 5315
5330static const struct file_operations tracing_stats_fops = { 5316static const struct file_operations tracing_stats_fops = {
5331 .open = tracing_open_generic, 5317 .open = tracing_open_generic_tr,
5332 .read = tracing_stats_read, 5318 .read = tracing_stats_read,
5333 .llseek = generic_file_llseek, 5319 .llseek = generic_file_llseek,
5320 .release = tracing_release_generic_tr,
5334}; 5321};
5335 5322
5336#ifdef CONFIG_DYNAMIC_FTRACE 5323#ifdef CONFIG_DYNAMIC_FTRACE
@@ -5519,10 +5506,20 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5519 return tr->percpu_dir; 5506 return tr->percpu_dir;
5520} 5507}
5521 5508
5509static struct dentry *
5510trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5511 void *data, long cpu, const struct file_operations *fops)
5512{
5513 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5514
5515 if (ret) /* See tracing_get_cpu() */
5516 ret->d_inode->i_cdev = (void *)(cpu + 1);
5517 return ret;
5518}
5519
5522static void 5520static void
5523tracing_init_debugfs_percpu(struct trace_array *tr, long cpu) 5521tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5524{ 5522{
5525 struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu);
5526 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); 5523 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5527 struct dentry *d_cpu; 5524 struct dentry *d_cpu;
5528 char cpu_dir[30]; /* 30 characters should be more than enough */ 5525 char cpu_dir[30]; /* 30 characters should be more than enough */
@@ -5538,28 +5535,28 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5538 } 5535 }
5539 5536
5540 /* per cpu trace_pipe */ 5537 /* per cpu trace_pipe */
5541 trace_create_file("trace_pipe", 0444, d_cpu, 5538 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5542 (void *)&data->trace_cpu, &tracing_pipe_fops); 5539 tr, cpu, &tracing_pipe_fops);
5543 5540
5544 /* per cpu trace */ 5541 /* per cpu trace */
5545 trace_create_file("trace", 0644, d_cpu, 5542 trace_create_cpu_file("trace", 0644, d_cpu,
5546 (void *)&data->trace_cpu, &tracing_fops); 5543 tr, cpu, &tracing_fops);
5547 5544
5548 trace_create_file("trace_pipe_raw", 0444, d_cpu, 5545 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5549 (void *)&data->trace_cpu, &tracing_buffers_fops); 5546 tr, cpu, &tracing_buffers_fops);
5550 5547
5551 trace_create_file("stats", 0444, d_cpu, 5548 trace_create_cpu_file("stats", 0444, d_cpu,
5552 (void *)&data->trace_cpu, &tracing_stats_fops); 5549 tr, cpu, &tracing_stats_fops);
5553 5550
5554 trace_create_file("buffer_size_kb", 0444, d_cpu, 5551 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5555 (void *)&data->trace_cpu, &tracing_entries_fops); 5552 tr, cpu, &tracing_entries_fops);
5556 5553
5557#ifdef CONFIG_TRACER_SNAPSHOT 5554#ifdef CONFIG_TRACER_SNAPSHOT
5558 trace_create_file("snapshot", 0644, d_cpu, 5555 trace_create_cpu_file("snapshot", 0644, d_cpu,
5559 (void *)&data->trace_cpu, &snapshot_fops); 5556 tr, cpu, &snapshot_fops);
5560 5557
5561 trace_create_file("snapshot_raw", 0444, d_cpu, 5558 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5562 (void *)&data->trace_cpu, &snapshot_raw_fops); 5559 tr, cpu, &snapshot_raw_fops);
5563#endif 5560#endif
5564} 5561}
5565 5562
@@ -5868,17 +5865,6 @@ struct dentry *trace_instance_dir;
5868static void 5865static void
5869init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer); 5866init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5870 5867
5871static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf)
5872{
5873 int cpu;
5874
5875 for_each_tracing_cpu(cpu) {
5876 memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu));
5877 per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu;
5878 per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr;
5879 }
5880}
5881
5882static int 5868static int
5883allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size) 5869allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5884{ 5870{
@@ -5896,8 +5882,6 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
5896 return -ENOMEM; 5882 return -ENOMEM;
5897 } 5883 }
5898 5884
5899 init_trace_buffers(tr, buf);
5900
5901 /* Allocate the first page for all buffers */ 5885 /* Allocate the first page for all buffers */
5902 set_buffer_entries(&tr->trace_buffer, 5886 set_buffer_entries(&tr->trace_buffer,
5903 ring_buffer_size(tr->trace_buffer.buffer, 0)); 5887 ring_buffer_size(tr->trace_buffer.buffer, 0));
@@ -5964,17 +5948,15 @@ static int new_instance_create(const char *name)
5964 if (allocate_trace_buffers(tr, trace_buf_size) < 0) 5948 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
5965 goto out_free_tr; 5949 goto out_free_tr;
5966 5950
5967 /* Holder for file callbacks */
5968 tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
5969 tr->trace_cpu.tr = tr;
5970
5971 tr->dir = debugfs_create_dir(name, trace_instance_dir); 5951 tr->dir = debugfs_create_dir(name, trace_instance_dir);
5972 if (!tr->dir) 5952 if (!tr->dir)
5973 goto out_free_tr; 5953 goto out_free_tr;
5974 5954
5975 ret = event_trace_add_tracer(tr->dir, tr); 5955 ret = event_trace_add_tracer(tr->dir, tr);
5976 if (ret) 5956 if (ret) {
5957 debugfs_remove_recursive(tr->dir);
5977 goto out_free_tr; 5958 goto out_free_tr;
5959 }
5978 5960
5979 init_tracer_debugfs(tr, tr->dir); 5961 init_tracer_debugfs(tr, tr->dir);
5980 5962
@@ -6120,13 +6102,13 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6120 tr, &tracing_iter_fops); 6102 tr, &tracing_iter_fops);
6121 6103
6122 trace_create_file("trace", 0644, d_tracer, 6104 trace_create_file("trace", 0644, d_tracer,
6123 (void *)&tr->trace_cpu, &tracing_fops); 6105 tr, &tracing_fops);
6124 6106
6125 trace_create_file("trace_pipe", 0444, d_tracer, 6107 trace_create_file("trace_pipe", 0444, d_tracer,
6126 (void *)&tr->trace_cpu, &tracing_pipe_fops); 6108 tr, &tracing_pipe_fops);
6127 6109
6128 trace_create_file("buffer_size_kb", 0644, d_tracer, 6110 trace_create_file("buffer_size_kb", 0644, d_tracer,
6129 (void *)&tr->trace_cpu, &tracing_entries_fops); 6111 tr, &tracing_entries_fops);
6130 6112
6131 trace_create_file("buffer_total_size_kb", 0444, d_tracer, 6113 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6132 tr, &tracing_total_entries_fops); 6114 tr, &tracing_total_entries_fops);
@@ -6141,11 +6123,11 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6141 &trace_clock_fops); 6123 &trace_clock_fops);
6142 6124
6143 trace_create_file("tracing_on", 0644, d_tracer, 6125 trace_create_file("tracing_on", 0644, d_tracer,
6144 tr, &rb_simple_fops); 6126 tr, &rb_simple_fops);
6145 6127
6146#ifdef CONFIG_TRACER_SNAPSHOT 6128#ifdef CONFIG_TRACER_SNAPSHOT
6147 trace_create_file("snapshot", 0644, d_tracer, 6129 trace_create_file("snapshot", 0644, d_tracer,
6148 (void *)&tr->trace_cpu, &snapshot_fops); 6130 tr, &snapshot_fops);
6149#endif 6131#endif
6150 6132
6151 for_each_tracing_cpu(cpu) 6133 for_each_tracing_cpu(cpu)
@@ -6439,10 +6421,6 @@ __init static int tracer_alloc_buffers(void)
6439 6421
6440 global_trace.flags = TRACE_ARRAY_FL_GLOBAL; 6422 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6441 6423
6442 /* Holder for file callbacks */
6443 global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
6444 global_trace.trace_cpu.tr = &global_trace;
6445
6446 INIT_LIST_HEAD(&global_trace.systems); 6424 INIT_LIST_HEAD(&global_trace.systems);
6447 INIT_LIST_HEAD(&global_trace.events); 6425 INIT_LIST_HEAD(&global_trace.events);
6448 list_add(&global_trace.list, &ftrace_trace_arrays); 6426 list_add(&global_trace.list, &ftrace_trace_arrays);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4a4f6e1828b6..afaae41b0a02 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -130,19 +130,12 @@ enum trace_flag_type {
130 130
131struct trace_array; 131struct trace_array;
132 132
133struct trace_cpu {
134 struct trace_array *tr;
135 struct dentry *dir;
136 int cpu;
137};
138
139/* 133/*
140 * The CPU trace array - it consists of thousands of trace entries 134 * The CPU trace array - it consists of thousands of trace entries
141 * plus some other descriptor data: (for example which task started 135 * plus some other descriptor data: (for example which task started
142 * the trace, etc.) 136 * the trace, etc.)
143 */ 137 */
144struct trace_array_cpu { 138struct trace_array_cpu {
145 struct trace_cpu trace_cpu;
146 atomic_t disabled; 139 atomic_t disabled;
147 void *buffer_page; /* ring buffer spare */ 140 void *buffer_page; /* ring buffer spare */
148 141
@@ -196,7 +189,6 @@ struct trace_array {
196 bool allocated_snapshot; 189 bool allocated_snapshot;
197#endif 190#endif
198 int buffer_disabled; 191 int buffer_disabled;
199 struct trace_cpu trace_cpu; /* place holder */
200#ifdef CONFIG_FTRACE_SYSCALLS 192#ifdef CONFIG_FTRACE_SYSCALLS
201 int sys_refcount_enter; 193 int sys_refcount_enter;
202 int sys_refcount_exit; 194 int sys_refcount_exit;
@@ -214,7 +206,6 @@ struct trace_array {
214 struct dentry *event_dir; 206 struct dentry *event_dir;
215 struct list_head systems; 207 struct list_head systems;
216 struct list_head events; 208 struct list_head events;
217 struct task_struct *waiter;
218 int ref; 209 int ref;
219}; 210};
220 211
@@ -680,6 +671,15 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
680 struct trace_array *tr); 671 struct trace_array *tr);
681extern int trace_selftest_startup_branch(struct tracer *trace, 672extern int trace_selftest_startup_branch(struct tracer *trace,
682 struct trace_array *tr); 673 struct trace_array *tr);
674/*
675 * Tracer data references selftest functions that only occur
676 * on boot up. These can be __init functions. Thus, when selftests
677 * are enabled, then the tracers need to reference __init functions.
678 */
679#define __tracer_data __refdata
680#else
681/* Tracers are seldom changed. Optimize when selftests are disabled. */
682#define __tracer_data __read_mostly
683#endif /* CONFIG_FTRACE_STARTUP_TEST */ 683#endif /* CONFIG_FTRACE_STARTUP_TEST */
684 684
685extern void *head_page(struct trace_array_cpu *data); 685extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 84b1e045faba..80c36bcf66e8 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -236,6 +236,10 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
236 236
237 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); 237 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
238 238
239 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
240 "perf buffer not large enough"))
241 return NULL;
242
239 pc = preempt_count(); 243 pc = preempt_count();
240 244
241 *rctxp = perf_swevent_get_recursion_context(); 245 *rctxp = perf_swevent_get_recursion_context();
@@ -266,6 +270,10 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
266 struct pt_regs regs; 270 struct pt_regs regs;
267 int rctx; 271 int rctx;
268 272
273 head = this_cpu_ptr(event_function.perf_events);
274 if (hlist_empty(head))
275 return;
276
269#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ 277#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
270 sizeof(u64)) - sizeof(u32)) 278 sizeof(u64)) - sizeof(u32))
271 279
@@ -279,8 +287,6 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
279 287
280 entry->ip = ip; 288 entry->ip = ip;
281 entry->parent_ip = parent_ip; 289 entry->parent_ip = parent_ip;
282
283 head = this_cpu_ptr(event_function.perf_events);
284 perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, 290 perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
285 1, &regs, head, NULL); 291 1, &regs, head, NULL);
286 292
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7d854290bf81..29a7ebcfb426 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -409,33 +409,42 @@ static void put_system(struct ftrace_subsystem_dir *dir)
409 mutex_unlock(&event_mutex); 409 mutex_unlock(&event_mutex);
410} 410}
411 411
412/* 412static void remove_subsystem(struct ftrace_subsystem_dir *dir)
413 * Open and update trace_array ref count.
414 * Must have the current trace_array passed to it.
415 */
416static int tracing_open_generic_file(struct inode *inode, struct file *filp)
417{ 413{
418 struct ftrace_event_file *file = inode->i_private; 414 if (!dir)
419 struct trace_array *tr = file->tr; 415 return;
420 int ret;
421 416
422 if (trace_array_get(tr) < 0) 417 if (!--dir->nr_events) {
423 return -ENODEV; 418 debugfs_remove_recursive(dir->entry);
419 list_del(&dir->list);
420 __put_system_dir(dir);
421 }
422}
424 423
425 ret = tracing_open_generic(inode, filp); 424static void *event_file_data(struct file *filp)
426 if (ret < 0) 425{
427 trace_array_put(tr); 426 return ACCESS_ONCE(file_inode(filp)->i_private);
428 return ret;
429} 427}
430 428
431static int tracing_release_generic_file(struct inode *inode, struct file *filp) 429static void remove_event_file_dir(struct ftrace_event_file *file)
432{ 430{
433 struct ftrace_event_file *file = inode->i_private; 431 struct dentry *dir = file->dir;
434 struct trace_array *tr = file->tr; 432 struct dentry *child;
435 433
436 trace_array_put(tr); 434 if (dir) {
435 spin_lock(&dir->d_lock); /* probably unneeded */
436 list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) {
437 if (child->d_inode) /* probably unneeded */
438 child->d_inode->i_private = NULL;
439 }
440 spin_unlock(&dir->d_lock);
437 441
438 return 0; 442 debugfs_remove_recursive(dir);
443 }
444
445 list_del(&file->list);
446 remove_subsystem(file->system);
447 kmem_cache_free(file_cachep, file);
439} 448}
440 449
441/* 450/*
@@ -679,15 +688,25 @@ static ssize_t
679event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 688event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
680 loff_t *ppos) 689 loff_t *ppos)
681{ 690{
682 struct ftrace_event_file *file = filp->private_data; 691 struct ftrace_event_file *file;
692 unsigned long flags;
683 char buf[4] = "0"; 693 char buf[4] = "0";
684 694
685 if (file->flags & FTRACE_EVENT_FL_ENABLED && 695 mutex_lock(&event_mutex);
686 !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) 696 file = event_file_data(filp);
697 if (likely(file))
698 flags = file->flags;
699 mutex_unlock(&event_mutex);
700
701 if (!file)
702 return -ENODEV;
703
704 if (flags & FTRACE_EVENT_FL_ENABLED &&
705 !(flags & FTRACE_EVENT_FL_SOFT_DISABLED))
687 strcpy(buf, "1"); 706 strcpy(buf, "1");
688 707
689 if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED || 708 if (flags & FTRACE_EVENT_FL_SOFT_DISABLED ||
690 file->flags & FTRACE_EVENT_FL_SOFT_MODE) 709 flags & FTRACE_EVENT_FL_SOFT_MODE)
691 strcat(buf, "*"); 710 strcat(buf, "*");
692 711
693 strcat(buf, "\n"); 712 strcat(buf, "\n");
@@ -699,13 +718,10 @@ static ssize_t
699event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, 718event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
700 loff_t *ppos) 719 loff_t *ppos)
701{ 720{
702 struct ftrace_event_file *file = filp->private_data; 721 struct ftrace_event_file *file;
703 unsigned long val; 722 unsigned long val;
704 int ret; 723 int ret;
705 724
706 if (!file)
707 return -EINVAL;
708
709 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 725 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
710 if (ret) 726 if (ret)
711 return ret; 727 return ret;
@@ -717,8 +733,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
717 switch (val) { 733 switch (val) {
718 case 0: 734 case 0:
719 case 1: 735 case 1:
736 ret = -ENODEV;
720 mutex_lock(&event_mutex); 737 mutex_lock(&event_mutex);
721 ret = ftrace_event_enable_disable(file, val); 738 file = event_file_data(filp);
739 if (likely(file))
740 ret = ftrace_event_enable_disable(file, val);
722 mutex_unlock(&event_mutex); 741 mutex_unlock(&event_mutex);
723 break; 742 break;
724 743
@@ -825,65 +844,39 @@ enum {
825 844
826static void *f_next(struct seq_file *m, void *v, loff_t *pos) 845static void *f_next(struct seq_file *m, void *v, loff_t *pos)
827{ 846{
828 struct ftrace_event_call *call = m->private; 847 struct ftrace_event_call *call = event_file_data(m->private);
829 struct ftrace_event_field *field;
830 struct list_head *common_head = &ftrace_common_fields; 848 struct list_head *common_head = &ftrace_common_fields;
831 struct list_head *head = trace_get_fields(call); 849 struct list_head *head = trace_get_fields(call);
850 struct list_head *node = v;
832 851
833 (*pos)++; 852 (*pos)++;
834 853
835 switch ((unsigned long)v) { 854 switch ((unsigned long)v) {
836 case FORMAT_HEADER: 855 case FORMAT_HEADER:
837 if (unlikely(list_empty(common_head))) 856 node = common_head;
838 return NULL; 857 break;
839
840 field = list_entry(common_head->prev,
841 struct ftrace_event_field, link);
842 return field;
843 858
844 case FORMAT_FIELD_SEPERATOR: 859 case FORMAT_FIELD_SEPERATOR:
845 if (unlikely(list_empty(head))) 860 node = head;
846 return NULL; 861 break;
847
848 field = list_entry(head->prev, struct ftrace_event_field, link);
849 return field;
850 862
851 case FORMAT_PRINTFMT: 863 case FORMAT_PRINTFMT:
852 /* all done */ 864 /* all done */
853 return NULL; 865 return NULL;
854 } 866 }
855 867
856 field = v; 868 node = node->prev;
857 if (field->link.prev == common_head) 869 if (node == common_head)
858 return (void *)FORMAT_FIELD_SEPERATOR; 870 return (void *)FORMAT_FIELD_SEPERATOR;
859 else if (field->link.prev == head) 871 else if (node == head)
860 return (void *)FORMAT_PRINTFMT; 872 return (void *)FORMAT_PRINTFMT;
861 873 else
862 field = list_entry(field->link.prev, struct ftrace_event_field, link); 874 return node;
863
864 return field;
865}
866
867static void *f_start(struct seq_file *m, loff_t *pos)
868{
869 loff_t l = 0;
870 void *p;
871
872 /* Start by showing the header */
873 if (!*pos)
874 return (void *)FORMAT_HEADER;
875
876 p = (void *)FORMAT_HEADER;
877 do {
878 p = f_next(m, p, &l);
879 } while (p && l < *pos);
880
881 return p;
882} 875}
883 876
884static int f_show(struct seq_file *m, void *v) 877static int f_show(struct seq_file *m, void *v)
885{ 878{
886 struct ftrace_event_call *call = m->private; 879 struct ftrace_event_call *call = event_file_data(m->private);
887 struct ftrace_event_field *field; 880 struct ftrace_event_field *field;
888 const char *array_descriptor; 881 const char *array_descriptor;
889 882
@@ -904,8 +897,7 @@ static int f_show(struct seq_file *m, void *v)
904 return 0; 897 return 0;
905 } 898 }
906 899
907 field = v; 900 field = list_entry(v, struct ftrace_event_field, link);
908
909 /* 901 /*
910 * Smartly shows the array type(except dynamic array). 902 * Smartly shows the array type(except dynamic array).
911 * Normal: 903 * Normal:
@@ -932,8 +924,25 @@ static int f_show(struct seq_file *m, void *v)
932 return 0; 924 return 0;
933} 925}
934 926
927static void *f_start(struct seq_file *m, loff_t *pos)
928{
929 void *p = (void *)FORMAT_HEADER;
930 loff_t l = 0;
931
932 /* ->stop() is called even if ->start() fails */
933 mutex_lock(&event_mutex);
934 if (!event_file_data(m->private))
935 return ERR_PTR(-ENODEV);
936
937 while (l < *pos && p)
938 p = f_next(m, p, &l);
939
940 return p;
941}
942
935static void f_stop(struct seq_file *m, void *p) 943static void f_stop(struct seq_file *m, void *p)
936{ 944{
945 mutex_unlock(&event_mutex);
937} 946}
938 947
939static const struct seq_operations trace_format_seq_ops = { 948static const struct seq_operations trace_format_seq_ops = {
@@ -945,7 +954,6 @@ static const struct seq_operations trace_format_seq_ops = {
945 954
946static int trace_format_open(struct inode *inode, struct file *file) 955static int trace_format_open(struct inode *inode, struct file *file)
947{ 956{
948 struct ftrace_event_call *call = inode->i_private;
949 struct seq_file *m; 957 struct seq_file *m;
950 int ret; 958 int ret;
951 959
@@ -954,7 +962,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
954 return ret; 962 return ret;
955 963
956 m = file->private_data; 964 m = file->private_data;
957 m->private = call; 965 m->private = file;
958 966
959 return 0; 967 return 0;
960} 968}
@@ -962,45 +970,47 @@ static int trace_format_open(struct inode *inode, struct file *file)
962static ssize_t 970static ssize_t
963event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 971event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
964{ 972{
965 struct ftrace_event_call *call = filp->private_data; 973 int id = (long)event_file_data(filp);
966 struct trace_seq *s; 974 char buf[32];
967 int r; 975 int len;
968 976
969 if (*ppos) 977 if (*ppos)
970 return 0; 978 return 0;
971 979
972 s = kmalloc(sizeof(*s), GFP_KERNEL); 980 if (unlikely(!id))
973 if (!s) 981 return -ENODEV;
974 return -ENOMEM;
975 982
976 trace_seq_init(s); 983 len = sprintf(buf, "%d\n", id);
977 trace_seq_printf(s, "%d\n", call->event.type);
978 984
979 r = simple_read_from_buffer(ubuf, cnt, ppos, 985 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
980 s->buffer, s->len);
981 kfree(s);
982 return r;
983} 986}
984 987
985static ssize_t 988static ssize_t
986event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, 989event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
987 loff_t *ppos) 990 loff_t *ppos)
988{ 991{
989 struct ftrace_event_call *call = filp->private_data; 992 struct ftrace_event_call *call;
990 struct trace_seq *s; 993 struct trace_seq *s;
991 int r; 994 int r = -ENODEV;
992 995
993 if (*ppos) 996 if (*ppos)
994 return 0; 997 return 0;
995 998
996 s = kmalloc(sizeof(*s), GFP_KERNEL); 999 s = kmalloc(sizeof(*s), GFP_KERNEL);
1000
997 if (!s) 1001 if (!s)
998 return -ENOMEM; 1002 return -ENOMEM;
999 1003
1000 trace_seq_init(s); 1004 trace_seq_init(s);
1001 1005
1002 print_event_filter(call, s); 1006 mutex_lock(&event_mutex);
1003 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); 1007 call = event_file_data(filp);
1008 if (call)
1009 print_event_filter(call, s);
1010 mutex_unlock(&event_mutex);
1011
1012 if (call)
1013 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
1004 1014
1005 kfree(s); 1015 kfree(s);
1006 1016
@@ -1011,9 +1021,9 @@ static ssize_t
1011event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, 1021event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1012 loff_t *ppos) 1022 loff_t *ppos)
1013{ 1023{
1014 struct ftrace_event_call *call = filp->private_data; 1024 struct ftrace_event_call *call;
1015 char *buf; 1025 char *buf;
1016 int err; 1026 int err = -ENODEV;
1017 1027
1018 if (cnt >= PAGE_SIZE) 1028 if (cnt >= PAGE_SIZE)
1019 return -EINVAL; 1029 return -EINVAL;
@@ -1028,7 +1038,12 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1028 } 1038 }
1029 buf[cnt] = '\0'; 1039 buf[cnt] = '\0';
1030 1040
1031 err = apply_event_filter(call, buf); 1041 mutex_lock(&event_mutex);
1042 call = event_file_data(filp);
1043 if (call)
1044 err = apply_event_filter(call, buf);
1045 mutex_unlock(&event_mutex);
1046
1032 free_page((unsigned long) buf); 1047 free_page((unsigned long) buf);
1033 if (err < 0) 1048 if (err < 0)
1034 return err; 1049 return err;
@@ -1218,6 +1233,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1218 1233
1219static int ftrace_event_avail_open(struct inode *inode, struct file *file); 1234static int ftrace_event_avail_open(struct inode *inode, struct file *file);
1220static int ftrace_event_set_open(struct inode *inode, struct file *file); 1235static int ftrace_event_set_open(struct inode *inode, struct file *file);
1236static int ftrace_event_release(struct inode *inode, struct file *file);
1221 1237
1222static const struct seq_operations show_event_seq_ops = { 1238static const struct seq_operations show_event_seq_ops = {
1223 .start = t_start, 1239 .start = t_start,
@@ -1245,14 +1261,13 @@ static const struct file_operations ftrace_set_event_fops = {
1245 .read = seq_read, 1261 .read = seq_read,
1246 .write = ftrace_event_write, 1262 .write = ftrace_event_write,
1247 .llseek = seq_lseek, 1263 .llseek = seq_lseek,
1248 .release = seq_release, 1264 .release = ftrace_event_release,
1249}; 1265};
1250 1266
1251static const struct file_operations ftrace_enable_fops = { 1267static const struct file_operations ftrace_enable_fops = {
1252 .open = tracing_open_generic_file, 1268 .open = tracing_open_generic,
1253 .read = event_enable_read, 1269 .read = event_enable_read,
1254 .write = event_enable_write, 1270 .write = event_enable_write,
1255 .release = tracing_release_generic_file,
1256 .llseek = default_llseek, 1271 .llseek = default_llseek,
1257}; 1272};
1258 1273
@@ -1264,7 +1279,6 @@ static const struct file_operations ftrace_event_format_fops = {
1264}; 1279};
1265 1280
1266static const struct file_operations ftrace_event_id_fops = { 1281static const struct file_operations ftrace_event_id_fops = {
1267 .open = tracing_open_generic,
1268 .read = event_id_read, 1282 .read = event_id_read,
1269 .llseek = default_llseek, 1283 .llseek = default_llseek,
1270}; 1284};
@@ -1323,6 +1337,15 @@ ftrace_event_open(struct inode *inode, struct file *file,
1323 return ret; 1337 return ret;
1324} 1338}
1325 1339
1340static int ftrace_event_release(struct inode *inode, struct file *file)
1341{
1342 struct trace_array *tr = inode->i_private;
1343
1344 trace_array_put(tr);
1345
1346 return seq_release(inode, file);
1347}
1348
1326static int 1349static int
1327ftrace_event_avail_open(struct inode *inode, struct file *file) 1350ftrace_event_avail_open(struct inode *inode, struct file *file)
1328{ 1351{
@@ -1336,12 +1359,19 @@ ftrace_event_set_open(struct inode *inode, struct file *file)
1336{ 1359{
1337 const struct seq_operations *seq_ops = &show_set_event_seq_ops; 1360 const struct seq_operations *seq_ops = &show_set_event_seq_ops;
1338 struct trace_array *tr = inode->i_private; 1361 struct trace_array *tr = inode->i_private;
1362 int ret;
1363
1364 if (trace_array_get(tr) < 0)
1365 return -ENODEV;
1339 1366
1340 if ((file->f_mode & FMODE_WRITE) && 1367 if ((file->f_mode & FMODE_WRITE) &&
1341 (file->f_flags & O_TRUNC)) 1368 (file->f_flags & O_TRUNC))
1342 ftrace_clear_events(tr); 1369 ftrace_clear_events(tr);
1343 1370
1344 return ftrace_event_open(inode, file, seq_ops); 1371 ret = ftrace_event_open(inode, file, seq_ops);
1372 if (ret < 0)
1373 trace_array_put(tr);
1374 return ret;
1345} 1375}
1346 1376
1347static struct event_subsystem * 1377static struct event_subsystem *
@@ -1496,8 +1526,8 @@ event_create_dir(struct dentry *parent,
1496 1526
1497#ifdef CONFIG_PERF_EVENTS 1527#ifdef CONFIG_PERF_EVENTS
1498 if (call->event.type && call->class->reg) 1528 if (call->event.type && call->class->reg)
1499 trace_create_file("id", 0444, file->dir, call, 1529 trace_create_file("id", 0444, file->dir,
1500 id); 1530 (void *)(long)call->event.type, id);
1501#endif 1531#endif
1502 1532
1503 /* 1533 /*
@@ -1522,33 +1552,16 @@ event_create_dir(struct dentry *parent,
1522 return 0; 1552 return 0;
1523} 1553}
1524 1554
1525static void remove_subsystem(struct ftrace_subsystem_dir *dir)
1526{
1527 if (!dir)
1528 return;
1529
1530 if (!--dir->nr_events) {
1531 debugfs_remove_recursive(dir->entry);
1532 list_del(&dir->list);
1533 __put_system_dir(dir);
1534 }
1535}
1536
1537static void remove_event_from_tracers(struct ftrace_event_call *call) 1555static void remove_event_from_tracers(struct ftrace_event_call *call)
1538{ 1556{
1539 struct ftrace_event_file *file; 1557 struct ftrace_event_file *file;
1540 struct trace_array *tr; 1558 struct trace_array *tr;
1541 1559
1542 do_for_each_event_file_safe(tr, file) { 1560 do_for_each_event_file_safe(tr, file) {
1543
1544 if (file->event_call != call) 1561 if (file->event_call != call)
1545 continue; 1562 continue;
1546 1563
1547 list_del(&file->list); 1564 remove_event_file_dir(file);
1548 debugfs_remove_recursive(file->dir);
1549 remove_subsystem(file->system);
1550 kmem_cache_free(file_cachep, file);
1551
1552 /* 1565 /*
1553 * The do_for_each_event_file_safe() is 1566 * The do_for_each_event_file_safe() is
1554 * a double loop. After finding the call for this 1567 * a double loop. After finding the call for this
@@ -1700,16 +1713,53 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
1700 destroy_preds(call); 1713 destroy_preds(call);
1701} 1714}
1702 1715
1716static int probe_remove_event_call(struct ftrace_event_call *call)
1717{
1718 struct trace_array *tr;
1719 struct ftrace_event_file *file;
1720
1721#ifdef CONFIG_PERF_EVENTS
1722 if (call->perf_refcount)
1723 return -EBUSY;
1724#endif
1725 do_for_each_event_file(tr, file) {
1726 if (file->event_call != call)
1727 continue;
1728 /*
1729 * We can't rely on ftrace_event_enable_disable(enable => 0)
1730 * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress
1731 * TRACE_REG_UNREGISTER.
1732 */
1733 if (file->flags & FTRACE_EVENT_FL_ENABLED)
1734 return -EBUSY;
1735 /*
1736 * The do_for_each_event_file_safe() is
1737 * a double loop. After finding the call for this
1738 * trace_array, we use break to jump to the next
1739 * trace_array.
1740 */
1741 break;
1742 } while_for_each_event_file();
1743
1744 __trace_remove_event_call(call);
1745
1746 return 0;
1747}
1748
1703/* Remove an event_call */ 1749/* Remove an event_call */
1704void trace_remove_event_call(struct ftrace_event_call *call) 1750int trace_remove_event_call(struct ftrace_event_call *call)
1705{ 1751{
1752 int ret;
1753
1706 mutex_lock(&trace_types_lock); 1754 mutex_lock(&trace_types_lock);
1707 mutex_lock(&event_mutex); 1755 mutex_lock(&event_mutex);
1708 down_write(&trace_event_sem); 1756 down_write(&trace_event_sem);
1709 __trace_remove_event_call(call); 1757 ret = probe_remove_event_call(call);
1710 up_write(&trace_event_sem); 1758 up_write(&trace_event_sem);
1711 mutex_unlock(&event_mutex); 1759 mutex_unlock(&event_mutex);
1712 mutex_unlock(&trace_types_lock); 1760 mutex_unlock(&trace_types_lock);
1761
1762 return ret;
1713} 1763}
1714 1764
1715#define for_each_event(event, start, end) \ 1765#define for_each_event(event, start, end) \
@@ -2278,12 +2328,8 @@ __trace_remove_event_dirs(struct trace_array *tr)
2278{ 2328{
2279 struct ftrace_event_file *file, *next; 2329 struct ftrace_event_file *file, *next;
2280 2330
2281 list_for_each_entry_safe(file, next, &tr->events, list) { 2331 list_for_each_entry_safe(file, next, &tr->events, list)
2282 list_del(&file->list); 2332 remove_event_file_dir(file);
2283 debugfs_remove_recursive(file->dir);
2284 remove_subsystem(file->system);
2285 kmem_cache_free(file_cachep, file);
2286 }
2287} 2333}
2288 2334
2289static void 2335static void
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 0d883dc057d6..97daa8cf958d 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -637,17 +637,15 @@ static void append_filter_err(struct filter_parse_state *ps,
637 free_page((unsigned long) buf); 637 free_page((unsigned long) buf);
638} 638}
639 639
640/* caller must hold event_mutex */
640void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) 641void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
641{ 642{
642 struct event_filter *filter; 643 struct event_filter *filter = call->filter;
643 644
644 mutex_lock(&event_mutex);
645 filter = call->filter;
646 if (filter && filter->filter_string) 645 if (filter && filter->filter_string)
647 trace_seq_printf(s, "%s\n", filter->filter_string); 646 trace_seq_printf(s, "%s\n", filter->filter_string);
648 else 647 else
649 trace_seq_printf(s, "none\n"); 648 trace_seq_puts(s, "none\n");
650 mutex_unlock(&event_mutex);
651} 649}
652 650
653void print_subsystem_event_filter(struct event_subsystem *system, 651void print_subsystem_event_filter(struct event_subsystem *system,
@@ -660,7 +658,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
660 if (filter && filter->filter_string) 658 if (filter && filter->filter_string)
661 trace_seq_printf(s, "%s\n", filter->filter_string); 659 trace_seq_printf(s, "%s\n", filter->filter_string);
662 else 660 else
663 trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); 661 trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
664 mutex_unlock(&event_mutex); 662 mutex_unlock(&event_mutex);
665} 663}
666 664
@@ -1841,23 +1839,22 @@ static int create_system_filter(struct event_subsystem *system,
1841 return err; 1839 return err;
1842} 1840}
1843 1841
1842/* caller must hold event_mutex */
1844int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1843int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1845{ 1844{
1846 struct event_filter *filter; 1845 struct event_filter *filter;
1847 int err = 0; 1846 int err;
1848
1849 mutex_lock(&event_mutex);
1850 1847
1851 if (!strcmp(strstrip(filter_string), "0")) { 1848 if (!strcmp(strstrip(filter_string), "0")) {
1852 filter_disable(call); 1849 filter_disable(call);
1853 filter = call->filter; 1850 filter = call->filter;
1854 if (!filter) 1851 if (!filter)
1855 goto out_unlock; 1852 return 0;
1856 RCU_INIT_POINTER(call->filter, NULL); 1853 RCU_INIT_POINTER(call->filter, NULL);
1857 /* Make sure the filter is not being used */ 1854 /* Make sure the filter is not being used */
1858 synchronize_sched(); 1855 synchronize_sched();
1859 __free_filter(filter); 1856 __free_filter(filter);
1860 goto out_unlock; 1857 return 0;
1861 } 1858 }
1862 1859
1863 err = create_filter(call, filter_string, true, &filter); 1860 err = create_filter(call, filter_string, true, &filter);
@@ -1884,8 +1881,6 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1884 __free_filter(tmp); 1881 __free_filter(tmp);
1885 } 1882 }
1886 } 1883 }
1887out_unlock:
1888 mutex_unlock(&event_mutex);
1889 1884
1890 return err; 1885 return err;
1891} 1886}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b863f93b30f3..38fe1483c508 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -199,7 +199,7 @@ static int func_set_flag(u32 old_flags, u32 bit, int set)
199 return 0; 199 return 0;
200} 200}
201 201
202static struct tracer function_trace __read_mostly = 202static struct tracer function_trace __tracer_data =
203{ 203{
204 .name = "function", 204 .name = "function",
205 .init = function_trace_init, 205 .init = function_trace_init,
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8388bc99f2ee..b5c09242683d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -446,7 +446,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
446 446
447 /* First spaces to align center */ 447 /* First spaces to align center */
448 for (i = 0; i < spaces / 2; i++) { 448 for (i = 0; i < spaces / 2; i++) {
449 ret = trace_seq_printf(s, " "); 449 ret = trace_seq_putc(s, ' ');
450 if (!ret) 450 if (!ret)
451 return TRACE_TYPE_PARTIAL_LINE; 451 return TRACE_TYPE_PARTIAL_LINE;
452 } 452 }
@@ -457,7 +457,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
457 457
458 /* Last spaces to align center */ 458 /* Last spaces to align center */
459 for (i = 0; i < spaces - (spaces / 2); i++) { 459 for (i = 0; i < spaces - (spaces / 2); i++) {
460 ret = trace_seq_printf(s, " "); 460 ret = trace_seq_putc(s, ' ');
461 if (!ret) 461 if (!ret)
462 return TRACE_TYPE_PARTIAL_LINE; 462 return TRACE_TYPE_PARTIAL_LINE;
463 } 463 }
@@ -503,7 +503,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
503 ------------------------------------------ 503 ------------------------------------------
504 504
505 */ 505 */
506 ret = trace_seq_printf(s, 506 ret = trace_seq_puts(s,
507 " ------------------------------------------\n"); 507 " ------------------------------------------\n");
508 if (!ret) 508 if (!ret)
509 return TRACE_TYPE_PARTIAL_LINE; 509 return TRACE_TYPE_PARTIAL_LINE;
@@ -516,7 +516,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
516 if (ret == TRACE_TYPE_PARTIAL_LINE) 516 if (ret == TRACE_TYPE_PARTIAL_LINE)
517 return TRACE_TYPE_PARTIAL_LINE; 517 return TRACE_TYPE_PARTIAL_LINE;
518 518
519 ret = trace_seq_printf(s, " => "); 519 ret = trace_seq_puts(s, " => ");
520 if (!ret) 520 if (!ret)
521 return TRACE_TYPE_PARTIAL_LINE; 521 return TRACE_TYPE_PARTIAL_LINE;
522 522
@@ -524,7 +524,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
524 if (ret == TRACE_TYPE_PARTIAL_LINE) 524 if (ret == TRACE_TYPE_PARTIAL_LINE)
525 return TRACE_TYPE_PARTIAL_LINE; 525 return TRACE_TYPE_PARTIAL_LINE;
526 526
527 ret = trace_seq_printf(s, 527 ret = trace_seq_puts(s,
528 "\n ------------------------------------------\n\n"); 528 "\n ------------------------------------------\n\n");
529 if (!ret) 529 if (!ret)
530 return TRACE_TYPE_PARTIAL_LINE; 530 return TRACE_TYPE_PARTIAL_LINE;
@@ -645,7 +645,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
645 ret = print_graph_proc(s, pid); 645 ret = print_graph_proc(s, pid);
646 if (ret == TRACE_TYPE_PARTIAL_LINE) 646 if (ret == TRACE_TYPE_PARTIAL_LINE)
647 return TRACE_TYPE_PARTIAL_LINE; 647 return TRACE_TYPE_PARTIAL_LINE;
648 ret = trace_seq_printf(s, " | "); 648 ret = trace_seq_puts(s, " | ");
649 if (!ret) 649 if (!ret)
650 return TRACE_TYPE_PARTIAL_LINE; 650 return TRACE_TYPE_PARTIAL_LINE;
651 } 651 }
@@ -657,9 +657,9 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
657 return ret; 657 return ret;
658 658
659 if (type == TRACE_GRAPH_ENT) 659 if (type == TRACE_GRAPH_ENT)
660 ret = trace_seq_printf(s, "==========>"); 660 ret = trace_seq_puts(s, "==========>");
661 else 661 else
662 ret = trace_seq_printf(s, "<=========="); 662 ret = trace_seq_puts(s, "<==========");
663 663
664 if (!ret) 664 if (!ret)
665 return TRACE_TYPE_PARTIAL_LINE; 665 return TRACE_TYPE_PARTIAL_LINE;
@@ -668,7 +668,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
668 if (ret != TRACE_TYPE_HANDLED) 668 if (ret != TRACE_TYPE_HANDLED)
669 return ret; 669 return ret;
670 670
671 ret = trace_seq_printf(s, "\n"); 671 ret = trace_seq_putc(s, '\n');
672 672
673 if (!ret) 673 if (!ret)
674 return TRACE_TYPE_PARTIAL_LINE; 674 return TRACE_TYPE_PARTIAL_LINE;
@@ -705,13 +705,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
705 len += strlen(nsecs_str); 705 len += strlen(nsecs_str);
706 } 706 }
707 707
708 ret = trace_seq_printf(s, " us "); 708 ret = trace_seq_puts(s, " us ");
709 if (!ret) 709 if (!ret)
710 return TRACE_TYPE_PARTIAL_LINE; 710 return TRACE_TYPE_PARTIAL_LINE;
711 711
712 /* Print remaining spaces to fit the row's width */ 712 /* Print remaining spaces to fit the row's width */
713 for (i = len; i < 7; i++) { 713 for (i = len; i < 7; i++) {
714 ret = trace_seq_printf(s, " "); 714 ret = trace_seq_putc(s, ' ');
715 if (!ret) 715 if (!ret)
716 return TRACE_TYPE_PARTIAL_LINE; 716 return TRACE_TYPE_PARTIAL_LINE;
717 } 717 }
@@ -731,13 +731,13 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
731 /* No real adata, just filling the column with spaces */ 731 /* No real adata, just filling the column with spaces */
732 switch (duration) { 732 switch (duration) {
733 case DURATION_FILL_FULL: 733 case DURATION_FILL_FULL:
734 ret = trace_seq_printf(s, " | "); 734 ret = trace_seq_puts(s, " | ");
735 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 735 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
736 case DURATION_FILL_START: 736 case DURATION_FILL_START:
737 ret = trace_seq_printf(s, " "); 737 ret = trace_seq_puts(s, " ");
738 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 738 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
739 case DURATION_FILL_END: 739 case DURATION_FILL_END:
740 ret = trace_seq_printf(s, " |"); 740 ret = trace_seq_puts(s, " |");
741 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 741 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
742 } 742 }
743 743
@@ -745,10 +745,10 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
745 if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { 745 if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
746 /* Duration exceeded 100 msecs */ 746 /* Duration exceeded 100 msecs */
747 if (duration > 100000ULL) 747 if (duration > 100000ULL)
748 ret = trace_seq_printf(s, "! "); 748 ret = trace_seq_puts(s, "! ");
749 /* Duration exceeded 10 msecs */ 749 /* Duration exceeded 10 msecs */
750 else if (duration > 10000ULL) 750 else if (duration > 10000ULL)
751 ret = trace_seq_printf(s, "+ "); 751 ret = trace_seq_puts(s, "+ ");
752 } 752 }
753 753
754 /* 754 /*
@@ -757,7 +757,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
757 * to fill out the space. 757 * to fill out the space.
758 */ 758 */
759 if (ret == -1) 759 if (ret == -1)
760 ret = trace_seq_printf(s, " "); 760 ret = trace_seq_puts(s, " ");
761 761
762 /* Catching here any failure happenned above */ 762 /* Catching here any failure happenned above */
763 if (!ret) 763 if (!ret)
@@ -767,7 +767,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
767 if (ret != TRACE_TYPE_HANDLED) 767 if (ret != TRACE_TYPE_HANDLED)
768 return ret; 768 return ret;
769 769
770 ret = trace_seq_printf(s, "| "); 770 ret = trace_seq_puts(s, "| ");
771 if (!ret) 771 if (!ret)
772 return TRACE_TYPE_PARTIAL_LINE; 772 return TRACE_TYPE_PARTIAL_LINE;
773 773
@@ -817,7 +817,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
817 817
818 /* Function */ 818 /* Function */
819 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 819 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
820 ret = trace_seq_printf(s, " "); 820 ret = trace_seq_putc(s, ' ');
821 if (!ret) 821 if (!ret)
822 return TRACE_TYPE_PARTIAL_LINE; 822 return TRACE_TYPE_PARTIAL_LINE;
823 } 823 }
@@ -858,7 +858,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
858 858
859 /* Function */ 859 /* Function */
860 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 860 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
861 ret = trace_seq_printf(s, " "); 861 ret = trace_seq_putc(s, ' ');
862 if (!ret) 862 if (!ret)
863 return TRACE_TYPE_PARTIAL_LINE; 863 return TRACE_TYPE_PARTIAL_LINE;
864 } 864 }
@@ -917,7 +917,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
917 if (ret == TRACE_TYPE_PARTIAL_LINE) 917 if (ret == TRACE_TYPE_PARTIAL_LINE)
918 return TRACE_TYPE_PARTIAL_LINE; 918 return TRACE_TYPE_PARTIAL_LINE;
919 919
920 ret = trace_seq_printf(s, " | "); 920 ret = trace_seq_puts(s, " | ");
921 if (!ret) 921 if (!ret)
922 return TRACE_TYPE_PARTIAL_LINE; 922 return TRACE_TYPE_PARTIAL_LINE;
923 } 923 }
@@ -1117,7 +1117,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
1117 1117
1118 /* Closing brace */ 1118 /* Closing brace */
1119 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { 1119 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
1120 ret = trace_seq_printf(s, " "); 1120 ret = trace_seq_putc(s, ' ');
1121 if (!ret) 1121 if (!ret)
1122 return TRACE_TYPE_PARTIAL_LINE; 1122 return TRACE_TYPE_PARTIAL_LINE;
1123 } 1123 }
@@ -1129,7 +1129,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
1129 * belongs to, write out the function name. 1129 * belongs to, write out the function name.
1130 */ 1130 */
1131 if (func_match) { 1131 if (func_match) {
1132 ret = trace_seq_printf(s, "}\n"); 1132 ret = trace_seq_puts(s, "}\n");
1133 if (!ret) 1133 if (!ret)
1134 return TRACE_TYPE_PARTIAL_LINE; 1134 return TRACE_TYPE_PARTIAL_LINE;
1135 } else { 1135 } else {
@@ -1179,13 +1179,13 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1179 /* Indentation */ 1179 /* Indentation */
1180 if (depth > 0) 1180 if (depth > 0)
1181 for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) { 1181 for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) {
1182 ret = trace_seq_printf(s, " "); 1182 ret = trace_seq_putc(s, ' ');
1183 if (!ret) 1183 if (!ret)
1184 return TRACE_TYPE_PARTIAL_LINE; 1184 return TRACE_TYPE_PARTIAL_LINE;
1185 } 1185 }
1186 1186
1187 /* The comment */ 1187 /* The comment */
1188 ret = trace_seq_printf(s, "/* "); 1188 ret = trace_seq_puts(s, "/* ");
1189 if (!ret) 1189 if (!ret)
1190 return TRACE_TYPE_PARTIAL_LINE; 1190 return TRACE_TYPE_PARTIAL_LINE;
1191 1191
@@ -1216,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1216 s->len--; 1216 s->len--;
1217 } 1217 }
1218 1218
1219 ret = trace_seq_printf(s, " */\n"); 1219 ret = trace_seq_puts(s, " */\n");
1220 if (!ret) 1220 if (!ret)
1221 return TRACE_TYPE_PARTIAL_LINE; 1221 return TRACE_TYPE_PARTIAL_LINE;
1222 1222
@@ -1448,7 +1448,7 @@ static struct trace_event graph_trace_ret_event = {
1448 .funcs = &graph_functions 1448 .funcs = &graph_functions
1449}; 1449};
1450 1450
1451static struct tracer graph_trace __read_mostly = { 1451static struct tracer graph_trace __tracer_data = {
1452 .name = "function_graph", 1452 .name = "function_graph",
1453 .open = graph_trace_open, 1453 .open = graph_trace_open,
1454 .pipe_open = graph_trace_open, 1454 .pipe_open = graph_trace_open,
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7ed6976493c8..243f6834d026 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -95,7 +95,7 @@ static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
95} 95}
96 96
97static int register_probe_event(struct trace_probe *tp); 97static int register_probe_event(struct trace_probe *tp);
98static void unregister_probe_event(struct trace_probe *tp); 98static int unregister_probe_event(struct trace_probe *tp);
99 99
100static DEFINE_MUTEX(probe_lock); 100static DEFINE_MUTEX(probe_lock);
101static LIST_HEAD(probe_list); 101static LIST_HEAD(probe_list);
@@ -243,11 +243,11 @@ find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
243static int 243static int
244disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) 244disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
245{ 245{
246 struct event_file_link *link = NULL;
247 int wait = 0;
246 int ret = 0; 248 int ret = 0;
247 249
248 if (file) { 250 if (file) {
249 struct event_file_link *link;
250
251 link = find_event_file_link(tp, file); 251 link = find_event_file_link(tp, file);
252 if (!link) { 252 if (!link) {
253 ret = -EINVAL; 253 ret = -EINVAL;
@@ -255,10 +255,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
255 } 255 }
256 256
257 list_del_rcu(&link->list); 257 list_del_rcu(&link->list);
258 /* synchronize with kprobe_trace_func/kretprobe_trace_func */ 258 wait = 1;
259 synchronize_sched();
260 kfree(link);
261
262 if (!list_empty(&tp->files)) 259 if (!list_empty(&tp->files))
263 goto out; 260 goto out;
264 261
@@ -271,8 +268,22 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
271 disable_kretprobe(&tp->rp); 268 disable_kretprobe(&tp->rp);
272 else 269 else
273 disable_kprobe(&tp->rp.kp); 270 disable_kprobe(&tp->rp.kp);
271 wait = 1;
274 } 272 }
275 out: 273 out:
274 if (wait) {
275 /*
276 * Synchronize with kprobe_trace_func/kretprobe_trace_func
277 * to ensure disabled (all running handlers are finished).
278 * This is not only for kfree(), but also the caller,
279 * trace_remove_event_call() supposes it for releasing
280 * event_call related objects, which will be accessed in
281 * the kprobe_trace_func/kretprobe_trace_func.
282 */
283 synchronize_sched();
284 kfree(link); /* Ignored if link == NULL */
285 }
286
276 return ret; 287 return ret;
277} 288}
278 289
@@ -340,9 +351,12 @@ static int unregister_trace_probe(struct trace_probe *tp)
340 if (trace_probe_is_enabled(tp)) 351 if (trace_probe_is_enabled(tp))
341 return -EBUSY; 352 return -EBUSY;
342 353
354 /* Will fail if probe is being used by ftrace or perf */
355 if (unregister_probe_event(tp))
356 return -EBUSY;
357
343 __unregister_trace_probe(tp); 358 __unregister_trace_probe(tp);
344 list_del(&tp->list); 359 list_del(&tp->list);
345 unregister_probe_event(tp);
346 360
347 return 0; 361 return 0;
348} 362}
@@ -621,7 +635,9 @@ static int release_all_trace_probes(void)
621 /* TODO: Use batch unregistration */ 635 /* TODO: Use batch unregistration */
622 while (!list_empty(&probe_list)) { 636 while (!list_empty(&probe_list)) {
623 tp = list_entry(probe_list.next, struct trace_probe, list); 637 tp = list_entry(probe_list.next, struct trace_probe, list);
624 unregister_trace_probe(tp); 638 ret = unregister_trace_probe(tp);
639 if (ret)
640 goto end;
625 free_trace_probe(tp); 641 free_trace_probe(tp);
626 } 642 }
627 643
@@ -1087,9 +1103,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
1087 __size = sizeof(*entry) + tp->size + dsize; 1103 __size = sizeof(*entry) + tp->size + dsize;
1088 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1104 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1089 size -= sizeof(u32); 1105 size -= sizeof(u32);
1090 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1091 "profile buffer not large enough"))
1092 return;
1093 1106
1094 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1107 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1095 if (!entry) 1108 if (!entry)
@@ -1120,9 +1133,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
1120 __size = sizeof(*entry) + tp->size + dsize; 1133 __size = sizeof(*entry) + tp->size + dsize;
1121 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1134 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1122 size -= sizeof(u32); 1135 size -= sizeof(u32);
1123 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1124 "profile buffer not large enough"))
1125 return;
1126 1136
1127 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1137 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1128 if (!entry) 1138 if (!entry)
@@ -1242,11 +1252,15 @@ static int register_probe_event(struct trace_probe *tp)
1242 return ret; 1252 return ret;
1243} 1253}
1244 1254
1245static void unregister_probe_event(struct trace_probe *tp) 1255static int unregister_probe_event(struct trace_probe *tp)
1246{ 1256{
1257 int ret;
1258
1247 /* tp->event is unregistered in trace_remove_event_call() */ 1259 /* tp->event is unregistered in trace_remove_event_call() */
1248 trace_remove_event_call(&tp->call); 1260 ret = trace_remove_event_call(&tp->call);
1249 kfree(tp->call.print_fmt); 1261 if (!ret)
1262 kfree(tp->call.print_fmt);
1263 return ret;
1250} 1264}
1251 1265
1252/* Make a debugfs interface for controlling probe points */ 1266/* Make a debugfs interface for controlling probe points */
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index a5e8f4878bfa..b3dcfb2f0fef 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -90,7 +90,7 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
90 if (drv) 90 if (drv)
91 ret += trace_seq_printf(s, " %s\n", drv->name); 91 ret += trace_seq_printf(s, " %s\n", drv->name);
92 else 92 else
93 ret += trace_seq_printf(s, " \n"); 93 ret += trace_seq_puts(s, " \n");
94 return ret; 94 return ret;
95} 95}
96 96
@@ -107,7 +107,7 @@ static void mmio_pipe_open(struct trace_iterator *iter)
107 struct header_iter *hiter; 107 struct header_iter *hiter;
108 struct trace_seq *s = &iter->seq; 108 struct trace_seq *s = &iter->seq;
109 109
110 trace_seq_printf(s, "VERSION 20070824\n"); 110 trace_seq_puts(s, "VERSION 20070824\n");
111 111
112 hiter = kzalloc(sizeof(*hiter), GFP_KERNEL); 112 hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
113 if (!hiter) 113 if (!hiter)
@@ -209,7 +209,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
209 (rw->value >> 0) & 0xff, rw->pc, 0); 209 (rw->value >> 0) & 0xff, rw->pc, 0);
210 break; 210 break;
211 default: 211 default:
212 ret = trace_seq_printf(s, "rw what?\n"); 212 ret = trace_seq_puts(s, "rw what?\n");
213 break; 213 break;
214 } 214 }
215 if (ret) 215 if (ret)
@@ -245,7 +245,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
245 secs, usec_rem, m->map_id, 0UL, 0); 245 secs, usec_rem, m->map_id, 0UL, 0);
246 break; 246 break;
247 default: 247 default:
248 ret = trace_seq_printf(s, "map what?\n"); 248 ret = trace_seq_puts(s, "map what?\n");
249 break; 249 break;
250 } 250 }
251 if (ret) 251 if (ret)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bb922d9ee51b..34e7cbac0c9c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -78,7 +78,7 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
78 78
79 trace_assign_type(field, entry); 79 trace_assign_type(field, entry);
80 80
81 ret = trace_seq_printf(s, "%s", field->buf); 81 ret = trace_seq_puts(s, field->buf);
82 if (!ret) 82 if (!ret)
83 return TRACE_TYPE_PARTIAL_LINE; 83 return TRACE_TYPE_PARTIAL_LINE;
84 84
@@ -558,14 +558,14 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
558 if (ret) 558 if (ret)
559 ret = trace_seq_puts(s, "??"); 559 ret = trace_seq_puts(s, "??");
560 if (ret) 560 if (ret)
561 ret = trace_seq_puts(s, "\n"); 561 ret = trace_seq_putc(s, '\n');
562 continue; 562 continue;
563 } 563 }
564 if (!ret) 564 if (!ret)
565 break; 565 break;
566 if (ret) 566 if (ret)
567 ret = seq_print_user_ip(s, mm, ip, sym_flags); 567 ret = seq_print_user_ip(s, mm, ip, sym_flags);
568 ret = trace_seq_puts(s, "\n"); 568 ret = trace_seq_putc(s, '\n');
569 } 569 }
570 570
571 if (mm) 571 if (mm)
@@ -579,7 +579,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
579 int ret; 579 int ret;
580 580
581 if (!ip) 581 if (!ip)
582 return trace_seq_printf(s, "0"); 582 return trace_seq_putc(s, '0');
583 583
584 if (sym_flags & TRACE_ITER_SYM_OFFSET) 584 if (sym_flags & TRACE_ITER_SYM_OFFSET)
585 ret = seq_print_sym_offset(s, "%s", ip); 585 ret = seq_print_sym_offset(s, "%s", ip);
@@ -964,14 +964,14 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
964 goto partial; 964 goto partial;
965 965
966 if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { 966 if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
967 if (!trace_seq_printf(s, " <-")) 967 if (!trace_seq_puts(s, " <-"))
968 goto partial; 968 goto partial;
969 if (!seq_print_ip_sym(s, 969 if (!seq_print_ip_sym(s,
970 field->parent_ip, 970 field->parent_ip,
971 flags)) 971 flags))
972 goto partial; 972 goto partial;
973 } 973 }
974 if (!trace_seq_printf(s, "\n")) 974 if (!trace_seq_putc(s, '\n'))
975 goto partial; 975 goto partial;
976 976
977 return TRACE_TYPE_HANDLED; 977 return TRACE_TYPE_HANDLED;
@@ -1210,7 +1210,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1210 1210
1211 if (!seq_print_ip_sym(s, *p, flags)) 1211 if (!seq_print_ip_sym(s, *p, flags))
1212 goto partial; 1212 goto partial;
1213 if (!trace_seq_puts(s, "\n")) 1213 if (!trace_seq_putc(s, '\n'))
1214 goto partial; 1214 goto partial;
1215 } 1215 }
1216 1216
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 322e16461072..8fd03657bc7d 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -175,7 +175,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
175 entry = syscall_nr_to_meta(syscall); 175 entry = syscall_nr_to_meta(syscall);
176 176
177 if (!entry) { 177 if (!entry) {
178 trace_seq_printf(s, "\n"); 178 trace_seq_putc(s, '\n');
179 return TRACE_TYPE_HANDLED; 179 return TRACE_TYPE_HANDLED;
180 } 180 }
181 181
@@ -566,15 +566,15 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
566 if (!sys_data) 566 if (!sys_data)
567 return; 567 return;
568 568
569 head = this_cpu_ptr(sys_data->enter_event->perf_events);
570 if (hlist_empty(head))
571 return;
572
569 /* get the size after alignment with the u32 buffer size field */ 573 /* get the size after alignment with the u32 buffer size field */
570 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec); 574 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
571 size = ALIGN(size + sizeof(u32), sizeof(u64)); 575 size = ALIGN(size + sizeof(u32), sizeof(u64));
572 size -= sizeof(u32); 576 size -= sizeof(u32);
573 577
574 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
575 "perf buffer not large enough"))
576 return;
577
578 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, 578 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
579 sys_data->enter_event->event.type, regs, &rctx); 579 sys_data->enter_event->event.type, regs, &rctx);
580 if (!rec) 580 if (!rec)
@@ -583,8 +583,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
583 rec->nr = syscall_nr; 583 rec->nr = syscall_nr;
584 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 584 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
585 (unsigned long *)&rec->args); 585 (unsigned long *)&rec->args);
586
587 head = this_cpu_ptr(sys_data->enter_event->perf_events);
588 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); 586 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
589} 587}
590 588
@@ -642,18 +640,14 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
642 if (!sys_data) 640 if (!sys_data)
643 return; 641 return;
644 642
643 head = this_cpu_ptr(sys_data->exit_event->perf_events);
644 if (hlist_empty(head))
645 return;
646
645 /* We can probably do that at build time */ 647 /* We can probably do that at build time */
646 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); 648 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
647 size -= sizeof(u32); 649 size -= sizeof(u32);
648 650
649 /*
650 * Impossible, but be paranoid with the future
651 * How to put this check outside runtime?
652 */
653 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
654 "exit event has grown above perf buffer size"))
655 return;
656
657 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, 651 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
658 sys_data->exit_event->event.type, regs, &rctx); 652 sys_data->exit_event->event.type, regs, &rctx);
659 if (!rec) 653 if (!rec)
@@ -661,8 +655,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
661 655
662 rec->nr = syscall_nr; 656 rec->nr = syscall_nr;
663 rec->ret = syscall_get_return_value(current, regs); 657 rec->ret = syscall_get_return_value(current, regs);
664
665 head = this_cpu_ptr(sys_data->exit_event->perf_events);
666 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); 658 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
667} 659}
668 660
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index d5d0cd368a56..272261b5f94f 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -70,7 +70,7 @@ struct trace_uprobe {
70 (sizeof(struct probe_arg) * (n))) 70 (sizeof(struct probe_arg) * (n)))
71 71
72static int register_uprobe_event(struct trace_uprobe *tu); 72static int register_uprobe_event(struct trace_uprobe *tu);
73static void unregister_uprobe_event(struct trace_uprobe *tu); 73static int unregister_uprobe_event(struct trace_uprobe *tu);
74 74
75static DEFINE_MUTEX(uprobe_lock); 75static DEFINE_MUTEX(uprobe_lock);
76static LIST_HEAD(uprobe_list); 76static LIST_HEAD(uprobe_list);
@@ -164,11 +164,17 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou
164} 164}
165 165
166/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */ 166/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
167static void unregister_trace_uprobe(struct trace_uprobe *tu) 167static int unregister_trace_uprobe(struct trace_uprobe *tu)
168{ 168{
169 int ret;
170
171 ret = unregister_uprobe_event(tu);
172 if (ret)
173 return ret;
174
169 list_del(&tu->list); 175 list_del(&tu->list);
170 unregister_uprobe_event(tu);
171 free_trace_uprobe(tu); 176 free_trace_uprobe(tu);
177 return 0;
172} 178}
173 179
174/* Register a trace_uprobe and probe_event */ 180/* Register a trace_uprobe and probe_event */
@@ -181,9 +187,12 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
181 187
182 /* register as an event */ 188 /* register as an event */
183 old_tp = find_probe_event(tu->call.name, tu->call.class->system); 189 old_tp = find_probe_event(tu->call.name, tu->call.class->system);
184 if (old_tp) 190 if (old_tp) {
185 /* delete old event */ 191 /* delete old event */
186 unregister_trace_uprobe(old_tp); 192 ret = unregister_trace_uprobe(old_tp);
193 if (ret)
194 goto end;
195 }
187 196
188 ret = register_uprobe_event(tu); 197 ret = register_uprobe_event(tu);
189 if (ret) { 198 if (ret) {
@@ -256,6 +265,8 @@ static int create_trace_uprobe(int argc, char **argv)
256 group = UPROBE_EVENT_SYSTEM; 265 group = UPROBE_EVENT_SYSTEM;
257 266
258 if (is_delete) { 267 if (is_delete) {
268 int ret;
269
259 if (!event) { 270 if (!event) {
260 pr_info("Delete command needs an event name.\n"); 271 pr_info("Delete command needs an event name.\n");
261 return -EINVAL; 272 return -EINVAL;
@@ -269,9 +280,9 @@ static int create_trace_uprobe(int argc, char **argv)
269 return -ENOENT; 280 return -ENOENT;
270 } 281 }
271 /* delete an event */ 282 /* delete an event */
272 unregister_trace_uprobe(tu); 283 ret = unregister_trace_uprobe(tu);
273 mutex_unlock(&uprobe_lock); 284 mutex_unlock(&uprobe_lock);
274 return 0; 285 return ret;
275 } 286 }
276 287
277 if (argc < 2) { 288 if (argc < 2) {
@@ -408,16 +419,20 @@ fail_address_parse:
408 return ret; 419 return ret;
409} 420}
410 421
411static void cleanup_all_probes(void) 422static int cleanup_all_probes(void)
412{ 423{
413 struct trace_uprobe *tu; 424 struct trace_uprobe *tu;
425 int ret = 0;
414 426
415 mutex_lock(&uprobe_lock); 427 mutex_lock(&uprobe_lock);
416 while (!list_empty(&uprobe_list)) { 428 while (!list_empty(&uprobe_list)) {
417 tu = list_entry(uprobe_list.next, struct trace_uprobe, list); 429 tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
418 unregister_trace_uprobe(tu); 430 ret = unregister_trace_uprobe(tu);
431 if (ret)
432 break;
419 } 433 }
420 mutex_unlock(&uprobe_lock); 434 mutex_unlock(&uprobe_lock);
435 return ret;
421} 436}
422 437
423/* Probes listing interfaces */ 438/* Probes listing interfaces */
@@ -462,8 +477,13 @@ static const struct seq_operations probes_seq_op = {
462 477
463static int probes_open(struct inode *inode, struct file *file) 478static int probes_open(struct inode *inode, struct file *file)
464{ 479{
465 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) 480 int ret;
466 cleanup_all_probes(); 481
482 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
483 ret = cleanup_all_probes();
484 if (ret)
485 return ret;
486 }
467 487
468 return seq_open(file, &probes_seq_op); 488 return seq_open(file, &probes_seq_op);
469} 489}
@@ -818,8 +838,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
818 838
819 size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); 839 size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
820 size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32); 840 size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
821 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
822 return;
823 841
824 preempt_disable(); 842 preempt_disable();
825 head = this_cpu_ptr(call->perf_events); 843 head = this_cpu_ptr(call->perf_events);
@@ -970,12 +988,17 @@ static int register_uprobe_event(struct trace_uprobe *tu)
970 return ret; 988 return ret;
971} 989}
972 990
973static void unregister_uprobe_event(struct trace_uprobe *tu) 991static int unregister_uprobe_event(struct trace_uprobe *tu)
974{ 992{
993 int ret;
994
975 /* tu->event is unregistered in trace_remove_event_call() */ 995 /* tu->event is unregistered in trace_remove_event_call() */
976 trace_remove_event_call(&tu->call); 996 ret = trace_remove_event_call(&tu->call);
997 if (ret)
998 return ret;
977 kfree(tu->call.print_fmt); 999 kfree(tu->call.print_fmt);
978 tu->call.print_fmt = NULL; 1000 tu->call.print_fmt = NULL;
1001 return 0;
979} 1002}
980 1003
981/* Make a trace interface for controling probe points */ 1004/* Make a trace interface for controling probe points */
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index d8c30db06c5b..9064b919a406 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -62,6 +62,9 @@ int create_user_ns(struct cred *new)
62 kgid_t group = new->egid; 62 kgid_t group = new->egid;
63 int ret; 63 int ret;
64 64
65 if (parent_ns->level > 32)
66 return -EUSERS;
67
65 /* 68 /*
66 * Verify that we can not violate the policy of which files 69 * Verify that we can not violate the policy of which files
67 * may be accessed that is specified by the root directory, 70 * may be accessed that is specified by the root directory,
@@ -92,6 +95,7 @@ int create_user_ns(struct cred *new)
92 atomic_set(&ns->count, 1); 95 atomic_set(&ns->count, 1);
93 /* Leave the new->user_ns reference with the new user namespace. */ 96 /* Leave the new->user_ns reference with the new user namespace. */
94 ns->parent = parent_ns; 97 ns->parent = parent_ns;
98 ns->level = parent_ns->level + 1;
95 ns->owner = owner; 99 ns->owner = owner;
96 ns->group = group; 100 ns->group = group;
97 101
@@ -105,16 +109,21 @@ int create_user_ns(struct cred *new)
105int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) 109int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
106{ 110{
107 struct cred *cred; 111 struct cred *cred;
112 int err = -ENOMEM;
108 113
109 if (!(unshare_flags & CLONE_NEWUSER)) 114 if (!(unshare_flags & CLONE_NEWUSER))
110 return 0; 115 return 0;
111 116
112 cred = prepare_creds(); 117 cred = prepare_creds();
113 if (!cred) 118 if (cred) {
114 return -ENOMEM; 119 err = create_user_ns(cred);
120 if (err)
121 put_cred(cred);
122 else
123 *new_cred = cred;
124 }
115 125
116 *new_cred = cred; 126 return err;
117 return create_user_ns(cred);
118} 127}
119 128
120void free_user_ns(struct user_namespace *ns) 129void free_user_ns(struct user_namespace *ns)
diff --git a/kernel/wait.c b/kernel/wait.c
index ce0daa320a26..d550920e040c 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -333,7 +333,8 @@ int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
333 prepare_to_wait(wq, &q->wait, mode); 333 prepare_to_wait(wq, &q->wait, mode);
334 val = q->key.flags; 334 val = q->key.flags;
335 if (atomic_read(val) == 0) 335 if (atomic_read(val) == 0)
336 ret = (*action)(val); 336 break;
337 ret = (*action)(val);
337 } while (!ret && atomic_read(val) != 0); 338 } while (!ret && atomic_read(val) != 0);
338 finish_wait(wq, &q->wait); 339 finish_wait(wq, &q->wait);
339 return ret; 340 return ret;
@@ -362,8 +363,7 @@ EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
362 363
363/** 364/**
364 * wake_up_atomic_t - Wake up a waiter on a atomic_t 365 * wake_up_atomic_t - Wake up a waiter on a atomic_t
365 * @word: The word being waited on, a kernel virtual address 366 * @p: The atomic_t being waited on, a kernel virtual address
366 * @bit: The bit of the word being waited on
367 * 367 *
368 * Wake up anyone waiting for the atomic_t to go to zero. 368 * Wake up anyone waiting for the atomic_t to go to zero.
369 * 369 *
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0b72e816b8d0..7f5d4be22034 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2817,6 +2817,19 @@ already_gone:
2817 return false; 2817 return false;
2818} 2818}
2819 2819
2820static bool __flush_work(struct work_struct *work)
2821{
2822 struct wq_barrier barr;
2823
2824 if (start_flush_work(work, &barr)) {
2825 wait_for_completion(&barr.done);
2826 destroy_work_on_stack(&barr.work);
2827 return true;
2828 } else {
2829 return false;
2830 }
2831}
2832
2820/** 2833/**
2821 * flush_work - wait for a work to finish executing the last queueing instance 2834 * flush_work - wait for a work to finish executing the last queueing instance
2822 * @work: the work to flush 2835 * @work: the work to flush
@@ -2830,18 +2843,10 @@ already_gone:
2830 */ 2843 */
2831bool flush_work(struct work_struct *work) 2844bool flush_work(struct work_struct *work)
2832{ 2845{
2833 struct wq_barrier barr;
2834
2835 lock_map_acquire(&work->lockdep_map); 2846 lock_map_acquire(&work->lockdep_map);
2836 lock_map_release(&work->lockdep_map); 2847 lock_map_release(&work->lockdep_map);
2837 2848
2838 if (start_flush_work(work, &barr)) { 2849 return __flush_work(work);
2839 wait_for_completion(&barr.done);
2840 destroy_work_on_stack(&barr.work);
2841 return true;
2842 } else {
2843 return false;
2844 }
2845} 2850}
2846EXPORT_SYMBOL_GPL(flush_work); 2851EXPORT_SYMBOL_GPL(flush_work);
2847 2852
@@ -3411,6 +3416,12 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
3411{ 3416{
3412 to->nice = from->nice; 3417 to->nice = from->nice;
3413 cpumask_copy(to->cpumask, from->cpumask); 3418 cpumask_copy(to->cpumask, from->cpumask);
3419 /*
3420 * Unlike hash and equality test, this function doesn't ignore
3421 * ->no_numa as it is used for both pool and wq attrs. Instead,
3422 * get_unbound_pool() explicitly clears ->no_numa after copying.
3423 */
3424 to->no_numa = from->no_numa;
3414} 3425}
3415 3426
3416/* hash value of the content of @attr */ 3427/* hash value of the content of @attr */
@@ -3578,6 +3589,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3578 lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ 3589 lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
3579 copy_workqueue_attrs(pool->attrs, attrs); 3590 copy_workqueue_attrs(pool->attrs, attrs);
3580 3591
3592 /*
3593 * no_numa isn't a worker_pool attribute, always clear it. See
3594 * 'struct workqueue_attrs' comments for detail.
3595 */
3596 pool->attrs->no_numa = false;
3597
3581 /* if cpumask is contained inside a NUMA node, we belong to that node */ 3598 /* if cpumask is contained inside a NUMA node, we belong to that node */
3582 if (wq_numa_enabled) { 3599 if (wq_numa_enabled) {
3583 for_each_node(node) { 3600 for_each_node(node) {
@@ -4756,7 +4773,14 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
4756 4773
4757 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); 4774 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
4758 schedule_work_on(cpu, &wfc.work); 4775 schedule_work_on(cpu, &wfc.work);
4759 flush_work(&wfc.work); 4776
4777 /*
4778 * The work item is on-stack and can't lead to deadlock through
4779 * flushing. Use __flush_work() to avoid spurious lockdep warnings
4780 * when work_on_cpu()s are nested.
4781 */
4782 __flush_work(&wfc.work);
4783
4760 return wfc.ret; 4784 return wfc.ret;
4761} 4785}
4762EXPORT_SYMBOL_GPL(work_on_cpu); 4786EXPORT_SYMBOL_GPL(work_on_cpu);