aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-02-24 02:41:07 -0500
committerIngo Molnar <mingo@kernel.org>2015-02-24 02:41:07 -0500
commit2ae79026818e7d49fead82b79b1a543e3b9c8a23 (patch)
treec7ee7bd8b37b0880918d361839fd95988fac2dac /kernel
parent1a99367023f6ac664365a37fa508b059e31d0e88 (diff)
parentc517d838eb7d07bbe9507871fab3931deccff539 (diff)
Merge tag 'v4.0-rc1' into locking/core, to refresh the tree before merging new changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile7
-rw-r--r--kernel/acct.c94
-rw-r--r--kernel/audit.h17
-rw-r--r--kernel/auditfilter.c2
-rw-r--r--kernel/auditsc.c176
-rw-r--r--kernel/cgroup.c12
-rw-r--r--kernel/compat.c5
-rw-r--r--kernel/cpuset.c44
-rw-r--r--kernel/debug/debug_core.c19
-rw-r--r--kernel/debug/kdb/kdb_io.c46
-rw-r--r--kernel/debug/kdb/kdb_main.c16
-rw-r--r--kernel/debug/kdb/kdb_private.h4
-rw-r--r--kernel/events/Makefile2
-rw-r--r--kernel/events/core.c26
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/fork.c17
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/gcov/Makefile36
-rw-r--r--kernel/irq/manage.c3
-rw-r--r--kernel/irq/proc.c11
-rw-r--r--kernel/kexec.c25
-rw-r--r--kernel/kprobes.c22
-rw-r--r--kernel/livepatch/Kconfig18
-rw-r--r--kernel/livepatch/Makefile3
-rw-r--r--kernel/livepatch/core.c1015
-rw-r--r--kernel/locking/Makefile8
-rw-r--r--kernel/locking/spinlock.c8
-rw-r--r--kernel/module.c58
-rw-r--r--kernel/padata.c11
-rw-r--r--kernel/panic.c2
-rw-r--r--kernel/power/process.c75
-rw-r--r--kernel/power/qos.c91
-rw-r--r--kernel/power/snapshot.c11
-rw-r--r--kernel/power/suspend.c43
-rw-r--r--kernel/printk/printk.c14
-rw-r--r--kernel/profile.c3
-rw-r--r--kernel/ptrace.c1
-rw-r--r--kernel/rcu/tree_plugin.h6
-rw-r--r--kernel/resource.c25
-rw-r--r--kernel/sched/Makefile2
-rw-r--r--kernel/sched/auto_group.c6
-rw-r--r--kernel/sched/clock.c13
-rw-r--r--kernel/sched/completion.c19
-rw-r--r--kernel/sched/core.c123
-rw-r--r--kernel/sched/deadline.c33
-rw-r--r--kernel/sched/idle.c16
-rw-r--r--kernel/sched/sched.h76
-rw-r--r--kernel/sched/stats.c11
-rw-r--r--kernel/seccomp.c4
-rw-r--r--kernel/signal.c6
-rw-r--r--kernel/sys.c12
-rw-r--r--kernel/sysctl.c3
-rw-r--r--kernel/taskstats.c13
-rw-r--r--kernel/time/Makefile2
-rw-r--r--kernel/time/alarmtimer.c2
-rw-r--r--kernel/time/clocksource.c76
-rw-r--r--kernel/time/hrtimer.c2
-rw-r--r--kernel/time/ntp.c10
-rw-r--r--kernel/time/posix-cpu-timers.c3
-rw-r--r--kernel/time/tick-common.c50
-rw-r--r--kernel/time/tick-sched.c11
-rw-r--r--kernel/time/timecounter.c112
-rw-r--r--kernel/time/timekeeping.c48
-rw-r--r--kernel/time/timekeeping.h2
-rw-r--r--kernel/trace/Makefile4
-rw-r--r--kernel/trace/ftrace.c2
-rw-r--r--kernel/trace/power-traces.c1
-rw-r--r--kernel/trace/ring_buffer.c42
-rw-r--r--kernel/trace/ring_buffer_benchmark.c18
-rw-r--r--kernel/trace/trace.c194
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_branch.c1
-rw-r--r--kernel/trace/trace_events.c2
-rw-r--r--kernel/trace/trace_export.c2
-rw-r--r--kernel/trace/trace_functions_graph.c2
-rw-r--r--kernel/trace/trace_irqsoff.c2
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--kernel/trace/trace_nop.c2
-rw-r--r--kernel/trace/trace_output.c44
-rw-r--r--kernel/trace/trace_printk.c4
-rw-r--r--kernel/trace/trace_sched_switch.c2
-rw-r--r--kernel/trace/trace_sched_wakeup.c2
-rw-r--r--kernel/trace/trace_seq.c2
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/trace/trace_stat.c2
-rw-r--r--kernel/trace/trace_uprobe.c2
-rw-r--r--kernel/watchdog.c2
-rw-r--r--kernel/workqueue.c5
88 files changed, 2124 insertions, 857 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index a59481a3fa6c..1408b3353a3c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -13,8 +13,8 @@ obj-y = fork.o exec_domain.o panic.o \
13 13
14ifdef CONFIG_FUNCTION_TRACER 14ifdef CONFIG_FUNCTION_TRACER
15# Do not trace debug files and internal ftrace files 15# Do not trace debug files and internal ftrace files
16CFLAGS_REMOVE_cgroup-debug.o = -pg 16CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE)
17CFLAGS_REMOVE_irq_work.o = -pg 17CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE)
18endif 18endif
19 19
20# cond_syscall is currently not LTO compatible 20# cond_syscall is currently not LTO compatible
@@ -26,6 +26,7 @@ obj-y += power/
26obj-y += printk/ 26obj-y += printk/
27obj-y += irq/ 27obj-y += irq/
28obj-y += rcu/ 28obj-y += rcu/
29obj-y += livepatch/
29 30
30obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o 31obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
31obj-$(CONFIG_FREEZER) += freezer.o 32obj-$(CONFIG_FREEZER) += freezer.o
@@ -142,7 +143,7 @@ endif
142kernel/system_certificates.o: $(obj)/x509_certificate_list 143kernel/system_certificates.o: $(obj)/x509_certificate_list
143 144
144quiet_cmd_x509certs = CERTS $@ 145quiet_cmd_x509certs = CERTS $@
145 cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo " - Including cert $(X509)") 146 cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; $(kecho) " - Including cert $(X509)")
146 147
147targets += $(obj)/x509_certificate_list 148targets += $(obj)/x509_certificate_list
148$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list 149$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list
diff --git a/kernel/acct.c b/kernel/acct.c
index 33738ef972f3..e6c10d1a4058 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -76,10 +76,11 @@ int acct_parm[3] = {4, 2, 30};
76/* 76/*
77 * External references and all of the globals. 77 * External references and all of the globals.
78 */ 78 */
79static void do_acct_process(struct bsd_acct_struct *acct);
80 79
81struct bsd_acct_struct { 80struct bsd_acct_struct {
82 struct fs_pin pin; 81 struct fs_pin pin;
82 atomic_long_t count;
83 struct rcu_head rcu;
83 struct mutex lock; 84 struct mutex lock;
84 int active; 85 int active;
85 unsigned long needcheck; 86 unsigned long needcheck;
@@ -89,6 +90,8 @@ struct bsd_acct_struct {
89 struct completion done; 90 struct completion done;
90}; 91};
91 92
93static void do_acct_process(struct bsd_acct_struct *acct);
94
92/* 95/*
93 * Check the amount of free space and suspend/resume accordingly. 96 * Check the amount of free space and suspend/resume accordingly.
94 */ 97 */
@@ -124,32 +127,56 @@ out:
124 return acct->active; 127 return acct->active;
125} 128}
126 129
130static void acct_put(struct bsd_acct_struct *p)
131{
132 if (atomic_long_dec_and_test(&p->count))
133 kfree_rcu(p, rcu);
134}
135
136static inline struct bsd_acct_struct *to_acct(struct fs_pin *p)
137{
138 return p ? container_of(p, struct bsd_acct_struct, pin) : NULL;
139}
140
127static struct bsd_acct_struct *acct_get(struct pid_namespace *ns) 141static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
128{ 142{
129 struct bsd_acct_struct *res; 143 struct bsd_acct_struct *res;
130again: 144again:
131 smp_rmb(); 145 smp_rmb();
132 rcu_read_lock(); 146 rcu_read_lock();
133 res = ACCESS_ONCE(ns->bacct); 147 res = to_acct(ACCESS_ONCE(ns->bacct));
134 if (!res) { 148 if (!res) {
135 rcu_read_unlock(); 149 rcu_read_unlock();
136 return NULL; 150 return NULL;
137 } 151 }
138 if (!atomic_long_inc_not_zero(&res->pin.count)) { 152 if (!atomic_long_inc_not_zero(&res->count)) {
139 rcu_read_unlock(); 153 rcu_read_unlock();
140 cpu_relax(); 154 cpu_relax();
141 goto again; 155 goto again;
142 } 156 }
143 rcu_read_unlock(); 157 rcu_read_unlock();
144 mutex_lock(&res->lock); 158 mutex_lock(&res->lock);
145 if (!res->ns) { 159 if (res != to_acct(ACCESS_ONCE(ns->bacct))) {
146 mutex_unlock(&res->lock); 160 mutex_unlock(&res->lock);
147 pin_put(&res->pin); 161 acct_put(res);
148 goto again; 162 goto again;
149 } 163 }
150 return res; 164 return res;
151} 165}
152 166
167static void acct_pin_kill(struct fs_pin *pin)
168{
169 struct bsd_acct_struct *acct = to_acct(pin);
170 mutex_lock(&acct->lock);
171 do_acct_process(acct);
172 schedule_work(&acct->work);
173 wait_for_completion(&acct->done);
174 cmpxchg(&acct->ns->bacct, pin, NULL);
175 mutex_unlock(&acct->lock);
176 pin_remove(pin);
177 acct_put(acct);
178}
179
153static void close_work(struct work_struct *work) 180static void close_work(struct work_struct *work)
154{ 181{
155 struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); 182 struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
@@ -160,44 +187,13 @@ static void close_work(struct work_struct *work)
160 complete(&acct->done); 187 complete(&acct->done);
161} 188}
162 189
163static void acct_kill(struct bsd_acct_struct *acct,
164 struct bsd_acct_struct *new)
165{
166 if (acct) {
167 struct pid_namespace *ns = acct->ns;
168 do_acct_process(acct);
169 INIT_WORK(&acct->work, close_work);
170 init_completion(&acct->done);
171 schedule_work(&acct->work);
172 wait_for_completion(&acct->done);
173 pin_remove(&acct->pin);
174 ns->bacct = new;
175 acct->ns = NULL;
176 atomic_long_dec(&acct->pin.count);
177 mutex_unlock(&acct->lock);
178 pin_put(&acct->pin);
179 }
180}
181
182static void acct_pin_kill(struct fs_pin *pin)
183{
184 struct bsd_acct_struct *acct;
185 acct = container_of(pin, struct bsd_acct_struct, pin);
186 mutex_lock(&acct->lock);
187 if (!acct->ns) {
188 mutex_unlock(&acct->lock);
189 pin_put(pin);
190 acct = NULL;
191 }
192 acct_kill(acct, NULL);
193}
194
195static int acct_on(struct filename *pathname) 190static int acct_on(struct filename *pathname)
196{ 191{
197 struct file *file; 192 struct file *file;
198 struct vfsmount *mnt, *internal; 193 struct vfsmount *mnt, *internal;
199 struct pid_namespace *ns = task_active_pid_ns(current); 194 struct pid_namespace *ns = task_active_pid_ns(current);
200 struct bsd_acct_struct *acct, *old; 195 struct bsd_acct_struct *acct;
196 struct fs_pin *old;
201 int err; 197 int err;
202 198
203 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); 199 acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
@@ -238,21 +234,21 @@ static int acct_on(struct filename *pathname)
238 mnt = file->f_path.mnt; 234 mnt = file->f_path.mnt;
239 file->f_path.mnt = internal; 235 file->f_path.mnt = internal;
240 236
241 atomic_long_set(&acct->pin.count, 1); 237 atomic_long_set(&acct->count, 1);
242 acct->pin.kill = acct_pin_kill; 238 init_fs_pin(&acct->pin, acct_pin_kill);
243 acct->file = file; 239 acct->file = file;
244 acct->needcheck = jiffies; 240 acct->needcheck = jiffies;
245 acct->ns = ns; 241 acct->ns = ns;
246 mutex_init(&acct->lock); 242 mutex_init(&acct->lock);
243 INIT_WORK(&acct->work, close_work);
244 init_completion(&acct->done);
247 mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */ 245 mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
248 pin_insert(&acct->pin, mnt); 246 pin_insert(&acct->pin, mnt);
249 247
250 old = acct_get(ns); 248 rcu_read_lock();
251 if (old) 249 old = xchg(&ns->bacct, &acct->pin);
252 acct_kill(old, acct);
253 else
254 ns->bacct = acct;
255 mutex_unlock(&acct->lock); 250 mutex_unlock(&acct->lock);
251 pin_kill(old);
256 mnt_drop_write(mnt); 252 mnt_drop_write(mnt);
257 mntput(mnt); 253 mntput(mnt);
258 return 0; 254 return 0;
@@ -288,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
288 mutex_unlock(&acct_on_mutex); 284 mutex_unlock(&acct_on_mutex);
289 putname(tmp); 285 putname(tmp);
290 } else { 286 } else {
291 acct_kill(acct_get(task_active_pid_ns(current)), NULL); 287 rcu_read_lock();
288 pin_kill(task_active_pid_ns(current)->bacct);
292 } 289 }
293 290
294 return error; 291 return error;
@@ -296,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
296 293
297void acct_exit_ns(struct pid_namespace *ns) 294void acct_exit_ns(struct pid_namespace *ns)
298{ 295{
299 acct_kill(acct_get(ns), NULL); 296 rcu_read_lock();
297 pin_kill(ns->bacct);
300} 298}
301 299
302/* 300/*
@@ -576,7 +574,7 @@ static void slow_acct_process(struct pid_namespace *ns)
576 if (acct) { 574 if (acct) {
577 do_acct_process(acct); 575 do_acct_process(acct);
578 mutex_unlock(&acct->lock); 576 mutex_unlock(&acct->lock);
579 pin_put(&acct->pin); 577 acct_put(acct);
580 } 578 }
581 } 579 }
582} 580}
diff --git a/kernel/audit.h b/kernel/audit.h
index 3cdffad5a1d9..1caa0d345d90 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -24,12 +24,6 @@
24#include <linux/skbuff.h> 24#include <linux/skbuff.h>
25#include <uapi/linux/mqueue.h> 25#include <uapi/linux/mqueue.h>
26 26
27/* 0 = no checking
28 1 = put_count checking
29 2 = verbose put_count checking
30*/
31#define AUDIT_DEBUG 0
32
33/* AUDIT_NAMES is the number of slots we reserve in the audit_context 27/* AUDIT_NAMES is the number of slots we reserve in the audit_context
34 * for saving names from getname(). If we get more names we will allocate 28 * for saving names from getname(). If we get more names we will allocate
35 * a name dynamically and also add those to the list anchored by names_list. */ 29 * a name dynamically and also add those to the list anchored by names_list. */
@@ -74,9 +68,8 @@ struct audit_cap_data {
74 }; 68 };
75}; 69};
76 70
77/* When fs/namei.c:getname() is called, we store the pointer in name and 71/* When fs/namei.c:getname() is called, we store the pointer in name and bump
78 * we don't let putname() free it (instead we free all of the saved 72 * the refcnt in the associated filename struct.
79 * pointers at syscall exit time).
80 * 73 *
81 * Further, in fs/namei.c:path_lookup() we store the inode and device. 74 * Further, in fs/namei.c:path_lookup() we store the inode and device.
82 */ 75 */
@@ -86,7 +79,6 @@ struct audit_names {
86 struct filename *name; 79 struct filename *name;
87 int name_len; /* number of chars to log */ 80 int name_len; /* number of chars to log */
88 bool hidden; /* don't log this record */ 81 bool hidden; /* don't log this record */
89 bool name_put; /* call __putname()? */
90 82
91 unsigned long ino; 83 unsigned long ino;
92 dev_t dev; 84 dev_t dev;
@@ -208,11 +200,6 @@ struct audit_context {
208 }; 200 };
209 int fds[2]; 201 int fds[2];
210 struct audit_proctitle proctitle; 202 struct audit_proctitle proctitle;
211
212#if AUDIT_DEBUG
213 int put_count;
214 int ino_count;
215#endif
216}; 203};
217 204
218extern u32 audit_ever_enabled; 205extern u32 audit_ever_enabled;
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 4f68a326d92e..72e1660a79a3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -425,7 +425,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
425 goto exit_nofree; 425 goto exit_nofree;
426 426
427 bufp = data->buf; 427 bufp = data->buf;
428 entry->rule.vers_ops = 2;
429 for (i = 0; i < data->field_count; i++) { 428 for (i = 0; i < data->field_count; i++) {
430 struct audit_field *f = &entry->rule.fields[i]; 429 struct audit_field *f = &entry->rule.fields[i];
431 430
@@ -758,7 +757,6 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old)
758 return ERR_PTR(-ENOMEM); 757 return ERR_PTR(-ENOMEM);
759 758
760 new = &entry->rule; 759 new = &entry->rule;
761 new->vers_ops = old->vers_ops;
762 new->flags = old->flags; 760 new->flags = old->flags;
763 new->pflags = old->pflags; 761 new->pflags = old->pflags;
764 new->listnr = old->listnr; 762 new->listnr = old->listnr;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 072566dd0caf..dc4ae70a7413 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -866,33 +866,10 @@ static inline void audit_free_names(struct audit_context *context)
866{ 866{
867 struct audit_names *n, *next; 867 struct audit_names *n, *next;
868 868
869#if AUDIT_DEBUG == 2
870 if (context->put_count + context->ino_count != context->name_count) {
871 int i = 0;
872
873 pr_err("%s:%d(:%d): major=%d in_syscall=%d"
874 " name_count=%d put_count=%d ino_count=%d"
875 " [NOT freeing]\n", __FILE__, __LINE__,
876 context->serial, context->major, context->in_syscall,
877 context->name_count, context->put_count,
878 context->ino_count);
879 list_for_each_entry(n, &context->names_list, list) {
880 pr_err("names[%d] = %p = %s\n", i++, n->name,
881 n->name->name ?: "(null)");
882 }
883 dump_stack();
884 return;
885 }
886#endif
887#if AUDIT_DEBUG
888 context->put_count = 0;
889 context->ino_count = 0;
890#endif
891
892 list_for_each_entry_safe(n, next, &context->names_list, list) { 869 list_for_each_entry_safe(n, next, &context->names_list, list) {
893 list_del(&n->list); 870 list_del(&n->list);
894 if (n->name && n->name_put) 871 if (n->name)
895 final_putname(n->name); 872 putname(n->name);
896 if (n->should_free) 873 if (n->should_free)
897 kfree(n); 874 kfree(n);
898 } 875 }
@@ -1711,9 +1688,6 @@ static struct audit_names *audit_alloc_name(struct audit_context *context,
1711 list_add_tail(&aname->list, &context->names_list); 1688 list_add_tail(&aname->list, &context->names_list);
1712 1689
1713 context->name_count++; 1690 context->name_count++;
1714#if AUDIT_DEBUG
1715 context->ino_count++;
1716#endif
1717 return aname; 1691 return aname;
1718} 1692}
1719 1693
@@ -1734,8 +1708,10 @@ __audit_reusename(const __user char *uptr)
1734 list_for_each_entry(n, &context->names_list, list) { 1708 list_for_each_entry(n, &context->names_list, list) {
1735 if (!n->name) 1709 if (!n->name)
1736 continue; 1710 continue;
1737 if (n->name->uptr == uptr) 1711 if (n->name->uptr == uptr) {
1712 n->name->refcnt++;
1738 return n->name; 1713 return n->name;
1714 }
1739 } 1715 }
1740 return NULL; 1716 return NULL;
1741} 1717}
@@ -1752,19 +1728,8 @@ void __audit_getname(struct filename *name)
1752 struct audit_context *context = current->audit_context; 1728 struct audit_context *context = current->audit_context;
1753 struct audit_names *n; 1729 struct audit_names *n;
1754 1730
1755 if (!context->in_syscall) { 1731 if (!context->in_syscall)
1756#if AUDIT_DEBUG == 2
1757 pr_err("%s:%d(:%d): ignoring getname(%p)\n",
1758 __FILE__, __LINE__, context->serial, name);
1759 dump_stack();
1760#endif
1761 return; 1732 return;
1762 }
1763
1764#if AUDIT_DEBUG
1765 /* The filename _must_ have a populated ->name */
1766 BUG_ON(!name->name);
1767#endif
1768 1733
1769 n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); 1734 n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN);
1770 if (!n) 1735 if (!n)
@@ -1772,56 +1737,13 @@ void __audit_getname(struct filename *name)
1772 1737
1773 n->name = name; 1738 n->name = name;
1774 n->name_len = AUDIT_NAME_FULL; 1739 n->name_len = AUDIT_NAME_FULL;
1775 n->name_put = true;
1776 name->aname = n; 1740 name->aname = n;
1741 name->refcnt++;
1777 1742
1778 if (!context->pwd.dentry) 1743 if (!context->pwd.dentry)
1779 get_fs_pwd(current->fs, &context->pwd); 1744 get_fs_pwd(current->fs, &context->pwd);
1780} 1745}
1781 1746
1782/* audit_putname - intercept a putname request
1783 * @name: name to intercept and delay for putname
1784 *
1785 * If we have stored the name from getname in the audit context,
1786 * then we delay the putname until syscall exit.
1787 * Called from include/linux/fs.h:putname().
1788 */
1789void audit_putname(struct filename *name)
1790{
1791 struct audit_context *context = current->audit_context;
1792
1793 BUG_ON(!context);
1794 if (!name->aname || !context->in_syscall) {
1795#if AUDIT_DEBUG == 2
1796 pr_err("%s:%d(:%d): final_putname(%p)\n",
1797 __FILE__, __LINE__, context->serial, name);
1798 if (context->name_count) {
1799 struct audit_names *n;
1800 int i = 0;
1801
1802 list_for_each_entry(n, &context->names_list, list)
1803 pr_err("name[%d] = %p = %s\n", i++, n->name,
1804 n->name->name ?: "(null)");
1805 }
1806#endif
1807 final_putname(name);
1808 }
1809#if AUDIT_DEBUG
1810 else {
1811 ++context->put_count;
1812 if (context->put_count > context->name_count) {
1813 pr_err("%s:%d(:%d): major=%d in_syscall=%d putname(%p)"
1814 " name_count=%d put_count=%d\n",
1815 __FILE__, __LINE__,
1816 context->serial, context->major,
1817 context->in_syscall, name->name,
1818 context->name_count, context->put_count);
1819 dump_stack();
1820 }
1821 }
1822#endif
1823}
1824
1825/** 1747/**
1826 * __audit_inode - store the inode and device from a lookup 1748 * __audit_inode - store the inode and device from a lookup
1827 * @name: name being audited 1749 * @name: name being audited
@@ -1842,10 +1764,6 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
1842 if (!name) 1764 if (!name)
1843 goto out_alloc; 1765 goto out_alloc;
1844 1766
1845#if AUDIT_DEBUG
1846 /* The struct filename _must_ have a populated ->name */
1847 BUG_ON(!name->name);
1848#endif
1849 /* 1767 /*
1850 * If we have a pointer to an audit_names entry already, then we can 1768 * If we have a pointer to an audit_names entry already, then we can
1851 * just use it directly if the type is correct. 1769 * just use it directly if the type is correct.
@@ -1863,7 +1781,17 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
1863 } 1781 }
1864 1782
1865 list_for_each_entry_reverse(n, &context->names_list, list) { 1783 list_for_each_entry_reverse(n, &context->names_list, list) {
1866 if (!n->name || strcmp(n->name->name, name->name)) 1784 if (n->ino) {
1785 /* valid inode number, use that for the comparison */
1786 if (n->ino != inode->i_ino ||
1787 n->dev != inode->i_sb->s_dev)
1788 continue;
1789 } else if (n->name) {
1790 /* inode number has not been set, check the name */
1791 if (strcmp(n->name->name, name->name))
1792 continue;
1793 } else
1794 /* no inode and no name (?!) ... this is odd ... */
1867 continue; 1795 continue;
1868 1796
1869 /* match the correct record type */ 1797 /* match the correct record type */
@@ -1882,44 +1810,11 @@ out_alloc:
1882 n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); 1810 n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN);
1883 if (!n) 1811 if (!n)
1884 return; 1812 return;
1885 /* unfortunately, while we may have a path name to record with the
1886 * inode, we can't always rely on the string lasting until the end of
1887 * the syscall so we need to create our own copy, it may fail due to
1888 * memory allocation issues, but we do our best */
1889 if (name) { 1813 if (name) {
1890 /* we can't use getname_kernel() due to size limits */ 1814 n->name = name;
1891 size_t len = strlen(name->name) + 1; 1815 name->refcnt++;
1892 struct filename *new = __getname();
1893
1894 if (unlikely(!new))
1895 goto out;
1896
1897 if (len <= (PATH_MAX - sizeof(*new))) {
1898 new->name = (char *)(new) + sizeof(*new);
1899 new->separate = false;
1900 } else if (len <= PATH_MAX) {
1901 /* this looks odd, but is due to final_putname() */
1902 struct filename *new2;
1903
1904 new2 = kmalloc(sizeof(*new2), GFP_KERNEL);
1905 if (unlikely(!new2)) {
1906 __putname(new);
1907 goto out;
1908 }
1909 new2->name = (char *)new;
1910 new2->separate = true;
1911 new = new2;
1912 } else {
1913 /* we should never get here, but let's be safe */
1914 __putname(new);
1915 goto out;
1916 }
1917 strlcpy((char *)new->name, name->name, len);
1918 new->uptr = NULL;
1919 new->aname = n;
1920 n->name = new;
1921 n->name_put = true;
1922 } 1816 }
1817
1923out: 1818out:
1924 if (parent) { 1819 if (parent) {
1925 n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; 1820 n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL;
@@ -1970,11 +1865,16 @@ void __audit_inode_child(const struct inode *parent,
1970 1865
1971 /* look for a parent entry first */ 1866 /* look for a parent entry first */
1972 list_for_each_entry(n, &context->names_list, list) { 1867 list_for_each_entry(n, &context->names_list, list) {
1973 if (!n->name || n->type != AUDIT_TYPE_PARENT) 1868 if (!n->name ||
1869 (n->type != AUDIT_TYPE_PARENT &&
1870 n->type != AUDIT_TYPE_UNKNOWN))
1974 continue; 1871 continue;
1975 1872
1976 if (n->ino == parent->i_ino && 1873 if (n->ino == parent->i_ino && n->dev == parent->i_sb->s_dev &&
1977 !audit_compare_dname_path(dname, n->name->name, n->name_len)) { 1874 !audit_compare_dname_path(dname,
1875 n->name->name, n->name_len)) {
1876 if (n->type == AUDIT_TYPE_UNKNOWN)
1877 n->type = AUDIT_TYPE_PARENT;
1978 found_parent = n; 1878 found_parent = n;
1979 break; 1879 break;
1980 } 1880 }
@@ -1983,11 +1883,8 @@ void __audit_inode_child(const struct inode *parent,
1983 /* is there a matching child entry? */ 1883 /* is there a matching child entry? */
1984 list_for_each_entry(n, &context->names_list, list) { 1884 list_for_each_entry(n, &context->names_list, list) {
1985 /* can only match entries that have a name */ 1885 /* can only match entries that have a name */
1986 if (!n->name || n->type != type) 1886 if (!n->name ||
1987 continue; 1887 (n->type != type && n->type != AUDIT_TYPE_UNKNOWN))
1988
1989 /* if we found a parent, make sure this one is a child of it */
1990 if (found_parent && (n->name != found_parent->name))
1991 continue; 1888 continue;
1992 1889
1993 if (!strcmp(dname, n->name->name) || 1890 if (!strcmp(dname, n->name->name) ||
@@ -1995,6 +1892,8 @@ void __audit_inode_child(const struct inode *parent,
1995 found_parent ? 1892 found_parent ?
1996 found_parent->name_len : 1893 found_parent->name_len :
1997 AUDIT_NAME_FULL)) { 1894 AUDIT_NAME_FULL)) {
1895 if (n->type == AUDIT_TYPE_UNKNOWN)
1896 n->type = type;
1998 found_child = n; 1897 found_child = n;
1999 break; 1898 break;
2000 } 1899 }
@@ -2019,10 +1918,10 @@ void __audit_inode_child(const struct inode *parent,
2019 if (found_parent) { 1918 if (found_parent) {
2020 found_child->name = found_parent->name; 1919 found_child->name = found_parent->name;
2021 found_child->name_len = AUDIT_NAME_FULL; 1920 found_child->name_len = AUDIT_NAME_FULL;
2022 /* don't call __putname() */ 1921 found_child->name->refcnt++;
2023 found_child->name_put = false;
2024 } 1922 }
2025 } 1923 }
1924
2026 if (inode) 1925 if (inode)
2027 audit_copy_inode(found_child, dentry, inode); 1926 audit_copy_inode(found_child, dentry, inode);
2028 else 1927 else
@@ -2405,7 +2304,6 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
2405 struct audit_aux_data_bprm_fcaps *ax; 2304 struct audit_aux_data_bprm_fcaps *ax;
2406 struct audit_context *context = current->audit_context; 2305 struct audit_context *context = current->audit_context;
2407 struct cpu_vfs_cap_data vcaps; 2306 struct cpu_vfs_cap_data vcaps;
2408 struct dentry *dentry;
2409 2307
2410 ax = kmalloc(sizeof(*ax), GFP_KERNEL); 2308 ax = kmalloc(sizeof(*ax), GFP_KERNEL);
2411 if (!ax) 2309 if (!ax)
@@ -2415,9 +2313,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
2415 ax->d.next = context->aux; 2313 ax->d.next = context->aux;
2416 context->aux = (void *)ax; 2314 context->aux = (void *)ax;
2417 2315
2418 dentry = dget(bprm->file->f_path.dentry); 2316 get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
2419 get_vfs_caps_from_disk(dentry, &vcaps);
2420 dput(dentry);
2421 2317
2422 ax->fcap.permitted = vcaps.permitted; 2318 ax->fcap.permitted = vcaps.permitted;
2423 ax->fcap.inheritable = vcaps.inheritable; 2319 ax->fcap.inheritable = vcaps.inheritable;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 04cfe8ace520..29a7b2cc593e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3077,7 +3077,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
3077#endif 3077#endif
3078 kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name), 3078 kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
3079 cgroup_file_mode(cft), 0, cft->kf_ops, cft, 3079 cgroup_file_mode(cft), 0, cft->kf_ops, cft,
3080 NULL, false, key); 3080 NULL, key);
3081 if (IS_ERR(kn)) 3081 if (IS_ERR(kn))
3082 return PTR_ERR(kn); 3082 return PTR_ERR(kn);
3083 3083
@@ -4373,16 +4373,20 @@ static void css_free_work_fn(struct work_struct *work)
4373{ 4373{
4374 struct cgroup_subsys_state *css = 4374 struct cgroup_subsys_state *css =
4375 container_of(work, struct cgroup_subsys_state, destroy_work); 4375 container_of(work, struct cgroup_subsys_state, destroy_work);
4376 struct cgroup_subsys *ss = css->ss;
4376 struct cgroup *cgrp = css->cgroup; 4377 struct cgroup *cgrp = css->cgroup;
4377 4378
4378 percpu_ref_exit(&css->refcnt); 4379 percpu_ref_exit(&css->refcnt);
4379 4380
4380 if (css->ss) { 4381 if (ss) {
4381 /* css free path */ 4382 /* css free path */
4383 int id = css->id;
4384
4382 if (css->parent) 4385 if (css->parent)
4383 css_put(css->parent); 4386 css_put(css->parent);
4384 4387
4385 css->ss->css_free(css); 4388 ss->css_free(css);
4389 cgroup_idr_remove(&ss->css_idr, id);
4386 cgroup_put(cgrp); 4390 cgroup_put(cgrp);
4387 } else { 4391 } else {
4388 /* cgroup free path */ 4392 /* cgroup free path */
@@ -4434,7 +4438,7 @@ static void css_release_work_fn(struct work_struct *work)
4434 4438
4435 if (ss) { 4439 if (ss) {
4436 /* css release path */ 4440 /* css release path */
4437 cgroup_idr_remove(&ss->css_idr, css->id); 4441 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
4438 if (ss->css_released) 4442 if (ss->css_released)
4439 ss->css_released(css); 4443 ss->css_released(css);
4440 } else { 4444 } else {
diff --git a/kernel/compat.c b/kernel/compat.c
index ebb3c369d03d..24f00610c575 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -276,8 +276,7 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
276 * core implementation decides to return random nonsense. 276 * core implementation decides to return random nonsense.
277 */ 277 */
278 if (ret == -ERESTART_RESTARTBLOCK) { 278 if (ret == -ERESTART_RESTARTBLOCK) {
279 struct restart_block *restart 279 struct restart_block *restart = &current->restart_block;
280 = &current_thread_info()->restart_block;
281 280
282 restart->fn = compat_nanosleep_restart; 281 restart->fn = compat_nanosleep_restart;
283 restart->nanosleep.compat_rmtp = rmtp; 282 restart->nanosleep.compat_rmtp = rmtp;
@@ -860,7 +859,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
860 return -EFAULT; 859 return -EFAULT;
861 860
862 if (err == -ERESTART_RESTARTBLOCK) { 861 if (err == -ERESTART_RESTARTBLOCK) {
863 restart = &current_thread_info()->restart_block; 862 restart = &current->restart_block;
864 restart->fn = compat_clock_nanosleep_restart; 863 restart->fn = compat_clock_nanosleep_restart;
865 restart->nanosleep.compat_rmtp = rmtp; 864 restart->nanosleep.compat_rmtp = rmtp;
866 } 865 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 64b257f6bca2..1d1fe9361d29 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1707,40 +1707,27 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
1707{ 1707{
1708 struct cpuset *cs = css_cs(seq_css(sf)); 1708 struct cpuset *cs = css_cs(seq_css(sf));
1709 cpuset_filetype_t type = seq_cft(sf)->private; 1709 cpuset_filetype_t type = seq_cft(sf)->private;
1710 ssize_t count;
1711 char *buf, *s;
1712 int ret = 0; 1710 int ret = 0;
1713 1711
1714 count = seq_get_buf(sf, &buf);
1715 s = buf;
1716
1717 spin_lock_irq(&callback_lock); 1712 spin_lock_irq(&callback_lock);
1718 1713
1719 switch (type) { 1714 switch (type) {
1720 case FILE_CPULIST: 1715 case FILE_CPULIST:
1721 s += cpulist_scnprintf(s, count, cs->cpus_allowed); 1716 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
1722 break; 1717 break;
1723 case FILE_MEMLIST: 1718 case FILE_MEMLIST:
1724 s += nodelist_scnprintf(s, count, cs->mems_allowed); 1719 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
1725 break; 1720 break;
1726 case FILE_EFFECTIVE_CPULIST: 1721 case FILE_EFFECTIVE_CPULIST:
1727 s += cpulist_scnprintf(s, count, cs->effective_cpus); 1722 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus));
1728 break; 1723 break;
1729 case FILE_EFFECTIVE_MEMLIST: 1724 case FILE_EFFECTIVE_MEMLIST:
1730 s += nodelist_scnprintf(s, count, cs->effective_mems); 1725 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems));
1731 break; 1726 break;
1732 default: 1727 default:
1733 ret = -EINVAL; 1728 ret = -EINVAL;
1734 goto out_unlock;
1735 } 1729 }
1736 1730
1737 if (s < buf + count - 1) {
1738 *s++ = '\n';
1739 seq_commit(sf, s - buf);
1740 } else {
1741 seq_commit(sf, -1);
1742 }
1743out_unlock:
1744 spin_unlock_irq(&callback_lock); 1731 spin_unlock_irq(&callback_lock);
1745 return ret; 1732 return ret;
1746} 1733}
@@ -2400,7 +2387,7 @@ void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
2400 */ 2387 */
2401} 2388}
2402 2389
2403void cpuset_init_current_mems_allowed(void) 2390void __init cpuset_init_current_mems_allowed(void)
2404{ 2391{
2405 nodes_setall(current->mems_allowed); 2392 nodes_setall(current->mems_allowed);
2406} 2393}
@@ -2610,8 +2597,6 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
2610 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); 2597 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
2611} 2598}
2612 2599
2613#define CPUSET_NODELIST_LEN (256)
2614
2615/** 2600/**
2616 * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed 2601 * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
2617 * @tsk: pointer to task_struct of some task. 2602 * @tsk: pointer to task_struct of some task.
@@ -2621,23 +2606,16 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
2621 */ 2606 */
2622void cpuset_print_task_mems_allowed(struct task_struct *tsk) 2607void cpuset_print_task_mems_allowed(struct task_struct *tsk)
2623{ 2608{
2624 /* Statically allocated to prevent using excess stack. */
2625 static char cpuset_nodelist[CPUSET_NODELIST_LEN];
2626 static DEFINE_SPINLOCK(cpuset_buffer_lock);
2627 struct cgroup *cgrp; 2609 struct cgroup *cgrp;
2628 2610
2629 spin_lock(&cpuset_buffer_lock);
2630 rcu_read_lock(); 2611 rcu_read_lock();
2631 2612
2632 cgrp = task_cs(tsk)->css.cgroup; 2613 cgrp = task_cs(tsk)->css.cgroup;
2633 nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
2634 tsk->mems_allowed);
2635 pr_info("%s cpuset=", tsk->comm); 2614 pr_info("%s cpuset=", tsk->comm);
2636 pr_cont_cgroup_name(cgrp); 2615 pr_cont_cgroup_name(cgrp);
2637 pr_cont(" mems_allowed=%s\n", cpuset_nodelist); 2616 pr_cont(" mems_allowed=%*pbl\n", nodemask_pr_args(&tsk->mems_allowed));
2638 2617
2639 rcu_read_unlock(); 2618 rcu_read_unlock();
2640 spin_unlock(&cpuset_buffer_lock);
2641} 2619}
2642 2620
2643/* 2621/*
@@ -2715,10 +2693,8 @@ out:
2715/* Display task mems_allowed in /proc/<pid>/status file. */ 2693/* Display task mems_allowed in /proc/<pid>/status file. */
2716void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) 2694void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
2717{ 2695{
2718 seq_puts(m, "Mems_allowed:\t"); 2696 seq_printf(m, "Mems_allowed:\t%*pb\n",
2719 seq_nodemask(m, &task->mems_allowed); 2697 nodemask_pr_args(&task->mems_allowed));
2720 seq_puts(m, "\n"); 2698 seq_printf(m, "Mems_allowed_list:\t%*pbl\n",
2721 seq_puts(m, "Mems_allowed_list:\t"); 2699 nodemask_pr_args(&task->mems_allowed));
2722 seq_nodemask_list(m, &task->mems_allowed);
2723 seq_puts(m, "\n");
2724} 2700}
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 07ce18ca71e0..0874e2edd275 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -604,7 +604,7 @@ return_normal:
604 online_cpus) 604 online_cpus)
605 cpu_relax(); 605 cpu_relax();
606 if (!time_left) 606 if (!time_left)
607 pr_crit("KGDB: Timed out waiting for secondary CPUs.\n"); 607 pr_crit("Timed out waiting for secondary CPUs.\n");
608 608
609 /* 609 /*
610 * At this point the primary processor is completely 610 * At this point the primary processor is completely
@@ -696,6 +696,14 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
696 696
697 if (arch_kgdb_ops.enable_nmi) 697 if (arch_kgdb_ops.enable_nmi)
698 arch_kgdb_ops.enable_nmi(0); 698 arch_kgdb_ops.enable_nmi(0);
699 /*
700 * Avoid entering the debugger if we were triggered due to an oops
701 * but panic_timeout indicates the system should automatically
702 * reboot on panic. We don't want to get stuck waiting for input
703 * on such systems, especially if its "just" an oops.
704 */
705 if (signo != SIGTRAP && panic_timeout)
706 return 1;
699 707
700 memset(ks, 0, sizeof(struct kgdb_state)); 708 memset(ks, 0, sizeof(struct kgdb_state));
701 ks->cpu = raw_smp_processor_id(); 709 ks->cpu = raw_smp_processor_id();
@@ -828,6 +836,15 @@ static int kgdb_panic_event(struct notifier_block *self,
828 unsigned long val, 836 unsigned long val,
829 void *data) 837 void *data)
830{ 838{
839 /*
840 * Avoid entering the debugger if we were triggered due to a panic
841 * We don't want to get stuck waiting for input from user in such case.
842 * panic_timeout indicates the system should automatically
843 * reboot on panic.
844 */
845 if (panic_timeout)
846 return NOTIFY_DONE;
847
831 if (dbg_kdb_mode) 848 if (dbg_kdb_mode)
832 kdb_printf("PANIC: %s\n", (char *)data); 849 kdb_printf("PANIC: %s\n", (char *)data);
833 kgdb_breakpoint(); 850 kgdb_breakpoint();
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 7c70812caea5..fc1ef736253c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -439,7 +439,7 @@ poll_again:
439 * substituted for %d, %x or %o in the prompt. 439 * substituted for %d, %x or %o in the prompt.
440 */ 440 */
441 441
442char *kdb_getstr(char *buffer, size_t bufsize, char *prompt) 442char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt)
443{ 443{
444 if (prompt && kdb_prompt_str != prompt) 444 if (prompt && kdb_prompt_str != prompt)
445 strncpy(kdb_prompt_str, prompt, CMD_BUFLEN); 445 strncpy(kdb_prompt_str, prompt, CMD_BUFLEN);
@@ -548,7 +548,7 @@ static int kdb_search_string(char *searched, char *searchfor)
548 return 0; 548 return 0;
549} 549}
550 550
551int vkdb_printf(const char *fmt, va_list ap) 551int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
552{ 552{
553 int diag; 553 int diag;
554 int linecount; 554 int linecount;
@@ -680,6 +680,12 @@ int vkdb_printf(const char *fmt, va_list ap)
680 size_avail = sizeof(kdb_buffer) - len; 680 size_avail = sizeof(kdb_buffer) - len;
681 goto kdb_print_out; 681 goto kdb_print_out;
682 } 682 }
683 if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH)
684 /*
685 * This was a interactive search (using '/' at more
686 * prompt) and it has completed. Clear the flag.
687 */
688 kdb_grepping_flag = 0;
683 /* 689 /*
684 * at this point the string is a full line and 690 * at this point the string is a full line and
685 * should be printed, up to the null. 691 * should be printed, up to the null.
@@ -691,19 +697,20 @@ kdb_printit:
691 * Write to all consoles. 697 * Write to all consoles.
692 */ 698 */
693 retlen = strlen(kdb_buffer); 699 retlen = strlen(kdb_buffer);
700 cp = (char *) printk_skip_level(kdb_buffer);
694 if (!dbg_kdb_mode && kgdb_connected) { 701 if (!dbg_kdb_mode && kgdb_connected) {
695 gdbstub_msg_write(kdb_buffer, retlen); 702 gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
696 } else { 703 } else {
697 if (dbg_io_ops && !dbg_io_ops->is_console) { 704 if (dbg_io_ops && !dbg_io_ops->is_console) {
698 len = retlen; 705 len = retlen - (cp - kdb_buffer);
699 cp = kdb_buffer; 706 cp2 = cp;
700 while (len--) { 707 while (len--) {
701 dbg_io_ops->write_char(*cp); 708 dbg_io_ops->write_char(*cp2);
702 cp++; 709 cp2++;
703 } 710 }
704 } 711 }
705 while (c) { 712 while (c) {
706 c->write(c, kdb_buffer, retlen); 713 c->write(c, cp, retlen - (cp - kdb_buffer));
707 touch_nmi_watchdog(); 714 touch_nmi_watchdog();
708 c = c->next; 715 c = c->next;
709 } 716 }
@@ -711,7 +718,10 @@ kdb_printit:
711 if (logging) { 718 if (logging) {
712 saved_loglevel = console_loglevel; 719 saved_loglevel = console_loglevel;
713 console_loglevel = CONSOLE_LOGLEVEL_SILENT; 720 console_loglevel = CONSOLE_LOGLEVEL_SILENT;
714 printk(KERN_INFO "%s", kdb_buffer); 721 if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK)
722 printk("%s", kdb_buffer);
723 else
724 pr_info("%s", kdb_buffer);
715 } 725 }
716 726
717 if (KDB_STATE(PAGER)) { 727 if (KDB_STATE(PAGER)) {
@@ -794,11 +804,23 @@ kdb_printit:
794 kdb_nextline = linecount - 1; 804 kdb_nextline = linecount - 1;
795 kdb_printf("\r"); 805 kdb_printf("\r");
796 suspend_grep = 1; /* for this recursion */ 806 suspend_grep = 1; /* for this recursion */
807 } else if (buf1[0] == '/' && !kdb_grepping_flag) {
808 kdb_printf("\r");
809 kdb_getstr(kdb_grep_string, KDB_GREP_STRLEN,
810 kdbgetenv("SEARCHPROMPT") ?: "search> ");
811 *strchrnul(kdb_grep_string, '\n') = '\0';
812 kdb_grepping_flag += KDB_GREPPING_FLAG_SEARCH;
813 suspend_grep = 1; /* for this recursion */
797 } else if (buf1[0] && buf1[0] != '\n') { 814 } else if (buf1[0] && buf1[0] != '\n') {
798 /* user hit something other than enter */ 815 /* user hit something other than enter */
799 suspend_grep = 1; /* for this recursion */ 816 suspend_grep = 1; /* for this recursion */
800 kdb_printf("\nOnly 'q' or 'Q' are processed at more " 817 if (buf1[0] != '/')
801 "prompt, input ignored\n"); 818 kdb_printf(
819 "\nOnly 'q', 'Q' or '/' are processed at "
820 "more prompt, input ignored\n");
821 else
822 kdb_printf("\n'/' cannot be used during | "
823 "grep filtering, input ignored\n");
802 } else if (kdb_grepping_flag) { 824 } else if (kdb_grepping_flag) {
803 /* user hit enter */ 825 /* user hit enter */
804 suspend_grep = 1; /* for this recursion */ 826 suspend_grep = 1; /* for this recursion */
@@ -844,7 +866,7 @@ int kdb_printf(const char *fmt, ...)
844 int r; 866 int r;
845 867
846 va_start(ap, fmt); 868 va_start(ap, fmt);
847 r = vkdb_printf(fmt, ap); 869 r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
848 va_end(ap); 870 va_end(ap);
849 871
850 return r; 872 return r;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 7b40c5f07dce..4121345498e0 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -50,8 +50,7 @@
50static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; 50static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE;
51module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); 51module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600);
52 52
53#define GREP_LEN 256 53char kdb_grep_string[KDB_GREP_STRLEN];
54char kdb_grep_string[GREP_LEN];
55int kdb_grepping_flag; 54int kdb_grepping_flag;
56EXPORT_SYMBOL(kdb_grepping_flag); 55EXPORT_SYMBOL(kdb_grepping_flag);
57int kdb_grep_leading; 56int kdb_grep_leading;
@@ -870,7 +869,7 @@ static void parse_grep(const char *str)
870 len = strlen(cp); 869 len = strlen(cp);
871 if (!len) 870 if (!len)
872 return; 871 return;
873 if (len >= GREP_LEN) { 872 if (len >= KDB_GREP_STRLEN) {
874 kdb_printf("search string too long\n"); 873 kdb_printf("search string too long\n");
875 return; 874 return;
876 } 875 }
@@ -915,13 +914,12 @@ int kdb_parse(const char *cmdstr)
915 char *cp; 914 char *cp;
916 char *cpp, quoted; 915 char *cpp, quoted;
917 kdbtab_t *tp; 916 kdbtab_t *tp;
918 int i, escaped, ignore_errors = 0, check_grep; 917 int i, escaped, ignore_errors = 0, check_grep = 0;
919 918
920 /* 919 /*
921 * First tokenize the command string. 920 * First tokenize the command string.
922 */ 921 */
923 cp = (char *)cmdstr; 922 cp = (char *)cmdstr;
924 kdb_grepping_flag = check_grep = 0;
925 923
926 if (KDB_FLAG(CMD_INTERRUPT)) { 924 if (KDB_FLAG(CMD_INTERRUPT)) {
927 /* Previous command was interrupted, newline must not 925 /* Previous command was interrupted, newline must not
@@ -1247,7 +1245,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
1247 kdb_printf("due to NonMaskable Interrupt @ " 1245 kdb_printf("due to NonMaskable Interrupt @ "
1248 kdb_machreg_fmt "\n", 1246 kdb_machreg_fmt "\n",
1249 instruction_pointer(regs)); 1247 instruction_pointer(regs));
1250 kdb_dumpregs(regs);
1251 break; 1248 break;
1252 case KDB_REASON_SSTEP: 1249 case KDB_REASON_SSTEP:
1253 case KDB_REASON_BREAK: 1250 case KDB_REASON_BREAK:
@@ -1281,6 +1278,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
1281 */ 1278 */
1282 kdb_nextline = 1; 1279 kdb_nextline = 1;
1283 KDB_STATE_CLEAR(SUPPRESS); 1280 KDB_STATE_CLEAR(SUPPRESS);
1281 kdb_grepping_flag = 0;
1282 /* ensure the old search does not leak into '/' commands */
1283 kdb_grep_string[0] = '\0';
1284 1284
1285 cmdbuf = cmd_cur; 1285 cmdbuf = cmd_cur;
1286 *cmdbuf = '\0'; 1286 *cmdbuf = '\0';
@@ -2256,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv)
2256 /* 2256 /*
2257 * Validate cpunum 2257 * Validate cpunum
2258 */ 2258 */
2259 if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) 2259 if ((cpunum >= CONFIG_NR_CPUS) || !kgdb_info[cpunum].enter_kgdb)
2260 return KDB_BADCPUNUM; 2260 return KDB_BADCPUNUM;
2261 2261
2262 dbg_switch_cpu = cpunum; 2262 dbg_switch_cpu = cpunum;
@@ -2583,7 +2583,7 @@ static int kdb_summary(int argc, const char **argv)
2583#define K(x) ((x) << (PAGE_SHIFT - 10)) 2583#define K(x) ((x) << (PAGE_SHIFT - 10))
2584 kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n" 2584 kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n"
2585 "Buffers: %8lu kB\n", 2585 "Buffers: %8lu kB\n",
2586 val.totalram, val.freeram, val.bufferram); 2586 K(val.totalram), K(val.freeram), K(val.bufferram));
2587 return 0; 2587 return 0;
2588} 2588}
2589 2589
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index eaacd1693954..75014d7f4568 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -196,7 +196,9 @@ extern int kdb_main_loop(kdb_reason_t, kdb_reason_t,
196 196
197/* Miscellaneous functions and data areas */ 197/* Miscellaneous functions and data areas */
198extern int kdb_grepping_flag; 198extern int kdb_grepping_flag;
199#define KDB_GREPPING_FLAG_SEARCH 0x8000
199extern char kdb_grep_string[]; 200extern char kdb_grep_string[];
201#define KDB_GREP_STRLEN 256
200extern int kdb_grep_leading; 202extern int kdb_grep_leading;
201extern int kdb_grep_trailing; 203extern int kdb_grep_trailing;
202extern char *kdb_cmds[]; 204extern char *kdb_cmds[];
@@ -209,7 +211,7 @@ extern void kdb_ps1(const struct task_struct *p);
209extern void kdb_print_nameval(const char *name, unsigned long val); 211extern void kdb_print_nameval(const char *name, unsigned long val);
210extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); 212extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
211extern void kdb_meminfo_proc_show(void); 213extern void kdb_meminfo_proc_show(void);
212extern char *kdb_getstr(char *, size_t, char *); 214extern char *kdb_getstr(char *, size_t, const char *);
213extern void kdb_gdb_state_pass(char *buf); 215extern void kdb_gdb_state_pass(char *buf);
214 216
215/* Defines for kdb_symbol_print */ 217/* Defines for kdb_symbol_print */
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 103f5d147b2f..2925188f50ea 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -1,5 +1,5 @@
1ifdef CONFIG_FUNCTION_TRACER 1ifdef CONFIG_FUNCTION_TRACER
2CFLAGS_REMOVE_core.o = -pg 2CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE)
3endif 3endif
4 4
5obj-y := core.o ring_buffer.o callchain.o 5obj-y := core.o ring_buffer.o callchain.o
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7f2fbb8b5069..f04daabfd1cf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4101,7 +4101,8 @@ unlock:
4101 rcu_read_unlock(); 4101 rcu_read_unlock();
4102} 4102}
4103 4103
4104void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) 4104void __weak arch_perf_update_userpage(
4105 struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now)
4105{ 4106{
4106} 4107}
4107 4108
@@ -4151,7 +4152,7 @@ void perf_event_update_userpage(struct perf_event *event)
4151 userpg->time_running = running + 4152 userpg->time_running = running +
4152 atomic64_read(&event->child_total_time_running); 4153 atomic64_read(&event->child_total_time_running);
4153 4154
4154 arch_perf_update_userpage(userpg, now); 4155 arch_perf_update_userpage(event, userpg, now);
4155 4156
4156 barrier(); 4157 barrier();
4157 ++userpg->lock; 4158 ++userpg->lock;
@@ -4293,6 +4294,9 @@ static void perf_mmap_open(struct vm_area_struct *vma)
4293 4294
4294 atomic_inc(&event->mmap_count); 4295 atomic_inc(&event->mmap_count);
4295 atomic_inc(&event->rb->mmap_count); 4296 atomic_inc(&event->rb->mmap_count);
4297
4298 if (event->pmu->event_mapped)
4299 event->pmu->event_mapped(event);
4296} 4300}
4297 4301
4298/* 4302/*
@@ -4312,6 +4316,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
4312 int mmap_locked = rb->mmap_locked; 4316 int mmap_locked = rb->mmap_locked;
4313 unsigned long size = perf_data_size(rb); 4317 unsigned long size = perf_data_size(rb);
4314 4318
4319 if (event->pmu->event_unmapped)
4320 event->pmu->event_unmapped(event);
4321
4315 atomic_dec(&rb->mmap_count); 4322 atomic_dec(&rb->mmap_count);
4316 4323
4317 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) 4324 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
@@ -4513,6 +4520,9 @@ unlock:
4513 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; 4520 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
4514 vma->vm_ops = &perf_mmap_vmops; 4521 vma->vm_ops = &perf_mmap_vmops;
4515 4522
4523 if (event->pmu->event_mapped)
4524 event->pmu->event_mapped(event);
4525
4516 return ret; 4526 return ret;
4517} 4527}
4518 4528
@@ -8508,6 +8518,18 @@ void __init perf_event_init(void)
8508 != 1024); 8518 != 1024);
8509} 8519}
8510 8520
8521ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
8522 char *page)
8523{
8524 struct perf_pmu_events_attr *pmu_attr =
8525 container_of(attr, struct perf_pmu_events_attr, attr);
8526
8527 if (pmu_attr->event_str)
8528 return sprintf(page, "%s\n", pmu_attr->event_str);
8529
8530 return 0;
8531}
8532
8511static int __init perf_event_sysfs_init(void) 8533static int __init perf_event_sysfs_init(void)
8512{ 8534{
8513 struct pmu *pmu; 8535 struct pmu *pmu;
diff --git a/kernel/exit.c b/kernel/exit.c
index 6806c55475ee..feff10bbb307 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -435,7 +435,8 @@ static void exit_mm(struct task_struct *tsk)
435 task_unlock(tsk); 435 task_unlock(tsk);
436 mm_update_next_owner(mm); 436 mm_update_next_owner(mm);
437 mmput(mm); 437 mmput(mm);
438 clear_thread_flag(TIF_MEMDIE); 438 if (test_thread_flag(TIF_MEMDIE))
439 unmark_oom_victim();
439} 440}
440 441
441static struct task_struct *find_alive_thread(struct task_struct *p) 442static struct task_struct *find_alive_thread(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index 4dc2ddade9f1..cf65139615a0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -438,12 +438,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
438 atomic_inc(&mapping->i_mmap_writable); 438 atomic_inc(&mapping->i_mmap_writable);
439 flush_dcache_mmap_lock(mapping); 439 flush_dcache_mmap_lock(mapping);
440 /* insert tmp into the share list, just after mpnt */ 440 /* insert tmp into the share list, just after mpnt */
441 if (unlikely(tmp->vm_flags & VM_NONLINEAR)) 441 vma_interval_tree_insert_after(tmp, mpnt,
442 vma_nonlinear_insert(tmp, 442 &mapping->i_mmap);
443 &mapping->i_mmap_nonlinear);
444 else
445 vma_interval_tree_insert_after(tmp, mpnt,
446 &mapping->i_mmap);
447 flush_dcache_mmap_unlock(mapping); 443 flush_dcache_mmap_unlock(mapping);
448 i_mmap_unlock_write(mapping); 444 i_mmap_unlock_write(mapping);
449 } 445 }
@@ -559,6 +555,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
559 INIT_LIST_HEAD(&mm->mmlist); 555 INIT_LIST_HEAD(&mm->mmlist);
560 mm->core_state = NULL; 556 mm->core_state = NULL;
561 atomic_long_set(&mm->nr_ptes, 0); 557 atomic_long_set(&mm->nr_ptes, 0);
558 mm_nr_pmds_init(mm);
562 mm->map_count = 0; 559 mm->map_count = 0;
563 mm->locked_vm = 0; 560 mm->locked_vm = 0;
564 mm->pinned_vm = 0; 561 mm->pinned_vm = 0;
@@ -607,6 +604,14 @@ static void check_mm(struct mm_struct *mm)
607 printk(KERN_ALERT "BUG: Bad rss-counter state " 604 printk(KERN_ALERT "BUG: Bad rss-counter state "
608 "mm:%p idx:%d val:%ld\n", mm, i, x); 605 "mm:%p idx:%d val:%ld\n", mm, i, x);
609 } 606 }
607
608 if (atomic_long_read(&mm->nr_ptes))
609 pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n",
610 atomic_long_read(&mm->nr_ptes));
611 if (mm_nr_pmds(mm))
612 pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n",
613 mm_nr_pmds(mm));
614
610#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS 615#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
611 VM_BUG_ON_MM(mm->pmd_huge_pte, mm); 616 VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
612#endif 617#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index 1f6d646eee4a..2579e407ff67 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2217,7 +2217,7 @@ retry:
2217 if (!abs_time) 2217 if (!abs_time)
2218 goto out; 2218 goto out;
2219 2219
2220 restart = &current_thread_info()->restart_block; 2220 restart = &current->restart_block;
2221 restart->fn = futex_wait_restart; 2221 restart->fn = futex_wait_restart;
2222 restart->futex.uaddr = uaddr; 2222 restart->futex.uaddr = uaddr;
2223 restart->futex.val = val; 2223 restart->futex.val = val;
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 52aa7e8de927..752d6486b67e 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -1,33 +1,7 @@
1ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' 1ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
2 2
3# if-lt 3obj-y := base.o fs.o
4# Usage VAR := $(call if-lt, $(a), $(b)) 4obj-$(CONFIG_GCOV_FORMAT_3_4) += gcc_3_4.o
5# Returns 1 if (a < b) 5obj-$(CONFIG_GCOV_FORMAT_4_7) += gcc_4_7.o
6if-lt = $(shell [ $(1) -lt $(2) ] && echo 1) 6obj-$(CONFIG_GCOV_FORMAT_AUTODETECT) += $(call cc-ifversion, -lt, 0407, \
7 7 gcc_3_4.o, gcc_4_7.o)
8ifeq ($(CONFIG_GCOV_FORMAT_3_4),y)
9 cc-ver := 0304
10else ifeq ($(CONFIG_GCOV_FORMAT_4_7),y)
11 cc-ver := 0407
12else
13# Use cc-version if available, otherwise set 0
14#
15# scripts/Kbuild.include, which contains cc-version function, is not included
16# during make clean "make -f scripts/Makefile.clean obj=kernel/gcov"
17# Meaning cc-ver is empty causing if-lt test to fail with
18# "/bin/sh: line 0: [: -lt: unary operator expected" error mesage.
19# This has no affect on the clean phase, but the error message could be
20# confusing/annoying. So this dummy workaround sets cc-ver to zero if cc-version
21# is not available. We can probably move if-lt to Kbuild.include, so it's also
22# not defined during clean or to include Kbuild.include in
23# scripts/Makefile.clean. But the following workaround seems least invasive.
24 cc-ver := $(if $(call cc-version),$(call cc-version),0)
25endif
26
27obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o
28
29ifeq ($(call if-lt, $(cc-ver), 0407),1)
30 obj-$(CONFIG_GCOV_KERNEL) += gcc_3_4.o
31else
32 obj-$(CONFIG_GCOV_KERNEL) += gcc_4_7.o
33endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 80692373abd6..196a06fbc122 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -243,6 +243,9 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
243 return -EINVAL; 243 return -EINVAL;
244 desc->affinity_hint = m; 244 desc->affinity_hint = m;
245 irq_put_desc_unlock(desc, flags); 245 irq_put_desc_unlock(desc, flags);
246 /* set the initial affinity to prevent every interrupt being on CPU0 */
247 if (m)
248 __irq_set_affinity(irq, m, false);
246 return 0; 249 return 0;
247} 250}
248EXPORT_SYMBOL_GPL(irq_set_affinity_hint); 251EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 9dc9bfd8a678..df2f4642d1e7 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -46,10 +46,9 @@ static int show_irq_affinity(int type, struct seq_file *m, void *v)
46 mask = desc->pending_mask; 46 mask = desc->pending_mask;
47#endif 47#endif
48 if (type) 48 if (type)
49 seq_cpumask_list(m, mask); 49 seq_printf(m, "%*pbl\n", cpumask_pr_args(mask));
50 else 50 else
51 seq_cpumask(m, mask); 51 seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
52 seq_putc(m, '\n');
53 return 0; 52 return 0;
54} 53}
55 54
@@ -67,8 +66,7 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
67 cpumask_copy(mask, desc->affinity_hint); 66 cpumask_copy(mask, desc->affinity_hint);
68 raw_spin_unlock_irqrestore(&desc->lock, flags); 67 raw_spin_unlock_irqrestore(&desc->lock, flags);
69 68
70 seq_cpumask(m, mask); 69 seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
71 seq_putc(m, '\n');
72 free_cpumask_var(mask); 70 free_cpumask_var(mask);
73 71
74 return 0; 72 return 0;
@@ -186,8 +184,7 @@ static const struct file_operations irq_affinity_list_proc_fops = {
186 184
187static int default_affinity_show(struct seq_file *m, void *v) 185static int default_affinity_show(struct seq_file *m, void *v)
188{ 186{
189 seq_cpumask(m, irq_default_affinity); 187 seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity));
190 seq_putc(m, '\n');
191 return 0; 188 return 0;
192} 189}
193 190
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 9a8a01abbaed..38c25b1f2fd5 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -444,7 +444,7 @@ arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
444} 444}
445 445
446/* 446/*
447 * Free up memory used by kernel, initrd, and comand line. This is temporary 447 * Free up memory used by kernel, initrd, and command line. This is temporary
448 * memory allocation which is not needed any more after these buffers have 448 * memory allocation which is not needed any more after these buffers have
449 * been loaded into separate segments and have been copied elsewhere. 449 * been loaded into separate segments and have been copied elsewhere.
450 */ 450 */
@@ -856,8 +856,6 @@ static int kimage_set_destination(struct kimage *image,
856 856
857 destination &= PAGE_MASK; 857 destination &= PAGE_MASK;
858 result = kimage_add_entry(image, destination | IND_DESTINATION); 858 result = kimage_add_entry(image, destination | IND_DESTINATION);
859 if (result == 0)
860 image->destination = destination;
861 859
862 return result; 860 return result;
863} 861}
@@ -869,8 +867,6 @@ static int kimage_add_page(struct kimage *image, unsigned long page)
869 867
870 page &= PAGE_MASK; 868 page &= PAGE_MASK;
871 result = kimage_add_entry(image, page | IND_SOURCE); 869 result = kimage_add_entry(image, page | IND_SOURCE);
872 if (result == 0)
873 image->destination += PAGE_SIZE;
874 870
875 return result; 871 return result;
876} 872}
@@ -1288,19 +1284,22 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
1288 if (nr_segments > 0) { 1284 if (nr_segments > 0) {
1289 unsigned long i; 1285 unsigned long i;
1290 1286
1291 /* Loading another kernel to reboot into */ 1287 if (flags & KEXEC_ON_CRASH) {
1292 if ((flags & KEXEC_ON_CRASH) == 0) 1288 /*
1293 result = kimage_alloc_init(&image, entry, nr_segments, 1289 * Loading another kernel to switch to if this one
1294 segments, flags); 1290 * crashes. Free any current crash dump kernel before
1295 /* Loading another kernel to switch to if this one crashes */
1296 else if (flags & KEXEC_ON_CRASH) {
1297 /* Free any current crash dump kernel before
1298 * we corrupt it. 1291 * we corrupt it.
1299 */ 1292 */
1293
1300 kimage_free(xchg(&kexec_crash_image, NULL)); 1294 kimage_free(xchg(&kexec_crash_image, NULL));
1301 result = kimage_alloc_init(&image, entry, nr_segments, 1295 result = kimage_alloc_init(&image, entry, nr_segments,
1302 segments, flags); 1296 segments, flags);
1303 crash_map_reserved_pages(); 1297 crash_map_reserved_pages();
1298 } else {
1299 /* Loading another kernel to reboot into. */
1300
1301 result = kimage_alloc_init(&image, entry, nr_segments,
1302 segments, flags);
1304 } 1303 }
1305 if (result) 1304 if (result)
1306 goto out; 1305 goto out;
@@ -2512,7 +2511,7 @@ static int kexec_apply_relocations(struct kimage *image)
2512 continue; 2511 continue;
2513 2512
2514 /* 2513 /*
2515 * Respective archicture needs to provide support for applying 2514 * Respective architecture needs to provide support for applying
2516 * relocations of type SHT_RELA/SHT_REL. 2515 * relocations of type SHT_RELA/SHT_REL.
2517 */ 2516 */
2518 if (sechdrs[i].sh_type == SHT_RELA) 2517 if (sechdrs[i].sh_type == SHT_RELA)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ee619929cf90..c90e417bb963 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -717,7 +717,7 @@ static void prepare_optimized_kprobe(struct kprobe *p)
717 struct optimized_kprobe *op; 717 struct optimized_kprobe *op;
718 718
719 op = container_of(p, struct optimized_kprobe, kp); 719 op = container_of(p, struct optimized_kprobe, kp);
720 arch_prepare_optimized_kprobe(op); 720 arch_prepare_optimized_kprobe(op, p);
721} 721}
722 722
723/* Allocate new optimized_kprobe and try to prepare optimized instructions */ 723/* Allocate new optimized_kprobe and try to prepare optimized instructions */
@@ -731,7 +731,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
731 731
732 INIT_LIST_HEAD(&op->list); 732 INIT_LIST_HEAD(&op->list);
733 op->kp.addr = p->addr; 733 op->kp.addr = p->addr;
734 arch_prepare_optimized_kprobe(op); 734 arch_prepare_optimized_kprobe(op, p);
735 735
736 return &op->kp; 736 return &op->kp;
737} 737}
@@ -869,7 +869,8 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt)
869{ 869{
870 struct kprobe *_p; 870 struct kprobe *_p;
871 871
872 unoptimize_kprobe(p, false); /* Try to unoptimize */ 872 /* Try to unoptimize */
873 unoptimize_kprobe(p, kprobes_all_disarmed);
873 874
874 if (!kprobe_queued(p)) { 875 if (!kprobe_queued(p)) {
875 arch_disarm_kprobe(p); 876 arch_disarm_kprobe(p);
@@ -1571,7 +1572,13 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
1571 1572
1572 /* Try to disarm and disable this/parent probe */ 1573 /* Try to disarm and disable this/parent probe */
1573 if (p == orig_p || aggr_kprobe_disabled(orig_p)) { 1574 if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1574 disarm_kprobe(orig_p, true); 1575 /*
1576 * If kprobes_all_disarmed is set, orig_p
1577 * should have already been disarmed, so
1578 * skip unneed disarming process.
1579 */
1580 if (!kprobes_all_disarmed)
1581 disarm_kprobe(orig_p, true);
1575 orig_p->flags |= KPROBE_FLAG_DISABLED; 1582 orig_p->flags |= KPROBE_FLAG_DISABLED;
1576 } 1583 }
1577 } 1584 }
@@ -2320,6 +2327,12 @@ static void arm_all_kprobes(void)
2320 if (!kprobes_all_disarmed) 2327 if (!kprobes_all_disarmed)
2321 goto already_enabled; 2328 goto already_enabled;
2322 2329
2330 /*
2331 * optimize_kprobe() called by arm_kprobe() checks
2332 * kprobes_all_disarmed, so set kprobes_all_disarmed before
2333 * arm_kprobe.
2334 */
2335 kprobes_all_disarmed = false;
2323 /* Arming kprobes doesn't optimize kprobe itself */ 2336 /* Arming kprobes doesn't optimize kprobe itself */
2324 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2337 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2325 head = &kprobe_table[i]; 2338 head = &kprobe_table[i];
@@ -2328,7 +2341,6 @@ static void arm_all_kprobes(void)
2328 arm_kprobe(p); 2341 arm_kprobe(p);
2329 } 2342 }
2330 2343
2331 kprobes_all_disarmed = false;
2332 printk(KERN_INFO "Kprobes globally enabled\n"); 2344 printk(KERN_INFO "Kprobes globally enabled\n");
2333 2345
2334already_enabled: 2346already_enabled:
diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig
new file mode 100644
index 000000000000..045022557936
--- /dev/null
+++ b/kernel/livepatch/Kconfig
@@ -0,0 +1,18 @@
1config HAVE_LIVEPATCH
2 bool
3 help
4 Arch supports kernel live patching
5
6config LIVEPATCH
7 bool "Kernel Live Patching"
8 depends on DYNAMIC_FTRACE_WITH_REGS
9 depends on MODULES
10 depends on SYSFS
11 depends on KALLSYMS_ALL
12 depends on HAVE_LIVEPATCH
13 help
14 Say Y here if you want to support kernel live patching.
15 This option has no runtime impact until a kernel "patch"
16 module uses the interface provided by this option to register
17 a patch, causing calls to patched functions to be redirected
18 to new function code contained in the patch module.
diff --git a/kernel/livepatch/Makefile b/kernel/livepatch/Makefile
new file mode 100644
index 000000000000..e8780c0901d9
--- /dev/null
+++ b/kernel/livepatch/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_LIVEPATCH) += livepatch.o
2
3livepatch-objs := core.o
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
new file mode 100644
index 000000000000..ff7f47d026ac
--- /dev/null
+++ b/kernel/livepatch/core.c
@@ -0,0 +1,1015 @@
1/*
2 * core.c - Kernel Live Patching Core
3 *
4 * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
5 * Copyright (C) 2014 SUSE
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23#include <linux/module.h>
24#include <linux/kernel.h>
25#include <linux/mutex.h>
26#include <linux/slab.h>
27#include <linux/ftrace.h>
28#include <linux/list.h>
29#include <linux/kallsyms.h>
30#include <linux/livepatch.h>
31
32/**
33 * struct klp_ops - structure for tracking registered ftrace ops structs
34 *
35 * A single ftrace_ops is shared between all enabled replacement functions
36 * (klp_func structs) which have the same old_addr. This allows the switch
37 * between function versions to happen instantaneously by updating the klp_ops
38 * struct's func_stack list. The winner is the klp_func at the top of the
39 * func_stack (front of the list).
40 *
41 * @node: node for the global klp_ops list
42 * @func_stack: list head for the stack of klp_func's (active func is on top)
43 * @fops: registered ftrace ops struct
44 */
45struct klp_ops {
46 struct list_head node;
47 struct list_head func_stack;
48 struct ftrace_ops fops;
49};
50
51/*
52 * The klp_mutex protects the global lists and state transitions of any
53 * structure reachable from them. References to any structure must be obtained
54 * under mutex protection (except in klp_ftrace_handler(), which uses RCU to
55 * ensure it gets consistent data).
56 */
57static DEFINE_MUTEX(klp_mutex);
58
59static LIST_HEAD(klp_patches);
60static LIST_HEAD(klp_ops);
61
62static struct kobject *klp_root_kobj;
63
64static struct klp_ops *klp_find_ops(unsigned long old_addr)
65{
66 struct klp_ops *ops;
67 struct klp_func *func;
68
69 list_for_each_entry(ops, &klp_ops, node) {
70 func = list_first_entry(&ops->func_stack, struct klp_func,
71 stack_node);
72 if (func->old_addr == old_addr)
73 return ops;
74 }
75
76 return NULL;
77}
78
79static bool klp_is_module(struct klp_object *obj)
80{
81 return obj->name;
82}
83
84static bool klp_is_object_loaded(struct klp_object *obj)
85{
86 return !obj->name || obj->mod;
87}
88
89/* sets obj->mod if object is not vmlinux and module is found */
90static void klp_find_object_module(struct klp_object *obj)
91{
92 if (!klp_is_module(obj))
93 return;
94
95 mutex_lock(&module_mutex);
96 /*
97 * We don't need to take a reference on the module here because we have
98 * the klp_mutex, which is also taken by the module notifier. This
99 * prevents any module from unloading until we release the klp_mutex.
100 */
101 obj->mod = find_module(obj->name);
102 mutex_unlock(&module_mutex);
103}
104
105/* klp_mutex must be held by caller */
106static bool klp_is_patch_registered(struct klp_patch *patch)
107{
108 struct klp_patch *mypatch;
109
110 list_for_each_entry(mypatch, &klp_patches, list)
111 if (mypatch == patch)
112 return true;
113
114 return false;
115}
116
117static bool klp_initialized(void)
118{
119 return klp_root_kobj;
120}
121
122struct klp_find_arg {
123 const char *objname;
124 const char *name;
125 unsigned long addr;
126 /*
127 * If count == 0, the symbol was not found. If count == 1, a unique
128 * match was found and addr is set. If count > 1, there is
129 * unresolvable ambiguity among "count" number of symbols with the same
130 * name in the same object.
131 */
132 unsigned long count;
133};
134
135static int klp_find_callback(void *data, const char *name,
136 struct module *mod, unsigned long addr)
137{
138 struct klp_find_arg *args = data;
139
140 if ((mod && !args->objname) || (!mod && args->objname))
141 return 0;
142
143 if (strcmp(args->name, name))
144 return 0;
145
146 if (args->objname && strcmp(args->objname, mod->name))
147 return 0;
148
149 /*
150 * args->addr might be overwritten if another match is found
151 * but klp_find_object_symbol() handles this and only returns the
152 * addr if count == 1.
153 */
154 args->addr = addr;
155 args->count++;
156
157 return 0;
158}
159
160static int klp_find_object_symbol(const char *objname, const char *name,
161 unsigned long *addr)
162{
163 struct klp_find_arg args = {
164 .objname = objname,
165 .name = name,
166 .addr = 0,
167 .count = 0
168 };
169
170 kallsyms_on_each_symbol(klp_find_callback, &args);
171
172 if (args.count == 0)
173 pr_err("symbol '%s' not found in symbol table\n", name);
174 else if (args.count > 1)
175 pr_err("unresolvable ambiguity (%lu matches) on symbol '%s' in object '%s'\n",
176 args.count, name, objname);
177 else {
178 *addr = args.addr;
179 return 0;
180 }
181
182 *addr = 0;
183 return -EINVAL;
184}
185
186struct klp_verify_args {
187 const char *name;
188 const unsigned long addr;
189};
190
191static int klp_verify_callback(void *data, const char *name,
192 struct module *mod, unsigned long addr)
193{
194 struct klp_verify_args *args = data;
195
196 if (!mod &&
197 !strcmp(args->name, name) &&
198 args->addr == addr)
199 return 1;
200
201 return 0;
202}
203
204static int klp_verify_vmlinux_symbol(const char *name, unsigned long addr)
205{
206 struct klp_verify_args args = {
207 .name = name,
208 .addr = addr,
209 };
210
211 if (kallsyms_on_each_symbol(klp_verify_callback, &args))
212 return 0;
213
214 pr_err("symbol '%s' not found at specified address 0x%016lx, kernel mismatch?\n",
215 name, addr);
216 return -EINVAL;
217}
218
219static int klp_find_verify_func_addr(struct klp_object *obj,
220 struct klp_func *func)
221{
222 int ret;
223
224#if defined(CONFIG_RANDOMIZE_BASE)
225 /* KASLR is enabled, disregard old_addr from user */
226 func->old_addr = 0;
227#endif
228
229 if (!func->old_addr || klp_is_module(obj))
230 ret = klp_find_object_symbol(obj->name, func->old_name,
231 &func->old_addr);
232 else
233 ret = klp_verify_vmlinux_symbol(func->old_name,
234 func->old_addr);
235
236 return ret;
237}
238
239/*
240 * external symbols are located outside the parent object (where the parent
241 * object is either vmlinux or the kmod being patched).
242 */
243static int klp_find_external_symbol(struct module *pmod, const char *name,
244 unsigned long *addr)
245{
246 const struct kernel_symbol *sym;
247
248 /* first, check if it's an exported symbol */
249 preempt_disable();
250 sym = find_symbol(name, NULL, NULL, true, true);
251 preempt_enable();
252 if (sym) {
253 *addr = sym->value;
254 return 0;
255 }
256
257 /* otherwise check if it's in another .o within the patch module */
258 return klp_find_object_symbol(pmod->name, name, addr);
259}
260
261static int klp_write_object_relocations(struct module *pmod,
262 struct klp_object *obj)
263{
264 int ret;
265 struct klp_reloc *reloc;
266
267 if (WARN_ON(!klp_is_object_loaded(obj)))
268 return -EINVAL;
269
270 if (WARN_ON(!obj->relocs))
271 return -EINVAL;
272
273 for (reloc = obj->relocs; reloc->name; reloc++) {
274 if (!klp_is_module(obj)) {
275 ret = klp_verify_vmlinux_symbol(reloc->name,
276 reloc->val);
277 if (ret)
278 return ret;
279 } else {
280 /* module, reloc->val needs to be discovered */
281 if (reloc->external)
282 ret = klp_find_external_symbol(pmod,
283 reloc->name,
284 &reloc->val);
285 else
286 ret = klp_find_object_symbol(obj->mod->name,
287 reloc->name,
288 &reloc->val);
289 if (ret)
290 return ret;
291 }
292 ret = klp_write_module_reloc(pmod, reloc->type, reloc->loc,
293 reloc->val + reloc->addend);
294 if (ret) {
295 pr_err("relocation failed for symbol '%s' at 0x%016lx (%d)\n",
296 reloc->name, reloc->val, ret);
297 return ret;
298 }
299 }
300
301 return 0;
302}
303
304static void notrace klp_ftrace_handler(unsigned long ip,
305 unsigned long parent_ip,
306 struct ftrace_ops *fops,
307 struct pt_regs *regs)
308{
309 struct klp_ops *ops;
310 struct klp_func *func;
311
312 ops = container_of(fops, struct klp_ops, fops);
313
314 rcu_read_lock();
315 func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
316 stack_node);
317 rcu_read_unlock();
318
319 if (WARN_ON_ONCE(!func))
320 return;
321
322 klp_arch_set_pc(regs, (unsigned long)func->new_func);
323}
324
325static int klp_disable_func(struct klp_func *func)
326{
327 struct klp_ops *ops;
328 int ret;
329
330 if (WARN_ON(func->state != KLP_ENABLED))
331 return -EINVAL;
332
333 if (WARN_ON(!func->old_addr))
334 return -EINVAL;
335
336 ops = klp_find_ops(func->old_addr);
337 if (WARN_ON(!ops))
338 return -EINVAL;
339
340 if (list_is_singular(&ops->func_stack)) {
341 ret = unregister_ftrace_function(&ops->fops);
342 if (ret) {
343 pr_err("failed to unregister ftrace handler for function '%s' (%d)\n",
344 func->old_name, ret);
345 return ret;
346 }
347
348 ret = ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0);
349 if (ret)
350 pr_warn("function unregister succeeded but failed to clear the filter\n");
351
352 list_del_rcu(&func->stack_node);
353 list_del(&ops->node);
354 kfree(ops);
355 } else {
356 list_del_rcu(&func->stack_node);
357 }
358
359 func->state = KLP_DISABLED;
360
361 return 0;
362}
363
364static int klp_enable_func(struct klp_func *func)
365{
366 struct klp_ops *ops;
367 int ret;
368
369 if (WARN_ON(!func->old_addr))
370 return -EINVAL;
371
372 if (WARN_ON(func->state != KLP_DISABLED))
373 return -EINVAL;
374
375 ops = klp_find_ops(func->old_addr);
376 if (!ops) {
377 ops = kzalloc(sizeof(*ops), GFP_KERNEL);
378 if (!ops)
379 return -ENOMEM;
380
381 ops->fops.func = klp_ftrace_handler;
382 ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS |
383 FTRACE_OPS_FL_DYNAMIC |
384 FTRACE_OPS_FL_IPMODIFY;
385
386 list_add(&ops->node, &klp_ops);
387
388 INIT_LIST_HEAD(&ops->func_stack);
389 list_add_rcu(&func->stack_node, &ops->func_stack);
390
391 ret = ftrace_set_filter_ip(&ops->fops, func->old_addr, 0, 0);
392 if (ret) {
393 pr_err("failed to set ftrace filter for function '%s' (%d)\n",
394 func->old_name, ret);
395 goto err;
396 }
397
398 ret = register_ftrace_function(&ops->fops);
399 if (ret) {
400 pr_err("failed to register ftrace handler for function '%s' (%d)\n",
401 func->old_name, ret);
402 ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0);
403 goto err;
404 }
405
406
407 } else {
408 list_add_rcu(&func->stack_node, &ops->func_stack);
409 }
410
411 func->state = KLP_ENABLED;
412
413 return 0;
414
415err:
416 list_del_rcu(&func->stack_node);
417 list_del(&ops->node);
418 kfree(ops);
419 return ret;
420}
421
422static int klp_disable_object(struct klp_object *obj)
423{
424 struct klp_func *func;
425 int ret;
426
427 for (func = obj->funcs; func->old_name; func++) {
428 if (func->state != KLP_ENABLED)
429 continue;
430
431 ret = klp_disable_func(func);
432 if (ret)
433 return ret;
434 }
435
436 obj->state = KLP_DISABLED;
437
438 return 0;
439}
440
441static int klp_enable_object(struct klp_object *obj)
442{
443 struct klp_func *func;
444 int ret;
445
446 if (WARN_ON(obj->state != KLP_DISABLED))
447 return -EINVAL;
448
449 if (WARN_ON(!klp_is_object_loaded(obj)))
450 return -EINVAL;
451
452 for (func = obj->funcs; func->old_name; func++) {
453 ret = klp_enable_func(func);
454 if (ret)
455 goto unregister;
456 }
457 obj->state = KLP_ENABLED;
458
459 return 0;
460
461unregister:
462 WARN_ON(klp_disable_object(obj));
463 return ret;
464}
465
466static int __klp_disable_patch(struct klp_patch *patch)
467{
468 struct klp_object *obj;
469 int ret;
470
471 /* enforce stacking: only the last enabled patch can be disabled */
472 if (!list_is_last(&patch->list, &klp_patches) &&
473 list_next_entry(patch, list)->state == KLP_ENABLED)
474 return -EBUSY;
475
476 pr_notice("disabling patch '%s'\n", patch->mod->name);
477
478 for (obj = patch->objs; obj->funcs; obj++) {
479 if (obj->state != KLP_ENABLED)
480 continue;
481
482 ret = klp_disable_object(obj);
483 if (ret)
484 return ret;
485 }
486
487 patch->state = KLP_DISABLED;
488
489 return 0;
490}
491
492/**
493 * klp_disable_patch() - disables a registered patch
494 * @patch: The registered, enabled patch to be disabled
495 *
496 * Unregisters the patched functions from ftrace.
497 *
498 * Return: 0 on success, otherwise error
499 */
500int klp_disable_patch(struct klp_patch *patch)
501{
502 int ret;
503
504 mutex_lock(&klp_mutex);
505
506 if (!klp_is_patch_registered(patch)) {
507 ret = -EINVAL;
508 goto err;
509 }
510
511 if (patch->state == KLP_DISABLED) {
512 ret = -EINVAL;
513 goto err;
514 }
515
516 ret = __klp_disable_patch(patch);
517
518err:
519 mutex_unlock(&klp_mutex);
520 return ret;
521}
522EXPORT_SYMBOL_GPL(klp_disable_patch);
523
524static int __klp_enable_patch(struct klp_patch *patch)
525{
526 struct klp_object *obj;
527 int ret;
528
529 if (WARN_ON(patch->state != KLP_DISABLED))
530 return -EINVAL;
531
532 /* enforce stacking: only the first disabled patch can be enabled */
533 if (patch->list.prev != &klp_patches &&
534 list_prev_entry(patch, list)->state == KLP_DISABLED)
535 return -EBUSY;
536
537 pr_notice_once("tainting kernel with TAINT_LIVEPATCH\n");
538 add_taint(TAINT_LIVEPATCH, LOCKDEP_STILL_OK);
539
540 pr_notice("enabling patch '%s'\n", patch->mod->name);
541
542 for (obj = patch->objs; obj->funcs; obj++) {
543 klp_find_object_module(obj);
544
545 if (!klp_is_object_loaded(obj))
546 continue;
547
548 ret = klp_enable_object(obj);
549 if (ret)
550 goto unregister;
551 }
552
553 patch->state = KLP_ENABLED;
554
555 return 0;
556
557unregister:
558 WARN_ON(__klp_disable_patch(patch));
559 return ret;
560}
561
562/**
563 * klp_enable_patch() - enables a registered patch
564 * @patch: The registered, disabled patch to be enabled
565 *
566 * Performs the needed symbol lookups and code relocations,
567 * then registers the patched functions with ftrace.
568 *
569 * Return: 0 on success, otherwise error
570 */
571int klp_enable_patch(struct klp_patch *patch)
572{
573 int ret;
574
575 mutex_lock(&klp_mutex);
576
577 if (!klp_is_patch_registered(patch)) {
578 ret = -EINVAL;
579 goto err;
580 }
581
582 ret = __klp_enable_patch(patch);
583
584err:
585 mutex_unlock(&klp_mutex);
586 return ret;
587}
588EXPORT_SYMBOL_GPL(klp_enable_patch);
589
590/*
591 * Sysfs Interface
592 *
593 * /sys/kernel/livepatch
594 * /sys/kernel/livepatch/<patch>
595 * /sys/kernel/livepatch/<patch>/enabled
596 * /sys/kernel/livepatch/<patch>/<object>
597 * /sys/kernel/livepatch/<patch>/<object>/<func>
598 */
599
600static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
601 const char *buf, size_t count)
602{
603 struct klp_patch *patch;
604 int ret;
605 unsigned long val;
606
607 ret = kstrtoul(buf, 10, &val);
608 if (ret)
609 return -EINVAL;
610
611 if (val != KLP_DISABLED && val != KLP_ENABLED)
612 return -EINVAL;
613
614 patch = container_of(kobj, struct klp_patch, kobj);
615
616 mutex_lock(&klp_mutex);
617
618 if (val == patch->state) {
619 /* already in requested state */
620 ret = -EINVAL;
621 goto err;
622 }
623
624 if (val == KLP_ENABLED) {
625 ret = __klp_enable_patch(patch);
626 if (ret)
627 goto err;
628 } else {
629 ret = __klp_disable_patch(patch);
630 if (ret)
631 goto err;
632 }
633
634 mutex_unlock(&klp_mutex);
635
636 return count;
637
638err:
639 mutex_unlock(&klp_mutex);
640 return ret;
641}
642
643static ssize_t enabled_show(struct kobject *kobj,
644 struct kobj_attribute *attr, char *buf)
645{
646 struct klp_patch *patch;
647
648 patch = container_of(kobj, struct klp_patch, kobj);
649 return snprintf(buf, PAGE_SIZE-1, "%d\n", patch->state);
650}
651
652static struct kobj_attribute enabled_kobj_attr = __ATTR_RW(enabled);
653static struct attribute *klp_patch_attrs[] = {
654 &enabled_kobj_attr.attr,
655 NULL
656};
657
658static void klp_kobj_release_patch(struct kobject *kobj)
659{
660 /*
661 * Once we have a consistency model we'll need to module_put() the
662 * patch module here. See klp_register_patch() for more details.
663 */
664}
665
666static struct kobj_type klp_ktype_patch = {
667 .release = klp_kobj_release_patch,
668 .sysfs_ops = &kobj_sysfs_ops,
669 .default_attrs = klp_patch_attrs,
670};
671
672static void klp_kobj_release_func(struct kobject *kobj)
673{
674}
675
676static struct kobj_type klp_ktype_func = {
677 .release = klp_kobj_release_func,
678 .sysfs_ops = &kobj_sysfs_ops,
679};
680
681/*
682 * Free all functions' kobjects in the array up to some limit. When limit is
683 * NULL, all kobjects are freed.
684 */
685static void klp_free_funcs_limited(struct klp_object *obj,
686 struct klp_func *limit)
687{
688 struct klp_func *func;
689
690 for (func = obj->funcs; func->old_name && func != limit; func++)
691 kobject_put(&func->kobj);
692}
693
694/* Clean up when a patched object is unloaded */
695static void klp_free_object_loaded(struct klp_object *obj)
696{
697 struct klp_func *func;
698
699 obj->mod = NULL;
700
701 for (func = obj->funcs; func->old_name; func++)
702 func->old_addr = 0;
703}
704
705/*
706 * Free all objects' kobjects in the array up to some limit. When limit is
707 * NULL, all kobjects are freed.
708 */
709static void klp_free_objects_limited(struct klp_patch *patch,
710 struct klp_object *limit)
711{
712 struct klp_object *obj;
713
714 for (obj = patch->objs; obj->funcs && obj != limit; obj++) {
715 klp_free_funcs_limited(obj, NULL);
716 kobject_put(obj->kobj);
717 }
718}
719
720static void klp_free_patch(struct klp_patch *patch)
721{
722 klp_free_objects_limited(patch, NULL);
723 if (!list_empty(&patch->list))
724 list_del(&patch->list);
725 kobject_put(&patch->kobj);
726}
727
728static int klp_init_func(struct klp_object *obj, struct klp_func *func)
729{
730 INIT_LIST_HEAD(&func->stack_node);
731 func->state = KLP_DISABLED;
732
733 return kobject_init_and_add(&func->kobj, &klp_ktype_func,
734 obj->kobj, func->old_name);
735}
736
737/* parts of the initialization that is done only when the object is loaded */
738static int klp_init_object_loaded(struct klp_patch *patch,
739 struct klp_object *obj)
740{
741 struct klp_func *func;
742 int ret;
743
744 if (obj->relocs) {
745 ret = klp_write_object_relocations(patch->mod, obj);
746 if (ret)
747 return ret;
748 }
749
750 for (func = obj->funcs; func->old_name; func++) {
751 ret = klp_find_verify_func_addr(obj, func);
752 if (ret)
753 return ret;
754 }
755
756 return 0;
757}
758
759static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
760{
761 struct klp_func *func;
762 int ret;
763 const char *name;
764
765 if (!obj->funcs)
766 return -EINVAL;
767
768 obj->state = KLP_DISABLED;
769
770 klp_find_object_module(obj);
771
772 name = klp_is_module(obj) ? obj->name : "vmlinux";
773 obj->kobj = kobject_create_and_add(name, &patch->kobj);
774 if (!obj->kobj)
775 return -ENOMEM;
776
777 for (func = obj->funcs; func->old_name; func++) {
778 ret = klp_init_func(obj, func);
779 if (ret)
780 goto free;
781 }
782
783 if (klp_is_object_loaded(obj)) {
784 ret = klp_init_object_loaded(patch, obj);
785 if (ret)
786 goto free;
787 }
788
789 return 0;
790
791free:
792 klp_free_funcs_limited(obj, func);
793 kobject_put(obj->kobj);
794 return ret;
795}
796
797static int klp_init_patch(struct klp_patch *patch)
798{
799 struct klp_object *obj;
800 int ret;
801
802 if (!patch->objs)
803 return -EINVAL;
804
805 mutex_lock(&klp_mutex);
806
807 patch->state = KLP_DISABLED;
808
809 ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch,
810 klp_root_kobj, patch->mod->name);
811 if (ret)
812 goto unlock;
813
814 for (obj = patch->objs; obj->funcs; obj++) {
815 ret = klp_init_object(patch, obj);
816 if (ret)
817 goto free;
818 }
819
820 list_add_tail(&patch->list, &klp_patches);
821
822 mutex_unlock(&klp_mutex);
823
824 return 0;
825
826free:
827 klp_free_objects_limited(patch, obj);
828 kobject_put(&patch->kobj);
829unlock:
830 mutex_unlock(&klp_mutex);
831 return ret;
832}
833
834/**
835 * klp_unregister_patch() - unregisters a patch
836 * @patch: Disabled patch to be unregistered
837 *
838 * Frees the data structures and removes the sysfs interface.
839 *
840 * Return: 0 on success, otherwise error
841 */
842int klp_unregister_patch(struct klp_patch *patch)
843{
844 int ret = 0;
845
846 mutex_lock(&klp_mutex);
847
848 if (!klp_is_patch_registered(patch)) {
849 ret = -EINVAL;
850 goto out;
851 }
852
853 if (patch->state == KLP_ENABLED) {
854 ret = -EBUSY;
855 goto out;
856 }
857
858 klp_free_patch(patch);
859
860out:
861 mutex_unlock(&klp_mutex);
862 return ret;
863}
864EXPORT_SYMBOL_GPL(klp_unregister_patch);
865
866/**
867 * klp_register_patch() - registers a patch
868 * @patch: Patch to be registered
869 *
870 * Initializes the data structure associated with the patch and
871 * creates the sysfs interface.
872 *
873 * Return: 0 on success, otherwise error
874 */
875int klp_register_patch(struct klp_patch *patch)
876{
877 int ret;
878
879 if (!klp_initialized())
880 return -ENODEV;
881
882 if (!patch || !patch->mod)
883 return -EINVAL;
884
885 /*
886 * A reference is taken on the patch module to prevent it from being
887 * unloaded. Right now, we don't allow patch modules to unload since
888 * there is currently no method to determine if a thread is still
889 * running in the patched code contained in the patch module once
890 * the ftrace registration is successful.
891 */
892 if (!try_module_get(patch->mod))
893 return -ENODEV;
894
895 ret = klp_init_patch(patch);
896 if (ret)
897 module_put(patch->mod);
898
899 return ret;
900}
901EXPORT_SYMBOL_GPL(klp_register_patch);
902
903static void klp_module_notify_coming(struct klp_patch *patch,
904 struct klp_object *obj)
905{
906 struct module *pmod = patch->mod;
907 struct module *mod = obj->mod;
908 int ret;
909
910 ret = klp_init_object_loaded(patch, obj);
911 if (ret)
912 goto err;
913
914 if (patch->state == KLP_DISABLED)
915 return;
916
917 pr_notice("applying patch '%s' to loading module '%s'\n",
918 pmod->name, mod->name);
919
920 ret = klp_enable_object(obj);
921 if (!ret)
922 return;
923
924err:
925 pr_warn("failed to apply patch '%s' to module '%s' (%d)\n",
926 pmod->name, mod->name, ret);
927}
928
929static void klp_module_notify_going(struct klp_patch *patch,
930 struct klp_object *obj)
931{
932 struct module *pmod = patch->mod;
933 struct module *mod = obj->mod;
934 int ret;
935
936 if (patch->state == KLP_DISABLED)
937 goto disabled;
938
939 pr_notice("reverting patch '%s' on unloading module '%s'\n",
940 pmod->name, mod->name);
941
942 ret = klp_disable_object(obj);
943 if (ret)
944 pr_warn("failed to revert patch '%s' on module '%s' (%d)\n",
945 pmod->name, mod->name, ret);
946
947disabled:
948 klp_free_object_loaded(obj);
949}
950
951static int klp_module_notify(struct notifier_block *nb, unsigned long action,
952 void *data)
953{
954 struct module *mod = data;
955 struct klp_patch *patch;
956 struct klp_object *obj;
957
958 if (action != MODULE_STATE_COMING && action != MODULE_STATE_GOING)
959 return 0;
960
961 mutex_lock(&klp_mutex);
962
963 list_for_each_entry(patch, &klp_patches, list) {
964 for (obj = patch->objs; obj->funcs; obj++) {
965 if (!klp_is_module(obj) || strcmp(obj->name, mod->name))
966 continue;
967
968 if (action == MODULE_STATE_COMING) {
969 obj->mod = mod;
970 klp_module_notify_coming(patch, obj);
971 } else /* MODULE_STATE_GOING */
972 klp_module_notify_going(patch, obj);
973
974 break;
975 }
976 }
977
978 mutex_unlock(&klp_mutex);
979
980 return 0;
981}
982
983static struct notifier_block klp_module_nb = {
984 .notifier_call = klp_module_notify,
985 .priority = INT_MIN+1, /* called late but before ftrace notifier */
986};
987
988static int klp_init(void)
989{
990 int ret;
991
992 ret = klp_check_compiler_support();
993 if (ret) {
994 pr_info("Your compiler is too old; turning off.\n");
995 return -EINVAL;
996 }
997
998 ret = register_module_notifier(&klp_module_nb);
999 if (ret)
1000 return ret;
1001
1002 klp_root_kobj = kobject_create_and_add("livepatch", kernel_kobj);
1003 if (!klp_root_kobj) {
1004 ret = -ENOMEM;
1005 goto unregister;
1006 }
1007
1008 return 0;
1009
1010unregister:
1011 unregister_module_notifier(&klp_module_nb);
1012 return ret;
1013}
1014
1015module_init(klp_init);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 4ca8eb151975..de7a416cca2a 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -2,10 +2,10 @@
2obj-y += mutex.o semaphore.o rwsem.o 2obj-y += mutex.o semaphore.o rwsem.o
3 3
4ifdef CONFIG_FUNCTION_TRACER 4ifdef CONFIG_FUNCTION_TRACER
5CFLAGS_REMOVE_lockdep.o = -pg 5CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
6CFLAGS_REMOVE_lockdep_proc.o = -pg 6CFLAGS_REMOVE_lockdep_proc.o = $(CC_FLAGS_FTRACE)
7CFLAGS_REMOVE_mutex-debug.o = -pg 7CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE)
8CFLAGS_REMOVE_rtmutex-debug.o = -pg 8CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
9endif 9endif
10 10
11obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o 11obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 4b082b5cac9e..db3ccb1dd614 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -363,6 +363,14 @@ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
363} 363}
364EXPORT_SYMBOL(_raw_spin_lock_nested); 364EXPORT_SYMBOL(_raw_spin_lock_nested);
365 365
366void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass)
367{
368 __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
369 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
370 LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
371}
372EXPORT_SYMBOL(_raw_spin_lock_bh_nested);
373
366unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, 374unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
367 int subclass) 375 int subclass)
368{ 376{
diff --git a/kernel/module.c b/kernel/module.c
index d856e96a3cce..b34813f725e9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -56,6 +56,7 @@
56#include <linux/async.h> 56#include <linux/async.h>
57#include <linux/percpu.h> 57#include <linux/percpu.h>
58#include <linux/kmemleak.h> 58#include <linux/kmemleak.h>
59#include <linux/kasan.h>
59#include <linux/jump_label.h> 60#include <linux/jump_label.h>
60#include <linux/pfn.h> 61#include <linux/pfn.h>
61#include <linux/bsearch.h> 62#include <linux/bsearch.h>
@@ -1225,6 +1226,12 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
1225 const unsigned long *crc; 1226 const unsigned long *crc;
1226 int err; 1227 int err;
1227 1228
1229 /*
1230 * The module_mutex should not be a heavily contended lock;
1231 * if we get the occasional sleep here, we'll go an extra iteration
1232 * in the wait_event_interruptible(), which is harmless.
1233 */
1234 sched_annotate_sleep();
1228 mutex_lock(&module_mutex); 1235 mutex_lock(&module_mutex);
1229 sym = find_symbol(name, &owner, &crc, 1236 sym = find_symbol(name, &owner, &crc,
1230 !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); 1237 !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
@@ -1807,6 +1814,7 @@ static void unset_module_init_ro_nx(struct module *mod) { }
1807void __weak module_memfree(void *module_region) 1814void __weak module_memfree(void *module_region)
1808{ 1815{
1809 vfree(module_region); 1816 vfree(module_region);
1817 kasan_module_free(module_region);
1810} 1818}
1811 1819
1812void __weak module_arch_cleanup(struct module *mod) 1820void __weak module_arch_cleanup(struct module *mod)
@@ -2978,6 +2986,12 @@ static bool finished_loading(const char *name)
2978 struct module *mod; 2986 struct module *mod;
2979 bool ret; 2987 bool ret;
2980 2988
2989 /*
2990 * The module_mutex should not be a heavily contended lock;
2991 * if we get the occasional sleep here, we'll go an extra iteration
2992 * in the wait_event_interruptible(), which is harmless.
2993 */
2994 sched_annotate_sleep();
2981 mutex_lock(&module_mutex); 2995 mutex_lock(&module_mutex);
2982 mod = find_module_all(name, strlen(name), true); 2996 mod = find_module_all(name, strlen(name), true);
2983 ret = !mod || mod->state == MODULE_STATE_LIVE 2997 ret = !mod || mod->state == MODULE_STATE_LIVE
@@ -3011,8 +3025,13 @@ static void do_free_init(struct rcu_head *head)
3011 kfree(m); 3025 kfree(m);
3012} 3026}
3013 3027
3014/* This is where the real work happens */ 3028/*
3015static int do_init_module(struct module *mod) 3029 * This is where the real work happens.
3030 *
3031 * Keep it uninlined to provide a reliable breakpoint target, e.g. for the gdb
3032 * helper command 'lx-symbols'.
3033 */
3034static noinline int do_init_module(struct module *mod)
3016{ 3035{
3017 int ret = 0; 3036 int ret = 0;
3018 struct mod_initfree *freeinit; 3037 struct mod_initfree *freeinit;
@@ -3120,32 +3139,6 @@ static int may_init_module(void)
3120} 3139}
3121 3140
3122/* 3141/*
3123 * Can't use wait_event_interruptible() because our condition
3124 * 'finished_loading()' contains a blocking primitive itself (mutex_lock).
3125 */
3126static int wait_finished_loading(struct module *mod)
3127{
3128 DEFINE_WAIT_FUNC(wait, woken_wake_function);
3129 int ret = 0;
3130
3131 add_wait_queue(&module_wq, &wait);
3132 for (;;) {
3133 if (finished_loading(mod->name))
3134 break;
3135
3136 if (signal_pending(current)) {
3137 ret = -ERESTARTSYS;
3138 break;
3139 }
3140
3141 wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
3142 }
3143 remove_wait_queue(&module_wq, &wait);
3144
3145 return ret;
3146}
3147
3148/*
3149 * We try to place it in the list now to make sure it's unique before 3142 * We try to place it in the list now to make sure it's unique before
3150 * we dedicate too many resources. In particular, temporary percpu 3143 * we dedicate too many resources. In particular, temporary percpu
3151 * memory exhaustion. 3144 * memory exhaustion.
@@ -3165,8 +3158,8 @@ again:
3165 || old->state == MODULE_STATE_UNFORMED) { 3158 || old->state == MODULE_STATE_UNFORMED) {
3166 /* Wait in case it fails to load. */ 3159 /* Wait in case it fails to load. */
3167 mutex_unlock(&module_mutex); 3160 mutex_unlock(&module_mutex);
3168 3161 err = wait_event_interruptible(module_wq,
3169 err = wait_finished_loading(mod); 3162 finished_loading(mod->name));
3170 if (err) 3163 if (err)
3171 goto out_unlocked; 3164 goto out_unlocked;
3172 goto again; 3165 goto again;
@@ -3265,7 +3258,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
3265 mod->sig_ok = info->sig_ok; 3258 mod->sig_ok = info->sig_ok;
3266 if (!mod->sig_ok) { 3259 if (!mod->sig_ok) {
3267 pr_notice_once("%s: module verification failed: signature " 3260 pr_notice_once("%s: module verification failed: signature "
3268 "and/or required key missing - tainting " 3261 "and/or required key missing - tainting "
3269 "kernel\n", mod->name); 3262 "kernel\n", mod->name);
3270 add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK); 3263 add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK);
3271 } 3264 }
@@ -3356,6 +3349,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
3356 module_bug_cleanup(mod); 3349 module_bug_cleanup(mod);
3357 mutex_unlock(&module_mutex); 3350 mutex_unlock(&module_mutex);
3358 3351
3352 /* Free lock-classes: */
3353 lockdep_free_key_range(mod->module_core, mod->core_size);
3354
3359 /* we can't deallocate the module until we clear memory protection */ 3355 /* we can't deallocate the module until we clear memory protection */
3360 unset_module_init_ro_nx(mod); 3356 unset_module_init_ro_nx(mod);
3361 unset_module_core_ro_nx(mod); 3357 unset_module_core_ro_nx(mod);
diff --git a/kernel/padata.c b/kernel/padata.c
index 161402f0b517..b38bea9c466a 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -917,15 +917,10 @@ static ssize_t show_cpumask(struct padata_instance *pinst,
917 else 917 else
918 cpumask = pinst->cpumask.pcpu; 918 cpumask = pinst->cpumask.pcpu;
919 919
920 len = bitmap_scnprintf(buf, PAGE_SIZE, cpumask_bits(cpumask), 920 len = snprintf(buf, PAGE_SIZE, "%*pb\n",
921 nr_cpu_ids); 921 nr_cpu_ids, cpumask_bits(cpumask));
922 if (PAGE_SIZE - len < 2)
923 len = -EINVAL;
924 else
925 len += sprintf(buf + len, "\n");
926
927 mutex_unlock(&pinst->lock); 922 mutex_unlock(&pinst->lock);
928 return len; 923 return len < PAGE_SIZE ? len : -EINVAL;
929} 924}
930 925
931static ssize_t store_cpumask(struct padata_instance *pinst, 926static ssize_t store_cpumask(struct padata_instance *pinst,
diff --git a/kernel/panic.c b/kernel/panic.c
index 4d8d6f906dec..8136ad76e5fd 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -226,6 +226,7 @@ static const struct tnt tnts[] = {
226 { TAINT_OOT_MODULE, 'O', ' ' }, 226 { TAINT_OOT_MODULE, 'O', ' ' },
227 { TAINT_UNSIGNED_MODULE, 'E', ' ' }, 227 { TAINT_UNSIGNED_MODULE, 'E', ' ' },
228 { TAINT_SOFTLOCKUP, 'L', ' ' }, 228 { TAINT_SOFTLOCKUP, 'L', ' ' },
229 { TAINT_LIVEPATCH, 'K', ' ' },
229}; 230};
230 231
231/** 232/**
@@ -246,6 +247,7 @@ static const struct tnt tnts[] = {
246 * 'O' - Out-of-tree module has been loaded. 247 * 'O' - Out-of-tree module has been loaded.
247 * 'E' - Unsigned module has been loaded. 248 * 'E' - Unsigned module has been loaded.
248 * 'L' - A soft lockup has previously occurred. 249 * 'L' - A soft lockup has previously occurred.
250 * 'K' - Kernel has been live patched.
249 * 251 *
250 * The string is overwritten by the next call to print_tainted(). 252 * The string is overwritten by the next call to print_tainted().
251 */ 253 */
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 5a6ec8678b9a..564f786df470 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -84,8 +84,8 @@ static int try_to_freeze_tasks(bool user_only)
84 elapsed_msecs = elapsed_msecs64; 84 elapsed_msecs = elapsed_msecs64;
85 85
86 if (todo) { 86 if (todo) {
87 printk("\n"); 87 pr_cont("\n");
88 printk(KERN_ERR "Freezing of tasks %s after %d.%03d seconds " 88 pr_err("Freezing of tasks %s after %d.%03d seconds "
89 "(%d tasks refusing to freeze, wq_busy=%d):\n", 89 "(%d tasks refusing to freeze, wq_busy=%d):\n",
90 wakeup ? "aborted" : "failed", 90 wakeup ? "aborted" : "failed",
91 elapsed_msecs / 1000, elapsed_msecs % 1000, 91 elapsed_msecs / 1000, elapsed_msecs % 1000,
@@ -101,37 +101,13 @@ static int try_to_freeze_tasks(bool user_only)
101 read_unlock(&tasklist_lock); 101 read_unlock(&tasklist_lock);
102 } 102 }
103 } else { 103 } else {
104 printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, 104 pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
105 elapsed_msecs % 1000); 105 elapsed_msecs % 1000);
106 } 106 }
107 107
108 return todo ? -EBUSY : 0; 108 return todo ? -EBUSY : 0;
109} 109}
110 110
111static bool __check_frozen_processes(void)
112{
113 struct task_struct *g, *p;
114
115 for_each_process_thread(g, p)
116 if (p != current && !freezer_should_skip(p) && !frozen(p))
117 return false;
118
119 return true;
120}
121
122/*
123 * Returns true if all freezable tasks (except for current) are frozen already
124 */
125static bool check_frozen_processes(void)
126{
127 bool ret;
128
129 read_lock(&tasklist_lock);
130 ret = __check_frozen_processes();
131 read_unlock(&tasklist_lock);
132 return ret;
133}
134
135/** 111/**
136 * freeze_processes - Signal user space processes to enter the refrigerator. 112 * freeze_processes - Signal user space processes to enter the refrigerator.
137 * The current thread will not be frozen. The same process that calls 113 * The current thread will not be frozen. The same process that calls
@@ -142,7 +118,6 @@ static bool check_frozen_processes(void)
142int freeze_processes(void) 118int freeze_processes(void)
143{ 119{
144 int error; 120 int error;
145 int oom_kills_saved;
146 121
147 error = __usermodehelper_disable(UMH_FREEZING); 122 error = __usermodehelper_disable(UMH_FREEZING);
148 if (error) 123 if (error)
@@ -155,31 +130,24 @@ int freeze_processes(void)
155 atomic_inc(&system_freezing_cnt); 130 atomic_inc(&system_freezing_cnt);
156 131
157 pm_wakeup_clear(); 132 pm_wakeup_clear();
158 printk("Freezing user space processes ... "); 133 pr_info("Freezing user space processes ... ");
159 pm_freezing = true; 134 pm_freezing = true;
160 oom_kills_saved = oom_kills_count();
161 error = try_to_freeze_tasks(true); 135 error = try_to_freeze_tasks(true);
162 if (!error) { 136 if (!error) {
163 __usermodehelper_set_disable_depth(UMH_DISABLED); 137 __usermodehelper_set_disable_depth(UMH_DISABLED);
164 oom_killer_disable(); 138 pr_cont("done.");
165
166 /*
167 * There might have been an OOM kill while we were
168 * freezing tasks and the killed task might be still
169 * on the way out so we have to double check for race.
170 */
171 if (oom_kills_count() != oom_kills_saved &&
172 !check_frozen_processes()) {
173 __usermodehelper_set_disable_depth(UMH_ENABLED);
174 printk("OOM in progress.");
175 error = -EBUSY;
176 } else {
177 printk("done.");
178 }
179 } 139 }
180 printk("\n"); 140 pr_cont("\n");
181 BUG_ON(in_atomic()); 141 BUG_ON(in_atomic());
182 142
143 /*
144 * Now that the whole userspace is frozen we need to disbale
145 * the OOM killer to disallow any further interference with
146 * killable tasks.
147 */
148 if (!error && !oom_killer_disable())
149 error = -EBUSY;
150
183 if (error) 151 if (error)
184 thaw_processes(); 152 thaw_processes();
185 return error; 153 return error;
@@ -197,13 +165,14 @@ int freeze_kernel_threads(void)
197{ 165{
198 int error; 166 int error;
199 167
200 printk("Freezing remaining freezable tasks ... "); 168 pr_info("Freezing remaining freezable tasks ... ");
169
201 pm_nosig_freezing = true; 170 pm_nosig_freezing = true;
202 error = try_to_freeze_tasks(false); 171 error = try_to_freeze_tasks(false);
203 if (!error) 172 if (!error)
204 printk("done."); 173 pr_cont("done.");
205 174
206 printk("\n"); 175 pr_cont("\n");
207 BUG_ON(in_atomic()); 176 BUG_ON(in_atomic());
208 177
209 if (error) 178 if (error)
@@ -224,7 +193,7 @@ void thaw_processes(void)
224 193
225 oom_killer_enable(); 194 oom_killer_enable();
226 195
227 printk("Restarting tasks ... "); 196 pr_info("Restarting tasks ... ");
228 197
229 __usermodehelper_set_disable_depth(UMH_FREEZING); 198 __usermodehelper_set_disable_depth(UMH_FREEZING);
230 thaw_workqueues(); 199 thaw_workqueues();
@@ -243,7 +212,7 @@ void thaw_processes(void)
243 usermodehelper_enable(); 212 usermodehelper_enable();
244 213
245 schedule(); 214 schedule();
246 printk("done.\n"); 215 pr_cont("done.\n");
247 trace_suspend_resume(TPS("thaw_processes"), 0, false); 216 trace_suspend_resume(TPS("thaw_processes"), 0, false);
248} 217}
249 218
@@ -252,7 +221,7 @@ void thaw_kernel_threads(void)
252 struct task_struct *g, *p; 221 struct task_struct *g, *p;
253 222
254 pm_nosig_freezing = false; 223 pm_nosig_freezing = false;
255 printk("Restarting kernel threads ... "); 224 pr_info("Restarting kernel threads ... ");
256 225
257 thaw_workqueues(); 226 thaw_workqueues();
258 227
@@ -264,5 +233,5 @@ void thaw_kernel_threads(void)
264 read_unlock(&tasklist_lock); 233 read_unlock(&tasklist_lock);
265 234
266 schedule(); 235 schedule();
267 printk("done.\n"); 236 pr_cont("done.\n");
268} 237}
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 5f4c006c4b1e..97b0df71303e 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -41,6 +41,8 @@
41#include <linux/platform_device.h> 41#include <linux/platform_device.h>
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/kernel.h> 43#include <linux/kernel.h>
44#include <linux/debugfs.h>
45#include <linux/seq_file.h>
44 46
45#include <linux/uaccess.h> 47#include <linux/uaccess.h>
46#include <linux/export.h> 48#include <linux/export.h>
@@ -182,6 +184,81 @@ static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value)
182 c->target_value = value; 184 c->target_value = value;
183} 185}
184 186
187static inline int pm_qos_get_value(struct pm_qos_constraints *c);
188static int pm_qos_dbg_show_requests(struct seq_file *s, void *unused)
189{
190 struct pm_qos_object *qos = (struct pm_qos_object *)s->private;
191 struct pm_qos_constraints *c;
192 struct pm_qos_request *req;
193 char *type;
194 unsigned long flags;
195 int tot_reqs = 0;
196 int active_reqs = 0;
197
198 if (IS_ERR_OR_NULL(qos)) {
199 pr_err("%s: bad qos param!\n", __func__);
200 return -EINVAL;
201 }
202 c = qos->constraints;
203 if (IS_ERR_OR_NULL(c)) {
204 pr_err("%s: Bad constraints on qos?\n", __func__);
205 return -EINVAL;
206 }
207
208 /* Lock to ensure we have a snapshot */
209 spin_lock_irqsave(&pm_qos_lock, flags);
210 if (plist_head_empty(&c->list)) {
211 seq_puts(s, "Empty!\n");
212 goto out;
213 }
214
215 switch (c->type) {
216 case PM_QOS_MIN:
217 type = "Minimum";
218 break;
219 case PM_QOS_MAX:
220 type = "Maximum";
221 break;
222 case PM_QOS_SUM:
223 type = "Sum";
224 break;
225 default:
226 type = "Unknown";
227 }
228
229 plist_for_each_entry(req, &c->list, node) {
230 char *state = "Default";
231
232 if ((req->node).prio != c->default_value) {
233 active_reqs++;
234 state = "Active";
235 }
236 tot_reqs++;
237 seq_printf(s, "%d: %d: %s\n", tot_reqs,
238 (req->node).prio, state);
239 }
240
241 seq_printf(s, "Type=%s, Value=%d, Requests: active=%d / total=%d\n",
242 type, pm_qos_get_value(c), active_reqs, tot_reqs);
243
244out:
245 spin_unlock_irqrestore(&pm_qos_lock, flags);
246 return 0;
247}
248
249static int pm_qos_dbg_open(struct inode *inode, struct file *file)
250{
251 return single_open(file, pm_qos_dbg_show_requests,
252 inode->i_private);
253}
254
255static const struct file_operations pm_qos_debug_fops = {
256 .open = pm_qos_dbg_open,
257 .read = seq_read,
258 .llseek = seq_lseek,
259 .release = single_release,
260};
261
185/** 262/**
186 * pm_qos_update_target - manages the constraints list and calls the notifiers 263 * pm_qos_update_target - manages the constraints list and calls the notifiers
187 * if needed 264 * if needed
@@ -509,12 +586,17 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
509EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); 586EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
510 587
511/* User space interface to PM QoS classes via misc devices */ 588/* User space interface to PM QoS classes via misc devices */
512static int register_pm_qos_misc(struct pm_qos_object *qos) 589static int register_pm_qos_misc(struct pm_qos_object *qos, struct dentry *d)
513{ 590{
514 qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR; 591 qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR;
515 qos->pm_qos_power_miscdev.name = qos->name; 592 qos->pm_qos_power_miscdev.name = qos->name;
516 qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops; 593 qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops;
517 594
595 if (d) {
596 (void)debugfs_create_file(qos->name, S_IRUGO, d,
597 (void *)qos, &pm_qos_debug_fops);
598 }
599
518 return misc_register(&qos->pm_qos_power_miscdev); 600 return misc_register(&qos->pm_qos_power_miscdev);
519} 601}
520 602
@@ -608,11 +690,16 @@ static int __init pm_qos_power_init(void)
608{ 690{
609 int ret = 0; 691 int ret = 0;
610 int i; 692 int i;
693 struct dentry *d;
611 694
612 BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES); 695 BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES);
613 696
697 d = debugfs_create_dir("pm_qos", NULL);
698 if (IS_ERR_OR_NULL(d))
699 d = NULL;
700
614 for (i = PM_QOS_CPU_DMA_LATENCY; i < PM_QOS_NUM_CLASSES; i++) { 701 for (i = PM_QOS_CPU_DMA_LATENCY; i < PM_QOS_NUM_CLASSES; i++) {
615 ret = register_pm_qos_misc(pm_qos_array[i]); 702 ret = register_pm_qos_misc(pm_qos_array[i], d);
616 if (ret < 0) { 703 if (ret < 0) {
617 printk(KERN_ERR "pm_qos_param: %s setup failed\n", 704 printk(KERN_ERR "pm_qos_param: %s setup failed\n",
618 pm_qos_array[i]->name); 705 pm_qos_array[i]->name);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 0c40c16174b4..c24d5a23bf93 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1472,9 +1472,9 @@ static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1472/** 1472/**
1473 * free_unnecessary_pages - Release preallocated pages not needed for the image 1473 * free_unnecessary_pages - Release preallocated pages not needed for the image
1474 */ 1474 */
1475static void free_unnecessary_pages(void) 1475static unsigned long free_unnecessary_pages(void)
1476{ 1476{
1477 unsigned long save, to_free_normal, to_free_highmem; 1477 unsigned long save, to_free_normal, to_free_highmem, free;
1478 1478
1479 save = count_data_pages(); 1479 save = count_data_pages();
1480 if (alloc_normal >= save) { 1480 if (alloc_normal >= save) {
@@ -1495,6 +1495,7 @@ static void free_unnecessary_pages(void)
1495 else 1495 else
1496 to_free_normal = 0; 1496 to_free_normal = 0;
1497 } 1497 }
1498 free = to_free_normal + to_free_highmem;
1498 1499
1499 memory_bm_position_reset(&copy_bm); 1500 memory_bm_position_reset(&copy_bm);
1500 1501
@@ -1518,6 +1519,8 @@ static void free_unnecessary_pages(void)
1518 swsusp_unset_page_free(page); 1519 swsusp_unset_page_free(page);
1519 __free_page(page); 1520 __free_page(page);
1520 } 1521 }
1522
1523 return free;
1521} 1524}
1522 1525
1523/** 1526/**
@@ -1707,7 +1710,7 @@ int hibernate_preallocate_memory(void)
1707 * pages in memory, but we have allocated more. Release the excessive 1710 * pages in memory, but we have allocated more. Release the excessive
1708 * ones now. 1711 * ones now.
1709 */ 1712 */
1710 free_unnecessary_pages(); 1713 pages -= free_unnecessary_pages();
1711 1714
1712 out: 1715 out:
1713 stop = ktime_get(); 1716 stop = ktime_get();
@@ -2310,8 +2313,6 @@ static inline void free_highmem_data(void)
2310 free_image_page(buffer, PG_UNSAFE_CLEAR); 2313 free_image_page(buffer, PG_UNSAFE_CLEAR);
2311} 2314}
2312#else 2315#else
2313static inline int get_safe_write_buffer(void) { return 0; }
2314
2315static unsigned int 2316static unsigned int
2316count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } 2317count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
2317 2318
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c347e3ce3a55..b7d6b3a721b1 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -37,7 +37,9 @@ const char *pm_states[PM_SUSPEND_MAX];
37static const struct platform_suspend_ops *suspend_ops; 37static const struct platform_suspend_ops *suspend_ops;
38static const struct platform_freeze_ops *freeze_ops; 38static const struct platform_freeze_ops *freeze_ops;
39static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); 39static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head);
40static bool suspend_freeze_wake; 40
41enum freeze_state __read_mostly suspend_freeze_state;
42static DEFINE_SPINLOCK(suspend_freeze_lock);
41 43
42void freeze_set_ops(const struct platform_freeze_ops *ops) 44void freeze_set_ops(const struct platform_freeze_ops *ops)
43{ 45{
@@ -48,22 +50,49 @@ void freeze_set_ops(const struct platform_freeze_ops *ops)
48 50
49static void freeze_begin(void) 51static void freeze_begin(void)
50{ 52{
51 suspend_freeze_wake = false; 53 suspend_freeze_state = FREEZE_STATE_NONE;
52} 54}
53 55
54static void freeze_enter(void) 56static void freeze_enter(void)
55{ 57{
56 cpuidle_use_deepest_state(true); 58 spin_lock_irq(&suspend_freeze_lock);
59 if (pm_wakeup_pending())
60 goto out;
61
62 suspend_freeze_state = FREEZE_STATE_ENTER;
63 spin_unlock_irq(&suspend_freeze_lock);
64
65 get_online_cpus();
57 cpuidle_resume(); 66 cpuidle_resume();
58 wait_event(suspend_freeze_wait_head, suspend_freeze_wake); 67
68 /* Push all the CPUs into the idle loop. */
69 wake_up_all_idle_cpus();
70 pr_debug("PM: suspend-to-idle\n");
71 /* Make the current CPU wait so it can enter the idle loop too. */
72 wait_event(suspend_freeze_wait_head,
73 suspend_freeze_state == FREEZE_STATE_WAKE);
74 pr_debug("PM: resume from suspend-to-idle\n");
75
59 cpuidle_pause(); 76 cpuidle_pause();
60 cpuidle_use_deepest_state(false); 77 put_online_cpus();
78
79 spin_lock_irq(&suspend_freeze_lock);
80
81 out:
82 suspend_freeze_state = FREEZE_STATE_NONE;
83 spin_unlock_irq(&suspend_freeze_lock);
61} 84}
62 85
63void freeze_wake(void) 86void freeze_wake(void)
64{ 87{
65 suspend_freeze_wake = true; 88 unsigned long flags;
66 wake_up(&suspend_freeze_wait_head); 89
90 spin_lock_irqsave(&suspend_freeze_lock, flags);
91 if (suspend_freeze_state > FREEZE_STATE_NONE) {
92 suspend_freeze_state = FREEZE_STATE_WAKE;
93 wake_up(&suspend_freeze_wait_head);
94 }
95 spin_unlock_irqrestore(&suspend_freeze_lock, flags);
67} 96}
68EXPORT_SYMBOL_GPL(freeze_wake); 97EXPORT_SYMBOL_GPL(freeze_wake);
69 98
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 02d6b6d28796..01cfd69c54c6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -935,8 +935,8 @@ static int __init ignore_loglevel_setup(char *str)
935 935
936early_param("ignore_loglevel", ignore_loglevel_setup); 936early_param("ignore_loglevel", ignore_loglevel_setup);
937module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); 937module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
938MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" 938MODULE_PARM_DESC(ignore_loglevel,
939 "print all kernel messages to the console."); 939 "ignore loglevel setting (prints all kernel messages to the console)");
940 940
941#ifdef CONFIG_BOOT_PRINTK_DELAY 941#ifdef CONFIG_BOOT_PRINTK_DELAY
942 942
@@ -1419,16 +1419,16 @@ static void call_console_drivers(int level, const char *text, size_t len)
1419} 1419}
1420 1420
1421/* 1421/*
1422 * Zap console related locks when oopsing. Only zap at most once 1422 * Zap console related locks when oopsing.
1423 * every 10 seconds, to leave time for slow consoles to print a 1423 * To leave time for slow consoles to print a full oops,
1424 * full oops. 1424 * only zap at most once every 30 seconds.
1425 */ 1425 */
1426static void zap_locks(void) 1426static void zap_locks(void)
1427{ 1427{
1428 static unsigned long oops_timestamp; 1428 static unsigned long oops_timestamp;
1429 1429
1430 if (time_after_eq(jiffies, oops_timestamp) && 1430 if (time_after_eq(jiffies, oops_timestamp) &&
1431 !time_after(jiffies, oops_timestamp + 30 * HZ)) 1431 !time_after(jiffies, oops_timestamp + 30 * HZ))
1432 return; 1432 return;
1433 1433
1434 oops_timestamp = jiffies; 1434 oops_timestamp = jiffies;
@@ -1811,7 +1811,7 @@ int vprintk_default(const char *fmt, va_list args)
1811 1811
1812#ifdef CONFIG_KGDB_KDB 1812#ifdef CONFIG_KGDB_KDB
1813 if (unlikely(kdb_trap_printk)) { 1813 if (unlikely(kdb_trap_printk)) {
1814 r = vkdb_printf(fmt, args); 1814 r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
1815 return r; 1815 return r;
1816 } 1816 }
1817#endif 1817#endif
diff --git a/kernel/profile.c b/kernel/profile.c
index 54bf5ba26420..a7bcd28d6e9f 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -422,8 +422,7 @@ void profile_tick(int type)
422 422
423static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) 423static int prof_cpu_mask_proc_show(struct seq_file *m, void *v)
424{ 424{
425 seq_cpumask(m, prof_cpu_mask); 425 seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask));
426 seq_putc(m, '\n');
427 return 0; 426 return 0;
428} 427}
429 428
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1eb9d90c3af9..227fec36b12a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1077,7 +1077,6 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
1077} 1077}
1078 1078
1079#if defined CONFIG_COMPAT 1079#if defined CONFIG_COMPAT
1080#include <linux/compat.h>
1081 1080
1082int compat_ptrace_request(struct task_struct *child, compat_long_t request, 1081int compat_ptrace_request(struct task_struct *child, compat_long_t request,
1083 compat_ulong_t addr, compat_ulong_t data) 1082 compat_ulong_t addr, compat_ulong_t data)
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 2e850a51bb8f..0a571e9a0f1d 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -49,7 +49,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
49static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ 49static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
50static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ 50static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
51static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ 51static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
52static char __initdata nocb_buf[NR_CPUS * 5];
53#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 52#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
54 53
55/* 54/*
@@ -327,6 +326,7 @@ void rcu_read_unlock_special(struct task_struct *t)
327 special = t->rcu_read_unlock_special; 326 special = t->rcu_read_unlock_special;
328 if (special.b.need_qs) { 327 if (special.b.need_qs) {
329 rcu_preempt_qs(); 328 rcu_preempt_qs();
329 t->rcu_read_unlock_special.b.need_qs = false;
330 if (!t->rcu_read_unlock_special.s) { 330 if (!t->rcu_read_unlock_special.s) {
331 local_irq_restore(flags); 331 local_irq_restore(flags);
332 return; 332 return;
@@ -2386,8 +2386,8 @@ void __init rcu_init_nohz(void)
2386 cpumask_and(rcu_nocb_mask, cpu_possible_mask, 2386 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
2387 rcu_nocb_mask); 2387 rcu_nocb_mask);
2388 } 2388 }
2389 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); 2389 pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
2390 pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); 2390 cpumask_pr_args(rcu_nocb_mask));
2391 if (rcu_nocb_poll) 2391 if (rcu_nocb_poll)
2392 pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); 2392 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
2393 2393
diff --git a/kernel/resource.c b/kernel/resource.c
index 0bcebffc4e77..19f2357dfda3 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -22,6 +22,7 @@
22#include <linux/device.h> 22#include <linux/device.h>
23#include <linux/pfn.h> 23#include <linux/pfn.h>
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/resource_ext.h>
25#include <asm/io.h> 26#include <asm/io.h>
26 27
27 28
@@ -1529,6 +1530,30 @@ int iomem_is_exclusive(u64 addr)
1529 return err; 1530 return err;
1530} 1531}
1531 1532
1533struct resource_entry *resource_list_create_entry(struct resource *res,
1534 size_t extra_size)
1535{
1536 struct resource_entry *entry;
1537
1538 entry = kzalloc(sizeof(*entry) + extra_size, GFP_KERNEL);
1539 if (entry) {
1540 INIT_LIST_HEAD(&entry->node);
1541 entry->res = res ? res : &entry->__res;
1542 }
1543
1544 return entry;
1545}
1546EXPORT_SYMBOL(resource_list_create_entry);
1547
1548void resource_list_free(struct list_head *head)
1549{
1550 struct resource_entry *entry, *tmp;
1551
1552 list_for_each_entry_safe(entry, tmp, head, node)
1553 resource_list_destroy_entry(entry);
1554}
1555EXPORT_SYMBOL(resource_list_free);
1556
1532static int __init strict_iomem(char *str) 1557static int __init strict_iomem(char *str)
1533{ 1558{
1534 if (strstr(str, "relaxed")) 1559 if (strstr(str, "relaxed"))
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index ab32b7b0db5c..46be87024875 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -1,5 +1,5 @@
1ifdef CONFIG_FUNCTION_TRACER 1ifdef CONFIG_FUNCTION_TRACER
2CFLAGS_REMOVE_clock.o = -pg 2CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE)
3endif 3endif
4 4
5ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 5ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 8a2e230fb86a..eae160dd669d 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void)
87 * so we don't have to move tasks around upon policy change, 87 * so we don't have to move tasks around upon policy change,
88 * or flail around trying to allocate bandwidth on the fly. 88 * or flail around trying to allocate bandwidth on the fly.
89 * A bandwidth exception in __sched_setscheduler() allows 89 * A bandwidth exception in __sched_setscheduler() allows
90 * the policy change to proceed. Thereafter, task_group() 90 * the policy change to proceed.
91 * returns &root_task_group, so zero bandwidth is required.
92 */ 91 */
93 free_rt_sched_group(tg); 92 free_rt_sched_group(tg);
94 tg->rt_se = root_task_group.rt_se; 93 tg->rt_se = root_task_group.rt_se;
@@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
115 if (tg != &root_task_group) 114 if (tg != &root_task_group)
116 return false; 115 return false;
117 116
118 if (p->sched_class != &fair_sched_class)
119 return false;
120
121 /* 117 /*
122 * We can only assume the task group can't go away on us if 118 * We can only assume the task group can't go away on us if
123 * autogroup_move_group() can see us on ->thread_group list. 119 * autogroup_move_group() can see us on ->thread_group list.
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c27e4f8f4879..c0a205101c23 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -420,3 +420,16 @@ u64 local_clock(void)
420 420
421EXPORT_SYMBOL_GPL(cpu_clock); 421EXPORT_SYMBOL_GPL(cpu_clock);
422EXPORT_SYMBOL_GPL(local_clock); 422EXPORT_SYMBOL_GPL(local_clock);
423
424/*
425 * Running clock - returns the time that has elapsed while a guest has been
426 * running.
427 * On a guest this value should be local_clock minus the time the guest was
428 * suspended by the hypervisor (for any reason).
429 * On bare metal this function should return the same as local_clock.
430 * Architectures and sub-architectures can override this.
431 */
432u64 __weak running_clock(void)
433{
434 return local_clock();
435}
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 7052d3fd4e7b..8d0f35debf35 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -274,7 +274,7 @@ bool try_wait_for_completion(struct completion *x)
274 * first without taking the lock so we can 274 * first without taking the lock so we can
275 * return early in the blocking case. 275 * return early in the blocking case.
276 */ 276 */
277 if (!ACCESS_ONCE(x->done)) 277 if (!READ_ONCE(x->done))
278 return 0; 278 return 0;
279 279
280 spin_lock_irqsave(&x->wait.lock, flags); 280 spin_lock_irqsave(&x->wait.lock, flags);
@@ -297,6 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion);
297 */ 297 */
298bool completion_done(struct completion *x) 298bool completion_done(struct completion *x)
299{ 299{
300 return !!ACCESS_ONCE(x->done); 300 if (!READ_ONCE(x->done))
301 return false;
302
303 /*
304 * If ->done, we need to wait for complete() to release ->wait.lock
305 * otherwise we can end up freeing the completion before complete()
306 * is done referencing it.
307 *
308 * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
309 * the loads of ->done and ->wait.lock such that we cannot observe
310 * the lock before complete() acquires it while observing the ->done
311 * after it's acquired the lock.
312 */
313 smp_rmb();
314 spin_unlock_wait(&x->wait.lock);
315 return true;
301} 316}
302EXPORT_SYMBOL(completion_done); 317EXPORT_SYMBOL(completion_done);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1f37fe7f77a4..f0f831e8a345 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -307,66 +307,6 @@ __read_mostly int scheduler_running;
307int sysctl_sched_rt_runtime = 950000; 307int sysctl_sched_rt_runtime = 950000;
308 308
309/* 309/*
310 * __task_rq_lock - lock the rq @p resides on.
311 */
312static inline struct rq *__task_rq_lock(struct task_struct *p)
313 __acquires(rq->lock)
314{
315 struct rq *rq;
316
317 lockdep_assert_held(&p->pi_lock);
318
319 for (;;) {
320 rq = task_rq(p);
321 raw_spin_lock(&rq->lock);
322 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
323 return rq;
324 raw_spin_unlock(&rq->lock);
325
326 while (unlikely(task_on_rq_migrating(p)))
327 cpu_relax();
328 }
329}
330
331/*
332 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
333 */
334static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
335 __acquires(p->pi_lock)
336 __acquires(rq->lock)
337{
338 struct rq *rq;
339
340 for (;;) {
341 raw_spin_lock_irqsave(&p->pi_lock, *flags);
342 rq = task_rq(p);
343 raw_spin_lock(&rq->lock);
344 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
345 return rq;
346 raw_spin_unlock(&rq->lock);
347 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
348
349 while (unlikely(task_on_rq_migrating(p)))
350 cpu_relax();
351 }
352}
353
354static void __task_rq_unlock(struct rq *rq)
355 __releases(rq->lock)
356{
357 raw_spin_unlock(&rq->lock);
358}
359
360static inline void
361task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
362 __releases(rq->lock)
363 __releases(p->pi_lock)
364{
365 raw_spin_unlock(&rq->lock);
366 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
367}
368
369/*
370 * this_rq_lock - lock this runqueue and disable interrupts. 310 * this_rq_lock - lock this runqueue and disable interrupts.
371 */ 311 */
372static struct rq *this_rq_lock(void) 312static struct rq *this_rq_lock(void)
@@ -2899,7 +2839,7 @@ void __sched schedule_preempt_disabled(void)
2899 preempt_disable(); 2839 preempt_disable();
2900} 2840}
2901 2841
2902static void preempt_schedule_common(void) 2842static void __sched notrace preempt_schedule_common(void)
2903{ 2843{
2904 do { 2844 do {
2905 __preempt_count_add(PREEMPT_ACTIVE); 2845 __preempt_count_add(PREEMPT_ACTIVE);
@@ -4418,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to);
4418 * This task is about to go to sleep on IO. Increment rq->nr_iowait so 4358 * This task is about to go to sleep on IO. Increment rq->nr_iowait so
4419 * that process accounting knows that this is a task in IO wait state. 4359 * that process accounting knows that this is a task in IO wait state.
4420 */ 4360 */
4421void __sched io_schedule(void)
4422{
4423 struct rq *rq = raw_rq();
4424
4425 delayacct_blkio_start();
4426 atomic_inc(&rq->nr_iowait);
4427 blk_flush_plug(current);
4428 current->in_iowait = 1;
4429 schedule();
4430 current->in_iowait = 0;
4431 atomic_dec(&rq->nr_iowait);
4432 delayacct_blkio_end();
4433}
4434EXPORT_SYMBOL(io_schedule);
4435
4436long __sched io_schedule_timeout(long timeout) 4361long __sched io_schedule_timeout(long timeout)
4437{ 4362{
4438 struct rq *rq = raw_rq(); 4363 int old_iowait = current->in_iowait;
4364 struct rq *rq;
4439 long ret; 4365 long ret;
4440 4366
4367 current->in_iowait = 1;
4368 if (old_iowait)
4369 blk_schedule_flush_plug(current);
4370 else
4371 blk_flush_plug(current);
4372
4441 delayacct_blkio_start(); 4373 delayacct_blkio_start();
4374 rq = raw_rq();
4442 atomic_inc(&rq->nr_iowait); 4375 atomic_inc(&rq->nr_iowait);
4443 blk_flush_plug(current);
4444 current->in_iowait = 1;
4445 ret = schedule_timeout(timeout); 4376 ret = schedule_timeout(timeout);
4446 current->in_iowait = 0; 4377 current->in_iowait = old_iowait;
4447 atomic_dec(&rq->nr_iowait); 4378 atomic_dec(&rq->nr_iowait);
4448 delayacct_blkio_end(); 4379 delayacct_blkio_end();
4380
4449 return ret; 4381 return ret;
4450} 4382}
4383EXPORT_SYMBOL(io_schedule_timeout);
4451 4384
4452/** 4385/**
4453 * sys_sched_get_priority_max - return maximum RT priority. 4386 * sys_sched_get_priority_max - return maximum RT priority.
@@ -5462,9 +5395,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
5462 struct cpumask *groupmask) 5395 struct cpumask *groupmask)
5463{ 5396{
5464 struct sched_group *group = sd->groups; 5397 struct sched_group *group = sd->groups;
5465 char str[256];
5466 5398
5467 cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
5468 cpumask_clear(groupmask); 5399 cpumask_clear(groupmask);
5469 5400
5470 printk(KERN_DEBUG "%*s domain %d: ", level, "", level); 5401 printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
@@ -5477,7 +5408,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
5477 return -1; 5408 return -1;
5478 } 5409 }
5479 5410
5480 printk(KERN_CONT "span %s level %s\n", str, sd->name); 5411 printk(KERN_CONT "span %*pbl level %s\n",
5412 cpumask_pr_args(sched_domain_span(sd)), sd->name);
5481 5413
5482 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { 5414 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
5483 printk(KERN_ERR "ERROR: domain->span does not contain " 5415 printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -5522,9 +5454,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
5522 5454
5523 cpumask_or(groupmask, groupmask, sched_group_cpus(group)); 5455 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
5524 5456
5525 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); 5457 printk(KERN_CONT " %*pbl",
5526 5458 cpumask_pr_args(sched_group_cpus(group)));
5527 printk(KERN_CONT " %s", str);
5528 if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { 5459 if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
5529 printk(KERN_CONT " (cpu_capacity = %d)", 5460 printk(KERN_CONT " (cpu_capacity = %d)",
5530 group->sgc->capacity); 5461 group->sgc->capacity);
@@ -7644,6 +7575,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
7644{ 7575{
7645 struct task_struct *g, *p; 7576 struct task_struct *g, *p;
7646 7577
7578 /*
7579 * Autogroups do not have RT tasks; see autogroup_create().
7580 */
7581 if (task_group_is_autogroup(tg))
7582 return 0;
7583
7647 for_each_process_thread(g, p) { 7584 for_each_process_thread(g, p) {
7648 if (rt_task(p) && task_group(p) == tg) 7585 if (rt_task(p) && task_group(p) == tg)
7649 return 1; 7586 return 1;
@@ -7736,6 +7673,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
7736{ 7673{
7737 int i, err = 0; 7674 int i, err = 0;
7738 7675
7676 /*
7677 * Disallowing the root group RT runtime is BAD, it would disallow the
7678 * kernel creating (and or operating) RT threads.
7679 */
7680 if (tg == &root_task_group && rt_runtime == 0)
7681 return -EINVAL;
7682
7683 /* No period doesn't make any sense. */
7684 if (rt_period == 0)
7685 return -EINVAL;
7686
7739 mutex_lock(&rt_constraints_mutex); 7687 mutex_lock(&rt_constraints_mutex);
7740 read_lock(&tasklist_lock); 7688 read_lock(&tasklist_lock);
7741 err = __rt_schedulable(tg, rt_period, rt_runtime); 7689 err = __rt_schedulable(tg, rt_period, rt_runtime);
@@ -7792,9 +7740,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
7792 rt_period = (u64)rt_period_us * NSEC_PER_USEC; 7740 rt_period = (u64)rt_period_us * NSEC_PER_USEC;
7793 rt_runtime = tg->rt_bandwidth.rt_runtime; 7741 rt_runtime = tg->rt_bandwidth.rt_runtime;
7794 7742
7795 if (rt_period == 0)
7796 return -EINVAL;
7797
7798 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime); 7743 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
7799} 7744}
7800 7745
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a027799ae130..3fa8fa6d9403 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
511 struct sched_dl_entity, 511 struct sched_dl_entity,
512 dl_timer); 512 dl_timer);
513 struct task_struct *p = dl_task_of(dl_se); 513 struct task_struct *p = dl_task_of(dl_se);
514 unsigned long flags;
514 struct rq *rq; 515 struct rq *rq;
515again:
516 rq = task_rq(p);
517 raw_spin_lock(&rq->lock);
518 516
519 if (rq != task_rq(p)) { 517 rq = task_rq_lock(current, &flags);
520 /* Task was moved, retrying. */
521 raw_spin_unlock(&rq->lock);
522 goto again;
523 }
524 518
525 /* 519 /*
526 * We need to take care of several possible races here: 520 * We need to take care of several possible races here:
@@ -541,6 +535,26 @@ again:
541 535
542 sched_clock_tick(); 536 sched_clock_tick();
543 update_rq_clock(rq); 537 update_rq_clock(rq);
538
539 /*
540 * If the throttle happened during sched-out; like:
541 *
542 * schedule()
543 * deactivate_task()
544 * dequeue_task_dl()
545 * update_curr_dl()
546 * start_dl_timer()
547 * __dequeue_task_dl()
548 * prev->on_rq = 0;
549 *
550 * We can be both throttled and !queued. Replenish the counter
551 * but do not enqueue -- wait for our wakeup to do that.
552 */
553 if (!task_on_rq_queued(p)) {
554 replenish_dl_entity(dl_se, dl_se);
555 goto unlock;
556 }
557
544 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); 558 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
545 if (dl_task(rq->curr)) 559 if (dl_task(rq->curr))
546 check_preempt_curr_dl(rq, p, 0); 560 check_preempt_curr_dl(rq, p, 0);
@@ -555,7 +569,7 @@ again:
555 push_dl_task(rq); 569 push_dl_task(rq);
556#endif 570#endif
557unlock: 571unlock:
558 raw_spin_unlock(&rq->lock); 572 task_rq_unlock(rq, current, &flags);
559 573
560 return HRTIMER_NORESTART; 574 return HRTIMER_NORESTART;
561} 575}
@@ -898,6 +912,7 @@ static void yield_task_dl(struct rq *rq)
898 rq->curr->dl.dl_yielded = 1; 912 rq->curr->dl.dl_yielded = 1;
899 p->dl.runtime = 0; 913 p->dl.runtime = 0;
900 } 914 }
915 update_rq_clock(rq);
901 update_curr_dl(rq); 916 update_curr_dl(rq);
902} 917}
903 918
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index aaf1c1d5cf5d..94b2d7b88a27 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -7,6 +7,7 @@
7#include <linux/tick.h> 7#include <linux/tick.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/stackprotector.h> 9#include <linux/stackprotector.h>
10#include <linux/suspend.h>
10 11
11#include <asm/tlb.h> 12#include <asm/tlb.h>
12 13
@@ -105,6 +106,21 @@ static void cpuidle_idle_call(void)
105 rcu_idle_enter(); 106 rcu_idle_enter();
106 107
107 /* 108 /*
109 * Suspend-to-idle ("freeze") is a system state in which all user space
110 * has been frozen, all I/O devices have been suspended and the only
111 * activity happens here and in iterrupts (if any). In that case bypass
112 * the cpuidle governor and go stratight for the deepest idle state
113 * available. Possibly also suspend the local tick and the entire
114 * timekeeping to prevent timer interrupts from kicking us out of idle
115 * until a proper wakeup interrupt happens.
116 */
117 if (idle_should_freeze()) {
118 cpuidle_enter_freeze();
119 local_irq_enable();
120 goto exit_idle;
121 }
122
123 /*
108 * Ask the cpuidle framework to choose a convenient idle state. 124 * Ask the cpuidle framework to choose a convenient idle state.
109 * Fall back to the default arch idle method on errors. 125 * Fall back to the default arch idle method on errors.
110 */ 126 */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0870db23d79c..dc0f435a2779 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1380,6 +1380,82 @@ static inline void sched_avg_update(struct rq *rq) { }
1380 1380
1381extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); 1381extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
1382 1382
1383/*
1384 * __task_rq_lock - lock the rq @p resides on.
1385 */
1386static inline struct rq *__task_rq_lock(struct task_struct *p)
1387 __acquires(rq->lock)
1388{
1389 struct rq *rq;
1390
1391 lockdep_assert_held(&p->pi_lock);
1392
1393 for (;;) {
1394 rq = task_rq(p);
1395 raw_spin_lock(&rq->lock);
1396 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
1397 return rq;
1398 raw_spin_unlock(&rq->lock);
1399
1400 while (unlikely(task_on_rq_migrating(p)))
1401 cpu_relax();
1402 }
1403}
1404
1405/*
1406 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
1407 */
1408static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
1409 __acquires(p->pi_lock)
1410 __acquires(rq->lock)
1411{
1412 struct rq *rq;
1413
1414 for (;;) {
1415 raw_spin_lock_irqsave(&p->pi_lock, *flags);
1416 rq = task_rq(p);
1417 raw_spin_lock(&rq->lock);
1418 /*
1419 * move_queued_task() task_rq_lock()
1420 *
1421 * ACQUIRE (rq->lock)
1422 * [S] ->on_rq = MIGRATING [L] rq = task_rq()
1423 * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
1424 * [S] ->cpu = new_cpu [L] task_rq()
1425 * [L] ->on_rq
1426 * RELEASE (rq->lock)
1427 *
1428 * If we observe the old cpu in task_rq_lock, the acquire of
1429 * the old rq->lock will fully serialize against the stores.
1430 *
1431 * If we observe the new cpu in task_rq_lock, the acquire will
1432 * pair with the WMB to ensure we must then also see migrating.
1433 */
1434 if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
1435 return rq;
1436 raw_spin_unlock(&rq->lock);
1437 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
1438
1439 while (unlikely(task_on_rq_migrating(p)))
1440 cpu_relax();
1441 }
1442}
1443
1444static inline void __task_rq_unlock(struct rq *rq)
1445 __releases(rq->lock)
1446{
1447 raw_spin_unlock(&rq->lock);
1448}
1449
1450static inline void
1451task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
1452 __releases(rq->lock)
1453 __releases(p->pi_lock)
1454{
1455 raw_spin_unlock(&rq->lock);
1456 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
1457}
1458
1383#ifdef CONFIG_SMP 1459#ifdef CONFIG_SMP
1384#ifdef CONFIG_PREEMPT 1460#ifdef CONFIG_PREEMPT
1385 1461
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index a476bea17fbc..87e2c9f0c33e 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -15,11 +15,6 @@
15static int show_schedstat(struct seq_file *seq, void *v) 15static int show_schedstat(struct seq_file *seq, void *v)
16{ 16{
17 int cpu; 17 int cpu;
18 int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
19 char *mask_str = kmalloc(mask_len, GFP_KERNEL);
20
21 if (mask_str == NULL)
22 return -ENOMEM;
23 18
24 if (v == (void *)1) { 19 if (v == (void *)1) {
25 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); 20 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
@@ -50,9 +45,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
50 for_each_domain(cpu, sd) { 45 for_each_domain(cpu, sd) {
51 enum cpu_idle_type itype; 46 enum cpu_idle_type itype;
52 47
53 cpumask_scnprintf(mask_str, mask_len, 48 seq_printf(seq, "domain%d %*pb", dcount++,
54 sched_domain_span(sd)); 49 cpumask_pr_args(sched_domain_span(sd)));
55 seq_printf(seq, "domain%d %s", dcount++, mask_str);
56 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; 50 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
57 itype++) { 51 itype++) {
58 seq_printf(seq, " %u %u %u %u %u %u %u %u", 52 seq_printf(seq, " %u %u %u %u %u %u %u %u",
@@ -76,7 +70,6 @@ static int show_schedstat(struct seq_file *seq, void *v)
76 rcu_read_unlock(); 70 rcu_read_unlock();
77#endif 71#endif
78 } 72 }
79 kfree(mask_str);
80 return 0; 73 return 0;
81} 74}
82 75
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 4ef9687ac115..4f44028943e6 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -629,7 +629,9 @@ static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
629 629
630 switch (action) { 630 switch (action) {
631 case SECCOMP_RET_ERRNO: 631 case SECCOMP_RET_ERRNO:
632 /* Set the low-order 16-bits as a errno. */ 632 /* Set low-order bits as an errno, capped at MAX_ERRNO. */
633 if (data > MAX_ERRNO)
634 data = MAX_ERRNO;
633 syscall_set_return_value(current, task_pt_regs(current), 635 syscall_set_return_value(current, task_pt_regs(current),
634 -data, 0); 636 -data, 0);
635 goto skip; 637 goto skip;
diff --git a/kernel/signal.c b/kernel/signal.c
index 16a305295256..a390499943e4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2501,7 +2501,7 @@ EXPORT_SYMBOL(unblock_all_signals);
2501 */ 2501 */
2502SYSCALL_DEFINE0(restart_syscall) 2502SYSCALL_DEFINE0(restart_syscall)
2503{ 2503{
2504 struct restart_block *restart = &current_thread_info()->restart_block; 2504 struct restart_block *restart = &current->restart_block;
2505 return restart->fn(restart); 2505 return restart->fn(restart);
2506} 2506}
2507 2507
@@ -3550,7 +3550,7 @@ SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
3550SYSCALL_DEFINE0(pause) 3550SYSCALL_DEFINE0(pause)
3551{ 3551{
3552 while (!signal_pending(current)) { 3552 while (!signal_pending(current)) {
3553 current->state = TASK_INTERRUPTIBLE; 3553 __set_current_state(TASK_INTERRUPTIBLE);
3554 schedule(); 3554 schedule();
3555 } 3555 }
3556 return -ERESTARTNOHAND; 3556 return -ERESTARTNOHAND;
@@ -3563,7 +3563,7 @@ int sigsuspend(sigset_t *set)
3563 current->saved_sigmask = current->blocked; 3563 current->saved_sigmask = current->blocked;
3564 set_current_blocked(set); 3564 set_current_blocked(set);
3565 3565
3566 current->state = TASK_INTERRUPTIBLE; 3566 __set_current_state(TASK_INTERRUPTIBLE);
3567 schedule(); 3567 schedule();
3568 set_restore_sigmask(); 3568 set_restore_sigmask();
3569 return -ERESTARTNOHAND; 3569 return -ERESTARTNOHAND;
diff --git a/kernel/sys.c b/kernel/sys.c
index ea9c88109894..667b2e62fad2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -97,6 +97,12 @@
97#ifndef MPX_DISABLE_MANAGEMENT 97#ifndef MPX_DISABLE_MANAGEMENT
98# define MPX_DISABLE_MANAGEMENT(a) (-EINVAL) 98# define MPX_DISABLE_MANAGEMENT(a) (-EINVAL)
99#endif 99#endif
100#ifndef GET_FP_MODE
101# define GET_FP_MODE(a) (-EINVAL)
102#endif
103#ifndef SET_FP_MODE
104# define SET_FP_MODE(a,b) (-EINVAL)
105#endif
100 106
101/* 107/*
102 * this is where the system-wide overflow UID and GID are defined, for 108 * this is where the system-wide overflow UID and GID are defined, for
@@ -2219,6 +2225,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2219 return -EINVAL; 2225 return -EINVAL;
2220 error = MPX_DISABLE_MANAGEMENT(me); 2226 error = MPX_DISABLE_MANAGEMENT(me);
2221 break; 2227 break;
2228 case PR_SET_FP_MODE:
2229 error = SET_FP_MODE(me, arg2);
2230 break;
2231 case PR_GET_FP_MODE:
2232 error = GET_FP_MODE(me);
2233 break;
2222 default: 2234 default:
2223 error = -EINVAL; 2235 error = -EINVAL;
2224 break; 2236 break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 137c7f69b264..88ea2d6e0031 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1248,7 +1248,6 @@ static struct ctl_table vm_table[] = {
1248 .maxlen = sizeof(unsigned long), 1248 .maxlen = sizeof(unsigned long),
1249 .mode = 0644, 1249 .mode = 0644,
1250 .proc_handler = hugetlb_sysctl_handler, 1250 .proc_handler = hugetlb_sysctl_handler,
1251 .extra1 = &zero,
1252 }, 1251 },
1253#ifdef CONFIG_NUMA 1252#ifdef CONFIG_NUMA
1254 { 1253 {
@@ -1257,7 +1256,6 @@ static struct ctl_table vm_table[] = {
1257 .maxlen = sizeof(unsigned long), 1256 .maxlen = sizeof(unsigned long),
1258 .mode = 0644, 1257 .mode = 0644,
1259 .proc_handler = &hugetlb_mempolicy_sysctl_handler, 1258 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1260 .extra1 = &zero,
1261 }, 1259 },
1262#endif 1260#endif
1263 { 1261 {
@@ -1280,7 +1278,6 @@ static struct ctl_table vm_table[] = {
1280 .maxlen = sizeof(unsigned long), 1278 .maxlen = sizeof(unsigned long),
1281 .mode = 0644, 1279 .mode = 0644,
1282 .proc_handler = hugetlb_overcommit_handler, 1280 .proc_handler = hugetlb_overcommit_handler,
1283 .extra1 = &zero,
1284 }, 1281 },
1285#endif 1282#endif
1286 { 1283 {
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 670fff88a961..21f82c29c914 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -111,13 +111,8 @@ static int send_reply(struct sk_buff *skb, struct genl_info *info)
111{ 111{
112 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); 112 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
113 void *reply = genlmsg_data(genlhdr); 113 void *reply = genlmsg_data(genlhdr);
114 int rc;
115 114
116 rc = genlmsg_end(skb, reply); 115 genlmsg_end(skb, reply);
117 if (rc < 0) {
118 nlmsg_free(skb);
119 return rc;
120 }
121 116
122 return genlmsg_reply(skb, info); 117 return genlmsg_reply(skb, info);
123} 118}
@@ -134,11 +129,7 @@ static void send_cpu_listeners(struct sk_buff *skb,
134 void *reply = genlmsg_data(genlhdr); 129 void *reply = genlmsg_data(genlhdr);
135 int rc, delcount = 0; 130 int rc, delcount = 0;
136 131
137 rc = genlmsg_end(skb, reply); 132 genlmsg_end(skb, reply);
138 if (rc < 0) {
139 nlmsg_free(skb);
140 return;
141 }
142 133
143 rc = 0; 134 rc = 0;
144 down_read(&listeners->sem); 135 down_read(&listeners->sem);
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index f622cf28628a..c09c07817d7a 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,6 +1,6 @@
1obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o 1obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o
2obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o 2obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
3obj-y += timeconv.o posix-clock.o alarmtimer.o 3obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o
4 4
5obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o 5obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
6obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o 6obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index a7077d3ae52f..1b001ed1edb9 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -788,7 +788,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
788 goto out; 788 goto out;
789 } 789 }
790 790
791 restart = &current_thread_info()->restart_block; 791 restart = &current->restart_block;
792 restart->fn = alarm_timer_nsleep_restart; 792 restart->fn = alarm_timer_nsleep_restart;
793 restart->nanosleep.clockid = type; 793 restart->nanosleep.clockid = type;
794 restart->nanosleep.expires = exp.tv64; 794 restart->nanosleep.expires = exp.tv64;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b79f39bda7e1..4892352f0e49 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -34,82 +34,6 @@
34#include "tick-internal.h" 34#include "tick-internal.h"
35#include "timekeeping_internal.h" 35#include "timekeeping_internal.h"
36 36
37void timecounter_init(struct timecounter *tc,
38 const struct cyclecounter *cc,
39 u64 start_tstamp)
40{
41 tc->cc = cc;
42 tc->cycle_last = cc->read(cc);
43 tc->nsec = start_tstamp;
44}
45EXPORT_SYMBOL_GPL(timecounter_init);
46
47/**
48 * timecounter_read_delta - get nanoseconds since last call of this function
49 * @tc: Pointer to time counter
50 *
51 * When the underlying cycle counter runs over, this will be handled
52 * correctly as long as it does not run over more than once between
53 * calls.
54 *
55 * The first call to this function for a new time counter initializes
56 * the time tracking and returns an undefined result.
57 */
58static u64 timecounter_read_delta(struct timecounter *tc)
59{
60 cycle_t cycle_now, cycle_delta;
61 u64 ns_offset;
62
63 /* read cycle counter: */
64 cycle_now = tc->cc->read(tc->cc);
65
66 /* calculate the delta since the last timecounter_read_delta(): */
67 cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
68
69 /* convert to nanoseconds: */
70 ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);
71
72 /* update time stamp of timecounter_read_delta() call: */
73 tc->cycle_last = cycle_now;
74
75 return ns_offset;
76}
77
78u64 timecounter_read(struct timecounter *tc)
79{
80 u64 nsec;
81
82 /* increment time by nanoseconds since last call */
83 nsec = timecounter_read_delta(tc);
84 nsec += tc->nsec;
85 tc->nsec = nsec;
86
87 return nsec;
88}
89EXPORT_SYMBOL_GPL(timecounter_read);
90
91u64 timecounter_cyc2time(struct timecounter *tc,
92 cycle_t cycle_tstamp)
93{
94 u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
95 u64 nsec;
96
97 /*
98 * Instead of always treating cycle_tstamp as more recent
99 * than tc->cycle_last, detect when it is too far in the
100 * future and treat it as old time stamp instead.
101 */
102 if (cycle_delta > tc->cc->mask / 2) {
103 cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
104 nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
105 } else {
106 nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
107 }
108
109 return nsec;
110}
111EXPORT_SYMBOL_GPL(timecounter_cyc2time);
112
113/** 37/**
114 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks 38 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
115 * @mult: pointer to mult variable 39 * @mult: pointer to mult variable
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 3f5e183c3d97..bee0c1f78091 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1583,7 +1583,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1583 goto out; 1583 goto out;
1584 } 1584 }
1585 1585
1586 restart = &current_thread_info()->restart_block; 1586 restart = &current->restart_block;
1587 restart->fn = hrtimer_nanosleep_restart; 1587 restart->fn = hrtimer_nanosleep_restart;
1588 restart->nanosleep.clockid = t.timer.base->clockid; 1588 restart->nanosleep.clockid = t.timer.base->clockid;
1589 restart->nanosleep.rmtp = rmtp; 1589 restart->nanosleep.rmtp = rmtp;
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 4b585e0fdd22..0f60b08a4f07 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -633,10 +633,14 @@ int ntp_validate_timex(struct timex *txc)
633 if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) 633 if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
634 return -EPERM; 634 return -EPERM;
635 635
636 if (txc->modes & ADJ_FREQUENCY) { 636 /*
637 if (LONG_MIN / PPM_SCALE > txc->freq) 637 * Check for potential multiplication overflows that can
638 * only happen on 64-bit systems:
639 */
640 if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
641 if (LLONG_MIN / PPM_SCALE > txc->freq)
638 return -EINVAL; 642 return -EINVAL;
639 if (LONG_MAX / PPM_SCALE < txc->freq) 643 if (LLONG_MAX / PPM_SCALE < txc->freq)
640 return -EINVAL; 644 return -EINVAL;
641 } 645 }
642 646
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a16b67859e2a..0075da74abf0 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1334,8 +1334,7 @@ static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1334static int posix_cpu_nsleep(const clockid_t which_clock, int flags, 1334static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1335 struct timespec *rqtp, struct timespec __user *rmtp) 1335 struct timespec *rqtp, struct timespec __user *rmtp)
1336{ 1336{
1337 struct restart_block *restart_block = 1337 struct restart_block *restart_block = &current->restart_block;
1338 &current_thread_info()->restart_block;
1339 struct itimerspec it; 1338 struct itimerspec it;
1340 int error; 1339 int error;
1341 1340
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 7efeedf53ebd..f7c515595b42 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -394,6 +394,56 @@ void tick_resume(void)
394 } 394 }
395} 395}
396 396
397static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
398static unsigned int tick_freeze_depth;
399
400/**
401 * tick_freeze - Suspend the local tick and (possibly) timekeeping.
402 *
403 * Check if this is the last online CPU executing the function and if so,
404 * suspend timekeeping. Otherwise suspend the local tick.
405 *
406 * Call with interrupts disabled. Must be balanced with %tick_unfreeze().
407 * Interrupts must not be enabled before the subsequent %tick_unfreeze().
408 */
409void tick_freeze(void)
410{
411 raw_spin_lock(&tick_freeze_lock);
412
413 tick_freeze_depth++;
414 if (tick_freeze_depth == num_online_cpus()) {
415 timekeeping_suspend();
416 } else {
417 tick_suspend();
418 tick_suspend_broadcast();
419 }
420
421 raw_spin_unlock(&tick_freeze_lock);
422}
423
424/**
425 * tick_unfreeze - Resume the local tick and (possibly) timekeeping.
426 *
427 * Check if this is the first CPU executing the function and if so, resume
428 * timekeeping. Otherwise resume the local tick.
429 *
430 * Call with interrupts disabled. Must be balanced with %tick_freeze().
431 * Interrupts must not be enabled after the preceding %tick_freeze().
432 */
433void tick_unfreeze(void)
434{
435 raw_spin_lock(&tick_freeze_lock);
436
437 if (tick_freeze_depth == num_online_cpus())
438 timekeeping_resume();
439 else
440 tick_resume();
441
442 tick_freeze_depth--;
443
444 raw_spin_unlock(&tick_freeze_lock);
445}
446
397/** 447/**
398 * tick_init - initialize the tick control 448 * tick_init - initialize the tick control
399 */ 449 */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1363d58f07e9..a4c4edac4528 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -326,13 +326,6 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
326 return NOTIFY_OK; 326 return NOTIFY_OK;
327} 327}
328 328
329/*
330 * Worst case string length in chunks of CPU range seems 2 steps
331 * separations: 0,2,4,6,...
332 * This is NR_CPUS + sizeof('\0')
333 */
334static char __initdata nohz_full_buf[NR_CPUS + 1];
335
336static int tick_nohz_init_all(void) 329static int tick_nohz_init_all(void)
337{ 330{
338 int err = -1; 331 int err = -1;
@@ -393,8 +386,8 @@ void __init tick_nohz_init(void)
393 context_tracking_cpu_set(cpu); 386 context_tracking_cpu_set(cpu);
394 387
395 cpu_notifier(tick_nohz_cpu_down_callback, 0); 388 cpu_notifier(tick_nohz_cpu_down_callback, 0);
396 cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); 389 pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
397 pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); 390 cpumask_pr_args(tick_nohz_full_mask));
398} 391}
399#endif 392#endif
400 393
diff --git a/kernel/time/timecounter.c b/kernel/time/timecounter.c
new file mode 100644
index 000000000000..4687b3104bae
--- /dev/null
+++ b/kernel/time/timecounter.c
@@ -0,0 +1,112 @@
1/*
2 * linux/kernel/time/timecounter.c
3 *
4 * based on code that migrated away from
5 * linux/kernel/time/clocksource.c
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 */
17
18#include <linux/export.h>
19#include <linux/timecounter.h>
20
21void timecounter_init(struct timecounter *tc,
22 const struct cyclecounter *cc,
23 u64 start_tstamp)
24{
25 tc->cc = cc;
26 tc->cycle_last = cc->read(cc);
27 tc->nsec = start_tstamp;
28 tc->mask = (1ULL << cc->shift) - 1;
29 tc->frac = 0;
30}
31EXPORT_SYMBOL_GPL(timecounter_init);
32
33/**
34 * timecounter_read_delta - get nanoseconds since last call of this function
35 * @tc: Pointer to time counter
36 *
37 * When the underlying cycle counter runs over, this will be handled
38 * correctly as long as it does not run over more than once between
39 * calls.
40 *
41 * The first call to this function for a new time counter initializes
42 * the time tracking and returns an undefined result.
43 */
44static u64 timecounter_read_delta(struct timecounter *tc)
45{
46 cycle_t cycle_now, cycle_delta;
47 u64 ns_offset;
48
49 /* read cycle counter: */
50 cycle_now = tc->cc->read(tc->cc);
51
52 /* calculate the delta since the last timecounter_read_delta(): */
53 cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
54
55 /* convert to nanoseconds: */
56 ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta,
57 tc->mask, &tc->frac);
58
59 /* update time stamp of timecounter_read_delta() call: */
60 tc->cycle_last = cycle_now;
61
62 return ns_offset;
63}
64
65u64 timecounter_read(struct timecounter *tc)
66{
67 u64 nsec;
68
69 /* increment time by nanoseconds since last call */
70 nsec = timecounter_read_delta(tc);
71 nsec += tc->nsec;
72 tc->nsec = nsec;
73
74 return nsec;
75}
76EXPORT_SYMBOL_GPL(timecounter_read);
77
78/*
79 * This is like cyclecounter_cyc2ns(), but it is used for computing a
80 * time previous to the time stored in the cycle counter.
81 */
82static u64 cc_cyc2ns_backwards(const struct cyclecounter *cc,
83 cycle_t cycles, u64 mask, u64 frac)
84{
85 u64 ns = (u64) cycles;
86
87 ns = ((ns * cc->mult) - frac) >> cc->shift;
88
89 return ns;
90}
91
92u64 timecounter_cyc2time(struct timecounter *tc,
93 cycle_t cycle_tstamp)
94{
95 u64 delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
96 u64 nsec = tc->nsec, frac = tc->frac;
97
98 /*
99 * Instead of always treating cycle_tstamp as more recent
100 * than tc->cycle_last, detect when it is too far in the
101 * future and treat it as old time stamp instead.
102 */
103 if (delta > tc->cc->mask / 2) {
104 delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
105 nsec -= cc_cyc2ns_backwards(tc->cc, delta, tc->mask, frac);
106 } else {
107 nsec += cyclecounter_cyc2ns(tc->cc, delta, tc->mask, &frac);
108 }
109
110 return nsec;
111}
112EXPORT_SYMBOL_GPL(timecounter_cyc2time);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b124af259800..91db94136c10 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -230,9 +230,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
230 230
231/** 231/**
232 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. 232 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
233 * @tk: The timekeeper from which we take the update 233 * @tkr: Timekeeping readout base from which we take the update
234 * @tkf: The fast timekeeper to update
235 * @tbase: The time base for the fast timekeeper (mono/raw)
236 * 234 *
237 * We want to use this from any context including NMI and tracing / 235 * We want to use this from any context including NMI and tracing /
238 * instrumenting the timekeeping code itself. 236 * instrumenting the timekeeping code itself.
@@ -244,11 +242,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
244 * smp_wmb(); <- Ensure that the last base[1] update is visible 242 * smp_wmb(); <- Ensure that the last base[1] update is visible
245 * tkf->seq++; 243 * tkf->seq++;
246 * smp_wmb(); <- Ensure that the seqcount update is visible 244 * smp_wmb(); <- Ensure that the seqcount update is visible
247 * update(tkf->base[0], tk); 245 * update(tkf->base[0], tkr);
248 * smp_wmb(); <- Ensure that the base[0] update is visible 246 * smp_wmb(); <- Ensure that the base[0] update is visible
249 * tkf->seq++; 247 * tkf->seq++;
250 * smp_wmb(); <- Ensure that the seqcount update is visible 248 * smp_wmb(); <- Ensure that the seqcount update is visible
251 * update(tkf->base[1], tk); 249 * update(tkf->base[1], tkr);
252 * 250 *
253 * The reader side does: 251 * The reader side does:
254 * 252 *
@@ -269,7 +267,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
269 * slightly wrong timestamp (a few nanoseconds). See 267 * slightly wrong timestamp (a few nanoseconds). See
270 * @ktime_get_mono_fast_ns. 268 * @ktime_get_mono_fast_ns.
271 */ 269 */
272static void update_fast_timekeeper(struct timekeeper *tk) 270static void update_fast_timekeeper(struct tk_read_base *tkr)
273{ 271{
274 struct tk_read_base *base = tk_fast_mono.base; 272 struct tk_read_base *base = tk_fast_mono.base;
275 273
@@ -277,7 +275,7 @@ static void update_fast_timekeeper(struct timekeeper *tk)
277 raw_write_seqcount_latch(&tk_fast_mono.seq); 275 raw_write_seqcount_latch(&tk_fast_mono.seq);
278 276
279 /* Update base[0] */ 277 /* Update base[0] */
280 memcpy(base, &tk->tkr, sizeof(*base)); 278 memcpy(base, tkr, sizeof(*base));
281 279
282 /* Force readers back to base[0] */ 280 /* Force readers back to base[0] */
283 raw_write_seqcount_latch(&tk_fast_mono.seq); 281 raw_write_seqcount_latch(&tk_fast_mono.seq);
@@ -334,6 +332,35 @@ u64 notrace ktime_get_mono_fast_ns(void)
334} 332}
335EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); 333EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
336 334
335/* Suspend-time cycles value for halted fast timekeeper. */
336static cycle_t cycles_at_suspend;
337
338static cycle_t dummy_clock_read(struct clocksource *cs)
339{
340 return cycles_at_suspend;
341}
342
343/**
344 * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
345 * @tk: Timekeeper to snapshot.
346 *
347 * It generally is unsafe to access the clocksource after timekeeping has been
348 * suspended, so take a snapshot of the readout base of @tk and use it as the
349 * fast timekeeper's readout base while suspended. It will return the same
350 * number of cycles every time until timekeeping is resumed at which time the
351 * proper readout base for the fast timekeeper will be restored automatically.
352 */
353static void halt_fast_timekeeper(struct timekeeper *tk)
354{
355 static struct tk_read_base tkr_dummy;
356 struct tk_read_base *tkr = &tk->tkr;
357
358 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
359 cycles_at_suspend = tkr->read(tkr->clock);
360 tkr_dummy.read = dummy_clock_read;
361 update_fast_timekeeper(&tkr_dummy);
362}
363
337#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD 364#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
338 365
339static inline void update_vsyscall(struct timekeeper *tk) 366static inline void update_vsyscall(struct timekeeper *tk)
@@ -462,7 +489,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
462 memcpy(&shadow_timekeeper, &tk_core.timekeeper, 489 memcpy(&shadow_timekeeper, &tk_core.timekeeper,
463 sizeof(tk_core.timekeeper)); 490 sizeof(tk_core.timekeeper));
464 491
465 update_fast_timekeeper(tk); 492 update_fast_timekeeper(&tk->tkr);
466} 493}
467 494
468/** 495/**
@@ -1170,7 +1197,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
1170 * xtime/wall_to_monotonic/jiffies/etc are 1197 * xtime/wall_to_monotonic/jiffies/etc are
1171 * still managed by arch specific suspend/resume code. 1198 * still managed by arch specific suspend/resume code.
1172 */ 1199 */
1173static void timekeeping_resume(void) 1200void timekeeping_resume(void)
1174{ 1201{
1175 struct timekeeper *tk = &tk_core.timekeeper; 1202 struct timekeeper *tk = &tk_core.timekeeper;
1176 struct clocksource *clock = tk->tkr.clock; 1203 struct clocksource *clock = tk->tkr.clock;
@@ -1251,7 +1278,7 @@ static void timekeeping_resume(void)
1251 hrtimers_resume(); 1278 hrtimers_resume();
1252} 1279}
1253 1280
1254static int timekeeping_suspend(void) 1281int timekeeping_suspend(void)
1255{ 1282{
1256 struct timekeeper *tk = &tk_core.timekeeper; 1283 struct timekeeper *tk = &tk_core.timekeeper;
1257 unsigned long flags; 1284 unsigned long flags;
@@ -1296,6 +1323,7 @@ static int timekeeping_suspend(void)
1296 } 1323 }
1297 1324
1298 timekeeping_update(tk, TK_MIRROR); 1325 timekeeping_update(tk, TK_MIRROR);
1326 halt_fast_timekeeper(tk);
1299 write_seqcount_end(&tk_core.seq); 1327 write_seqcount_end(&tk_core.seq);
1300 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1328 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
1301 1329
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index adc1fc98bde3..1d91416055d5 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -16,5 +16,7 @@ extern int timekeeping_inject_offset(struct timespec *ts);
16extern s32 timekeeping_get_tai_offset(void); 16extern s32 timekeeping_get_tai_offset(void);
17extern void timekeeping_set_tai_offset(s32 tai_offset); 17extern void timekeeping_set_tai_offset(s32 tai_offset);
18extern void timekeeping_clocktai(struct timespec *ts); 18extern void timekeeping_clocktai(struct timespec *ts);
19extern int timekeeping_suspend(void);
20extern void timekeeping_resume(void);
19 21
20#endif 22#endif
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 979ccde26720..98f26588255e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -3,11 +3,11 @@
3 3
4ifdef CONFIG_FUNCTION_TRACER 4ifdef CONFIG_FUNCTION_TRACER
5ORIG_CFLAGS := $(KBUILD_CFLAGS) 5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) 6KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
7 7
8ifdef CONFIG_FTRACE_SELFTEST 8ifdef CONFIG_FTRACE_SELFTEST
9# selftest needs instrumentation 9# selftest needs instrumentation
10CFLAGS_trace_selftest_dynamic.o = -pg 10CFLAGS_trace_selftest_dynamic.o = $(CC_FLAGS_FTRACE)
11obj-y += trace_selftest_dynamic.o 11obj-y += trace_selftest_dynamic.o
12endif 12endif
13endif 13endif
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 224e768bdc73..45e5cb143d17 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5456,7 +5456,7 @@ static __init int ftrace_init_debugfs(void)
5456 struct dentry *d_tracer; 5456 struct dentry *d_tracer;
5457 5457
5458 d_tracer = tracing_init_dentry(); 5458 d_tracer = tracing_init_dentry();
5459 if (!d_tracer) 5459 if (IS_ERR(d_tracer))
5460 return 0; 5460 return 0;
5461 5461
5462 ftrace_init_dyn_debugfs(d_tracer); 5462 ftrace_init_dyn_debugfs(d_tracer);
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 1c71382b283d..eb4220a132ec 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -13,5 +13,6 @@
13#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
14#include <trace/events/power.h> 14#include <trace/events/power.h>
15 15
16EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume);
16EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); 17EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
17 18
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7a4104cb95cb..5040d44fe5a3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -9,7 +9,6 @@
9#include <linux/trace_seq.h> 9#include <linux/trace_seq.h>
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <linux/irq_work.h> 11#include <linux/irq_work.h>
12#include <linux/debugfs.h>
13#include <linux/uaccess.h> 12#include <linux/uaccess.h>
14#include <linux/hardirq.h> 13#include <linux/hardirq.h>
15#include <linux/kthread.h> /* for self test */ 14#include <linux/kthread.h> /* for self test */
@@ -23,7 +22,6 @@
23#include <linux/hash.h> 22#include <linux/hash.h>
24#include <linux/list.h> 23#include <linux/list.h>
25#include <linux/cpu.h> 24#include <linux/cpu.h>
26#include <linux/fs.h>
27 25
28#include <asm/local.h> 26#include <asm/local.h>
29 27
@@ -447,7 +445,10 @@ int ring_buffer_print_page_header(struct trace_seq *s)
447struct rb_irq_work { 445struct rb_irq_work {
448 struct irq_work work; 446 struct irq_work work;
449 wait_queue_head_t waiters; 447 wait_queue_head_t waiters;
448 wait_queue_head_t full_waiters;
450 bool waiters_pending; 449 bool waiters_pending;
450 bool full_waiters_pending;
451 bool wakeup_full;
451}; 452};
452 453
453/* 454/*
@@ -529,6 +530,10 @@ static void rb_wake_up_waiters(struct irq_work *work)
529 struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); 530 struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
530 531
531 wake_up_all(&rbwork->waiters); 532 wake_up_all(&rbwork->waiters);
533 if (rbwork->wakeup_full) {
534 rbwork->wakeup_full = false;
535 wake_up_all(&rbwork->full_waiters);
536 }
532} 537}
533 538
534/** 539/**
@@ -553,9 +558,11 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
553 * data in any cpu buffer, or a specific buffer, put the 558 * data in any cpu buffer, or a specific buffer, put the
554 * caller on the appropriate wait queue. 559 * caller on the appropriate wait queue.
555 */ 560 */
556 if (cpu == RING_BUFFER_ALL_CPUS) 561 if (cpu == RING_BUFFER_ALL_CPUS) {
557 work = &buffer->irq_work; 562 work = &buffer->irq_work;
558 else { 563 /* Full only makes sense on per cpu reads */
564 full = false;
565 } else {
559 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 566 if (!cpumask_test_cpu(cpu, buffer->cpumask))
560 return -ENODEV; 567 return -ENODEV;
561 cpu_buffer = buffer->buffers[cpu]; 568 cpu_buffer = buffer->buffers[cpu];
@@ -564,7 +571,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
564 571
565 572
566 while (true) { 573 while (true) {
567 prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); 574 if (full)
575 prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
576 else
577 prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
568 578
569 /* 579 /*
570 * The events can happen in critical sections where 580 * The events can happen in critical sections where
@@ -586,7 +596,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
586 * that is necessary is that the wake up happens after 596 * that is necessary is that the wake up happens after
587 * a task has been queued. It's OK for spurious wake ups. 597 * a task has been queued. It's OK for spurious wake ups.
588 */ 598 */
589 work->waiters_pending = true; 599 if (full)
600 work->full_waiters_pending = true;
601 else
602 work->waiters_pending = true;
590 603
591 if (signal_pending(current)) { 604 if (signal_pending(current)) {
592 ret = -EINTR; 605 ret = -EINTR;
@@ -615,7 +628,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
615 schedule(); 628 schedule();
616 } 629 }
617 630
618 finish_wait(&work->waiters, &wait); 631 if (full)
632 finish_wait(&work->full_waiters, &wait);
633 else
634 finish_wait(&work->waiters, &wait);
619 635
620 return ret; 636 return ret;
621} 637}
@@ -1230,6 +1246,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1230 init_completion(&cpu_buffer->update_done); 1246 init_completion(&cpu_buffer->update_done);
1231 init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); 1247 init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
1232 init_waitqueue_head(&cpu_buffer->irq_work.waiters); 1248 init_waitqueue_head(&cpu_buffer->irq_work.waiters);
1249 init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
1233 1250
1234 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1251 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1235 GFP_KERNEL, cpu_to_node(cpu)); 1252 GFP_KERNEL, cpu_to_node(cpu));
@@ -2801,6 +2818,8 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2801static __always_inline void 2818static __always_inline void
2802rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) 2819rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2803{ 2820{
2821 bool pagebusy;
2822
2804 if (buffer->irq_work.waiters_pending) { 2823 if (buffer->irq_work.waiters_pending) {
2805 buffer->irq_work.waiters_pending = false; 2824 buffer->irq_work.waiters_pending = false;
2806 /* irq_work_queue() supplies it's own memory barriers */ 2825 /* irq_work_queue() supplies it's own memory barriers */
@@ -2812,6 +2831,15 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2812 /* irq_work_queue() supplies it's own memory barriers */ 2831 /* irq_work_queue() supplies it's own memory barriers */
2813 irq_work_queue(&cpu_buffer->irq_work.work); 2832 irq_work_queue(&cpu_buffer->irq_work.work);
2814 } 2833 }
2834
2835 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
2836
2837 if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
2838 cpu_buffer->irq_work.wakeup_full = true;
2839 cpu_buffer->irq_work.full_waiters_pending = false;
2840 /* irq_work_queue() supplies it's own memory barriers */
2841 irq_work_queue(&cpu_buffer->irq_work.work);
2842 }
2815} 2843}
2816 2844
2817/** 2845/**
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 3f9e328c30b5..13d945c0d03f 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -7,7 +7,7 @@
7#include <linux/completion.h> 7#include <linux/completion.h>
8#include <linux/kthread.h> 8#include <linux/kthread.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/ktime.h>
11#include <asm/local.h> 11#include <asm/local.h>
12 12
13struct rb_page { 13struct rb_page {
@@ -17,7 +17,7 @@ struct rb_page {
17}; 17};
18 18
19/* run time and sleep time in seconds */ 19/* run time and sleep time in seconds */
20#define RUN_TIME 10 20#define RUN_TIME 10ULL
21#define SLEEP_TIME 10 21#define SLEEP_TIME 10
22 22
23/* number of events for writer to wake up the reader */ 23/* number of events for writer to wake up the reader */
@@ -212,8 +212,7 @@ static void ring_buffer_consumer(void)
212 212
213static void ring_buffer_producer(void) 213static void ring_buffer_producer(void)
214{ 214{
215 struct timeval start_tv; 215 ktime_t start_time, end_time, timeout;
216 struct timeval end_tv;
217 unsigned long long time; 216 unsigned long long time;
218 unsigned long long entries; 217 unsigned long long entries;
219 unsigned long long overruns; 218 unsigned long long overruns;
@@ -227,7 +226,8 @@ static void ring_buffer_producer(void)
227 * make the system stall) 226 * make the system stall)
228 */ 227 */
229 trace_printk("Starting ring buffer hammer\n"); 228 trace_printk("Starting ring buffer hammer\n");
230 do_gettimeofday(&start_tv); 229 start_time = ktime_get();
230 timeout = ktime_add_ns(start_time, RUN_TIME * NSEC_PER_SEC);
231 do { 231 do {
232 struct ring_buffer_event *event; 232 struct ring_buffer_event *event;
233 int *entry; 233 int *entry;
@@ -244,7 +244,7 @@ static void ring_buffer_producer(void)
244 ring_buffer_unlock_commit(buffer, event); 244 ring_buffer_unlock_commit(buffer, event);
245 } 245 }
246 } 246 }
247 do_gettimeofday(&end_tv); 247 end_time = ktime_get();
248 248
249 cnt++; 249 cnt++;
250 if (consumer && !(cnt % wakeup_interval)) 250 if (consumer && !(cnt % wakeup_interval))
@@ -264,7 +264,7 @@ static void ring_buffer_producer(void)
264 cond_resched(); 264 cond_resched();
265#endif 265#endif
266 266
267 } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); 267 } while (ktime_before(end_time, timeout) && !kill_test);
268 trace_printk("End ring buffer hammer\n"); 268 trace_printk("End ring buffer hammer\n");
269 269
270 if (consumer) { 270 if (consumer) {
@@ -280,9 +280,7 @@ static void ring_buffer_producer(void)
280 wait_for_completion(&read_done); 280 wait_for_completion(&read_done);
281 } 281 }
282 282
283 time = end_tv.tv_sec - start_tv.tv_sec; 283 time = ktime_us_delta(end_time, start_time);
284 time *= USEC_PER_SEC;
285 time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
286 284
287 entries = ring_buffer_entries(buffer); 285 entries = ring_buffer_entries(buffer);
288 overruns = ring_buffer_overruns(buffer); 286 overruns = ring_buffer_overruns(buffer);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4a9079b9f082..62c6506d663f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2036,7 +2036,8 @@ void trace_printk_init_buffers(void)
2036 2036
2037 /* trace_printk() is for debug use only. Don't use it in production. */ 2037 /* trace_printk() is for debug use only. Don't use it in production. */
2038 2038
2039 pr_warning("\n**********************************************************\n"); 2039 pr_warning("\n");
2040 pr_warning("**********************************************************\n");
2040 pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); 2041 pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2041 pr_warning("** **\n"); 2042 pr_warning("** **\n");
2042 pr_warning("** trace_printk() being used. Allocating extra memory. **\n"); 2043 pr_warning("** trace_printk() being used. Allocating extra memory. **\n");
@@ -3352,12 +3353,12 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf,
3352 3353
3353 mutex_lock(&tracing_cpumask_update_lock); 3354 mutex_lock(&tracing_cpumask_update_lock);
3354 3355
3355 len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask); 3356 len = snprintf(mask_str, count, "%*pb\n",
3356 if (count - len < 2) { 3357 cpumask_pr_args(tr->tracing_cpumask));
3358 if (len >= count) {
3357 count = -EINVAL; 3359 count = -EINVAL;
3358 goto out_err; 3360 goto out_err;
3359 } 3361 }
3360 len += sprintf(mask_str + len, "\n");
3361 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); 3362 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3362 3363
3363out_err: 3364out_err:
@@ -4140,6 +4141,12 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4140 goto out; 4141 goto out;
4141 } 4142 }
4142 4143
4144 /* If trace pipe files are being read, we can't change the tracer */
4145 if (tr->current_trace->ref) {
4146 ret = -EBUSY;
4147 goto out;
4148 }
4149
4143 trace_branch_disable(); 4150 trace_branch_disable();
4144 4151
4145 tr->current_trace->enabled--; 4152 tr->current_trace->enabled--;
@@ -4326,17 +4333,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
4326 } 4333 }
4327 4334
4328 trace_seq_init(&iter->seq); 4335 trace_seq_init(&iter->seq);
4329 4336 iter->trace = tr->current_trace;
4330 /*
4331 * We make a copy of the current tracer to avoid concurrent
4332 * changes on it while we are reading.
4333 */
4334 iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4335 if (!iter->trace) {
4336 ret = -ENOMEM;
4337 goto fail;
4338 }
4339 *iter->trace = *tr->current_trace;
4340 4337
4341 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 4338 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4342 ret = -ENOMEM; 4339 ret = -ENOMEM;
@@ -4363,6 +4360,8 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
4363 iter->trace->pipe_open(iter); 4360 iter->trace->pipe_open(iter);
4364 4361
4365 nonseekable_open(inode, filp); 4362 nonseekable_open(inode, filp);
4363
4364 tr->current_trace->ref++;
4366out: 4365out:
4367 mutex_unlock(&trace_types_lock); 4366 mutex_unlock(&trace_types_lock);
4368 return ret; 4367 return ret;
@@ -4382,6 +4381,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
4382 4381
4383 mutex_lock(&trace_types_lock); 4382 mutex_lock(&trace_types_lock);
4384 4383
4384 tr->current_trace->ref--;
4385
4385 if (iter->trace->pipe_close) 4386 if (iter->trace->pipe_close)
4386 iter->trace->pipe_close(iter); 4387 iter->trace->pipe_close(iter);
4387 4388
@@ -4389,7 +4390,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
4389 4390
4390 free_cpumask_var(iter->started); 4391 free_cpumask_var(iter->started);
4391 mutex_destroy(&iter->mutex); 4392 mutex_destroy(&iter->mutex);
4392 kfree(iter->trace);
4393 kfree(iter); 4393 kfree(iter);
4394 4394
4395 trace_array_put(tr); 4395 trace_array_put(tr);
@@ -4422,7 +4422,7 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4422 return trace_poll(iter, filp, poll_table); 4422 return trace_poll(iter, filp, poll_table);
4423} 4423}
4424 4424
4425/* Must be called with trace_types_lock mutex held. */ 4425/* Must be called with iter->mutex held. */
4426static int tracing_wait_pipe(struct file *filp) 4426static int tracing_wait_pipe(struct file *filp)
4427{ 4427{
4428 struct trace_iterator *iter = filp->private_data; 4428 struct trace_iterator *iter = filp->private_data;
@@ -4467,7 +4467,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
4467 size_t cnt, loff_t *ppos) 4467 size_t cnt, loff_t *ppos)
4468{ 4468{
4469 struct trace_iterator *iter = filp->private_data; 4469 struct trace_iterator *iter = filp->private_data;
4470 struct trace_array *tr = iter->tr;
4471 ssize_t sret; 4470 ssize_t sret;
4472 4471
4473 /* return any leftover data */ 4472 /* return any leftover data */
@@ -4477,12 +4476,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
4477 4476
4478 trace_seq_init(&iter->seq); 4477 trace_seq_init(&iter->seq);
4479 4478
4480 /* copy the tracer to avoid using a global lock all around */
4481 mutex_lock(&trace_types_lock);
4482 if (unlikely(iter->trace->name != tr->current_trace->name))
4483 *iter->trace = *tr->current_trace;
4484 mutex_unlock(&trace_types_lock);
4485
4486 /* 4479 /*
4487 * Avoid more than one consumer on a single file descriptor 4480 * Avoid more than one consumer on a single file descriptor
4488 * This is just a matter of traces coherency, the ring buffer itself 4481 * This is just a matter of traces coherency, the ring buffer itself
@@ -4642,7 +4635,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
4642 .ops = &tracing_pipe_buf_ops, 4635 .ops = &tracing_pipe_buf_ops,
4643 .spd_release = tracing_spd_release_pipe, 4636 .spd_release = tracing_spd_release_pipe,
4644 }; 4637 };
4645 struct trace_array *tr = iter->tr;
4646 ssize_t ret; 4638 ssize_t ret;
4647 size_t rem; 4639 size_t rem;
4648 unsigned int i; 4640 unsigned int i;
@@ -4650,12 +4642,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
4650 if (splice_grow_spd(pipe, &spd)) 4642 if (splice_grow_spd(pipe, &spd))
4651 return -ENOMEM; 4643 return -ENOMEM;
4652 4644
4653 /* copy the tracer to avoid using a global lock all around */
4654 mutex_lock(&trace_types_lock);
4655 if (unlikely(iter->trace->name != tr->current_trace->name))
4656 *iter->trace = *tr->current_trace;
4657 mutex_unlock(&trace_types_lock);
4658
4659 mutex_lock(&iter->mutex); 4645 mutex_lock(&iter->mutex);
4660 4646
4661 if (iter->trace->splice_read) { 4647 if (iter->trace->splice_read) {
@@ -4942,7 +4928,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
4942 *fpos += written; 4928 *fpos += written;
4943 4929
4944 out_unlock: 4930 out_unlock:
4945 for (i = 0; i < nr_pages; i++){ 4931 for (i = nr_pages - 1; i >= 0; i--) {
4946 kunmap_atomic(map_page[i]); 4932 kunmap_atomic(map_page[i]);
4947 put_page(pages[i]); 4933 put_page(pages[i]);
4948 } 4934 }
@@ -5331,6 +5317,8 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
5331 5317
5332 filp->private_data = info; 5318 filp->private_data = info;
5333 5319
5320 tr->current_trace->ref++;
5321
5334 mutex_unlock(&trace_types_lock); 5322 mutex_unlock(&trace_types_lock);
5335 5323
5336 ret = nonseekable_open(inode, filp); 5324 ret = nonseekable_open(inode, filp);
@@ -5361,21 +5349,16 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
5361 if (!count) 5349 if (!count)
5362 return 0; 5350 return 0;
5363 5351
5364 mutex_lock(&trace_types_lock);
5365
5366#ifdef CONFIG_TRACER_MAX_TRACE 5352#ifdef CONFIG_TRACER_MAX_TRACE
5367 if (iter->snapshot && iter->tr->current_trace->use_max_tr) { 5353 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5368 size = -EBUSY; 5354 return -EBUSY;
5369 goto out_unlock;
5370 }
5371#endif 5355#endif
5372 5356
5373 if (!info->spare) 5357 if (!info->spare)
5374 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, 5358 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5375 iter->cpu_file); 5359 iter->cpu_file);
5376 size = -ENOMEM;
5377 if (!info->spare) 5360 if (!info->spare)
5378 goto out_unlock; 5361 return -ENOMEM;
5379 5362
5380 /* Do we have previous read data to read? */ 5363 /* Do we have previous read data to read? */
5381 if (info->read < PAGE_SIZE) 5364 if (info->read < PAGE_SIZE)
@@ -5391,21 +5374,16 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
5391 5374
5392 if (ret < 0) { 5375 if (ret < 0) {
5393 if (trace_empty(iter)) { 5376 if (trace_empty(iter)) {
5394 if ((filp->f_flags & O_NONBLOCK)) { 5377 if ((filp->f_flags & O_NONBLOCK))
5395 size = -EAGAIN; 5378 return -EAGAIN;
5396 goto out_unlock; 5379
5397 }
5398 mutex_unlock(&trace_types_lock);
5399 ret = wait_on_pipe(iter, false); 5380 ret = wait_on_pipe(iter, false);
5400 mutex_lock(&trace_types_lock); 5381 if (ret)
5401 if (ret) { 5382 return ret;
5402 size = ret; 5383
5403 goto out_unlock;
5404 }
5405 goto again; 5384 goto again;
5406 } 5385 }
5407 size = 0; 5386 return 0;
5408 goto out_unlock;
5409 } 5387 }
5410 5388
5411 info->read = 0; 5389 info->read = 0;
@@ -5415,18 +5393,14 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
5415 size = count; 5393 size = count;
5416 5394
5417 ret = copy_to_user(ubuf, info->spare + info->read, size); 5395 ret = copy_to_user(ubuf, info->spare + info->read, size);
5418 if (ret == size) { 5396 if (ret == size)
5419 size = -EFAULT; 5397 return -EFAULT;
5420 goto out_unlock; 5398
5421 }
5422 size -= ret; 5399 size -= ret;
5423 5400
5424 *ppos += size; 5401 *ppos += size;
5425 info->read += size; 5402 info->read += size;
5426 5403
5427 out_unlock:
5428 mutex_unlock(&trace_types_lock);
5429
5430 return size; 5404 return size;
5431} 5405}
5432 5406
@@ -5437,6 +5411,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
5437 5411
5438 mutex_lock(&trace_types_lock); 5412 mutex_lock(&trace_types_lock);
5439 5413
5414 iter->tr->current_trace->ref--;
5415
5440 __trace_array_put(iter->tr); 5416 __trace_array_put(iter->tr);
5441 5417
5442 if (info->spare) 5418 if (info->spare)
@@ -5522,30 +5498,20 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5522 int entries, size, i; 5498 int entries, size, i;
5523 ssize_t ret = 0; 5499 ssize_t ret = 0;
5524 5500
5525 mutex_lock(&trace_types_lock);
5526
5527#ifdef CONFIG_TRACER_MAX_TRACE 5501#ifdef CONFIG_TRACER_MAX_TRACE
5528 if (iter->snapshot && iter->tr->current_trace->use_max_tr) { 5502 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5529 ret = -EBUSY; 5503 return -EBUSY;
5530 goto out;
5531 }
5532#endif 5504#endif
5533 5505
5534 if (splice_grow_spd(pipe, &spd)) { 5506 if (splice_grow_spd(pipe, &spd))
5535 ret = -ENOMEM; 5507 return -ENOMEM;
5536 goto out;
5537 }
5538 5508
5539 if (*ppos & (PAGE_SIZE - 1)) { 5509 if (*ppos & (PAGE_SIZE - 1))
5540 ret = -EINVAL; 5510 return -EINVAL;
5541 goto out;
5542 }
5543 5511
5544 if (len & (PAGE_SIZE - 1)) { 5512 if (len & (PAGE_SIZE - 1)) {
5545 if (len < PAGE_SIZE) { 5513 if (len < PAGE_SIZE)
5546 ret = -EINVAL; 5514 return -EINVAL;
5547 goto out;
5548 }
5549 len &= PAGE_MASK; 5515 len &= PAGE_MASK;
5550 } 5516 }
5551 5517
@@ -5606,25 +5572,20 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5606 /* did we read anything? */ 5572 /* did we read anything? */
5607 if (!spd.nr_pages) { 5573 if (!spd.nr_pages) {
5608 if (ret) 5574 if (ret)
5609 goto out; 5575 return ret;
5576
5577 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5578 return -EAGAIN;
5610 5579
5611 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5612 ret = -EAGAIN;
5613 goto out;
5614 }
5615 mutex_unlock(&trace_types_lock);
5616 ret = wait_on_pipe(iter, true); 5580 ret = wait_on_pipe(iter, true);
5617 mutex_lock(&trace_types_lock);
5618 if (ret) 5581 if (ret)
5619 goto out; 5582 return ret;
5620 5583
5621 goto again; 5584 goto again;
5622 } 5585 }
5623 5586
5624 ret = splice_to_pipe(pipe, &spd); 5587 ret = splice_to_pipe(pipe, &spd);
5625 splice_shrink_spd(&spd); 5588 splice_shrink_spd(&spd);
5626out:
5627 mutex_unlock(&trace_types_lock);
5628 5589
5629 return ret; 5590 return ret;
5630} 5591}
@@ -5854,28 +5815,11 @@ static __init int register_snapshot_cmd(void)
5854static inline __init int register_snapshot_cmd(void) { return 0; } 5815static inline __init int register_snapshot_cmd(void) { return 0; }
5855#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ 5816#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5856 5817
5857struct dentry *tracing_init_dentry_tr(struct trace_array *tr) 5818static struct dentry *tracing_get_dentry(struct trace_array *tr)
5858{ 5819{
5859 if (tr->dir)
5860 return tr->dir;
5861
5862 if (!debugfs_initialized())
5863 return NULL;
5864
5865 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5866 tr->dir = debugfs_create_dir("tracing", NULL);
5867
5868 if (!tr->dir)
5869 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5870
5871 return tr->dir; 5820 return tr->dir;
5872} 5821}
5873 5822
5874struct dentry *tracing_init_dentry(void)
5875{
5876 return tracing_init_dentry_tr(&global_trace);
5877}
5878
5879static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) 5823static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5880{ 5824{
5881 struct dentry *d_tracer; 5825 struct dentry *d_tracer;
@@ -5883,8 +5827,8 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5883 if (tr->percpu_dir) 5827 if (tr->percpu_dir)
5884 return tr->percpu_dir; 5828 return tr->percpu_dir;
5885 5829
5886 d_tracer = tracing_init_dentry_tr(tr); 5830 d_tracer = tracing_get_dentry(tr);
5887 if (!d_tracer) 5831 if (IS_ERR(d_tracer))
5888 return NULL; 5832 return NULL;
5889 5833
5890 tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer); 5834 tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
@@ -6086,8 +6030,8 @@ static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6086 if (tr->options) 6030 if (tr->options)
6087 return tr->options; 6031 return tr->options;
6088 6032
6089 d_tracer = tracing_init_dentry_tr(tr); 6033 d_tracer = tracing_get_dentry(tr);
6090 if (!d_tracer) 6034 if (IS_ERR(d_tracer))
6091 return NULL; 6035 return NULL;
6092 6036
6093 tr->options = debugfs_create_dir("options", d_tracer); 6037 tr->options = debugfs_create_dir("options", d_tracer);
@@ -6416,7 +6360,7 @@ static int instance_delete(const char *name)
6416 goto out_unlock; 6360 goto out_unlock;
6417 6361
6418 ret = -EBUSY; 6362 ret = -EBUSY;
6419 if (tr->ref) 6363 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6420 goto out_unlock; 6364 goto out_unlock;
6421 6365
6422 list_del(&tr->list); 6366 list_del(&tr->list);
@@ -6571,6 +6515,33 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6571 6515
6572} 6516}
6573 6517
6518/**
6519 * tracing_init_dentry - initialize top level trace array
6520 *
6521 * This is called when creating files or directories in the tracing
6522 * directory. It is called via fs_initcall() by any of the boot up code
6523 * and expects to return the dentry of the top level tracing directory.
6524 */
6525struct dentry *tracing_init_dentry(void)
6526{
6527 struct trace_array *tr = &global_trace;
6528
6529 if (tr->dir)
6530 return tr->dir;
6531
6532 if (WARN_ON(!debugfs_initialized()))
6533 return ERR_PTR(-ENODEV);
6534
6535 tr->dir = debugfs_create_dir("tracing", NULL);
6536
6537 if (!tr->dir) {
6538 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6539 return ERR_PTR(-ENOMEM);
6540 }
6541
6542 return tr->dir;
6543}
6544
6574static __init int tracer_init_debugfs(void) 6545static __init int tracer_init_debugfs(void)
6575{ 6546{
6576 struct dentry *d_tracer; 6547 struct dentry *d_tracer;
@@ -6578,7 +6549,7 @@ static __init int tracer_init_debugfs(void)
6578 trace_access_lock_init(); 6549 trace_access_lock_init();
6579 6550
6580 d_tracer = tracing_init_dentry(); 6551 d_tracer = tracing_init_dentry();
6581 if (!d_tracer) 6552 if (IS_ERR(d_tracer))
6582 return 0; 6553 return 0;
6583 6554
6584 init_tracer_debugfs(&global_trace, d_tracer); 6555 init_tracer_debugfs(&global_trace, d_tracer);
@@ -6811,7 +6782,6 @@ __init static int tracer_alloc_buffers(void)
6811 int ring_buf_size; 6782 int ring_buf_size;
6812 int ret = -ENOMEM; 6783 int ret = -ENOMEM;
6813 6784
6814
6815 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) 6785 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6816 goto out; 6786 goto out;
6817 6787
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8de48bac1ce2..dd8205a35760 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -388,6 +388,7 @@ struct tracer {
388 struct tracer *next; 388 struct tracer *next;
389 struct tracer_flags *flags; 389 struct tracer_flags *flags;
390 int enabled; 390 int enabled;
391 int ref;
391 bool print_max; 392 bool print_max;
392 bool allow_instances; 393 bool allow_instances;
393#ifdef CONFIG_TRACER_MAX_TRACE 394#ifdef CONFIG_TRACER_MAX_TRACE
@@ -541,7 +542,6 @@ struct dentry *trace_create_file(const char *name,
541 void *data, 542 void *data,
542 const struct file_operations *fops); 543 const struct file_operations *fops);
543 544
544struct dentry *tracing_init_dentry_tr(struct trace_array *tr);
545struct dentry *tracing_init_dentry(void); 545struct dentry *tracing_init_dentry(void);
546 546
547struct ring_buffer_event; 547struct ring_buffer_event;
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 7d6e2afde669..57cbf1efdd44 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -7,7 +7,6 @@
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/spinlock.h> 8#include <linux/spinlock.h>
9#include <linux/irqflags.h> 9#include <linux/irqflags.h>
10#include <linux/debugfs.h>
11#include <linux/uaccess.h> 10#include <linux/uaccess.h>
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/ftrace.h> 12#include <linux/ftrace.h>
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index b03a0ea77b99..db54dda10ccc 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2531,7 +2531,7 @@ static __init int event_trace_init(void)
2531 return -ENODEV; 2531 return -ENODEV;
2532 2532
2533 d_tracer = tracing_init_dentry(); 2533 d_tracer = tracing_init_dentry();
2534 if (!d_tracer) 2534 if (IS_ERR(d_tracer))
2535 return 0; 2535 return 0;
2536 2536
2537 entry = debugfs_create_file("available_events", 0444, d_tracer, 2537 entry = debugfs_create_file("available_events", 0444, d_tracer,
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d4ddde28a81a..12e2b99be862 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -6,12 +6,10 @@
6#include <linux/stringify.h> 6#include <linux/stringify.h>
7#include <linux/kallsyms.h> 7#include <linux/kallsyms.h>
8#include <linux/seq_file.h> 8#include <linux/seq_file.h>
9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 9#include <linux/uaccess.h>
11#include <linux/ftrace.h> 10#include <linux/ftrace.h>
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/init.h> 12#include <linux/init.h>
14#include <linux/fs.h>
15 13
16#include "trace_output.h" 14#include "trace_output.h"
17 15
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index ba476009e5de..2d25ad1526bb 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1437,7 +1437,7 @@ static __init int init_graph_debugfs(void)
1437 struct dentry *d_tracer; 1437 struct dentry *d_tracer;
1438 1438
1439 d_tracer = tracing_init_dentry(); 1439 d_tracer = tracing_init_dentry();
1440 if (!d_tracer) 1440 if (IS_ERR(d_tracer))
1441 return 0; 1441 return 0;
1442 1442
1443 trace_create_file("max_graph_depth", 0644, d_tracer, 1443 trace_create_file("max_graph_depth", 0644, d_tracer,
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 9bb104f748d0..8523ea345f2b 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -10,11 +10,9 @@
10 * Copyright (C) 2004 Nadia Yvette Chambers 10 * Copyright (C) 2004 Nadia Yvette Chambers
11 */ 11 */
12#include <linux/kallsyms.h> 12#include <linux/kallsyms.h>
13#include <linux/debugfs.h>
14#include <linux/uaccess.h> 13#include <linux/uaccess.h>
15#include <linux/module.h> 14#include <linux/module.h>
16#include <linux/ftrace.h> 15#include <linux/ftrace.h>
17#include <linux/fs.h>
18 16
19#include "trace.h" 17#include "trace.h"
20 18
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 296079ae6583..d73f565b4e06 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1320,7 +1320,7 @@ static __init int init_kprobe_trace(void)
1320 return -EINVAL; 1320 return -EINVAL;
1321 1321
1322 d_tracer = tracing_init_dentry(); 1322 d_tracer = tracing_init_dentry();
1323 if (!d_tracer) 1323 if (IS_ERR(d_tracer))
1324 return 0; 1324 return 0;
1325 1325
1326 entry = debugfs_create_file("kprobe_events", 0644, d_tracer, 1326 entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index fcf0a9e48916..8bb2071474dd 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -6,8 +6,6 @@
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h> 9#include <linux/ftrace.h>
12 10
13#include "trace.h" 11#include "trace.h"
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b77b9a697619..692bf7184c8c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -177,6 +177,50 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
177} 177}
178EXPORT_SYMBOL(ftrace_print_hex_seq); 178EXPORT_SYMBOL(ftrace_print_hex_seq);
179 179
180const char *
181ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len,
182 size_t el_size)
183{
184 const char *ret = trace_seq_buffer_ptr(p);
185 const char *prefix = "";
186 void *ptr = (void *)buf;
187
188 trace_seq_putc(p, '{');
189
190 while (ptr < buf + buf_len) {
191 switch (el_size) {
192 case 1:
193 trace_seq_printf(p, "%s0x%x", prefix,
194 *(u8 *)ptr);
195 break;
196 case 2:
197 trace_seq_printf(p, "%s0x%x", prefix,
198 *(u16 *)ptr);
199 break;
200 case 4:
201 trace_seq_printf(p, "%s0x%x", prefix,
202 *(u32 *)ptr);
203 break;
204 case 8:
205 trace_seq_printf(p, "%s0x%llx", prefix,
206 *(u64 *)ptr);
207 break;
208 default:
209 trace_seq_printf(p, "BAD SIZE:%zu 0x%x", el_size,
210 *(u8 *)ptr);
211 el_size = 1;
212 }
213 prefix = ",";
214 ptr += el_size;
215 }
216
217 trace_seq_putc(p, '}');
218 trace_seq_putc(p, 0);
219
220 return ret;
221}
222EXPORT_SYMBOL(ftrace_print_array_seq);
223
180int ftrace_raw_output_prep(struct trace_iterator *iter, 224int ftrace_raw_output_prep(struct trace_iterator *iter,
181 struct trace_event *trace_event) 225 struct trace_event *trace_event)
182{ 226{
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index c4e70b6bd7fa..36c1455b7567 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -5,7 +5,6 @@
5 * 5 *
6 */ 6 */
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/debugfs.h>
9#include <linux/uaccess.h> 8#include <linux/uaccess.h>
10#include <linux/kernel.h> 9#include <linux/kernel.h>
11#include <linux/ftrace.h> 10#include <linux/ftrace.h>
@@ -15,7 +14,6 @@
15#include <linux/ctype.h> 14#include <linux/ctype.h>
16#include <linux/list.h> 15#include <linux/list.h>
17#include <linux/slab.h> 16#include <linux/slab.h>
18#include <linux/fs.h>
19 17
20#include "trace.h" 18#include "trace.h"
21 19
@@ -349,7 +347,7 @@ static __init int init_trace_printk_function_export(void)
349 struct dentry *d_tracer; 347 struct dentry *d_tracer;
350 348
351 d_tracer = tracing_init_dentry(); 349 d_tracer = tracing_init_dentry();
352 if (!d_tracer) 350 if (IS_ERR(d_tracer))
353 return 0; 351 return 0;
354 352
355 trace_create_file("printk_formats", 0444, d_tracer, 353 trace_create_file("printk_formats", 0444, d_tracer,
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 2e293beb186e..419ca37e72c9 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -5,8 +5,6 @@
5 * 5 *
6 */ 6 */
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/fs.h>
9#include <linux/debugfs.h>
10#include <linux/kallsyms.h> 8#include <linux/kallsyms.h>
11#include <linux/uaccess.h> 9#include <linux/uaccess.h>
12#include <linux/ftrace.h> 10#include <linux/ftrace.h>
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 8fb84b362816..d6e1003724e9 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -10,8 +10,6 @@
10 * Copyright (C) 2004 Nadia Yvette Chambers 10 * Copyright (C) 2004 Nadia Yvette Chambers
11 */ 11 */
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/debugfs.h>
15#include <linux/kallsyms.h> 13#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 14#include <linux/uaccess.h>
17#include <linux/ftrace.h> 15#include <linux/ftrace.h>
diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c
index f8b45d8792f9..e694c9f9efa4 100644
--- a/kernel/trace/trace_seq.c
+++ b/kernel/trace/trace_seq.c
@@ -120,7 +120,7 @@ void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
120 120
121 __trace_seq_init(s); 121 __trace_seq_init(s);
122 122
123 seq_buf_bitmask(&s->seq, maskp, nmaskbits); 123 seq_buf_printf(&s->seq, "%*pb", nmaskbits, maskp);
124 124
125 if (unlikely(seq_buf_has_overflowed(&s->seq))) { 125 if (unlikely(seq_buf_has_overflowed(&s->seq))) {
126 s->seq.len = save_len; 126 s->seq.len = save_len;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 16eddb308c33..c3e4fcfddd45 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -7,12 +7,10 @@
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/spinlock.h> 8#include <linux/spinlock.h>
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h> 10#include <linux/ftrace.h>
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/sysctl.h> 12#include <linux/sysctl.h>
14#include <linux/init.h> 13#include <linux/init.h>
15#include <linux/fs.h>
16 14
17#include <asm/setup.h> 15#include <asm/setup.h>
18 16
@@ -462,7 +460,7 @@ static __init int stack_trace_init(void)
462 struct dentry *d_tracer; 460 struct dentry *d_tracer;
463 461
464 d_tracer = tracing_init_dentry(); 462 d_tracer = tracing_init_dentry();
465 if (!d_tracer) 463 if (IS_ERR(d_tracer))
466 return 0; 464 return 0;
467 465
468 trace_create_file("stack_max_size", 0644, d_tracer, 466 trace_create_file("stack_max_size", 0644, d_tracer,
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 7af67360b330..75e19e86c954 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -276,7 +276,7 @@ static int tracing_stat_init(void)
276 struct dentry *d_tracing; 276 struct dentry *d_tracing;
277 277
278 d_tracing = tracing_init_dentry(); 278 d_tracing = tracing_init_dentry();
279 if (!d_tracing) 279 if (IS_ERR(d_tracing))
280 return 0; 280 return 0;
281 281
282 stat_dir = debugfs_create_dir("trace_stat", d_tracing); 282 stat_dir = debugfs_create_dir("trace_stat", d_tracing);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index b11441321e7a..7dc1c8abecd6 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1321,7 +1321,7 @@ static __init int init_uprobe_trace(void)
1321 struct dentry *d_tracer; 1321 struct dentry *d_tracer;
1322 1322
1323 d_tracer = tracing_init_dentry(); 1323 d_tracer = tracing_init_dentry();
1324 if (!d_tracer) 1324 if (IS_ERR(d_tracer))
1325 return 0; 1325 return 0;
1326 1326
1327 trace_create_file("uprobe_events", 0644, d_tracer, 1327 trace_create_file("uprobe_events", 0644, d_tracer,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 70bf11815f84..3174bf8e3538 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -154,7 +154,7 @@ static int get_softlockup_thresh(void)
154 */ 154 */
155static unsigned long get_timestamp(void) 155static unsigned long get_timestamp(void)
156{ 156{
157 return local_clock() >> 30LL; /* 2^30 ~= 10^9 */ 157 return running_clock() >> 30LL; /* 2^30 ~= 10^9 */
158} 158}
159 159
160static void set_sample_period(void) 160static void set_sample_period(void)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index beeeac9e0e3e..f28849394791 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3083,10 +3083,9 @@ static ssize_t wq_cpumask_show(struct device *dev,
3083 int written; 3083 int written;
3084 3084
3085 mutex_lock(&wq->mutex); 3085 mutex_lock(&wq->mutex);
3086 written = cpumask_scnprintf(buf, PAGE_SIZE, wq->unbound_attrs->cpumask); 3086 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
3087 cpumask_pr_args(wq->unbound_attrs->cpumask));
3087 mutex_unlock(&wq->mutex); 3088 mutex_unlock(&wq->mutex);
3088
3089 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
3090 return written; 3089 return written;
3091} 3090}
3092 3091