author     Ingo Molnar <mingo@elte.hu>    2010-02-08 02:55:43 -0500
committer  Ingo Molnar <mingo@elte.hu>    2010-02-08 02:55:46 -0500
commit     6d3e0907b8b239d16720d144e2675ecf10d3bc3b (patch)
tree       e0b0743b5f6f82b057cafc4f3687396a6e01a0b4 /kernel
parent     23577256953c870de9b724c3a2611ce7be6a1e4e (diff)
parent     50200df462023b187d80a99a52f5f2cfe3c86c26 (diff)

Merge branch 'sched/urgent' into sched/core

Merge reason: Merge dependent fix, update to latest -rc.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c                     |   8
-rw-r--r--  kernel/cpu.c                        |  10
-rw-r--r--  kernel/cred.c                       |   2
-rw-r--r--  kernel/fork.c                       |  15
-rw-r--r--  kernel/futex.c                      |  57
-rw-r--r--  kernel/hw_breakpoint.c              |  66
-rw-r--r--  kernel/kexec.c                      |   4
-rw-r--r--  kernel/kfifo.c                      | 108
-rw-r--r--  kernel/kgdb.c                       |   9
-rw-r--r--  kernel/kmod.c                       |  12
-rw-r--r--  kernel/kprobes.c                    |   2
-rw-r--r--  kernel/lockdep.c                    |   2
-rw-r--r--  kernel/module.c                     |  17
-rw-r--r--  kernel/panic.c                      |   3
-rw-r--r--  kernel/perf_event.c                 |  14
-rw-r--r--  kernel/printk.c                     |   1
-rw-r--r--  kernel/sched.c                      |  44
-rw-r--r--  kernel/sched_fair.c                 |   2
-rw-r--r--  kernel/signal.c                     |   3
-rw-r--r--  kernel/smp.c                        |   2
-rw-r--r--  kernel/softlockup.c                 |  15
-rw-r--r--  kernel/time/clockevents.c           |   3
-rw-r--r--  kernel/time/clocksource.c           |  18
-rw-r--r--  kernel/timer.c                      |   3
-rw-r--r--  kernel/trace/Kconfig                | 116
-rw-r--r--  kernel/trace/ftrace.c               |   6
-rw-r--r--  kernel/trace/ring_buffer.c          |  28
-rw-r--r--  kernel/trace/trace.c                |   7
-rw-r--r--  kernel/trace/trace_events_filter.c  |  29
-rw-r--r--  kernel/trace/trace_export.c         |   7
-rw-r--r--  kernel/trace/trace_kprobe.c         |   7
-rw-r--r--  kernel/trace/trace_ksym.c           | 140

32 files changed, 464 insertions(+), 296 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0249f4be9b5c..aa3bee566446 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2468,7 +2468,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2468 /* make sure l doesn't vanish out from under us */ 2468 /* make sure l doesn't vanish out from under us */
2469 down_write(&l->mutex); 2469 down_write(&l->mutex);
2470 mutex_unlock(&cgrp->pidlist_mutex); 2470 mutex_unlock(&cgrp->pidlist_mutex);
2471 l->use_count++;
2472 return l; 2471 return l;
2473 } 2472 }
2474 } 2473 }
@@ -2937,14 +2936,17 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2937 2936
2938 for_each_subsys(root, ss) { 2937 for_each_subsys(root, ss) {
2939 struct cgroup_subsys_state *css = ss->create(ss, cgrp); 2938 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
2939
2940 if (IS_ERR(css)) { 2940 if (IS_ERR(css)) {
2941 err = PTR_ERR(css); 2941 err = PTR_ERR(css);
2942 goto err_destroy; 2942 goto err_destroy;
2943 } 2943 }
2944 init_cgroup_css(css, ss, cgrp); 2944 init_cgroup_css(css, ss, cgrp);
2945 if (ss->use_id) 2945 if (ss->use_id) {
2946 if (alloc_css_id(ss, parent, cgrp)) 2946 err = alloc_css_id(ss, parent, cgrp);
2947 if (err)
2947 goto err_destroy; 2948 goto err_destroy;
2949 }
2948 /* At error, ->destroy() callback has to free assigned ID. */ 2950 /* At error, ->destroy() callback has to free assigned ID. */
2949 } 2951 }
2950 2952
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1c8ddd6ee940..677f25376a38 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -151,13 +151,13 @@ static inline void check_for_tasks(int cpu)
151 151
152 write_lock_irq(&tasklist_lock); 152 write_lock_irq(&tasklist_lock);
153 for_each_process(p) { 153 for_each_process(p) {
154 if (task_cpu(p) == cpu && 154 if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
155 (!cputime_eq(p->utime, cputime_zero) || 155 (!cputime_eq(p->utime, cputime_zero) ||
156 !cputime_eq(p->stime, cputime_zero))) 156 !cputime_eq(p->stime, cputime_zero)))
157 printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\ 157 printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
158 (state = %ld, flags = %x) \n", 158 "(state = %ld, flags = %x)\n",
159 p->comm, task_pid_nr(p), cpu, 159 p->comm, task_pid_nr(p), cpu,
160 p->state, p->flags); 160 p->state, p->flags);
161 } 161 }
162 write_unlock_irq(&tasklist_lock); 162 write_unlock_irq(&tasklist_lock);
163} 163}
diff --git a/kernel/cred.c b/kernel/cred.c
index dd76cfe5f5b0..1ed8ca18790c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -224,7 +224,7 @@ struct cred *cred_alloc_blank(void)
224#ifdef CONFIG_KEYS 224#ifdef CONFIG_KEYS
225 new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL); 225 new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
226 if (!new->tgcred) { 226 if (!new->tgcred) {
227 kfree(new); 227 kmem_cache_free(cred_jar, new);
228 return NULL; 228 return NULL;
229 } 229 }
230 atomic_set(&new->tgcred->usage, 1); 230 atomic_set(&new->tgcred->usage, 1);
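
The cred.c fix above pairs the error-path free with the allocator that actually produced the object: memory from a slab cache must go back through kmem_cache_free(), not kfree(). A minimal sketch of that pairing under hypothetical names (demo_jar and the failure flag are made up for illustration, not part of the kernel tree):

#include <linux/slab.h>

static struct kmem_cache *demo_jar;	/* hypothetical slab cache, created elsewhere */

static void *demo_alloc(int setup_failed)
{
	void *obj = kmem_cache_zalloc(demo_jar, GFP_KERNEL);

	if (!obj)
		return NULL;
	if (setup_failed) {
		/* free with the cache's counterpart, as the fix does;
		 * kfree() on a kmem_cache_zalloc()'d object is wrong */
		kmem_cache_free(demo_jar, obj);
		return NULL;
	}
	return obj;
}
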
diff --git a/kernel/fork.c b/kernel/fork.c
index 5b2959b3ffc2..f88bd984df35 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1241,21 +1241,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1241 /* Need tasklist lock for parent etc handling! */ 1241 /* Need tasklist lock for parent etc handling! */
1242 write_lock_irq(&tasklist_lock); 1242 write_lock_irq(&tasklist_lock);
1243 1243
1244 /*
1245 * The task hasn't been attached yet, so its cpus_allowed mask will
1246 * not be changed, nor will its assigned CPU.
1247 *
1248 * The cpus_allowed mask of the parent may have changed after it was
1249 * copied first time - so re-copy it here, then check the child's CPU
1250 * to ensure it is on a valid CPU (and if not, just force it back to
1251 * parent's CPU). This avoids alot of nasty races.
1252 */
1253 p->cpus_allowed = current->cpus_allowed;
1254 p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
1255 if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
1256 !cpu_online(task_cpu(p))))
1257 set_task_cpu(p, smp_processor_id());
1258
1259 /* CLONE_PARENT re-uses the old parent */ 1244 /* CLONE_PARENT re-uses the old parent */
1260 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { 1245 if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1261 p->real_parent = current->real_parent; 1246 p->real_parent = current->real_parent;
diff --git a/kernel/futex.c b/kernel/futex.c
index 8e3c3ffe1b9a..e7a35f1039e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -203,8 +203,6 @@ static void drop_futex_key_refs(union futex_key *key)
203 * @uaddr: virtual address of the futex 203 * @uaddr: virtual address of the futex
204 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED 204 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
205 * @key: address where result is stored. 205 * @key: address where result is stored.
206 * @rw: mapping needs to be read/write (values: VERIFY_READ,
207 * VERIFY_WRITE)
208 * 206 *
209 * Returns a negative error code or 0 207 * Returns a negative error code or 0
210 * The key words are stored in *key on success. 208 * The key words are stored in *key on success.
@@ -216,7 +214,7 @@ static void drop_futex_key_refs(union futex_key *key)
216 * lock_page() might sleep, the caller should not hold a spinlock. 214 * lock_page() might sleep, the caller should not hold a spinlock.
217 */ 215 */
218static int 216static int
219get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) 217get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
220{ 218{
221 unsigned long address = (unsigned long)uaddr; 219 unsigned long address = (unsigned long)uaddr;
222 struct mm_struct *mm = current->mm; 220 struct mm_struct *mm = current->mm;
@@ -239,7 +237,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
239 * but access_ok() should be faster than find_vma() 237 * but access_ok() should be faster than find_vma()
240 */ 238 */
241 if (!fshared) { 239 if (!fshared) {
242 if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) 240 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
243 return -EFAULT; 241 return -EFAULT;
244 key->private.mm = mm; 242 key->private.mm = mm;
245 key->private.address = address; 243 key->private.address = address;
@@ -248,7 +246,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
248 } 246 }
249 247
250again: 248again:
251 err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page); 249 err = get_user_pages_fast(address, 1, 1, &page);
252 if (err < 0) 250 if (err < 0)
253 return err; 251 return err;
254 252
@@ -532,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
532 return -EINVAL; 530 return -EINVAL;
533 531
534 WARN_ON(!atomic_read(&pi_state->refcount)); 532 WARN_ON(!atomic_read(&pi_state->refcount));
535 WARN_ON(pid && pi_state->owner && 533
536 pi_state->owner->pid != pid); 534 /*
535 * When pi_state->owner is NULL then the owner died
536 * and another waiter is on the fly. pi_state->owner
537 * is fixed up by the task which acquires
538 * pi_state->rt_mutex.
539 *
540 * We do not check for pid == 0 which can happen when
541 * the owner died and robust_list_exit() cleared the
542 * TID.
543 */
544 if (pid && pi_state->owner) {
545 /*
546 * Bail out if user space manipulated the
547 * futex value.
548 */
549 if (pid != task_pid_vnr(pi_state->owner))
550 return -EINVAL;
551 }
537 552
538 atomic_inc(&pi_state->refcount); 553 atomic_inc(&pi_state->refcount);
539 *ps = pi_state; 554 *ps = pi_state;
@@ -760,6 +775,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
760 if (!pi_state) 775 if (!pi_state)
761 return -EINVAL; 776 return -EINVAL;
762 777
778 /*
779 * If current does not own the pi_state then the futex is
780 * inconsistent and user space fiddled with the futex value.
781 */
782 if (pi_state->owner != current)
783 return -EINVAL;
784
763 raw_spin_lock(&pi_state->pi_mutex.wait_lock); 785 raw_spin_lock(&pi_state->pi_mutex.wait_lock);
764 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); 786 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
765 787
@@ -867,7 +889,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
867 if (!bitset) 889 if (!bitset)
868 return -EINVAL; 890 return -EINVAL;
869 891
870 ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ); 892 ret = get_futex_key(uaddr, fshared, &key);
871 if (unlikely(ret != 0)) 893 if (unlikely(ret != 0))
872 goto out; 894 goto out;
873 895
@@ -913,10 +935,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
913 int ret, op_ret; 935 int ret, op_ret;
914 936
915retry: 937retry:
916 ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); 938 ret = get_futex_key(uaddr1, fshared, &key1);
917 if (unlikely(ret != 0)) 939 if (unlikely(ret != 0))
918 goto out; 940 goto out;
919 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); 941 ret = get_futex_key(uaddr2, fshared, &key2);
920 if (unlikely(ret != 0)) 942 if (unlikely(ret != 0))
921 goto out_put_key1; 943 goto out_put_key1;
922 944
@@ -1175,11 +1197,10 @@ retry:
1175 pi_state = NULL; 1197 pi_state = NULL;
1176 } 1198 }
1177 1199
1178 ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); 1200 ret = get_futex_key(uaddr1, fshared, &key1);
1179 if (unlikely(ret != 0)) 1201 if (unlikely(ret != 0))
1180 goto out; 1202 goto out;
1181 ret = get_futex_key(uaddr2, fshared, &key2, 1203 ret = get_futex_key(uaddr2, fshared, &key2);
1182 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1183 if (unlikely(ret != 0)) 1204 if (unlikely(ret != 0))
1184 goto out_put_key1; 1205 goto out_put_key1;
1185 1206
@@ -1738,7 +1759,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
1738 */ 1759 */
1739retry: 1760retry:
1740 q->key = FUTEX_KEY_INIT; 1761 q->key = FUTEX_KEY_INIT;
1741 ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ); 1762 ret = get_futex_key(uaddr, fshared, &q->key);
1742 if (unlikely(ret != 0)) 1763 if (unlikely(ret != 0))
1743 return ret; 1764 return ret;
1744 1765
@@ -1904,7 +1925,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1904 q.requeue_pi_key = NULL; 1925 q.requeue_pi_key = NULL;
1905retry: 1926retry:
1906 q.key = FUTEX_KEY_INIT; 1927 q.key = FUTEX_KEY_INIT;
1907 ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); 1928 ret = get_futex_key(uaddr, fshared, &q.key);
1908 if (unlikely(ret != 0)) 1929 if (unlikely(ret != 0))
1909 goto out; 1930 goto out;
1910 1931
@@ -1974,7 +1995,7 @@ retry_private:
1974 /* Unqueue and drop the lock */ 1995 /* Unqueue and drop the lock */
1975 unqueue_me_pi(&q); 1996 unqueue_me_pi(&q);
1976 1997
1977 goto out; 1998 goto out_put_key;
1978 1999
1979out_unlock_put_key: 2000out_unlock_put_key:
1980 queue_unlock(&q, hb); 2001 queue_unlock(&q, hb);
@@ -2023,7 +2044,7 @@ retry:
2023 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 2044 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
2024 return -EPERM; 2045 return -EPERM;
2025 2046
2026 ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE); 2047 ret = get_futex_key(uaddr, fshared, &key);
2027 if (unlikely(ret != 0)) 2048 if (unlikely(ret != 0))
2028 goto out; 2049 goto out;
2029 2050
@@ -2215,7 +2236,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2215 rt_waiter.task = NULL; 2236 rt_waiter.task = NULL;
2216 2237
2217 key2 = FUTEX_KEY_INIT; 2238 key2 = FUTEX_KEY_INIT;
2218 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); 2239 ret = get_futex_key(uaddr2, fshared, &key2);
2219 if (unlikely(ret != 0)) 2240 if (unlikely(ret != 0))
2220 goto out; 2241 goto out;
2221 2242
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index dbcbf6a33a08..8a5c7d55ac9f 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,6 +40,7 @@
40#include <linux/percpu.h> 40#include <linux/percpu.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/cpu.h>
43#include <linux/smp.h> 44#include <linux/smp.h>
44 45
45#include <linux/hw_breakpoint.h> 46#include <linux/hw_breakpoint.h>
@@ -242,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
242 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) 243 * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
243 * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM 244 * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
244 */ 245 */
245int reserve_bp_slot(struct perf_event *bp) 246static int __reserve_bp_slot(struct perf_event *bp)
246{ 247{
247 struct bp_busy_slots slots = {0}; 248 struct bp_busy_slots slots = {0};
248 int ret = 0;
249
250 mutex_lock(&nr_bp_mutex);
251 249
252 fetch_bp_busy_slots(&slots, bp); 250 fetch_bp_busy_slots(&slots, bp);
253 251
254 /* Flexible counters need to keep at least one slot */ 252 /* Flexible counters need to keep at least one slot */
255 if (slots.pinned + (!!slots.flexible) == HBP_NUM) { 253 if (slots.pinned + (!!slots.flexible) == HBP_NUM)
256 ret = -ENOSPC; 254 return -ENOSPC;
257 goto end;
258 }
259 255
260 toggle_bp_slot(bp, true); 256 toggle_bp_slot(bp, true);
261 257
262end: 258 return 0;
259}
260
261int reserve_bp_slot(struct perf_event *bp)
262{
263 int ret;
264
265 mutex_lock(&nr_bp_mutex);
266
267 ret = __reserve_bp_slot(bp);
268
263 mutex_unlock(&nr_bp_mutex); 269 mutex_unlock(&nr_bp_mutex);
264 270
265 return ret; 271 return ret;
266} 272}
267 273
274static void __release_bp_slot(struct perf_event *bp)
275{
276 toggle_bp_slot(bp, false);
277}
278
268void release_bp_slot(struct perf_event *bp) 279void release_bp_slot(struct perf_event *bp)
269{ 280{
270 mutex_lock(&nr_bp_mutex); 281 mutex_lock(&nr_bp_mutex);
271 282
272 toggle_bp_slot(bp, false); 283 __release_bp_slot(bp);
273 284
274 mutex_unlock(&nr_bp_mutex); 285 mutex_unlock(&nr_bp_mutex);
275} 286}
276 287
288/*
289 * Allow the kernel debugger to reserve breakpoint slots without
290 * taking a lock using the dbg_* variant of for the reserve and
291 * release breakpoint slots.
292 */
293int dbg_reserve_bp_slot(struct perf_event *bp)
294{
295 if (mutex_is_locked(&nr_bp_mutex))
296 return -1;
297
298 return __reserve_bp_slot(bp);
299}
300
301int dbg_release_bp_slot(struct perf_event *bp)
302{
303 if (mutex_is_locked(&nr_bp_mutex))
304 return -1;
305
306 __release_bp_slot(bp);
307
308 return 0;
309}
277 310
278int register_perf_hw_breakpoint(struct perf_event *bp) 311int register_perf_hw_breakpoint(struct perf_event *bp)
279{ 312{
@@ -295,6 +328,10 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
295 if (!bp->attr.disabled || !bp->overflow_handler) 328 if (!bp->attr.disabled || !bp->overflow_handler)
296 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); 329 ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
297 330
331 /* if arch_validate_hwbkpt_settings() fails then release bp slot */
332 if (ret)
333 release_bp_slot(bp);
334
298 return ret; 335 return ret;
299} 336}
300 337
@@ -388,7 +425,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
388 if (!cpu_events) 425 if (!cpu_events)
389 return ERR_PTR(-ENOMEM); 426 return ERR_PTR(-ENOMEM);
390 427
391 for_each_possible_cpu(cpu) { 428 get_online_cpus();
429 for_each_online_cpu(cpu) {
392 pevent = per_cpu_ptr(cpu_events, cpu); 430 pevent = per_cpu_ptr(cpu_events, cpu);
393 bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); 431 bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered);
394 432
@@ -399,18 +437,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
399 goto fail; 437 goto fail;
400 } 438 }
401 } 439 }
440 put_online_cpus();
402 441
403 return cpu_events; 442 return cpu_events;
404 443
405fail: 444fail:
406 for_each_possible_cpu(cpu) { 445 for_each_online_cpu(cpu) {
407 pevent = per_cpu_ptr(cpu_events, cpu); 446 pevent = per_cpu_ptr(cpu_events, cpu);
408 if (IS_ERR(*pevent)) 447 if (IS_ERR(*pevent))
409 break; 448 break;
410 unregister_hw_breakpoint(*pevent); 449 unregister_hw_breakpoint(*pevent);
411 } 450 }
451 put_online_cpus();
452
412 free_percpu(cpu_events); 453 free_percpu(cpu_events);
413 /* return the error if any */
414 return ERR_PTR(err); 454 return ERR_PTR(err);
415} 455}
416EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); 456EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
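
The register_wide_hw_breakpoint() hunks above replace for_each_possible_cpu() with for_each_online_cpu() bracketed by get_online_cpus()/put_online_cpus(). A stripped-down sketch of that hotplug-safe iteration pattern, with a hypothetical per-CPU setup helper standing in for the perf calls:

#include <linux/cpu.h>

static int demo_setup_cpu(int cpu)
{
	return 0;	/* placeholder for the real per-CPU work */
}

static int demo_setup_all_cpus(void)
{
	int cpu, err = 0;

	get_online_cpus();	/* pin the online mask against hotplug */
	for_each_online_cpu(cpu) {
		err = demo_setup_cpu(cpu);
		if (err)
			break;
	}
	put_online_cpus();

	return err;
}
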
diff --git a/kernel/kexec.c b/kernel/kexec.c
index a9a93d9ee7a7..ef077fb73155 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,6 +32,7 @@
32#include <linux/console.h> 32#include <linux/console.h>
33#include <linux/vmalloc.h> 33#include <linux/vmalloc.h>
34#include <linux/swap.h> 34#include <linux/swap.h>
35#include <linux/kmsg_dump.h>
35 36
36#include <asm/page.h> 37#include <asm/page.h>
37#include <asm/uaccess.h> 38#include <asm/uaccess.h>
@@ -1074,6 +1075,9 @@ void crash_kexec(struct pt_regs *regs)
1074 if (mutex_trylock(&kexec_mutex)) { 1075 if (mutex_trylock(&kexec_mutex)) {
1075 if (kexec_crash_image) { 1076 if (kexec_crash_image) {
1076 struct pt_regs fixed_regs; 1077 struct pt_regs fixed_regs;
1078
1079 kmsg_dump(KMSG_DUMP_KEXEC);
1080
1077 crash_setup_regs(&fixed_regs, regs); 1081 crash_setup_regs(&fixed_regs, regs);
1078 crash_save_vmcoreinfo(); 1082 crash_save_vmcoreinfo();
1079 machine_crash_shutdown(&fixed_regs); 1083 machine_crash_shutdown(&fixed_regs);
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index e92d519f93b1..498cabba225e 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -28,7 +28,7 @@
28#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/uaccess.h> 29#include <linux/uaccess.h>
30 30
31static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, 31static void _kfifo_init(struct kfifo *fifo, void *buffer,
32 unsigned int size) 32 unsigned int size)
33{ 33{
34 fifo->buffer = buffer; 34 fifo->buffer = buffer;
@@ -41,10 +41,10 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
41 * kfifo_init - initialize a FIFO using a preallocated buffer 41 * kfifo_init - initialize a FIFO using a preallocated buffer
42 * @fifo: the fifo to assign the buffer 42 * @fifo: the fifo to assign the buffer
43 * @buffer: the preallocated buffer to be used. 43 * @buffer: the preallocated buffer to be used.
44 * @size: the size of the internal buffer, this have to be a power of 2. 44 * @size: the size of the internal buffer, this has to be a power of 2.
45 * 45 *
46 */ 46 */
47void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size) 47void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
48{ 48{
49 /* size must be a power of 2 */ 49 /* size must be a power of 2 */
50 BUG_ON(!is_power_of_2(size)); 50 BUG_ON(!is_power_of_2(size));
@@ -159,8 +159,9 @@ static inline void __kfifo_out_data(struct kfifo *fifo,
159 memcpy(to + l, fifo->buffer, len - l); 159 memcpy(to + l, fifo->buffer, len - l);
160} 160}
161 161
162static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo, 162static inline int __kfifo_from_user_data(struct kfifo *fifo,
163 const void __user *from, unsigned int len, unsigned int off) 163 const void __user *from, unsigned int len, unsigned int off,
164 unsigned *lenout)
164{ 165{
165 unsigned int l; 166 unsigned int l;
166 int ret; 167 int ret;
@@ -177,16 +178,20 @@ static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
177 /* first put the data starting from fifo->in to buffer end */ 178 /* first put the data starting from fifo->in to buffer end */
178 l = min(len, fifo->size - off); 179 l = min(len, fifo->size - off);
179 ret = copy_from_user(fifo->buffer + off, from, l); 180 ret = copy_from_user(fifo->buffer + off, from, l);
180 181 if (unlikely(ret)) {
181 if (unlikely(ret)) 182 *lenout = ret;
182 return ret + len - l; 183 return -EFAULT;
184 }
185 *lenout = l;
183 186
184 /* then put the rest (if any) at the beginning of the buffer */ 187 /* then put the rest (if any) at the beginning of the buffer */
185 return copy_from_user(fifo->buffer, from + l, len - l); 188 ret = copy_from_user(fifo->buffer, from + l, len - l);
189 *lenout += ret ? ret : len - l;
190 return ret ? -EFAULT : 0;
186} 191}
187 192
188static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo, 193static inline int __kfifo_to_user_data(struct kfifo *fifo,
189 void __user *to, unsigned int len, unsigned int off) 194 void __user *to, unsigned int len, unsigned int off, unsigned *lenout)
190{ 195{
191 unsigned int l; 196 unsigned int l;
192 int ret; 197 int ret;
@@ -203,12 +208,21 @@ static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
203 /* first get the data from fifo->out until the end of the buffer */ 208 /* first get the data from fifo->out until the end of the buffer */
204 l = min(len, fifo->size - off); 209 l = min(len, fifo->size - off);
205 ret = copy_to_user(to, fifo->buffer + off, l); 210 ret = copy_to_user(to, fifo->buffer + off, l);
206 211 *lenout = l;
207 if (unlikely(ret)) 212 if (unlikely(ret)) {
208 return ret + len - l; 213 *lenout -= ret;
214 return -EFAULT;
215 }
209 216
210 /* then get the rest (if any) from the beginning of the buffer */ 217 /* then get the rest (if any) from the beginning of the buffer */
211 return copy_to_user(to + l, fifo->buffer, len - l); 218 len -= l;
219 ret = copy_to_user(to + l, fifo->buffer, len);
220 if (unlikely(ret)) {
221 *lenout += len - ret;
222 return -EFAULT;
223 }
224 *lenout += len;
225 return 0;
212} 226}
213 227
214unsigned int __kfifo_in_n(struct kfifo *fifo, 228unsigned int __kfifo_in_n(struct kfifo *fifo,
@@ -235,7 +249,7 @@ EXPORT_SYMBOL(__kfifo_in_n);
235 * Note that with only one concurrent reader and one concurrent 249 * Note that with only one concurrent reader and one concurrent
236 * writer, you don't need extra locking to use these functions. 250 * writer, you don't need extra locking to use these functions.
237 */ 251 */
238unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from, 252unsigned int kfifo_in(struct kfifo *fifo, const void *from,
239 unsigned int len) 253 unsigned int len)
240{ 254{
241 len = min(kfifo_avail(fifo), len); 255 len = min(kfifo_avail(fifo), len);
@@ -277,7 +291,7 @@ EXPORT_SYMBOL(__kfifo_out_n);
277 * Note that with only one concurrent reader and one concurrent 291 * Note that with only one concurrent reader and one concurrent
278 * writer, you don't need extra locking to use these functions. 292 * writer, you don't need extra locking to use these functions.
279 */ 293 */
280unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len) 294unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
281{ 295{
282 len = min(kfifo_len(fifo), len); 296 len = min(kfifo_len(fifo), len);
283 297
@@ -288,6 +302,27 @@ unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
288} 302}
289EXPORT_SYMBOL(kfifo_out); 303EXPORT_SYMBOL(kfifo_out);
290 304
305/**
306 * kfifo_out_peek - copy some data from the FIFO, but do not remove it
307 * @fifo: the fifo to be used.
308 * @to: where the data must be copied.
309 * @len: the size of the destination buffer.
310 * @offset: offset into the fifo
311 *
312 * This function copies at most @len bytes at @offset from the FIFO
313 * into the @to buffer and returns the number of copied bytes.
314 * The data is not removed from the FIFO.
315 */
316unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len,
317 unsigned offset)
318{
319 len = min(kfifo_len(fifo), len + offset);
320
321 __kfifo_out_data(fifo, to, len, offset);
322 return len;
323}
324EXPORT_SYMBOL(kfifo_out_peek);
325
291unsigned int __kfifo_out_generic(struct kfifo *fifo, 326unsigned int __kfifo_out_generic(struct kfifo *fifo,
292 void *to, unsigned int len, unsigned int recsize, 327 void *to, unsigned int len, unsigned int recsize,
293 unsigned int *total) 328 unsigned int *total)
@@ -299,10 +334,13 @@ EXPORT_SYMBOL(__kfifo_out_generic);
299unsigned int __kfifo_from_user_n(struct kfifo *fifo, 334unsigned int __kfifo_from_user_n(struct kfifo *fifo,
300 const void __user *from, unsigned int len, unsigned int recsize) 335 const void __user *from, unsigned int len, unsigned int recsize)
301{ 336{
337 unsigned total;
338
302 if (kfifo_avail(fifo) < len + recsize) 339 if (kfifo_avail(fifo) < len + recsize)
303 return len + 1; 340 return len + 1;
304 341
305 return __kfifo_from_user_data(fifo, from, len, recsize); 342 __kfifo_from_user_data(fifo, from, len, recsize, &total);
343 return total;
306} 344}
307EXPORT_SYMBOL(__kfifo_from_user_n); 345EXPORT_SYMBOL(__kfifo_from_user_n);
308 346
@@ -311,20 +349,24 @@ EXPORT_SYMBOL(__kfifo_from_user_n);
311 * @fifo: the fifo to be used. 349 * @fifo: the fifo to be used.
312 * @from: pointer to the data to be added. 350 * @from: pointer to the data to be added.
313 * @len: the length of the data to be added. 351 * @len: the length of the data to be added.
352 * @total: the actual returned data length.
314 * 353 *
315 * This function copies at most @len bytes from the @from into the 354 * This function copies at most @len bytes from the @from into the
316 * FIFO depending and returns the number of copied bytes. 355 * FIFO depending and returns -EFAULT/0.
317 * 356 *
318 * Note that with only one concurrent reader and one concurrent 357 * Note that with only one concurrent reader and one concurrent
319 * writer, you don't need extra locking to use these functions. 358 * writer, you don't need extra locking to use these functions.
320 */ 359 */
321unsigned int kfifo_from_user(struct kfifo *fifo, 360int kfifo_from_user(struct kfifo *fifo,
322 const void __user *from, unsigned int len) 361 const void __user *from, unsigned int len, unsigned *total)
323{ 362{
363 int ret;
324 len = min(kfifo_avail(fifo), len); 364 len = min(kfifo_avail(fifo), len);
325 len -= __kfifo_from_user_data(fifo, from, len, 0); 365 ret = __kfifo_from_user_data(fifo, from, len, 0, total);
366 if (ret)
367 return ret;
326 __kfifo_add_in(fifo, len); 368 __kfifo_add_in(fifo, len);
327 return len; 369 return 0;
328} 370}
329EXPORT_SYMBOL(kfifo_from_user); 371EXPORT_SYMBOL(kfifo_from_user);
330 372
@@ -339,17 +381,17 @@ unsigned int __kfifo_to_user_n(struct kfifo *fifo,
339 void __user *to, unsigned int len, unsigned int reclen, 381 void __user *to, unsigned int len, unsigned int reclen,
340 unsigned int recsize) 382 unsigned int recsize)
341{ 383{
342 unsigned int ret; 384 unsigned int ret, total;
343 385
344 if (kfifo_len(fifo) < reclen + recsize) 386 if (kfifo_len(fifo) < reclen + recsize)
345 return len; 387 return len;
346 388
347 ret = __kfifo_to_user_data(fifo, to, reclen, recsize); 389 ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total);
348 390
349 if (likely(ret == 0)) 391 if (likely(ret == 0))
350 __kfifo_add_out(fifo, reclen + recsize); 392 __kfifo_add_out(fifo, reclen + recsize);
351 393
352 return ret; 394 return total;
353} 395}
354EXPORT_SYMBOL(__kfifo_to_user_n); 396EXPORT_SYMBOL(__kfifo_to_user_n);
355 397
@@ -358,20 +400,22 @@ EXPORT_SYMBOL(__kfifo_to_user_n);
358 * @fifo: the fifo to be used. 400 * @fifo: the fifo to be used.
359 * @to: where the data must be copied. 401 * @to: where the data must be copied.
360 * @len: the size of the destination buffer. 402 * @len: the size of the destination buffer.
403 * @lenout: pointer to output variable with copied data
361 * 404 *
362 * This function copies at most @len bytes from the FIFO into the 405 * This function copies at most @len bytes from the FIFO into the
363 * @to buffer and returns the number of copied bytes. 406 * @to buffer and 0 or -EFAULT.
364 * 407 *
365 * Note that with only one concurrent reader and one concurrent 408 * Note that with only one concurrent reader and one concurrent
366 * writer, you don't need extra locking to use these functions. 409 * writer, you don't need extra locking to use these functions.
367 */ 410 */
368unsigned int kfifo_to_user(struct kfifo *fifo, 411int kfifo_to_user(struct kfifo *fifo,
369 void __user *to, unsigned int len) 412 void __user *to, unsigned int len, unsigned *lenout)
370{ 413{
414 int ret;
371 len = min(kfifo_len(fifo), len); 415 len = min(kfifo_len(fifo), len);
372 len -= __kfifo_to_user_data(fifo, to, len, 0); 416 ret = __kfifo_to_user_data(fifo, to, len, 0, lenout);
373 __kfifo_add_out(fifo, len); 417 __kfifo_add_out(fifo, *lenout);
374 return len; 418 return ret;
375} 419}
376EXPORT_SYMBOL(kfifo_to_user); 420EXPORT_SYMBOL(kfifo_to_user);
377 421
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..761fdd2b3034 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -583,6 +583,9 @@ static void kgdb_wait(struct pt_regs *regs)
583 smp_wmb(); 583 smp_wmb();
584 atomic_set(&cpu_in_kgdb[cpu], 1); 584 atomic_set(&cpu_in_kgdb[cpu], 1);
585 585
586 /* Disable any cpu specific hw breakpoints */
587 kgdb_disable_hw_debug(regs);
588
586 /* Wait till primary CPU is done with debugging */ 589 /* Wait till primary CPU is done with debugging */
587 while (atomic_read(&passive_cpu_wait[cpu])) 590 while (atomic_read(&passive_cpu_wait[cpu]))
588 cpu_relax(); 591 cpu_relax();
@@ -596,7 +599,7 @@ static void kgdb_wait(struct pt_regs *regs)
596 599
597 /* Signal the primary CPU that we are done: */ 600 /* Signal the primary CPU that we are done: */
598 atomic_set(&cpu_in_kgdb[cpu], 0); 601 atomic_set(&cpu_in_kgdb[cpu], 0);
599 touch_softlockup_watchdog(); 602 touch_softlockup_watchdog_sync();
600 clocksource_touch_watchdog(); 603 clocksource_touch_watchdog();
601 local_irq_restore(flags); 604 local_irq_restore(flags);
602} 605}
@@ -1450,7 +1453,7 @@ acquirelock:
1450 (kgdb_info[cpu].task && 1453 (kgdb_info[cpu].task &&
1451 kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { 1454 kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
1452 atomic_set(&kgdb_active, -1); 1455 atomic_set(&kgdb_active, -1);
1453 touch_softlockup_watchdog(); 1456 touch_softlockup_watchdog_sync();
1454 clocksource_touch_watchdog(); 1457 clocksource_touch_watchdog();
1455 local_irq_restore(flags); 1458 local_irq_restore(flags);
1456 1459
@@ -1550,7 +1553,7 @@ kgdb_restore:
1550 } 1553 }
1551 /* Free kgdb_active */ 1554 /* Free kgdb_active */
1552 atomic_set(&kgdb_active, -1); 1555 atomic_set(&kgdb_active, -1);
1553 touch_softlockup_watchdog(); 1556 touch_softlockup_watchdog_sync();
1554 clocksource_touch_watchdog(); 1557 clocksource_touch_watchdog();
1555 local_irq_restore(flags); 1558 local_irq_restore(flags);
1556 1559
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 25b103190364..bf0e231d9702 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -520,13 +520,15 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
520 return -ENOMEM; 520 return -ENOMEM;
521 521
522 ret = call_usermodehelper_stdinpipe(sub_info, filp); 522 ret = call_usermodehelper_stdinpipe(sub_info, filp);
523 if (ret < 0) 523 if (ret < 0) {
524 goto out; 524 call_usermodehelper_freeinfo(sub_info);
525 return ret;
526 }
525 527
526 return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); 528 ret = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
529 if (ret < 0) /* Failed to execute helper, close pipe */
530 filp_close(*filp, NULL);
527 531
528 out:
529 call_usermodehelper_freeinfo(sub_info);
530 return ret; 532 return ret;
531} 533}
532EXPORT_SYMBOL(call_usermodehelper_pipe); 534EXPORT_SYMBOL(call_usermodehelper_pipe);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e5342a344c43..b7df302a0204 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1035,7 +1035,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
1035 /* Pre-allocate memory for max kretprobe instances */ 1035 /* Pre-allocate memory for max kretprobe instances */
1036 if (rp->maxactive <= 0) { 1036 if (rp->maxactive <= 0) {
1037#ifdef CONFIG_PREEMPT 1037#ifdef CONFIG_PREEMPT
1038 rp->maxactive = max(10, 2 * num_possible_cpus()); 1038 rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
1039#else 1039#else
1040 rp->maxactive = num_possible_cpus(); 1040 rp->maxactive = num_possible_cpus();
1041#endif 1041#endif
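
The one-line kprobes change above exists because max() insists on matching types: 10 is an int while num_possible_cpus() evaluates to an unsigned int, so max_t() forces the comparison type. A minimal illustration (demo_default_maxactive is a made-up helper):

#include <linux/kernel.h>
#include <linux/cpumask.h>

static unsigned int demo_default_maxactive(void)
{
	/* max(10, 2 * num_possible_cpus()) would trip max()'s type check */
	return max_t(unsigned int, 10, 2 * num_possible_cpus());
}
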
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 5feaddcdbe49..c62ec14609b9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2147,7 +2147,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
2147 return ret; 2147 return ret;
2148 2148
2149 return print_irq_inversion_bug(curr, &root, target_entry, 2149 return print_irq_inversion_bug(curr, &root, target_entry,
2150 this, 1, irqclass); 2150 this, 0, irqclass);
2151} 2151}
2152 2152
2153void print_irqtrace_events(struct task_struct *curr) 2153void print_irqtrace_events(struct task_struct *curr)
diff --git a/kernel/module.c b/kernel/module.c
index e96b8ed1cb6a..f82386bd9ee9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1010,6 +1010,12 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
1010 * J. Corbet <corbet@lwn.net> 1010 * J. Corbet <corbet@lwn.net>
1011 */ 1011 */
1012#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) 1012#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS)
1013
1014static inline bool sect_empty(const Elf_Shdr *sect)
1015{
1016 return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
1017}
1018
1013struct module_sect_attr 1019struct module_sect_attr
1014{ 1020{
1015 struct module_attribute mattr; 1021 struct module_attribute mattr;
@@ -1051,8 +1057,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
1051 1057
1052 /* Count loaded sections and allocate structures */ 1058 /* Count loaded sections and allocate structures */
1053 for (i = 0; i < nsect; i++) 1059 for (i = 0; i < nsect; i++)
1054 if (sechdrs[i].sh_flags & SHF_ALLOC 1060 if (!sect_empty(&sechdrs[i]))
1055 && sechdrs[i].sh_size)
1056 nloaded++; 1061 nloaded++;
1057 size[0] = ALIGN(sizeof(*sect_attrs) 1062 size[0] = ALIGN(sizeof(*sect_attrs)
1058 + nloaded * sizeof(sect_attrs->attrs[0]), 1063 + nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1070,9 +1075,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
1070 sattr = &sect_attrs->attrs[0]; 1075 sattr = &sect_attrs->attrs[0];
1071 gattr = &sect_attrs->grp.attrs[0]; 1076 gattr = &sect_attrs->grp.attrs[0];
1072 for (i = 0; i < nsect; i++) { 1077 for (i = 0; i < nsect; i++) {
1073 if (! (sechdrs[i].sh_flags & SHF_ALLOC)) 1078 if (sect_empty(&sechdrs[i]))
1074 continue;
1075 if (!sechdrs[i].sh_size)
1076 continue; 1079 continue;
1077 sattr->address = sechdrs[i].sh_addr; 1080 sattr->address = sechdrs[i].sh_addr;
1078 sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, 1081 sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
@@ -1156,7 +1159,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
1156 /* Count notes sections and allocate structures. */ 1159 /* Count notes sections and allocate structures. */
1157 notes = 0; 1160 notes = 0;
1158 for (i = 0; i < nsect; i++) 1161 for (i = 0; i < nsect; i++)
1159 if ((sechdrs[i].sh_flags & SHF_ALLOC) && 1162 if (!sect_empty(&sechdrs[i]) &&
1160 (sechdrs[i].sh_type == SHT_NOTE)) 1163 (sechdrs[i].sh_type == SHT_NOTE))
1161 ++notes; 1164 ++notes;
1162 1165
@@ -1172,7 +1175,7 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
1172 notes_attrs->notes = notes; 1175 notes_attrs->notes = notes;
1173 nattr = &notes_attrs->attrs[0]; 1176 nattr = &notes_attrs->attrs[0];
1174 for (loaded = i = 0; i < nsect; ++i) { 1177 for (loaded = i = 0; i < nsect; ++i) {
1175 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 1178 if (sect_empty(&sechdrs[i]))
1176 continue; 1179 continue;
1177 if (sechdrs[i].sh_type == SHT_NOTE) { 1180 if (sechdrs[i].sh_type == SHT_NOTE) {
1178 nattr->attr.name = mod->sect_attrs->attrs[loaded].name; 1181 nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
diff --git a/kernel/panic.c b/kernel/panic.c
index 5827f7b97254..c787333282b8 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -75,7 +75,6 @@ NORET_TYPE void panic(const char * fmt, ...)
75 dump_stack(); 75 dump_stack();
76#endif 76#endif
77 77
78 kmsg_dump(KMSG_DUMP_PANIC);
79 /* 78 /*
80 * If we have crashed and we have a crash kernel loaded let it handle 79 * If we have crashed and we have a crash kernel loaded let it handle
81 * everything else. 80 * everything else.
@@ -83,6 +82,8 @@ NORET_TYPE void panic(const char * fmt, ...)
83 */ 82 */
84 crash_kexec(NULL); 83 crash_kexec(NULL);
85 84
85 kmsg_dump(KMSG_DUMP_PANIC);
86
86 /* 87 /*
87 * Note smp_send_stop is the usual smp shutdown function, which 88 * Note smp_send_stop is the usual smp shutdown function, which
88 * unfortunately means it may not be hardened to work in a panic 89 * unfortunately means it may not be hardened to work in a panic
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 1f38270f08c7..d27746bd3a06 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3268,6 +3268,9 @@ static void perf_event_task_output(struct perf_event *event,
3268 3268
3269static int perf_event_task_match(struct perf_event *event) 3269static int perf_event_task_match(struct perf_event *event)
3270{ 3270{
3271 if (event->state != PERF_EVENT_STATE_ACTIVE)
3272 return 0;
3273
3271 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3274 if (event->cpu != -1 && event->cpu != smp_processor_id())
3272 return 0; 3275 return 0;
3273 3276
@@ -3377,6 +3380,9 @@ static void perf_event_comm_output(struct perf_event *event,
3377 3380
3378static int perf_event_comm_match(struct perf_event *event) 3381static int perf_event_comm_match(struct perf_event *event)
3379{ 3382{
3383 if (event->state != PERF_EVENT_STATE_ACTIVE)
3384 return 0;
3385
3380 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3386 if (event->cpu != -1 && event->cpu != smp_processor_id())
3381 return 0; 3387 return 0;
3382 3388
@@ -3494,6 +3500,9 @@ static void perf_event_mmap_output(struct perf_event *event,
3494static int perf_event_mmap_match(struct perf_event *event, 3500static int perf_event_mmap_match(struct perf_event *event,
3495 struct perf_mmap_event *mmap_event) 3501 struct perf_mmap_event *mmap_event)
3496{ 3502{
3503 if (event->state != PERF_EVENT_STATE_ACTIVE)
3504 return 0;
3505
3497 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3506 if (event->cpu != -1 && event->cpu != smp_processor_id())
3498 return 0; 3507 return 0;
3499 3508
@@ -5148,7 +5157,7 @@ int perf_event_init_task(struct task_struct *child)
5148 GFP_KERNEL); 5157 GFP_KERNEL);
5149 if (!child_ctx) { 5158 if (!child_ctx) {
5150 ret = -ENOMEM; 5159 ret = -ENOMEM;
5151 goto exit; 5160 break;
5152 } 5161 }
5153 5162
5154 __perf_event_init_context(child_ctx, child); 5163 __perf_event_init_context(child_ctx, child);
@@ -5164,7 +5173,7 @@ int perf_event_init_task(struct task_struct *child)
5164 } 5173 }
5165 } 5174 }
5166 5175
5167 if (inherited_all) { 5176 if (child_ctx && inherited_all) {
5168 /* 5177 /*
5169 * Mark the child context as a clone of the parent 5178 * Mark the child context as a clone of the parent
5170 * context, or of whatever the parent is a clone of. 5179 * context, or of whatever the parent is a clone of.
@@ -5184,7 +5193,6 @@ int perf_event_init_task(struct task_struct *child)
5184 get_ctx(child_ctx->parent_ctx); 5193 get_ctx(child_ctx->parent_ctx);
5185 } 5194 }
5186 5195
5187exit:
5188 mutex_unlock(&parent_ctx->mutex); 5196 mutex_unlock(&parent_ctx->mutex);
5189 5197
5190 perf_unpin_context(parent_ctx); 5198 perf_unpin_context(parent_ctx);
diff --git a/kernel/printk.c b/kernel/printk.c
index 17463ca2e229..1751c456b71f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1467,6 +1467,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
1467static const char const *kmsg_reasons[] = { 1467static const char const *kmsg_reasons[] = {
1468 [KMSG_DUMP_OOPS] = "oops", 1468 [KMSG_DUMP_OOPS] = "oops",
1469 [KMSG_DUMP_PANIC] = "panic", 1469 [KMSG_DUMP_PANIC] = "panic",
1470 [KMSG_DUMP_KEXEC] = "kexec",
1470}; 1471};
1471 1472
1472static const char *kmsg_to_str(enum kmsg_dump_reason reason) 1473static const char *kmsg_to_str(enum kmsg_dump_reason reason)
diff --git a/kernel/sched.c b/kernel/sched.c
index 6cee227b1459..f96be9370b75 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2286,14 +2286,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
2286} 2286}
2287 2287
2288/* 2288/*
2289 * Called from: 2289 * Gets called from 3 sites (exec, fork, wakeup), since it is called without
2290 * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done
2291 * by:
2290 * 2292 *
2291 * - fork, @p is stable because it isn't on the tasklist yet 2293 * exec: is unstable, retry loop
2292 * 2294 * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
2293 * - exec, @p is unstable, retry loop
2294 *
2295 * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
2296 * we should be good.
2297 */ 2295 */
2298static inline 2296static inline
2299int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) 2297int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
@@ -2586,9 +2584,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
2586 if (p->sched_class->task_fork) 2584 if (p->sched_class->task_fork)
2587 p->sched_class->task_fork(p); 2585 p->sched_class->task_fork(p);
2588 2586
2589#ifdef CONFIG_SMP
2590 cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
2591#endif
2592 set_task_cpu(p, cpu); 2587 set_task_cpu(p, cpu);
2593 2588
2594#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2589#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
@@ -2618,6 +2613,21 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2618{ 2613{
2619 unsigned long flags; 2614 unsigned long flags;
2620 struct rq *rq; 2615 struct rq *rq;
2616 int cpu __maybe_unused = get_cpu();
2617
2618#ifdef CONFIG_SMP
2619 /*
2620 * Fork balancing, do it here and not earlier because:
2621 * - cpus_allowed can change in the fork path
2622 * - any previously selected cpu might disappear through hotplug
2623 *
2624 * We still have TASK_WAKING but PF_STARTING is gone now, meaning
2625 * ->cpus_allowed is stable, we have preemption disabled, meaning
2626 * cpu_online_mask is stable.
2627 */
2628 cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
2629 set_task_cpu(p, cpu);
2630#endif
2621 2631
2622 rq = task_rq_lock(p, &flags); 2632 rq = task_rq_lock(p, &flags);
2623 BUG_ON(p->state != TASK_WAKING); 2633 BUG_ON(p->state != TASK_WAKING);
@@ -2631,6 +2641,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2631 p->sched_class->task_woken(rq, p); 2641 p->sched_class->task_woken(rq, p);
2632#endif 2642#endif
2633 task_rq_unlock(rq, &flags); 2643 task_rq_unlock(rq, &flags);
2644 put_cpu();
2634} 2645}
2635 2646
2636#ifdef CONFIG_PREEMPT_NOTIFIERS 2647#ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -3687,8 +3698,11 @@ need_resched_nonpreemptible:
3687 3698
3688 post_schedule(rq); 3699 post_schedule(rq);
3689 3700
3690 if (unlikely(reacquire_kernel_lock(current) < 0)) 3701 if (unlikely(reacquire_kernel_lock(current) < 0)) {
3702 prev = rq->curr;
3703 switch_count = &prev->nivcsw;
3691 goto need_resched_nonpreemptible; 3704 goto need_resched_nonpreemptible;
3705 }
3692 3706
3693 preempt_enable_no_resched(); 3707 preempt_enable_no_resched();
3694 if (need_resched()) 3708 if (need_resched())
@@ -5293,14 +5307,18 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5293 * the ->cpus_allowed mask from under waking tasks, which would be 5307 * the ->cpus_allowed mask from under waking tasks, which would be
5294 * possible when we change rq->lock in ttwu(), so synchronize against 5308 * possible when we change rq->lock in ttwu(), so synchronize against
5295 * TASK_WAKING to avoid that. 5309 * TASK_WAKING to avoid that.
5310 *
5311 * Make an exception for freshly cloned tasks, since cpuset namespaces
5312 * might move the task about, we have to validate the target in
5313 * wake_up_new_task() anyway since the cpu might have gone away.
5296 */ 5314 */
5297again: 5315again:
5298 while (p->state == TASK_WAKING) 5316 while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
5299 cpu_relax(); 5317 cpu_relax();
5300 5318
5301 rq = task_rq_lock(p, &flags); 5319 rq = task_rq_lock(p, &flags);
5302 5320
5303 if (p->state == TASK_WAKING) { 5321 if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
5304 task_rq_unlock(rq, &flags); 5322 task_rq_unlock(rq, &flags);
5305 goto again; 5323 goto again;
5306 } 5324 }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0e7a7af9cf8b..b45abbe55067 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1509,7 +1509,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
1509 * If there's an idle sibling in this domain, make that 1509 * If there's an idle sibling in this domain, make that
1510 * the wake_affine target instead of the current cpu. 1510 * the wake_affine target instead of the current cpu.
1511 */ 1511 */
1512 if (tmp->flags & SD_PREFER_SIBLING) 1512 if (tmp->flags & SD_SHARE_PKG_RESOURCES)
1513 target = select_idle_sibling(p, tmp, target); 1513 target = select_idle_sibling(p, tmp, target);
1514 1514
1515 if (target >= 0) { 1515 if (target >= 0) {
diff --git a/kernel/signal.c b/kernel/signal.c
index d09692b40376..934ae5e687b9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -979,7 +979,8 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
979 for (i = 0; i < 16; i++) { 979 for (i = 0; i < 16; i++) {
980 unsigned char insn; 980 unsigned char insn;
981 981
982 __get_user(insn, (unsigned char *)(regs->ip + i)); 982 if (get_user(insn, (unsigned char *)(regs->ip + i)))
983 break;
983 printk("%02x ", insn); 984 printk("%02x ", insn);
984 } 985 }
985 } 986 }
diff --git a/kernel/smp.c b/kernel/smp.c
index de735a6637d0..f10408422444 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -347,7 +347,7 @@ int smp_call_function_any(const struct cpumask *mask,
347 goto call; 347 goto call;
348 348
349 /* Try for same node. */ 349 /* Try for same node. */
350 nodemask = cpumask_of_node(cpu); 350 nodemask = cpumask_of_node(cpu_to_node(cpu));
351 for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids; 351 for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
352 cpu = cpumask_next_and(cpu, nodemask, mask)) { 352 cpu = cpumask_next_and(cpu, nodemask, mask)) {
353 if (cpu_online(cpu)) 353 if (cpu_online(cpu))
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d22579087e27..0d4c7898ab80 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock);
25static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */ 25static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
26static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */ 26static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
27static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); 27static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
28static DEFINE_PER_CPU(bool, softlock_touch_sync);
28 29
29static int __read_mostly did_panic; 30static int __read_mostly did_panic;
30int __read_mostly softlockup_thresh = 60; 31int __read_mostly softlockup_thresh = 60;
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void)
79} 80}
80EXPORT_SYMBOL(touch_softlockup_watchdog); 81EXPORT_SYMBOL(touch_softlockup_watchdog);
81 82
83void touch_softlockup_watchdog_sync(void)
84{
85 __raw_get_cpu_var(softlock_touch_sync) = true;
86 __raw_get_cpu_var(softlockup_touch_ts) = 0;
87}
88
82void touch_all_softlockup_watchdogs(void) 89void touch_all_softlockup_watchdogs(void)
83{ 90{
84 int cpu; 91 int cpu;
@@ -118,6 +125,14 @@ void softlockup_tick(void)
118 } 125 }
119 126
120 if (touch_ts == 0) { 127 if (touch_ts == 0) {
128 if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
129 /*
130 * If the time stamp was touched atomically
131 * make sure the scheduler tick is up to date.
132 */
133 per_cpu(softlock_touch_sync, this_cpu) = false;
134 sched_clock_tick();
135 }
121 __touch_softlockup_watchdog(); 136 __touch_softlockup_watchdog();
122 return; 137 return;
123 } 138 }
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 6f740d9f0948..d7395fdfb9f3 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -259,7 +259,8 @@ void clockevents_notify(unsigned long reason, void *arg)
259 cpu = *((int *)arg); 259 cpu = *((int *)arg);
260 list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { 260 list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
261 if (cpumask_test_cpu(cpu, dev->cpumask) && 261 if (cpumask_test_cpu(cpu, dev->cpumask) &&
262 cpumask_weight(dev->cpumask) == 1) { 262 cpumask_weight(dev->cpumask) == 1 &&
263 !tick_is_broadcast_device(dev)) {
263 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 264 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
264 list_del(&dev->list); 265 list_del(&dev->list);
265 } 266 }
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e85c23404d34..13700833c181 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -343,7 +343,19 @@ static void clocksource_resume_watchdog(void)
343{ 343{
344 unsigned long flags; 344 unsigned long flags;
345 345
346 spin_lock_irqsave(&watchdog_lock, flags); 346 /*
347 * We use trylock here to avoid a potential dead lock when
348 * kgdb calls this code after the kernel has been stopped with
349 * watchdog_lock held. When watchdog_lock is held we just
350 * return and accept, that the watchdog might trigger and mark
351 * the monitored clock source (usually TSC) unstable.
352 *
353 * This does not affect the other caller clocksource_resume()
354 * because at this point the kernel is UP, interrupts are
355 * disabled and nothing can hold watchdog_lock.
356 */
357 if (!spin_trylock_irqsave(&watchdog_lock, flags))
358 return;
347 clocksource_reset_watchdog(); 359 clocksource_reset_watchdog();
348 spin_unlock_irqrestore(&watchdog_lock, flags); 360 spin_unlock_irqrestore(&watchdog_lock, flags);
349} 361}
@@ -458,8 +470,8 @@ void clocksource_resume(void)
458 * clocksource_touch_watchdog - Update watchdog 470 * clocksource_touch_watchdog - Update watchdog
459 * 471 *
460 * Update the watchdog after exception contexts such as kgdb so as not 472 * Update the watchdog after exception contexts such as kgdb so as not
461 * to incorrectly trip the watchdog. 473 * to incorrectly trip the watchdog. This might fail when the kernel
462 * 474 * was stopped in code which holds watchdog_lock.
463 */ 475 */
464void clocksource_touch_watchdog(void) 476void clocksource_touch_watchdog(void)
465{ 477{
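
The clocksource_resume_watchdog() change above trades an unconditional spin_lock_irqsave() for spin_trylock_irqsave(), so that kgdb, which may have stopped the kernel while watchdog_lock was held, cannot deadlock here. A generic sketch of that trylock-and-bail pattern under hypothetical names (demo_lock, demo_reset, demo_touch):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

static void demo_reset(void)
{
	/* placeholder for the work normally done under the lock */
}

static void demo_touch(void)
{
	unsigned long flags;

	/* If the lock holder was interrupted (e.g. by the debugger),
	 * skip the update rather than spin forever. */
	if (!spin_trylock_irqsave(&demo_lock, flags))
		return;
	demo_reset();
	spin_unlock_irqrestore(&demo_lock, flags);
}
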
diff --git a/kernel/timer.c b/kernel/timer.c
index 15533b792397..c61a7949387f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1198,6 +1198,7 @@ void update_process_times(int user_tick)
1198 run_local_timers(); 1198 run_local_timers();
1199 rcu_check_callbacks(cpu, user_tick); 1199 rcu_check_callbacks(cpu, user_tick);
1200 printk_tick(); 1200 printk_tick();
1201 perf_event_do_pending();
1201 scheduler_tick(); 1202 scheduler_tick();
1202 run_posix_cpu_timers(p); 1203 run_posix_cpu_timers(p);
1203} 1204}
@@ -1209,8 +1210,6 @@ static void run_timer_softirq(struct softirq_action *h)
1209{ 1210{
1210 struct tvec_base *base = __get_cpu_var(tvec_bases); 1211 struct tvec_base *base = __get_cpu_var(tvec_bases);
1211 1212
1212 perf_event_do_pending();
1213
1214 hrtimer_run_pending(); 1213 hrtimer_run_pending();
1215 1214
1216 if (time_after_eq(jiffies, base->timer_jiffies)) 1215 if (time_after_eq(jiffies, base->timer_jiffies))
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d006554888dc..60e2ce0181ee 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -12,39 +12,37 @@ config NOP_TRACER
12config HAVE_FTRACE_NMI_ENTER 12config HAVE_FTRACE_NMI_ENTER
13 bool 13 bool
14 help 14 help
15 See Documentation/trace/ftrace-implementation.txt 15 See Documentation/trace/ftrace-design.txt
16 16
17config HAVE_FUNCTION_TRACER 17config HAVE_FUNCTION_TRACER
18 bool 18 bool
19 help 19 help
20 See Documentation/trace/ftrace-implementation.txt 20 See Documentation/trace/ftrace-design.txt
21 21
22config HAVE_FUNCTION_GRAPH_TRACER 22config HAVE_FUNCTION_GRAPH_TRACER
23 bool 23 bool
24 help 24 help
25 See Documentation/trace/ftrace-implementation.txt 25 See Documentation/trace/ftrace-design.txt
26 26
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
29 help 29 help
30 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
31 entering and exiting of a function. On exit, the value is compared
32 and if it does not match, then it will panic the kernel.
33 31
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 33 bool
36 help 34 help
37 See Documentation/trace/ftrace-implementation.txt 35 See Documentation/trace/ftrace-design.txt
38 36
39config HAVE_DYNAMIC_FTRACE 37config HAVE_DYNAMIC_FTRACE
40 bool 38 bool
41 help 39 help
42 See Documentation/trace/ftrace-implementation.txt 40 See Documentation/trace/ftrace-design.txt
43 41
44config HAVE_FTRACE_MCOUNT_RECORD 42config HAVE_FTRACE_MCOUNT_RECORD
45 bool 43 bool
46 help 44 help
47 See Documentation/trace/ftrace-implementation.txt 45 See Documentation/trace/ftrace-design.txt
48 46
49config HAVE_HW_BRANCH_TRACER 47config HAVE_HW_BRANCH_TRACER
50 bool 48 bool
@@ -52,7 +50,7 @@ config HAVE_HW_BRANCH_TRACER
52config HAVE_SYSCALL_TRACEPOINTS 50config HAVE_SYSCALL_TRACEPOINTS
53 bool 51 bool
54 help 52 help
55 See Documentation/trace/ftrace-implementation.txt 53 See Documentation/trace/ftrace-design.txt
56 54
57config TRACER_MAX_TRACE 55config TRACER_MAX_TRACE
58 bool 56 bool
@@ -83,7 +81,7 @@ config RING_BUFFER_ALLOW_SWAP
83# This allows those options to appear when no other tracer is selected. But the 81# This allows those options to appear when no other tracer is selected. But the
84# options do not appear when something else selects it. We need the two options 82# options do not appear when something else selects it. We need the two options
85# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the 83# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
86# hidding of the automatic options. 84# hiding of the automatic options.
87 85
88config TRACING 86config TRACING
89 bool 87 bool
@@ -119,7 +117,7 @@ menuconfig FTRACE
119 bool "Tracers" 117 bool "Tracers"
120 default y if DEBUG_KERNEL 118 default y if DEBUG_KERNEL
121 help 119 help
122 Enable the kernel tracing infrastructure. 120 Enable the kernel tracing infrastructure.
123 121
124if FTRACE 122if FTRACE
125 123
@@ -133,7 +131,7 @@ config FUNCTION_TRACER
133 help 131 help
134 Enable the kernel to trace every kernel function. This is done 132 Enable the kernel to trace every kernel function. This is done
135 by using a compiler feature to insert a small, 5-byte No-Operation 133 by using a compiler feature to insert a small, 5-byte No-Operation
136 instruction to the beginning of every kernel function, which NOP 134 instruction at the beginning of every kernel function, which NOP
137 sequence is then dynamically patched into a tracer call when 135 sequence is then dynamically patched into a tracer call when
138 tracing is enabled by the administrator. If it's runtime disabled 136 tracing is enabled by the administrator. If it's runtime disabled
139 (the bootup default), then the overhead of the instructions is very 137 (the bootup default), then the overhead of the instructions is very
@@ -150,7 +148,7 @@ config FUNCTION_GRAPH_TRACER
150 and its entry. 148 and its entry.
151 Its first purpose is to trace the duration of functions and 149 Its first purpose is to trace the duration of functions and
152 draw a call graph for each thread with some information like 150 draw a call graph for each thread with some information like
153 the return value. This is done by setting the current return 151 the return value. This is done by setting the current return
154 address on the current task structure into a stack of calls. 152 address on the current task structure into a stack of calls.
155 153
156 154
@@ -173,7 +171,7 @@ config IRQSOFF_TRACER
173 171
174 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 172 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
175 173
176 (Note that kernel size and overhead increases with this option 174 (Note that kernel size and overhead increase with this option
177 enabled. This option and the preempt-off timing option can be 175 enabled. This option and the preempt-off timing option can be
178 used together or separately.) 176 used together or separately.)
179 177
@@ -186,7 +184,7 @@ config PREEMPT_TRACER
186 select TRACER_MAX_TRACE 184 select TRACER_MAX_TRACE
187 select RING_BUFFER_ALLOW_SWAP 185 select RING_BUFFER_ALLOW_SWAP
188 help 186 help
189 This option measures the time spent in preemption off critical 187 This option measures the time spent in preemption-off critical
190 sections, with microsecond accuracy. 188 sections, with microsecond accuracy.
191 189
192 The default measurement method is a maximum search, which is 190 The default measurement method is a maximum search, which is
@@ -195,7 +193,7 @@ config PREEMPT_TRACER
195 193
196 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency 194 echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
197 195
198 (Note that kernel size and overhead increases with this option 196 (Note that kernel size and overhead increase with this option
199 enabled. This option and the irqs-off timing option can be 197 enabled. This option and the irqs-off timing option can be
200 used together or separately.) 198 used together or separately.)
201 199
@@ -222,7 +220,7 @@ config ENABLE_DEFAULT_TRACERS
222 depends on !GENERIC_TRACER 220 depends on !GENERIC_TRACER
223 select TRACING 221 select TRACING
224 help 222 help
225 This tracer hooks to various trace points in the kernel 223 This tracer hooks to various trace points in the kernel,
226 allowing the user to pick and choose which trace point they 224 allowing the user to pick and choose which trace point they
227 want to trace. It also includes the sched_switch tracer plugin. 225 want to trace. It also includes the sched_switch tracer plugin.
228 226
@@ -265,19 +263,19 @@ choice
265 The likely/unlikely profiler only looks at the conditions that 263 The likely/unlikely profiler only looks at the conditions that
266 are annotated with a likely or unlikely macro. 264 are annotated with a likely or unlikely macro.
267 265
268 The "all branch" profiler will profile every if statement in the 266 The "all branch" profiler will profile every if-statement in the
269 kernel. This profiler will also enable the likely/unlikely 267 kernel. This profiler will also enable the likely/unlikely
270 profiler as well. 268 profiler.
271 269
272 Either of the above profilers add a bit of overhead to the system. 270 Either of the above profilers adds a bit of overhead to the system.
273 If unsure choose "No branch profiling". 271 If unsure, choose "No branch profiling".
274 272
275config BRANCH_PROFILE_NONE 273config BRANCH_PROFILE_NONE
276 bool "No branch profiling" 274 bool "No branch profiling"
277 help 275 help
278 No branch profiling. Branch profiling adds a bit of overhead. 276 No branch profiling. Branch profiling adds a bit of overhead.
279 Only enable it if you want to analyse the branching behavior. 277 Only enable it if you want to analyse the branching behavior.
280 Otherwise keep it disabled. 278 Otherwise keep it disabled.
281 279
282config PROFILE_ANNOTATED_BRANCHES 280config PROFILE_ANNOTATED_BRANCHES
283 bool "Trace likely/unlikely profiler" 281 bool "Trace likely/unlikely profiler"
@@ -288,7 +286,7 @@ config PROFILE_ANNOTATED_BRANCHES
288 286
289 /sys/kernel/debug/tracing/profile_annotated_branch 287 /sys/kernel/debug/tracing/profile_annotated_branch
290 288
291 Note: this will add a significant overhead, only turn this 289 Note: this will add a significant overhead; only turn this
292 on if you need to profile the system's use of these macros. 290 on if you need to profile the system's use of these macros.
293 291
294config PROFILE_ALL_BRANCHES 292config PROFILE_ALL_BRANCHES
@@ -305,7 +303,7 @@ config PROFILE_ALL_BRANCHES
305 303
306 This configuration, when enabled, will impose a great overhead 304 This configuration, when enabled, will impose a great overhead
307 on the system. This should only be enabled when the system 305 on the system. This should only be enabled when the system
308 is to be analyzed 306 is to be analyzed in much detail.
309endchoice 307endchoice
310 308
311config TRACING_BRANCHES 309config TRACING_BRANCHES
@@ -335,7 +333,7 @@ config POWER_TRACER
335 depends on X86 333 depends on X86
336 select GENERIC_TRACER 334 select GENERIC_TRACER
337 help 335 help
338 This tracer helps developers to analyze and optimize the kernels 336 This tracer helps developers to analyze and optimize the kernel's
339 power management decisions, specifically the C-state and P-state 337 power management decisions, specifically the C-state and P-state
340 behavior. 338 behavior.
341 339
@@ -391,14 +389,14 @@ config HW_BRANCH_TRACER
391 select GENERIC_TRACER 389 select GENERIC_TRACER
392 help 390 help
393 This tracer records all branches on the system in a circular 391 This tracer records all branches on the system in a circular
394 buffer giving access to the last N branches for each cpu. 392 buffer, giving access to the last N branches for each cpu.
395 393
396config KMEMTRACE 394config KMEMTRACE
397 bool "Trace SLAB allocations" 395 bool "Trace SLAB allocations"
398 select GENERIC_TRACER 396 select GENERIC_TRACER
399 help 397 help
400 kmemtrace provides tracing for slab allocator functions, such as 398 kmemtrace provides tracing for slab allocator functions, such as
401 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected 399 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
402 data is then fed to the userspace application in order to analyse 400 data is then fed to the userspace application in order to analyse
403 allocation hotspots, internal fragmentation and so on, making it 401 allocation hotspots, internal fragmentation and so on, making it
404 possible to see how well an allocator performs, as well as debug 402 possible to see how well an allocator performs, as well as debug
@@ -417,15 +415,15 @@ config WORKQUEUE_TRACER
417 bool "Trace workqueues" 415 bool "Trace workqueues"
418 select GENERIC_TRACER 416 select GENERIC_TRACER
419 help 417 help
420 The workqueue tracer provides some statistical informations 418 The workqueue tracer provides some statistical information
421 about each cpu workqueue thread such as the number of the 419 about each cpu workqueue thread such as the number of the
422 works inserted and executed since their creation. It can help 420 works inserted and executed since their creation. It can help
423 to evaluate the amount of work each of them have to perform. 421 to evaluate the amount of work each of them has to perform.
424 For example it can help a developer to decide whether he should 422 For example it can help a developer to decide whether he should
425 choose a per cpu workqueue instead of a singlethreaded one. 423 choose a per-cpu workqueue instead of a singlethreaded one.
426 424
427config BLK_DEV_IO_TRACE 425config BLK_DEV_IO_TRACE
428 bool "Support for tracing block io actions" 426 bool "Support for tracing block IO actions"
429 depends on SYSFS 427 depends on SYSFS
430 depends on BLOCK 428 depends on BLOCK
431 select RELAY 429 select RELAY
@@ -456,15 +454,15 @@ config KPROBE_EVENT
456 select TRACING 454 select TRACING
457 default y 455 default y
458 help 456 help
459 This allows the user to add tracing events (similar to tracepoints) on the fly 457 This allows the user to add tracing events (similar to tracepoints)
460 via the ftrace interface. See Documentation/trace/kprobetrace.txt 458 on the fly via the ftrace interface. See
461 for more details. 459 Documentation/trace/kprobetrace.txt for more details.
462 460
463 Those events can be inserted wherever kprobes can probe, and record 461 Those events can be inserted wherever kprobes can probe, and record
464 various register and memory values. 462 various register and memory values.
465 463
466 This option is also required by perf-probe subcommand of perf tools. If 464 This option is also required by perf-probe subcommand of perf tools.
467 you want to use perf tools, this option is strongly recommended. 465 If you want to use perf tools, this option is strongly recommended.
468 466
469config DYNAMIC_FTRACE 467config DYNAMIC_FTRACE
470 bool "enable/disable ftrace tracepoints dynamically" 468 bool "enable/disable ftrace tracepoints dynamically"
@@ -472,32 +470,32 @@ config DYNAMIC_FTRACE
472 depends on HAVE_DYNAMIC_FTRACE 470 depends on HAVE_DYNAMIC_FTRACE
473 default y 471 default y
474 help 472 help
475 This option will modify all the calls to ftrace dynamically 473 This option will modify all the calls to ftrace dynamically
476 (will patch them out of the binary image and replaces them 474 (will patch them out of the binary image and replace them
477 with a No-Op instruction) as they are called. A table is 475 with a No-Op instruction) as they are called. A table is
478 created to dynamically enable them again. 476 created to dynamically enable them again.
479 477
480 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise 478 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
481 has native performance as long as no tracing is active. 479 otherwise has native performance as long as no tracing is active.
482 480
483 The changes to the code are done by a kernel thread that 481 The changes to the code are done by a kernel thread that
484 wakes up once a second and checks to see if any ftrace calls 482 wakes up once a second and checks to see if any ftrace calls
485 were made. If so, it runs stop_machine (stops all CPUS) 483 were made. If so, it runs stop_machine (stops all CPUS)
486 and modifies the code to jump over the call to ftrace. 484 and modifies the code to jump over the call to ftrace.
487 485
488config FUNCTION_PROFILER 486config FUNCTION_PROFILER
489 bool "Kernel function profiler" 487 bool "Kernel function profiler"
490 depends on FUNCTION_TRACER 488 depends on FUNCTION_TRACER
491 default n 489 default n
492 help 490 help
493 This option enables the kernel function profiler. A file is created 491 This option enables the kernel function profiler. A file is created
494 in debugfs called function_profile_enabled which defaults to zero. 492 in debugfs called function_profile_enabled which defaults to zero.
495 When a 1 is echoed into this file profiling begins, and when a 493 When a 1 is echoed into this file profiling begins, and when a
496 zero is entered, profiling stops. A file in the trace_stats 494 zero is entered, profiling stops. A "functions" file is created in
497 directory called functions, that show the list of functions that 495 the trace_stats directory; this file shows the list of functions that
498 have been hit and their counters. 496 have been hit and their counters.
499 497
500 If in doubt, say N 498 If in doubt, say N.
501 499
502config FTRACE_MCOUNT_RECORD 500config FTRACE_MCOUNT_RECORD
503 def_bool y 501 def_bool y
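
The FUNCTION_PROFILER help above describes the debugfs knob in shell terms (echo 1 / echo 0 into function_profile_enabled). Below is a small C sketch of the same toggle; the full path is assumed from the other help texts in this Kconfig, and it presumes debugfs is mounted at /sys/kernel/debug and that the program runs as root.

#include <stdio.h>

/*
 * Toggle the function profiler through debugfs. The path is assumed from
 * the surrounding help texts (debugfs mounted at /sys/kernel/debug);
 * writing to it needs root.
 */
#define PROFILE_SWITCH "/sys/kernel/debug/tracing/function_profile_enabled"

static int write_str(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f)
                return -1;
        fputs(val, f);
        return fclose(f);
}

int main(void)
{
        /* Echoing 1 starts profiling, echoing 0 stops it, as the help says. */
        if (write_str(PROFILE_SWITCH, "1"))
                perror(PROFILE_SWITCH);

        /* ... run the workload to be profiled here ... */

        if (write_str(PROFILE_SWITCH, "0"))
                perror(PROFILE_SWITCH);
        return 0;
}
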
@@ -556,8 +554,8 @@ config RING_BUFFER_BENCHMARK
556 tristate "Ring buffer benchmark stress tester" 554 tristate "Ring buffer benchmark stress tester"
557 depends on RING_BUFFER 555 depends on RING_BUFFER
558 help 556 help
559 This option creates a test to stress the ring buffer and bench mark it. 557 This option creates a test to stress the ring buffer and benchmark it.
560 It creates its own ring buffer such that it will not interfer with 558 It creates its own ring buffer such that it will not interfere with
561 any other users of the ring buffer (such as ftrace). It then creates 559 any other users of the ring buffer (such as ftrace). It then creates
562 a producer and consumer that will run for 10 seconds and sleep for 560 a producer and consumer that will run for 10 seconds and sleep for
563 10 seconds. Each interval it will print out the number of events 561 10 seconds. Each interval it will print out the number of events
@@ -566,7 +564,7 @@ config RING_BUFFER_BENCHMARK
566 It does not disable interrupts or raise its priority, so it may be 564 It does not disable interrupts or raise its priority, so it may be
567 affected by processes that are running. 565 affected by processes that are running.
568 566
569 If unsure, say N 567 If unsure, say N.
570 568
571endif # FTRACE 569endif # FTRACE
572 570
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7968762c8167..1e6640f80454 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1690,7 +1690,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1690static int ftrace_match(char *str, char *regex, int len, int type) 1690static int ftrace_match(char *str, char *regex, int len, int type)
1691{ 1691{
1692 int matched = 0; 1692 int matched = 0;
1693 char *ptr; 1693 int slen;
1694 1694
1695 switch (type) { 1695 switch (type) {
1696 case MATCH_FULL: 1696 case MATCH_FULL:
@@ -1706,8 +1706,8 @@ static int ftrace_match(char *str, char *regex, int len, int type)
1706 matched = 1; 1706 matched = 1;
1707 break; 1707 break;
1708 case MATCH_END_ONLY: 1708 case MATCH_END_ONLY:
1709 ptr = strstr(str, regex); 1709 slen = strlen(str);
1710 if (ptr && (ptr[len] == 0)) 1710 if (slen >= len && memcmp(str + slen - len, regex, len) == 0)
1711 matched = 1; 1711 matched = 1;
1712 break; 1712 break;
1713 } 1713 }
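
The MATCH_END_ONLY change above replaces the strstr() lookup with an explicit suffix compare, so a pattern that also occurs earlier in the string no longer hides a genuine match at the end. A minimal userspace sketch of the old and new behaviour (plain C, test string invented for illustration):

#include <stdio.h>
#include <string.h>

/* Suffix test equivalent to the new MATCH_END_ONLY branch: match only
 * when the last 'len' bytes of 'str' equal 'regex'. */
static int match_end_only(const char *str, const char *regex, int len)
{
        int slen = strlen(str);

        return slen >= len && memcmp(str + slen - len, regex, len) == 0;
}

/* The old strstr() variant stops at the first occurrence, so a pattern
 * that also appears earlier in the string is missed at the end. */
static int match_end_only_old(const char *str, const char *regex, int len)
{
        const char *ptr = strstr(str, regex);

        return ptr && ptr[len] == '\0';
}

int main(void)
{
        /* "ab" occurs at offset 0 and again as the suffix of "abcab". */
        printf("old: %d new: %d\n",
               match_end_only_old("abcab", "ab", 2),
               match_end_only("abcab", "ab", 2));      /* old: 0 new: 1 */
        return 0;
}
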
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2326b04c95c4..8c1b2d290718 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -464,6 +464,8 @@ struct ring_buffer_iter {
464 struct ring_buffer_per_cpu *cpu_buffer; 464 struct ring_buffer_per_cpu *cpu_buffer;
465 unsigned long head; 465 unsigned long head;
466 struct buffer_page *head_page; 466 struct buffer_page *head_page;
467 struct buffer_page *cache_reader_page;
468 unsigned long cache_read;
467 u64 read_stamp; 469 u64 read_stamp;
468}; 470};
469 471
@@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2716 iter->read_stamp = cpu_buffer->read_stamp; 2718 iter->read_stamp = cpu_buffer->read_stamp;
2717 else 2719 else
2718 iter->read_stamp = iter->head_page->page->time_stamp; 2720 iter->read_stamp = iter->head_page->page->time_stamp;
2721 iter->cache_reader_page = cpu_buffer->reader_page;
2722 iter->cache_read = cpu_buffer->read;
2719} 2723}
2720 2724
2721/** 2725/**
@@ -2869,7 +2873,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2869 * Splice the empty reader page into the list around the head. 2873 * Splice the empty reader page into the list around the head.
2870 */ 2874 */
2871 reader = rb_set_head_page(cpu_buffer); 2875 reader = rb_set_head_page(cpu_buffer);
2872 cpu_buffer->reader_page->list.next = reader->list.next; 2876 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
2873 cpu_buffer->reader_page->list.prev = reader->list.prev; 2877 cpu_buffer->reader_page->list.prev = reader->list.prev;
2874 2878
2875 /* 2879 /*
@@ -2906,7 +2910,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2906 * 2910 *
2907 * Now make the new head point back to the reader page. 2911 * Now make the new head point back to the reader page.
2908 */ 2912 */
2909 reader->list.next->prev = &cpu_buffer->reader_page->list; 2913 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
2910 rb_inc_page(cpu_buffer, &cpu_buffer->head_page); 2914 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
2911 2915
2912 /* Finally update the reader page to the new head */ 2916 /* Finally update the reader page to the new head */
@@ -3060,13 +3064,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3060 struct ring_buffer_event *event; 3064 struct ring_buffer_event *event;
3061 int nr_loops = 0; 3065 int nr_loops = 0;
3062 3066
3063 if (ring_buffer_iter_empty(iter))
3064 return NULL;
3065
3066 cpu_buffer = iter->cpu_buffer; 3067 cpu_buffer = iter->cpu_buffer;
3067 buffer = cpu_buffer->buffer; 3068 buffer = cpu_buffer->buffer;
3068 3069
3070 /*
3071 * Check if someone performed a consuming read to
3072 * the buffer. A consuming read invalidates the iterator
3073 * and we need to reset the iterator in this case.
3074 */
3075 if (unlikely(iter->cache_read != cpu_buffer->read ||
3076 iter->cache_reader_page != cpu_buffer->reader_page))
3077 rb_iter_reset(iter);
3078
3069 again: 3079 again:
3080 if (ring_buffer_iter_empty(iter))
3081 return NULL;
3082
3070 /* 3083 /*
3071 * We repeat when a timestamp is encountered. 3084 * We repeat when a timestamp is encountered.
3072 * We can get multiple timestamps by nested interrupts or also 3085 * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3081 if (rb_per_cpu_empty(cpu_buffer)) 3094 if (rb_per_cpu_empty(cpu_buffer))
3082 return NULL; 3095 return NULL;
3083 3096
3097 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3098 rb_inc_iter(iter);
3099 goto again;
3100 }
3101
3084 event = rb_iter_head_event(iter); 3102 event = rb_iter_head_event(iter);
3085 3103
3086 switch (event->type_len) { 3104 switch (event->type_len) {
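
The cache_reader_page/cache_read fields added above let rb_iter_peek() detect that a consuming read ran after the iterator was set up and reset the iterator instead of walking a page it no longer owns. A hedged sketch of that snapshot-and-compare pattern, with hypothetical names (struct buf, read_gen) standing in for the ring-buffer internals:

#include <stdio.h>

/* Hypothetical buffer with a generation counter bumped by consuming reads. */
struct buf {
        unsigned long read_gen;         /* incremented by every consuming read */
        /* ... pages, cursors ... */
};

struct buf_iter {
        struct buf *b;
        unsigned long cache_gen;        /* snapshot taken when the iterator was (re)set */
        /* ... iterator position ... */
};

static void iter_reset(struct buf_iter *it)
{
        /* Re-snapshot the consumer state the iterator depends on. */
        it->cache_gen = it->b->read_gen;
        /* ... rewind the iterator position ... */
}

static void iter_peek(struct buf_iter *it)
{
        /* A consuming read ran since the snapshot: the cached position may
         * point into a page that was recycled, so start over. */
        if (it->cache_gen != it->b->read_gen)
                iter_reset(it);
        /* ... only now is it safe to dereference the iterator position ... */
}

int main(void)
{
        struct buf b = { .read_gen = 0 };
        struct buf_iter it = { .b = &b };

        iter_reset(&it);
        b.read_gen++;           /* simulate a concurrent consuming read */
        iter_peek(&it);         /* detects the mismatch and resets itself */
        printf("iterator generation resynced to %lu\n", it.cache_gen);
        return 0;
}
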
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8b9f20ab8eed..eac6875cb990 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -951,6 +951,11 @@ void trace_find_cmdline(int pid, char comm[])
951 return; 951 return;
952 } 952 }
953 953
954 if (WARN_ON_ONCE(pid < 0)) {
955 strcpy(comm, "<XXX>");
956 return;
957 }
958
954 if (pid > PID_MAX_DEFAULT) { 959 if (pid > PID_MAX_DEFAULT) {
955 strcpy(comm, "<...>"); 960 strcpy(comm, "<...>");
956 return; 961 return;
@@ -3949,7 +3954,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3949 if (!!(topt->flags->val & topt->opt->bit) != val) { 3954 if (!!(topt->flags->val & topt->opt->bit) != val) {
3950 mutex_lock(&trace_types_lock); 3955 mutex_lock(&trace_types_lock);
3951 ret = __set_tracer_option(current_trace, topt->flags, 3956 ret = __set_tracer_option(current_trace, topt->flags,
3952 topt->opt, val); 3957 topt->opt, !val);
3953 mutex_unlock(&trace_types_lock); 3958 mutex_unlock(&trace_types_lock);
3954 if (ret) 3959 if (ret)
3955 return ret; 3960 return ret;
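
The trace_options_write() hunk passes !val rather than val, which is consistent with the helper's last argument meaning "clear the bit" (a negate flag) rather than "desired value"; that reading is an inference from the surrounding test, not something this diff states. The hunk above it also guards trace_find_cmdline() against negative PIDs. A tiny sketch of the negate-flag convention, with invented names (set_option, MY_OPT_BIT):

#include <stdio.h>

#define MY_OPT_BIT 0x4          /* hypothetical option bit */

struct flags {
        unsigned int val;
};

/* Sketch of a helper whose last argument means "clear the bit" (negate),
 * mirroring the convention the hunk above appears to rely on. */
static int set_option(struct flags *f, unsigned int bit, int neg)
{
        if (neg)
                f->val &= ~bit;
        else
                f->val |= bit;
        return 0;
}

int main(void)
{
        struct flags f = { .val = 0 };
        int user_val = 1;       /* what the user echoed into the options file */

        /* Only touch the flag when it differs from what was requested, and
         * translate "want it set" into "do not negate" by passing !val. */
        if (!!(f.val & MY_OPT_BIT) != user_val)
                set_option(&f, MY_OPT_BIT, !user_val);

        printf("flags: 0x%x\n", f.val);         /* 0x4 */
        return 0;
}
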
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb228de..e42af9aad69f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
211{ 211{
212 char **addr = (char **)(event + pred->offset); 212 char **addr = (char **)(event + pred->offset);
213 int cmp, match; 213 int cmp, match;
214 int len = strlen(*addr) + 1; /* including tailing '\0' */
214 215
215 cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len); 216 cmp = pred->regex.match(*addr, &pred->regex, len);
216 217
217 match = cmp ^ pred->not; 218 match = cmp ^ pred->not;
218 219
@@ -251,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
251 return 0; 252 return 0;
252} 253}
253 254
254/* Basic regex callbacks */ 255/*
256 * regex_match_foo - Basic regex callbacks
257 *
258 * @str: the string to be searched
259 * @r: the regex structure containing the pattern string
260 * @len: the length of the string to be searched (including '\0')
261 *
262 * Note:
263 * - @str might not be NULL-terminated if it's of type DYN_STRING
264 * or STATIC_STRING
265 */
266
255static int regex_match_full(char *str, struct regex *r, int len) 267static int regex_match_full(char *str, struct regex *r, int len)
256{ 268{
257 if (strncmp(str, r->pattern, len) == 0) 269 if (strncmp(str, r->pattern, len) == 0)
@@ -261,23 +273,24 @@ static int regex_match_full(char *str, struct regex *r, int len)
261 273
262static int regex_match_front(char *str, struct regex *r, int len) 274static int regex_match_front(char *str, struct regex *r, int len)
263{ 275{
264 if (strncmp(str, r->pattern, len) == 0) 276 if (strncmp(str, r->pattern, r->len) == 0)
265 return 1; 277 return 1;
266 return 0; 278 return 0;
267} 279}
268 280
269static int regex_match_middle(char *str, struct regex *r, int len) 281static int regex_match_middle(char *str, struct regex *r, int len)
270{ 282{
271 if (strstr(str, r->pattern)) 283 if (strnstr(str, r->pattern, len))
272 return 1; 284 return 1;
273 return 0; 285 return 0;
274} 286}
275 287
276static int regex_match_end(char *str, struct regex *r, int len) 288static int regex_match_end(char *str, struct regex *r, int len)
277{ 289{
278 char *ptr = strstr(str, r->pattern); 290 int strlen = len - 1;
279 291
280 if (ptr && (ptr[r->len] == 0)) 292 if (strlen >= r->len &&
293 memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
281 return 1; 294 return 1;
282 return 0; 295 return 0;
283} 296}
@@ -781,10 +794,8 @@ static int filter_add_pred(struct filter_parse_state *ps,
781 pred->regex.field_len = field->size; 794 pred->regex.field_len = field->size;
782 } else if (field->filter_type == FILTER_DYN_STRING) 795 } else if (field->filter_type == FILTER_DYN_STRING)
783 fn = filter_pred_strloc; 796 fn = filter_pred_strloc;
784 else { 797 else
785 fn = filter_pred_pchar; 798 fn = filter_pred_pchar;
786 pred->regex.field_len = strlen(pred->regex.pattern);
787 }
788 } else { 799 } else {
789 if (field->is_signed) 800 if (field->is_signed)
790 ret = strict_strtoll(pred->regex.pattern, 0, &val); 801 ret = strict_strtoll(pred->regex.pattern, 0, &val);
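
Taken together, the trace_events_filter.c hunks make every matcher respect the length of the buffer being searched (which, per the new comment, may not be NUL-terminated): prefix matches compare only the pattern's own length, substring matches use strnstr(), suffix matches do a memcmp() on the tail, and filter_pred_pchar() now computes the length at call time instead of caching it in field_len. A userspace sketch of the three matchers; bounded_strstr() stands in for the kernel's strnstr(), and the sample field name is invented:

#include <stdio.h>
#include <string.h>

/* Bounded substring search standing in for the kernel's strnstr(). */
static const char *bounded_strstr(const char *s, const char *sub, int len)
{
        int sublen = strlen(sub);
        int i;

        for (i = 0; i + sublen <= len && s[i]; i++)
                if (memcmp(s + i, sub, sublen) == 0)
                        return s + i;
        return NULL;
}

/* "pattern*": compare only the pattern's own length, not the field size. */
static int match_front(const char *str, const char *pat, int len)
{
        (void)len;      /* a prefix test only needs the pattern length */
        return strncmp(str, pat, strlen(pat)) == 0;
}

/* "*pattern*": never scan past the 'len' bytes that belong to the field. */
static int match_middle(const char *str, const char *pat, int len)
{
        return bounded_strstr(str, pat, len) != NULL;
}

/* "*pattern": suffix compare against the last strlen(pat) bytes. */
static int match_end(const char *str, const char *pat, int len)
{
        int slen = len - 1;             /* 'len' includes the trailing '\0' */
        int plen = strlen(pat);

        return slen >= plen && memcmp(str + slen - plen, pat, plen) == 0;
}

int main(void)
{
        const char *field = "sys_openat";       /* invented sample value */
        int len = strlen(field) + 1;            /* length including '\0' */

        printf("%d %d %d\n",
               match_front(field, "sys_", len),         /* 1 */
               match_middle(field, "open", len),        /* 1 */
               match_end(field, "at", len));            /* 1 */
        return 0;
}
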
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 458e5bfe26d0..d4fa5dc1ee4e 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
158 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 158 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
159 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 159 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
160 offsetof(typeof(field), item), \ 160 offsetof(typeof(field), item), \
161 sizeof(field.item), 0, FILTER_OTHER); \ 161 sizeof(field.item), \
162 is_signed_type(type), FILTER_OTHER); \
162 if (ret) \ 163 if (ret) \
163 return ret; 164 return ret;
164 165
@@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
168 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 169 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
169 offsetof(typeof(field), \ 170 offsetof(typeof(field), \
170 container.item), \ 171 container.item), \
171 sizeof(field.container.item), 0, \ 172 sizeof(field.container.item), \
172 FILTER_OTHER); \ 173 is_signed_type(type), FILTER_OTHER); \
173 if (ret) \ 174 if (ret) \
174 return ret; 175 return ret;
175 176
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 375f81a568dc..6ea90c0e2c96 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1201,10 +1201,11 @@ static int __probe_event_show_format(struct trace_seq *s,
1201#undef SHOW_FIELD 1201#undef SHOW_FIELD
1202#define SHOW_FIELD(type, item, name) \ 1202#define SHOW_FIELD(type, item, name) \
1203 do { \ 1203 do { \
1204 ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ 1204 ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \
1205 "offset:%u;\tsize:%u;\n", name, \ 1205 "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
1206 (unsigned int)offsetof(typeof(field), item),\ 1206 (unsigned int)offsetof(typeof(field), item),\
1207 (unsigned int)sizeof(type)); \ 1207 (unsigned int)sizeof(type), \
1208 is_signed_type(type)); \
1208 if (!ret) \ 1209 if (!ret) \
1209 return 0; \ 1210 return 0; \
1210 } while (0) 1211 } while (0)
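
Both the trace_export.c and trace_kprobe.c hunks start reporting each field's real signedness through is_signed_type() instead of hard-coding 0. The macro body is not part of this diff; the sketch below uses the common "(type)(-1) < (type)1" trick and should be read as an assumption about how such a macro can be written, not as a quote of the kernel's definition:

#include <stdio.h>

/*
 * Compile-time signedness probe: casting -1 to the type yields a value
 * below 1 only for signed types. Assumed to be equivalent in spirit to
 * the kernel's is_signed_type() used above.
 */
#define is_signed_type(type)    (((type)(-1)) < (type)1)

int main(void)
{
        printf("int: %d, unsigned int: %d, char: %d\n",
               is_signed_type(int),             /* 1 */
               is_signed_type(unsigned int),    /* 0 */
               is_signed_type(char));           /* 1 or 0: plain char
                                                   signedness is
                                                   implementation-defined */
        return 0;
}
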
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index faf37fa4408c..94103cdcf9d8 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -26,12 +26,13 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27 27
28#include "trace_output.h" 28#include "trace_output.h"
29#include "trace_stat.h"
30#include "trace.h" 29#include "trace.h"
31 30
32#include <linux/hw_breakpoint.h> 31#include <linux/hw_breakpoint.h>
33#include <asm/hw_breakpoint.h> 32#include <asm/hw_breakpoint.h>
34 33
34#include <asm/atomic.h>
35
35/* 36/*
36 * For now, let us restrict the no. of symbols traced simultaneously to number 37 * For now, let us restrict the no. of symbols traced simultaneously to number
37 * of available hardware breakpoint registers. 38 * of available hardware breakpoint registers.
@@ -44,7 +45,7 @@ struct trace_ksym {
44 struct perf_event **ksym_hbp; 45 struct perf_event **ksym_hbp;
45 struct perf_event_attr attr; 46 struct perf_event_attr attr;
46#ifdef CONFIG_PROFILE_KSYM_TRACER 47#ifdef CONFIG_PROFILE_KSYM_TRACER
47 unsigned long counter; 48 atomic64_t counter;
48#endif 49#endif
49 struct hlist_node ksym_hlist; 50 struct hlist_node ksym_hlist;
50}; 51};
@@ -69,9 +70,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
69 70
70 rcu_read_lock(); 71 rcu_read_lock();
71 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { 72 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
72 if ((entry->attr.bp_addr == hbp_hit_addr) && 73 if (entry->attr.bp_addr == hbp_hit_addr) {
73 (entry->counter <= MAX_UL_INT)) { 74 atomic64_inc(&entry->counter);
74 entry->counter++;
75 break; 75 break;
76 } 76 }
77 } 77 }
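
Making the per-symbol counter an atomic64_t lets concurrent breakpoint handlers increment it without the old MAX_UL_INT guard or any locking. A userspace sketch of the same increment-and-read pattern using C11 atomics (the kernel spells this atomic64_inc()/atomic64_read(), as the hunk shows; the struct and function names below are invented):

#include <stdio.h>
#include <stdatomic.h>
#include <inttypes.h>

/* Stand-in for the per-symbol entry; the kernel uses atomic64_t, a C11
 * atomic plays the same role here. */
struct hit_counter {
        atomic_uint_least64_t counter;
};

static void on_breakpoint_hit(struct hit_counter *hc)
{
        /* Safe from any number of concurrent handlers, no explicit lock. */
        atomic_fetch_add_explicit(&hc->counter, 1, memory_order_relaxed);
}

int main(void)
{
        struct hit_counter hc = { .counter = 0 };

        on_breakpoint_hit(&hc);
        on_breakpoint_hit(&hc);
        printf("hits: %" PRIu64 "\n",
               (uint64_t)atomic_load_explicit(&hc.counter,
                                              memory_order_relaxed));
        return 0;
}
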
@@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
197 entry->attr.bp_addr = addr; 197 entry->attr.bp_addr = addr;
198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4; 198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
199 199
200 ret = -EAGAIN;
201 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, 200 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
202 ksym_hbp_handler); 201 ksym_hbp_handler);
203 202
@@ -300,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file,
300 * 2: echo 0 > ksym_trace_filter 299 * 2: echo 0 > ksym_trace_filter
301 * 3: echo "*:---" > ksym_trace_filter 300 * 3: echo "*:---" > ksym_trace_filter
302 */ 301 */
303 if (!buf[0] || !strcmp(buf, "0") || 302 if (!input_string[0] || !strcmp(input_string, "0") ||
304 !strcmp(buf, "*:---")) { 303 !strcmp(input_string, "*:---")) {
305 __ksym_trace_reset(); 304 __ksym_trace_reset();
306 ret = 0; 305 ret = 0;
307 goto out; 306 goto out;
@@ -444,102 +443,77 @@ struct tracer ksym_tracer __read_mostly =
444 .print_line = ksym_trace_output 443 .print_line = ksym_trace_output
445}; 444};
446 445
447__init static int init_ksym_trace(void)
448{
449 struct dentry *d_tracer;
450 struct dentry *entry;
451
452 d_tracer = tracing_init_dentry();
453 ksym_filter_entry_count = 0;
454
455 entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
456 NULL, &ksym_tracing_fops);
457 if (!entry)
458 pr_warning("Could not create debugfs "
459 "'ksym_trace_filter' file\n");
460
461 return register_tracer(&ksym_tracer);
462}
463device_initcall(init_ksym_trace);
464
465
466#ifdef CONFIG_PROFILE_KSYM_TRACER 446#ifdef CONFIG_PROFILE_KSYM_TRACER
467static int ksym_tracer_stat_headers(struct seq_file *m) 447static int ksym_profile_show(struct seq_file *m, void *v)
468{ 448{
449 struct hlist_node *node;
450 struct trace_ksym *entry;
451 int access_type = 0;
452 char fn_name[KSYM_NAME_LEN];
453
469 seq_puts(m, " Access Type "); 454 seq_puts(m, " Access Type ");
470 seq_puts(m, " Symbol Counter\n"); 455 seq_puts(m, " Symbol Counter\n");
471 seq_puts(m, " ----------- "); 456 seq_puts(m, " ----------- ");
472 seq_puts(m, " ------ -------\n"); 457 seq_puts(m, " ------ -------\n");
473 return 0;
474}
475 458
476static int ksym_tracer_stat_show(struct seq_file *m, void *v) 459 rcu_read_lock();
477{ 460 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
478 struct hlist_node *stat = v;
479 struct trace_ksym *entry;
480 int access_type = 0;
481 char fn_name[KSYM_NAME_LEN];
482 461
483 entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); 462 access_type = entry->attr.bp_type;
484 463
485 access_type = entry->attr.bp_type; 464 switch (access_type) {
465 case HW_BREAKPOINT_R:
466 seq_puts(m, " R ");
467 break;
468 case HW_BREAKPOINT_W:
469 seq_puts(m, " W ");
470 break;
471 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
472 seq_puts(m, " RW ");
473 break;
474 default:
475 seq_puts(m, " NA ");
476 }
486 477
487 switch (access_type) { 478 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
488 case HW_BREAKPOINT_R: 479 seq_printf(m, " %-36s", fn_name);
489 seq_puts(m, " R "); 480 else
490 break; 481 seq_printf(m, " %-36s", "<NA>");
491 case HW_BREAKPOINT_W: 482 seq_printf(m, " %15llu\n",
492 seq_puts(m, " W "); 483 (unsigned long long)atomic64_read(&entry->counter));
493 break;
494 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
495 seq_puts(m, " RW ");
496 break;
497 default:
498 seq_puts(m, " NA ");
499 } 484 }
500 485 rcu_read_unlock();
501 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
502 seq_printf(m, " %-36s", fn_name);
503 else
504 seq_printf(m, " %-36s", "<NA>");
505 seq_printf(m, " %15lu\n", entry->counter);
506 486
507 return 0; 487 return 0;
508} 488}
509 489
510static void *ksym_tracer_stat_start(struct tracer_stat *trace) 490static int ksym_profile_open(struct inode *node, struct file *file)
511{ 491{
512 return ksym_filter_head.first; 492 return single_open(file, ksym_profile_show, NULL);
513}
514
515static void *
516ksym_tracer_stat_next(void *v, int idx)
517{
518 struct hlist_node *stat = v;
519
520 return stat->next;
521} 493}
522 494
523static struct tracer_stat ksym_tracer_stats = { 495static const struct file_operations ksym_profile_fops = {
524 .name = "ksym_tracer", 496 .open = ksym_profile_open,
525 .stat_start = ksym_tracer_stat_start, 497 .read = seq_read,
526 .stat_next = ksym_tracer_stat_next, 498 .llseek = seq_lseek,
527 .stat_headers = ksym_tracer_stat_headers, 499 .release = single_release,
528 .stat_show = ksym_tracer_stat_show
529}; 500};
501#endif /* CONFIG_PROFILE_KSYM_TRACER */
530 502
531__init static int ksym_tracer_stat_init(void) 503__init static int init_ksym_trace(void)
532{ 504{
533 int ret; 505 struct dentry *d_tracer;
534 506
535 ret = register_stat_tracer(&ksym_tracer_stats); 507 d_tracer = tracing_init_dentry();
536 if (ret) {
537 printk(KERN_WARNING "Warning: could not register "
538 "ksym tracer stats\n");
539 return 1;
540 }
541 508
542 return 0; 509 trace_create_file("ksym_trace_filter", 0644, d_tracer,
510 NULL, &ksym_tracing_fops);
511
512#ifdef CONFIG_PROFILE_KSYM_TRACER
513 trace_create_file("ksym_profile", 0444, d_tracer,
514 NULL, &ksym_profile_fops);
515#endif
516
517 return register_tracer(&ksym_tracer);
543} 518}
544fs_initcall(ksym_tracer_stat_init); 519device_initcall(init_ksym_trace);
545#endif /* CONFIG_PROFILE_KSYM_TRACER */
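
The stat-tracer plumbing (stat_start/stat_next/stat_show) is replaced by a plain single_open() seq_file: ksym_profile_show() emits the whole table in one pass and the file_operations only wire up seq_read/seq_lseek/single_release. A minimal kernel-module sketch of that pattern; the ksym_profile_demo directory and the demo_* names are illustrative and not part of the commit (error handling is elided for brevity):

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *demo_dir;

/* Everything is emitted in one pass, so single_open() is enough: no
 * start/next/stop iterator like the old stat-tracer framework needed. */
static int demo_show(struct seq_file *m, void *v)
{
        seq_puts(m, " Access Type   Symbol   Counter\n");
        seq_printf(m, " %-12s %-8s %llu\n", "RW", "<demo>", 42ULL);
        return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
        return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
        .owner          = THIS_MODULE,
        .open           = demo_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static int __init demo_init(void)
{
        demo_dir = debugfs_create_dir("ksym_profile_demo", NULL);
        debugfs_create_file("profile", 0444, demo_dir, NULL, &demo_fops);
        return 0;
}

static void __exit demo_exit(void)
{
        debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
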