author     Al Viro <viro@zeniv.linux.org.uk>    2014-12-08 20:39:29 -0500
committer  Al Viro <viro@zeniv.linux.org.uk>    2014-12-08 20:39:29 -0500
commit     ba00410b8131b23edfb0e09f8b6dd26c8eb621fb (patch)
tree       c08504e4d2fa51ac91cef544f336d0169806c49f /kernel
parent     8ce74dd6057832618957fc2cbd38fa959c3a0a6c (diff)
parent     aa583096d9767892983332e7c1a984bd17e3cd39 (diff)
Merge branch 'iov_iter' into for-next
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                 |   2
-rw-r--r--  kernel/audit.c                  |   2
-rw-r--r--  kernel/audit_tree.c             |   1
-rw-r--r--  kernel/bpf/Makefile             |   6
-rw-r--r--  kernel/bpf/core.c               |   9
-rw-r--r--  kernel/bpf/verifier.c           |   3
-rw-r--r--  kernel/context_tracking.c       |  40
-rw-r--r--  kernel/cpu.c                    |  14
-rw-r--r--  kernel/events/core.c            |  23
-rw-r--r--  kernel/events/hw_breakpoint.c   |   7
-rw-r--r--  kernel/events/uprobes.c         |   1
-rw-r--r--  kernel/futex.c                  |  36
-rw-r--r--  kernel/gcov/Kconfig             |   2
-rw-r--r--  kernel/kmod.c                   |  76
-rw-r--r--  kernel/panic.c                  |   1
-rw-r--r--  kernel/power/hibernate.c        |   8
-rw-r--r--  kernel/power/suspend.c          |   4
-rw-r--r--  kernel/rcu/tree.c               |  15
-rw-r--r--  kernel/rcu/tree.h               |   1
-rw-r--r--  kernel/rcu/tree_plugin.h        |  33
-rw-r--r--  kernel/sched/core.c             | 110
-rw-r--r--  kernel/sched/deadline.c         |  43
-rw-r--r--  kernel/sched/fair.c             |  35
-rw-r--r--  kernel/sched/idle_task.c        |   5
-rw-r--r--  kernel/sched/rt.c               |   2
-rw-r--r--  kernel/sched/sched.h            |   2
-rw-r--r--  kernel/sched/stop_task.c        |   5
-rw-r--r--  kernel/sysctl.c                 |   3
-rw-r--r--  kernel/time/clockevents.c       |   2
-rw-r--r--  kernel/time/posix-cpu-timers.c  |   2
-rw-r--r--  kernel/time/posix-timers.c      |   1
-rw-r--r--  kernel/trace/ftrace.c           |  54
-rw-r--r--  kernel/trace/ring_buffer.c      |  81
-rw-r--r--  kernel/trace/trace.c            |  33
-rw-r--r--  kernel/trace/trace_syscalls.c   |   8
35 files changed, 410 insertions(+), 260 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index dc5c77544fd6..17ea6d4a9a24 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -86,7 +86,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
-obj-$(CONFIG_NET) += bpf/
+obj-$(CONFIG_BPF) += bpf/
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 80983df92cd4..cebb11db4d34 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -739,7 +739,7 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
 	audit_log_task_info(ab, current);
-	audit_log_format(ab, "feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d",
+	audit_log_format(ab, " feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d",
 			 audit_feature_names[which], !!old_feature, !!new_feature,
 			 !!old_lock, !!new_lock, res);
 	audit_log_end(ab);
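
The one-character audit.c change matters because audit_log_format() appends to a record that audit_log_task_info() has already partially filled; without the leading space the "feature=" key runs into the previous field. A minimal userspace sketch of the same concatenation pitfall, using snprintf into a fixed buffer (buffer name and field values are illustrative, not from the kernel):

#include <stdio.h>

int main(void)
{
	char record[256];
	int len = 0;

	/* First helper writes its fields with no trailing separator, just
	 * as audit_log_task_info() leaves the audit buffer. */
	len += snprintf(record + len, sizeof(record) - len, "pid=1234 comm=\"foo\"");

	/* Without the leading space the next key fuses with the previous
	 * value: ...comm="foo"feature=...  With it, fields stay parseable. */
	len += snprintf(record + len, sizeof(record) - len,
			" feature=%s old=%u new=%u", "loginuid_immutable", 0u, 1u);

	puts(record);
	return 0;
}
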
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index e242e3a9864a..80f29e015570 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -154,6 +154,7 @@ static struct audit_chunk *alloc_chunk(int count)
154 chunk->owners[i].index = i; 154 chunk->owners[i].index = i;
155 } 155 }
156 fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch); 156 fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch);
157 chunk->mark.mask = FS_IN_IGNORED;
157 return chunk; 158 return chunk;
158} 159}
159 160
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 45427239f375..0daf7f6ae7df 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,5 @@
-obj-y := core.o syscall.o verifier.o
-
+obj-y := core.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o
 ifdef CONFIG_TEST_BPF
-obj-y += test_stub.o
+obj-$(CONFIG_BPF_SYSCALL) += test_stub.o
 endif
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f0c30c59b317..d6594e457a25 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -655,3 +655,12 @@ void bpf_prog_free(struct bpf_prog *fp)
 	schedule_work(&aux->work);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_free);
+
+/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
+ * skb_copy_bits(), so provide a weak definition of it for NET-less config.
+ */
+int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
+			 int len)
+{
+	return -EFAULT;
+}
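
The bpf/core.c hunk relies on GCC/Clang weak symbols: when the kernel is built without the networking code there is no real skb_copy_bits(), so the BPF core supplies a __weak stub that the strong definition overrides whenever it is linked in. A small userspace sketch of the same linking trick (the function name here is made up for illustration); compiled on its own the weak default runs, and linking another object that provides a strong copy_bits() silently replaces it:

#include <stdio.h>

/* Weak default: used only if no other object file supplies a strong
 * definition at link time. */
int __attribute__((weak)) copy_bits(const void *src, int offset, void *dst, int len)
{
	(void)src; (void)offset; (void)dst; (void)len;
	return -1;	/* mirrors the kernel stub returning -EFAULT */
}

int main(void)
{
	char buf[16];
	int ret = copy_bits("payload", 0, buf, sizeof(buf));

	printf("copy_bits returned %d (%s definition)\n",
	       ret, ret < 0 ? "weak fallback" : "strong override");
	return 0;
}
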
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 801f5f3b9307..9f81818f2941 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1409,7 +1409,8 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
 		if (memcmp(&old->regs[i], &cur->regs[i],
 			   sizeof(old->regs[0])) != 0) {
 			if (old->regs[i].type == NOT_INIT ||
-			    old->regs[i].type == UNKNOWN_VALUE)
+			    (old->regs[i].type == UNKNOWN_VALUE &&
+			     cur->regs[i].type != NOT_INIT))
 				continue;
 			return false;
 		}
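
The verifier change tightens state pruning: an old UNKNOWN_VALUE register only covers the current state if the current register is actually initialized; treating it as covering NOT_INIT could prune a path that later reads an uninitialized register without that read ever being checked. A compact sketch of that predicate with a stand-in enum (types and names are simplified stand-ins, not the real verifier structures):

#include <stdbool.h>
#include <stdio.h>

enum reg_type { NOT_INIT, UNKNOWN_VALUE, PTR_TO_CTX };

/* Is the previously-verified register state at least as permissive as the
 * current one?  If yes, exploration of the current path can be pruned. */
static bool reg_state_covered(enum reg_type old, enum reg_type cur)
{
	if (old == cur)
		return true;
	if (old == NOT_INIT)
		return true;			/* old accepted anything */
	if (old == UNKNOWN_VALUE && cur != NOT_INIT)
		return true;			/* the fix: cur must be initialized */
	return false;
}

int main(void)
{
	printf("%d\n", reg_state_covered(UNKNOWN_VALUE, PTR_TO_CTX));	/* 1: prune */
	printf("%d\n", reg_state_covered(UNKNOWN_VALUE, NOT_INIT));	/* 0: keep verifying */
	return 0;
}
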
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 5664985c46a0..937ecdfdf258 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -107,46 +107,6 @@ void context_tracking_user_enter(void)
107} 107}
108NOKPROBE_SYMBOL(context_tracking_user_enter); 108NOKPROBE_SYMBOL(context_tracking_user_enter);
109 109
110#ifdef CONFIG_PREEMPT
111/**
112 * preempt_schedule_context - preempt_schedule called by tracing
113 *
114 * The tracing infrastructure uses preempt_enable_notrace to prevent
115 * recursion and tracing preempt enabling caused by the tracing
116 * infrastructure itself. But as tracing can happen in areas coming
117 * from userspace or just about to enter userspace, a preempt enable
118 * can occur before user_exit() is called. This will cause the scheduler
119 * to be called when the system is still in usermode.
120 *
121 * To prevent this, the preempt_enable_notrace will use this function
122 * instead of preempt_schedule() to exit user context if needed before
123 * calling the scheduler.
124 */
125asmlinkage __visible void __sched notrace preempt_schedule_context(void)
126{
127 enum ctx_state prev_ctx;
128
129 if (likely(!preemptible()))
130 return;
131
132 /*
133 * Need to disable preemption in case user_exit() is traced
134 * and the tracer calls preempt_enable_notrace() causing
135 * an infinite recursion.
136 */
137 preempt_disable_notrace();
138 prev_ctx = exception_enter();
139 preempt_enable_no_resched_notrace();
140
141 preempt_schedule();
142
143 preempt_disable_notrace();
144 exception_exit(prev_ctx);
145 preempt_enable_notrace();
146}
147EXPORT_SYMBOL_GPL(preempt_schedule_context);
148#endif /* CONFIG_PREEMPT */
149
150/** 110/**
151 * context_tracking_user_exit - Inform the context tracking that the CPU is 111 * context_tracking_user_exit - Inform the context tracking that the CPU is
152 * exiting userspace mode and entering the kernel. 112 * exiting userspace mode and entering the kernel.
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 356450f09c1f..90a3d017b90c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ static struct {
64 * an ongoing cpu hotplug operation. 64 * an ongoing cpu hotplug operation.
65 */ 65 */
66 int refcount; 66 int refcount;
67 /* And allows lockless put_online_cpus(). */
68 atomic_t puts_pending;
67 69
68#ifdef CONFIG_DEBUG_LOCK_ALLOC 70#ifdef CONFIG_DEBUG_LOCK_ALLOC
69 struct lockdep_map dep_map; 71 struct lockdep_map dep_map;
@@ -113,7 +115,11 @@ void put_online_cpus(void)
113{ 115{
114 if (cpu_hotplug.active_writer == current) 116 if (cpu_hotplug.active_writer == current)
115 return; 117 return;
116 mutex_lock(&cpu_hotplug.lock); 118 if (!mutex_trylock(&cpu_hotplug.lock)) {
119 atomic_inc(&cpu_hotplug.puts_pending);
120 cpuhp_lock_release();
121 return;
122 }
117 123
118 if (WARN_ON(!cpu_hotplug.refcount)) 124 if (WARN_ON(!cpu_hotplug.refcount))
119 cpu_hotplug.refcount++; /* try to fix things up */ 125 cpu_hotplug.refcount++; /* try to fix things up */
@@ -155,6 +161,12 @@ void cpu_hotplug_begin(void)
155 cpuhp_lock_acquire(); 161 cpuhp_lock_acquire();
156 for (;;) { 162 for (;;) {
157 mutex_lock(&cpu_hotplug.lock); 163 mutex_lock(&cpu_hotplug.lock);
164 if (atomic_read(&cpu_hotplug.puts_pending)) {
165 int delta;
166
167 delta = atomic_xchg(&cpu_hotplug.puts_pending, 0);
168 cpu_hotplug.refcount -= delta;
169 }
158 if (likely(!cpu_hotplug.refcount)) 170 if (likely(!cpu_hotplug.refcount))
159 break; 171 break;
160 __set_current_state(TASK_UNINTERRUPTIBLE); 172 __set_current_state(TASK_UNINTERRUPTIBLE);
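
The cpu.c hunks make put_online_cpus() usable in contexts where taking the hotplug mutex could deadlock: if the trylock fails, the release is recorded in an atomic counter, and the next hotplug writer drains that counter before judging the refcount. A userspace analogue with pthreads and C11 atomics (structure and function names are illustrative, not the kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static struct {
	pthread_mutex_t lock;
	int refcount;			/* readers currently holding the resource */
	atomic_int puts_pending;	/* releases that could not take the lock */
} hp = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

static void get_ref(void)
{
	pthread_mutex_lock(&hp.lock);
	hp.refcount++;
	pthread_mutex_unlock(&hp.lock);
}

static void put_ref(void)
{
	if (pthread_mutex_trylock(&hp.lock) != 0) {
		/* Lock contended: defer the decrement, lock-free. */
		atomic_fetch_add(&hp.puts_pending, 1);
		return;
	}
	hp.refcount--;
	pthread_mutex_unlock(&hp.lock);
}

static void writer_begin(void)
{
	pthread_mutex_lock(&hp.lock);
	/* Fold in any deferred puts before judging the refcount. */
	hp.refcount -= atomic_exchange(&hp.puts_pending, 0);
	printf("writer sees refcount=%d\n", hp.refcount);
	pthread_mutex_unlock(&hp.lock);
}

int main(void)
{
	get_ref();
	put_ref();		/* fast path or deferred path, result is the same */
	writer_begin();		/* prints 0 either way */
	return 0;
}
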
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 446fbeefad1c..e56923026dd8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1562,8 +1562,10 @@ static void perf_remove_from_context(struct perf_event *event, bool detach_group
1562 1562
1563 if (!task) { 1563 if (!task) {
1564 /* 1564 /*
1565 * Per cpu events are removed via an smp call and 1565 * Per cpu events are removed via an smp call. The removal can
1566 * the removal is always successful. 1566 * fail if the CPU is currently offline, but in that case we
1567 * already called __perf_remove_from_context from
1568 * perf_event_exit_cpu.
1567 */ 1569 */
1568 cpu_function_call(event->cpu, __perf_remove_from_context, &re); 1570 cpu_function_call(event->cpu, __perf_remove_from_context, &re);
1569 return; 1571 return;
@@ -6071,11 +6073,6 @@ static int perf_swevent_init(struct perf_event *event)
6071 return 0; 6073 return 0;
6072} 6074}
6073 6075
6074static int perf_swevent_event_idx(struct perf_event *event)
6075{
6076 return 0;
6077}
6078
6079static struct pmu perf_swevent = { 6076static struct pmu perf_swevent = {
6080 .task_ctx_nr = perf_sw_context, 6077 .task_ctx_nr = perf_sw_context,
6081 6078
@@ -6085,8 +6082,6 @@ static struct pmu perf_swevent = {
6085 .start = perf_swevent_start, 6082 .start = perf_swevent_start,
6086 .stop = perf_swevent_stop, 6083 .stop = perf_swevent_stop,
6087 .read = perf_swevent_read, 6084 .read = perf_swevent_read,
6088
6089 .event_idx = perf_swevent_event_idx,
6090}; 6085};
6091 6086
6092#ifdef CONFIG_EVENT_TRACING 6087#ifdef CONFIG_EVENT_TRACING
@@ -6204,8 +6199,6 @@ static struct pmu perf_tracepoint = {
6204 .start = perf_swevent_start, 6199 .start = perf_swevent_start,
6205 .stop = perf_swevent_stop, 6200 .stop = perf_swevent_stop,
6206 .read = perf_swevent_read, 6201 .read = perf_swevent_read,
6207
6208 .event_idx = perf_swevent_event_idx,
6209}; 6202};
6210 6203
6211static inline void perf_tp_register(void) 6204static inline void perf_tp_register(void)
@@ -6431,8 +6424,6 @@ static struct pmu perf_cpu_clock = {
6431 .start = cpu_clock_event_start, 6424 .start = cpu_clock_event_start,
6432 .stop = cpu_clock_event_stop, 6425 .stop = cpu_clock_event_stop,
6433 .read = cpu_clock_event_read, 6426 .read = cpu_clock_event_read,
6434
6435 .event_idx = perf_swevent_event_idx,
6436}; 6427};
6437 6428
6438/* 6429/*
@@ -6511,8 +6502,6 @@ static struct pmu perf_task_clock = {
6511 .start = task_clock_event_start, 6502 .start = task_clock_event_start,
6512 .stop = task_clock_event_stop, 6503 .stop = task_clock_event_stop,
6513 .read = task_clock_event_read, 6504 .read = task_clock_event_read,
6514
6515 .event_idx = perf_swevent_event_idx,
6516}; 6505};
6517 6506
6518static void perf_pmu_nop_void(struct pmu *pmu) 6507static void perf_pmu_nop_void(struct pmu *pmu)
@@ -6542,7 +6531,7 @@ static void perf_pmu_cancel_txn(struct pmu *pmu)
6542 6531
6543static int perf_event_idx_default(struct perf_event *event) 6532static int perf_event_idx_default(struct perf_event *event)
6544{ 6533{
6545 return event->hw.idx + 1; 6534 return 0;
6546} 6535}
6547 6536
6548/* 6537/*
@@ -8130,7 +8119,7 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
8130 8119
8131static void __perf_event_exit_context(void *__info) 8120static void __perf_event_exit_context(void *__info)
8132{ 8121{
8133 struct remove_event re = { .detach_group = false }; 8122 struct remove_event re = { .detach_group = true };
8134 struct perf_event_context *ctx = __info; 8123 struct perf_event_context *ctx = __info;
8135 8124
8136 perf_pmu_rotate_stop(ctx->pmu); 8125 perf_pmu_rotate_stop(ctx->pmu);
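
Most of the events/core.c hunk deletes per-PMU event_idx callbacks that all returned 0 and instead makes 0 the value of the shared default, perf_event_idx_default(). The underlying pattern is filling in an ops structure with a default when a driver leaves a hook NULL; a minimal sketch (the pmu struct and registration function below are simplified stand-ins, not the perf API):

#include <stdio.h>

struct pmu_ops {
	const char *name;
	int (*event_idx)(void);	/* optional; NULL means "use the default" */
};

static int event_idx_default(void)
{
	return 0;		/* matches the post-patch default in perf */
}

static void pmu_register(struct pmu_ops *ops)
{
	if (!ops->event_idx)
		ops->event_idx = event_idx_default;
}

int main(void)
{
	struct pmu_ops swevent = { .name = "software" };	/* hook left out */

	pmu_register(&swevent);
	printf("%s event_idx = %d\n", swevent.name, swevent.event_idx());
	return 0;
}
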
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 1559fb0b9296..9803a6600d49 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -605,11 +605,6 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
605 bp->hw.state = PERF_HES_STOPPED; 605 bp->hw.state = PERF_HES_STOPPED;
606} 606}
607 607
608static int hw_breakpoint_event_idx(struct perf_event *bp)
609{
610 return 0;
611}
612
613static struct pmu perf_breakpoint = { 608static struct pmu perf_breakpoint = {
614 .task_ctx_nr = perf_sw_context, /* could eventually get its own */ 609 .task_ctx_nr = perf_sw_context, /* could eventually get its own */
615 610
@@ -619,8 +614,6 @@ static struct pmu perf_breakpoint = {
619 .start = hw_breakpoint_start, 614 .start = hw_breakpoint_start,
620 .stop = hw_breakpoint_stop, 615 .stop = hw_breakpoint_stop,
621 .read = hw_breakpoint_pmu_read, 616 .read = hw_breakpoint_pmu_read,
622
623 .event_idx = hw_breakpoint_event_idx,
624}; 617};
625 618
626int __init init_hw_breakpoint(void) 619int __init init_hw_breakpoint(void)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 1d0af8a2c646..ed8f2cde34c5 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1640,7 +1640,6 @@ bool uprobe_deny_signal(void)
1640 if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { 1640 if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
1641 utask->state = UTASK_SSTEP_TRAPPED; 1641 utask->state = UTASK_SSTEP_TRAPPED;
1642 set_tsk_thread_flag(t, TIF_UPROBE); 1642 set_tsk_thread_flag(t, TIF_UPROBE);
1643 set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
1644 } 1643 }
1645 } 1644 }
1646 1645
diff --git a/kernel/futex.c b/kernel/futex.c
index f3a3a071283c..63678b573d61 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -143,9 +143,8 @@
143 * 143 *
144 * Where (A) orders the waiters increment and the futex value read through 144 * Where (A) orders the waiters increment and the futex value read through
145 * atomic operations (see hb_waiters_inc) and where (B) orders the write 145 * atomic operations (see hb_waiters_inc) and where (B) orders the write
146 * to futex and the waiters read -- this is done by the barriers in 146 * to futex and the waiters read -- this is done by the barriers for both
147 * get_futex_key_refs(), through either ihold or atomic_inc, depending on the 147 * shared and private futexes in get_futex_key_refs().
148 * futex type.
149 * 148 *
150 * This yields the following case (where X:=waiters, Y:=futex): 149 * This yields the following case (where X:=waiters, Y:=futex):
151 * 150 *
@@ -344,13 +343,20 @@ static void get_futex_key_refs(union futex_key *key)
344 futex_get_mm(key); /* implies MB (B) */ 343 futex_get_mm(key); /* implies MB (B) */
345 break; 344 break;
346 default: 345 default:
346 /*
347 * Private futexes do not hold reference on an inode or
348 * mm, therefore the only purpose of calling get_futex_key_refs
349 * is because we need the barrier for the lockless waiter check.
350 */
347 smp_mb(); /* explicit MB (B) */ 351 smp_mb(); /* explicit MB (B) */
348 } 352 }
349} 353}
350 354
351/* 355/*
352 * Drop a reference to the resource addressed by a key. 356 * Drop a reference to the resource addressed by a key.
353 * The hash bucket spinlock must not be held. 357 * The hash bucket spinlock must not be held. This is
358 * a no-op for private futexes, see comment in the get
359 * counterpart.
354 */ 360 */
355static void drop_futex_key_refs(union futex_key *key) 361static void drop_futex_key_refs(union futex_key *key)
356{ 362{
@@ -641,8 +647,14 @@ static struct futex_pi_state * alloc_pi_state(void)
641 return pi_state; 647 return pi_state;
642} 648}
643 649
650/*
651 * Must be called with the hb lock held.
652 */
644static void free_pi_state(struct futex_pi_state *pi_state) 653static void free_pi_state(struct futex_pi_state *pi_state)
645{ 654{
655 if (!pi_state)
656 return;
657
646 if (!atomic_dec_and_test(&pi_state->refcount)) 658 if (!atomic_dec_and_test(&pi_state->refcount))
647 return; 659 return;
648 660
@@ -1521,15 +1533,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1521 } 1533 }
1522 1534
1523retry: 1535retry:
1524 if (pi_state != NULL) {
1525 /*
1526 * We will have to lookup the pi_state again, so free this one
1527 * to keep the accounting correct.
1528 */
1529 free_pi_state(pi_state);
1530 pi_state = NULL;
1531 }
1532
1533 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); 1536 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1534 if (unlikely(ret != 0)) 1537 if (unlikely(ret != 0))
1535 goto out; 1538 goto out;
@@ -1619,6 +1622,8 @@ retry_private:
1619 case 0: 1622 case 0:
1620 break; 1623 break;
1621 case -EFAULT: 1624 case -EFAULT:
1625 free_pi_state(pi_state);
1626 pi_state = NULL;
1622 double_unlock_hb(hb1, hb2); 1627 double_unlock_hb(hb1, hb2);
1623 hb_waiters_dec(hb2); 1628 hb_waiters_dec(hb2);
1624 put_futex_key(&key2); 1629 put_futex_key(&key2);
@@ -1634,6 +1639,8 @@ retry_private:
1634 * exit to complete. 1639 * exit to complete.
1635 * - The user space value changed. 1640 * - The user space value changed.
1636 */ 1641 */
1642 free_pi_state(pi_state);
1643 pi_state = NULL;
1637 double_unlock_hb(hb1, hb2); 1644 double_unlock_hb(hb1, hb2);
1638 hb_waiters_dec(hb2); 1645 hb_waiters_dec(hb2);
1639 put_futex_key(&key2); 1646 put_futex_key(&key2);
@@ -1710,6 +1717,7 @@ retry_private:
1710 } 1717 }
1711 1718
1712out_unlock: 1719out_unlock:
1720 free_pi_state(pi_state);
1713 double_unlock_hb(hb1, hb2); 1721 double_unlock_hb(hb1, hb2);
1714 hb_waiters_dec(hb2); 1722 hb_waiters_dec(hb2);
1715 1723
@@ -1727,8 +1735,6 @@ out_put_keys:
1727out_put_key1: 1735out_put_key1:
1728 put_futex_key(&key1); 1736 put_futex_key(&key1);
1729out: 1737out:
1730 if (pi_state != NULL)
1731 free_pi_state(pi_state);
1732 return ret ? ret : task_count; 1738 return ret ? ret : task_count;
1733} 1739}
1734 1740
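
The futex_requeue() rework becomes simpler because free_pi_state() now tolerates a NULL argument, like kfree(): every exit path can call it unconditionally instead of the old "if (pi_state != NULL)" dance, and the error paths drop their reference right where they bail out. A small userspace sketch of that cleanup idiom with a refcounted object (types and names are simplified stand-ins):

#include <stdio.h>
#include <stdlib.h>

struct pi_state {
	int refcount;
};

/* NULL-tolerant release, so callers can free unconditionally on every
 * error/exit path (the same convenience kfree(NULL) gives). */
static void free_pi_state(struct pi_state *ps)
{
	if (!ps)
		return;
	if (--ps->refcount == 0) {
		puts("freeing pi_state");
		free(ps);
	}
}

static int do_requeue(int fail_early)
{
	struct pi_state *ps = NULL;

	if (fail_early)
		goto out;	/* ps is still NULL: the call below is a no-op */

	ps = calloc(1, sizeof(*ps));
	ps->refcount = 1;
out:
	free_pi_state(ps);	/* one unconditional cleanup for all paths */
	return 0;
}

int main(void)
{
	do_requeue(1);
	do_requeue(0);
	return 0;
}
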
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index cf66c5c8458e..3b7408759bdf 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -35,7 +35,7 @@ config GCOV_KERNEL
 config GCOV_PROFILE_ALL
 	bool "Profile entire Kernel"
 	depends on GCOV_KERNEL
-	depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM
+	depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM || ARM64
 	default n
 	---help---
 	This options activates profiling for the entire kernel.
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 8637e041a247..80f7a6d00519 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -196,12 +196,34 @@ int __request_module(bool wait, const char *fmt, ...)
196EXPORT_SYMBOL(__request_module); 196EXPORT_SYMBOL(__request_module);
197#endif /* CONFIG_MODULES */ 197#endif /* CONFIG_MODULES */
198 198
199static void call_usermodehelper_freeinfo(struct subprocess_info *info)
200{
201 if (info->cleanup)
202 (*info->cleanup)(info);
203 kfree(info);
204}
205
206static void umh_complete(struct subprocess_info *sub_info)
207{
208 struct completion *comp = xchg(&sub_info->complete, NULL);
209 /*
210 * See call_usermodehelper_exec(). If xchg() returns NULL
211 * we own sub_info, the UMH_KILLABLE caller has gone away
212 * or the caller used UMH_NO_WAIT.
213 */
214 if (comp)
215 complete(comp);
216 else
217 call_usermodehelper_freeinfo(sub_info);
218}
219
199/* 220/*
200 * This is the task which runs the usermode application 221 * This is the task which runs the usermode application
201 */ 222 */
202static int ____call_usermodehelper(void *data) 223static int ____call_usermodehelper(void *data)
203{ 224{
204 struct subprocess_info *sub_info = data; 225 struct subprocess_info *sub_info = data;
226 int wait = sub_info->wait & ~UMH_KILLABLE;
205 struct cred *new; 227 struct cred *new;
206 int retval; 228 int retval;
207 229
@@ -221,7 +243,7 @@ static int ____call_usermodehelper(void *data)
221 retval = -ENOMEM; 243 retval = -ENOMEM;
222 new = prepare_kernel_cred(current); 244 new = prepare_kernel_cred(current);
223 if (!new) 245 if (!new)
224 goto fail; 246 goto out;
225 247
226 spin_lock(&umh_sysctl_lock); 248 spin_lock(&umh_sysctl_lock);
227 new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); 249 new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
@@ -233,7 +255,7 @@ static int ____call_usermodehelper(void *data)
233 retval = sub_info->init(sub_info, new); 255 retval = sub_info->init(sub_info, new);
234 if (retval) { 256 if (retval) {
235 abort_creds(new); 257 abort_creds(new);
236 goto fail; 258 goto out;
237 } 259 }
238 } 260 }
239 261
@@ -242,12 +264,13 @@ static int ____call_usermodehelper(void *data)
242 retval = do_execve(getname_kernel(sub_info->path), 264 retval = do_execve(getname_kernel(sub_info->path),
243 (const char __user *const __user *)sub_info->argv, 265 (const char __user *const __user *)sub_info->argv,
244 (const char __user *const __user *)sub_info->envp); 266 (const char __user *const __user *)sub_info->envp);
267out:
268 sub_info->retval = retval;
269 /* wait_for_helper() will call umh_complete if UHM_WAIT_PROC. */
270 if (wait != UMH_WAIT_PROC)
271 umh_complete(sub_info);
245 if (!retval) 272 if (!retval)
246 return 0; 273 return 0;
247
248 /* Exec failed? */
249fail:
250 sub_info->retval = retval;
251 do_exit(0); 274 do_exit(0);
252} 275}
253 276
@@ -258,26 +281,6 @@ static int call_helper(void *data)
258 return ____call_usermodehelper(data); 281 return ____call_usermodehelper(data);
259} 282}
260 283
261static void call_usermodehelper_freeinfo(struct subprocess_info *info)
262{
263 if (info->cleanup)
264 (*info->cleanup)(info);
265 kfree(info);
266}
267
268static void umh_complete(struct subprocess_info *sub_info)
269{
270 struct completion *comp = xchg(&sub_info->complete, NULL);
271 /*
272 * See call_usermodehelper_exec(). If xchg() returns NULL
273 * we own sub_info, the UMH_KILLABLE caller has gone away.
274 */
275 if (comp)
276 complete(comp);
277 else
278 call_usermodehelper_freeinfo(sub_info);
279}
280
281/* Keventd can't block, but this (a child) can. */ 284/* Keventd can't block, but this (a child) can. */
282static int wait_for_helper(void *data) 285static int wait_for_helper(void *data)
283{ 286{
@@ -336,18 +339,8 @@ static void __call_usermodehelper(struct work_struct *work)
336 kmod_thread_locker = NULL; 339 kmod_thread_locker = NULL;
337 } 340 }
338 341
339 switch (wait) { 342 if (pid < 0) {
340 case UMH_NO_WAIT: 343 sub_info->retval = pid;
341 call_usermodehelper_freeinfo(sub_info);
342 break;
343
344 case UMH_WAIT_PROC:
345 if (pid > 0)
346 break;
347 /* FALLTHROUGH */
348 case UMH_WAIT_EXEC:
349 if (pid < 0)
350 sub_info->retval = pid;
351 umh_complete(sub_info); 344 umh_complete(sub_info);
352 } 345 }
353} 346}
@@ -588,7 +581,12 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
588 goto out; 581 goto out;
589 } 582 }
590 583
591 sub_info->complete = &done; 584 /*
585 * Set the completion pointer only if there is a waiter.
586 * This makes it possible to use umh_complete to free
587 * the data structure in case of UMH_NO_WAIT.
588 */
589 sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
592 sub_info->wait = wait; 590 sub_info->wait = wait;
593 591
594 queue_work(khelper_wq, &sub_info->work); 592 queue_work(khelper_wq, &sub_info->work);
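
The kmod.c rework hinges on umh_complete(): exactly one of the helper and the waiter "wins" an atomic exchange on sub_info->complete, and whoever sees NULL knows the other side is gone and owns the cleanup. That is also what lets UMH_NO_WAIT callers simply start with a NULL completion pointer. A userspace sketch of the handoff with C11 atomics (the struct layout and helper names are simplified, and a plain int stands in for a struct completion):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct subprocess_info {
	_Atomic(int *) complete;	/* stand-in for a struct completion * */
	int retval;
};

static void freeinfo(struct subprocess_info *info)
{
	puts("freeing subprocess_info");
	free(info);
}

/* Called by the helper side when it is done. */
static void umh_complete(struct subprocess_info *info)
{
	int *comp = atomic_exchange(&info->complete, (int *)NULL);

	if (comp)
		*comp = 1;	/* waiter still there: signal it */
	else
		freeinfo(info);	/* waiter gone (or never waited): we own the cleanup */
}

int main(void)
{
	int done = 0;
	struct subprocess_info *info = calloc(1, sizeof(*info));

	/* UMH_NO_WAIT analogue: no waiter, complete starts out NULL, so
	 * umh_complete() frees the info itself. */
	atomic_store(&info->complete, (int *)NULL);
	umh_complete(info);

	info = calloc(1, sizeof(*info));
	atomic_store(&info->complete, &done);	/* a waiter is present */
	umh_complete(info);
	printf("waiter signalled: %d\n", done);
	freeinfo(info);		/* here the waiter side does the final free */
	return 0;
}
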
diff --git a/kernel/panic.c b/kernel/panic.c
index d09dc5c32c67..cf80672b7924 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -244,6 +244,7 @@ static const struct tnt tnts[] = {
244 * 'I' - Working around severe firmware bug. 244 * 'I' - Working around severe firmware bug.
245 * 'O' - Out-of-tree module has been loaded. 245 * 'O' - Out-of-tree module has been loaded.
246 * 'E' - Unsigned module has been loaded. 246 * 'E' - Unsigned module has been loaded.
247 * 'L' - A soft lockup has previously occurred.
247 * 248 *
248 * The string is overwritten by the next call to print_tainted(). 249 * The string is overwritten by the next call to print_tainted().
249 */ 250 */
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a9dfa79b6bab..1f35a3478f3c 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -502,8 +502,14 @@ int hibernation_restore(int platform_mode)
502 error = dpm_suspend_start(PMSG_QUIESCE); 502 error = dpm_suspend_start(PMSG_QUIESCE);
503 if (!error) { 503 if (!error) {
504 error = resume_target_kernel(platform_mode); 504 error = resume_target_kernel(platform_mode);
505 dpm_resume_end(PMSG_RECOVER); 505 /*
506 * The above should either succeed and jump to the new kernel,
507 * or return with an error. Otherwise things are just
508 * undefined, so let's be paranoid.
509 */
510 BUG_ON(!error);
506 } 511 }
512 dpm_resume_end(PMSG_RECOVER);
507 pm_restore_gfp_mask(); 513 pm_restore_gfp_mask();
508 resume_console(); 514 resume_console();
509 pm_restore_console(); 515 pm_restore_console();
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4ca9a33ff620..c347e3ce3a55 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -146,7 +146,7 @@ static int platform_suspend_prepare(suspend_state_t state)
 
 static int platform_suspend_prepare_late(suspend_state_t state)
 {
-	return state == PM_SUSPEND_FREEZE && freeze_ops->prepare ?
+	return state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->prepare ?
 		freeze_ops->prepare() : 0;
 }
 
@@ -164,7 +164,7 @@ static void platform_resume_noirq(suspend_state_t state)
 
 static void platform_resume_early(suspend_state_t state)
 {
-	if (state == PM_SUSPEND_FREEZE && freeze_ops->restore)
+	if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->restore)
 		freeze_ops->restore();
 }
 
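
Both suspend.c fixes are the same defensive pattern: the freeze_ops pointer itself may be NULL when no platform has registered, so it must be checked before any of its optional callbacks. A tiny sketch of NULL-guarded optional ops (names are illustrative):

#include <stdio.h>

struct platform_freeze_ops {
	int (*prepare)(void);
	void (*restore)(void);
};

static const struct platform_freeze_ops *freeze_ops;	/* may stay NULL */

static int platform_prepare(void)
{
	/* Guard the ops pointer first, then the optional callback. */
	return freeze_ops && freeze_ops->prepare ? freeze_ops->prepare() : 0;
}

int main(void)
{
	printf("prepare -> %d (no ops registered)\n", platform_prepare());
	return 0;
}
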
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 133e47223095..9815447d22e0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3299,11 +3299,16 @@ static void _rcu_barrier(struct rcu_state *rsp)
3299 continue; 3299 continue;
3300 rdp = per_cpu_ptr(rsp->rda, cpu); 3300 rdp = per_cpu_ptr(rsp->rda, cpu);
3301 if (rcu_is_nocb_cpu(cpu)) { 3301 if (rcu_is_nocb_cpu(cpu)) {
3302 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, 3302 if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
3303 rsp->n_barrier_done); 3303 _rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
3304 atomic_inc(&rsp->barrier_cpu_count); 3304 rsp->n_barrier_done);
3305 __call_rcu(&rdp->barrier_head, rcu_barrier_callback, 3305 } else {
3306 rsp, cpu, 0); 3306 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
3307 rsp->n_barrier_done);
3308 atomic_inc(&rsp->barrier_cpu_count);
3309 __call_rcu(&rdp->barrier_head,
3310 rcu_barrier_callback, rsp, cpu, 0);
3311 }
3307 } else if (ACCESS_ONCE(rdp->qlen)) { 3312 } else if (ACCESS_ONCE(rdp->qlen)) {
3308 _rcu_barrier_trace(rsp, "OnlineQ", cpu, 3313 _rcu_barrier_trace(rsp, "OnlineQ", cpu,
3309 rsp->n_barrier_done); 3314 rsp->n_barrier_done);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index d03764652d91..bbdc45d8d74f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -587,6 +587,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
587static void print_cpu_stall_info_end(void); 587static void print_cpu_stall_info_end(void);
588static void zero_cpu_stall_ticks(struct rcu_data *rdp); 588static void zero_cpu_stall_ticks(struct rcu_data *rdp);
589static void increment_cpu_stall_ticks(void); 589static void increment_cpu_stall_ticks(void);
590static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
590static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); 591static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
591static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); 592static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
592static void rcu_init_one_nocb(struct rcu_node *rnp); 593static void rcu_init_one_nocb(struct rcu_node *rnp);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 387dd4599344..c1d7f27bd38f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2050,6 +2050,33 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
2050} 2050}
2051 2051
2052/* 2052/*
2053 * Does the specified CPU need an RCU callback for the specified flavor
2054 * of rcu_barrier()?
2055 */
2056static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
2057{
2058 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2059 struct rcu_head *rhp;
2060
2061 /* No-CBs CPUs might have callbacks on any of three lists. */
2062 rhp = ACCESS_ONCE(rdp->nocb_head);
2063 if (!rhp)
2064 rhp = ACCESS_ONCE(rdp->nocb_gp_head);
2065 if (!rhp)
2066 rhp = ACCESS_ONCE(rdp->nocb_follower_head);
2067
2068 /* Having no rcuo kthread but CBs after scheduler starts is bad! */
2069 if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) {
2070 /* RCU callback enqueued before CPU first came online??? */
2071 pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
2072 cpu, rhp->func);
2073 WARN_ON_ONCE(1);
2074 }
2075
2076 return !!rhp;
2077}
2078
2079/*
2053 * Enqueue the specified string of rcu_head structures onto the specified 2080 * Enqueue the specified string of rcu_head structures onto the specified
2054 * CPU's no-CBs lists. The CPU is specified by rdp, the head of the 2081 * CPU's no-CBs lists. The CPU is specified by rdp, the head of the
2055 * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy 2082 * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy
@@ -2642,6 +2669,12 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
2642 2669
2643#else /* #ifdef CONFIG_RCU_NOCB_CPU */ 2670#else /* #ifdef CONFIG_RCU_NOCB_CPU */
2644 2671
2672static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
2673{
2674 WARN_ON_ONCE(1); /* Should be dead code. */
2675 return false;
2676}
2677
2645static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 2678static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
2646{ 2679{
2647} 2680}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44999505e1bf..24beb9bb4c3e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2475,44 +2475,6 @@ EXPORT_PER_CPU_SYMBOL(kstat);
2475EXPORT_PER_CPU_SYMBOL(kernel_cpustat); 2475EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
2476 2476
2477/* 2477/*
2478 * Return any ns on the sched_clock that have not yet been accounted in
2479 * @p in case that task is currently running.
2480 *
2481 * Called with task_rq_lock() held on @rq.
2482 */
2483static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
2484{
2485 u64 ns = 0;
2486
2487 /*
2488 * Must be ->curr _and_ ->on_rq. If dequeued, we would
2489 * project cycles that may never be accounted to this
2490 * thread, breaking clock_gettime().
2491 */
2492 if (task_current(rq, p) && task_on_rq_queued(p)) {
2493 update_rq_clock(rq);
2494 ns = rq_clock_task(rq) - p->se.exec_start;
2495 if ((s64)ns < 0)
2496 ns = 0;
2497 }
2498
2499 return ns;
2500}
2501
2502unsigned long long task_delta_exec(struct task_struct *p)
2503{
2504 unsigned long flags;
2505 struct rq *rq;
2506 u64 ns = 0;
2507
2508 rq = task_rq_lock(p, &flags);
2509 ns = do_task_delta_exec(p, rq);
2510 task_rq_unlock(rq, p, &flags);
2511
2512 return ns;
2513}
2514
2515/*
2516 * Return accounted runtime for the task. 2478 * Return accounted runtime for the task.
2517 * In case the task is currently running, return the runtime plus current's 2479 * In case the task is currently running, return the runtime plus current's
2518 * pending runtime that have not been accounted yet. 2480 * pending runtime that have not been accounted yet.
@@ -2521,7 +2483,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
2521{ 2483{
2522 unsigned long flags; 2484 unsigned long flags;
2523 struct rq *rq; 2485 struct rq *rq;
2524 u64 ns = 0; 2486 u64 ns;
2525 2487
2526#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) 2488#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
2527 /* 2489 /*
@@ -2540,7 +2502,16 @@ unsigned long long task_sched_runtime(struct task_struct *p)
2540#endif 2502#endif
2541 2503
2542 rq = task_rq_lock(p, &flags); 2504 rq = task_rq_lock(p, &flags);
2543 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); 2505 /*
2506 * Must be ->curr _and_ ->on_rq. If dequeued, we would
2507 * project cycles that may never be accounted to this
2508 * thread, breaking clock_gettime().
2509 */
2510 if (task_current(rq, p) && task_on_rq_queued(p)) {
2511 update_rq_clock(rq);
2512 p->sched_class->update_curr(rq);
2513 }
2514 ns = p->se.sum_exec_runtime;
2544 task_rq_unlock(rq, p, &flags); 2515 task_rq_unlock(rq, p, &flags);
2545 2516
2546 return ns; 2517 return ns;
@@ -2951,6 +2922,47 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
2951} 2922}
2952NOKPROBE_SYMBOL(preempt_schedule); 2923NOKPROBE_SYMBOL(preempt_schedule);
2953EXPORT_SYMBOL(preempt_schedule); 2924EXPORT_SYMBOL(preempt_schedule);
2925
2926#ifdef CONFIG_CONTEXT_TRACKING
2927/**
2928 * preempt_schedule_context - preempt_schedule called by tracing
2929 *
2930 * The tracing infrastructure uses preempt_enable_notrace to prevent
2931 * recursion and tracing preempt enabling caused by the tracing
2932 * infrastructure itself. But as tracing can happen in areas coming
2933 * from userspace or just about to enter userspace, a preempt enable
2934 * can occur before user_exit() is called. This will cause the scheduler
2935 * to be called when the system is still in usermode.
2936 *
2937 * To prevent this, the preempt_enable_notrace will use this function
2938 * instead of preempt_schedule() to exit user context if needed before
2939 * calling the scheduler.
2940 */
2941asmlinkage __visible void __sched notrace preempt_schedule_context(void)
2942{
2943 enum ctx_state prev_ctx;
2944
2945 if (likely(!preemptible()))
2946 return;
2947
2948 do {
2949 __preempt_count_add(PREEMPT_ACTIVE);
2950 /*
2951 * Needs preempt disabled in case user_exit() is traced
2952 * and the tracer calls preempt_enable_notrace() causing
2953 * an infinite recursion.
2954 */
2955 prev_ctx = exception_enter();
2956 __schedule();
2957 exception_exit(prev_ctx);
2958
2959 __preempt_count_sub(PREEMPT_ACTIVE);
2960 barrier();
2961 } while (need_resched());
2962}
2963EXPORT_SYMBOL_GPL(preempt_schedule_context);
2964#endif /* CONFIG_CONTEXT_TRACKING */
2965
2954#endif /* CONFIG_PREEMPT */ 2966#endif /* CONFIG_PREEMPT */
2955 2967
2956/* 2968/*
@@ -6327,6 +6339,10 @@ static void sched_init_numa(void)
6327 if (!sched_debug()) 6339 if (!sched_debug())
6328 break; 6340 break;
6329 } 6341 }
6342
6343 if (!level)
6344 return;
6345
6330 /* 6346 /*
6331 * 'level' contains the number of unique distances, excluding the 6347 * 'level' contains the number of unique distances, excluding the
6332 * identity distance node_distance(i,i). 6348 * identity distance node_distance(i,i).
@@ -7403,8 +7419,12 @@ void sched_move_task(struct task_struct *tsk)
7403 if (unlikely(running)) 7419 if (unlikely(running))
7404 put_prev_task(rq, tsk); 7420 put_prev_task(rq, tsk);
7405 7421
7406 tg = container_of(task_css_check(tsk, cpu_cgrp_id, 7422 /*
7407 lockdep_is_held(&tsk->sighand->siglock)), 7423 * All callers are synchronized by task_rq_lock(); we do not use RCU
7424 * which is pointless here. Thus, we pass "true" to task_css_check()
7425 * to prevent lockdep warnings.
7426 */
7427 tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
7408 struct task_group, css); 7428 struct task_group, css);
7409 tg = autogroup_task_group(tsk, tg); 7429 tg = autogroup_task_group(tsk, tg);
7410 tsk->sched_task_group = tg; 7430 tsk->sched_task_group = tg;
@@ -7833,6 +7853,11 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
7833 sched_offline_group(tg); 7853 sched_offline_group(tg);
7834} 7854}
7835 7855
7856static void cpu_cgroup_fork(struct task_struct *task)
7857{
7858 sched_move_task(task);
7859}
7860
7836static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, 7861static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
7837 struct cgroup_taskset *tset) 7862 struct cgroup_taskset *tset)
7838{ 7863{
@@ -8205,6 +8230,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
8205 .css_free = cpu_cgroup_css_free, 8230 .css_free = cpu_cgroup_css_free,
8206 .css_online = cpu_cgroup_css_online, 8231 .css_online = cpu_cgroup_css_online,
8207 .css_offline = cpu_cgroup_css_offline, 8232 .css_offline = cpu_cgroup_css_offline,
8233 .fork = cpu_cgroup_fork,
8208 .can_attach = cpu_cgroup_can_attach, 8234 .can_attach = cpu_cgroup_can_attach,
8209 .attach = cpu_cgroup_attach, 8235 .attach = cpu_cgroup_attach,
8210 .exit = cpu_cgroup_exit, 8236 .exit = cpu_cgroup_exit,
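
The sched/core.c change above is the heart of this part of the series: task_sched_runtime() no longer open-codes CFS delta accounting but asks the task's scheduling class to fold in the in-flight delta through a new update_curr() hook, which the following hunks add to the deadline, fair, idle, rt and stop classes and to struct sched_class. A compact sketch of that "let the class update, then read the accumulated sum" shape (the structs and values below are illustrative, not the kernel definitions):

#include <stdio.h>

struct rq;			/* opaque here */

struct sched_class {
	const char *name;
	void (*update_curr)(struct rq *rq);
};

struct task {
	const struct sched_class *sched_class;
	unsigned long long sum_exec_runtime;	/* ns accounted so far */
};

static struct task *rq_curr;	/* pretend this task is currently running */

static void update_curr_fair(struct rq *rq)
{
	(void)rq;
	rq_curr->sum_exec_runtime += 1000;	/* fold in the pending delta */
}

static const struct sched_class fair_sched_class = {
	.name = "fair",
	.update_curr = update_curr_fair,
};

/* Shape of the post-patch task_sched_runtime(): have the class account the
 * running delta, then simply return the accumulated sum. */
static unsigned long long task_sched_runtime(struct task *p, struct rq *rq)
{
	if (p == rq_curr)
		p->sched_class->update_curr(rq);
	return p->sum_exec_runtime;
}

int main(void)
{
	struct task t = { .sched_class = &fair_sched_class, .sum_exec_runtime = 5000 };

	rq_curr = &t;
	printf("runtime = %llu ns\n", task_sched_runtime(&t, NULL));
	return 0;
}
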
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 256e577faf1b..28fa9d9e9201 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -518,12 +518,20 @@ again:
518 } 518 }
519 519
520 /* 520 /*
521 * We need to take care of a possible races here. In fact, the 521 * We need to take care of several possible races here:
522 * task might have changed its scheduling policy to something 522 *
523 * different from SCHED_DEADLINE or changed its reservation 523 * - the task might have changed its scheduling policy
524 * parameters (through sched_setattr()). 524 * to something different than SCHED_DEADLINE
525 * - the task might have changed its reservation parameters
526 * (through sched_setattr())
527 * - the task might have been boosted by someone else and
528 * might be in the boosting/deboosting path
529 *
530 * In all this cases we bail out, as the task is already
531 * in the runqueue or is going to be enqueued back anyway.
525 */ 532 */
526 if (!dl_task(p) || dl_se->dl_new) 533 if (!dl_task(p) || dl_se->dl_new ||
534 dl_se->dl_boosted || !dl_se->dl_throttled)
527 goto unlock; 535 goto unlock;
528 536
529 sched_clock_tick(); 537 sched_clock_tick();
@@ -532,7 +540,7 @@ again:
532 dl_se->dl_yielded = 0; 540 dl_se->dl_yielded = 0;
533 if (task_on_rq_queued(p)) { 541 if (task_on_rq_queued(p)) {
534 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); 542 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
535 if (task_has_dl_policy(rq->curr)) 543 if (dl_task(rq->curr))
536 check_preempt_curr_dl(rq, p, 0); 544 check_preempt_curr_dl(rq, p, 0);
537 else 545 else
538 resched_curr(rq); 546 resched_curr(rq);
@@ -847,8 +855,19 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
847 * smaller than our one... OTW we keep our runtime and 855 * smaller than our one... OTW we keep our runtime and
848 * deadline. 856 * deadline.
849 */ 857 */
850 if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) 858 if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
851 pi_se = &pi_task->dl; 859 pi_se = &pi_task->dl;
860 } else if (!dl_prio(p->normal_prio)) {
861 /*
862 * Special case in which we have a !SCHED_DEADLINE task
863 * that is going to be deboosted, but exceedes its
864 * runtime while doing so. No point in replenishing
865 * it, as it's going to return back to its original
866 * scheduling class after this.
867 */
868 BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
869 return;
870 }
852 871
853 /* 872 /*
854 * If p is throttled, we do nothing. In fact, if it exhausted 873 * If p is throttled, we do nothing. In fact, if it exhausted
@@ -1607,8 +1626,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
1607 /* Only reschedule if pushing failed */ 1626 /* Only reschedule if pushing failed */
1608 check_resched = 0; 1627 check_resched = 0;
1609#endif /* CONFIG_SMP */ 1628#endif /* CONFIG_SMP */
1610 if (check_resched && task_has_dl_policy(rq->curr)) 1629 if (check_resched) {
1611 check_preempt_curr_dl(rq, p, 0); 1630 if (dl_task(rq->curr))
1631 check_preempt_curr_dl(rq, p, 0);
1632 else
1633 resched_curr(rq);
1634 }
1612 } 1635 }
1613} 1636}
1614 1637
@@ -1678,4 +1701,6 @@ const struct sched_class dl_sched_class = {
1678 .prio_changed = prio_changed_dl, 1701 .prio_changed = prio_changed_dl,
1679 .switched_from = switched_from_dl, 1702 .switched_from = switched_from_dl,
1680 .switched_to = switched_to_dl, 1703 .switched_to = switched_to_dl,
1704
1705 .update_curr = update_curr_dl,
1681}; 1706};
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0b069bf3e708..ef2b104b254c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -726,6 +726,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
726 account_cfs_rq_runtime(cfs_rq, delta_exec); 726 account_cfs_rq_runtime(cfs_rq, delta_exec);
727} 727}
728 728
729static void update_curr_fair(struct rq *rq)
730{
731 update_curr(cfs_rq_of(&rq->curr->se));
732}
733
729static inline void 734static inline void
730update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) 735update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
731{ 736{
@@ -828,11 +833,12 @@ static unsigned int task_nr_scan_windows(struct task_struct *p)
828 833
829static unsigned int task_scan_min(struct task_struct *p) 834static unsigned int task_scan_min(struct task_struct *p)
830{ 835{
836 unsigned int scan_size = ACCESS_ONCE(sysctl_numa_balancing_scan_size);
831 unsigned int scan, floor; 837 unsigned int scan, floor;
832 unsigned int windows = 1; 838 unsigned int windows = 1;
833 839
834 if (sysctl_numa_balancing_scan_size < MAX_SCAN_WINDOW) 840 if (scan_size < MAX_SCAN_WINDOW)
835 windows = MAX_SCAN_WINDOW / sysctl_numa_balancing_scan_size; 841 windows = MAX_SCAN_WINDOW / scan_size;
836 floor = 1000 / windows; 842 floor = 1000 / windows;
837 843
838 scan = sysctl_numa_balancing_scan_period_min / task_nr_scan_windows(p); 844 scan = sysctl_numa_balancing_scan_period_min / task_nr_scan_windows(p);
@@ -1164,9 +1170,26 @@ static void task_numa_compare(struct task_numa_env *env,
1164 long moveimp = imp; 1170 long moveimp = imp;
1165 1171
1166 rcu_read_lock(); 1172 rcu_read_lock();
1167 cur = ACCESS_ONCE(dst_rq->curr); 1173
1168 if (cur->pid == 0) /* idle */ 1174 raw_spin_lock_irq(&dst_rq->lock);
1175 cur = dst_rq->curr;
1176 /*
1177 * No need to move the exiting task, and this ensures that ->curr
1178 * wasn't reaped and thus get_task_struct() in task_numa_assign()
1179 * is safe under RCU read lock.
1180 * Note that rcu_read_lock() itself can't protect from the final
1181 * put_task_struct() after the last schedule().
1182 */
1183 if ((cur->flags & PF_EXITING) || is_idle_task(cur))
1169 cur = NULL; 1184 cur = NULL;
1185 raw_spin_unlock_irq(&dst_rq->lock);
1186
1187 /*
1188 * Because we have preemption enabled we can get migrated around and
1189 * end try selecting ourselves (current == env->p) as a swap candidate.
1190 */
1191 if (cur == env->p)
1192 goto unlock;
1170 1193
1171 /* 1194 /*
1172 * "imp" is the fault differential for the source task between the 1195 * "imp" is the fault differential for the source task between the
@@ -1520,7 +1543,7 @@ static void update_task_scan_period(struct task_struct *p,
1520 * scanning faster if shared accesses dominate as it may 1543 * scanning faster if shared accesses dominate as it may
1521 * simply bounce migrations uselessly 1544 * simply bounce migrations uselessly
1522 */ 1545 */
1523 ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared)); 1546 ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared + 1));
1524 diff = (diff * ratio) / NUMA_PERIOD_SLOTS; 1547 diff = (diff * ratio) / NUMA_PERIOD_SLOTS;
1525 } 1548 }
1526 1549
@@ -7938,6 +7961,8 @@ const struct sched_class fair_sched_class = {
7938 7961
7939 .get_rr_interval = get_rr_interval_fair, 7962 .get_rr_interval = get_rr_interval_fair,
7940 7963
7964 .update_curr = update_curr_fair,
7965
7941#ifdef CONFIG_FAIR_GROUP_SCHED 7966#ifdef CONFIG_FAIR_GROUP_SCHED
7942 .task_move_group = task_move_group_fair, 7967 .task_move_group = task_move_group_fair,
7943#endif 7968#endif
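
The task_scan_min() fix reads sysctl_numa_balancing_scan_size once into a local before using it twice; a concurrent writer shrinking the sysctl between the comparison and the division could otherwise produce a bogus window count, which is also why the sysctl.c hunk below enforces a minimum of 1. A userspace sketch of the snapshot idiom, using a C11 relaxed load in place of ACCESS_ONCE (the harness and variable names are invented; the constant mirrors MAX_SCAN_WINDOW in fair.c):

#include <stdatomic.h>
#include <stdio.h>

#define MAX_SCAN_WINDOW 2560	/* MB */

static atomic_uint scan_size_sysctl = 256;	/* concurrently writable tunable */

static unsigned int task_scan_windows(void)
{
	/* Snapshot once; every later use sees the same value even if a
	 * writer changes the sysctl in between. */
	unsigned int scan_size = atomic_load_explicit(&scan_size_sysctl,
						      memory_order_relaxed);
	unsigned int windows = 1;

	if (scan_size < MAX_SCAN_WINDOW)
		windows = MAX_SCAN_WINDOW / scan_size;
	return windows;
}

int main(void)
{
	printf("windows = %u\n", task_scan_windows());
	return 0;
}
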
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 67ad4e7f506a..c65dac8c97cd 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -75,6 +75,10 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task
75 return 0; 75 return 0;
76} 76}
77 77
78static void update_curr_idle(struct rq *rq)
79{
80}
81
78/* 82/*
79 * Simple, special scheduling class for the per-CPU idle tasks: 83 * Simple, special scheduling class for the per-CPU idle tasks:
80 */ 84 */
@@ -101,4 +105,5 @@ const struct sched_class idle_sched_class = {
101 105
102 .prio_changed = prio_changed_idle, 106 .prio_changed = prio_changed_idle,
103 .switched_to = switched_to_idle, 107 .switched_to = switched_to_idle,
108 .update_curr = update_curr_idle,
104}; 109};
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d024e6ce30ba..20bca398084a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2128,6 +2128,8 @@ const struct sched_class rt_sched_class = {
2128 2128
2129 .prio_changed = prio_changed_rt, 2129 .prio_changed = prio_changed_rt,
2130 .switched_to = switched_to_rt, 2130 .switched_to = switched_to_rt,
2131
2132 .update_curr = update_curr_rt,
2131}; 2133};
2132 2134
2133#ifdef CONFIG_SCHED_DEBUG 2135#ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 24156c8434d1..2df8ef067cc5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1135,6 +1135,8 @@ struct sched_class {
1135 unsigned int (*get_rr_interval) (struct rq *rq, 1135 unsigned int (*get_rr_interval) (struct rq *rq,
1136 struct task_struct *task); 1136 struct task_struct *task);
1137 1137
1138 void (*update_curr) (struct rq *rq);
1139
1138#ifdef CONFIG_FAIR_GROUP_SCHED 1140#ifdef CONFIG_FAIR_GROUP_SCHED
1139 void (*task_move_group) (struct task_struct *p, int on_rq); 1141 void (*task_move_group) (struct task_struct *p, int on_rq);
1140#endif 1142#endif
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 67426e529f59..79ffec45a6ac 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -102,6 +102,10 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task)
102 return 0; 102 return 0;
103} 103}
104 104
105static void update_curr_stop(struct rq *rq)
106{
107}
108
105/* 109/*
106 * Simple, special scheduling class for the per-CPU stop tasks: 110 * Simple, special scheduling class for the per-CPU stop tasks:
107 */ 111 */
@@ -128,4 +132,5 @@ const struct sched_class stop_sched_class = {
128 132
129 .prio_changed = prio_changed_stop, 133 .prio_changed = prio_changed_stop,
130 .switched_to = switched_to_stop, 134 .switched_to = switched_to_stop,
135 .update_curr = update_curr_stop,
131}; 136};
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4aada6d9fe74..15f2511a1b7c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -387,7 +387,8 @@ static struct ctl_table kern_table[] = {
 		.data		= &sysctl_numa_balancing_scan_size,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
 	},
 	{
 		.procname	= "numa_balancing",
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 9c94c19f1305..55449909f114 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -72,7 +72,7 @@ static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt,
 	 * Also omit the add if it would overflow the u64 boundary.
 	 */
 	if ((~0ULL - clc > rnd) &&
-	    (!ismax || evt->mult <= (1U << evt->shift)))
+	    (!ismax || evt->mult <= (1ULL << evt->shift)))
 		clc += rnd;
 
 	do_div(clc, evt->mult);
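
The one-character clockevents fix promotes the shifted constant to 64 bits so the comparison against evt->mult stays meaningful for large shift values; with 1U the result is a 32-bit quantity that wraps (and is undefined once the shift reaches 32). A short demonstration of the arithmetic (only the well-defined 64-bit shift is executed; the 32-bit variant is shown as what not to do):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int shift = 32;	/* an illustrative large shift value */
	uint32_t mult = 5;

	/* (1U << shift) would be undefined behaviour here (shift >= the
	 * width of unsigned int), so the pre-patch comparison could
	 * evaluate to anything. */
	uint64_t bound = 1ULL << shift;		/* well-defined: 4294967296 */

	printf("bound = %llu, mult <= bound: %d\n",
	       (unsigned long long)bound, mult <= bound);
	return 0;
}
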
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 492b986195d5..a16b67859e2a 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -553,7 +553,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
 		*sample = cputime_to_expires(cputime.utime);
 		break;
 	case CPUCLOCK_SCHED:
-		*sample = cputime.sum_exec_runtime + task_delta_exec(p);
+		*sample = cputime.sum_exec_runtime;
 		break;
 	}
 	return 0;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 42b463ad90f2..31ea01f42e1f 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -636,6 +636,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
 			goto out;
 		}
 	} else {
+		memset(&event.sigev_value, 0, sizeof(event.sigev_value));
 		event.sigev_notify = SIGEV_SIGNAL;
 		event.sigev_signo = SIGALRM;
 		event.sigev_value.sival_int = new_timer->it_id;
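
The posix-timers hunk zeroes event.sigev_value before filling in the SIGEV_SIGNAL defaults, so the unused bytes of the sigval union do not carry stack garbage into later users of the timer. The same habit applies to any union or padded struct that is only partially assigned; a small userspace illustration with the libc struct sigevent (the surrounding setup is invented):

#define _POSIX_C_SOURCE 199309L
#include <signal.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct sigevent ev;

	/* Zero the whole structure first: sigev_value is a union, and only
	 * one member is set below, so the other bytes would otherwise be
	 * whatever happened to be on the stack. */
	memset(&ev, 0, sizeof(ev));
	ev.sigev_notify = SIGEV_SIGNAL;
	ev.sigev_signo = SIGALRM;
	ev.sigev_value.sival_int = 42;		/* stands in for the timer id */

	printf("notify=%d signo=%d sival_int=%d\n",
	       ev.sigev_notify, ev.sigev_signo, ev.sigev_value.sival_int);
	return 0;
}
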
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fb186b9ddf51..31c90fec4158 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1925,8 +1925,16 @@ ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec)
1925 * when we are adding another op to the rec or removing the 1925 * when we are adding another op to the rec or removing the
1926 * current one. Thus, if the op is being added, we can 1926 * current one. Thus, if the op is being added, we can
1927 * ignore it because it hasn't attached itself to the rec 1927 * ignore it because it hasn't attached itself to the rec
1928 * yet. That means we just need to find the op that has a 1928 * yet.
1929 * trampoline and is not beeing added. 1929 *
1930 * If an ops is being modified (hooking to different functions)
1931 * then we don't care about the new functions that are being
1932 * added, just the old ones (that are probably being removed).
1933 *
1934 * If we are adding an ops to a function that already is using
1935 * a trampoline, it needs to be removed (trampolines are only
1936 * for single ops connected), then an ops that is not being
1937 * modified also needs to be checked.
1930 */ 1938 */
1931 do_for_each_ftrace_op(op, ftrace_ops_list) { 1939 do_for_each_ftrace_op(op, ftrace_ops_list) {
1932 1940
@@ -1940,17 +1948,23 @@ ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec)
1940 if (op->flags & FTRACE_OPS_FL_ADDING) 1948 if (op->flags & FTRACE_OPS_FL_ADDING)
1941 continue; 1949 continue;
1942 1950
1951
1943 /* 1952 /*
1944 * If the ops is not being added and has a trampoline, 1953 * If the ops is being modified and is in the old
1945 * then it must be the one that we want! 1954 * hash, then it is probably being removed from this
1955 * function.
1946 */ 1956 */
1947 if (hash_contains_ip(ip, op->func_hash))
1948 return op;
1949
1950 /* If the ops is being modified, it may be in the old hash. */
1951 if ((op->flags & FTRACE_OPS_FL_MODIFYING) && 1957 if ((op->flags & FTRACE_OPS_FL_MODIFYING) &&
1952 hash_contains_ip(ip, &op->old_hash)) 1958 hash_contains_ip(ip, &op->old_hash))
1953 return op; 1959 return op;
1960 /*
1961 * If the ops is not being added or modified, and it's
1962 * in its normal filter hash, then this must be the one
1963 * we want!
1964 */
1965 if (!(op->flags & FTRACE_OPS_FL_MODIFYING) &&
1966 hash_contains_ip(ip, op->func_hash))
1967 return op;
1954 1968
1955 } while_for_each_ftrace_op(op); 1969 } while_for_each_ftrace_op(op);
1956 1970
@@ -2293,10 +2307,13 @@ static void ftrace_run_update_code(int command)
2293 FTRACE_WARN_ON(ret); 2307 FTRACE_WARN_ON(ret);
2294} 2308}
2295 2309
2296static void ftrace_run_modify_code(struct ftrace_ops *ops, int command) 2310static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
2311 struct ftrace_hash *old_hash)
2297{ 2312{
2298 ops->flags |= FTRACE_OPS_FL_MODIFYING; 2313 ops->flags |= FTRACE_OPS_FL_MODIFYING;
2314 ops->old_hash.filter_hash = old_hash;
2299 ftrace_run_update_code(command); 2315 ftrace_run_update_code(command);
2316 ops->old_hash.filter_hash = NULL;
2300 ops->flags &= ~FTRACE_OPS_FL_MODIFYING; 2317 ops->flags &= ~FTRACE_OPS_FL_MODIFYING;
2301} 2318}
2302 2319
@@ -3340,7 +3357,7 @@ static struct ftrace_ops trace_probe_ops __read_mostly =
3340 3357
3341static int ftrace_probe_registered; 3358static int ftrace_probe_registered;
3342 3359
3343static void __enable_ftrace_function_probe(void) 3360static void __enable_ftrace_function_probe(struct ftrace_hash *old_hash)
3344{ 3361{
3345 int ret; 3362 int ret;
3346 int i; 3363 int i;
@@ -3348,7 +3365,8 @@ static void __enable_ftrace_function_probe(void)
3348 if (ftrace_probe_registered) { 3365 if (ftrace_probe_registered) {
3349 /* still need to update the function call sites */ 3366 /* still need to update the function call sites */
3350 if (ftrace_enabled) 3367 if (ftrace_enabled)
3351 ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS); 3368 ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS,
3369 old_hash);
3352 return; 3370 return;
3353 } 3371 }
3354 3372
@@ -3477,13 +3495,14 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3477 } while_for_each_ftrace_rec(); 3495 } while_for_each_ftrace_rec();
3478 3496
3479 ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); 3497 ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
3498
3499 __enable_ftrace_function_probe(old_hash);
3500
3480 if (!ret) 3501 if (!ret)
3481 free_ftrace_hash_rcu(old_hash); 3502 free_ftrace_hash_rcu(old_hash);
3482 else 3503 else
3483 count = ret; 3504 count = ret;
3484 3505
3485 __enable_ftrace_function_probe();
3486
3487 out_unlock: 3506 out_unlock:
3488 mutex_unlock(&ftrace_lock); 3507 mutex_unlock(&ftrace_lock);
3489 out: 3508 out:
@@ -3764,10 +3783,11 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
3764 return add_hash_entry(hash, ip); 3783 return add_hash_entry(hash, ip);
3765} 3784}
3766 3785
3767static void ftrace_ops_update_code(struct ftrace_ops *ops) 3786static void ftrace_ops_update_code(struct ftrace_ops *ops,
3787 struct ftrace_hash *old_hash)
3768{ 3788{
3769 if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) 3789 if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled)
3770 ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS); 3790 ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash);
3771} 3791}
3772 3792
3773static int 3793static int
@@ -3813,7 +3833,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
3813 old_hash = *orig_hash; 3833 old_hash = *orig_hash;
3814 ret = ftrace_hash_move(ops, enable, orig_hash, hash); 3834 ret = ftrace_hash_move(ops, enable, orig_hash, hash);
3815 if (!ret) { 3835 if (!ret) {
3816 ftrace_ops_update_code(ops); 3836 ftrace_ops_update_code(ops, old_hash);
3817 free_ftrace_hash_rcu(old_hash); 3837 free_ftrace_hash_rcu(old_hash);
3818 } 3838 }
3819 mutex_unlock(&ftrace_lock); 3839 mutex_unlock(&ftrace_lock);
@@ -4058,7 +4078,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
4058 ret = ftrace_hash_move(iter->ops, filter_hash, 4078 ret = ftrace_hash_move(iter->ops, filter_hash,
4059 orig_hash, iter->hash); 4079 orig_hash, iter->hash);
4060 if (!ret) { 4080 if (!ret) {
4061 ftrace_ops_update_code(iter->ops); 4081 ftrace_ops_update_code(iter->ops, old_hash);
4062 free_ftrace_hash_rcu(old_hash); 4082 free_ftrace_hash_rcu(old_hash);
4063 } 4083 }
4064 mutex_unlock(&ftrace_lock); 4084 mutex_unlock(&ftrace_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2d75c94ae87d..a56e07c8d15b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work)
538 * ring_buffer_wait - wait for input to the ring buffer 538 * ring_buffer_wait - wait for input to the ring buffer
539 * @buffer: buffer to wait on 539 * @buffer: buffer to wait on
540 * @cpu: the cpu buffer to wait on 540 * @cpu: the cpu buffer to wait on
541 * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
541 * 542 *
542 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon 543 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
543 * as data is added to any of the @buffer's cpu buffers. Otherwise 544 * as data is added to any of the @buffer's cpu buffers. Otherwise
544 * it will wait for data to be added to a specific cpu buffer. 545 * it will wait for data to be added to a specific cpu buffer.
545 */ 546 */
546int ring_buffer_wait(struct ring_buffer *buffer, int cpu) 547int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
547{ 548{
548 struct ring_buffer_per_cpu *cpu_buffer; 549 struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
549 DEFINE_WAIT(wait); 550 DEFINE_WAIT(wait);
550 struct rb_irq_work *work; 551 struct rb_irq_work *work;
552 int ret = 0;
551 553
552 /* 554 /*
553 * Depending on what the caller is waiting for, either any 555 * Depending on what the caller is waiting for, either any
@@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
564 } 566 }
565 567
566 568
567 prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); 569 while (true) {
570 prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
568 571
569 /* 572 /*
570 * The events can happen in critical sections where 573 * The events can happen in critical sections where
571 * checking a work queue can cause deadlocks. 574 * checking a work queue can cause deadlocks.
572 * After adding a task to the queue, this flag is set 575 * After adding a task to the queue, this flag is set
573 * only to notify events to try to wake up the queue 576 * only to notify events to try to wake up the queue
574 * using irq_work. 577 * using irq_work.
575 * 578 *
576 * We don't clear it even if the buffer is no longer 579 * We don't clear it even if the buffer is no longer
577 * empty. The flag only causes the next event to run 580 * empty. The flag only causes the next event to run
578 * irq_work to do the work queue wake up. The worse 581 * irq_work to do the work queue wake up. The worse
579 * that can happen if we race with !trace_empty() is that 582 * that can happen if we race with !trace_empty() is that
580 * an event will cause an irq_work to try to wake up 583 * an event will cause an irq_work to try to wake up
581 * an empty queue. 584 * an empty queue.
582 * 585 *
583 * There's no reason to protect this flag either, as 586 * There's no reason to protect this flag either, as
584 * the work queue and irq_work logic will do the necessary 587 * the work queue and irq_work logic will do the necessary
585 * synchronization for the wake ups. The only thing 588 * synchronization for the wake ups. The only thing
586 * that is necessary is that the wake up happens after 589 * that is necessary is that the wake up happens after
587 * a task has been queued. It's OK for spurious wake ups. 590 * a task has been queued. It's OK for spurious wake ups.
588 */ 591 */
589 work->waiters_pending = true; 592 work->waiters_pending = true;
593
594 if (signal_pending(current)) {
595 ret = -EINTR;
596 break;
597 }
598
599 if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
600 break;
601
602 if (cpu != RING_BUFFER_ALL_CPUS &&
603 !ring_buffer_empty_cpu(buffer, cpu)) {
604 unsigned long flags;
605 bool pagebusy;
606
607 if (!full)
608 break;
609
610 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
611 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
612 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
613
614 if (!pagebusy)
615 break;
616 }
590 617
591 if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
592 (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
593 schedule(); 618 schedule();
619 }
594 620
595 finish_wait(&work->waiters, &wait); 621 finish_wait(&work->waiters, &wait);
596 return 0; 622
623 return ret;
597} 624}
598 625
599/** 626/**
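ring_buffer_wait() is reworked into a loop: it gains a @full argument for callers that want to sleep until a complete page is available, and it now returns -EINTR itself instead of leaving the signal_pending() test to its callers. A hedged userspace model of the new control flow follows; the stubs stand in for the kernel's signal and buffer checks, and the per-cpu versus all-cpus distinction is collapsed.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool signal_pending_stub(void)   { return false; }
static bool buffer_empty_stub(void)     { static int n; return n++ < 3; }
static bool reader_page_busy_stub(void) { return false; }

static int ring_buffer_wait_model(bool full)
{
        int ret = 0;

        while (true) {
                /* kernel: prepare_to_wait() and waiters_pending = true */

                if (signal_pending_stub()) {
                        ret = -EINTR;           /* new: reported from here */
                        break;
                }

                if (!buffer_empty_stub()) {
                        /* with @full set, keep sleeping while the writer is
                         * still on the reader page (no complete page yet)  */
                        if (!full || !reader_page_busy_stub())
                                break;
                }

                /* kernel: schedule(); here we simply go around again */
        }

        /* kernel: finish_wait() */
        return ret;
}

int main(void)
{
        printf("wait returned %d\n", ring_buffer_wait_model(true));
        return 0;
}

In the real function the "page busy" test is the reader_page == commit_page comparison taken under reader_lock, as the hunk above shows.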
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 459a7b1251e5..426962b04183 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1076,13 +1076,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1076} 1076}
1077#endif /* CONFIG_TRACER_MAX_TRACE */ 1077#endif /* CONFIG_TRACER_MAX_TRACE */
1078 1078
1079static int wait_on_pipe(struct trace_iterator *iter) 1079static int wait_on_pipe(struct trace_iterator *iter, bool full)
1080{ 1080{
1081 /* Iterators are static, they should be filled or empty */ 1081 /* Iterators are static, they should be filled or empty */
1082 if (trace_buffer_iter(iter, iter->cpu_file)) 1082 if (trace_buffer_iter(iter, iter->cpu_file))
1083 return 0; 1083 return 0;
1084 1084
1085 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); 1085 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1086 full);
1086} 1087}
1087 1088
1088#ifdef CONFIG_FTRACE_STARTUP_TEST 1089#ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -4434,15 +4435,12 @@ static int tracing_wait_pipe(struct file *filp)
4434 4435
4435 mutex_unlock(&iter->mutex); 4436 mutex_unlock(&iter->mutex);
4436 4437
4437 ret = wait_on_pipe(iter); 4438 ret = wait_on_pipe(iter, false);
4438 4439
4439 mutex_lock(&iter->mutex); 4440 mutex_lock(&iter->mutex);
4440 4441
4441 if (ret) 4442 if (ret)
4442 return ret; 4443 return ret;
4443
4444 if (signal_pending(current))
4445 return -EINTR;
4446 } 4444 }
4447 4445
4448 return 1; 4446 return 1;
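With the signal handling folded into ring_buffer_wait(), wait_on_pipe() only forwards a new 'full' flag and tracing_wait_pipe() can drop its own signal_pending() check; the same simplification repeats in tracing_buffers_read() and tracing_buffers_splice_read() below. A compact model of the caller side, using stand-in names:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int wait_on_pipe_model(bool full)
{
        (void)full;
        return -EINTR;          /* pretend a signal arrived while sleeping */
}

static int tracing_wait_pipe_model(void)
{
        bool trace_empty = true;

        while (trace_empty) {
                int ret = wait_on_pipe_model(false);  /* byte readers: any data */

                if (ret)
                        return ret;   /* no separate signal_pending() test anymore */
        }
        return 1;                     /* data available */
}

int main(void)
{
        printf("%d\n", tracing_wait_pipe_model());    /* -EINTR, typically -4 */
        return 0;
}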
@@ -5372,16 +5370,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
5372 goto out_unlock; 5370 goto out_unlock;
5373 } 5371 }
5374 mutex_unlock(&trace_types_lock); 5372 mutex_unlock(&trace_types_lock);
5375 ret = wait_on_pipe(iter); 5373 ret = wait_on_pipe(iter, false);
5376 mutex_lock(&trace_types_lock); 5374 mutex_lock(&trace_types_lock);
5377 if (ret) { 5375 if (ret) {
5378 size = ret; 5376 size = ret;
5379 goto out_unlock; 5377 goto out_unlock;
5380 } 5378 }
5381 if (signal_pending(current)) {
5382 size = -EINTR;
5383 goto out_unlock;
5384 }
5385 goto again; 5379 goto again;
5386 } 5380 }
5387 size = 0; 5381 size = 0;
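tracing_buffers_read() follows the same pattern: the error returned by the wait becomes the value handed back to the reader, and the retry still goes through the existing 'again' label. An illustrative, userspace-only reduction of that flow (names invented):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>

static int wait_on_pipe_model(bool full) { (void)full; return -EINTR; }

static ssize_t buffers_read_model(bool nonblock)
{
        ssize_t size = 0;       /* pretend the ring buffer page was empty */
        int ret;

again:
        if (size > 0)
                return size;
        if (nonblock)
                return -EAGAIN;

        ret = wait_on_pipe_model(false);
        if (ret)
                return ret;     /* -EINTR now propagates as the read result */
        goto again;
}

int main(void)
{
        printf("read returned %zd\n", buffers_read_model(false));
        return 0;
}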
@@ -5500,7 +5494,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5500 }; 5494 };
5501 struct buffer_ref *ref; 5495 struct buffer_ref *ref;
5502 int entries, size, i; 5496 int entries, size, i;
5503 ssize_t ret; 5497 ssize_t ret = 0;
5504 5498
5505 mutex_lock(&trace_types_lock); 5499 mutex_lock(&trace_types_lock);
5506 5500
@@ -5538,13 +5532,16 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5538 int r; 5532 int r;
5539 5533
5540 ref = kzalloc(sizeof(*ref), GFP_KERNEL); 5534 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5541 if (!ref) 5535 if (!ref) {
5536 ret = -ENOMEM;
5542 break; 5537 break;
5538 }
5543 5539
5544 ref->ref = 1; 5540 ref->ref = 1;
5545 ref->buffer = iter->trace_buffer->buffer; 5541 ref->buffer = iter->trace_buffer->buffer;
5546 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); 5542 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5547 if (!ref->page) { 5543 if (!ref->page) {
5544 ret = -ENOMEM;
5548 kfree(ref); 5545 kfree(ref);
5549 break; 5546 break;
5550 } 5547 }
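This hunk also tightens the allocation loop in tracing_buffers_splice_read(): ret now starts at 0 and both allocation failures record -ENOMEM, so a loop that produced no pages can report the error instead of silently returning 0 or going on to wait. A small model of that error path, with a single allocation stub standing in for both kzalloc() and ring_buffer_alloc_read_page():

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* simulate the page allocation running out of memory */
static void *alloc_read_page_stub(void) { return NULL; }

static long splice_read_model(int want_pages)
{
        long ret = 0;      /* new: initialised so an early failure is reportable */
        int nr_pages = 0;
        int i;

        for (i = 0; i < want_pages; i++) {
                void *page = alloc_read_page_stub();

                if (!page) {
                        ret = -ENOMEM;  /* new: remember the failure */
                        break;
                }
                nr_pages++;             /* real code wires the page into
                                         * the splice pipe at this point */
                free(page);
        }

        if (!nr_pages && ret)
                return ret;             /* new: report -ENOMEM, do not wait */

        return nr_pages;
}

int main(void)
{
        printf("splice model returned %ld\n", splice_read_model(4));
        return 0;
}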
@@ -5582,19 +5579,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5582 5579
5583 /* did we read anything? */ 5580 /* did we read anything? */
5584 if (!spd.nr_pages) { 5581 if (!spd.nr_pages) {
5582 if (ret)
5583 goto out;
5584
5585 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) { 5585 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5586 ret = -EAGAIN; 5586 ret = -EAGAIN;
5587 goto out; 5587 goto out;
5588 } 5588 }
5589 mutex_unlock(&trace_types_lock); 5589 mutex_unlock(&trace_types_lock);
5590 ret = wait_on_pipe(iter); 5590 ret = wait_on_pipe(iter, true);
5591 mutex_lock(&trace_types_lock); 5591 mutex_lock(&trace_types_lock);
5592 if (ret) 5592 if (ret)
5593 goto out; 5593 goto out;
5594 if (signal_pending(current)) { 5594
5595 ret = -EINTR;
5596 goto out;
5597 }
5598 goto again; 5595 goto again;
5599 } 5596 }
5600 5597
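Finally, the splice path passes full=true through wait_on_pipe(), asking ring_buffer_wait() for a whole page, while the byte-oriented readers above keep passing false. A trivial sketch of the two call flavours as the callers see them; the model function merely prints what it would wait for.

#include <stdbool.h>
#include <stdio.h>

/* stand-in for ring_buffer_wait(buffer, cpu, full) */
static int ring_buffer_wait_model(int cpu, bool full)
{
        printf("waiting on cpu %d for %s\n",
               cpu, full ? "a full page" : "any data");
        return 0;
}

int main(void)
{
        ring_buffer_wait_model(0, false);   /* byte-oriented read path       */
        ring_buffer_wait_model(0, true);    /* splice path: whole pages only */
        return 0;
}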
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 4dc8b79c5f75..29228c4d5696 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -313,7 +313,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
313 int size; 313 int size;
314 314
315 syscall_nr = trace_get_syscall_nr(current, regs); 315 syscall_nr = trace_get_syscall_nr(current, regs);
316 if (syscall_nr < 0) 316 if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
317 return; 317 return;
318 318
319 /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ 319 /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
@@ -360,7 +360,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
360 int syscall_nr; 360 int syscall_nr;
361 361
362 syscall_nr = trace_get_syscall_nr(current, regs); 362 syscall_nr = trace_get_syscall_nr(current, regs);
363 if (syscall_nr < 0) 363 if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
364 return; 364 return;
365 365
366 /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ 366 /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
@@ -567,7 +567,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
567 int size; 567 int size;
568 568
569 syscall_nr = trace_get_syscall_nr(current, regs); 569 syscall_nr = trace_get_syscall_nr(current, regs);
570 if (syscall_nr < 0) 570 if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
571 return; 571 return;
572 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) 572 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
573 return; 573 return;
@@ -641,7 +641,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
641 int size; 641 int size;
642 642
643 syscall_nr = trace_get_syscall_nr(current, regs); 643 syscall_nr = trace_get_syscall_nr(current, regs);
644 if (syscall_nr < 0) 644 if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
645 return; 645 return;
646 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) 646 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
647 return; 647 return;
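All four trace_syscalls.c handlers gain the same upper-bound check, so a syscall number is validated against NR_syscalls before it is used to index the enabled-syscall bitmaps and metadata. A userspace model of the check; NR_SYSCALLS and the bitmap below are illustrative values, not the kernel's.

#include <stdbool.h>
#include <stdio.h>

#define NR_SYSCALLS 440                 /* illustrative size */

static unsigned char enabled[NR_SYSCALLS];

static void trace_syscall_model(int syscall_nr)
{
        /* old code only rejected negative numbers */
        if (syscall_nr < 0 || syscall_nr >= NR_SYSCALLS)
                return;                 /* out of range: never use as an index */

        if (!enabled[syscall_nr])
                return;

        printf("tracing syscall %d\n", syscall_nr);
}

int main(void)
{
        trace_syscall_model(-1);        /* rejected before and after        */
        trace_syscall_model(100000);    /* rejected only with this patch    */
        trace_syscall_model(1);         /* in range, but not enabled here   */
        return 0;
}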