-rw-r--r--  arch/x86/entry/vdso/vdso32/vdso-fakesections.c |  1
-rw-r--r--  arch/x86/events/core.c                         |  8
-rw-r--r--  arch/x86/events/intel/cstate.c                 |  2
-rw-r--r--  arch/x86/events/msr.c                          |  9
-rw-r--r--  include/linux/kthread.h                        |  1
-rw-r--r--  include/linux/sched.h                          | 50
-rw-r--r--  include/linux/sched/signal.h                   |  2
-rw-r--r--  kernel/events/ring_buffer.c                    |  7
-rw-r--r--  kernel/kthread.c                               | 50
-rw-r--r--  kernel/sched/autogroup.c                       |  7
-rw-r--r--  kernel/sched/core.c                            | 56
-rw-r--r--  kernel/sched/fair.c                            |  2
-rw-r--r--  kernel/signal.c                                | 17
-rw-r--r--  kernel/stop_machine.c                          | 19
14 files changed, 153 insertions, 78 deletions
diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
deleted file mode 100644
index 541468e25265..000000000000
--- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../vdso-fakesections.c"
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index a6006e7bb729..45b2b1c93d04 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -27,6 +27,7 @@
 #include <linux/cpu.h>
 #include <linux/bitops.h>
 #include <linux/device.h>
+#include <linux/nospec.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 
 	config = attr->config;
 
 	cache_type = (config >> 0) & 0xff;
 	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
 		return -EINVAL;
+	cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);
 
 	cache_op = (config >> 8) & 0xff;
 	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
 		return -EINVAL;
+	cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
 
 	cache_result = (config >> 16) & 0xff;
 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return -EINVAL;
+	cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);
 
 	val = hw_cache_event_ids[cache_type][cache_op][cache_result];
 
@@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event)
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
 
+	attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);
+
 	/*
 	 * The generic map:
 	 */
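
The hunks above all apply the same Spectre-v1 idiom: keep the architectural bounds check, then clamp the already-validated index with array_index_nospec() before it is used, so that a mispredicted branch cannot feed an out-of-bounds value into a speculative load. A minimal sketch of that idiom, using a hypothetical table lookup rather than code from this patch:

#include <linux/types.h>
#include <linux/nospec.h>

/*
 * Hypothetical helper (not from this patch): validate a user-supplied
 * index, then clamp it so speculation past the bounds check cannot
 * read outside @table.
 */
static u64 example_table_lookup(const u64 *table, unsigned int nr,
				unsigned int idx)
{
	if (idx >= nr)
		return 0;			/* architectural bounds check */

	idx = array_index_nospec(idx, nr);	/* clamp the index for speculation */
	return table[idx];
}
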
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9aca448bb8e6..9f8084f18d58 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -92,6 +92,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/perf_event.h>
+#include <linux/nospec.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include "../perf_event.h"
@@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
 	} else if (event->pmu == &cstate_pkg_pmu) {
 		if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
 			return -EINVAL;
+		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
 		if (!pkg_msr[cfg].attr)
 			return -EINVAL;
 		event->hw.event_base = pkg_msr[cfg].msr;
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index e7edf19e64c2..b4771a6ddbc1 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/perf_event.h>
+#include <linux/nospec.h>
 #include <asm/intel-family.h>
 
 enum perf_msr_id {
@@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event)
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
-	if (cfg >= PERF_MSR_EVENT_MAX)
-		return -EINVAL;
-
 	/* unsupported modes and filters */
 	if (event->attr.exclude_user   ||
 	    event->attr.exclude_kernel ||
@@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event)
 	    event->attr.sample_period) /* no sampling */
 		return -EINVAL;
 
+	if (cfg >= PERF_MSR_EVENT_MAX)
+		return -EINVAL;
+
+	cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
+
 	if (!msr[cfg].attr)
 		return -EINVAL;
 
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1961761311d..2803264c512f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
+void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b3d697f3b573..c2413703f45d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -112,17 +112,36 @@ struct task_group;
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
+/*
+ * Special states are those that do not use the normal wait-loop pattern. See
+ * the comment with set_special_state().
+ */
+#define is_special_task_state(state)				\
+	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+
 #define __set_current_state(state_value)			\
 	do {							\
+		WARN_ON_ONCE(is_special_task_state(state_value));\
 		current->task_state_change = _THIS_IP_;		\
 		current->state = (state_value);			\
 	} while (0)
+
 #define set_current_state(state_value)				\
 	do {							\
+		WARN_ON_ONCE(is_special_task_state(state_value));\
 		current->task_state_change = _THIS_IP_;		\
 		smp_store_mb(current->state, (state_value));	\
 	} while (0)
 
+#define set_special_state(state_value)					\
+	do {								\
+		unsigned long flags; /* may shadow */			\
+		WARN_ON_ONCE(!is_special_task_state(state_value));	\
+		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
+		current->task_state_change = _THIS_IP_;			\
+		current->state = (state_value);				\
+		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
+	} while (0)
 #else
 /*
  * set_current_state() includes a barrier so that the write of current->state
@@ -144,8 +163,8 @@ struct task_group;
  *
  * The above is typically ordered against the wakeup, which does:
  *
  *   need_sleep = false;
  *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
  * Where wake_up_state() (and all other wakeup primitives) imply enough
  * barriers to order the store of the variable against wakeup.
@@ -154,12 +173,33 @@ struct task_group;
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
  * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
  *
- * This is obviously fine, since they both store the exact same value.
+ * However, with slightly different timing the wakeup TASK_RUNNING store can
+ * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not
+ * a problem either because that will result in one extra go around the loop
+ * and our @cond test will save the day.
 *
 * Also see the comments of try_to_wake_up().
 */
-#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
-#define set_current_state(state_value) smp_store_mb(current->state, (state_value))
+#define __set_current_state(state_value)				\
+	current->state = (state_value)
+
+#define set_current_state(state_value)					\
+	smp_store_mb(current->state, (state_value))
+
+/*
+ * set_special_state() should be used for those states when the blocking task
+ * can not use the regular condition based wait-loop. In that case we must
+ * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
+ * will not collide with our state change.
+ */
+#define set_special_state(state_value)					\
+	do {								\
+		unsigned long flags; /* may shadow */			\
+		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
+		current->state = (state_value);				\
+		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
+	} while (0)
+
 #endif
 
 /* Task command name length: */
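
The sched.h changes above split task states into normal ones, set with set_current_state() inside a condition-based wait loop, and special ones (TASK_STOPPED, TASK_TRACED, TASK_DEAD) that must be set under ->pi_lock with the new set_special_state(). A condensed sketch of the normal wait-loop pattern the comment describes, with hypothetical names; a racing TASK_RUNNING store from the waker only costs one extra trip around the loop because the condition is re-checked:

#include <linux/sched.h>

/* Illustrative only; example_wait_for() is not a kernel function. */
static void example_wait_for(bool *cond)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(*cond))
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}

Special states have no such loop to fall back on, which is why set_special_state() serializes the state write against ->pi_lock instead.
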
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index a7ce74c74e49..113d1ad1ced7 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void)
 {
 	spin_lock_irq(&current->sighand->siglock);
 	if (current->jobctl & JOBCTL_STOP_DEQUEUED)
-		__set_current_state(TASK_STOPPED);
+		set_special_state(TASK_STOPPED);
 	spin_unlock_irq(&current->sighand->siglock);
 
 	schedule();
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 6c6b3c48db71..1d8ca9ea9979 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/circ_buf.h>
 #include <linux/poll.h>
+#include <linux/nospec.h>
 
 #include "internal.h"
 
@@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 			return NULL;
 
 		/* AUX space */
-		if (pgoff >= rb->aux_pgoff)
-			return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
+		if (pgoff >= rb->aux_pgoff) {
+			int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
+			return virt_to_page(rb->aux_pages[aux_pgoff]);
+		}
 	}
 
 	return __perf_mmap_to_page(rb, pgoff);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index cd50e99202b0..2017a39ab490 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
 	KTHREAD_IS_PER_CPU = 0,
 	KTHREAD_SHOULD_STOP,
 	KTHREAD_SHOULD_PARK,
-	KTHREAD_IS_PARKED,
 };
 
 static inline void set_kthread_struct(void *kthread)
@@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task)
 
 static void __kthread_parkme(struct kthread *self)
 {
-	__set_current_state(TASK_PARKED);
-	while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
-		if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
-			complete(&self->parked);
+	for (;;) {
+		set_current_state(TASK_PARKED);
+		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
+			break;
 		schedule();
-		__set_current_state(TASK_PARKED);
 	}
-	clear_bit(KTHREAD_IS_PARKED, &self->flags);
 	__set_current_state(TASK_RUNNING);
 }
 
@@ -194,6 +191,11 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
+void kthread_park_complete(struct task_struct *k)
+{
+	complete(&to_kthread(k)->parked);
+}
+
 static int kthread(void *_create)
 {
 	/* Copy data: it's on kthread's stack */
@@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
 {
 	struct kthread *kthread = to_kthread(k);
 
-	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
 	/*
-	 * We clear the IS_PARKED bit here as we don't wait
-	 * until the task has left the park code. So if we'd
-	 * park before that happens we'd see the IS_PARKED bit
-	 * which might be about to be cleared.
+	 * Newly created kthread was parked when the CPU was offline.
+	 * The binding was lost and we need to set it again.
 	 */
-	if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-		/*
-		 * Newly created kthread was parked when the CPU was offline.
-		 * The binding was lost and we need to set it again.
-		 */
-		if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-			__kthread_bind(k, kthread->cpu, TASK_PARKED);
-		wake_up_state(k, TASK_PARKED);
-	}
+	if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+		__kthread_bind(k, kthread->cpu, TASK_PARKED);
+
+	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	wake_up_state(k, TASK_PARKED);
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
 
@@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k)
 	if (WARN_ON(k->flags & PF_EXITING))
 		return -ENOSYS;
 
-	if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-		set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-		if (k != current) {
-			wake_up_process(k);
-			wait_for_completion(&kthread->parked);
-		}
+	if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
+		return -EBUSY;
+
+	set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	if (k != current) {
+		wake_up_process(k);
+		wait_for_completion(&kthread->parked);
 	}
 
 	return 0;
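
The kthread.c changes above move the park handshake out of __kthread_parkme(): the parked thread simply sleeps in TASK_PARKED, and the completion is signalled from finish_task_switch() via kthread_park_complete() once it has truly scheduled out, so kthread_park() no longer returns while the target is still running. From the kthread's side the API is unchanged; a minimal sketch of a parkable thread function, with hypothetical names:

#include <linux/kthread.h>
#include <linux/delay.h>

/* Illustrative kthread body; example_thread_fn() is not from this patch. */
static int example_thread_fn(void *data)
{
	while (!kthread_should_stop()) {
		if (kthread_should_park())
			kthread_parkme();	/* sleeps in TASK_PARKED until unparked */

		msleep(100);			/* stand-in for real work */
	}
	return 0;
}

A controller pairs this with kthread_park()/kthread_unpark(); with this change, parking an already-parked thread is treated as a caller bug (WARN_ON_ONCE() plus -EBUSY).
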
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 6be6c575b6cd..2d4ff5353ded 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -2,6 +2,7 @@
 /*
  * Auto-group scheduling implementation:
  */
+#include <linux/nospec.h>
 #include "sched.h"
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 	static unsigned long next = INITIAL_JIFFIES;
 	struct autogroup *ag;
 	unsigned long shares;
-	int err;
+	int err, idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 
 	next = HZ / 10 + jiffies;
 	ag = autogroup_task_get(p);
-	shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+	idx = array_index_nospec(nice + 20, 40);
+	shares = scale_load(sched_prio_to_weight[idx]);
 
 	down_write(&ag->lock);
 	err = sched_group_set_shares(ag->tg, shares);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e10aaeebfcc..092f7c4de903 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,6 +7,9 @@
  */
 #include "sched.h"
 
+#include <linux/kthread.h>
+#include <linux/nospec.h>
+
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
 
@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		membarrier_mm_sync_core_before_usermode(mm);
 		mmdrop(mm);
 	}
-	if (unlikely(prev_state == TASK_DEAD)) {
-		if (prev->sched_class->task_dead)
-			prev->sched_class->task_dead(prev);
+	if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
+		switch (prev_state) {
+		case TASK_DEAD:
+			if (prev->sched_class->task_dead)
+				prev->sched_class->task_dead(prev);
 
 			/*
 			 * Remove function-return probe instances associated with this
 			 * task and put them back on the free list.
 			 */
 			kprobe_flush_task(prev);
+
+			/* Task is done with its stack. */
+			put_task_stack(prev);
 
-	/* Task is done with its stack. */
-	put_task_stack(prev);
+			put_task_struct(prev);
+			break;
 
-	put_task_struct(prev);
+		case TASK_PARKED:
+			kthread_park_complete(prev);
+			break;
+		}
 	}
 
 	tick_nohz_task_switch();
@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt)
 
 void __noreturn do_task_dead(void)
 {
-	/*
-	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
-	 * when the following two conditions become true.
-	 *   - There is race condition of mmap_sem (It is acquired by
-	 *     exit_mm()), and
-	 *   - SMI occurs before setting TASK_RUNINNG.
-	 *     (or hypervisor of virtual machine switches to other guest)
-	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
-	 *
-	 * To avoid it, we have to wait for releasing tsk->pi_lock which
-	 * is held by try_to_wake_up()
-	 */
-	raw_spin_lock_irq(&current->pi_lock);
-	raw_spin_unlock_irq(&current->pi_lock);
-
 	/* Causes final put_task_struct in finish_task_switch(): */
-	__set_current_state(TASK_DEAD);
+	set_special_state(TASK_DEAD);
 
 	/* Tell freezer to ignore us: */
 	current->flags |= PF_NOFREEZE;
@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
 				       struct cftype *cft, s64 nice)
 {
 	unsigned long weight;
+	int idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -ERANGE;
 
-	weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+	idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+	idx = array_index_nospec(idx, 40);
+	weight = sched_prio_to_weight[idx];
+
 	return sched_group_set_shares(css_tg(css), scale_load(weight));
 }
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f43627c6bb3d..79f574dba096 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9792,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
 	if (curr_cost > this_rq->max_idle_balance_cost)
 		this_rq->max_idle_balance_cost = curr_cost;
 
+out:
 	/*
 	 * While browsing the domains, we released the rq lock, a task could
 	 * have been enqueued in the meantime. Since we're not going idle,
@@ -9800,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
 	if (this_rq->cfs.h_nr_running && !pulled_task)
 		pulled_task = 1;
 
-out:
 	/* Move the next balance forward */
 	if (time_after(this_rq->next_balance, next_balance))
 		this_rq->next_balance = next_balance;
diff --git a/kernel/signal.c b/kernel/signal.c
index d4ccea599692..9c33163a6165 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1961,14 +1961,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 		return;
 	}
 
+	set_special_state(TASK_TRACED);
+
 	/*
 	 * We're committing to trapping. TRACED should be visible before
 	 * TRAPPING is cleared; otherwise, the tracer might fail do_wait().
 	 * Also, transition to TRACED and updates to ->jobctl should be
 	 * atomic with respect to siglock and should be done after the arch
 	 * hook as siglock is released and regrabbed across it.
+	 *
+	 *     TRACER				    TRACEE
+	 *
+	 *     ptrace_attach()
+	 * [L]   wait_on_bit(JOBCTL_TRAPPING)	[S] set_special_state(TRACED)
+	 *     do_wait()
+	 *       set_current_state()		    smp_wmb();
+	 *       ptrace_do_wait()
+	 *         wait_task_stopped()
+	 *           task_stopped_code()
+	 * [L]         task_is_traced()		[S] task_clear_jobctl_trapping();
 	 */
-	set_current_state(TASK_TRACED);
+	smp_wmb();
 
 	current->last_siginfo = info;
 	current->exit_code = exit_code;
@@ -2176,7 +2189,7 @@ static bool do_signal_stop(int signr)
 		if (task_participate_group_stop(current))
 			notify = CLD_STOPPED;
 
-		__set_current_state(TASK_STOPPED);
+		set_special_state(TASK_STOPPED);
 		spin_unlock_irq(&current->sighand->siglock);
 
 		/*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b7591261652d..64c0291b579c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -21,6 +21,7 @@
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
 #include <linux/nmi.h>
+#include <linux/sched/wake_q.h>
 
 /*
  * Structure to determine completion condition and record errors. May
@@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
 }
 
 static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
-					struct cpu_stop_work *work)
+					struct cpu_stop_work *work,
+					struct wake_q_head *wakeq)
 {
 	list_add_tail(&work->list, &stopper->works);
-	wake_up_process(stopper->thread);
+	wake_q_add(wakeq, stopper->thread);
 }
 
 /* queue @work to @stopper. if offline, @work is completed immediately */
 static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+	DEFINE_WAKE_Q(wakeq);
 	unsigned long flags;
 	bool enabled;
 
 	spin_lock_irqsave(&stopper->lock, flags);
 	enabled = stopper->enabled;
 	if (enabled)
-		__cpu_stop_queue_work(stopper, work);
+		__cpu_stop_queue_work(stopper, work, &wakeq);
 	else if (work->done)
 		cpu_stop_signal_done(work->done);
 	spin_unlock_irqrestore(&stopper->lock, flags);
 
+	wake_up_q(&wakeq);
+
 	return enabled;
 }
 
@@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
 {
 	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
 	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+	DEFINE_WAKE_Q(wakeq);
 	int err;
 retry:
 	spin_lock_irq(&stopper1->lock);
@@ -252,8 +258,8 @@ retry:
 		goto unlock;
 
 	err = 0;
-	__cpu_stop_queue_work(stopper1, work1);
-	__cpu_stop_queue_work(stopper2, work2);
+	__cpu_stop_queue_work(stopper1, work1, &wakeq);
+	__cpu_stop_queue_work(stopper2, work2, &wakeq);
 unlock:
 	spin_unlock(&stopper2->lock);
 	spin_unlock_irq(&stopper1->lock);
@@ -263,6 +269,9 @@ unlock:
 		cpu_relax();
 		goto retry;
 	}
+
+	wake_up_q(&wakeq);
+
 	return err;
 }
 /**
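
The stop_machine.c changes above follow the usual wake_q pattern: wakeups are only recorded while stopper->lock is held and are issued after the lock is dropped, so wake_up_process() never runs under the stopper lock. A minimal sketch of that idiom, with a hypothetical caller:

#include <linux/sched.h>
#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

/* Illustrative only; example_queue_and_wake() is not a kernel function. */
static void example_queue_and_wake(spinlock_t *lock, struct task_struct *worker)
{
	DEFINE_WAKE_Q(wakeq);
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	/* ...queue work for @worker under the lock... */
	wake_q_add(&wakeq, worker);	/* records the task, no wakeup yet */
	spin_unlock_irqrestore(lock, flags);

	wake_up_q(&wakeq);		/* deferred wakeup, lock not held */
}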