 -rw-r--r--  arch/x86/entry/vdso/vdso32/vdso-fakesections.c |  1
 -rw-r--r--  arch/x86/events/core.c                         |  8
 -rw-r--r--  arch/x86/events/intel/cstate.c                 |  2
 -rw-r--r--  arch/x86/events/msr.c                          |  9
 -rw-r--r--  include/linux/kthread.h                        |  1
 -rw-r--r--  include/linux/sched.h                          | 50
 -rw-r--r--  include/linux/sched/signal.h                   |  2
 -rw-r--r--  kernel/events/ring_buffer.c                    |  7
 -rw-r--r--  kernel/kthread.c                               | 50
 -rw-r--r--  kernel/sched/autogroup.c                       |  7
 -rw-r--r--  kernel/sched/core.c                            | 56
 -rw-r--r--  kernel/sched/fair.c                            |  2
 -rw-r--r--  kernel/signal.c                                | 17
 -rw-r--r--  kernel/stop_machine.c                          | 19
 14 files changed, 153 insertions(+), 78 deletions(-)
diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
deleted file mode 100644
index 541468e25265..000000000000
--- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../vdso-fakesections.c"
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index a6006e7bb729..45b2b1c93d04 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -27,6 +27,7 @@
 #include <linux/cpu.h>
 #include <linux/bitops.h>
 #include <linux/device.h>
+#include <linux/nospec.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 
        config = attr->config;
 
        cache_type = (config >> 0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return -EINVAL;
+       cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);
 
        cache_op = (config >> 8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return -EINVAL;
+       cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
 
        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;
+       cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);
 
        val = hw_cache_event_ids[cache_type][cache_op][cache_result];
 
@@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event)
        if (attr->config >= x86_pmu.max_events)
                return -EINVAL;
 
+       attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);
+
        /*
         * The generic map:
         */
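
The recurring pattern in the perf changes above is the Spectre v1 (bounds-check bypass) mitigation: a user-supplied index is bounds-checked and then clamped with array_index_nospec() from <linux/nospec.h>, so even a mispredicted branch cannot carry an out-of-range value into the array access. A minimal sketch of the pattern, using a hypothetical table and size rather than anything from this patch:

#include <linux/errno.h>
#include <linux/nospec.h>
#include <linux/types.h>

#define NR_EVENTS 8                            /* hypothetical table size */
static const u64 event_map[NR_EVENTS];         /* hypothetical id -> value map */

static int lookup_event(unsigned int idx, u64 *val)
{
        if (idx >= NR_EVENTS)                  /* architectural bounds check */
                return -EINVAL;
        /*
         * Clamp idx under speculation as well: even if the branch above is
         * mispredicted, idx is forced into [0, NR_EVENTS) and cannot be used
         * to index outside the table.
         */
        idx = array_index_nospec(idx, NR_EVENTS);
        *val = event_map[idx];
        return 0;
}
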
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9aca448bb8e6..9f8084f18d58 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -92,6 +92,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/perf_event.h>
+#include <linux/nospec.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include "../perf_event.h"
@@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
        } else if (event->pmu == &cstate_pkg_pmu) {
                if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
                        return -EINVAL;
+               cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
                if (!pkg_msr[cfg].attr)
                        return -EINVAL;
                event->hw.event_base = pkg_msr[cfg].msr;
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index e7edf19e64c2..b4771a6ddbc1 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/perf_event.h>
+#include <linux/nospec.h>
 #include <asm/intel-family.h>
 
 enum perf_msr_id {
@@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event)
        if (event->attr.type != event->pmu->type)
                return -ENOENT;
 
-       if (cfg >= PERF_MSR_EVENT_MAX)
-               return -EINVAL;
-
        /* unsupported modes and filters */
        if (event->attr.exclude_user   ||
            event->attr.exclude_kernel ||
@@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event)
            event->attr.sample_period) /* no sampling */
                return -EINVAL;
 
+       if (cfg >= PERF_MSR_EVENT_MAX)
+               return -EINVAL;
+
+       cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
+
        if (!msr[cfg].attr)
                return -EINVAL;
 
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1961761311d..2803264c512f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
+void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b3d697f3b573..c2413703f45d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -112,17 +112,36 @@ struct task_group;
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
+/*
+ * Special states are those that do not use the normal wait-loop pattern. See
+ * the comment with set_special_state().
+ */
+#define is_special_task_state(state)                           \
+       ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+
 #define __set_current_state(state_value)                       \
        do {                                                    \
+               WARN_ON_ONCE(is_special_task_state(state_value));\
                current->task_state_change = _THIS_IP_;         \
                current->state = (state_value);                 \
        } while (0)
+
 #define set_current_state(state_value)                         \
        do {                                                    \
+               WARN_ON_ONCE(is_special_task_state(state_value));\
                current->task_state_change = _THIS_IP_;         \
                smp_store_mb(current->state, (state_value));    \
        } while (0)
 
+#define set_special_state(state_value)                                 \
+       do {                                                            \
+               unsigned long flags; /* may shadow */                   \
+               WARN_ON_ONCE(!is_special_task_state(state_value));      \
+               raw_spin_lock_irqsave(&current->pi_lock, flags);        \
+               current->task_state_change = _THIS_IP_;                 \
+               current->state = (state_value);                         \
+               raw_spin_unlock_irqrestore(&current->pi_lock, flags);   \
+       } while (0)
 #else
 /*
  * set_current_state() includes a barrier so that the write of current->state
@@ -144,8 +163,8 @@ struct task_group;
  *
  * The above is typically ordered against the wakeup, which does:
  *
- *   need_sleep = false;
- *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
+ *     need_sleep = false;
+ *     wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
  * Where wake_up_state() (and all other wakeup primitives) imply enough
  * barriers to order the store of the variable against wakeup.
@@ -154,12 +173,33 @@ struct task_group;
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
  * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
  *
- * This is obviously fine, since they both store the exact same value.
+ * However, with slightly different timing the wakeup TASK_RUNNING store can
+ * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not
+ * a problem either because that will result in one extra go around the loop
+ * and our @cond test will save the day.
  *
  * Also see the comments of try_to_wake_up().
  */
-#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
-#define set_current_state(state_value) smp_store_mb(current->state, (state_value))
+#define __set_current_state(state_value)                               \
+       current->state = (state_value)
+
+#define set_current_state(state_value)                                 \
+       smp_store_mb(current->state, (state_value))
+
+/*
+ * set_special_state() should be used for those states when the blocking task
+ * can not use the regular condition based wait-loop. In that case we must
+ * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
+ * will not collide with our state change.
+ */
+#define set_special_state(state_value)                                 \
+       do {                                                            \
+               unsigned long flags; /* may shadow */                   \
+               raw_spin_lock_irqsave(&current->pi_lock, flags);        \
+               current->state = (state_value);                         \
+               raw_spin_unlock_irqrestore(&current->pi_lock, flags);   \
+       } while (0)
+
 #endif
 
 /* Task command name length: */
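
To see why set_special_state() is needed: the regular wait-loop re-tests its condition, so a racing TASK_RUNNING store from a wakeup at worst costs one extra loop iteration, whereas the special states have no loop to fall back on. A rough sketch of the two call patterns, illustrative only (need_sleep is the placeholder from the comment above, and the siglock usage mirrors kernel_signal_stop() below):

/* Regular pattern: a lost state store just means one extra loop iteration. */
for (;;) {
        set_current_state(TASK_UNINTERRUPTIBLE);
        if (!need_sleep)
                break;
        schedule();
}
__set_current_state(TASK_RUNNING);

/*
 * Special states (TASK_STOPPED, TASK_TRACED, TASK_DEAD) have no such retry
 * loop, so set_special_state() serializes the store against wakeups via
 * ->pi_lock before the task goes to sleep.
 */
set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
schedule();
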
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index a7ce74c74e49..113d1ad1ced7 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void)
 {
        spin_lock_irq(&current->sighand->siglock);
        if (current->jobctl & JOBCTL_STOP_DEQUEUED)
-               __set_current_state(TASK_STOPPED);
+               set_special_state(TASK_STOPPED);
        spin_unlock_irq(&current->sighand->siglock);
 
        schedule();
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 6c6b3c48db71..1d8ca9ea9979 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/circ_buf.h>
 #include <linux/poll.h>
+#include <linux/nospec.h>
 
 #include "internal.h"
 
@@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
                        return NULL;
 
                /* AUX space */
-               if (pgoff >= rb->aux_pgoff)
-                       return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
+               if (pgoff >= rb->aux_pgoff) {
+                       int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
+                       return virt_to_page(rb->aux_pages[aux_pgoff]);
+               }
        }
 
        return __perf_mmap_to_page(rb, pgoff);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index cd50e99202b0..2017a39ab490 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
        KTHREAD_IS_PER_CPU = 0,
        KTHREAD_SHOULD_STOP,
        KTHREAD_SHOULD_PARK,
-       KTHREAD_IS_PARKED,
 };
 
 static inline void set_kthread_struct(void *kthread)
@@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task)
 
 static void __kthread_parkme(struct kthread *self)
 {
-       __set_current_state(TASK_PARKED);
-       while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
-               if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
-                       complete(&self->parked);
+       for (;;) {
+               set_current_state(TASK_PARKED);
+               if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
+                       break;
                schedule();
-               __set_current_state(TASK_PARKED);
        }
-       clear_bit(KTHREAD_IS_PARKED, &self->flags);
        __set_current_state(TASK_RUNNING);
 }
 
@@ -194,6 +191,11 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
+void kthread_park_complete(struct task_struct *k)
+{
+       complete(&to_kthread(k)->parked);
+}
+
 static int kthread(void *_create)
 {
        /* Copy data: it's on kthread's stack */
@@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
 {
        struct kthread *kthread = to_kthread(k);
 
-       clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
        /*
-        * We clear the IS_PARKED bit here as we don't wait
-        * until the task has left the park code. So if we'd
-        * park before that happens we'd see the IS_PARKED bit
-        * which might be about to be cleared.
+        * Newly created kthread was parked when the CPU was offline.
+        * The binding was lost and we need to set it again.
         */
-       if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-               /*
-                * Newly created kthread was parked when the CPU was offline.
-                * The binding was lost and we need to set it again.
-                */
-               if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-                       __kthread_bind(k, kthread->cpu, TASK_PARKED);
-               wake_up_state(k, TASK_PARKED);
-       }
+       if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+               __kthread_bind(k, kthread->cpu, TASK_PARKED);
+
+       clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+       wake_up_state(k, TASK_PARKED);
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
 
@@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k)
        if (WARN_ON(k->flags & PF_EXITING))
                return -ENOSYS;
 
-       if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-               set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-               if (k != current) {
-                       wake_up_process(k);
-                       wait_for_completion(&kthread->parked);
-               }
+       if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
+               return -EBUSY;
+
+       set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+       if (k != current) {
+               wake_up_process(k);
+               wait_for_completion(&kthread->parked);
        }
 
        return 0;
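
Putting the kthread.c hunks above together with the finish_task_switch() change in kernel/sched/core.c further down, the reworked park handshake roughly looks like this (condensed from the diff, not a literal copy of either file):

/* Parking side, kthread_park(): */
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
if (k != current) {
        wake_up_process(k);
        wait_for_completion(&kthread->parked);  /* completed by the scheduler */
}

/* Parked thread, __kthread_parkme(): */
for (;;) {
        set_current_state(TASK_PARKED);
        if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
                break;
        schedule();
}
__set_current_state(TASK_RUNNING);

/*
 * Once the parked thread is truly off the CPU, finish_task_switch() sees
 * prev_state == TASK_PARKED and calls kthread_park_complete(prev), which
 * does complete(&to_kthread(prev)->parked).
 */
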
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 6be6c575b6cd..2d4ff5353ded 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -2,6 +2,7 @@
 /*
  * Auto-group scheduling implementation:
  */
+#include <linux/nospec.h>
 #include "sched.h"
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
        static unsigned long next = INITIAL_JIFFIES;
        struct autogroup *ag;
        unsigned long shares;
-       int err;
+       int err, idx;
 
        if (nice < MIN_NICE || nice > MAX_NICE)
                return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 
        next = HZ / 10 + jiffies;
        ag = autogroup_task_get(p);
-       shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+       idx = array_index_nospec(nice + 20, 40);
+       shares = scale_load(sched_prio_to_weight[idx]);
 
        down_write(&ag->lock);
        err = sched_group_set_shares(ag->tg, shares);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e10aaeebfcc..092f7c4de903 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,6 +7,9 @@
  */
 #include "sched.h"
 
+#include <linux/kthread.h>
+#include <linux/nospec.h>
+
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
 
@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                membarrier_mm_sync_core_before_usermode(mm);
                mmdrop(mm);
        }
-       if (unlikely(prev_state == TASK_DEAD)) {
-               if (prev->sched_class->task_dead)
-                       prev->sched_class->task_dead(prev);
+       if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
+               switch (prev_state) {
+               case TASK_DEAD:
+                       if (prev->sched_class->task_dead)
+                               prev->sched_class->task_dead(prev);
 
-               /*
-                * Remove function-return probe instances associated with this
-                * task and put them back on the free list.
-                */
-               kprobe_flush_task(prev);
+                       /*
+                        * Remove function-return probe instances associated with this
+                        * task and put them back on the free list.
+                        */
+                       kprobe_flush_task(prev);
+
+                       /* Task is done with its stack. */
+                       put_task_stack(prev);
 
-               /* Task is done with its stack. */
-               put_task_stack(prev);
+                       put_task_struct(prev);
+                       break;
 
-               put_task_struct(prev);
+               case TASK_PARKED:
+                       kthread_park_complete(prev);
+                       break;
+               }
        }
 
        tick_nohz_task_switch();
@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt)
 
 void __noreturn do_task_dead(void)
 {
-       /*
-        * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
-        * when the following two conditions become true.
-        *   - There is race condition of mmap_sem (It is acquired by
-        *     exit_mm()), and
-        *   - SMI occurs before setting TASK_RUNINNG.
-        *     (or hypervisor of virtual machine switches to other guest)
-        *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
-        *
-        * To avoid it, we have to wait for releasing tsk->pi_lock which
-        * is held by try_to_wake_up()
-        */
-       raw_spin_lock_irq(&current->pi_lock);
-       raw_spin_unlock_irq(&current->pi_lock);
-
        /* Causes final put_task_struct in finish_task_switch(): */
-       __set_current_state(TASK_DEAD);
+       set_special_state(TASK_DEAD);
 
        /* Tell freezer to ignore us: */
        current->flags |= PF_NOFREEZE;
@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
                                     struct cftype *cft, s64 nice)
 {
        unsigned long weight;
+       int idx;
 
        if (nice < MIN_NICE || nice > MAX_NICE)
                return -ERANGE;
 
-       weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+       idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+       idx = array_index_nospec(idx, 40);
+       weight = sched_prio_to_weight[idx];
+
        return sched_group_set_shares(css_tg(css), scale_load(weight));
 }
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f43627c6bb3d..79f574dba096 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9792,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
        if (curr_cost > this_rq->max_idle_balance_cost)
                this_rq->max_idle_balance_cost = curr_cost;
 
+out:
        /*
         * While browsing the domains, we released the rq lock, a task could
         * have been enqueued in the meantime. Since we're not going idle,
@@ -9800,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
        if (this_rq->cfs.h_nr_running && !pulled_task)
                pulled_task = 1;
 
-out:
        /* Move the next balance forward */
        if (time_after(this_rq->next_balance, next_balance))
                this_rq->next_balance = next_balance;
diff --git a/kernel/signal.c b/kernel/signal.c
index d4ccea599692..9c33163a6165 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1961,14 +1961,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
                return;
        }
 
+       set_special_state(TASK_TRACED);
+
        /*
         * We're committing to trapping. TRACED should be visible before
         * TRAPPING is cleared; otherwise, the tracer might fail do_wait().
         * Also, transition to TRACED and updates to ->jobctl should be
         * atomic with respect to siglock and should be done after the arch
         * hook as siglock is released and regrabbed across it.
+        *
+        *     TRACER                               TRACEE
+        *
+        *     ptrace_attach()
+        * [L]   wait_on_bit(JOBCTL_TRAPPING)   [S] set_special_state(TRACED)
+        *     do_wait()
+        *       set_current_state()                smp_wmb();
+        *       ptrace_do_wait()
+        *         wait_task_stopped()
+        *           task_stopped_code()
+        * [L]         task_is_traced()         [S] task_clear_jobctl_trapping();
         */
-       set_current_state(TASK_TRACED);
+       smp_wmb();
 
        current->last_siginfo = info;
        current->exit_code = exit_code;
@@ -2176,7 +2189,7 @@ static bool do_signal_stop(int signr)
                if (task_participate_group_stop(current))
                        notify = CLD_STOPPED;
 
-               __set_current_state(TASK_STOPPED);
+               set_special_state(TASK_STOPPED);
                spin_unlock_irq(&current->sighand->siglock);
 
                /*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b7591261652d..64c0291b579c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -21,6 +21,7 @@
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
 #include <linux/nmi.h>
+#include <linux/sched/wake_q.h>
 
 /*
  * Structure to determine completion condition and record errors.  May
@@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
 }
 
 static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
-                                       struct cpu_stop_work *work)
+                                       struct cpu_stop_work *work,
+                                       struct wake_q_head *wakeq)
 {
        list_add_tail(&work->list, &stopper->works);
-       wake_up_process(stopper->thread);
+       wake_q_add(wakeq, stopper->thread);
 }
 
 /* queue @work to @stopper. if offline, @work is completed immediately */
 static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+       DEFINE_WAKE_Q(wakeq);
        unsigned long flags;
        bool enabled;
 
        spin_lock_irqsave(&stopper->lock, flags);
        enabled = stopper->enabled;
        if (enabled)
-               __cpu_stop_queue_work(stopper, work);
+               __cpu_stop_queue_work(stopper, work, &wakeq);
        else if (work->done)
                cpu_stop_signal_done(work->done);
        spin_unlock_irqrestore(&stopper->lock, flags);
 
+       wake_up_q(&wakeq);
+
        return enabled;
 }
 
@@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
 {
        struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
        struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+       DEFINE_WAKE_Q(wakeq);
        int err;
 retry:
        spin_lock_irq(&stopper1->lock);
@@ -252,8 +258,8 @@ retry:
                goto unlock;
 
        err = 0;
-       __cpu_stop_queue_work(stopper1, work1);
-       __cpu_stop_queue_work(stopper2, work2);
+       __cpu_stop_queue_work(stopper1, work1, &wakeq);
+       __cpu_stop_queue_work(stopper2, work2, &wakeq);
 unlock:
        spin_unlock(&stopper2->lock);
        spin_unlock_irq(&stopper1->lock);
@@ -263,6 +269,9 @@ unlock:
                cpu_relax();
                goto retry;
        }
+
+       wake_up_q(&wakeq);
+
        return err;
 }
 
 /**
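
The stop_machine.c change above is the usual shape for waking a task without holding a spinlock across the wakeup: wakeups are collected on a wake_q while stopper->lock is held and issued only after the lock is dropped. A minimal sketch of that pattern, with hypothetical names (my_lock, my_worker) rather than the stopper structures:

#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

static void queue_and_wake(spinlock_t *my_lock, struct task_struct *my_worker)
{
        DEFINE_WAKE_Q(wakeq);
        unsigned long flags;

        spin_lock_irqsave(my_lock, flags);
        /* ... add the work item to a list protected by my_lock ... */
        wake_q_add(&wakeq, my_worker);          /* defer the wakeup */
        spin_unlock_irqrestore(my_lock, flags);

        wake_up_q(&wakeq);                      /* wake after dropping the lock */
}
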
