diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-05-13 13:53:08 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-05-13 13:53:08 -0400 |
commit | 66e1c94db3cd4e094de66a6be68c3ab6d17e0c52 (patch) | |
tree | 920eecb13e08704407ce3aa9739699366b3ef130 /kernel/sched | |
parent | 86a4ac433b927a610c09aa6cfb1926d94a6b37b7 (diff) | |
parent | e0f6d1a526b6adfa9ca3b336b83ece0eed345033 (diff) |
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/pti updates from Thomas Gleixner:
"A mixed bag of fixes and updates for the ghosts which are hunting us.
The scheduler fixes have been pulled into that branch to avoid
conflicts.
- A set of fixes to address a kthread_parkme() race which caused lost
wakeups and loss of state.
- A deadlock fix for stop_machine() solved by moving the wakeups
outside of the stopper_lock held region.
- A set of Spectre V1 array access restrictions. The possible
problematic spots were discovered by Dan Carpenter's new checks in
smatch.
- Removal of an unused file which was forgotten when the rest of that
functionality was removed"
* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/vdso: Remove unused file
perf/x86/cstate: Fix possible Spectre-v1 indexing for pkg_msr
perf/x86/msr: Fix possible Spectre-v1 indexing in the MSR driver
perf/x86: Fix possible Spectre-v1 indexing for x86_pmu::event_map()
perf/x86: Fix possible Spectre-v1 indexing for hw_perf_event cache_*
perf/core: Fix possible Spectre-v1 indexing for ->aux_pages[]
sched/autogroup: Fix possible Spectre-v1 indexing for sched_prio_to_weight[]
sched/core: Fix possible Spectre-v1 indexing for sched_prio_to_weight[]
sched/core: Introduce set_special_state()
kthread, sched/wait: Fix kthread_parkme() completion issue
kthread, sched/wait: Fix kthread_parkme() wait-loop
sched/fair: Fix the update of blocked load when newly idle
stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/autogroup.c | 7 | ||||
-rw-r--r-- | kernel/sched/core.c | 56 | ||||
-rw-r--r-- | kernel/sched/fair.c | 2 |
3 files changed, 34 insertions, 31 deletions
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c index 6be6c575b6cd..2d4ff5353ded 100644 --- a/kernel/sched/autogroup.c +++ b/kernel/sched/autogroup.c | |||
@@ -2,6 +2,7 @@ | |||
2 | /* | 2 | /* |
3 | * Auto-group scheduling implementation: | 3 | * Auto-group scheduling implementation: |
4 | */ | 4 | */ |
5 | #include <linux/nospec.h> | ||
5 | #include "sched.h" | 6 | #include "sched.h" |
6 | 7 | ||
7 | unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; | 8 | unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; |
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) | |||
209 | static unsigned long next = INITIAL_JIFFIES; | 210 | static unsigned long next = INITIAL_JIFFIES; |
210 | struct autogroup *ag; | 211 | struct autogroup *ag; |
211 | unsigned long shares; | 212 | unsigned long shares; |
212 | int err; | 213 | int err, idx; |
213 | 214 | ||
214 | if (nice < MIN_NICE || nice > MAX_NICE) | 215 | if (nice < MIN_NICE || nice > MAX_NICE) |
215 | return -EINVAL; | 216 | return -EINVAL; |
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) | |||
227 | 228 | ||
228 | next = HZ / 10 + jiffies; | 229 | next = HZ / 10 + jiffies; |
229 | ag = autogroup_task_get(p); | 230 | ag = autogroup_task_get(p); |
230 | shares = scale_load(sched_prio_to_weight[nice + 20]); | 231 | |
232 | idx = array_index_nospec(nice + 20, 40); | ||
233 | shares = scale_load(sched_prio_to_weight[idx]); | ||
231 | 234 | ||
232 | down_write(&ag->lock); | 235 | down_write(&ag->lock); |
233 | err = sched_group_set_shares(ag->tg, shares); | 236 | err = sched_group_set_shares(ag->tg, shares); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5e10aaeebfcc..092f7c4de903 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -7,6 +7,9 @@ | |||
7 | */ | 7 | */ |
8 | #include "sched.h" | 8 | #include "sched.h" |
9 | 9 | ||
10 | #include <linux/kthread.h> | ||
11 | #include <linux/nospec.h> | ||
12 | |||
10 | #include <asm/switch_to.h> | 13 | #include <asm/switch_to.h> |
11 | #include <asm/tlb.h> | 14 | #include <asm/tlb.h> |
12 | 15 | ||
@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev) | |||
2718 | membarrier_mm_sync_core_before_usermode(mm); | 2721 | membarrier_mm_sync_core_before_usermode(mm); |
2719 | mmdrop(mm); | 2722 | mmdrop(mm); |
2720 | } | 2723 | } |
2721 | if (unlikely(prev_state == TASK_DEAD)) { | 2724 | if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) { |
2722 | if (prev->sched_class->task_dead) | 2725 | switch (prev_state) { |
2723 | prev->sched_class->task_dead(prev); | 2726 | case TASK_DEAD: |
2727 | if (prev->sched_class->task_dead) | ||
2728 | prev->sched_class->task_dead(prev); | ||
2724 | 2729 | ||
2725 | /* | 2730 | /* |
2726 | * Remove function-return probe instances associated with this | 2731 | * Remove function-return probe instances associated with this |
2727 | * task and put them back on the free list. | 2732 | * task and put them back on the free list. |
2728 | */ | 2733 | */ |
2729 | kprobe_flush_task(prev); | 2734 | kprobe_flush_task(prev); |
2735 | |||
2736 | /* Task is done with its stack. */ | ||
2737 | put_task_stack(prev); | ||
2730 | 2738 | ||
2731 | /* Task is done with its stack. */ | 2739 | put_task_struct(prev); |
2732 | put_task_stack(prev); | 2740 | break; |
2733 | 2741 | ||
2734 | put_task_struct(prev); | 2742 | case TASK_PARKED: |
2743 | kthread_park_complete(prev); | ||
2744 | break; | ||
2745 | } | ||
2735 | } | 2746 | } |
2736 | 2747 | ||
2737 | tick_nohz_task_switch(); | 2748 | tick_nohz_task_switch(); |
@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt) | |||
3498 | 3509 | ||
3499 | void __noreturn do_task_dead(void) | 3510 | void __noreturn do_task_dead(void) |
3500 | { | 3511 | { |
3501 | /* | ||
3502 | * The setting of TASK_RUNNING by try_to_wake_up() may be delayed | ||
3503 | * when the following two conditions become true. | ||
3504 | * - There is race condition of mmap_sem (It is acquired by | ||
3505 | * exit_mm()), and | ||
3506 | * - SMI occurs before setting TASK_RUNINNG. | ||
3507 | * (or hypervisor of virtual machine switches to other guest) | ||
3508 | * As a result, we may become TASK_RUNNING after becoming TASK_DEAD | ||
3509 | * | ||
3510 | * To avoid it, we have to wait for releasing tsk->pi_lock which | ||
3511 | * is held by try_to_wake_up() | ||
3512 | */ | ||
3513 | raw_spin_lock_irq(¤t->pi_lock); | ||
3514 | raw_spin_unlock_irq(¤t->pi_lock); | ||
3515 | |||
3516 | /* Causes final put_task_struct in finish_task_switch(): */ | 3512 | /* Causes final put_task_struct in finish_task_switch(): */ |
3517 | __set_current_state(TASK_DEAD); | 3513 | set_special_state(TASK_DEAD); |
3518 | 3514 | ||
3519 | /* Tell freezer to ignore us: */ | 3515 | /* Tell freezer to ignore us: */ |
3520 | current->flags |= PF_NOFREEZE; | 3516 | current->flags |= PF_NOFREEZE; |
@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css, | |||
6928 | struct cftype *cft, s64 nice) | 6924 | struct cftype *cft, s64 nice) |
6929 | { | 6925 | { |
6930 | unsigned long weight; | 6926 | unsigned long weight; |
6927 | int idx; | ||
6931 | 6928 | ||
6932 | if (nice < MIN_NICE || nice > MAX_NICE) | 6929 | if (nice < MIN_NICE || nice > MAX_NICE) |
6933 | return -ERANGE; | 6930 | return -ERANGE; |
6934 | 6931 | ||
6935 | weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO]; | 6932 | idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO; |
6933 | idx = array_index_nospec(idx, 40); | ||
6934 | weight = sched_prio_to_weight[idx]; | ||
6935 | |||
6936 | return sched_group_set_shares(css_tg(css), scale_load(weight)); | 6936 | return sched_group_set_shares(css_tg(css), scale_load(weight)); |
6937 | } | 6937 | } |
6938 | #endif | 6938 | #endif |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f43627c6bb3d..79f574dba096 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -9792,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) | |||
9792 | if (curr_cost > this_rq->max_idle_balance_cost) | 9792 | if (curr_cost > this_rq->max_idle_balance_cost) |
9793 | this_rq->max_idle_balance_cost = curr_cost; | 9793 | this_rq->max_idle_balance_cost = curr_cost; |
9794 | 9794 | ||
9795 | out: | ||
9795 | /* | 9796 | /* |
9796 | * While browsing the domains, we released the rq lock, a task could | 9797 | * While browsing the domains, we released the rq lock, a task could |
9797 | * have been enqueued in the meantime. Since we're not going idle, | 9798 | * have been enqueued in the meantime. Since we're not going idle, |
@@ -9800,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) | |||
9800 | if (this_rq->cfs.h_nr_running && !pulled_task) | 9801 | if (this_rq->cfs.h_nr_running && !pulled_task) |
9801 | pulled_task = 1; | 9802 | pulled_task = 1; |
9802 | 9803 | ||
9803 | out: | ||
9804 | /* Move the next balance forward */ | 9804 | /* Move the next balance forward */ |
9805 | if (time_after(this_rq->next_balance, next_balance)) | 9805 | if (time_after(this_rq->next_balance, next_balance)) |
9806 | this_rq->next_balance = next_balance; | 9806 | this_rq->next_balance = next_balance; |