author    Linus Torvalds <torvalds@linux-foundation.org>  2018-05-13 13:53:08 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-05-13 13:53:08 -0400
commit    66e1c94db3cd4e094de66a6be68c3ab6d17e0c52 (patch)
tree      920eecb13e08704407ce3aa9739699366b3ef130 /kernel/sched
parent    86a4ac433b927a610c09aa6cfb1926d94a6b37b7 (diff)
parent    e0f6d1a526b6adfa9ca3b336b83ece0eed345033 (diff)
Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/pti updates from Thomas Gleixner:
 "A mixed bag of fixes and updates for the ghosts which are haunting us.

  The scheduler fixes have been pulled into that branch to avoid
  conflicts.

   - A set of fixes to address a kthread_parkme() race which caused
     lost wakeups and loss of state.

   - A deadlock fix for stop_machine() solved by moving the wakeups
     outside of the stopper_lock held region.

   - A set of Spectre V1 array access restrictions. The possible
     problematic spots were discovered by Dan Carpenter's new checks
     in smatch.

   - Removal of an unused file which was forgotten when the rest of
     that functionality was removed"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso: Remove unused file
  perf/x86/cstate: Fix possible Spectre-v1 indexing for pkg_msr
  perf/x86/msr: Fix possible Spectre-v1 indexing in the MSR driver
  perf/x86: Fix possible Spectre-v1 indexing for x86_pmu::event_map()
  perf/x86: Fix possible Spectre-v1 indexing for hw_perf_event cache_*
  perf/core: Fix possible Spectre-v1 indexing for ->aux_pages[]
  sched/autogroup: Fix possible Spectre-v1 indexing for sched_prio_to_weight[]
  sched/core: Fix possible Spectre-v1 indexing for sched_prio_to_weight[]
  sched/core: Introduce set_special_state()
  kthread, sched/wait: Fix kthread_parkme() completion issue
  kthread, sched/wait: Fix kthread_parkme() wait-loop
  sched/fair: Fix the update of blocked load when newly idle
  stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock
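The stop_machine() deadlock fix named above follows the usual kernel recipe for this class of bug: record pending wakeups on a wake_q while the lock is held, then issue them after the unlock. The stopper code itself lives outside the kernel/sched path filter below, so here is only a minimal sketch of the pattern — the stopper struct and function name are hypothetical stand-ins, not the actual kernel/stop_machine.c code:

	#include <linux/sched/wake_q.h>
	#include <linux/spinlock.h>
	#include <linux/list.h>

	struct stopper {			/* hypothetical */
		raw_spinlock_t lock;
		struct list_head works;
		struct task_struct *thread;
	};

	static void stopper_queue_work(struct stopper *st, struct list_head *work)
	{
		DEFINE_WAKE_Q(wakeq);
		unsigned long flags;

		raw_spin_lock_irqsave(&st->lock, flags);
		list_add_tail(work, &st->works);
		wake_q_add(&wakeq, st->thread);	/* record only, no wakeup yet */
		raw_spin_unlock_irqrestore(&st->lock, flags);

		/*
		 * wake_up_process() can take scheduler locks; doing the actual
		 * wakeup here, outside st->lock, breaks the lock-ordering cycle
		 * behind the migrate_swap() vs. active_balance() deadlock.
		 */
		wake_up_q(&wakeq);
	}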
Diffstat (limited to 'kernel/sched')

 kernel/sched/autogroup.c |  7
 kernel/sched/core.c      | 56
 kernel/sched/fair.c      |  2
 3 files changed, 34 insertions(+), 31 deletions(-)
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 6be6c575b6cd..2d4ff5353ded 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -2,6 +2,7 @@
 /*
  * Auto-group scheduling implementation:
  */
+#include <linux/nospec.h>
 #include "sched.h"
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 	static unsigned long next = INITIAL_JIFFIES;
 	struct autogroup *ag;
 	unsigned long shares;
-	int err;
+	int err, idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 
 	next = HZ / 10 + jiffies;
 	ag = autogroup_task_get(p);
-	shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+	idx = array_index_nospec(nice + 20, 40);
+	shares = scale_load(sched_prio_to_weight[idx]);
 
 	down_write(&ag->lock);
 	err = sched_group_set_shares(ag->tg, shares);
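The hunk above is the canonical Spectre-v1 hardening pattern: the nice-range check can be speculated past, so the array index derived from it is clamped with array_index_nospec() before the dependent load. A minimal sketch of the idiom — get_autogroup_weight() is a hypothetical wrapper for illustration, not a kernel function:

	#include <linux/nospec.h>

	static unsigned long get_autogroup_weight(int nice)
	{
		int idx;

		if (nice < MIN_NICE || nice > MAX_NICE)	/* architectural bounds check */
			return 0;

		/*
		 * Under speculation the branch above may be mispredicted;
		 * array_index_nospec() forces idx into [0, 40) without a
		 * branch, so the load below cannot pull out-of-bounds data
		 * into the cache.
		 */
		idx = array_index_nospec(nice + 20, 40);
		return sched_prio_to_weight[idx];
	}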
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e10aaeebfcc..092f7c4de903 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,6 +7,9 @@
  */
 #include "sched.h"
 
+#include <linux/kthread.h>
+#include <linux/nospec.h>
+
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
 
@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		membarrier_mm_sync_core_before_usermode(mm);
 		mmdrop(mm);
 	}
-	if (unlikely(prev_state == TASK_DEAD)) {
-		if (prev->sched_class->task_dead)
-			prev->sched_class->task_dead(prev);
+	if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
+		switch (prev_state) {
+		case TASK_DEAD:
+			if (prev->sched_class->task_dead)
+				prev->sched_class->task_dead(prev);
 
-		/*
-		 * Remove function-return probe instances associated with this
-		 * task and put them back on the free list.
-		 */
-		kprobe_flush_task(prev);
+			/*
+			 * Remove function-return probe instances associated with this
+			 * task and put them back on the free list.
+			 */
+			kprobe_flush_task(prev);
 
-		/* Task is done with its stack. */
-		put_task_stack(prev);
+			/* Task is done with its stack. */
+			put_task_stack(prev);
 
-		put_task_struct(prev);
+			put_task_struct(prev);
+			break;
+
+		case TASK_PARKED:
+			kthread_park_complete(prev);
+			break;
+		}
 	}
 
 	tick_nohz_task_switch();
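With this hunk, the park completion fires from finish_task_switch(), i.e. only after the parked kthread has actually scheduled out, so a kthread_park() caller can no longer observe "parked" while the thread is still on a CPU. The companion wait-loop fix on the kthread.c side has roughly this shape — a sketch under that reading, not the verbatim upstream code:

	static void __kthread_parkme(struct kthread *self)	/* sketch */
	{
		for (;;) {
			/*
			 * TASK_PARKED must be stored under pi_lock (see
			 * set_special_state() below) so a concurrent wakeup
			 * cannot overwrite it and leak us out of the loop.
			 */
			set_special_state(TASK_PARKED);
			if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
				break;
			schedule();	/* completion signalled by finish_task_switch() */
		}
		__set_current_state(TASK_RUNNING);
	}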
@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt)
 
 void __noreturn do_task_dead(void)
 {
-	/*
-	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
-	 * when the following two conditions become true.
-	 *   - There is race condition of mmap_sem (It is acquired by
-	 *     exit_mm()), and
-	 *   - SMI occurs before setting TASK_RUNINNG.
-	 *     (or hypervisor of virtual machine switches to other guest)
-	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
-	 *
-	 * To avoid it, we have to wait for releasing tsk->pi_lock which
-	 * is held by try_to_wake_up()
-	 */
-	raw_spin_lock_irq(&current->pi_lock);
-	raw_spin_unlock_irq(&current->pi_lock);
-
 	/* Causes final put_task_struct in finish_task_switch(): */
-	__set_current_state(TASK_DEAD);
+	set_special_state(TASK_DEAD);
 
 	/* Tell freezer to ignore us: */
 	current->flags |= PF_NOFREEZE;
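The open-coded pi_lock acquire/release removed above existed only to serialize the TASK_DEAD store against a concurrent try_to_wake_up(). set_special_state(), introduced by this series, folds that serialization into the state store itself: the state is written while holding current->pi_lock, so a racing wakeup can no longer overwrite a special state such as TASK_DEAD or TASK_PARKED with TASK_RUNNING. Its shape is roughly the following — a sketch of the include/linux/sched.h helper, trimmed of debug instrumentation:

	#define set_special_state(state_value)					\
		do {								\
			unsigned long flags;	/* may shadow */		\
			raw_spin_lock_irqsave(&current->pi_lock, flags);	\
			current->state = (state_value);				\
			raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
		} while (0)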
@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
 			       struct cftype *cft, s64 nice)
 {
 	unsigned long weight;
+	int idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -ERANGE;
 
-	weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+	idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+	idx = array_index_nospec(idx, 40);
+	weight = sched_prio_to_weight[idx];
+
 	return sched_group_set_shares(css_tg(css), scale_load(weight));
 }
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f43627c6bb3d..79f574dba096 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9792,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
 	if (curr_cost > this_rq->max_idle_balance_cost)
 		this_rq->max_idle_balance_cost = curr_cost;
 
+out:
 	/*
 	 * While browsing the domains, we released the rq lock, a task could
 	 * have been enqueued in the meantime. Since we're not going idle,
@@ -9800,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
 	if (this_rq->cfs.h_nr_running && !pulled_task)
 		pulled_task = 1;
 
-out:
 	/* Move the next balance forward */
 	if (time_after(this_rq->next_balance, next_balance))
 		this_rq->next_balance = next_balance;
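The label move is subtle but its effect is simple: the "did someone enqueue a task while the rq lock was dropped?" re-check now sits at out:, so it also runs on the early-bail paths that jump there, not just on the fall-through path. Schematically — a condensed sketch of the control flow under that reading, with the early-exit condition simplified, not the full idle_balance():

	static int idle_balance(struct rq *this_rq, struct rq_flags *rf)	/* sketch */
	{
		int pulled_task = 0;

		if (this_rq->avg_idle < sysctl_sched_migration_cost)
			goto out;	/* early bail: previously skipped the re-check */

		/* ... drop the rq lock, walk sched domains, maybe pull a task ... */

	out:
		/*
		 * The rq lock was released above (and by the blocked-load
		 * update on the early path), so a task may have been enqueued
		 * meanwhile. Returning non-zero keeps the CPU from going idle
		 * over a runnable task.
		 */
		if (this_rq->cfs.h_nr_running && !pulled_task)
			pulled_task = 1;

		return pulled_task;
	}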