author     Ingo Molnar <mingo@kernel.org>   2018-05-14 03:02:14 -0400
committer  Ingo Molnar <mingo@kernel.org>   2018-05-14 03:02:14 -0400
commit     dfd5c3ea641b1697333e5f6704e4e5dddfafe86b (patch)
tree       5eab12757acaec0f7ff07a48f4b66140b78eb969 /kernel/sched
parent     247f2f6f3c706b40b5f3886646f3eb53671258bf (diff)
parent     67b8d5c7081221efa252e111cd52532ec6d4266f (diff)
Merge tag 'v4.17-rc5' into sched/core, to pick up fixes and dependencies
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/autogroup.c          |  7
-rw-r--r--  kernel/sched/core.c               |  7
-rw-r--r--  kernel/sched/cpufreq_schedutil.c  | 16
-rw-r--r--  kernel/sched/fair.c               | 57
4 files changed, 14 insertions, 73 deletions
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 6be6c575b6cd..2d4ff5353ded 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -2,6 +2,7 @@
 /*
  * Auto-group scheduling implementation:
  */
+#include <linux/nospec.h>
 #include "sched.h"
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 	static unsigned long next = INITIAL_JIFFIES;
 	struct autogroup *ag;
 	unsigned long shares;
-	int err;
+	int err, idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 
 	next = HZ / 10 + jiffies;
 	ag = autogroup_task_get(p);
-	shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+	idx = array_index_nospec(nice + 20, 40);
+	shares = scale_load(sched_prio_to_weight[idx]);
 
 	down_write(&ag->lock);
 	err = sched_group_set_shares(ag->tg, shares);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 71bdb86e07f9..4e0ebae045dc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8,6 +8,7 @@
 #include "sched.h"
 
 #include <linux/kthread.h>
+#include <linux/nospec.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -6926,11 +6927,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
 				     struct cftype *cft, s64 nice)
 {
 	unsigned long weight;
+	int idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -ERANGE;
 
-	weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+	idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+	idx = array_index_nospec(idx, 40);
+	weight = sched_prio_to_weight[idx];
+
 	return sched_group_set_shares(css_tg(css), scale_load(weight));
 }
 #endif
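The core.c hunk applies the same clamp on the cgroup cpu.weight.nice write path. Conceptually, array_index_nospec() clamps with a branchless mask rather than a conditional, so the index stays in range even on a speculatively executed path. The sketch below approximates that masking idea in plain C; it is not the kernel macro, which uses an architecture-specific branchless sequence.

#include <stdio.h>

/*
 * Illustrative mask-based clamp: returns idx when idx < size, 0 otherwise.
 * mask is ~0UL when idx < size and 0UL otherwise, so the AND either keeps
 * the index or forces it to 0.  This is a sketch of the idea behind
 * array_index_nospec(), not the real implementation.
 */
static unsigned long index_nospec_sketch(unsigned long idx, unsigned long size)
{
	unsigned long mask = 0UL - (unsigned long)(idx < size);

	return idx & mask;
}

int main(void)
{
	printf("%lu\n", index_nospec_sketch(25, 40));	/* 25: in range */
	printf("%lu\n", index_nospec_sketch(99, 40));	/* 0: clamped  */
	return 0;
}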
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d2c6083304b4..e13df951aca7 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -305,7 +305,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
 	 */
-	if (busy && next_f < sg_policy->next_freq) {
+	if (busy && next_f < sg_policy->next_freq &&
+	    sg_policy->next_freq != UINT_MAX) {
 		next_f = sg_policy->next_freq;
 
 		/* Reset cached freq as next_freq has changed */
@@ -396,19 +397,6 @@ static void sugov_irq_work(struct irq_work *irq_work)
 
 	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
 
-	/*
-	 * For RT tasks, the schedutil governor shoots the frequency to maximum.
-	 * Special care must be taken to ensure that this kthread doesn't result
-	 * in the same behavior.
-	 *
-	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
-	 * updated only at the end of the sugov_work() function and before that
-	 * the schedutil governor rejects all other frequency scaling requests.
-	 *
-	 * There is a very rare case though, where the RT thread yields right
-	 * after the work_in_progress flag is cleared. The effects of that are
-	 * neglected for now.
-	 */
 	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
 }
 
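The cpufreq_schedutil.c hunk tightens the "do not lower the frequency of a busy CPU" shortcut: at this point in the code's history sg_policy->next_freq starts out as UINT_MAX, used as an "invalid / not yet set" marker, and must not be latched as if it were a real frequency. A hedged sketch of just that decision is below; struct sugov_policy is trimmed to the single field the example needs, and pick_next_freq() is an illustrative helper, not a kernel function.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Trimmed-down stand-in for struct sugov_policy: only the cached next_freq. */
struct sugov_policy_sketch {
	unsigned int next_freq;		/* UINT_MAX means "invalid / not set yet" */
};

/*
 * Sketch of the busy-CPU shortcut after the fix: keep the previously
 * requested frequency only if it is a real value, never the UINT_MAX marker.
 */
static unsigned int pick_next_freq(struct sugov_policy_sketch *sg_policy,
				   unsigned int next_f, bool busy)
{
	if (busy && next_f < sg_policy->next_freq &&
	    sg_policy->next_freq != UINT_MAX)
		next_f = sg_policy->next_freq;

	return next_f;
}

int main(void)
{
	struct sugov_policy_sketch sg = { .next_freq = UINT_MAX };

	/* Before the fix this would have latched UINT_MAX for a busy CPU. */
	printf("%u\n", pick_next_freq(&sg, 1200000, true));	/* 1200000 */

	sg.next_freq = 1800000;
	printf("%u\n", pick_next_freq(&sg, 1200000, true));	/* 1800000 */
	return 0;
}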
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1f6a23a5b451..43c7b45f20be 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p)
 static void numa_migrate_preferred(struct task_struct *p)
 {
 	unsigned long interval = HZ;
-	unsigned long numa_migrate_retry;
 
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 
 	/* Periodically retry migrating the task to the preferred node */
 	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
-	numa_migrate_retry = jiffies + interval;
-
-	/*
-	 * Check that the new retry threshold is after the current one. If
-	 * the retry is in the future, it implies that wake_affine has
-	 * temporarily asked NUMA balancing to backoff from placement.
-	 */
-	if (numa_migrate_retry > p->numa_migrate_retry)
-		return;
-
-	/* Safe to try placing the task on the preferred node */
-	p->numa_migrate_retry = numa_migrate_retry;
+	p->numa_migrate_retry = jiffies + interval;
 
 	/* Success if task is already running on preferred CPU */
 	if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 	return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-	unsigned long interval;
-
-	if (!static_branch_likely(&sched_numa_balancing))
-		return;
-
-	/* If balancing has no preference then continue gathering data */
-	if (p->numa_preferred_nid == -1)
-		return;
-
-	/*
-	 * If the wakeup is not affecting locality then it is neutral from
-	 * the perspective of NUMA balacing so continue gathering data.
-	 */
-	if (cpu_to_node(prev_cpu) == cpu_to_node(target))
-		return;
-
-	/*
-	 * Temporarily prevent NUMA balancing trying to place waker/wakee after
-	 * wakee has been moved by wake_affine. This will potentially allow
-	 * related tasks to converge and update their data placement. The
-	 * 4 * numa_scan_period is to allow the two-pass filter to migrate
-	 * hot data to the wakers node.
-	 */
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       p->numa_scan_period << 2);
-	p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       current->numa_scan_period << 2);
-	current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int this_cpu, int prev_cpu, int sync)
 {
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (target == nr_cpumask_bits)
 		return prev_cpu;
 
-	update_wa_numa_placement(p, prev_cpu, target);
 	schedstat_inc(sd->ttwu_move_affine);
 	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 	return target;
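The fair.c hunks revert the wake_affine()-driven NUMA placement backoff: numa_migrate_preferred() again arms p->numa_migrate_retry unconditionally from the task's scan period, and update_wa_numa_placement() is removed. A hedged userspace sketch of just that timing calculation follows; HZ, jiffies and msecs_to_jiffies() are simplified stand-ins for the kernel's timekeeping.

#include <stdio.h>

#define HZ 250UL			/* assumed tick rate for the sketch */

static unsigned long jiffies;		/* stand-in for the kernel's jiffies counter */

static unsigned long msecs_to_jiffies(unsigned long msecs)
{
	return msecs * HZ / 1000;	/* simplified; the kernel rounds up */
}

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

/*
 * Sketch of the retry arming after the revert: cap the interval at one
 * second (HZ jiffies) or 1/16th of the NUMA scan period, whichever is
 * smaller, and set the retry time unconditionally.
 */
static unsigned long next_numa_migrate_retry(unsigned int numa_scan_period_ms)
{
	unsigned long interval = HZ;

	interval = min_ul(interval, msecs_to_jiffies(numa_scan_period_ms) / 16);
	return jiffies + interval;
}

int main(void)
{
	jiffies = 100000;
	/* A 1000 ms scan period: retry HZ/16 jiffies from now. */
	printf("retry at %lu\n", next_numa_migrate_retry(1000));
	return 0;
}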