author     Ingo Molnar <mingo@kernel.org>   2018-05-14 03:02:14 -0400
committer  Ingo Molnar <mingo@kernel.org>   2018-05-14 03:02:14 -0400
commit     dfd5c3ea641b1697333e5f6704e4e5dddfafe86b (patch)
tree       5eab12757acaec0f7ff07a48f4b66140b78eb969 /kernel/sched
parent     247f2f6f3c706b40b5f3886646f3eb53671258bf (diff)
parent     67b8d5c7081221efa252e111cd52532ec6d4266f (diff)
Merge tag 'v4.17-rc5' into sched/core, to pick up fixes and dependencies
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/autogroup.c          |  7
-rw-r--r--  kernel/sched/core.c               |  7
-rw-r--r--  kernel/sched/cpufreq_schedutil.c  | 16
-rw-r--r--  kernel/sched/fair.c               | 57
4 files changed, 14 insertions, 73 deletions
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 6be6c575b6cd..2d4ff5353ded 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -2,6 +2,7 @@
 /*
  * Auto-group scheduling implementation:
  */
+#include <linux/nospec.h>
 #include "sched.h"
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 	static unsigned long next = INITIAL_JIFFIES;
 	struct autogroup *ag;
 	unsigned long shares;
-	int err;
+	int err, idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 
 	next = HZ / 10 + jiffies;
 	ag = autogroup_task_get(p);
-	shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+	idx = array_index_nospec(nice + 20, 40);
+	shares = scale_load(sched_prio_to_weight[idx]);
 
 	down_write(&ag->lock);
 	err = sched_group_set_shares(ag->tg, shares);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 71bdb86e07f9..4e0ebae045dc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8,6 +8,7 @@
 #include "sched.h"
 
 #include <linux/kthread.h>
+#include <linux/nospec.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -6926,11 +6927,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
 				     struct cftype *cft, s64 nice)
 {
 	unsigned long weight;
+	int idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -ERANGE;
 
-	weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+	idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+	idx = array_index_nospec(idx, 40);
+	weight = sched_prio_to_weight[idx];
+
 	return sched_group_set_shares(css_tg(css), scale_load(weight));
 }
 #endif
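The core.c hunk applies the same clamp on the cgroup cpu.weight.nice write path. Conceptually, array_index_nospec() clamps with a branchless mask rather than a conditional, so the index stays in range even on a speculatively executed path. The sketch below approximates that masking idea in plain C; it is not the kernel macro, which uses an architecture-specific branchless sequence.

#include <stdio.h>

/*
 * Illustrative mask-based clamp: returns idx when idx < size, 0 otherwise.
 * mask is ~0UL when idx < size and 0UL otherwise, so the AND either keeps
 * the index or forces it to 0.  This is a sketch of the idea behind
 * array_index_nospec(), not the real implementation.
 */
static unsigned long index_nospec_sketch(unsigned long idx, unsigned long size)
{
	unsigned long mask = 0UL - (unsigned long)(idx < size);

	return idx & mask;
}

int main(void)
{
	printf("%lu\n", index_nospec_sketch(25, 40));	/* 25: in range */
	printf("%lu\n", index_nospec_sketch(99, 40));	/* 0: clamped  */
	return 0;
}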
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d2c6083304b4..e13df951aca7 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -305,7 +305,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
 	 */
-	if (busy && next_f < sg_policy->next_freq) {
+	if (busy && next_f < sg_policy->next_freq &&
+	    sg_policy->next_freq != UINT_MAX) {
 		next_f = sg_policy->next_freq;
 
 		/* Reset cached freq as next_freq has changed */
@@ -396,19 +397,6 @@ static void sugov_irq_work(struct irq_work *irq_work)
 
 	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
 
-	/*
-	 * For RT tasks, the schedutil governor shoots the frequency to maximum.
-	 * Special care must be taken to ensure that this kthread doesn't result
-	 * in the same behavior.
-	 *
-	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
-	 * updated only at the end of the sugov_work() function and before that
-	 * the schedutil governor rejects all other frequency scaling requests.
-	 *
-	 * There is a very rare case though, where the RT thread yields right
-	 * after the work_in_progress flag is cleared. The effects of that are
-	 * neglected for now.
-	 */
 	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
 }
 
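The cpufreq_schedutil.c hunk tightens the "do not lower the frequency of a busy CPU" shortcut: at this point in the code's history sg_policy->next_freq starts out as UINT_MAX, used as an "invalid / not yet set" marker, and must not be latched as if it were a real frequency. A hedged sketch of just that decision is below; struct sugov_policy is trimmed to the single field the example needs, and pick_next_freq() is an illustrative helper, not a kernel function.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Trimmed-down stand-in for struct sugov_policy: only the cached next_freq. */
struct sugov_policy_sketch {
	unsigned int next_freq;		/* UINT_MAX means "invalid / not set yet" */
};

/*
 * Sketch of the busy-CPU shortcut after the fix: keep the previously
 * requested frequency only if it is a real value, never the UINT_MAX marker.
 */
static unsigned int pick_next_freq(struct sugov_policy_sketch *sg_policy,
				   unsigned int next_f, bool busy)
{
	if (busy && next_f < sg_policy->next_freq &&
	    sg_policy->next_freq != UINT_MAX)
		next_f = sg_policy->next_freq;

	return next_f;
}

int main(void)
{
	struct sugov_policy_sketch sg = { .next_freq = UINT_MAX };

	/* Before the fix this would have latched UINT_MAX for a busy CPU. */
	printf("%u\n", pick_next_freq(&sg, 1200000, true));	/* 1200000 */

	sg.next_freq = 1800000;
	printf("%u\n", pick_next_freq(&sg, 1200000, true));	/* 1800000 */
	return 0;
}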
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1f6a23a5b451..43c7b45f20be 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p)
 static void numa_migrate_preferred(struct task_struct *p)
 {
 	unsigned long interval = HZ;
-	unsigned long numa_migrate_retry;
 
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 
 	/* Periodically retry migrating the task to the preferred node */
 	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
-	numa_migrate_retry = jiffies + interval;
-
-	/*
-	 * Check that the new retry threshold is after the current one. If
-	 * the retry is in the future, it implies that wake_affine has
-	 * temporarily asked NUMA balancing to backoff from placement.
-	 */
-	if (numa_migrate_retry > p->numa_migrate_retry)
-		return;
-
-	/* Safe to try placing the task on the preferred node */
-	p->numa_migrate_retry = numa_migrate_retry;
+	p->numa_migrate_retry = jiffies + interval;
 
 	/* Success if task is already running on preferred CPU */
 	if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 	return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-	unsigned long interval;
-
-	if (!static_branch_likely(&sched_numa_balancing))
-		return;
-
-	/* If balancing has no preference then continue gathering data */
-	if (p->numa_preferred_nid == -1)
-		return;
-
-	/*
-	 * If the wakeup is not affecting locality then it is neutral from
-	 * the perspective of NUMA balacing so continue gathering data.
-	 */
-	if (cpu_to_node(prev_cpu) == cpu_to_node(target))
-		return;
-
-	/*
-	 * Temporarily prevent NUMA balancing trying to place waker/wakee after
-	 * wakee has been moved by wake_affine. This will potentially allow
-	 * related tasks to converge and update their data placement. The
-	 * 4 * numa_scan_period is to allow the two-pass filter to migrate
-	 * hot data to the wakers node.
-	 */
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       p->numa_scan_period << 2);
-	p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       current->numa_scan_period << 2);
-	current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int this_cpu, int prev_cpu, int sync)
 {
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (target == nr_cpumask_bits)
 		return prev_cpu;
 
-	update_wa_numa_placement(p, prev_cpu, target);
 	schedstat_inc(sd->ttwu_move_affine);
 	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 	return target;
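The fair.c hunks revert the wake_affine()-driven NUMA placement backoff: numa_migrate_preferred() again arms p->numa_migrate_retry unconditionally from the task's scan period, and update_wa_numa_placement() is removed. A hedged userspace sketch of just that timing calculation follows; HZ, jiffies and msecs_to_jiffies() are simplified stand-ins for the kernel's timekeeping.

#include <stdio.h>

#define HZ 250UL			/* assumed tick rate for the sketch */

static unsigned long jiffies;		/* stand-in for the kernel's jiffies counter */

static unsigned long msecs_to_jiffies(unsigned long msecs)
{
	return msecs * HZ / 1000;	/* simplified; the kernel rounds up */
}

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

/*
 * Sketch of the retry arming after the revert: cap the interval at one
 * second (HZ jiffies) or 1/16th of the NUMA scan period, whichever is
 * smaller, and set the retry time unconditionally.
 */
static unsigned long next_numa_migrate_retry(unsigned int numa_scan_period_ms)
{
	unsigned long interval = HZ;

	interval = min_ul(interval, msecs_to_jiffies(numa_scan_period_ms) / 16);
	return jiffies + interval;
}

int main(void)
{
	jiffies = 100000;
	/* A 1000 ms scan period: retry HZ/16 jiffies from now. */
	printf("retry at %lu\n", next_numa_migrate_retry(1000));
	return 0;
}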