author		Ingo Molnar <mingo@kernel.org>		2018-05-14 03:02:14 -0400
committer	Ingo Molnar <mingo@kernel.org>		2018-05-14 03:02:14 -0400
commit		dfd5c3ea641b1697333e5f6704e4e5dddfafe86b (patch)
tree		5eab12757acaec0f7ff07a48f4b66140b78eb969 /kernel/sched
parent		247f2f6f3c706b40b5f3886646f3eb53671258bf (diff)
parent		67b8d5c7081221efa252e111cd52532ec6d4266f (diff)
Merge tag 'v4.17-rc5' into sched/core, to pick up fixes and dependencies
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/autogroup.c		 7
-rw-r--r--	kernel/sched/core.c			 7
-rw-r--r--	kernel/sched/cpufreq_schedutil.c	16
-rw-r--r--	kernel/sched/fair.c			57
4 files changed, 14 insertions(+), 73 deletions(-)
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 6be6c575b6cd..2d4ff5353ded 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -2,6 +2,7 @@
 /*
  * Auto-group scheduling implementation:
  */
+#include <linux/nospec.h>
 #include "sched.h"
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 	static unsigned long next = INITIAL_JIFFIES;
 	struct autogroup *ag;
 	unsigned long shares;
-	int err;
+	int err, idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 
 	next = HZ / 10 + jiffies;
 	ag = autogroup_task_get(p);
-	shares = scale_load(sched_prio_to_weight[nice + 20]);
+
+	idx = array_index_nospec(nice + 20, 40);
+	shares = scale_load(sched_prio_to_weight[idx]);
 
 	down_write(&ag->lock);
 	err = sched_group_set_shares(ag->tg, shares);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 71bdb86e07f9..4e0ebae045dc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8,6 +8,7 @@
 #include "sched.h"
 
 #include <linux/kthread.h>
+#include <linux/nospec.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -6926,11 +6927,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
 				     struct cftype *cft, s64 nice)
 {
 	unsigned long weight;
+	int idx;
 
 	if (nice < MIN_NICE || nice > MAX_NICE)
 		return -ERANGE;
 
-	weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+	idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
+	idx = array_index_nospec(idx, 40);
+	weight = sched_prio_to_weight[idx];
+
 	return sched_group_set_shares(css_tg(css), scale_load(weight));
 }
 #endif
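
The autogroup.c and core.c hunks above apply the same pattern: the user-supplied nice value is range-checked, and the derived index into sched_prio_to_weight[] (40 entries) is then passed through array_index_nospec() so the table lookup cannot be turned into a speculative out-of-bounds (Spectre-v1 style) read. A minimal user-space sketch of that flow is shown below; the clamp_index() helper and demo_* names are illustrative stand-ins, not kernel code, and clamp_index() only mimics the bounds clamp, without the speculation barrier the real array_index_nospec() provides.

```c
#define MIN_NICE	(-20)
#define MAX_NICE	19
#define WEIGHT_SLOTS	40

/* Stand-in for array_index_nospec(): returns idx if idx < size, else 0. */
static unsigned int clamp_index(unsigned int idx, unsigned int size)
{
	return idx < size ? idx : 0;
}

/* Stand-in for sched_prio_to_weight[]; zero-filled for the sketch. */
static const unsigned int demo_weights[WEIGHT_SLOTS];

int demo_weight_from_nice(int nice, unsigned int *weight)
{
	unsigned int idx;

	if (nice < MIN_NICE || nice > MAX_NICE)
		return -1;	/* the kernel returns -EINVAL or -ERANGE here */

	/* Validate first, then clamp the index before using it. */
	idx = clamp_index((unsigned int)(nice + 20), WEIGHT_SLOTS);
	*weight = demo_weights[idx];
	return 0;
}
```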
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index d2c6083304b4..e13df951aca7 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -305,7 +305,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
 	 */
-	if (busy && next_f < sg_policy->next_freq) {
+	if (busy && next_f < sg_policy->next_freq &&
+	    sg_policy->next_freq != UINT_MAX) {
 		next_f = sg_policy->next_freq;
 
 		/* Reset cached freq as next_freq has changed */
@@ -396,19 +397,6 @@ static void sugov_irq_work(struct irq_work *irq_work)
 
 	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
 
-	/*
-	 * For RT tasks, the schedutil governor shoots the frequency to maximum.
-	 * Special care must be taken to ensure that this kthread doesn't result
-	 * in the same behavior.
-	 *
-	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
-	 * updated only at the end of the sugov_work() function and before that
-	 * the schedutil governor rejects all other frequency scaling requests.
-	 *
-	 * There is a very rare case though, where the RT thread yields right
-	 * after the work_in_progress flag is cleared. The effects of that are
-	 * neglected for now.
-	 */
 	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
 }
 
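
In the sugov_update_single() hunk, sg_policy->next_freq can still hold UINT_MAX, which the governor appears to use as a "no valid cached frequency yet" marker; without the added check, the "keep the previous frequency while the CPU is busy" shortcut would adopt that invalid value. A standalone sketch of the guarded decision, using assumed demo_* names rather than the real schedutil structures, could read:

```c
#include <limits.h>
#include <stdbool.h>

struct demo_sg_policy {
	unsigned int next_freq;		/* UINT_MAX => no valid cached value */
};

static unsigned int demo_pick_freq(const struct demo_sg_policy *sg,
				   bool busy, unsigned int next_f)
{
	/*
	 * Keep the old frequency on a busy CPU only if that old value is
	 * actually a real frequency, not the UINT_MAX sentinel.
	 */
	if (busy && next_f < sg->next_freq && sg->next_freq != UINT_MAX)
		next_f = sg->next_freq;

	return next_f;
}
```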
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1f6a23a5b451..43c7b45f20be 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p)
 static void numa_migrate_preferred(struct task_struct *p)
 {
 	unsigned long interval = HZ;
-	unsigned long numa_migrate_retry;
 
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 
 	/* Periodically retry migrating the task to the preferred node */
 	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
-	numa_migrate_retry = jiffies + interval;
-
-	/*
-	 * Check that the new retry threshold is after the current one. If
-	 * the retry is in the future, it implies that wake_affine has
-	 * temporarily asked NUMA balancing to backoff from placement.
-	 */
-	if (numa_migrate_retry > p->numa_migrate_retry)
-		return;
-
-	/* Safe to try placing the task on the preferred node */
-	p->numa_migrate_retry = numa_migrate_retry;
+	p->numa_migrate_retry = jiffies + interval;
 
 	/* Success if task is already running on preferred CPU */
 	if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 	return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-	unsigned long interval;
-
-	if (!static_branch_likely(&sched_numa_balancing))
-		return;
-
-	/* If balancing has no preference then continue gathering data */
-	if (p->numa_preferred_nid == -1)
-		return;
-
-	/*
-	 * If the wakeup is not affecting locality then it is neutral from
-	 * the perspective of NUMA balacing so continue gathering data.
-	 */
-	if (cpu_to_node(prev_cpu) == cpu_to_node(target))
-		return;
-
-	/*
-	 * Temporarily prevent NUMA balancing trying to place waker/wakee after
-	 * wakee has been moved by wake_affine. This will potentially allow
-	 * related tasks to converge and update their data placement. The
-	 * 4 * numa_scan_period is to allow the two-pass filter to migrate
-	 * hot data to the wakers node.
-	 */
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       p->numa_scan_period << 2);
-	p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       current->numa_scan_period << 2);
-	current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int this_cpu, int prev_cpu, int sync)
 {
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (target == nr_cpumask_bits)
 		return prev_cpu;
 
-	update_wa_numa_placement(p, prev_cpu, target);
 	schedstat_inc(sd->ttwu_move_affine);
 	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 	return target;
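
The fair.c hunks remove the wake_affine() NUMA-placement backoff: update_wa_numa_placement() is deleted, and numa_migrate_preferred() once again writes p->numa_migrate_retry unconditionally instead of skipping the update when a retry is already scheduled in the future. A rough standalone sketch of the simplified retry update, with hypothetical demo_* types and helpers in place of the kernel's task_struct, jiffies, and min(), is shown below.

```c
#define DEMO_HZ 250
#define demo_min(a, b) ((a) < (b) ? (a) : (b))

struct demo_task {
	int numa_preferred_nid;			/* -1: no preferred node yet */
	unsigned long *numa_faults;		/* NULL until fault stats exist */
	unsigned int numa_scan_period;		/* milliseconds */
	unsigned long numa_migrate_retry;	/* jiffies */
};

static unsigned long demo_msecs_to_jiffies(unsigned int ms)
{
	return (unsigned long)ms * DEMO_HZ / 1000;
}

static void demo_numa_migrate_preferred(struct demo_task *p, unsigned long now)
{
	unsigned long interval = DEMO_HZ;

	/* This task has no NUMA fault statistics yet. */
	if (p->numa_preferred_nid == -1 || !p->numa_faults)
		return;

	/* Periodically retry migrating the task to the preferred node. */
	interval = demo_min(interval,
			    demo_msecs_to_jiffies(p->numa_scan_period) / 16);
	p->numa_migrate_retry = now + interval;

	/* ... the rest of the function attempts the migration itself ... */
}
```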