author		Linus Torvalds <torvalds@linux-foundation.org>	2018-05-13 13:46:53 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-05-13 13:46:53 -0400
commit		86a4ac433b927a610c09aa6cfb1926d94a6b37b7 (patch)
tree		30382aac1c4d9ce4612f9398f1e9237ab28cfcfb
parent		baeda7131f54e71e916c43d7a88cb68fcace37da (diff)
parent		789ba28013ce23dbf5e9f5f014f4233b35523bf3 (diff)
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fix from Thomas Gleixner:
 "Revert the new NUMA aware placement approach which turned out to
  create more problems than it solved"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  Revert "sched/numa: Delay retrying placement for automatic NUMA balance after wake_affine()"
-rw-r--r--	kernel/sched/fair.c	57
1 file changed, 1 insertion(+), 56 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 54dc31e7ab9b..f43627c6bb3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p)
 static void numa_migrate_preferred(struct task_struct *p)
 {
 	unsigned long interval = HZ;
-	unsigned long numa_migrate_retry;
 
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 
 	/* Periodically retry migrating the task to the preferred node */
 	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
-	numa_migrate_retry = jiffies + interval;
-
-	/*
-	 * Check that the new retry threshold is after the current one. If
-	 * the retry is in the future, it implies that wake_affine has
-	 * temporarily asked NUMA balancing to backoff from placement.
-	 */
-	if (numa_migrate_retry > p->numa_migrate_retry)
-		return;
-
-	/* Safe to try placing the task on the preferred node */
-	p->numa_migrate_retry = numa_migrate_retry;
+	p->numa_migrate_retry = jiffies + interval;
 
 	/* Success if task is already running on preferred CPU */
 	if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 	return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-	unsigned long interval;
-
-	if (!static_branch_likely(&sched_numa_balancing))
-		return;
-
-	/* If balancing has no preference then continue gathering data */
-	if (p->numa_preferred_nid == -1)
-		return;
-
-	/*
-	 * If the wakeup is not affecting locality then it is neutral from
-	 * the perspective of NUMA balacing so continue gathering data.
-	 */
-	if (cpu_to_node(prev_cpu) == cpu_to_node(target))
-		return;
-
-	/*
-	 * Temporarily prevent NUMA balancing trying to place waker/wakee after
-	 * wakee has been moved by wake_affine. This will potentially allow
-	 * related tasks to converge and update their data placement. The
-	 * 4 * numa_scan_period is to allow the two-pass filter to migrate
-	 * hot data to the wakers node.
-	 */
-	interval = max(sysctl_numa_balancing_scan_delay,
-			p->numa_scan_period << 2);
-	p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
-	interval = max(sysctl_numa_balancing_scan_delay,
-			current->numa_scan_period << 2);
-	current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int this_cpu, int prev_cpu, int sync)
 {
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (target == nr_cpumask_bits)
 		return prev_cpu;
 
-	update_wa_numa_placement(p, prev_cpu, target);
 	schedstat_inc(sd->ttwu_move_affine);
 	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 	return target;
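
Net effect of the revert, in plain terms: numa_migrate_preferred() goes back to unconditionally re-arming the per-task retry timestamp, instead of skipping placement while a wake_affine()-imposed backoff is still pending. Below is a minimal, standalone C sketch of that restored flow; the task struct, the msecs_to_jiffies() stub and the HZ value are simplifications for illustration only, not the kernel's actual definitions.

/* Illustrative sketch of the post-revert numa_migrate_preferred() flow. */
#include <stdio.h>

#define HZ 250				/* assumed tick rate for this sketch */

struct task {
	int numa_preferred_nid;		/* -1 means no NUMA preference yet */
	unsigned long numa_scan_period;	/* scan period in milliseconds */
	unsigned long numa_migrate_retry; /* next placement retry, in jiffies */
};

static unsigned long msecs_to_jiffies(unsigned long ms)
{
	return ms * HZ / 1000;		/* simplified conversion */
}

/* Post-revert behaviour: always (re)arm the retry timestamp. */
static void numa_migrate_preferred(struct task *p, unsigned long jiffies)
{
	unsigned long interval = HZ;
	unsigned long cap = msecs_to_jiffies(p->numa_scan_period) / 16;

	if (p->numa_preferred_nid == -1)
		return;			/* no NUMA fault statistics yet */

	/* Periodically retry migrating the task to the preferred node. */
	if (cap < interval)
		interval = cap;
	p->numa_migrate_retry = jiffies + interval;
}

int main(void)
{
	struct task t = { .numa_preferred_nid = 0, .numa_scan_period = 1000 };

	numa_migrate_preferred(&t, 10000);
	printf("next retry at jiffy %lu\n", t.numa_migrate_retry);
	return 0;
}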