author     Linus Torvalds <torvalds@linux-foundation.org>    2018-05-13 13:46:53 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2018-05-13 13:46:53 -0400
commit     86a4ac433b927a610c09aa6cfb1926d94a6b37b7 (patch)
tree       30382aac1c4d9ce4612f9398f1e9237ab28cfcfb
parent     baeda7131f54e71e916c43d7a88cb68fcace37da (diff)
parent     789ba28013ce23dbf5e9f5f014f4233b35523bf3 (diff)
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fix from Thomas Gleixner:
"Revert the new NUMA aware placement approach which turned out to
create more problems than it solved"
* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
Revert "sched/numa: Delay retrying placement for automatic NUMA balance after wake_affine()"
-rw-r--r--  kernel/sched/fair.c  57
1 file changed, 1 insertion(+), 56 deletions(-)
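For context on the diff below: the reverted change had wake_affine() call update_wa_numa_placement(), which pushed p->numa_migrate_retry into the future for both the waker and the wakee whenever an affine wakeup moved a task across NUMA nodes, backing placement off for roughly four scan periods. The stand-alone sketch below illustrates only that backoff arithmetic; it is not kernel code, and HZ, the msecs_to_jiffies() helper, the max_ul() stand-in for the kernel's max(), and the example scan_delay_ms/scan_period_ms values are simplifying assumptions.

/*
 * Illustrative sketch (not kernel code) of the backoff window the
 * reverted patch applied in update_wa_numa_placement().  HZ and the
 * ms-to-jiffies conversion are simplified assumptions.
 */
#include <stdio.h>

#define HZ 1000UL                        /* assume 1000 jiffies per second */

static unsigned long msecs_to_jiffies(unsigned long ms)
{
        return ms * HZ / 1000UL;         /* 1:1 when HZ == 1000 */
}

static unsigned long max_ul(unsigned long a, unsigned long b)
{
        return a > b ? a : b;
}

int main(void)
{
        unsigned long jiffies = 100000UL;       /* pretend current time */
        unsigned long scan_delay_ms = 1000UL;   /* stands in for sysctl_numa_balancing_scan_delay */
        unsigned long scan_period_ms = 1000UL;  /* stands in for p->numa_scan_period */

        /* Reverted behaviour: back placement off for ~4 scan periods. */
        unsigned long backoff_ms = max_ul(scan_delay_ms, scan_period_ms << 2);
        unsigned long retry_at = jiffies + msecs_to_jiffies(backoff_ms);

        printf("backoff window: %lu ms, retry allowed at jiffy %lu\n",
               backoff_ms, retry_at);
        return 0;
}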
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 54dc31e7ab9b..f43627c6bb3d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p)
 static void numa_migrate_preferred(struct task_struct *p)
 {
 	unsigned long interval = HZ;
-	unsigned long numa_migrate_retry;
 
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 
 	/* Periodically retry migrating the task to the preferred node */
 	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
-	numa_migrate_retry = jiffies + interval;
-
-	/*
-	 * Check that the new retry threshold is after the current one. If
-	 * the retry is in the future, it implies that wake_affine has
-	 * temporarily asked NUMA balancing to backoff from placement.
-	 */
-	if (numa_migrate_retry > p->numa_migrate_retry)
-		return;
-
-	/* Safe to try placing the task on the preferred node */
-	p->numa_migrate_retry = numa_migrate_retry;
+	p->numa_migrate_retry = jiffies + interval;
 
 	/* Success if task is already running on preferred CPU */
 	if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 	return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-	unsigned long interval;
-
-	if (!static_branch_likely(&sched_numa_balancing))
-		return;
-
-	/* If balancing has no preference then continue gathering data */
-	if (p->numa_preferred_nid == -1)
-		return;
-
-	/*
-	 * If the wakeup is not affecting locality then it is neutral from
-	 * the perspective of NUMA balacing so continue gathering data.
-	 */
-	if (cpu_to_node(prev_cpu) == cpu_to_node(target))
-		return;
-
-	/*
-	 * Temporarily prevent NUMA balancing trying to place waker/wakee after
-	 * wakee has been moved by wake_affine. This will potentially allow
-	 * related tasks to converge and update their data placement. The
-	 * 4 * numa_scan_period is to allow the two-pass filter to migrate
-	 * hot data to the wakers node.
-	 */
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       p->numa_scan_period << 2);
-	p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
-	interval = max(sysctl_numa_balancing_scan_delay,
-		       current->numa_scan_period << 2);
-	current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int this_cpu, int prev_cpu, int sync)
 {
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (target == nr_cpumask_bits)
 		return prev_cpu;
 
-	update_wa_numa_placement(p, prev_cpu, target);
 	schedstat_inc(sd->ttwu_move_affine);
 	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 	return target;
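With the revert applied, numa_migrate_preferred() goes back to unconditionally setting p->numa_migrate_retry from a short periodic interval instead of honouring a wake_affine() backoff. Below is a stand-alone sketch of that interval computation, under the same simplifying assumptions as above (HZ, msecs_to_jiffies(), the min_ul() stand-in for the kernel's min(), and the example scan period are illustrative only).

/*
 * Illustrative sketch (not kernel code) of the periodic retry interval
 * that numa_migrate_preferred() computes after this revert.
 */
#include <stdio.h>

#define HZ 1000UL                        /* assume 1000 jiffies per second */

static unsigned long msecs_to_jiffies(unsigned long ms)
{
        return ms * HZ / 1000UL;
}

static unsigned long min_ul(unsigned long a, unsigned long b)
{
        return a < b ? a : b;
}

int main(void)
{
        unsigned long jiffies = 100000UL;       /* pretend current time */
        unsigned long scan_period_ms = 1000UL;  /* stands in for p->numa_scan_period */

        /* interval = min(HZ, msecs_to_jiffies(scan_period) / 16), as in the restored code */
        unsigned long interval = min_ul(HZ, msecs_to_jiffies(scan_period_ms) / 16);
        unsigned long retry_at = jiffies + interval;

        printf("retry interval: %lu jiffies, next retry at jiffy %lu\n",
               interval, retry_at);
        return 0;
}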