author		Mel Gorman <mgorman@suse.de>	2013-10-07 06:29:37 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-10-09 08:48:18 -0400
commit		930aa174fcc8b0efaad102fd80f677b92f35eaa2 (patch)
tree		7746bbcf350f3ce305d9e55435f7a5e3c41b9c8e
parent		04bb2f9475054298f0c67a89ca92cade42d3fe5e (diff)
sched/numa: Remove the numa_balancing_scan_period_reset sysctl
With scan rate adaptions based on whether the workload has properly
converged or not there should be no need for the scan period reset
hammer. Get rid of it.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-60-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
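[Editorial note: the "scan rate adaption" the commit message relies on can be sketched as a toy userspace model. This is an illustrative assumption, not the kernel's actual update path: the doubling/halving policy and the adapt_scan_period() helper are invented for this sketch, and only the clamp bounds mirror the scan_period_min/max defaults visible in the diff below. The point it demonstrates is why a periodic reset is redundant: the period shrinks again by itself as soon as fault locality degrades.]

#include <stdio.h>

/* Stand-ins for the scan period bounds (ms), mirroring the
 * sysctl_numa_balancing_scan_period_{min,max} defaults below. */
#define SCAN_PERIOD_MIN 1000
#define SCAN_PERIOD_MAX 60000

/* Toy adaption: mostly-local faults mean placement has converged,
 * so back the scan period off; mostly-remote faults mean a phase
 * change, so tighten it again. No "reset hammer" needed. */
static unsigned int adapt_scan_period(unsigned int period,
                                      unsigned long faults_local,
                                      unsigned long faults_remote)
{
    if (faults_local >= faults_remote)
        period *= 2;            /* converged: scan less often */
    else
        period /= 2;            /* phase change: scan more often */

    if (period < SCAN_PERIOD_MIN)
        period = SCAN_PERIOD_MIN;
    if (period > SCAN_PERIOD_MAX)
        period = SCAN_PERIOD_MAX;
    return period;
}

int main(void)
{
    unsigned int period = SCAN_PERIOD_MIN;

    /* A workload that converges, then enters a new phase. */
    unsigned long local[]  = { 10, 50, 90, 95,  5, 10 };
    unsigned long remote[] = { 90, 50, 10,  5, 95, 90 };

    for (int i = 0; i < 6; i++) {
        period = adapt_scan_period(period, local[i], remote[i]);
        printf("scan %d: local=%lu remote=%lu -> period=%ums\n",
               i, local[i], remote[i], period);
    }
    return 0;
}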
 Documentation/sysctl/kernel.txt | 11 +++--------
 include/linux/mm_types.h        |  3 ---
 include/linux/sched/sysctl.h    |  1 -
 kernel/sched/core.c             |  1 -
 kernel/sched/fair.c             | 18 +-----------------
 kernel/sysctl.c                 |  7 -------
 6 files changed, 4 insertions(+), 37 deletions(-)
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index d48bca45b6f2..84f17800f8b5 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -374,15 +374,13 @@ guarantee. If the target workload is already bound to NUMA nodes then this
 feature should be disabled. Otherwise, if the system overhead from the
 feature is too high then the rate the kernel samples for NUMA hinting
 faults may be controlled by the numa_balancing_scan_period_min_ms,
-numa_balancing_scan_delay_ms, numa_balancing_scan_period_reset,
-numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb and
-numa_balancing_settle_count sysctls.
+numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
+numa_balancing_scan_size_mb and numa_balancing_settle_count sysctls.
 
 ==============================================================
 
 numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms,
-numa_balancing_scan_period_max_ms, numa_balancing_scan_period_reset,
-numa_balancing_scan_size_mb
+numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
 
 Automatic NUMA balancing scans tasks address space and unmaps pages to
 detect if pages are properly placed or if the data should be migrated to a
@@ -418,9 +416,6 @@ rate for each task.
 numa_balancing_scan_size_mb is how many megabytes worth of pages are
 scanned for a given scan.
 
-numa_balancing_scan_period_reset is a blunt instrument that controls how
-often a tasks scan delay is reset to detect sudden changes in task behaviour.
-
 numa_balancing_settle_count is how many scan periods must complete before
 the schedule balancer stops pushing the task towards a preferred node. This
 gives the scheduler a chance to place the task on an alternative node if the
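[Editorial note: after this patch the surviving tunables remain ordinary files under /proc/sys/kernel/. A minimal sketch of inspecting them from userspace follows; the file names are taken from the documentation hunk above, and the files will simply be absent on kernels built without CONFIG_NUMA_BALANCING.]

#include <stdio.h>

/* Tunables that remain after this patch, per the documentation above. */
static const char *tunables[] = {
    "numa_balancing_scan_period_min_ms",
    "numa_balancing_scan_delay_ms",
    "numa_balancing_scan_period_max_ms",
    "numa_balancing_scan_size_mb",
    "numa_balancing_settle_count",
};

int main(void)
{
    char path[128], value[64];

    for (unsigned i = 0; i < sizeof(tunables) / sizeof(tunables[0]); i++) {
        snprintf(path, sizeof(path), "/proc/sys/kernel/%s", tunables[i]);

        FILE *f = fopen(path, "r");
        if (!f) {
            /* Kernel without CONFIG_NUMA_BALANCING, or a different version. */
            printf("%s: unavailable\n", tunables[i]);
            continue;
        }
        if (fgets(value, sizeof(value), f))
            printf("%s = %s", tunables[i], value);  /* value keeps its newline */
        fclose(f);
    }
    return 0;
}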
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a30f9ca66557..a3198e5aaf4e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -420,9 +420,6 @@ struct mm_struct {
 	 */
 	unsigned long numa_next_scan;
 
-	/* numa_next_reset is when the PTE scanner period will be reset */
-	unsigned long numa_next_reset;
-
 	/* Restart point for scanning and setting pte_numa */
 	unsigned long numa_scan_offset;
 
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index bf8086b2506e..10d16c4fbe89 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -47,7 +47,6 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
 extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_numa_balancing_settle_count;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8cfd51f62241..89c5ae836f66 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1721,7 +1721,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 #ifdef CONFIG_NUMA_BALANCING
 	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
 		p->mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
-		p->mm->numa_next_reset = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
 		p->mm->numa_scan_seq = 0;
 	}
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 66237ff8b01e..da6fa22be000 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -826,7 +826,6 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  */
 unsigned int sysctl_numa_balancing_scan_period_min = 1000;
 unsigned int sysctl_numa_balancing_scan_period_max = 60000;
-unsigned int sysctl_numa_balancing_scan_period_reset = 60000;
 
 /* Portion of address space to scan in MB */
 unsigned int sysctl_numa_balancing_scan_size = 256;
@@ -1685,24 +1684,9 @@ void task_numa_work(struct callback_head *work)
 	if (p->flags & PF_EXITING)
 		return;
 
-	if (!mm->numa_next_reset || !mm->numa_next_scan) {
+	if (!mm->numa_next_scan) {
 		mm->numa_next_scan = now +
 			msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
-		mm->numa_next_reset = now +
-			msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
-	}
-
-	/*
-	 * Reset the scan period if enough time has gone by. Objective is that
-	 * scanning will be reduced if pages are properly placed. As tasks
-	 * can enter different phases this needs to be re-examined. Lacking
-	 * proper tracking of reference behaviour, this blunt hammer is used.
-	 */
-	migrate = mm->numa_next_reset;
-	if (time_after(now, migrate)) {
-		p->numa_scan_period = task_scan_min(p);
-		next_scan = now + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
-		xchg(&mm->numa_next_reset, next_scan);
 	}
 
 	/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 42f616a74f40..e509b90a8002 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -371,13 +371,6 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec,
 	},
 	{
-		.procname	= "numa_balancing_scan_period_reset",
-		.data		= &sysctl_numa_balancing_scan_period_reset,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
 		.procname	= "numa_balancing_scan_period_max_ms",
 		.data		= &sysctl_numa_balancing_scan_period_max,
 		.maxlen		= sizeof(unsigned int),