aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2012-11-22 06:16:36 -0500
committerMel Gorman <mgorman@suse.de>2012-12-11 09:42:55 -0500
commit1a687c2e9a99335c9e77392f050fe607fa18a652 (patch)
tree06df958bfdfeaf9f38f333af106b55faa81f1c6b
parentb8593bfda1652755136333cdd362de125b283a9c (diff)
mm: sched: numa: Control enabling and disabling of NUMA balancing
This patch adds Kconfig options and kernel parameters to allow the enabling and disabling of automatic NUMA balancing. The existence of such a switch was and is very important when debugging problems related to transparent hugepages and we should have the same for automatic NUMA placement. Signed-off-by: Mel Gorman <mgorman@suse.de>
-rw-r--r--Documentation/kernel-parameters.txt3
-rw-r--r--include/linux/sched.h4
-rw-r--r--init/Kconfig8
-rw-r--r--kernel/sched/core.c48
-rw-r--r--kernel/sched/fair.c3
-rw-r--r--kernel/sched/features.h6
-rw-r--r--mm/mempolicy.c46
7 files changed, 101 insertions, 17 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9776f068306b..2e8d2625b814 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1996,6 +1996,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1996 1996
1997 nr_uarts= [SERIAL] maximum number of UARTs to be registered. 1997 nr_uarts= [SERIAL] maximum number of UARTs to be registered.
1998 1998
1999 numa_balancing= [KNL,X86] Enable or disable automatic NUMA balancing.
2000 Allowed values are enable and disable
2001
1999 numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. 2002 numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
2000 one of ['zone', 'node', 'default'] can be specified 2003 one of ['zone', 'node', 'default'] can be specified
2001 This can be set from sysctl after boot. 2004 This can be set from sysctl after boot.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f4ff2bd03f6..b1e619f9ff1a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1563,10 +1563,14 @@ struct task_struct {
1563 1563
1564#ifdef CONFIG_NUMA_BALANCING 1564#ifdef CONFIG_NUMA_BALANCING
1565extern void task_numa_fault(int node, int pages, bool migrated); 1565extern void task_numa_fault(int node, int pages, bool migrated);
1566extern void set_numabalancing_state(bool enabled);
1566#else 1567#else
1567static inline void task_numa_fault(int node, int pages, bool migrated) 1568static inline void task_numa_fault(int node, int pages, bool migrated)
1568{ 1569{
1569} 1570}
1571static inline void set_numabalancing_state(bool enabled)
1572{
1573}
1570#endif 1574#endif
1571 1575
1572/* 1576/*
diff --git a/init/Kconfig b/init/Kconfig
index 9f00f004796a..18e2a5920a34 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -720,6 +720,14 @@ config ARCH_USES_NUMA_PROT_NONE
720 depends on ARCH_WANTS_PROT_NUMA_PROT_NONE 720 depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
721 depends on NUMA_BALANCING 721 depends on NUMA_BALANCING
722 722
723config NUMA_BALANCING_DEFAULT_ENABLED
724 bool "Automatically enable NUMA aware memory/task placement"
725 default y
726 depends on NUMA_BALANCING
727 help
728	  If set, automatic NUMA balancing will be enabled if running on a NUMA
729 machine.
730
723config NUMA_BALANCING 731config NUMA_BALANCING
724 bool "Memory placement aware NUMA scheduler" 732 bool "Memory placement aware NUMA scheduler"
725 default y 733 default y
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9d255bc0e278..7a45015274ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -192,23 +192,10 @@ static void sched_feat_disable(int i) { };
192static void sched_feat_enable(int i) { }; 192static void sched_feat_enable(int i) { };
193#endif /* HAVE_JUMP_LABEL */ 193#endif /* HAVE_JUMP_LABEL */
194 194
195static ssize_t 195static int sched_feat_set(char *cmp)
196sched_feat_write(struct file *filp, const char __user *ubuf,
197 size_t cnt, loff_t *ppos)
198{ 196{
199 char buf[64];
200 char *cmp;
201 int neg = 0;
202 int i; 197 int i;
203 198 int neg = 0;
204 if (cnt > 63)
205 cnt = 63;
206
207 if (copy_from_user(&buf, ubuf, cnt))
208 return -EFAULT;
209
210 buf[cnt] = 0;
211 cmp = strstrip(buf);
212 199
213 if (strncmp(cmp, "NO_", 3) == 0) { 200 if (strncmp(cmp, "NO_", 3) == 0) {
214 neg = 1; 201 neg = 1;
@@ -228,6 +215,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
228 } 215 }
229 } 216 }
230 217
218 return i;
219}
220
221static ssize_t
222sched_feat_write(struct file *filp, const char __user *ubuf,
223 size_t cnt, loff_t *ppos)
224{
225 char buf[64];
226 char *cmp;
227 int i;
228
229 if (cnt > 63)
230 cnt = 63;
231
232 if (copy_from_user(&buf, ubuf, cnt))
233 return -EFAULT;
234
235 buf[cnt] = 0;
236 cmp = strstrip(buf);
237
238 i = sched_feat_set(cmp);
231 if (i == __SCHED_FEAT_NR) 239 if (i == __SCHED_FEAT_NR)
232 return -EINVAL; 240 return -EINVAL;
233 241
@@ -1549,6 +1557,16 @@ static void __sched_fork(struct task_struct *p)
1549#endif /* CONFIG_NUMA_BALANCING */ 1557#endif /* CONFIG_NUMA_BALANCING */
1550} 1558}
1551 1559
1560#ifdef CONFIG_NUMA_BALANCING
1561void set_numabalancing_state(bool enabled)
1562{
1563 if (enabled)
1564 sched_feat_set("NUMA");
1565 else
1566 sched_feat_set("NO_NUMA");
1567}
1568#endif /* CONFIG_NUMA_BALANCING */
1569
1552/* 1570/*
1553 * fork()/clone()-time setup: 1571 * fork()/clone()-time setup:
1554 */ 1572 */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4b577863933f..7a02a2082e95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -811,6 +811,9 @@ void task_numa_fault(int node, int pages, bool migrated)
811{ 811{
812 struct task_struct *p = current; 812 struct task_struct *p = current;
813 813
814 if (!sched_feat_numa(NUMA))
815 return;
816
814 /* FIXME: Allocate task-specific structure for placement policy here */ 817 /* FIXME: Allocate task-specific structure for placement policy here */
815 818
816 /* 819 /*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 5fb7aefbec80..d2373a3e3252 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -63,8 +63,10 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
63SCHED_FEAT(LB_MIN, false) 63SCHED_FEAT(LB_MIN, false)
64 64
65/* 65/*
66 * Apply the automatic NUMA scheduling policy 66 * Apply the automatic NUMA scheduling policy. Enabled automatically
67 * at runtime if running on a NUMA machine. Can be controlled via
68 * numa_balancing=
67 */ 69 */
68#ifdef CONFIG_NUMA_BALANCING 70#ifdef CONFIG_NUMA_BALANCING
69SCHED_FEAT(NUMA, true) 71SCHED_FEAT(NUMA, false)
70#endif 72#endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index fd20e28fd2ad..046308e9b999 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2521,6 +2521,50 @@ void mpol_free_shared_policy(struct shared_policy *p)
2521 mutex_unlock(&p->mutex); 2521 mutex_unlock(&p->mutex);
2522} 2522}
2523 2523
2524#ifdef CONFIG_NUMA_BALANCING
2525static bool __initdata numabalancing_override;
2526
2527static void __init check_numabalancing_enable(void)
2528{
2529 bool numabalancing_default = false;
2530
2531 if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED))
2532 numabalancing_default = true;
2533
2534 if (nr_node_ids > 1 && !numabalancing_override) {
2535 printk(KERN_INFO "Enabling automatic NUMA balancing. "
2536 "Configure with numa_balancing= or sysctl");
2537 set_numabalancing_state(numabalancing_default);
2538 }
2539}
2540
2541static int __init setup_numabalancing(char *str)
2542{
2543 int ret = 0;
2544 if (!str)
2545 goto out;
2546 numabalancing_override = true;
2547
2548 if (!strcmp(str, "enable")) {
2549 set_numabalancing_state(true);
2550 ret = 1;
2551 } else if (!strcmp(str, "disable")) {
2552 set_numabalancing_state(false);
2553 ret = 1;
2554 }
2555out:
2556 if (!ret)
2557 printk(KERN_WARNING "Unable to parse numa_balancing=\n");
2558
2559 return ret;
2560}
2561__setup("numa_balancing=", setup_numabalancing);
2562#else
2563static inline void __init check_numabalancing_enable(void)
2564{
2565}
2566#endif /* CONFIG_NUMA_BALANCING */
2567
2524/* assumes fs == KERNEL_DS */ 2568/* assumes fs == KERNEL_DS */
2525void __init numa_policy_init(void) 2569void __init numa_policy_init(void)
2526{ 2570{
@@ -2571,6 +2615,8 @@ void __init numa_policy_init(void)
2571 2615
2572 if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) 2616 if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
2573 printk("numa_policy_init: interleaving failed\n"); 2617 printk("numa_policy_init: interleaving failed\n");
2618
2619 check_numabalancing_enable();
2574} 2620}
2575 2621
2576/* Reset policy of current process to default */ 2622/* Reset policy of current process to default */