diff options
author | Mel Gorman <mgorman@suse.de> | 2012-11-22 06:16:36 -0500 |
---|---|---|
committer | Mel Gorman <mgorman@suse.de> | 2012-12-11 09:42:55 -0500 |
commit | 1a687c2e9a99335c9e77392f050fe607fa18a652 (patch) | |
tree | 06df958bfdfeaf9f38f333af106b55faa81f1c6b | |
parent | b8593bfda1652755136333cdd362de125b283a9c (diff) |
mm: sched: numa: Control enabling and disabling of NUMA balancing
This patch adds Kconfig options and kernel parameters to allow the
enabling and disabling of automatic NUMA balancing. The existence
of such a switch was and is very important when debugging problems
related to transparent hugepages and we should have the same for
automatic NUMA placement.
Signed-off-by: Mel Gorman <mgorman@suse.de>
-rw-r--r-- | Documentation/kernel-parameters.txt | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 4 | ||||
-rw-r--r-- | init/Kconfig | 8 | ||||
-rw-r--r-- | kernel/sched/core.c | 48 | ||||
-rw-r--r-- | kernel/sched/fair.c | 3 | ||||
-rw-r--r-- | kernel/sched/features.h | 6 | ||||
-rw-r--r-- | mm/mempolicy.c | 46 |
7 files changed, 101 insertions, 17 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9776f068306b..2e8d2625b814 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1996,6 +1996,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
1996 | 1996 | ||
1997 | nr_uarts= [SERIAL] maximum number of UARTs to be registered. | 1997 | nr_uarts= [SERIAL] maximum number of UARTs to be registered. |
1998 | 1998 | ||
1999 | numa_balancing= [KNL,X86] Enable or disable automatic NUMA balancing. | ||
2000 | Allowed values are enable and disable | ||
2001 | |||
1999 | numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. | 2002 | numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. |
2000 | one of ['zone', 'node', 'default'] can be specified | 2003 | one of ['zone', 'node', 'default'] can be specified |
2001 | This can be set from sysctl after boot. | 2004 | This can be set from sysctl after boot. |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 0f4ff2bd03f6..b1e619f9ff1a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1563,10 +1563,14 @@ struct task_struct { | |||
1563 | 1563 | ||
1564 | #ifdef CONFIG_NUMA_BALANCING | 1564 | #ifdef CONFIG_NUMA_BALANCING |
1565 | extern void task_numa_fault(int node, int pages, bool migrated); | 1565 | extern void task_numa_fault(int node, int pages, bool migrated); |
1566 | extern void set_numabalancing_state(bool enabled); | ||
1566 | #else | 1567 | #else |
1567 | static inline void task_numa_fault(int node, int pages, bool migrated) | 1568 | static inline void task_numa_fault(int node, int pages, bool migrated) |
1568 | { | 1569 | { |
1569 | } | 1570 | } |
1571 | static inline void set_numabalancing_state(bool enabled) | ||
1572 | { | ||
1573 | } | ||
1570 | #endif | 1574 | #endif |
1571 | 1575 | ||
1572 | /* | 1576 | /* |
diff --git a/init/Kconfig b/init/Kconfig index 9f00f004796a..18e2a5920a34 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -720,6 +720,14 @@ config ARCH_USES_NUMA_PROT_NONE | |||
720 | depends on ARCH_WANTS_PROT_NUMA_PROT_NONE | 720 | depends on ARCH_WANTS_PROT_NUMA_PROT_NONE |
721 | depends on NUMA_BALANCING | 721 | depends on NUMA_BALANCING |
722 | 722 | ||
723 | config NUMA_BALANCING_DEFAULT_ENABLED | ||
724 | bool "Automatically enable NUMA aware memory/task placement" | ||
725 | default y | ||
726 | depends on NUMA_BALANCING | ||
727 | help | ||
728 | If set, automatic NUMA balancing will be enabled if running on a NUMA | ||
729 | machine. | ||
730 | |||
723 | config NUMA_BALANCING | 731 | config NUMA_BALANCING |
724 | bool "Memory placement aware NUMA scheduler" | 732 | bool "Memory placement aware NUMA scheduler" |
725 | default y | 733 | default y |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9d255bc0e278..7a45015274ab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -192,23 +192,10 @@ static void sched_feat_disable(int i) { }; | |||
192 | static void sched_feat_enable(int i) { }; | 192 | static void sched_feat_enable(int i) { }; |
193 | #endif /* HAVE_JUMP_LABEL */ | 193 | #endif /* HAVE_JUMP_LABEL */ |
194 | 194 | ||
195 | static ssize_t | 195 | static int sched_feat_set(char *cmp) |
196 | sched_feat_write(struct file *filp, const char __user *ubuf, | ||
197 | size_t cnt, loff_t *ppos) | ||
198 | { | 196 | { |
199 | char buf[64]; | ||
200 | char *cmp; | ||
201 | int neg = 0; | ||
202 | int i; | 197 | int i; |
203 | 198 | int neg = 0; | |
204 | if (cnt > 63) | ||
205 | cnt = 63; | ||
206 | |||
207 | if (copy_from_user(&buf, ubuf, cnt)) | ||
208 | return -EFAULT; | ||
209 | |||
210 | buf[cnt] = 0; | ||
211 | cmp = strstrip(buf); | ||
212 | 199 | ||
213 | if (strncmp(cmp, "NO_", 3) == 0) { | 200 | if (strncmp(cmp, "NO_", 3) == 0) { |
214 | neg = 1; | 201 | neg = 1; |
@@ -228,6 +215,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
228 | } | 215 | } |
229 | } | 216 | } |
230 | 217 | ||
218 | return i; | ||
219 | } | ||
220 | |||
221 | static ssize_t | ||
222 | sched_feat_write(struct file *filp, const char __user *ubuf, | ||
223 | size_t cnt, loff_t *ppos) | ||
224 | { | ||
225 | char buf[64]; | ||
226 | char *cmp; | ||
227 | int i; | ||
228 | |||
229 | if (cnt > 63) | ||
230 | cnt = 63; | ||
231 | |||
232 | if (copy_from_user(&buf, ubuf, cnt)) | ||
233 | return -EFAULT; | ||
234 | |||
235 | buf[cnt] = 0; | ||
236 | cmp = strstrip(buf); | ||
237 | |||
238 | i = sched_feat_set(cmp); | ||
231 | if (i == __SCHED_FEAT_NR) | 239 | if (i == __SCHED_FEAT_NR) |
232 | return -EINVAL; | 240 | return -EINVAL; |
233 | 241 | ||
@@ -1549,6 +1557,16 @@ static void __sched_fork(struct task_struct *p) | |||
1549 | #endif /* CONFIG_NUMA_BALANCING */ | 1557 | #endif /* CONFIG_NUMA_BALANCING */ |
1550 | } | 1558 | } |
1551 | 1559 | ||
1560 | #ifdef CONFIG_NUMA_BALANCING | ||
1561 | void set_numabalancing_state(bool enabled) | ||
1562 | { | ||
1563 | if (enabled) | ||
1564 | sched_feat_set("NUMA"); | ||
1565 | else | ||
1566 | sched_feat_set("NO_NUMA"); | ||
1567 | } | ||
1568 | #endif /* CONFIG_NUMA_BALANCING */ | ||
1569 | |||
1552 | /* | 1570 | /* |
1553 | * fork()/clone()-time setup: | 1571 | * fork()/clone()-time setup: |
1554 | */ | 1572 | */ |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4b577863933f..7a02a2082e95 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -811,6 +811,9 @@ void task_numa_fault(int node, int pages, bool migrated) | |||
811 | { | 811 | { |
812 | struct task_struct *p = current; | 812 | struct task_struct *p = current; |
813 | 813 | ||
814 | if (!sched_feat_numa(NUMA)) | ||
815 | return; | ||
816 | |||
814 | /* FIXME: Allocate task-specific structure for placement policy here */ | 817 | /* FIXME: Allocate task-specific structure for placement policy here */ |
815 | 818 | ||
816 | /* | 819 | /* |
diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 5fb7aefbec80..d2373a3e3252 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h | |||
@@ -63,8 +63,10 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true) | |||
63 | SCHED_FEAT(LB_MIN, false) | 63 | SCHED_FEAT(LB_MIN, false) |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * Apply the automatic NUMA scheduling policy | 66 | * Apply the automatic NUMA scheduling policy. Enabled automatically |
67 | * at runtime if running on a NUMA machine. Can be controlled via | ||
68 | * numa_balancing= | ||
67 | */ | 69 | */ |
68 | #ifdef CONFIG_NUMA_BALANCING | 70 | #ifdef CONFIG_NUMA_BALANCING |
69 | SCHED_FEAT(NUMA, true) | 71 | SCHED_FEAT(NUMA, false) |
70 | #endif | 72 | #endif |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index fd20e28fd2ad..046308e9b999 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -2521,6 +2521,50 @@ void mpol_free_shared_policy(struct shared_policy *p) | |||
2521 | mutex_unlock(&p->mutex); | 2521 | mutex_unlock(&p->mutex); |
2522 | } | 2522 | } |
2523 | 2523 | ||
2524 | #ifdef CONFIG_NUMA_BALANCING | ||
2525 | static bool __initdata numabalancing_override; | ||
2526 | |||
2527 | static void __init check_numabalancing_enable(void) | ||
2528 | { | ||
2529 | bool numabalancing_default = false; | ||
2530 | |||
2531 | if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED)) | ||
2532 | numabalancing_default = true; | ||
2533 | |||
2534 | if (nr_node_ids > 1 && !numabalancing_override) { | ||
2535 | printk(KERN_INFO "Enabling automatic NUMA balancing. " | ||
2536 | "Configure with numa_balancing= or sysctl"); | ||
2537 | set_numabalancing_state(numabalancing_default); | ||
2538 | } | ||
2539 | } | ||
2540 | |||
2541 | static int __init setup_numabalancing(char *str) | ||
2542 | { | ||
2543 | int ret = 0; | ||
2544 | if (!str) | ||
2545 | goto out; | ||
2546 | numabalancing_override = true; | ||
2547 | |||
2548 | if (!strcmp(str, "enable")) { | ||
2549 | set_numabalancing_state(true); | ||
2550 | ret = 1; | ||
2551 | } else if (!strcmp(str, "disable")) { | ||
2552 | set_numabalancing_state(false); | ||
2553 | ret = 1; | ||
2554 | } | ||
2555 | out: | ||
2556 | if (!ret) | ||
2557 | printk(KERN_WARNING "Unable to parse numa_balancing=\n"); | ||
2558 | |||
2559 | return ret; | ||
2560 | } | ||
2561 | __setup("numa_balancing=", setup_numabalancing); | ||
2562 | #else | ||
2563 | static inline void __init check_numabalancing_enable(void) | ||
2564 | { | ||
2565 | } | ||
2566 | #endif /* CONFIG_NUMA_BALANCING */ | ||
2567 | |||
2524 | /* assumes fs == KERNEL_DS */ | 2568 | /* assumes fs == KERNEL_DS */ |
2525 | void __init numa_policy_init(void) | 2569 | void __init numa_policy_init(void) |
2526 | { | 2570 | { |
@@ -2571,6 +2615,8 @@ void __init numa_policy_init(void) | |||
2571 | 2615 | ||
2572 | if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) | 2616 | if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) |
2573 | printk("numa_policy_init: interleaving failed\n"); | 2617 | printk("numa_policy_init: interleaving failed\n"); |
2618 | |||
2619 | check_numabalancing_enable(); | ||
2574 | } | 2620 | } |
2575 | 2621 | ||
2576 | /* Reset policy of current process to default */ | 2622 | /* Reset policy of current process to default */ |