diff options
author | Frederic Weisbecker <frederic@kernel.org> | 2017-10-26 22:42:37 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-10-27 03:55:30 -0400 |
commit | edb9382175c3ebdced8ffdb3e0f20052ad9fdbe9 (patch) | |
tree | 5257baafe1ada153e8eb0bfe41e02c8f0545e6fa | |
parent | 6f1982fedd59856bcc42a9b521be4c3ffd2f60a7 (diff) |
sched/isolation: Move isolcpus= handling to the housekeeping code
We want to centralize the isolation features in the housekeeping
subsystem, and scheduler domain isolation is a significant part of it.
There is no intended behaviour change; we just reuse the housekeeping
cpumask and core code.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1509072159-31808-11-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | drivers/base/cpu.c | 11 | ||||
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | include/linux/sched/isolation.h | 1 | ||||
-rw-r--r-- | kernel/cgroup/cpuset.c | 15 | ||||
-rw-r--r-- | kernel/sched/core.c | 16 | ||||
-rw-r--r-- | kernel/sched/isolation.c | 63 | ||||
-rw-r--r-- | kernel/sched/topology.c | 24 |
7 files changed, 73 insertions, 59 deletions
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 321cd7b4d817..a73ab95558f5 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/cpufeature.h> | 18 | #include <linux/cpufeature.h> |
19 | #include <linux/tick.h> | 19 | #include <linux/tick.h> |
20 | #include <linux/pm_qos.h> | 20 | #include <linux/pm_qos.h> |
21 | #include <linux/sched/isolation.h> | ||
21 | 22 | ||
22 | #include "base.h" | 23 | #include "base.h" |
23 | 24 | ||
@@ -271,8 +272,16 @@ static ssize_t print_cpus_isolated(struct device *dev, | |||
271 | struct device_attribute *attr, char *buf) | 272 | struct device_attribute *attr, char *buf) |
272 | { | 273 | { |
273 | int n = 0, len = PAGE_SIZE-2; | 274 | int n = 0, len = PAGE_SIZE-2; |
275 | cpumask_var_t isolated; | ||
274 | 276 | ||
275 | n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map)); | 277 | if (!alloc_cpumask_var(&isolated, GFP_KERNEL)) |
278 | return -ENOMEM; | ||
279 | |||
280 | cpumask_andnot(isolated, cpu_possible_mask, | ||
281 | housekeeping_cpumask(HK_FLAG_DOMAIN)); | ||
282 | n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated)); | ||
283 | |||
284 | free_cpumask_var(isolated); | ||
276 | 285 | ||
277 | return n; | 286 | return n; |
278 | } | 287 | } |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 0f897dfc195e..1b0cc0d6df8d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -165,8 +165,6 @@ struct task_group; | |||
165 | /* Task command name length: */ | 165 | /* Task command name length: */ |
166 | #define TASK_COMM_LEN 16 | 166 | #define TASK_COMM_LEN 16 |
167 | 167 | ||
168 | extern cpumask_var_t cpu_isolated_map; | ||
169 | |||
170 | extern void scheduler_tick(void); | 168 | extern void scheduler_tick(void); |
171 | 169 | ||
172 | #define MAX_SCHEDULE_TIMEOUT LONG_MAX | 170 | #define MAX_SCHEDULE_TIMEOUT LONG_MAX |
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index e53cfa96e91e..d849431c8060 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h | |||
@@ -11,6 +11,7 @@ enum hk_flags { | |||
11 | HK_FLAG_MISC = (1 << 2), | 11 | HK_FLAG_MISC = (1 << 2), |
12 | HK_FLAG_SCHED = (1 << 3), | 12 | HK_FLAG_SCHED = (1 << 3), |
13 | HK_FLAG_TICK = (1 << 4), | 13 | HK_FLAG_TICK = (1 << 4), |
14 | HK_FLAG_DOMAIN = (1 << 5), | ||
14 | }; | 15 | }; |
15 | 16 | ||
16 | #ifdef CONFIG_CPU_ISOLATION | 17 | #ifdef CONFIG_CPU_ISOLATION |
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4657e2924ecb..f7efa7b4d825 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c | |||
@@ -57,7 +57,7 @@ | |||
57 | #include <linux/backing-dev.h> | 57 | #include <linux/backing-dev.h> |
58 | #include <linux/sort.h> | 58 | #include <linux/sort.h> |
59 | #include <linux/oom.h> | 59 | #include <linux/oom.h> |
60 | 60 | #include <linux/sched/isolation.h> | |
61 | #include <linux/uaccess.h> | 61 | #include <linux/uaccess.h> |
62 | #include <linux/atomic.h> | 62 | #include <linux/atomic.h> |
63 | #include <linux/mutex.h> | 63 | #include <linux/mutex.h> |
@@ -656,7 +656,6 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
656 | int csn; /* how many cpuset ptrs in csa so far */ | 656 | int csn; /* how many cpuset ptrs in csa so far */ |
657 | int i, j, k; /* indices for partition finding loops */ | 657 | int i, j, k; /* indices for partition finding loops */ |
658 | cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ | 658 | cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ |
659 | cpumask_var_t non_isolated_cpus; /* load balanced CPUs */ | ||
660 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | 659 | struct sched_domain_attr *dattr; /* attributes for custom domains */ |
661 | int ndoms = 0; /* number of sched domains in result */ | 660 | int ndoms = 0; /* number of sched domains in result */ |
662 | int nslot; /* next empty doms[] struct cpumask slot */ | 661 | int nslot; /* next empty doms[] struct cpumask slot */ |
@@ -666,10 +665,6 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
666 | dattr = NULL; | 665 | dattr = NULL; |
667 | csa = NULL; | 666 | csa = NULL; |
668 | 667 | ||
669 | if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL)) | ||
670 | goto done; | ||
671 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | ||
672 | |||
673 | /* Special case for the 99% of systems with one, full, sched domain */ | 668 | /* Special case for the 99% of systems with one, full, sched domain */ |
674 | if (is_sched_load_balance(&top_cpuset)) { | 669 | if (is_sched_load_balance(&top_cpuset)) { |
675 | ndoms = 1; | 670 | ndoms = 1; |
@@ -683,7 +678,7 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
683 | update_domain_attr_tree(dattr, &top_cpuset); | 678 | update_domain_attr_tree(dattr, &top_cpuset); |
684 | } | 679 | } |
685 | cpumask_and(doms[0], top_cpuset.effective_cpus, | 680 | cpumask_and(doms[0], top_cpuset.effective_cpus, |
686 | non_isolated_cpus); | 681 | housekeeping_cpumask(HK_FLAG_DOMAIN)); |
687 | 682 | ||
688 | goto done; | 683 | goto done; |
689 | } | 684 | } |
@@ -707,7 +702,8 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
707 | */ | 702 | */ |
708 | if (!cpumask_empty(cp->cpus_allowed) && | 703 | if (!cpumask_empty(cp->cpus_allowed) && |
709 | !(is_sched_load_balance(cp) && | 704 | !(is_sched_load_balance(cp) && |
710 | cpumask_intersects(cp->cpus_allowed, non_isolated_cpus))) | 705 | cpumask_intersects(cp->cpus_allowed, |
706 | housekeeping_cpumask(HK_FLAG_DOMAIN)))) | ||
711 | continue; | 707 | continue; |
712 | 708 | ||
713 | if (is_sched_load_balance(cp)) | 709 | if (is_sched_load_balance(cp)) |
@@ -789,7 +785,7 @@ restart: | |||
789 | 785 | ||
790 | if (apn == b->pn) { | 786 | if (apn == b->pn) { |
791 | cpumask_or(dp, dp, b->effective_cpus); | 787 | cpumask_or(dp, dp, b->effective_cpus); |
792 | cpumask_and(dp, dp, non_isolated_cpus); | 788 | cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN)); |
793 | if (dattr) | 789 | if (dattr) |
794 | update_domain_attr_tree(dattr + nslot, b); | 790 | update_domain_attr_tree(dattr + nslot, b); |
795 | 791 | ||
@@ -802,7 +798,6 @@ restart: | |||
802 | BUG_ON(nslot != ndoms); | 798 | BUG_ON(nslot != ndoms); |
803 | 799 | ||
804 | done: | 800 | done: |
805 | free_cpumask_var(non_isolated_cpus); | ||
806 | kfree(csa); | 801 | kfree(csa); |
807 | 802 | ||
808 | /* | 803 | /* |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2210c0203e51..1a55c842bfbc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -84,9 +84,6 @@ __read_mostly int scheduler_running; | |||
84 | */ | 84 | */ |
85 | int sysctl_sched_rt_runtime = 950000; | 85 | int sysctl_sched_rt_runtime = 950000; |
86 | 86 | ||
87 | /* CPUs with isolated domains */ | ||
88 | cpumask_var_t cpu_isolated_map; | ||
89 | |||
90 | /* | 87 | /* |
91 | * __task_rq_lock - lock the rq @p resides on. | 88 | * __task_rq_lock - lock the rq @p resides on. |
92 | */ | 89 | */ |
@@ -5735,10 +5732,6 @@ static inline void sched_init_smt(void) { } | |||
5735 | 5732 | ||
5736 | void __init sched_init_smp(void) | 5733 | void __init sched_init_smp(void) |
5737 | { | 5734 | { |
5738 | cpumask_var_t non_isolated_cpus; | ||
5739 | |||
5740 | alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); | ||
5741 | |||
5742 | sched_init_numa(); | 5735 | sched_init_numa(); |
5743 | 5736 | ||
5744 | /* | 5737 | /* |
@@ -5748,16 +5741,12 @@ void __init sched_init_smp(void) | |||
5748 | */ | 5741 | */ |
5749 | mutex_lock(&sched_domains_mutex); | 5742 | mutex_lock(&sched_domains_mutex); |
5750 | sched_init_domains(cpu_active_mask); | 5743 | sched_init_domains(cpu_active_mask); |
5751 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | ||
5752 | if (cpumask_empty(non_isolated_cpus)) | ||
5753 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | ||
5754 | mutex_unlock(&sched_domains_mutex); | 5744 | mutex_unlock(&sched_domains_mutex); |
5755 | 5745 | ||
5756 | /* Move init over to a non-isolated CPU */ | 5746 | /* Move init over to a non-isolated CPU */ |
5757 | if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) | 5747 | if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) |
5758 | BUG(); | 5748 | BUG(); |
5759 | sched_init_granularity(); | 5749 | sched_init_granularity(); |
5760 | free_cpumask_var(non_isolated_cpus); | ||
5761 | 5750 | ||
5762 | init_sched_rt_class(); | 5751 | init_sched_rt_class(); |
5763 | init_sched_dl_class(); | 5752 | init_sched_dl_class(); |
@@ -5961,9 +5950,6 @@ void __init sched_init(void) | |||
5961 | calc_load_update = jiffies + LOAD_FREQ; | 5950 | calc_load_update = jiffies + LOAD_FREQ; |
5962 | 5951 | ||
5963 | #ifdef CONFIG_SMP | 5952 | #ifdef CONFIG_SMP |
5964 | /* May be allocated at isolcpus cmdline parse time */ | ||
5965 | if (cpu_isolated_map == NULL) | ||
5966 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | ||
5967 | idle_thread_set_boot_cpu(); | 5953 | idle_thread_set_boot_cpu(); |
5968 | set_cpu_rq_start_time(smp_processor_id()); | 5954 | set_cpu_rq_start_time(smp_processor_id()); |
5969 | #endif | 5955 | #endif |
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 1f61e440358d..8f666bc5abe8 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c | |||
@@ -63,32 +63,69 @@ void __init housekeeping_init(void) | |||
63 | WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); | 63 | WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); |
64 | } | 64 | } |
65 | 65 | ||
66 | #ifdef CONFIG_NO_HZ_FULL | 66 | static int __init housekeeping_setup(char *str, enum hk_flags flags) |
67 | static int __init housekeeping_nohz_full_setup(char *str) | ||
68 | { | 67 | { |
69 | cpumask_var_t non_housekeeping_mask; | 68 | cpumask_var_t non_housekeeping_mask; |
69 | int err; | ||
70 | 70 | ||
71 | alloc_bootmem_cpumask_var(&non_housekeeping_mask); | 71 | alloc_bootmem_cpumask_var(&non_housekeeping_mask); |
72 | if (cpulist_parse(str, non_housekeeping_mask) < 0) { | 72 | err = cpulist_parse(str, non_housekeeping_mask); |
73 | pr_warn("Housekeeping: Incorrect nohz_full cpumask\n"); | 73 | if (err < 0 || cpumask_last(non_housekeeping_mask) >= nr_cpu_ids) { |
74 | pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n"); | ||
74 | free_bootmem_cpumask_var(non_housekeeping_mask); | 75 | free_bootmem_cpumask_var(non_housekeeping_mask); |
75 | return 0; | 76 | return 0; |
76 | } | 77 | } |
77 | 78 | ||
78 | alloc_bootmem_cpumask_var(&housekeeping_mask); | 79 | if (!housekeeping_flags) { |
79 | cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask); | 80 | alloc_bootmem_cpumask_var(&housekeeping_mask); |
80 | 81 | cpumask_andnot(housekeeping_mask, | |
81 | if (cpumask_empty(housekeeping_mask)) | 82 | cpu_possible_mask, non_housekeeping_mask); |
82 | cpumask_set_cpu(smp_processor_id(), housekeeping_mask); | 83 | if (cpumask_empty(housekeeping_mask)) |
84 | cpumask_set_cpu(smp_processor_id(), housekeeping_mask); | ||
85 | } else { | ||
86 | cpumask_var_t tmp; | ||
87 | |||
88 | alloc_bootmem_cpumask_var(&tmp); | ||
89 | cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask); | ||
90 | if (!cpumask_equal(tmp, housekeeping_mask)) { | ||
91 | pr_warn("Housekeeping: nohz_full= must match isolcpus=\n"); | ||
92 | free_bootmem_cpumask_var(tmp); | ||
93 | free_bootmem_cpumask_var(non_housekeeping_mask); | ||
94 | return 0; | ||
95 | } | ||
96 | free_bootmem_cpumask_var(tmp); | ||
97 | } | ||
83 | 98 | ||
84 | housekeeping_flags = HK_FLAG_TICK | HK_FLAG_TIMER | | 99 | if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) { |
85 | HK_FLAG_RCU | HK_FLAG_MISC; | 100 | if (IS_ENABLED(CONFIG_NO_HZ_FULL)) { |
101 | tick_nohz_full_setup(non_housekeeping_mask); | ||
102 | } else { | ||
103 | pr_warn("Housekeeping: nohz unsupported." | ||
104 | " Build with CONFIG_NO_HZ_FULL\n"); | ||
105 | free_bootmem_cpumask_var(non_housekeeping_mask); | ||
106 | return 0; | ||
107 | } | ||
108 | } | ||
86 | 109 | ||
87 | tick_nohz_full_setup(non_housekeeping_mask); | 110 | housekeeping_flags |= flags; |
88 | 111 | ||
89 | free_bootmem_cpumask_var(non_housekeeping_mask); | 112 | free_bootmem_cpumask_var(non_housekeeping_mask); |
90 | 113 | ||
91 | return 1; | 114 | return 1; |
92 | } | 115 | } |
116 | |||
117 | static int __init housekeeping_nohz_full_setup(char *str) | ||
118 | { | ||
119 | unsigned int flags; | ||
120 | |||
121 | flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC; | ||
122 | |||
123 | return housekeeping_setup(str, flags); | ||
124 | } | ||
93 | __setup("nohz_full=", housekeeping_nohz_full_setup); | 125 | __setup("nohz_full=", housekeeping_nohz_full_setup); |
94 | #endif | 126 | |
127 | static int __init housekeeping_isolcpus_setup(char *str) | ||
128 | { | ||
129 | return housekeeping_setup(str, HK_FLAG_DOMAIN); | ||
130 | } | ||
131 | __setup("isolcpus=", housekeeping_isolcpus_setup); | ||
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index e3d31b0880dc..2e6b9126ffdc 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c | |||
@@ -3,6 +3,7 @@ | |||
3 | */ | 3 | */ |
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | #include <linux/mutex.h> | 5 | #include <linux/mutex.h> |
6 | #include <linux/sched/isolation.h> | ||
6 | 7 | ||
7 | #include "sched.h" | 8 | #include "sched.h" |
8 | 9 | ||
@@ -469,21 +470,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) | |||
469 | update_top_cache_domain(cpu); | 470 | update_top_cache_domain(cpu); |
470 | } | 471 | } |
471 | 472 | ||
472 | /* Setup the mask of CPUs configured for isolated domains */ | ||
473 | static int __init isolated_cpu_setup(char *str) | ||
474 | { | ||
475 | int ret; | ||
476 | |||
477 | alloc_bootmem_cpumask_var(&cpu_isolated_map); | ||
478 | ret = cpulist_parse(str, cpu_isolated_map); | ||
479 | if (ret || cpumask_last(cpu_isolated_map) >= nr_cpu_ids) { | ||
480 | pr_err("sched: Error, all isolcpus= values must be between 0 and %u - ignoring them.\n", nr_cpu_ids-1); | ||
481 | return 0; | ||
482 | } | ||
483 | return 1; | ||
484 | } | ||
485 | __setup("isolcpus=", isolated_cpu_setup); | ||
486 | |||
487 | struct s_data { | 473 | struct s_data { |
488 | struct sched_domain ** __percpu sd; | 474 | struct sched_domain ** __percpu sd; |
489 | struct root_domain *rd; | 475 | struct root_domain *rd; |
@@ -1792,7 +1778,7 @@ int sched_init_domains(const struct cpumask *cpu_map) | |||
1792 | doms_cur = alloc_sched_domains(ndoms_cur); | 1778 | doms_cur = alloc_sched_domains(ndoms_cur); |
1793 | if (!doms_cur) | 1779 | if (!doms_cur) |
1794 | doms_cur = &fallback_doms; | 1780 | doms_cur = &fallback_doms; |
1795 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); | 1781 | cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN)); |
1796 | err = build_sched_domains(doms_cur[0], NULL); | 1782 | err = build_sched_domains(doms_cur[0], NULL); |
1797 | register_sched_domain_sysctl(); | 1783 | register_sched_domain_sysctl(); |
1798 | 1784 | ||
@@ -1875,7 +1861,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], | |||
1875 | doms_new = alloc_sched_domains(1); | 1861 | doms_new = alloc_sched_domains(1); |
1876 | if (doms_new) { | 1862 | if (doms_new) { |
1877 | n = 1; | 1863 | n = 1; |
1878 | cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); | 1864 | cpumask_and(doms_new[0], cpu_active_mask, |
1865 | housekeeping_cpumask(HK_FLAG_DOMAIN)); | ||
1879 | } | 1866 | } |
1880 | } else { | 1867 | } else { |
1881 | n = ndoms_new; | 1868 | n = ndoms_new; |
@@ -1898,7 +1885,8 @@ match1: | |||
1898 | if (!doms_new) { | 1885 | if (!doms_new) { |
1899 | n = 0; | 1886 | n = 0; |
1900 | doms_new = &fallback_doms; | 1887 | doms_new = &fallback_doms; |
1901 | cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); | 1888 | cpumask_and(doms_new[0], cpu_active_mask, |
1889 | housekeeping_cpumask(HK_FLAG_DOMAIN)); | ||
1902 | } | 1890 | } |
1903 | 1891 | ||
1904 | /* Build new domains: */ | 1892 | /* Build new domains: */ |