 Documentation/cgroups/cpusets.txt | 10 ++++++++--
 include/linux/sched.h             |  2 ++
 init/main.c                       |  2 +-
 kernel/cgroup.c                   |  8 ++++----
 kernel/cpuset.c                   | 13 +++++++++++--
 kernel/sched/core.c               |  6 +++---
 6 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index f2235a162529..fdf7dff3f607 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -392,8 +392,10 @@ Put simply, it costs less to balance between two smaller sched domains
 than one big one, but doing so means that overloads in one of the
 two domains won't be load balanced to the other one.
 
-By default, there is one sched domain covering all CPUs, except those
-marked isolated using the kernel boot time "isolcpus=" argument.
+By default, there is one sched domain covering all CPUs, including those
+marked isolated using the kernel boot time "isolcpus=" argument. However,
+the isolated CPUs will not participate in load balancing, and will not
+have tasks running on them unless explicitly assigned.
 
 This default load balancing across all CPUs is not well suited for
 the following two situations:
@@ -465,6 +467,10 @@ such partially load balanced cpusets, as they may be artificially
 constrained to some subset of the CPUs allowed to them, for lack of
 load balancing to the other CPUs.
 
+CPUs in "cpuset.isolcpus" were excluded from load balancing by the
+isolcpus= kernel boot option, and will never be load balanced regardless
+of the value of "cpuset.sched_load_balance" in any cpuset.
+
 1.7.1 sched_load_balance implementation details.
 ------------------------------------------------
 
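
The "unless explicitly assigned" wording above refers to pinning a task onto
an isolated CPU by hand, for example with sched_setaffinity(2). A minimal
userspace sketch, assuming CPU 3 was listed in isolcpus= (the CPU number is a
made-up example):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(3, &set);	/* assumed: CPU 3 is listed in isolcpus= */

	/* Pin the calling task onto the isolated CPU. */
	if (sched_setaffinity(0, sizeof(set), &set) != 0) {
		perror("sched_setaffinity");
		return 1;
	}
	return 0;
}
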
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3f3308824fa4..f74d4cc3a3e5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -337,6 +337,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 extern void init_idle_bootup_task(struct task_struct *idle);
 
+extern cpumask_var_t cpu_isolated_map;
+
 extern int runqueue_is_locked(int cpu);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
diff --git a/init/main.c b/init/main.c
index 6f0f1c5ff8cc..4a6974e67839 100644
--- a/init/main.c
+++ b/init/main.c
@@ -654,8 +654,8 @@ asmlinkage __visible void __init start_kernel(void)
 	page_writeback_init();
 	proc_root_init();
 	nsfs_init();
-	cgroup_init();
 	cpuset_init();
+	cgroup_init();
 	taskstats_init_early();
 	delayacct_init();
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 29a7b2cc593e..a220fdb66568 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3806,10 +3806,7 @@ static void *pidlist_allocate(int count)
 
 static void pidlist_free(void *p)
 {
-	if (is_vmalloc_addr(p))
-		vfree(p);
-	else
-		kfree(p);
+	kvfree(p);
 }
 
 /*
@@ -5040,6 +5037,9 @@ int __init cgroup_init(void)
 			WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
 			WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
 		}
+
+		if (ss->bind)
+			ss->bind(init_css_set.subsys[ssid]);
 	}
 
 	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
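
kvfree() checks is_vmalloc_addr() itself and dispatches to vfree() or
kfree(), which is what makes the open-coded branch removed above redundant.
For reference, a sketch of the allocation side this free path pairs with,
assuming the kmalloc-with-vmalloc-fallback pattern of pidlist_allocate()
(needs linux/slab.h and linux/vmalloc.h):

#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))

static void *pidlist_allocate(int count)
{
	/*
	 * Large pidlists would strain the slab allocator, so fall back
	 * to vmalloc(); that is why the free side must handle both.
	 */
	if (PIDLIST_TOO_LARGE(count))
		return vmalloc(count * sizeof(pid_t));
	else
		return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
}
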
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fc7f4748d34a..c68f0721df10 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -622,6 +622,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	int csn;		/* how many cpuset ptrs in csa so far */
 	int i, j, k;		/* indices for partition finding loops */
 	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
+	cpumask_var_t non_isolated_cpus;  /* load balanced CPUs */
 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
@@ -631,6 +632,10 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	dattr = NULL;
 	csa = NULL;
 
+	if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL))
+		goto done;
+	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
+
 	/* Special case for the 99% of systems with one, full, sched domain */
 	if (is_sched_load_balance(&top_cpuset)) {
 		ndoms = 1;
@@ -643,7 +648,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
 			*dattr = SD_ATTR_INIT;
 			update_domain_attr_tree(dattr, &top_cpuset);
 		}
-		cpumask_copy(doms[0], top_cpuset.effective_cpus);
+		cpumask_and(doms[0], top_cpuset.effective_cpus,
+			    non_isolated_cpus);
 
 		goto done;
 	}
@@ -666,7 +672,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
 		 * the corresponding sched domain.
 		 */
 		if (!cpumask_empty(cp->cpus_allowed) &&
-		    !is_sched_load_balance(cp))
+		    !(is_sched_load_balance(cp) &&
+		      cpumask_intersects(cp->cpus_allowed, non_isolated_cpus)))
 			continue;
 
 		if (is_sched_load_balance(cp))
@@ -748,6 +755,7 @@ restart:
 
 		if (apn == b->pn) {
 			cpumask_or(dp, dp, b->effective_cpus);
+			cpumask_and(dp, dp, non_isolated_cpus);
 			if (dattr)
 				update_domain_attr_tree(dattr + nslot, b);
 
@@ -760,6 +768,7 @@ restart:
 	BUG_ON(nslot != ndoms);
 
 done:
+	free_cpumask_var(non_isolated_cpus);
 	kfree(csa);
 
 	/*
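
The net effect of the cpuset.c changes is simple mask arithmetic:
non_isolated_cpus is the complement of cpu_isolated_map, and every generated
sched domain is intersected with it, so an isolated CPU can never appear in
any domain. A standalone userspace illustration, with made-up 8-bit masks
standing in for cpumasks:

#include <stdio.h>

int main(void)
{
	unsigned int cpu_possible   = 0xff;	/* CPUs 0-7 */
	unsigned int cpu_isolated   = 0x0c;	/* isolcpus=2,3 */
	unsigned int effective_cpus = 0x3f;	/* cpuset allows CPUs 0-5 */

	/* cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map) */
	unsigned int non_isolated = cpu_possible & ~cpu_isolated;

	/* cpumask_and(doms[0], top_cpuset.effective_cpus, non_isolated_cpus) */
	unsigned int dom = effective_cpus & non_isolated;

	printf("sched domain mask: 0x%02x\n", dom);	/* 0x33: CPUs 0,1,4,5 */
	return 0;
}
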
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 261af7bfcb67..2f7937ee9e3a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -306,6 +306,9 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
+/* cpus with isolated domains */
+cpumask_var_t cpu_isolated_map;
+
 /*
  * this_rq_lock - lock this runqueue and disable interrupts.
  */
@@ -5811,9 +5814,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 	update_top_cache_domain(cpu);
 }
 
-/* cpus with isolated domains */
-static cpumask_var_t cpu_isolated_map;
-
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
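
For context, a sketch of the boot-time parser that populates
cpu_isolated_map; it sits directly below the lines removed above, and this is
an approximation of its era-appropriate body rather than a verbatim quote:

/* Setup the mask of cpus configured for isolated domains */
static int __init isolated_cpu_setup(char *str)
{
	alloc_bootmem_cpumask_var(&cpu_isolated_map);
	cpulist_parse(str, cpu_isolated_map);
	return 1;
}
__setup("isolcpus=", isolated_cpu_setup);
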