diff options
-rw-r--r-- | Documentation/cgroups/cpusets.txt | 10 | ||||
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | init/main.c | 2 | ||||
-rw-r--r-- | kernel/cgroup.c | 8 | ||||
-rw-r--r-- | kernel/cpuset.c | 13 | ||||
-rw-r--r-- | kernel/sched/core.c | 6 |
6 files changed, 29 insertions, 12 deletions
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index f2235a162529..fdf7dff3f607 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -392,8 +392,10 @@ Put simply, it costs less to balance between two smaller sched domains | |||
392 | than one big one, but doing so means that overloads in one of the | 392 | than one big one, but doing so means that overloads in one of the |
393 | two domains won't be load balanced to the other one. | 393 | two domains won't be load balanced to the other one. |
394 | 394 | ||
395 | By default, there is one sched domain covering all CPUs, except those | 395 | By default, there is one sched domain covering all CPUs, including those |
396 | marked isolated using the kernel boot time "isolcpus=" argument. | 396 | marked isolated using the kernel boot time "isolcpus=" argument. However, |
397 | the isolated CPUs will not participate in load balancing, and will not | ||
398 | have tasks running on them unless explicitly assigned. | ||
397 | 399 | ||
398 | This default load balancing across all CPUs is not well suited for | 400 | This default load balancing across all CPUs is not well suited for |
399 | the following two situations: | 401 | the following two situations: |
@@ -465,6 +467,10 @@ such partially load balanced cpusets, as they may be artificially | |||
465 | constrained to some subset of the CPUs allowed to them, for lack of | 467 | constrained to some subset of the CPUs allowed to them, for lack of |
466 | load balancing to the other CPUs. | 468 | load balancing to the other CPUs. |
467 | 469 | ||
470 | CPUs in "cpuset.isolcpus" were excluded from load balancing by the | ||
471 | isolcpus= kernel boot option, and will never be load balanced regardless | ||
472 | of the value of "cpuset.sched_load_balance" in any cpuset. | ||
473 | |||
468 | 1.7.1 sched_load_balance implementation details. | 474 | 1.7.1 sched_load_balance implementation details. |
469 | ------------------------------------------------ | 475 | ------------------------------------------------ |
470 | 476 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3f3308824fa4..f74d4cc3a3e5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -337,6 +337,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); | |||
337 | extern void init_idle(struct task_struct *idle, int cpu); | 337 | extern void init_idle(struct task_struct *idle, int cpu); |
338 | extern void init_idle_bootup_task(struct task_struct *idle); | 338 | extern void init_idle_bootup_task(struct task_struct *idle); |
339 | 339 | ||
340 | extern cpumask_var_t cpu_isolated_map; | ||
341 | |||
340 | extern int runqueue_is_locked(int cpu); | 342 | extern int runqueue_is_locked(int cpu); |
341 | 343 | ||
342 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) | 344 | #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) |
diff --git a/init/main.c b/init/main.c
index 6f0f1c5ff8cc..4a6974e67839 100644
--- a/init/main.c
+++ b/init/main.c
@@ -654,8 +654,8 @@ asmlinkage __visible void __init start_kernel(void) | |||
654 | page_writeback_init(); | 654 | page_writeback_init(); |
655 | proc_root_init(); | 655 | proc_root_init(); |
656 | nsfs_init(); | 656 | nsfs_init(); |
657 | cgroup_init(); | ||
658 | cpuset_init(); | 657 | cpuset_init(); |
658 | cgroup_init(); | ||
659 | taskstats_init_early(); | 659 | taskstats_init_early(); |
660 | delayacct_init(); | 660 | delayacct_init(); |
661 | 661 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 29a7b2cc593e..a220fdb66568 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3806,10 +3806,7 @@ static void *pidlist_allocate(int count) | |||
3806 | 3806 | ||
3807 | static void pidlist_free(void *p) | 3807 | static void pidlist_free(void *p) |
3808 | { | 3808 | { |
3809 | if (is_vmalloc_addr(p)) | 3809 | kvfree(p); |
3810 | vfree(p); | ||
3811 | else | ||
3812 | kfree(p); | ||
3813 | } | 3810 | } |
3814 | 3811 | ||
3815 | /* | 3812 | /* |
@@ -5040,6 +5037,9 @@ int __init cgroup_init(void) | |||
5040 | WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes)); | 5037 | WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes)); |
5041 | WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes)); | 5038 | WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes)); |
5042 | } | 5039 | } |
5040 | |||
5041 | if (ss->bind) | ||
5042 | ss->bind(init_css_set.subsys[ssid]); | ||
5043 | } | 5043 | } |
5044 | 5044 | ||
5045 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); | 5045 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fc7f4748d34a..c68f0721df10 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -622,6 +622,7 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
622 | int csn; /* how many cpuset ptrs in csa so far */ | 622 | int csn; /* how many cpuset ptrs in csa so far */ |
623 | int i, j, k; /* indices for partition finding loops */ | 623 | int i, j, k; /* indices for partition finding loops */ |
624 | cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ | 624 | cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ |
625 | cpumask_var_t non_isolated_cpus; /* load balanced CPUs */ | ||
625 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | 626 | struct sched_domain_attr *dattr; /* attributes for custom domains */ |
626 | int ndoms = 0; /* number of sched domains in result */ | 627 | int ndoms = 0; /* number of sched domains in result */ |
627 | int nslot; /* next empty doms[] struct cpumask slot */ | 628 | int nslot; /* next empty doms[] struct cpumask slot */ |
@@ -631,6 +632,10 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
631 | dattr = NULL; | 632 | dattr = NULL; |
632 | csa = NULL; | 633 | csa = NULL; |
633 | 634 | ||
635 | if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL)) | ||
636 | goto done; | ||
637 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | ||
638 | |||
634 | /* Special case for the 99% of systems with one, full, sched domain */ | 639 | /* Special case for the 99% of systems with one, full, sched domain */ |
635 | if (is_sched_load_balance(&top_cpuset)) { | 640 | if (is_sched_load_balance(&top_cpuset)) { |
636 | ndoms = 1; | 641 | ndoms = 1; |
@@ -643,7 +648,8 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
643 | *dattr = SD_ATTR_INIT; | 648 | *dattr = SD_ATTR_INIT; |
644 | update_domain_attr_tree(dattr, &top_cpuset); | 649 | update_domain_attr_tree(dattr, &top_cpuset); |
645 | } | 650 | } |
646 | cpumask_copy(doms[0], top_cpuset.effective_cpus); | 651 | cpumask_and(doms[0], top_cpuset.effective_cpus, |
652 | non_isolated_cpus); | ||
647 | 653 | ||
648 | goto done; | 654 | goto done; |
649 | } | 655 | } |
@@ -666,7 +672,8 @@ static int generate_sched_domains(cpumask_var_t **domains, | |||
666 | * the corresponding sched domain. | 672 | * the corresponding sched domain. |
667 | */ | 673 | */ |
668 | if (!cpumask_empty(cp->cpus_allowed) && | 674 | if (!cpumask_empty(cp->cpus_allowed) && |
669 | !is_sched_load_balance(cp)) | 675 | !(is_sched_load_balance(cp) && |
676 | cpumask_intersects(cp->cpus_allowed, non_isolated_cpus))) | ||
670 | continue; | 677 | continue; |
671 | 678 | ||
672 | if (is_sched_load_balance(cp)) | 679 | if (is_sched_load_balance(cp)) |
@@ -748,6 +755,7 @@ restart: | |||
748 | 755 | ||
749 | if (apn == b->pn) { | 756 | if (apn == b->pn) { |
750 | cpumask_or(dp, dp, b->effective_cpus); | 757 | cpumask_or(dp, dp, b->effective_cpus); |
758 | cpumask_and(dp, dp, non_isolated_cpus); | ||
751 | if (dattr) | 759 | if (dattr) |
752 | update_domain_attr_tree(dattr + nslot, b); | 760 | update_domain_attr_tree(dattr + nslot, b); |
753 | 761 | ||
@@ -760,6 +768,7 @@ restart: | |||
760 | BUG_ON(nslot != ndoms); | 768 | BUG_ON(nslot != ndoms); |
761 | 769 | ||
762 | done: | 770 | done: |
771 | free_cpumask_var(non_isolated_cpus); | ||
763 | kfree(csa); | 772 | kfree(csa); |
764 | 773 | ||
765 | /* | 774 | /* |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 261af7bfcb67..2f7937ee9e3a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -306,6 +306,9 @@ __read_mostly int scheduler_running; | |||
306 | */ | 306 | */ |
307 | int sysctl_sched_rt_runtime = 950000; | 307 | int sysctl_sched_rt_runtime = 950000; |
308 | 308 | ||
309 | /* cpus with isolated domains */ | ||
310 | cpumask_var_t cpu_isolated_map; | ||
311 | |||
309 | /* | 312 | /* |
310 | * this_rq_lock - lock this runqueue and disable interrupts. | 313 | * this_rq_lock - lock this runqueue and disable interrupts. |
311 | */ | 314 | */ |
@@ -5811,9 +5814,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) | |||
5811 | update_top_cache_domain(cpu); | 5814 | update_top_cache_domain(cpu); |
5812 | } | 5815 | } |
5813 | 5816 | ||
5814 | /* cpus with isolated domains */ | ||
5815 | static cpumask_var_t cpu_isolated_map; | ||
5816 | |||
5817 | /* Setup the mask of cpus configured for isolated domains */ | 5817 | /* Setup the mask of cpus configured for isolated domains */ |
5818 | static int __init isolated_cpu_setup(char *str) | 5818 | static int __init isolated_cpu_setup(char *str) |
5819 | { | 5819 | { |