-rw-r--r--  Documentation/cpusets.txt | 17
-rw-r--r--  include/linux/sched.h     |  3
-rw-r--r--  kernel/cpuset.c           | 84
-rw-r--r--  kernel/sched.c            | 29
4 files changed, 2 insertions, 131 deletions
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index b875d231ac74..ec9de6917f01 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -87,9 +87,6 @@ This can be especially valuable on:
 and a database), or
 * NUMA systems running large HPC applications with demanding
 performance characteristics.
-* Also cpu_exclusive cpusets are useful for servers running orthogonal
-workloads such as RT applications requiring low latency and HPC
-applications that are throughput sensitive
 
 These subsets, or "soft partitions" must be able to be dynamically
 adjusted, as the job mix changes, without impacting other concurrently
@@ -132,8 +129,6 @@ Cpusets extends these two mechanisms as follows:
 - A cpuset may be marked exclusive, which ensures that no other
 cpuset (except direct ancestors and descendents) may contain
 any overlapping CPUs or Memory Nodes.
-Also a cpu_exclusive cpuset would be associated with a sched
-domain.
 - You can list all the tasks (by pid) attached to any cpuset.
 
 The implementation of cpusets requires a few, simple hooks
@@ -145,9 +140,6 @@ into the rest of the kernel, none in performance critical paths:
 allowed in that tasks cpuset.
 - in sched.c migrate_all_tasks(), to keep migrating tasks within
 the CPUs allowed by their cpuset, if possible.
-- in sched.c, a new API partition_sched_domains for handling
-sched domain changes associated with cpu_exclusive cpusets
-and related changes in both sched.c and arch/ia64/kernel/domain.c
 - in the mbind and set_mempolicy system calls, to mask the requested
 Memory Nodes by what's allowed in that tasks cpuset.
 - in page_alloc.c, to restrict memory to allowed nodes.
@@ -232,15 +224,6 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than
 a direct ancestor or descendent, may share any of the same CPUs or
 Memory Nodes.
 
-A cpuset that is cpu_exclusive has a scheduler (sched) domain
-associated with it. The sched domain consists of all CPUs in the
-current cpuset that are not part of any exclusive child cpusets.
-This ensures that the scheduler load balancing code only balances
-against the CPUs that are in the sched domain as defined above and
-not all of the CPUs in the system. This removes any overhead due to
-load balancing code trying to pull tasks outside of the cpu_exclusive
-cpuset only to be prevented by the tasks' cpus_allowed mask.
-
 A cpuset that is mem_exclusive restricts kernel allocations for
 page, buffer and other data commonly shared by the kernel across
 multiple users. All cpusets, whether mem_exclusive or not, restrict
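
The paragraph removed from Documentation/cpusets.txt above described the scheduler domain that used to be attached to a cpu_exclusive cpuset: all CPUs in that cpuset which are not claimed by any cpu_exclusive child. A minimal user-space sketch of that span calculation follows, with a plain unsigned long standing in for the kernel's cpumask_t and purely illustrative mask values; the in-kernel equivalent was the cpus_andnot() step in update_cpu_domains(), removed from kernel/cpuset.c later in this patch.

#include <stdio.h>

/* Toy stand-in for cpumask_t: one bit per CPU, CPUs 0-63 only. */
typedef unsigned long cpumask;

int main(void)
{
	cpumask cur_cpus   = 0x3c;	/* cpu_exclusive cpuset owns CPUs 2-5  */
	cpumask child_cpus = 0x30;	/* a cpu_exclusive child owns CPUs 4-5 */

	/* Sched domain span per the removed text: the cpuset's CPUs minus
	 * those already covered by an exclusive child. */
	cpumask span = cur_cpus & ~child_cpus;

	printf("sched domain span: 0x%lx\n", span);	/* 0xc, i.e. CPUs 2-3 */
	return 0;
}
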
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 285ee4827a3c..592e3a55f818 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -729,9 +729,6 @@ struct sched_domain {
 #endif
 };
 
-extern int partition_sched_domains(cpumask_t *partition1,
-				cpumask_t *partition2);
-
 #endif /* CONFIG_SMP */
 
 /*
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e196510aa40f..0864f4097930 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -755,68 +755,13 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 }
 
 /*
- * For a given cpuset cur, partition the system as follows
- * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
- *    exclusive child cpusets
- * b. All cpus in the current cpuset's cpus_allowed that are not part of any
- *    exclusive child cpusets
- * Build these two partitions by calling partition_sched_domains
- *
- * Call with manage_mutex held. May nest a call to the
- * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
- * Must not be called holding callback_mutex, because we must
- * not call lock_cpu_hotplug() while holding callback_mutex.
- */
-
-static void update_cpu_domains(struct cpuset *cur)
-{
-	struct cpuset *c, *par = cur->parent;
-	cpumask_t pspan, cspan;
-
-	if (par == NULL || cpus_empty(cur->cpus_allowed))
-		return;
-
-	/*
-	 * Get all cpus from parent's cpus_allowed not part of exclusive
-	 * children
-	 */
-	pspan = par->cpus_allowed;
-	list_for_each_entry(c, &par->children, sibling) {
-		if (is_cpu_exclusive(c))
-			cpus_andnot(pspan, pspan, c->cpus_allowed);
-	}
-	if (!is_cpu_exclusive(cur)) {
-		cpus_or(pspan, pspan, cur->cpus_allowed);
-		if (cpus_equal(pspan, cur->cpus_allowed))
-			return;
-		cspan = CPU_MASK_NONE;
-	} else {
-		if (cpus_empty(pspan))
-			return;
-		cspan = cur->cpus_allowed;
-		/*
-		 * Get all cpus from current cpuset's cpus_allowed not part
-		 * of exclusive children
-		 */
-		list_for_each_entry(c, &cur->children, sibling) {
-			if (is_cpu_exclusive(c))
-				cpus_andnot(cspan, cspan, c->cpus_allowed);
-		}
-	}
-
-	lock_cpu_hotplug();
-	partition_sched_domains(&pspan, &cspan);
-	unlock_cpu_hotplug();
-}
-
-/*
  * Call with manage_mutex held. May take callback_mutex during call.
  */
 
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
-	int retval, cpus_unchanged;
+	int retval;
 
 	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
 	if (cs == &top_cpuset)
@@ -843,12 +788,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	retval = validate_change(cs, &trialcs);
 	if (retval < 0)
 		return retval;
-	cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
 	mutex_lock(&callback_mutex);
 	cs->cpus_allowed = trialcs.cpus_allowed;
 	mutex_unlock(&callback_mutex);
-	if (is_cpu_exclusive(cs) && !cpus_unchanged)
-		update_cpu_domains(cs);
 	return 0;
 }
 
@@ -1085,7 +1027,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 {
 	int turning_on;
 	struct cpuset trialcs;
-	int err, cpu_exclusive_changed;
+	int err;
 
 	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
 
@@ -1098,14 +1040,10 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 	err = validate_change(cs, &trialcs);
 	if (err < 0)
 		return err;
-	cpu_exclusive_changed =
-		(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
 	mutex_lock(&callback_mutex);
 	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
-	if (cpu_exclusive_changed)
-		update_cpu_domains(cs);
 	return 0;
 }
 
@@ -1965,17 +1903,6 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
 }
 
-/*
- * Locking note on the strange update_flag() call below:
- *
- * If the cpuset being removed is marked cpu_exclusive, then simulate
- * turning cpu_exclusive off, which will call update_cpu_domains().
- * The lock_cpu_hotplug() call in update_cpu_domains() must not be
- * made while holding callback_mutex. Elsewhere the kernel nests
- * callback_mutex inside lock_cpu_hotplug() calls. So the reverse
- * nesting would risk an ABBA deadlock.
- */
-
 static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cpuset *cs = dentry->d_fsdata;
@@ -1995,13 +1922,6 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 		mutex_unlock(&manage_mutex);
 		return -EBUSY;
 	}
-	if (is_cpu_exclusive(cs)) {
-		int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
-		if (retval < 0) {
-			mutex_unlock(&manage_mutex);
-			return retval;
-		}
-	}
 	parent = cs->parent;
 	mutex_lock(&callback_mutex);
 	set_bit(CS_REMOVED, &cs->flags);
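
The locking note deleted above explains why cpuset_rmdir() had to clear cpu_exclusive via update_flag() rather than call update_cpu_domains() directly: elsewhere the kernel takes callback_mutex inside lock_cpu_hotplug(), so taking the two locks in the opposite order risks an ABBA deadlock. A minimal user-space sketch of that hazard follows, with pthread mutexes standing in for callback_mutex and the hotplug lock; all names here are illustrative, not kernel symbols.

#include <pthread.h>

static pthread_mutex_t hotplug_lock  = PTHREAD_MUTEX_INITIALIZER; /* stands in for lock_cpu_hotplug() */
static pthread_mutex_t callback_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for callback_mutex     */

/* Established order used elsewhere: hotplug lock first, then callback_mutex. */
static void *kernel_path(void *arg)
{
	pthread_mutex_lock(&hotplug_lock);
	pthread_mutex_lock(&callback_lock);
	/* ... work ... */
	pthread_mutex_unlock(&callback_lock);
	pthread_mutex_unlock(&hotplug_lock);
	return arg;
}

/* The order rmdir would use if it called update_cpu_domains() while already
 * holding callback_mutex: callback_mutex first, then the hotplug lock.
 * Run concurrently with kernel_path(), each thread can grab its first lock
 * and block forever on the other's -- the ABBA deadlock the note warns about. */
static void *rmdir_path(void *arg)
{
	pthread_mutex_lock(&callback_lock);
	pthread_mutex_lock(&hotplug_lock);
	/* ... update_cpu_domains() equivalent ... */
	pthread_mutex_unlock(&hotplug_lock);
	pthread_mutex_unlock(&callback_lock);
	return arg;
}

int main(void)
{
	/* Run sequentially here so the example terminates; the deadlock only
	 * arises when both paths run concurrently with opposite lock order. */
	kernel_path(0);
	rmdir_path(0);
	return 0;
}
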
diff --git a/kernel/sched.c b/kernel/sched.c
index 78c8fbd373a3..0da2b2635c54 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6348,35 +6348,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	arch_destroy_sched_domains(cpu_map);
 }
 
-/*
- * Partition sched domains as specified by the cpumasks below.
- * This attaches all cpus from the cpumasks to the NULL domain,
- * waits for a RCU quiescent period, recalculates sched
- * domain information and then attaches them back to the
- * correct sched domains
- * Call with hotplug lock held
- */
-int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
-{
-	cpumask_t change_map;
-	int err = 0;
-
-	cpus_and(*partition1, *partition1, cpu_online_map);
-	cpus_and(*partition2, *partition2, cpu_online_map);
-	cpus_or(change_map, *partition1, *partition2);
-
-	/* Detach sched domains from all of the affected cpus */
-	detach_destroy_domains(&change_map);
-	if (!cpus_empty(*partition1))
-		err = build_sched_domains(partition1);
-	if (!err && !cpus_empty(*partition2))
-		err = build_sched_domains(partition2);
-
-	register_sched_domain_sysctl();
-
-	return err;
-}
-
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 static int arch_reinit_sched_domains(void)
 {