author     Tejun Heo <tj@kernel.org>    2013-01-07 11:51:07 -0500
committer  Tejun Heo <tj@kernel.org>    2013-01-07 11:51:07 -0500
commit     699140ba838dd3fa2c5cce474e14f194b09f91aa
tree       afbdc601bebadc8230f5ae740bed2dc410e692d3 /kernel
parent     3a5a6d0c2b0391e159fa5bf1dddb9bf1f35178a0
cpuset: drop async_rebuild_sched_domains()
In general, we want to make cgroup_mutex one of the outermost locks
and be able to use get_online_cpus() and friends from cgroup methods.
With cpuset hotplug made async, get_online_cpus() can now be nested
inside cgroup_mutex.
Currently, cpuset avoids nesting get_online_cpus() inside cgroup_mutex
by bouncing sched_domain rebuilding to a work item. Now that such
nesting is allowed, remove the workqueue bouncing code and always
rebuild sched_domains synchronously. This also nests
sched_domains_mutex inside cgroup_mutex, which is intended and should
be okay.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
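
For illustration, the lock nesting this patch establishes looks roughly
like the sketch below. The caller is hypothetical; cgroup_lock(),
cgroup_unlock() and rebuild_sched_domains_locked() are the interfaces
the patch actually touches:

	/* Hypothetical cpuset write path -- a sketch, not code from this patch. */
	static void cpuset_write_path_sketch(void)
	{
		cgroup_lock();		/* cgroup_mutex, now the outermost lock */

		/* ... validate and apply the cpuset configuration change ... */

		/*
		 * Rebuild synchronously: this takes get_online_cpus() and
		 * sched_domains_mutex nested inside cgroup_mutex.
		 */
		rebuild_sched_domains_locked();

		cgroup_unlock();
	}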
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cpuset.c   76
1 file changed, 16 insertions(+), 60 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 658eb1a32084..74e412f908db 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,14 +61,6 @@
 #include <linux/cgroup.h>
 
 /*
- * Workqueue for cpuset related tasks.
- *
- * Using kevent workqueue may cause deadlock when memory_migrate
- * is set. So we create a separate workqueue thread for cpuset.
- */
-static struct workqueue_struct *cpuset_wq;
-
-/*
  * Tracks how many cpusets are currently defined in system.
  * When there is only one cpuset (the root cpuset) we can
  * short circuit some hooks.
@@ -753,25 +745,25 @@ done:
 /*
  * Rebuild scheduler domains.
  *
- * Call with neither cgroup_mutex held nor within get_online_cpus().
- * Takes both cgroup_mutex and get_online_cpus().
+ * If the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
+ * which has that flag enabled, or if any cpuset with a non-empty
+ * 'cpus' is removed, then call this routine to rebuild the
+ * scheduler's dynamic sched domains.
  *
- * Cannot be directly called from cpuset code handling changes
- * to the cpuset pseudo-filesystem, because it cannot be called
- * from code that already holds cgroup_mutex.
+ * Call with cgroup_mutex held. Takes get_online_cpus().
  */
-static void do_rebuild_sched_domains(struct work_struct *unused)
+static void rebuild_sched_domains_locked(void)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
 	int ndoms;
 
+	WARN_ON_ONCE(!cgroup_lock_is_held());
 	get_online_cpus();
 
 	/* Generate domain masks and attrs */
-	cgroup_lock();
 	ndoms = generate_sched_domains(&doms, &attr);
-	cgroup_unlock();
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
@@ -779,7 +771,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
 	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
-static void do_rebuild_sched_domains(struct work_struct *unused)
+static void rebuild_sched_domains_locked(void)
 {
 }
 
@@ -791,44 +783,11 @@ static int generate_sched_domains(cpumask_var_t **domains,
 }
 #endif /* CONFIG_SMP */
 
-static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
-
-/*
- * Rebuild scheduler domains, asynchronously via workqueue.
- *
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
- *
- * The rebuild_sched_domains() and partition_sched_domains()
- * routines must nest cgroup_lock() inside get_online_cpus(),
- * but such cpuset changes as these must nest that locking the
- * other way, holding cgroup_lock() for much of the code.
- *
- * So in order to avoid an ABBA deadlock, the cpuset code handling
- * these user changes delegates the actual sched domain rebuilding
- * to a separate workqueue thread, which ends up processing the
- * above do_rebuild_sched_domains() function.
- */
-static void async_rebuild_sched_domains(void)
-{
-	queue_work(cpuset_wq, &rebuild_sched_domains_work);
-}
-
-/*
- * Accomplishes the same scheduler domain rebuild as the above
- * async_rebuild_sched_domains(), however it directly calls the
- * rebuild routine synchronously rather than calling it via an
- * asynchronous work thread.
- *
- * This can only be called from code that is not holding
- * cgroup_mutex (not nested in a cgroup_lock() call.)
- */
 void rebuild_sched_domains(void)
 {
-	do_rebuild_sched_domains(NULL);
+	cgroup_lock();
+	rebuild_sched_domains_locked();
+	cgroup_unlock();
 }
 
 /**
@@ -948,7 +907,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	heap_free(&heap);
 
 	if (is_load_balanced)
-		async_rebuild_sched_domains();
+		rebuild_sched_domains_locked();
 	return 0;
 }
 
@@ -1196,7 +1155,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 		cs->relax_domain_level = val;
 		if (!cpumask_empty(cs->cpus_allowed) &&
 		    is_sched_load_balance(cs))
-			async_rebuild_sched_domains();
+			rebuild_sched_domains_locked();
 	}
 
 	return 0;
@@ -1288,7 +1247,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	mutex_unlock(&callback_mutex);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
-		async_rebuild_sched_domains();
+		rebuild_sched_domains_locked();
 
 	if (spread_flag_changed)
 		update_tasks_flags(cs, &heap);
@@ -1925,7 +1884,7 @@ static void cpuset_css_offline(struct cgroup *cgrp)
 /*
  * If the cpuset being removed has its flag 'sched_load_balance'
  * enabled, then simulate turning sched_load_balance off, which
- * will call async_rebuild_sched_domains().
+ * will call rebuild_sched_domains_locked().
  */
 
 static void cpuset_css_free(struct cgroup *cont)
@@ -2237,9 +2196,6 @@ void __init cpuset_init_smp(void)
 	top_cpuset.mems_allowed = node_states[N_MEMORY];
 
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
-
-	cpuset_wq = create_singlethread_workqueue("cpuset");
-	BUG_ON(!cpuset_wq);
 }
 
 /**
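
After this patch there are two entry points for rebuilding sched domains.
The callers below are hypothetical sketches of how each is intended to be
used, not code from the patch:

	/* From a path that does not hold cgroup_mutex (e.g. CPU hotplug handling): */
	static void hotplug_path_sketch(void)
	{
		rebuild_sched_domains();	/* takes cgroup_mutex itself */
	}

	/* From cpuset code that already holds cgroup_mutex: */
	static void cpuset_internal_sketch(void)
	{
		/* cgroup_mutex is held by the cgroup write path */
		rebuild_sched_domains_locked();
	}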