author     Tejun Heo <tj@kernel.org>    2013-01-07 11:51:07 -0500
committer  Tejun Heo <tj@kernel.org>    2013-01-07 11:51:07 -0500
commit     699140ba838dd3fa2c5cce474e14f194b09f91aa (patch)
tree       afbdc601bebadc8230f5ae740bed2dc410e692d3 /kernel
parent     3a5a6d0c2b0391e159fa5bf1dddb9bf1f35178a0 (diff)
cpuset: drop async_rebuild_sched_domains()
In general, we want to make cgroup_mutex one of the outermost locks and
be able to use get_online_cpus() and friends from cgroup methods.  With
cpuset hotplug made async, get_online_cpus() can now be nested inside
cgroup_mutex.

Currently, cpuset avoids nesting get_online_cpus() inside cgroup_mutex
by bouncing sched_domain rebuilding to a work item.  As such nesting is
allowed now, remove the workqueue bouncing code and always rebuild
sched_domains synchronously.  This also nests sched_domains_mutex inside
cgroup_mutex, which is intended and should be okay.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cpuset.c   76
1 file changed, 16 insertions(+), 60 deletions(-)
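The diff below drops the workqueue indirection and calls the rebuild path synchronously with cgroup_mutex held. As a rough illustration of the resulting lock nesting only (this is a userspace sketch, not kernel code: the pthread mutexes, the printf, and the simplified call chain are stand-ins assumed for this example), the wrapper/locked split introduced by the patch looks like this:

/*
 * Illustrative sketch: two pthread mutexes stand in for cgroup_mutex
 * and the CPU-hotplug lock taken by get_online_cpus().  The point is
 * the single consistent nesting order the patch settles on:
 * cgroup_mutex outermost, the hotplug lock inside it.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cgroup_mutex     = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t cpu_hotplug_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for rebuild_sched_domains_locked(): caller holds cgroup_mutex. */
static void rebuild_sched_domains_locked(void)
{
	pthread_mutex_lock(&cpu_hotplug_lock);   /* like get_online_cpus() */
	printf("rebuild sched domains under cgroup_mutex + hotplug lock\n");
	pthread_mutex_unlock(&cpu_hotplug_lock); /* like put_online_cpus() */
}

/* Stand-in for rebuild_sched_domains(): the unlocked wrapper. */
static void rebuild_sched_domains(void)
{
	pthread_mutex_lock(&cgroup_mutex);       /* like cgroup_lock() */
	rebuild_sched_domains_locked();
	pthread_mutex_unlock(&cgroup_mutex);     /* like cgroup_unlock() */
}

int main(void)
{
	/*
	 * Before the patch, paths already holding cgroup_mutex could not
	 * rebuild directly because the old code nested the two locks the
	 * other way around, so they bounced the work to a workqueue.
	 * With one agreed order, the direct call is safe.
	 */
	rebuild_sched_domains();
	return 0;
}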
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 658eb1a32084..74e412f908db 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,14 +61,6 @@
 #include <linux/cgroup.h>
 
 /*
- * Workqueue for cpuset related tasks.
- *
- * Using kevent workqueue may cause deadlock when memory_migrate
- * is set. So we create a separate workqueue thread for cpuset.
- */
-static struct workqueue_struct *cpuset_wq;
-
-/*
  * Tracks how many cpusets are currently defined in system.
  * When there is only one cpuset (the root cpuset) we can
  * short circuit some hooks.
@@ -753,25 +745,25 @@ done:
 /*
  * Rebuild scheduler domains.
  *
- * Call with neither cgroup_mutex held nor within get_online_cpus().
- * Takes both cgroup_mutex and get_online_cpus().
+ * If the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
+ * which has that flag enabled, or if any cpuset with a non-empty
+ * 'cpus' is removed, then call this routine to rebuild the
+ * scheduler's dynamic sched domains.
  *
- * Cannot be directly called from cpuset code handling changes
- * to the cpuset pseudo-filesystem, because it cannot be called
- * from code that already holds cgroup_mutex.
+ * Call with cgroup_mutex held.  Takes get_online_cpus().
  */
-static void do_rebuild_sched_domains(struct work_struct *unused)
+static void rebuild_sched_domains_locked(void)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
 	int ndoms;
 
+	WARN_ON_ONCE(!cgroup_lock_is_held());
 	get_online_cpus();
 
 	/* Generate domain masks and attrs */
-	cgroup_lock();
 	ndoms = generate_sched_domains(&doms, &attr);
-	cgroup_unlock();
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
@@ -779,7 +771,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
 	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
-static void do_rebuild_sched_domains(struct work_struct *unused)
+static void rebuild_sched_domains_locked(void)
 {
 }
 
@@ -791,44 +783,11 @@ static int generate_sched_domains(cpumask_var_t **domains,
 }
 #endif /* CONFIG_SMP */
 
-static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
-
-/*
- * Rebuild scheduler domains, asynchronously via workqueue.
- *
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
- *
- * The rebuild_sched_domains() and partition_sched_domains()
- * routines must nest cgroup_lock() inside get_online_cpus(),
- * but such cpuset changes as these must nest that locking the
- * other way, holding cgroup_lock() for much of the code.
- *
- * So in order to avoid an ABBA deadlock, the cpuset code handling
- * these user changes delegates the actual sched domain rebuilding
- * to a separate workqueue thread, which ends up processing the
- * above do_rebuild_sched_domains() function.
- */
-static void async_rebuild_sched_domains(void)
-{
-	queue_work(cpuset_wq, &rebuild_sched_domains_work);
-}
-
-/*
- * Accomplishes the same scheduler domain rebuild as the above
- * async_rebuild_sched_domains(), however it directly calls the
- * rebuild routine synchronously rather than calling it via an
- * asynchronous work thread.
- *
- * This can only be called from code that is not holding
- * cgroup_mutex (not nested in a cgroup_lock() call.)
- */
 void rebuild_sched_domains(void)
 {
-	do_rebuild_sched_domains(NULL);
+	cgroup_lock();
+	rebuild_sched_domains_locked();
+	cgroup_unlock();
 }
 
 /**
@@ -948,7 +907,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	heap_free(&heap);
 
 	if (is_load_balanced)
-		async_rebuild_sched_domains();
+		rebuild_sched_domains_locked();
 	return 0;
 }
 
@@ -1196,7 +1155,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 		cs->relax_domain_level = val;
 		if (!cpumask_empty(cs->cpus_allowed) &&
 		    is_sched_load_balance(cs))
-			async_rebuild_sched_domains();
+			rebuild_sched_domains_locked();
 	}
 
 	return 0;
@@ -1288,7 +1247,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	mutex_unlock(&callback_mutex);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
-		async_rebuild_sched_domains();
+		rebuild_sched_domains_locked();
 
 	if (spread_flag_changed)
 		update_tasks_flags(cs, &heap);
@@ -1925,7 +1884,7 @@ static void cpuset_css_offline(struct cgroup *cgrp)
 /*
  * If the cpuset being removed has its flag 'sched_load_balance'
  * enabled, then simulate turning sched_load_balance off, which
- * will call async_rebuild_sched_domains().
+ * will call rebuild_sched_domains_locked().
  */
 
 static void cpuset_css_free(struct cgroup *cont)
@@ -2237,9 +2196,6 @@ void __init cpuset_init_smp(void)
 	top_cpuset.mems_allowed = node_states[N_MEMORY];
 
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
-
-	cpuset_wq = create_singlethread_workqueue("cpuset");
-	BUG_ON(!cpuset_wq);
 }
 
 /**