author     Prateek Sood <prsood@codeaurora.org>    2017-11-15 09:20:14 -0500
committer  Tejun Heo <tj@kernel.org>               2017-11-27 11:48:10 -0500
commit     aa24163b2ee5c92120e32e99b5a93143a0f4258e (patch)
tree       cb52cad2feec49453ae62e1f5a9c72ee7c7a85cb /kernel/cgroup
parent     4fbd8d194f06c8a3fd2af1ce560ddb31f7ec8323 (diff)
cgroup/cpuset: remove circular dependency deadlock
Remove a circular dependency deadlock that can occur when a CPU hotplug
operation runs concurrently with cgroup and cpuset updates triggered from
userspace.
Process A => kthreadd => Process B => Process C => Process A

Process A
cpu_subsys_offline();
  cpu_down();
    _cpu_down();
      percpu_down_write(&cpu_hotplug_lock); //held
      cpuhp_invoke_callback();
        workqueue_offline_cpu();
          queue_work_on(); // unbind_work on system_highpri_wq
            __queue_work();
              insert_work();
                wake_up_worker();
          flush_work();
            wait_for_completion();

worker_thread();
  manage_workers();
    create_worker();
      kthread_create_on_node();
        wake_up_process(kthreadd_task);

kthreadd
kthreadd();
  kernel_thread();
    do_fork();
      copy_process();
        percpu_down_read(&cgroup_threadgroup_rwsem);
          __rwsem_down_read_failed_common(); //waiting

Process B
kernfs_fop_write();
  cgroup_file_write();
    cgroup_procs_write();
      percpu_down_write(&cgroup_threadgroup_rwsem); //held
      cgroup_attach_task();
        cgroup_migrate();
          cgroup_migrate_execute();
            cpuset_can_attach();
              mutex_lock(&cpuset_mutex); //waiting

Process C
kernfs_fop_write();
  cgroup_file_write();
    cpuset_write_resmask();
      mutex_lock(&cpuset_mutex); //held
      update_cpumask();
        update_cpumasks_hier();
          rebuild_sched_domains_locked();
            get_online_cpus();
              percpu_down_read(&cpu_hotplug_lock); //waiting
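Stripped of the intermediaries (cgroup_threadgroup_rwsem and the worker
creation dependency), this is a classic ABBA deadlock: Process A acquires
cpu_hotplug_lock before cpuset_mutex, while Process C acquires them in the
opposite order. A minimal, hypothetical userspace analogue (pthread-based;
the mutex names merely mirror the kernel locks) reproduces the same shape
with just two threads:

    /* abba.c - build with: gcc abba.c -pthread
     * NOTE: this program deadlocks by design. */
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t cpuset_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Mirrors Process A: hotplug lock first, cpuset lock second. */
    static void *hotplug_path(void *arg)
    {
            pthread_mutex_lock(&hotplug_lock);
            sleep(1);                          /* let the other thread run */
            pthread_mutex_lock(&cpuset_mutex); /* blocks forever */
            pthread_mutex_unlock(&cpuset_mutex);
            pthread_mutex_unlock(&hotplug_lock);
            return NULL;
    }

    /* Mirrors Process C: cpuset lock first, hotplug lock second. */
    static void *cpuset_path(void *arg)
    {
            pthread_mutex_lock(&cpuset_mutex);
            sleep(1);
            pthread_mutex_lock(&hotplug_lock); /* blocks forever */
            pthread_mutex_unlock(&hotplug_lock);
            pthread_mutex_unlock(&cpuset_mutex);
            return NULL;
    }

    int main(void)
    {
            pthread_t a, c;

            pthread_create(&a, NULL, hotplug_path, NULL);
            pthread_create(&c, NULL, cpuset_path, NULL);
            pthread_join(a, NULL);  /* never returns: circular wait */
            pthread_join(c, NULL);
            puts("unreachable");
            return 0;
    }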
Eliminate the deadlock by reversing the locking order of cpuset_mutex and
cpu_hotplug_lock: take cpu_hotplug_lock (for reading) before cpuset_mutex
on every path that may rebuild the scheduler domains.
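The patch encapsulates the new ordering in a pair of helpers (shown in
context in the diff below):

    static void cpuset_sched_change_begin(void)
    {
            cpus_read_lock();               /* cpu_hotplug_lock, read side, first */
            mutex_lock(&cpuset_mutex);      /* then cpuset_mutex */
    }

    static void cpuset_sched_change_end(void)
    {
            mutex_unlock(&cpuset_mutex);
            cpus_read_unlock();
    }

Every path that can reach the renamed rebuild_sched_domains_cpuslocked()
now brackets its work with these helpers instead of taking cpuset_mutex
alone and calling get_online_cpus() internally, so cpu_hotplug_lock is
always acquired before cpuset_mutex and the cycle above can no longer
close.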
Signed-off-by: Prateek Sood <prsood@codeaurora.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cgroup')
 -rw-r--r--  kernel/cgroup/cpuset.c | 53
 1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f7efa7b4d825..cab5fd1ee767 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -812,6 +812,18 @@ done:
         return ndoms;
 }
 
+static void cpuset_sched_change_begin(void)
+{
+        cpus_read_lock();
+        mutex_lock(&cpuset_mutex);
+}
+
+static void cpuset_sched_change_end(void)
+{
+        mutex_unlock(&cpuset_mutex);
+        cpus_read_unlock();
+}
+
 /*
  * Rebuild scheduler domains.
  *
@@ -821,16 +833,14 @@ done:
  * 'cpus' is removed, then call this routine to rebuild the
  * scheduler's dynamic sched domains.
  *
- * Call with cpuset_mutex held. Takes get_online_cpus().
  */
-static void rebuild_sched_domains_locked(void)
+static void rebuild_sched_domains_cpuslocked(void)
 {
         struct sched_domain_attr *attr;
         cpumask_var_t *doms;
         int ndoms;
 
         lockdep_assert_held(&cpuset_mutex);
-        get_online_cpus();
 
         /*
          * We have raced with CPU hotplug. Don't do anything to avoid
@@ -838,27 +848,25 @@ static void rebuild_sched_domains_locked(void)
          * Anyways, hotplug work item will rebuild sched domains.
          */
         if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
-                goto out;
+                return;
 
         /* Generate domain masks and attrs */
         ndoms = generate_sched_domains(&doms, &attr);
 
         /* Have scheduler rebuild the domains */
         partition_sched_domains(ndoms, doms, attr);
-out:
-        put_online_cpus();
 }
 #else /* !CONFIG_SMP */
-static void rebuild_sched_domains_locked(void)
+static void rebuild_sched_domains_cpuslocked(void)
 {
 }
 #endif /* CONFIG_SMP */
 
 void rebuild_sched_domains(void)
 {
-        mutex_lock(&cpuset_mutex);
-        rebuild_sched_domains_locked();
-        mutex_unlock(&cpuset_mutex);
+        cpuset_sched_change_begin();
+        rebuild_sched_domains_cpuslocked();
+        cpuset_sched_change_end();
 }
 
 /**
@@ -944,7 +952,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
         rcu_read_unlock();
 
         if (need_rebuild_sched_domains)
-                rebuild_sched_domains_locked();
+                rebuild_sched_domains_cpuslocked();
 }
 
 /**
@@ -1276,7 +1284,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
                 cs->relax_domain_level = val;
                 if (!cpumask_empty(cs->cpus_allowed) &&
                     is_sched_load_balance(cs))
-                        rebuild_sched_domains_locked();
+                        rebuild_sched_domains_cpuslocked();
         }
 
         return 0;
@@ -1309,7 +1317,6 @@ static void update_tasks_flags(struct cpuset *cs)
  *
  * Call with cpuset_mutex held.
  */
-
 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
                        int turning_on)
 {
@@ -1342,7 +1349,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
         spin_unlock_irq(&callback_lock);
 
         if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
-                rebuild_sched_domains_locked();
+                rebuild_sched_domains_cpuslocked();
 
         if (spread_flag_changed)
                 update_tasks_flags(cs);
@@ -1610,7 +1617,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
         cpuset_filetype_t type = cft->private;
         int retval = 0;
 
-        mutex_lock(&cpuset_mutex);
+        cpuset_sched_change_begin();
         if (!is_cpuset_online(cs)) {
                 retval = -ENODEV;
                 goto out_unlock;
@@ -1646,7 +1653,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
                 break;
         }
 out_unlock:
-        mutex_unlock(&cpuset_mutex);
+        cpuset_sched_change_end();
         return retval;
 }
 
@@ -1657,7 +1664,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
         cpuset_filetype_t type = cft->private;
         int retval = -ENODEV;
 
-        mutex_lock(&cpuset_mutex);
+        cpuset_sched_change_begin();
         if (!is_cpuset_online(cs))
                 goto out_unlock;
 
@@ -1670,7 +1677,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
                 break;
         }
 out_unlock:
-        mutex_unlock(&cpuset_mutex);
+        cpuset_sched_change_end();
         return retval;
 }
 
@@ -1709,7 +1716,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
         kernfs_break_active_protection(of->kn);
         flush_work(&cpuset_hotplug_work);
 
-        mutex_lock(&cpuset_mutex);
+        cpuset_sched_change_begin();
         if (!is_cpuset_online(cs))
                 goto out_unlock;
 
@@ -1733,7 +1740,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 
         free_trial_cpuset(trialcs);
 out_unlock:
-        mutex_unlock(&cpuset_mutex);
+        cpuset_sched_change_end();
         kernfs_unbreak_active_protection(of->kn);
         css_put(&cs->css);
         flush_workqueue(cpuset_migrate_mm_wq);
@@ -2034,14 +2041,14 @@ out_unlock:
 /*
  * If the cpuset being removed has its flag 'sched_load_balance'
  * enabled, then simulate turning sched_load_balance off, which
- * will call rebuild_sched_domains_locked().
+ * will call rebuild_sched_domains_cpuslocked().
  */
 
 static void cpuset_css_offline(struct cgroup_subsys_state *css)
 {
         struct cpuset *cs = css_cs(css);
 
-        mutex_lock(&cpuset_mutex);
+        cpuset_sched_change_begin();
 
         if (is_sched_load_balance(cs))
                 update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
@@ -2049,7 +2056,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
         cpuset_dec();
         clear_bit(CS_ONLINE, &cs->flags);
 
-        mutex_unlock(&cpuset_mutex);
+        cpuset_sched_change_end();
 }
 
 static void cpuset_css_free(struct cgroup_subsys_state *css)