author    Prateek Sood <prsood@codeaurora.org>    2017-11-15 09:20:14 -0500
committer Tejun Heo <tj@kernel.org>    2017-11-27 11:48:10 -0500
commit    aa24163b2ee5c92120e32e99b5a93143a0f4258e (patch)
tree      cb52cad2feec49453ae62e1f5a9c72ee7c7a85cb /kernel/cgroup
parent    4fbd8d194f06c8a3fd2af1ce560ddb31f7ec8323 (diff)
cgroup/cpuset: remove circular dependency deadlock
Remove a circular dependency deadlock in the scenario where a CPU
hotplug operation races with cgroup and cpuset updates triggered from
userspace. The cycle is Process A => kthreadd => Process B =>
Process C => Process A:

Process A
cpu_subsys_offline();
  cpu_down();
    _cpu_down();
      percpu_down_write(&cpu_hotplug_lock); //held
      cpuhp_invoke_callback();
        workqueue_offline_cpu();
          queue_work_on(); // unbind_work on system_highpri_wq
            __queue_work();
              insert_work();
                wake_up_worker();
          flush_work();
            wait_for_completion();

worker_thread();
  manage_workers();
    create_worker();
      kthread_create_on_node();
        wake_up_process(kthreadd_task);

kthreadd
kthreadd();
  kernel_thread();
    do_fork();
      copy_process();
        percpu_down_read(&cgroup_threadgroup_rwsem); //waiting

Process B
kernfs_fop_write();
  cgroup_file_write();
    cgroup_procs_write();
      percpu_down_write(&cgroup_threadgroup_rwsem); //held
      cgroup_attach_task();
        cgroup_migrate();
          cgroup_migrate_execute();
            cpuset_can_attach();
              mutex_lock(&cpuset_mutex); //waiting

Process C
kernfs_fop_write();
  cgroup_file_write();
    cpuset_write_resmask();
      mutex_lock(&cpuset_mutex); //held
      update_cpumask();
        update_cpumasks_hier();
          rebuild_sched_domains_locked();
            get_online_cpus();
              percpu_down_read(&cpu_hotplug_lock); //waiting

Eliminate the deadlock by reversing the locking order of cpuset_mutex
and cpu_hotplug_lock: take cpu_hotplug_lock first, then cpuset_mutex.

Signed-off-by: Prateek Sood <prsood@codeaurora.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
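To illustrate the fix, here is a minimal userspace sketch (an assumed
analogue, not part of the patch): a pthread rwlock stands in for
cpu_hotplug_lock and a pthread mutex for cpuset_mutex, and the helper
pair mirrors the new cpuset_sched_change_begin()/cpuset_sched_change_end()
so every path acquires the two locks in the same order. All names in
the sketch are hypothetical stand-ins.

#include <pthread.h>
#include <stdio.h>

/* Hypothetical stand-ins for cpu_hotplug_lock and cpuset_mutex. */
static pthread_rwlock_t hotplug_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t cpuset_lock = PTHREAD_MUTEX_INITIALIZER;

/* Analogue of cpuset_sched_change_begin(): hotplug lock always first. */
static void change_begin(void)
{
	pthread_rwlock_rdlock(&hotplug_lock);	/* like cpus_read_lock() */
	pthread_mutex_lock(&cpuset_lock);	/* like mutex_lock(&cpuset_mutex) */
}

/* Analogue of cpuset_sched_change_end(): release in reverse order. */
static void change_end(void)
{
	pthread_mutex_unlock(&cpuset_lock);
	pthread_rwlock_unlock(&hotplug_lock);
}

/* Two concurrent paths; both honor the same order, so no ABBA cycle. */
static void *path(void *name)
{
	change_begin();
	printf("%s: holds hotplug -> cpuset\n", (const char *)name);
	change_end();
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, path, "cpuset writer");
	pthread_create(&b, NULL, path, "hotplug");
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Before the patch, cpuset_write_resmask() took cpuset_mutex first and
cpu_hotplug_lock only later, inside rebuild_sched_domains_locked(),
which is the opposite of the hotplug path's order; the begin/end
helpers make hotplug-then-cpuset the only possible order.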
Diffstat (limited to 'kernel/cgroup')
-rw-r--r--  kernel/cgroup/cpuset.c  53
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f7efa7b4d825..cab5fd1ee767 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -812,6 +812,18 @@ done:
 	return ndoms;
 }
 
+static void cpuset_sched_change_begin(void)
+{
+	cpus_read_lock();
+	mutex_lock(&cpuset_mutex);
+}
+
+static void cpuset_sched_change_end(void)
+{
+	mutex_unlock(&cpuset_mutex);
+	cpus_read_unlock();
+}
+
 /*
  * Rebuild scheduler domains.
  *
@@ -821,16 +833,14 @@ done:
  * 'cpus' is removed, then call this routine to rebuild the
  * scheduler's dynamic sched domains.
  *
- * Call with cpuset_mutex held. Takes get_online_cpus().
  */
-static void rebuild_sched_domains_locked(void)
+static void rebuild_sched_domains_cpuslocked(void)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
 	int ndoms;
 
 	lockdep_assert_held(&cpuset_mutex);
-	get_online_cpus();
 
 	/*
 	 * We have raced with CPU hotplug. Don't do anything to avoid
@@ -838,27 +848,25 @@ static void rebuild_sched_domains_locked(void)
 	 * Anyways, hotplug work item will rebuild sched domains.
 	 */
 	if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
-		goto out;
+		return;
 
 	/* Generate domain masks and attrs */
 	ndoms = generate_sched_domains(&doms, &attr);
 
 	/* Have scheduler rebuild the domains */
 	partition_sched_domains(ndoms, doms, attr);
-out:
-	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
-static void rebuild_sched_domains_locked(void)
+static void rebuild_sched_domains_cpuslocked(void)
 {
 }
 #endif /* CONFIG_SMP */
 
 void rebuild_sched_domains(void)
 {
-	mutex_lock(&cpuset_mutex);
-	rebuild_sched_domains_locked();
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_begin();
+	rebuild_sched_domains_cpuslocked();
+	cpuset_sched_change_end();
 }
 
 /**
@@ -944,7 +952,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
 	rcu_read_unlock();
 
 	if (need_rebuild_sched_domains)
-		rebuild_sched_domains_locked();
+		rebuild_sched_domains_cpuslocked();
 }
 
 /**
@@ -1276,7 +1284,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 		cs->relax_domain_level = val;
 		if (!cpumask_empty(cs->cpus_allowed) &&
 		    is_sched_load_balance(cs))
-			rebuild_sched_domains_locked();
+			rebuild_sched_domains_cpuslocked();
 	}
 
 	return 0;
@@ -1309,7 +1317,6 @@ static void update_tasks_flags(struct cpuset *cs)
  *
  * Call with cpuset_mutex held.
  */
-
 static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 		       int turning_on)
 {
@@ -1342,7 +1349,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	spin_unlock_irq(&callback_lock);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
-		rebuild_sched_domains_locked();
+		rebuild_sched_domains_cpuslocked();
 
 	if (spread_flag_changed)
 		update_tasks_flags(cs);
@@ -1610,7 +1617,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 	cpuset_filetype_t type = cft->private;
 	int retval = 0;
 
-	mutex_lock(&cpuset_mutex);
+	cpuset_sched_change_begin();
 	if (!is_cpuset_online(cs)) {
 		retval = -ENODEV;
 		goto out_unlock;
@@ -1646,7 +1653,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 		break;
 	}
 out_unlock:
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_end();
 	return retval;
 }
 
@@ -1657,7 +1664,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 	cpuset_filetype_t type = cft->private;
 	int retval = -ENODEV;
 
-	mutex_lock(&cpuset_mutex);
+	cpuset_sched_change_begin();
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
 
@@ -1670,7 +1677,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 		break;
 	}
 out_unlock:
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_end();
 	return retval;
 }
 
@@ -1709,7 +1716,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	kernfs_break_active_protection(of->kn);
 	flush_work(&cpuset_hotplug_work);
 
-	mutex_lock(&cpuset_mutex);
+	cpuset_sched_change_begin();
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
 
@@ -1733,7 +1740,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 
 	free_trial_cpuset(trialcs);
 out_unlock:
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_end();
 	kernfs_unbreak_active_protection(of->kn);
 	css_put(&cs->css);
 	flush_workqueue(cpuset_migrate_mm_wq);
@@ -2034,14 +2041,14 @@ out_unlock:
 /*
  * If the cpuset being removed has its flag 'sched_load_balance'
  * enabled, then simulate turning sched_load_balance off, which
- * will call rebuild_sched_domains_locked().
+ * will call rebuild_sched_domains_cpuslocked().
  */
 
 static void cpuset_css_offline(struct cgroup_subsys_state *css)
 {
 	struct cpuset *cs = css_cs(css);
 
-	mutex_lock(&cpuset_mutex);
+	cpuset_sched_change_begin();
 
 	if (is_sched_load_balance(cs))
 		update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
@@ -2049,7 +2056,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 	cpuset_dec();
 	clear_bit(CS_ONLINE, &cs->flags);
 
-	mutex_unlock(&cpuset_mutex);
+	cpuset_sched_change_end();
 }
 
 static void cpuset_css_free(struct cgroup_subsys_state *css)