author     Mathieu Poirier <mathieu.poirier@linaro.org>  2019-07-19 09:59:55 -0400
committer  Ingo Molnar <mingo@kernel.org>                2019-07-25 09:55:01 -0400
commit     f9a25f776d780bfa3279f0b6e5f5cf3224997976 (patch)
tree       8b1fa082ea0a57b11fbee5fc91e208442fcafd0f
parent     4b211f2b129dd1f6a6956bbc76e2f232c1ec3ad8 (diff)
cpusets: Rebuild root domain deadline accounting information
When the topology of root domains is modified by CPUset or CPUhotplug
operations, information about the current deadline bandwidth held in the
root domain is lost.

This patch addresses the issue by recalculating the lost deadline bandwidth
information: it iterates over the deadline tasks held in CPUsets and adds
their current load to the root domain they are associated with.

Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
[ Various additional modifications. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bristot@redhat.com
Cc: claudio@evidence.eu.com
Cc: lizefan@huawei.com
Cc: longman@redhat.com
Cc: luca.abeni@santannapisa.it
Cc: rostedt@goodmis.org
Cc: tj@kernel.org
Cc: tommaso.cucinotta@santannapisa.it
Link: https://lkml.kernel.org/r/20190719140000.31694-4-juri.lelli@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
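[ Editor's illustration, not part of the patch: a minimal, compilable
  userspace sketch of the clear-then-reaccumulate scheme the message
  describes. All toy_* names are invented for this sketch; only BW_SHIFT
  and the runtime/period fixed-point ratio mirror the kernel's to_ratio()
  convention. ]

/* toy_dl_rebuild.c - userspace sketch, not kernel code. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT 20     /* same fixed-point scale the kernel uses for dl_bw */

struct toy_task {
        int is_deadline;        /* stand-in for dl_task(p) */
        uint64_t runtime_ns;    /* SCHED_DEADLINE runtime */
        uint64_t period_ns;     /* SCHED_DEADLINE period */
};

struct toy_root_domain {
        uint64_t total_bw;      /* stand-in for rd->dl_bw.total_bw */
};

/* Mirrors the kernel's to_ratio(): utilization scaled by 2^BW_SHIFT. */
static uint64_t toy_to_ratio(uint64_t period, uint64_t runtime)
{
        if (period == 0)
                return 0;
        return (runtime << BW_SHIFT) / period;
}

/* Step 1 of the rebuild, cf. dl_clear_root_domain(): drop stale accounting. */
static void toy_clear(struct toy_root_domain *rd)
{
        rd->total_bw = 0;
}

/* Step 2, cf. dl_add_task_root_domain(): re-add each deadline task's load. */
static void toy_add_task(struct toy_root_domain *rd, const struct toy_task *p)
{
        if (!p->is_deadline)
                return;
        rd->total_bw += toy_to_ratio(p->period_ns, p->runtime_ns);
}

int main(void)
{
        struct toy_root_domain rd = { .total_bw = 12345 };      /* stale */
        struct toy_task tasks[] = {
                { 1, 10000000, 100000000 },     /* 10% deadline task */
                { 0, 0, 0 },                    /* non-deadline task: skipped */
                { 1, 25000000, 100000000 },     /* 25% deadline task */
        };
        size_t i;

        toy_clear(&rd);
        for (i = 0; i < sizeof(tasks) / sizeof(tasks[0]); i++)
                toy_add_task(&rd, &tasks[i]);

        /* 0.35 * 2^20, truncated: prints total_bw = 367001 */
        printf("total_bw = %llu\n", (unsigned long long)rd.total_bw);
        return 0;
}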
-rw-r--r--  include/linux/cgroup.h            1
-rw-r--r--  include/linux/sched.h             5
-rw-r--r--  include/linux/sched/deadline.h    8
-rw-r--r--  kernel/cgroup/cgroup.c            2
-rw-r--r--  kernel/cgroup/cpuset.c           64
-rw-r--r--  kernel/sched/deadline.c          30
-rw-r--r--  kernel/sched/sched.h              3
-rw-r--r--  kernel/sched/topology.c          13
8 files changed, 120 insertions, 6 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f6b048902d6c..3ba3e6da13a6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -150,6 +150,7 @@ struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
 struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
 					struct cgroup_subsys_state **dst_cssp);
 
+void cgroup_enable_task_cg_lists(void);
 void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
 			 struct css_task_iter *it);
 struct task_struct *css_task_iter_next(struct css_task_iter *it);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..b94ad92dfbe6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -295,6 +295,11 @@ enum uclamp_id {
 	UCLAMP_CNT
 };
 
+#ifdef CONFIG_SMP
+extern struct root_domain def_root_domain;
+extern struct mutex sched_domains_mutex;
+#endif
+
 struct sched_info {
 #ifdef CONFIG_SCHED_INFO
 	/* Cumulative counters: */
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 0cb034331cbb..1aff00b65f3c 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -24,3 +24,11 @@ static inline bool dl_time_before(u64 a, u64 b)
 {
 	return (s64)(a - b) < 0;
 }
+
+#ifdef CONFIG_SMP
+
+struct root_domain;
+extern void dl_add_task_root_domain(struct task_struct *p);
+extern void dl_clear_root_domain(struct root_domain *rd);
+
+#endif /* CONFIG_SMP */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 753afbca549f..4b5bc452176c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1891,7 +1891,7 @@ static int cgroup_reconfigure(struct fs_context *fc)
  */
 static bool use_task_css_set_links __read_mostly;
 
-static void cgroup_enable_task_cg_lists(void)
+void cgroup_enable_task_cg_lists(void)
 {
 	struct task_struct *p, *g;
 
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 5aa37531ce76..846cbdb68566 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -45,6 +45,7 @@
 #include <linux/proc_fs.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/sched/deadline.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
 #include <linux/seq_file.h>
@@ -894,6 +895,67 @@ done:
 	return ndoms;
 }
 
+static void update_tasks_root_domain(struct cpuset *cs)
+{
+	struct css_task_iter it;
+	struct task_struct *task;
+
+	css_task_iter_start(&cs->css, 0, &it);
+
+	while ((task = css_task_iter_next(&it)))
+		dl_add_task_root_domain(task);
+
+	css_task_iter_end(&it);
+}
+
+static void rebuild_root_domains(void)
+{
+	struct cpuset *cs = NULL;
+	struct cgroup_subsys_state *pos_css;
+
+	lockdep_assert_held(&cpuset_mutex);
+	lockdep_assert_cpus_held();
+	lockdep_assert_held(&sched_domains_mutex);
+
+	cgroup_enable_task_cg_lists();
+
+	rcu_read_lock();
+
+	/*
+	 * Clear default root domain DL accounting, it will be computed again
+	 * if a task belongs to it.
+	 */
+	dl_clear_root_domain(&def_root_domain);
+
+	cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
+
+		if (cpumask_empty(cs->effective_cpus)) {
+			pos_css = css_rightmost_descendant(pos_css);
+			continue;
+		}
+
+		css_get(&cs->css);
+
+		rcu_read_unlock();
+
+		update_tasks_root_domain(cs);
+
+		rcu_read_lock();
+		css_put(&cs->css);
+	}
+	rcu_read_unlock();
+}
+
+static void
+partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+				    struct sched_domain_attr *dattr_new)
+{
+	mutex_lock(&sched_domains_mutex);
+	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
+	rebuild_root_domains();
+	mutex_unlock(&sched_domains_mutex);
+}
+
 /*
  * Rebuild scheduler domains.
  *
@@ -931,7 +993,7 @@ static void rebuild_sched_domains_locked(void)
 	ndoms = generate_sched_domains(&doms, &attr);
 
 	/* Have scheduler rebuild the domains */
-	partition_sched_domains(ndoms, doms, attr);
+	partition_and_rebuild_sched_domains(ndoms, doms, attr);
 out:
 	put_online_cpus();
 }
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ef5b9f6b1d42..0f9d2180be23 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2283,6 +2283,36 @@ void __init init_sched_dl_class(void)
 				GFP_KERNEL, cpu_to_node(i));
 }
 
+void dl_add_task_root_domain(struct task_struct *p)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+	struct dl_bw *dl_b;
+
+	rq = task_rq_lock(p, &rf);
+	if (!dl_task(p))
+		goto unlock;
+
+	dl_b = &rq->rd->dl_bw;
+	raw_spin_lock(&dl_b->lock);
+
+	__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
+
+	raw_spin_unlock(&dl_b->lock);
+
+unlock:
+	task_rq_unlock(rq, p, &rf);
+}
+
+void dl_clear_root_domain(struct root_domain *rd)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
+	rd->dl_bw.total_bw = 0;
+	raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
+}
+
 #endif /* CONFIG_SMP */
 
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 16126efd14ed..7583faddba33 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -778,9 +778,6 @@ struct root_domain {
 	struct perf_domain __rcu *pd;
 };
 
-extern struct root_domain def_root_domain;
-extern struct mutex sched_domains_mutex;
-
 extern void init_defrootdomain(void);
 extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 5a174ae6ecf3..8f83e8e3ea9a 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2203,8 +2203,19 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
 			if (cpumask_equal(doms_cur[i], doms_new[j]) &&
-			    dattrs_equal(dattr_cur, i, dattr_new, j))
+			    dattrs_equal(dattr_cur, i, dattr_new, j)) {
+				struct root_domain *rd;
+
+				/*
+				 * This domain won't be destroyed and as such
+				 * its dl_bw->total_bw needs to be cleared. It
+				 * will be recomputed in function
+				 * update_tasks_root_domain().
+				 */
+				rd = cpu_rq(cpumask_any(doms_cur[i]))->rd;
+				dl_clear_root_domain(rd);
 				goto match1;
+			}
 		}
 		/* No match - a current sched domain not in new doms_new[] */
 		detach_destroy_domains(doms_cur[i]);
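[ Editor's illustration, not part of the patch: a toy worked example of why
  the retained-domain path above must call dl_clear_root_domain(). The
  rebuild re-adds every deadline task's bandwidth, so a root domain that
  survives with a stale total_bw would count its tasks twice. Values reuse
  the 2^BW_SHIFT fixed-point convention from the sketch earlier. ]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* One 10% deadline task, already accounted: 0.10 * 2^20. */
        uint64_t task_bw = 104857;
        uint64_t stale_total = task_bw;

        /* Rebuild without clearing: the surviving domain double-counts. */
        uint64_t buggy = stale_total + task_bw;   /* 209714, i.e. 20%: wrong */

        /* Rebuild after clearing: accounting is recomputed from scratch. */
        uint64_t correct = 0 + task_bw;           /* 104857, i.e. 10%: right */

        printf("buggy=%llu correct=%llu\n",
               (unsigned long long)buggy, (unsigned long long)correct);
        return 0;
}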