aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJuri Lelli <juri.lelli@arm.com>2014-09-19 05:22:40 -0400
committerIngo Molnar <mingo@kernel.org>2014-10-28 05:47:58 -0400
commit7f51412a415d87ea8598d14722fb31e4f5701257 (patch)
tree1b3f90cb539185177143a1bf37e5f4f8d86b64bb
parentd9aade7ae1d283097a3f626790e7c325a5c69007 (diff)
sched/deadline: Fix bandwidth check/update when migrating tasks between exclusive cpusets
Exclusive cpusets are the only way users can restrict SCHED_DEADLINE tasks' affinity (performing what is commonly called clustered scheduling). Unfortunately, such a thing is currently broken for two reasons: - No check is performed when the user tries to attach a task to an exclusive cpuset (recall that exclusive cpusets have an associated maximum allowed bandwidth). - Bandwidths of source and destination cpusets are not correctly updated after a task is migrated between them. This patch fixes both things at once, as they are opposite faces of the same coin. The check is performed in cpuset_can_attach(), as there aren't any points of failure after that function. The update is split in two halves. We first reserve bandwidth in the destination cpuset, after we pass the check in cpuset_can_attach(). And we then release bandwidth from the source cpuset when the task's affinity is actually changed. Even if there can be time windows when sched_setattr() may erroneously fail in the source cpuset, we are fine with it, as we can't perform an atomic update of both cpusets at once. Reported-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Reported-by: Vincent Legout <vincent@legout.info> Signed-off-by: Juri Lelli <juri.lelli@arm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Dario Faggioli <raistlin@linux.it> Cc: Michael Trimarchi <michael@amarulasolutions.com> Cc: Fabio Checconi <fchecconi@gmail.com> Cc: michael@amarulasolutions.com Cc: luca.abeni@unitn.it Cc: Li Zefan <lizefan@huawei.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: cgroups@vger.kernel.org Link: http://lkml.kernel.org/r/1411118561-26323-3-git-send-email-juri.lelli@arm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--include/linux/sched.h2
-rw-r--r--kernel/cpuset.c13
-rw-r--r--kernel/sched/core.c70
-rw-r--r--kernel/sched/deadline.c25
-rw-r--r--kernel/sched/sched.h19
5 files changed, 97 insertions, 32 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e344bbe63ec..1d1fa081d44f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2052,6 +2052,8 @@ static inline void tsk_restore_flags(struct task_struct *task,
2052 task->flags |= orig_flags & flags; 2052 task->flags |= orig_flags & flags;
2053} 2053}
2054 2054
2055extern int task_can_attach(struct task_struct *p,
2056 const struct cpumask *cs_cpus_allowed);
2055#ifdef CONFIG_SMP 2057#ifdef CONFIG_SMP
2056extern void do_set_cpus_allowed(struct task_struct *p, 2058extern void do_set_cpus_allowed(struct task_struct *p,
2057 const struct cpumask *new_mask); 2059 const struct cpumask *new_mask);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1f107c74087b..7af8577fc8f8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1429,17 +1429,8 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
1429 goto out_unlock; 1429 goto out_unlock;
1430 1430
1431 cgroup_taskset_for_each(task, tset) { 1431 cgroup_taskset_for_each(task, tset) {
1432 /* 1432 ret = task_can_attach(task, cs->cpus_allowed);
1433 * Kthreads which disallow setaffinity shouldn't be moved 1433 if (ret)
1434 * to a new cpuset; we don't want to change their cpu
1435 * affinity and isolating such threads by their set of
1436 * allowed nodes is unnecessary. Thus, cpusets are not
1437 * applicable for such threads. This prevents checking for
1438 * success of set_cpus_allowed_ptr() on all attached tasks
1439 * before cpus_allowed may be changed.
1440 */
1441 ret = -EINVAL;
1442 if (task->flags & PF_NO_SETAFFINITY)
1443 goto out_unlock; 1434 goto out_unlock;
1444 ret = security_task_setscheduler(task); 1435 ret = security_task_setscheduler(task);
1445 if (ret) 1436 if (ret)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5c067fd66db9..9993feeb8b10 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2034,25 +2034,6 @@ static inline int dl_bw_cpus(int i)
2034} 2034}
2035#endif 2035#endif
2036 2036
2037static inline
2038void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw)
2039{
2040 dl_b->total_bw -= tsk_bw;
2041}
2042
2043static inline
2044void __dl_add(struct dl_bw *dl_b, u64 tsk_bw)
2045{
2046 dl_b->total_bw += tsk_bw;
2047}
2048
2049static inline
2050bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
2051{
2052 return dl_b->bw != -1 &&
2053 dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
2054}
2055
2056/* 2037/*
2057 * We must be sure that accepting a new task (or allowing changing the 2038 * We must be sure that accepting a new task (or allowing changing the
2058 * parameters of an existing one) is consistent with the bandwidth 2039 * parameters of an existing one) is consistent with the bandwidth
@@ -4669,6 +4650,57 @@ void init_idle(struct task_struct *idle, int cpu)
4669#endif 4650#endif
4670} 4651}
4671 4652
4653int task_can_attach(struct task_struct *p,
4654 const struct cpumask *cs_cpus_allowed)
4655{
4656 int ret = 0;
4657
4658 /*
4659 * Kthreads which disallow setaffinity shouldn't be moved
4660 * to a new cpuset; we don't want to change their cpu
4661 * affinity and isolating such threads by their set of
4662 * allowed nodes is unnecessary. Thus, cpusets are not
4663 * applicable for such threads. This prevents checking for
4664 * success of set_cpus_allowed_ptr() on all attached tasks
4665 * before cpus_allowed may be changed.
4666 */
4667 if (p->flags & PF_NO_SETAFFINITY) {
4668 ret = -EINVAL;
4669 goto out;
4670 }
4671
4672#ifdef CONFIG_SMP
4673 if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
4674 cs_cpus_allowed)) {
4675 unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
4676 cs_cpus_allowed);
4677 struct dl_bw *dl_b = dl_bw_of(dest_cpu);
4678 bool overflow;
4679 int cpus;
4680 unsigned long flags;
4681
4682 raw_spin_lock_irqsave(&dl_b->lock, flags);
4683 cpus = dl_bw_cpus(dest_cpu);
4684 overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
4685 if (overflow)
4686 ret = -EBUSY;
4687 else {
4688 /*
4689 * We reserve space for this task in the destination
4690 * root_domain, as we can't fail after this point.
4691 * We will free resources in the source root_domain
4692 * later on (see set_cpus_allowed_dl()).
4693 */
4694 __dl_add(dl_b, p->dl.dl_bw);
4695 }
4696 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
4697
4698 }
4699#endif
4700out:
4701 return ret;
4702}
4703
4672#ifdef CONFIG_SMP 4704#ifdef CONFIG_SMP
4673/* 4705/*
4674 * move_queued_task - move a queued task to new rq. 4706 * move_queued_task - move a queued task to new rq.
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9d1e76a21297..8aaa971ffecd 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1517,10 +1517,33 @@ static void set_cpus_allowed_dl(struct task_struct *p,
1517 const struct cpumask *new_mask) 1517 const struct cpumask *new_mask)
1518{ 1518{
1519 struct rq *rq; 1519 struct rq *rq;
1520 struct root_domain *src_rd;
1520 int weight; 1521 int weight;
1521 1522
1522 BUG_ON(!dl_task(p)); 1523 BUG_ON(!dl_task(p));
1523 1524
1525 rq = task_rq(p);
1526 src_rd = rq->rd;
1527 /*
1528 * Migrating a SCHED_DEADLINE task between exclusive
1529 * cpusets (different root_domains) entails a bandwidth
1530 * update. We already made space for us in the destination
1531 * domain (see cpuset_can_attach()).
1532 */
1533 if (!cpumask_intersects(src_rd->span, new_mask)) {
1534 struct dl_bw *src_dl_b;
1535
1536 src_dl_b = dl_bw_of(cpu_of(rq));
1537 /*
1538 * We now free resources of the root_domain we are migrating
1539 * off. In the worst case, sched_setattr() may temporary fail
1540 * until we complete the update.
1541 */
1542 raw_spin_lock(&src_dl_b->lock);
1543 __dl_clear(src_dl_b, p->dl.dl_bw);
1544 raw_spin_unlock(&src_dl_b->lock);
1545 }
1546
1524 /* 1547 /*
1525 * Update only if the task is actually running (i.e., 1548 * Update only if the task is actually running (i.e.,
1526 * it is on the rq AND it is not throttled). 1549 * it is on the rq AND it is not throttled).
@@ -1537,8 +1560,6 @@ static void set_cpus_allowed_dl(struct task_struct *p,
1537 if ((p->nr_cpus_allowed > 1) == (weight > 1)) 1560 if ((p->nr_cpus_allowed > 1) == (weight > 1))
1538 return; 1561 return;
1539 1562
1540 rq = task_rq(p);
1541
1542 /* 1563 /*
1543 * The process used to be able to migrate OR it can now migrate 1564 * The process used to be able to migrate OR it can now migrate
1544 */ 1565 */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 57aacea1cbdf..ec3917c5f898 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -176,6 +176,25 @@ struct dl_bw {
176 u64 bw, total_bw; 176 u64 bw, total_bw;
177}; 177};
178 178
179static inline
180void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw)
181{
182 dl_b->total_bw -= tsk_bw;
183}
184
185static inline
186void __dl_add(struct dl_bw *dl_b, u64 tsk_bw)
187{
188 dl_b->total_bw += tsk_bw;
189}
190
191static inline
192bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
193{
194 return dl_b->bw != -1 &&
195 dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
196}
197
179extern struct mutex sched_domains_mutex; 198extern struct mutex sched_domains_mutex;
180 199
181#ifdef CONFIG_CGROUP_SCHED 200#ifdef CONFIG_CGROUP_SCHED