author		Peter Zijlstra <peterz@infradead.org>	2013-12-19 05:54:45 -0500
committer	Ingo Molnar <mingo@kernel.org>	2014-01-13 07:47:25 -0500
commit		de212f18e92c952533d57c5510d2790199c75734 (patch)
tree		0eb0755691e582abcc7aa104f68a58527d638c92
parent		1724813d9f2c7ff702b46d3e4a4f6d9b10a8f8c2 (diff)
sched/deadline: Fix hotplug admission control
The current hotplug admission control is broken because:

  CPU_DYING -> migration_call() -> migrate_tasks() -> __migrate_task()

cannot fail and hard-assumes it _will_ move all tasks off of the dying
CPU; failing to do so would break hotplug.

The much simpler solution is a DOWN_PREPARE handler that fails when
removing one CPU would drop the available capacity below the total
allocated bandwidth.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131220171343.GL2480@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--	kernel/sched/core.c	83
1 file changed, 32 insertions(+), 51 deletions(-)
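The admission check added by this patch boils down to fixed-point bandwidth arithmetic: each -deadline task contributes runtime/period (scaled by 2^20, the ratio to_ratio() is assumed to compute), and taking a CPU offline is refused if the remaining CPUs can no longer cover the sum already allocated. The stand-alone sketch below only illustrates that arithmetic; the 95% cap, the task parameters and the helper names are hypothetical and not taken from the patch.

#include <stdint.h>
#include <stdio.h>

/* Illustrative fixed-point bandwidth ratio: runtime/period scaled by 2^20,
 * mirroring what to_ratio() is assumed to compute in the kernel. */
static uint64_t ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << 20) / period;
}

/* Admission test: with 'cpus' active CPUs, each offering 'cap' bandwidth,
 * can we still cover 'total_bw' (the bandwidth already allocated)?  This
 * is the shape of the __dl_overflow(dl_b, cpus, 0, 0) check in the patch. */
static int overflows(uint64_t cap, int cpus, uint64_t total_bw)
{
	return cap * (uint64_t)cpus < total_bw;
}

int main(void)
{
	/* Hypothetical setup: 4 CPUs, a 95% per-CPU cap, and three
	 * -deadline tasks each using 30ms every 100ms. */
	uint64_t cap = ratio(100, 95);     /* 95% of a CPU */
	uint64_t task_bw = ratio(100, 30); /* 30% of a CPU */
	uint64_t total_bw = 3 * task_bw;   /* 90% in total */
	int cpus;

	for (cpus = 4; cpus >= 1; cpus--)
		printf("%d CPUs: taking one offline %s\n", cpus,
		       overflows(cap, cpus - 1, total_bw) ?
		       "would overflow -> -EBUSY" : "is fine");
	return 0;
}

Run as an ordinary user-space program, this reports that going from two CPUs down to one still covers a 90% total allocation, while removing the last CPU would overflow, which is exactly the case in which the new DOWN_PREPARE handler refuses the hot-unplug instead of letting migrate_tasks() fail later.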
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d33eb8143cc..a549d9a22502 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1887,9 +1887,15 @@ inline struct dl_bw *dl_bw_of(int i)
 	return &cpu_rq(i)->rd->dl_bw;
 }
 
-static inline int __dl_span_weight(struct rq *rq)
+static inline int dl_bw_cpus(int i)
 {
-	return cpumask_weight(rq->rd->span);
+	struct root_domain *rd = cpu_rq(i)->rd;
+	int cpus = 0;
+
+	for_each_cpu_and(i, rd->span, cpu_active_mask)
+		cpus++;
+
+	return cpus;
 }
 #else
 inline struct dl_bw *dl_bw_of(int i)
@@ -1897,7 +1903,7 @@ inline struct dl_bw *dl_bw_of(int i)
 	return &cpu_rq(i)->dl.dl_bw;
 }
 
-static inline int __dl_span_weight(struct rq *rq)
+static inline int dl_bw_cpus(int i)
 {
 	return 1;
 }
@@ -1938,8 +1944,7 @@ static int dl_overflow(struct task_struct *p, int policy,
 	u64 period = attr->sched_period;
 	u64 runtime = attr->sched_runtime;
 	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
-	int cpus = __dl_span_weight(task_rq(p));
-	int err = -1;
+	int cpus, err = -1;
 
 	if (new_bw == p->dl.dl_bw)
 		return 0;
@@ -1950,6 +1955,7 @@ static int dl_overflow(struct task_struct *p, int policy,
 	 * allocated bandwidth of the container.
 	 */
 	raw_spin_lock(&dl_b->lock);
+	cpus = dl_bw_cpus(task_cpu(p));
 	if (dl_policy(policy) && !task_has_dl_policy(p) &&
 	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
 		__dl_add(dl_b, new_bw);
@@ -4522,42 +4528,6 @@ out:
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
 /*
- * When dealing with a -deadline task, we have to check if moving it to
- * a new CPU is possible or not. In fact, this is only true iff there
- * is enough bandwidth available on such CPU, otherwise we want the
- * whole migration procedure to fail over.
- */
-static inline
-bool set_task_cpu_dl(struct task_struct *p, unsigned int cpu)
-{
-	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-	struct dl_bw *cpu_b = dl_bw_of(cpu);
-	int ret = 1;
-	u64 bw;
-
-	if (dl_b == cpu_b)
-		return 1;
-
-	raw_spin_lock(&dl_b->lock);
-	raw_spin_lock(&cpu_b->lock);
-
-	bw = cpu_b->bw * cpumask_weight(cpu_rq(cpu)->rd->span);
-	if (dl_bandwidth_enabled() &&
-	    bw < cpu_b->total_bw + p->dl.dl_bw) {
-		ret = 0;
-		goto unlock;
-	}
-	dl_b->total_bw -= p->dl.dl_bw;
-	cpu_b->total_bw += p->dl.dl_bw;
-
-unlock:
-	raw_spin_unlock(&cpu_b->lock);
-	raw_spin_unlock(&dl_b->lock);
-
-	return ret;
-}
-
-/*
  * Move (not current) task off this cpu, onto dest cpu. We're doing
  * this because either it can't run here any more (set_cpus_allowed()
  * away from this CPU, or CPU going down), or because we're
@@ -4589,13 +4559,6 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 		goto fail;
 
 	/*
-	 * If p is -deadline, proceed only if there is enough
-	 * bandwidth available on dest_cpu
-	 */
-	if (unlikely(dl_task(p)) && !set_task_cpu_dl(p, dest_cpu))
-		goto fail;
-
-	/*
 	 * If we're not on a rq, the next wake-up will ensure we're
 	 * placed properly.
 	 */
@@ -5052,13 +5015,31 @@ static int sched_cpu_active(struct notifier_block *nfb,
 static int sched_cpu_inactive(struct notifier_block *nfb,
 					unsigned long action, void *hcpu)
 {
+	unsigned long flags;
+	long cpu = (long)hcpu;
+
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_PREPARE:
-		set_cpu_active((long)hcpu, false);
+		set_cpu_active(cpu, false);
+
+		/* explicitly allow suspend */
+		if (!(action & CPU_TASKS_FROZEN)) {
+			struct dl_bw *dl_b = dl_bw_of(cpu);
+			bool overflow;
+			int cpus;
+
+			raw_spin_lock_irqsave(&dl_b->lock, flags);
+			cpus = dl_bw_cpus(cpu);
+			overflow = __dl_overflow(dl_b, cpus, 0, 0);
+			raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+			if (overflow)
+				return notifier_from_errno(-EBUSY);
+		}
 		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
 	}
+
+	return NOTIFY_DONE;
 }
 
 static int __init migration_init(void)
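The check itself lives in __dl_overflow(), which this patch calls but does not modify. Based on how it is used above, a minimal self-contained sketch of the test it is assumed to perform (with stand-in types; the real helper is defined elsewhere in the scheduler code) is:

#include <stdbool.h>
#include <stdint.h>

/* Stand-ins for the kernel types, for illustration only. */
typedef uint64_t u64;
struct dl_bw {
	int64_t bw;	/* per-CPU bandwidth cap, -1 == no limit */
	u64 total_bw;	/* bandwidth already admitted            */
};

/* Assumed shape of the helper called from sched_cpu_inactive() above:
 * with old_bw == new_bw == 0, as in the hotplug path, it asks whether
 * the capacity of the remaining CPUs still covers what is allocated. */
static inline bool __dl_overflow(struct dl_bw *dl_b, int cpus,
				 u64 old_bw, u64 new_bw)
{
	return dl_b->bw != -1 &&
	       (u64)dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}

If that test fires, sched_cpu_inactive() returns notifier_from_errno(-EBUSY), the CPU_DOWN_PREPARE notification fails and the CPU stays online, rather than the failure only surfacing later in __migrate_task(), which must not fail.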