diff options
| author | Peter Zijlstra <peterz@infradead.org> | 2013-12-19 05:54:45 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2014-01-13 07:47:25 -0500 |
| commit | de212f18e92c952533d57c5510d2790199c75734 (patch) | |
| tree | 0eb0755691e582abcc7aa104f68a58527d638c92 /kernel/sched | |
| parent | 1724813d9f2c7ff702b46d3e4a4f6d9b10a8f8c2 (diff) | |
sched/deadline: Fix hotplug admission control
The current hotplug admission control is broken because:
CPU_DYING -> migration_call() -> migrate_tasks() -> __migrate_task()
cannot fail and hard-assumes it _will_ move all tasks off of the dying
CPU; failing this will break hotplug.
The much simpler solution is a DOWN_PREPARE handler that fails when
removing one CPU gets us below the total allocated bandwidth.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20131220171343.GL2480@laptop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
| -rw-r--r-- | kernel/sched/core.c | 83 |
1 file changed, 32 insertions, 51 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1d33eb8143cc..a549d9a22502 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -1887,9 +1887,15 @@ inline struct dl_bw *dl_bw_of(int i) | |||
| 1887 | return &cpu_rq(i)->rd->dl_bw; | 1887 | return &cpu_rq(i)->rd->dl_bw; |
| 1888 | } | 1888 | } |
| 1889 | 1889 | ||
| 1890 | static inline int __dl_span_weight(struct rq *rq) | 1890 | static inline int dl_bw_cpus(int i) |
| 1891 | { | 1891 | { |
| 1892 | return cpumask_weight(rq->rd->span); | 1892 | struct root_domain *rd = cpu_rq(i)->rd; |
| 1893 | int cpus = 0; | ||
| 1894 | |||
| 1895 | for_each_cpu_and(i, rd->span, cpu_active_mask) | ||
| 1896 | cpus++; | ||
| 1897 | |||
| 1898 | return cpus; | ||
| 1893 | } | 1899 | } |
| 1894 | #else | 1900 | #else |
| 1895 | inline struct dl_bw *dl_bw_of(int i) | 1901 | inline struct dl_bw *dl_bw_of(int i) |
| @@ -1897,7 +1903,7 @@ inline struct dl_bw *dl_bw_of(int i) | |||
| 1897 | return &cpu_rq(i)->dl.dl_bw; | 1903 | return &cpu_rq(i)->dl.dl_bw; |
| 1898 | } | 1904 | } |
| 1899 | 1905 | ||
| 1900 | static inline int __dl_span_weight(struct rq *rq) | 1906 | static inline int dl_bw_cpus(int i) |
| 1901 | { | 1907 | { |
| 1902 | return 1; | 1908 | return 1; |
| 1903 | } | 1909 | } |
| @@ -1938,8 +1944,7 @@ static int dl_overflow(struct task_struct *p, int policy, | |||
| 1938 | u64 period = attr->sched_period; | 1944 | u64 period = attr->sched_period; |
| 1939 | u64 runtime = attr->sched_runtime; | 1945 | u64 runtime = attr->sched_runtime; |
| 1940 | u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; | 1946 | u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; |
| 1941 | int cpus = __dl_span_weight(task_rq(p)); | 1947 | int cpus, err = -1; |
| 1942 | int err = -1; | ||
| 1943 | 1948 | ||
| 1944 | if (new_bw == p->dl.dl_bw) | 1949 | if (new_bw == p->dl.dl_bw) |
| 1945 | return 0; | 1950 | return 0; |
| @@ -1950,6 +1955,7 @@ static int dl_overflow(struct task_struct *p, int policy, | |||
| 1950 | * allocated bandwidth of the container. | 1955 | * allocated bandwidth of the container. |
| 1951 | */ | 1956 | */ |
| 1952 | raw_spin_lock(&dl_b->lock); | 1957 | raw_spin_lock(&dl_b->lock); |
| 1958 | cpus = dl_bw_cpus(task_cpu(p)); | ||
| 1953 | if (dl_policy(policy) && !task_has_dl_policy(p) && | 1959 | if (dl_policy(policy) && !task_has_dl_policy(p) && |
| 1954 | !__dl_overflow(dl_b, cpus, 0, new_bw)) { | 1960 | !__dl_overflow(dl_b, cpus, 0, new_bw)) { |
| 1955 | __dl_add(dl_b, new_bw); | 1961 | __dl_add(dl_b, new_bw); |
| @@ -4522,42 +4528,6 @@ out: | |||
| 4522 | EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); | 4528 | EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); |
| 4523 | 4529 | ||
| 4524 | /* | 4530 | /* |
| 4525 | * When dealing with a -deadline task, we have to check if moving it to | ||
| 4526 | * a new CPU is possible or not. In fact, this is only true iff there | ||
| 4527 | * is enough bandwidth available on such CPU, otherwise we want the | ||
| 4528 | * whole migration procedure to fail over. | ||
| 4529 | */ | ||
| 4530 | static inline | ||
| 4531 | bool set_task_cpu_dl(struct task_struct *p, unsigned int cpu) | ||
| 4532 | { | ||
| 4533 | struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); | ||
| 4534 | struct dl_bw *cpu_b = dl_bw_of(cpu); | ||
| 4535 | int ret = 1; | ||
| 4536 | u64 bw; | ||
| 4537 | |||
| 4538 | if (dl_b == cpu_b) | ||
| 4539 | return 1; | ||
| 4540 | |||
| 4541 | raw_spin_lock(&dl_b->lock); | ||
| 4542 | raw_spin_lock(&cpu_b->lock); | ||
| 4543 | |||
| 4544 | bw = cpu_b->bw * cpumask_weight(cpu_rq(cpu)->rd->span); | ||
| 4545 | if (dl_bandwidth_enabled() && | ||
| 4546 | bw < cpu_b->total_bw + p->dl.dl_bw) { | ||
| 4547 | ret = 0; | ||
| 4548 | goto unlock; | ||
| 4549 | } | ||
| 4550 | dl_b->total_bw -= p->dl.dl_bw; | ||
| 4551 | cpu_b->total_bw += p->dl.dl_bw; | ||
| 4552 | |||
| 4553 | unlock: | ||
| 4554 | raw_spin_unlock(&cpu_b->lock); | ||
| 4555 | raw_spin_unlock(&dl_b->lock); | ||
| 4556 | |||
| 4557 | return ret; | ||
| 4558 | } | ||
| 4559 | |||
| 4560 | /* | ||
| 4561 | * Move (not current) task off this cpu, onto dest cpu. We're doing | 4531 | * Move (not current) task off this cpu, onto dest cpu. We're doing |
| 4562 | * this because either it can't run here any more (set_cpus_allowed() | 4532 | * this because either it can't run here any more (set_cpus_allowed() |
| 4563 | * away from this CPU, or CPU going down), or because we're | 4533 | * away from this CPU, or CPU going down), or because we're |
| @@ -4589,13 +4559,6 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
| 4589 | goto fail; | 4559 | goto fail; |
| 4590 | 4560 | ||
| 4591 | /* | 4561 | /* |
| 4592 | * If p is -deadline, proceed only if there is enough | ||
| 4593 | * bandwidth available on dest_cpu | ||
| 4594 | */ | ||
| 4595 | if (unlikely(dl_task(p)) && !set_task_cpu_dl(p, dest_cpu)) | ||
| 4596 | goto fail; | ||
| 4597 | |||
| 4598 | /* | ||
| 4599 | * If we're not on a rq, the next wake-up will ensure we're | 4562 | * If we're not on a rq, the next wake-up will ensure we're |
| 4600 | * placed properly. | 4563 | * placed properly. |
| 4601 | */ | 4564 | */ |
| @@ -5052,13 +5015,31 @@ static int sched_cpu_active(struct notifier_block *nfb, | |||
| 5052 | static int sched_cpu_inactive(struct notifier_block *nfb, | 5015 | static int sched_cpu_inactive(struct notifier_block *nfb, |
| 5053 | unsigned long action, void *hcpu) | 5016 | unsigned long action, void *hcpu) |
| 5054 | { | 5017 | { |
| 5018 | unsigned long flags; | ||
| 5019 | long cpu = (long)hcpu; | ||
| 5020 | |||
| 5055 | switch (action & ~CPU_TASKS_FROZEN) { | 5021 | switch (action & ~CPU_TASKS_FROZEN) { |
| 5056 | case CPU_DOWN_PREPARE: | 5022 | case CPU_DOWN_PREPARE: |
| 5057 | set_cpu_active((long)hcpu, false); | 5023 | set_cpu_active(cpu, false); |
| 5024 | |||
| 5025 | /* explicitly allow suspend */ | ||
| 5026 | if (!(action & CPU_TASKS_FROZEN)) { | ||
| 5027 | struct dl_bw *dl_b = dl_bw_of(cpu); | ||
| 5028 | bool overflow; | ||
| 5029 | int cpus; | ||
| 5030 | |||
| 5031 | raw_spin_lock_irqsave(&dl_b->lock, flags); | ||
| 5032 | cpus = dl_bw_cpus(cpu); | ||
| 5033 | overflow = __dl_overflow(dl_b, cpus, 0, 0); | ||
| 5034 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); | ||
| 5035 | |||
| 5036 | if (overflow) | ||
| 5037 | return notifier_from_errno(-EBUSY); | ||
| 5038 | } | ||
| 5058 | return NOTIFY_OK; | 5039 | return NOTIFY_OK; |
| 5059 | default: | ||
| 5060 | return NOTIFY_DONE; | ||
| 5061 | } | 5040 | } |
| 5041 | |||
| 5042 | return NOTIFY_DONE; | ||
| 5062 | } | 5043 | } |
| 5063 | 5044 | ||
| 5064 | static int __init migration_init(void) | 5045 | static int __init migration_init(void) |
