author     Kirill Tkhai <ktkhai@parallels.com>  2014-10-27 10:40:52 -0400
committer  Ingo Molnar <mingo@kernel.org>       2014-11-04 01:17:50 -0500
commit     67dfa1b756f250972bde31d65e3f8fde6aeddc5b (patch)
tree       0c7478994f086854b764fdc7474416facb660912
parent     e7097e8bd0074b465f9c78dcff25cd3f82382581 (diff)
sched/deadline: Implement cancel_dl_timer() to use in switched_from_dl()
The currently used hrtimer_try_to_cancel() is racy:

raw_spin_lock(&rq->lock)
...                                dl_task_timer                  raw_spin_lock(&rq->lock)
...                                   raw_spin_lock(&rq->lock)    ...
   switched_from_dl()                 ...                         ...
      hrtimer_try_to_cancel()         ...                         ...
   switched_to_fair()                 ...                         ...
...                                   ...                         ...
...                                   ...                         ...
raw_spin_unlock(&rq->lock)            ...                         (acquired)
...                                   ...                         ...
...                                   ...                         ...
do_exit()                             ...                         ...
   schedule()                         ...                         ...
      raw_spin_lock(&rq->lock)        ...                         raw_spin_unlock(&rq->lock)
      ...                             ...                         ...
      raw_spin_unlock(&rq->lock)      ...                         raw_spin_lock(&rq->lock)
      ...                             ...                         (acquired)
      put_task_struct()               ...                         ...
          free_task_struct()          ...                         ...
      ...                             ...                         raw_spin_unlock(&rq->lock)
      ...                             (acquired)                  ...
      ...                             ...                         ...
      ...                             (use after free)            ...
So, let's implement a 100% guaranteed way to cancel the timer, and
be sure we are safe even in very unlikely situations.
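Concretely, the fix (added below as cancel_dl_timer()) relies on the
return contract of hrtimer_try_to_cancel(): 0 means the timer was not
active, 1 means it was cancelled, and -1 means the callback is
currently running and cannot be stopped; the old code silently ignored
the -1 case. A condensed, hedged sketch of the pattern (not verbatim
kernel code; here timer stands for p->dl.dl_timer and rq for the
runqueue whose lock is held):

    int ret = hrtimer_try_to_cancel(timer);    /* 0, 1, or -1 */

    if (ret == -1) {
            /*
             * The callback is running right now.  hrtimer_cancel()
             * waits for it to finish, but it must not be called with
             * rq->lock held: the callback itself takes rq->lock, so
             * waiting under the lock would deadlock.
             */
            raw_spin_unlock(&rq->lock);
            hrtimer_cancel(timer);
            raw_spin_lock(&rq->lock);
    }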
Unlocking rq does not restrict where switched_from_dl() may be used,
because dropping rq->lock has already been possible in pull_dl_task(),
called below in the same function.
Let's consider the safety of this unlocking. The new code in the patch
only runs when hrtimer_try_to_cancel() fails, which means the callback
is running. In that case hrtimer_cancel() simply waits until the
callback has finished. Two cases are possible:

1) Since we are in switched_from_dl(), the new class is not
dl_sched_class and the new prio is not less than MAX_DL_PRIO. So the
callback returns early, right after the !dl_task() check, and
hrtimer_cancel() then returns as well.
The above is:
raw_spin_lock(rq->lock);              ...
...                                   dl_task_timer()
...                                      raw_spin_lock(rq->lock);
   switched_from_dl()                 ...
      hrtimer_try_to_cancel()         ...
         raw_spin_unlock(rq->lock);   ...
         hrtimer_cancel()             ...
...                                      raw_spin_unlock(rq->lock);
...                                      return HRTIMER_NORESTART;
...                                   ...
         raw_spin_lock(rq->lock);     ...
2) But the below is also possible:

                                   dl_task_timer()
                                      raw_spin_lock(rq->lock);
                                      ...
                                      raw_spin_unlock(rq->lock);
raw_spin_lock(rq->lock);           ...
   switched_from_dl()              ...
      hrtimer_try_to_cancel()      ...
      ...                          return HRTIMER_NORESTART;
      raw_spin_unlock(rq->lock);   ...
      hrtimer_cancel();            ...
      raw_spin_lock(rq->lock);     ...

In this case hrtimer_cancel() returns immediately. This case is very
unlikely, but worth mentioning.
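Both cases rest on the callback bailing out quickly once the task has
left SCHED_DEADLINE. A simplified, hedged sketch of that early return
(condensed from dl_task_timer() in kernel/sched/deadline.c of this
era, not the verbatim code):

    static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
    {
            struct sched_dl_entity *dl_se = container_of(timer,
                                                         struct sched_dl_entity,
                                                         dl_timer);
            struct task_struct *p = dl_task_of(dl_se);
            struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));

            raw_spin_lock(&rq->lock);

            /*
             * The task left SCHED_DEADLINE (we are in
             * switched_from_dl()), so there is nothing to replenish:
             * return right away.  This is the short section a
             * concurrent hrtimer_cancel() has to wait for.
             */
            if (!dl_task(p))
                    goto unlock;

            /* ... replenish runtime, requeue and possibly preempt ... */

    unlock:
            raw_spin_unlock(&rq->lock);
            return HRTIMER_NORESTART;
    }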
Nobody can manipulate the task, because check_class_changed() is
always called with pi_lock held. Nobody can force the task to
participate in (concurrent) priority inheritance schemes (for the same
reason). All concurrent task operations require pi_lock, which is held
by us. No deadlocks with dl_task_timer() are possible, because it
returns right after the !dl_task() check (it does nothing in that
case).

If a new deadline task arrives while rq is unlocked, we simply no
longer need to do pull_dl_task() in switched_from_dl(); see the sketch
below.
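That last point can be seen in the tail of switched_from_dl() (a
hedged, simplified sketch; the exact guard in the tree may differ):

    /*
     * If deadline tasks (re)appeared on this rq while rq->lock was
     * dropped in cancel_dl_timer(), dl_nr_running is non-zero and
     * there is nothing to pull: the check below skips the pull.
     */
    if (!rq->dl.dl_nr_running)
            pull_dl_task(rq);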
Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
[ Added comments. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1414420852.19914.186.camel@tkhai
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--   kernel/sched/core.c     |  4
-rw-r--r--   kernel/sched/deadline.c | 34
-rw-r--r--   kernel/sched/sched.h    |  5
3 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0cd34e68680c..379cb87da69d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1008,6 +1008,9 @@ inline int task_curr(const struct task_struct *p)
 	return cpu_curr(task_cpu(p)) == p;
 }
 
+/*
+ * Can drop rq->lock because sched_class::switched_from() methods may drop it.
+ */
 static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 				       const struct sched_class *prev_class,
 				       int oldprio)
@@ -1015,6 +1018,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 	if (prev_class != p->sched_class) {
 		if (prev_class->switched_from)
 			prev_class->switched_from(rq, p);
+		/* Possible rq->lock 'hole'. */
 		p->sched_class->switched_to(rq, p);
 	} else if (oldprio != p->prio || dl_task(p))
 		p->sched_class->prio_changed(rq, p, oldprio);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 2e31a30e623c..9d483e862e58 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -563,11 +563,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
 	struct hrtimer *timer = &dl_se->dl_timer;
 
-	if (hrtimer_active(timer)) {
-		hrtimer_try_to_cancel(timer);
-		return;
-	}
-
 	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	timer->function = dl_task_timer;
 }
@@ -1610,10 +1605,35 @@ void init_sched_dl_class(void)
 
 #endif /* CONFIG_SMP */
 
+/*
+ * Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
+ */
+static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
+{
+	struct hrtimer *dl_timer = &p->dl.dl_timer;
+
+	/* Nobody will change task's class if pi_lock is held */
+	lockdep_assert_held(&p->pi_lock);
+
+	if (hrtimer_active(dl_timer)) {
+		int ret = hrtimer_try_to_cancel(dl_timer);
+
+		if (unlikely(ret == -1)) {
+			/*
+			 * Note, p may migrate OR new deadline tasks
+			 * may appear in rq when we are unlocking it.
+			 * A caller of us must be fine with that.
+			 */
+			raw_spin_unlock(&rq->lock);
+			hrtimer_cancel(dl_timer);
+			raw_spin_lock(&rq->lock);
+		}
+	}
+}
+
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
-	if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
-		hrtimer_try_to_cancel(&p->dl.dl_timer);
+	cancel_dl_timer(rq, p);
 
 	__dl_clear_params(p);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec3917c5f898..49b941fe2cc2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1157,6 +1157,11 @@ struct sched_class {
 	void (*task_fork) (struct task_struct *p);
 	void (*task_dead) (struct task_struct *p);
 
+	/*
+	 * The switched_from() call is allowed to drop rq->lock, therefore we
+	 * cannot assume the switched_from/switched_to pair is serialized by
+	 * rq->lock. They are however serialized by p->pi_lock.
+	 */
 	void (*switched_from) (struct rq *this_rq, struct task_struct *task);
 	void (*switched_to) (struct rq *this_rq, struct task_struct *task);
 	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
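
As an illustration of the serialization rule in the new sched.h
comment, every path that can change a task's class takes p->pi_lock
before rq->lock. A hedged sketch of that caller shape (simplified from
__sched_setscheduler() in kernel/sched/core.c of this era; the elided
body is not verbatim):

    /*
     * pi_lock is taken first, rq->lock nests inside it, so the
     * switched_from()/switched_to() pair stays serialized even
     * across the rq->lock 'hole' in switched_from_dl().
     */
    raw_spin_lock_irqsave(&p->pi_lock, flags);
    rq = __task_rq_lock(p);

    check_class_changed(rq, p, prev_class, oldprio); /* may drop/retake rq->lock */

    __task_rq_unlock(rq);
    raw_spin_unlock_irqrestore(&p->pi_lock, flags);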