author     Ben Segall <bsegall@google.com>    2014-05-19 18:49:45 -0400
committer  Ingo Molnar <mingo@kernel.org>     2014-06-05 05:52:00 -0400
commit     51f2176d74ace4c3f58579a605ef5a9720befb00
tree       3c4f8405eacae93ef43707c2d335f8a12212eaeb /kernel
parent     096aa33863a5e48de52d2ff30e0801b7487944f4
sched/fair: Fix unlocked reads of some cfs_b->quota/period
sched_cfs_period_timer() reads cfs_b->period without locks before calling
do_sched_cfs_period_timer(), and unthrottle_offline_cfs_rqs() similarly reads
cfs_b->quota without holding the right lock. A concurrent change of the
bandwidth settings could therefore cause corruption on any platform where
ktime_t or u64 writes/reads are not atomic.
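As a hedged illustration of that hazard (not part of the patch; the program, thread layout and constants below are invented for demonstration, and the data race in it is deliberate), the following userspace sketch shows how a plain 64-bit store can be split into two 32-bit stores on a 32-bit target, letting an unlocked reader observe a value that is neither the old nor the new quota/period:

```c
/*
 * Deliberately racy demonstration of a torn 64-bit read.  "quota" stands in
 * for a field like cfs_b->quota; the values are chosen so that their high
 * and low halves differ, making a mixed (torn) value easy to detect.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define OLD_VAL 0x00000001FFFFFFFFULL   /* hi = 0x00000001, lo = 0xFFFFFFFF */
#define NEW_VAL 0x0000000200000000ULL   /* hi = 0x00000002, lo = 0x00000000 */

static volatile uint64_t quota = OLD_VAL;   /* written without any lock */
static volatile int stop;
static long torn_reads;

static void *writer(void *arg)
{
        int flip = 0;
        (void)arg;
        while (!stop) {
                /* plain 64-bit store: two 32-bit stores on a 32-bit target */
                quota = flip ? NEW_VAL : OLD_VAL;
                flip = !flip;
        }
        return NULL;
}

static void *reader(void *arg)
{
        (void)arg;
        for (long i = 0; i < 20000000; i++) {
                uint64_t v = quota;     /* unlocked read: may tear */
                if (v != OLD_VAL && v != NEW_VAL)
                        torn_reads++;
        }
        stop = 1;
        return NULL;
}

int main(void)
{
        pthread_t w, r;

        pthread_create(&w, NULL, writer, NULL);
        pthread_create(&r, NULL, reader, NULL);
        pthread_join(r, NULL);
        pthread_join(w, NULL);
        printf("torn reads observed: %ld\n", torn_reads);
        return 0;
}
```

Built with `gcc -O2 -pthread`, a 64-bit build will normally report zero torn reads; tearing is only expected where a 64-bit store is not a single instruction, which is exactly the class of platforms the commit is concerned with.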
To fix this, extend the cfs_b->lock section out of do_sched_cfs_period_timer()
into its caller so that it also covers the read of cfs_b->period;
unthrottle_offline_cfs_rqs() can simply use 1 rather than the exact quota,
much like distribute_cfs_runtime() does.
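A minimal sketch of that locking change, using a pthread mutex in place of cfs_b->lock (all names here are invented; this shows the pattern, not the kernel code): the caller now takes the lock around both the read of the period and the call into the period handler, and the handler assumes the lock is already held, mirroring how sched_cfs_period_timer() and do_sched_cfs_period_timer() are rearranged in the diff below.

```c
/* Lock hoisted from callee to caller so one critical section covers both
 * the shared-field read and the bookkeeping it feeds. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct bandwidth {
        pthread_mutex_t lock;
        uint64_t period;        /* plays the role of cfs_b->period */
        uint64_t nr_periods;
};

/* callee: like do_sched_cfs_period_timer() after the patch, lock already held */
static void do_period_timer(struct bandwidth *b, int overrun)
{
        b->nr_periods += overrun;
}

/* caller: like sched_cfs_period_timer() after the patch */
static void period_timer(struct bandwidth *b)
{
        pthread_mutex_lock(&b->lock);
        uint64_t period = b->period;        /* read is now under the lock */
        int overrun = (period != 0);        /* stand-in for hrtimer_forward() */
        do_period_timer(b, overrun);
        pthread_mutex_unlock(&b->lock);
}

int main(void)
{
        struct bandwidth b = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .period = 100000000ULL,     /* 100ms in ns, arbitrary */
        };
        period_timer(&b);
        printf("nr_periods = %llu\n", (unsigned long long)b.nr_periods);
        return 0;
}
```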
There is also an unlocked read of cfs_b->runtime_expires, but a race there
would only delay runtime expiry by a tick. Still, the comparison should simply
be !=: exact equality is unaffected by a partial write, which clears up that
problem as well.
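Why exact equality "still works" under a partial write can be checked with a small standalone sketch (the constants are invented; this is not kernel code): if a reader catches cfs_b->runtime_expires with only its low 32 bits updated, the != test takes the same branch it would take once the write completes, while the old signed comparison can come out the other way.

```c
/* Compare how an equality test and an ordered test behave when a reader
 * observes a half-written ("torn") 64-bit deadline. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t local = 0x00000001F0000000ULL; /* cfs_rq->runtime_expires (== old global) */
        uint64_t new_g = 0x0000000200000000ULL; /* fully written new cfs_b->runtime_expires */
        uint64_t torn  = 0x0000000100000000ULL; /* low half of new_g landed, high half still old */

        /* equality: the torn value selects the same branch as the complete write */
        printf("!= with new : %d\n", local != new_g);               /* prints 1 */
        printf("!= with torn: %d\n", local != torn);                /* prints 1: same branch */

        /* ordered compare: the torn value flips the sign of the difference */
        printf(">= with new : %d\n", (int64_t)(local - new_g) >= 0); /* prints 0 */
        printf(">= with torn: %d\n", (int64_t)(local - torn) >= 0);  /* prints 1: different branch */

        return 0;
}
```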
Signed-off-by: Ben Segall <bsegall@google.com>
Tested-by: Roman Gushchin <klamm@yandex-team.ru>
[peterz: Fix compile warn]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140519224945.20303.93530.stgit@sword-of-the-dawn.mtv.corp.google.com
Cc: pjt@google.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')

 -rw-r--r--  kernel/sched/fair.c | 40
 1 file changed, 21 insertions(+), 19 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c9617b73bcc0..b71d8c39f1fd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3224,10 +3224,12 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
         * has not truly expired.
         *
         * Fortunately we can check determine whether this the case by checking
-        * whether the global deadline has advanced.
+        * whether the global deadline has advanced. It is valid to compare
+        * cfs_b->runtime_expires without any locks since we only care about
+        * exact equality, so a partial write will still work.
         */
 
-       if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
+       if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
                /* extend local deadline, drift is bounded above by 2 ticks */
                cfs_rq->runtime_expires += TICK_NSEC;
        } else {
@@ -3456,21 +3458,21 @@ next:
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
        u64 runtime, runtime_expires;
-       int idle = 1, throttled;
+       int throttled;
 
-       raw_spin_lock(&cfs_b->lock);
        /* no need to continue the timer with no bandwidth constraint */
        if (cfs_b->quota == RUNTIME_INF)
-               goto out_unlock;
+               goto out_deactivate;
 
        throttled = !list_empty(&cfs_b->throttled_cfs_rq);
-       /* idle depends on !throttled (for the case of a large deficit) */
-       idle = cfs_b->idle && !throttled;
        cfs_b->nr_periods += overrun;
 
-       /* if we're going inactive then everything else can be deferred */
-       if (idle)
-               goto out_unlock;
+       /*
+        * idle depends on !throttled (for the case of a large deficit), and if
+        * we're going inactive then everything else can be deferred
+        */
+       if (cfs_b->idle && !throttled)
+               goto out_deactivate;
 
        /*
         * if we have relooped after returning idle once, we need to update our
@@ -3484,7 +3486,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
        if (!throttled) {
                /* mark as potentially idle for the upcoming period */
                cfs_b->idle = 1;
-               goto out_unlock;
+               return 0;
        }
 
        /* account preceding periods in which throttling occurred */
@@ -3524,12 +3526,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
         * timer to remain active while there are any throttled entities.)
         */
        cfs_b->idle = 0;
-out_unlock:
-       if (idle)
-               cfs_b->timer_active = 0;
-       raw_spin_unlock(&cfs_b->lock);
 
-       return idle;
+       return 0;
+
+out_deactivate:
+       cfs_b->timer_active = 0;
+       return 1;
 }
 
 /* a cfs_rq won't donate quota below this amount */
@@ -3706,6 +3708,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
        int overrun;
        int idle = 0;
 
+       raw_spin_lock(&cfs_b->lock);
        for (;;) {
                now = hrtimer_cb_get_time(timer);
                overrun = hrtimer_forward(timer, now, cfs_b->period);
@@ -3715,6 +3718,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 
                idle = do_sched_cfs_period_timer(cfs_b, overrun);
        }
+       raw_spin_unlock(&cfs_b->lock);
 
        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }
@@ -3774,8 +3778,6 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
        struct cfs_rq *cfs_rq;
 
        for_each_leaf_cfs_rq(rq, cfs_rq) {
-               struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-
                if (!cfs_rq->runtime_enabled)
                        continue;
 
@@ -3783,7 +3785,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
                 * clock_task is not advancing so we just need to make sure
                 * there's some valid quota amount
                 */
-               cfs_rq->runtime_remaining = cfs_b->quota;
+               cfs_rq->runtime_remaining = 1;
                if (cfs_rq_throttled(cfs_rq))
                        unthrottle_cfs_rq(cfs_rq);
        }