author    Ben Segall <bsegall@google.com>    2014-05-19 18:49:45 -0400
committer Ingo Molnar <mingo@kernel.org>     2014-06-05 05:52:00 -0400
commit    51f2176d74ace4c3f58579a605ef5a9720befb00 (patch)
tree      3c4f8405eacae93ef43707c2d335f8a12212eaeb /kernel
parent    096aa33863a5e48de52d2ff30e0801b7487944f4 (diff)
sched/fair: Fix unlocked reads of some cfs_b->quota/period
sched_cfs_period_timer() reads cfs_b->period without locks before calling do_sched_cfs_period_timer(), and similarly unthrottle_offline_cfs_rqs() would read cfs_b->period without the right lock. Thus a simultaneous change of bandwidth could cause corruption on any platform where ktime_t or u64 writes/reads are not atomic.

Extend cfs_b->lock from do_sched_cfs_period_timer() to include the read of cfs_b->period to solve that issue; unthrottle_offline_cfs_rqs() can just use 1 rather than the exact quota, much like distribute_cfs_runtime() does.

There is also an unlocked read of cfs_b->runtime_expires, but a race there would only delay runtime expiry by a tick. Still, the comparison should just be != anyway, which clarifies even that problem.

Signed-off-by: Ben Segall <bsegall@google.com>
Tested-by: Roman Gushchin <klamm@yandex-team.ru>
[peterz: Fix compile warn]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140519224945.20303.93530.stgit@sword-of-the-dawn.mtv.corp.google.com
Cc: pjt@google.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
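A hypothetical userspace illustration (not kernel code, values invented) of the corruption described above: when a 64-bit field such as cfs_b->quota is stored as two 32-bit halves, a reader racing with a bandwidth change can observe a value that is neither the old nor the new setting.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t old_quota = ~0ULL;          /* RUNTIME_INF: no limit */
        uint64_t new_quota = 50000000ULL;    /* new limit: 50 ms, in ns */

        /* torn read: high half still old, low half already new */
        uint64_t torn = (old_quota & 0xffffffff00000000ULL) |
                        (new_quota & 0x00000000ffffffffULL);

        printf("old quota: %llu ns\n", (unsigned long long)old_quota);
        printf("new quota: %llu ns\n", (unsigned long long)new_quota);
        printf("torn read: %llu ns\n", (unsigned long long)torn);
        return 0;
}

Holding cfs_b->lock around both the writer and these readers rules out ever observing such an intermediate value.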
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/fair.c  40
1 file changed, 21 insertions(+), 19 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c9617b73bcc0..b71d8c39f1fd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3224,10 +3224,12 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	 * has not truly expired.
 	 *
 	 * Fortunately we can check determine whether this the case by checking
-	 * whether the global deadline has advanced.
+	 * whether the global deadline has advanced. It is valid to compare
+	 * cfs_b->runtime_expires without any locks since we only care about
+	 * exact equality, so a partial write will still work.
 	 */
 
-	if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
+	if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
 		/* extend local deadline, drift is bounded above by 2 ticks */
 		cfs_rq->runtime_expires += TICK_NSEC;
 	} else {
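The comment added in this hunk relies on exact equality being robust against a partial write. A minimal sketch of that argument, again as hypothetical userspace C with invented values and assuming the local deadline was cached from the old global value: a torn read can make the old signed-difference test compute a garbage delta, while the != test can at worst report "changed", which the code above handles by extending the local deadline one tick.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t old_expires = 0x00000000fffffff0ULL;  /* cached locally      */
        uint64_t new_expires = 0x0000000100000070ULL;  /* global just updated */
        uint64_t local = old_expires;

        /* torn read: low half already new, high half still old */
        uint64_t torn = (old_expires & 0xffffffff00000000ULL) |
                        (new_expires & 0x00000000ffffffffULL);

        /* old test: the bogus delta makes local look ~4.3 s ahead of global */
        printf("(s64)(local - torn) = %lld\n", (long long)(local - torn));

        /* new test: a partial write still compares unequal, so the worst
         * outcome is one extra TICK_NSEC extension of the local deadline */
        printf("local != torn: %d\n", local != torn);
        return 0;
}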
@@ -3456,21 +3458,21 @@ next:
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
 	u64 runtime, runtime_expires;
-	int idle = 1, throttled;
+	int throttled;
 
-	raw_spin_lock(&cfs_b->lock);
 	/* no need to continue the timer with no bandwidth constraint */
 	if (cfs_b->quota == RUNTIME_INF)
-		goto out_unlock;
+		goto out_deactivate;
 
 	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
-	/* idle depends on !throttled (for the case of a large deficit) */
-	idle = cfs_b->idle && !throttled;
 	cfs_b->nr_periods += overrun;
 
-	/* if we're going inactive then everything else can be deferred */
-	if (idle)
-		goto out_unlock;
+	/*
+	 * idle depends on !throttled (for the case of a large deficit), and if
+	 * we're going inactive then everything else can be deferred
+	 */
+	if (cfs_b->idle && !throttled)
+		goto out_deactivate;
 
 	/*
 	 * if we have relooped after returning idle once, we need to update our
@@ -3484,7 +3486,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 	if (!throttled) {
 		/* mark as potentially idle for the upcoming period */
 		cfs_b->idle = 1;
-		goto out_unlock;
+		return 0;
 	}
 
 	/* account preceding periods in which throttling occurred */
@@ -3524,12 +3526,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 	 * timer to remain active while there are any throttled entities.)
 	 */
 	cfs_b->idle = 0;
-out_unlock:
-	if (idle)
-		cfs_b->timer_active = 0;
-	raw_spin_unlock(&cfs_b->lock);
 
-	return idle;
+	return 0;
+
+out_deactivate:
+	cfs_b->timer_active = 0;
+	return 1;
 }
 
 /* a cfs_rq won't donate quota below this amount */
@@ -3706,6 +3708,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 	int overrun;
 	int idle = 0;
 
+	raw_spin_lock(&cfs_b->lock);
 	for (;;) {
 		now = hrtimer_cb_get_time(timer);
 		overrun = hrtimer_forward(timer, now, cfs_b->period);
@@ -3715,6 +3718,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 
 		idle = do_sched_cfs_period_timer(cfs_b, overrun);
 	}
+	raw_spin_unlock(&cfs_b->lock);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }
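Taken together, the hunks above move the lock from the helper into the hrtimer callback. A condensed sketch of the resulting structure (simplified from the patched function, not a verbatim copy): cfs_b->lock is held across the whole loop, so the cfs_b->period read by hrtimer_forward() and everything in do_sched_cfs_period_timer() happen under the same lock, and a return of 1 from the helper means it already cleared timer_active via its out_deactivate path.

static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
{
        struct cfs_bandwidth *cfs_b =
                container_of(timer, struct cfs_bandwidth, period_timer);
        int overrun, idle = 0;

        raw_spin_lock(&cfs_b->lock);
        for (;;) {
                /* cfs_b->period is now read with cfs_b->lock held */
                overrun = hrtimer_forward(timer, hrtimer_cb_get_time(timer),
                                          cfs_b->period);
                if (!overrun)
                        break;

                /* runs under cfs_b->lock; returns 1 when going idle */
                idle = do_sched_cfs_period_timer(cfs_b, overrun);
        }
        raw_spin_unlock(&cfs_b->lock);

        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}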
@@ -3774,8 +3778,6 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 	struct cfs_rq *cfs_rq;
 
 	for_each_leaf_cfs_rq(rq, cfs_rq) {
-		struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-
 		if (!cfs_rq->runtime_enabled)
 			continue;
 
@@ -3783,7 +3785,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 		 * clock_task is not advancing so we just need to make sure
 		 * there's some valid quota amount
 		 */
-		cfs_rq->runtime_remaining = cfs_b->quota;
+		cfs_rq->runtime_remaining = 1;
 		if (cfs_rq_throttled(cfs_rq))
 			unthrottle_cfs_rq(cfs_rq);
 	}