author		Paul Turner <pjt@google.com>	2011-07-21 12:43:31 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-08-14 06:03:28 -0400
commit		58088ad0152ba4b7997388c93d0ca208ec1ece75
tree		22d818b745056effc53ee6fa97ee9103548766b5 /kernel
parent		ec12cb7f31e28854efae7dd6f9544e0a66379040
sched: Add a timer to handle CFS bandwidth refresh
This patch adds a per-task_group timer which handles the refresh of the global
CFS bandwidth pool.
Since the RT pool uses a similar timer, there is some small refactoring to
share this support.
Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.277271273@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
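
For orientation before the diff: the new CFS period timer follows the standard self-rearming hrtimer pattern, in which the callback forwards the timer by whole periods, does the per-period refill, and keeps the timer running only while there is activity. The following is a minimal sketch of that pattern, not code from the patch; my_bandwidth, my_period_tick and my_refill_pool are hypothetical stand-ins, and only the hrtimer calls mirror the ones used in the hunks below.

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/kernel.h>

struct my_bandwidth {
	ktime_t period;			/* refresh interval */
	struct hrtimer period_timer;	/* fires once per period */
};

/* hypothetical per-period work: refill the pool, return non-zero when idle */
static int my_refill_pool(struct my_bandwidth *b, int overrun)
{
	/* a real implementation would top up the group's quota here */
	return 0;
}

static enum hrtimer_restart my_period_tick(struct hrtimer *timer)
{
	struct my_bandwidth *b =
		container_of(timer, struct my_bandwidth, period_timer);
	ktime_t now;
	int overrun;
	int idle = 0;

	for (;;) {
		/* advance expiry by whole periods; 0 means not due yet */
		now = hrtimer_cb_get_time(timer);
		overrun = hrtimer_forward(timer, now, b->period);
		if (!overrun)
			break;

		idle = my_refill_pool(b, overrun);
	}

	/* keep firing while there is activity; stop once the group is idle */
	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

static void my_bandwidth_init(struct my_bandwidth *b, ktime_t period)
{
	b->period = period;
	hrtimer_init(&b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	b->period_timer.function = my_period_tick;
}

Returning HRTIMER_NORESTART when a full period passes without consumption is what lets the patch below park the timer for idle groups; assign_cfs_rq_runtime() then restarts it via __start_cfs_bandwidth() as soon as bandwidth is consumed again.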
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched.c		107
-rw-r--r--	kernel/sched_fair.c	 40
2 files changed, 123 insertions(+), 24 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 35561c63a490..34bf8e6db9af 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -196,10 +196,28 @@ static inline int rt_bandwidth_enabled(void)
 	return sysctl_sched_rt_runtime >= 0;
 }
 
-static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
 {
-	ktime_t now;
+	unsigned long delta;
+	ktime_t soft, hard, now;
 
+	for (;;) {
+		if (hrtimer_active(period_timer))
+			break;
+
+		now = hrtimer_cb_get_time(period_timer);
+		hrtimer_forward(period_timer, now, period);
+
+		soft = hrtimer_get_softexpires(period_timer);
+		hard = hrtimer_get_expires(period_timer);
+		delta = ktime_to_ns(ktime_sub(hard, soft));
+		__hrtimer_start_range_ns(period_timer, soft, delta,
+					 HRTIMER_MODE_ABS_PINNED, 0);
+	}
+}
+
+static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
 	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
@@ -207,22 +225,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		return;
 
 	raw_spin_lock(&rt_b->rt_runtime_lock);
-	for (;;) {
-		unsigned long delta;
-		ktime_t soft, hard;
-
-		if (hrtimer_active(&rt_b->rt_period_timer))
-			break;
-
-		now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
-		hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-
-		soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
-		hard = hrtimer_get_expires(&rt_b->rt_period_timer);
-		delta = ktime_to_ns(ktime_sub(hard, soft));
-		__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
-					 HRTIMER_MODE_ABS_PINNED, 0);
-	}
+	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
@@ -253,6 +256,9 @@ struct cfs_bandwidth {
 	ktime_t period;
 	u64 quota, runtime;
 	s64 hierarchal_quota;
+
+	int idle, timer_active;
+	struct hrtimer period_timer;
 #endif
 };
 
@@ -403,6 +409,28 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 }
 
 static inline u64 default_cfs_period(void);
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
+
+static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
+{
+	struct cfs_bandwidth *cfs_b =
+		container_of(timer, struct cfs_bandwidth, period_timer);
+	ktime_t now;
+	int overrun;
+	int idle = 0;
+
+	for (;;) {
+		now = hrtimer_cb_get_time(timer);
+		overrun = hrtimer_forward(timer, now, cfs_b->period);
+
+		if (!overrun)
+			break;
+
+		idle = do_sched_cfs_period_timer(cfs_b, overrun);
+	}
+
+	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
 
 static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
@@ -410,6 +438,9 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	cfs_b->runtime = 0;
 	cfs_b->quota = RUNTIME_INF;
 	cfs_b->period = ns_to_ktime(default_cfs_period());
+
+	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	cfs_b->period_timer.function = sched_cfs_period_timer;
 }
 
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -417,8 +448,34 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	cfs_rq->runtime_enabled = 0;
 }
 
+/* requires cfs_b->lock, may release to reprogram timer */
+static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+	/*
+	 * The timer may be active because we're trying to set a new bandwidth
+	 * period or because we're racing with the tear-down path
+	 * (timer_active==0 becomes visible before the hrtimer call-back
+	 * terminates).  In either case we ensure that it's re-programmed
+	 */
+	while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
+		raw_spin_unlock(&cfs_b->lock);
+		/* ensure cfs_b->lock is available while we wait */
+		hrtimer_cancel(&cfs_b->period_timer);
+
+		raw_spin_lock(&cfs_b->lock);
+		/* if someone else restarted the timer then we're done */
+		if (cfs_b->timer_active)
+			return;
+	}
+
+	cfs_b->timer_active = 1;
+	start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+}
+
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
-{}
+{
+	hrtimer_cancel(&cfs_b->period_timer);
+}
 #else
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
@@ -9078,7 +9135,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 {
-	int i, ret = 0;
+	int i, ret = 0, runtime_enabled;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
 
 	if (tg == &root_task_group)
@@ -9105,10 +9162,18 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	if (ret)
 		goto out_unlock;
 
+	runtime_enabled = quota != RUNTIME_INF;
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
 	cfs_b->runtime = quota;
+
+	/* restart the period timer (if active) to handle new period expiry */
+	if (runtime_enabled && cfs_b->timer_active) {
+		/* force a reprogram */
+		cfs_b->timer_active = 0;
+		__start_cfs_bandwidth(cfs_b);
+	}
 	raw_spin_unlock_irq(&cfs_b->lock);
 
 	for_each_possible_cpu(i) {
@@ -9116,7 +9181,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 		struct rq *rq = rq_of(cfs_rq);
 
 		raw_spin_lock_irq(&rq->lock);
-		cfs_rq->runtime_enabled = quota != RUNTIME_INF;
+		cfs_rq->runtime_enabled = runtime_enabled;
 		cfs_rq->runtime_remaining = 0;
 		raw_spin_unlock_irq(&rq->lock);
 	}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9502aa899f73..af73a8a85eef 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1284,9 +1284,16 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	raw_spin_lock(&cfs_b->lock);
 	if (cfs_b->quota == RUNTIME_INF)
 		amount = min_amount;
-	else if (cfs_b->runtime > 0) {
-		amount = min(cfs_b->runtime, min_amount);
-		cfs_b->runtime -= amount;
+	else {
+		/* ensure bandwidth timer remains active under consumption */
+		if (!cfs_b->timer_active)
+			__start_cfs_bandwidth(cfs_b);
+
+		if (cfs_b->runtime > 0) {
+			amount = min(cfs_b->runtime, min_amount);
+			cfs_b->runtime -= amount;
+			cfs_b->idle = 0;
+		}
 	}
 	raw_spin_unlock(&cfs_b->lock);
 
@@ -1315,6 +1322,33 @@ static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 		__account_cfs_rq_runtime(cfs_rq, delta_exec);
 }
 
+/*
+ * Responsible for refilling a task_group's bandwidth and unthrottling its
+ * cfs_rqs as appropriate. If there has been no activity within the last
+ * period the timer is deactivated until scheduling resumes; cfs_b->idle is
+ * used to track this state.
+ */
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
+{
+	int idle = 1;
+
+	raw_spin_lock(&cfs_b->lock);
+	/* no need to continue the timer with no bandwidth constraint */
+	if (cfs_b->quota == RUNTIME_INF)
+		goto out_unlock;
+
+	idle = cfs_b->idle;
+	cfs_b->runtime = cfs_b->quota;
+
+	/* mark as potentially idle for the upcoming period */
+	cfs_b->idle = 1;
+out_unlock:
+	if (idle)
+		cfs_b->timer_active = 0;
+	raw_spin_unlock(&cfs_b->lock);
+
+	return idle;
+}
 #else
 static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 				   unsigned long delta_exec) {}