author		Paul Turner <pjt@google.com>			2011-07-21 12:43:31 -0400
committer	Ingo Molnar <mingo@elte.hu>			2011-08-14 06:03:28 -0400
commit		58088ad0152ba4b7997388c93d0ca208ec1ece75 (patch)
tree		22d818b745056effc53ee6fa97ee9103548766b5 /kernel
parent		ec12cb7f31e28854efae7dd6f9544e0a66379040 (diff)
sched: Add a timer to handle CFS bandwidth refresh
This patch adds a per-task_group timer which handles the refresh of the
global CFS bandwidth pool. Since the RT pool is using a similar timer
there's some small refactoring to share this support.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.277271273@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
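At heart the pool being refreshed is a token bucket: each task_group is
granted quota nanoseconds of runtime per period, per-cpu cfs_rqs draw from
the shared pool, and the timer added here refills it once per period. A
minimal userspace sketch of that accounting follows; struct pool, draw()
and refresh() are illustrative names, not kernel APIs.

#include <stdio.h>

struct pool {
	long long quota;	/* runtime granted per period, in ns */
	long long runtime;	/* runtime left in the current period */
};

/* per-period refresh, as the new period_timer does */
static void refresh(struct pool *p)
{
	p->runtime = p->quota;
}

/* a local cfs_rq drawing up to `want` ns from the shared pool */
static long long draw(struct pool *p, long long want)
{
	long long got = want < p->runtime ? want : p->runtime;

	p->runtime -= got;
	return got;
}

int main(void)
{
	struct pool p = { .quota = 100000000LL, .runtime = 100000000LL };

	printf("got %lld\n", draw(&p, 60000000LL));	/* 60ms granted */
	printf("got %lld\n", draw(&p, 60000000LL));	/* only 40ms left */
	refresh(&p);					/* period timer fires */
	printf("got %lld\n", draw(&p, 60000000LL));	/* full 60ms again */
	return 0;
}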
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched.c		107
-rw-r--r--	kernel/sched_fair.c	 40
2 files changed, 123 insertions(+), 24 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 35561c63a490..34bf8e6db9af 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -196,10 +196,28 @@ static inline int rt_bandwidth_enabled(void)
 	return sysctl_sched_rt_runtime >= 0;
 }
 
-static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
 {
-	ktime_t now;
+	unsigned long delta;
+	ktime_t soft, hard, now;
 
+	for (;;) {
+		if (hrtimer_active(period_timer))
+			break;
+
+		now = hrtimer_cb_get_time(period_timer);
+		hrtimer_forward(period_timer, now, period);
+
+		soft = hrtimer_get_softexpires(period_timer);
+		hard = hrtimer_get_expires(period_timer);
+		delta = ktime_to_ns(ktime_sub(hard, soft));
+		__hrtimer_start_range_ns(period_timer, soft, delta,
+				HRTIMER_MODE_ABS_PINNED, 0);
+	}
+}
+
+static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
 	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
@@ -207,22 +225,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		return;
 
 	raw_spin_lock(&rt_b->rt_runtime_lock);
-	for (;;) {
-		unsigned long delta;
-		ktime_t soft, hard;
-
-		if (hrtimer_active(&rt_b->rt_period_timer))
-			break;
-
-		now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
-		hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-
-		soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
-		hard = hrtimer_get_expires(&rt_b->rt_period_timer);
-		delta = ktime_to_ns(ktime_sub(hard, soft));
-		__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
-				HRTIMER_MODE_ABS_PINNED, 0);
-	}
+	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
@@ -253,6 +256,9 @@ struct cfs_bandwidth {
 	ktime_t period;
 	u64 quota, runtime;
 	s64 hierarchal_quota;
+
+	int idle, timer_active;
+	struct hrtimer period_timer;
 #endif
 };
 
@@ -403,6 +409,28 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 }
 
 static inline u64 default_cfs_period(void);
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
+
+static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
+{
+	struct cfs_bandwidth *cfs_b =
+		container_of(timer, struct cfs_bandwidth, period_timer);
+	ktime_t now;
+	int overrun;
+	int idle = 0;
+
+	for (;;) {
+		now = hrtimer_cb_get_time(timer);
+		overrun = hrtimer_forward(timer, now, cfs_b->period);
+
+		if (!overrun)
+			break;
+
+		idle = do_sched_cfs_period_timer(cfs_b, overrun);
+	}
+
+	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
 
 static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
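The callback above uses the standard hrtimer catch-up idiom:
hrtimer_forward() pushes the expiry forward in whole periods and returns how
many periods were overrun, so a callback that fires late still accounts
every elapsed period before re-arming. A small sketch of that loop with
plain integers instead of ktime_t; timer_forward() here is an illustrative
stand-in, not the hrtimer API.

#include <stdio.h>

/* advance *expires past `now` in whole periods; return periods skipped */
static int timer_forward(long long *expires, long long now, long long period)
{
	int overrun = 0;

	while (*expires <= now) {
		*expires += period;
		overrun++;
	}
	return overrun;
}

int main(void)
{
	long long expires = 100, period = 100;
	long long now = 350;	/* the callback runs 2.5 periods late */
	int overrun;

	/* mirrors the for (;;) loop in sched_cfs_period_timer() */
	while ((overrun = timer_forward(&expires, now, period)))
		printf("account %d period(s), next expiry at %lld\n",
		       overrun, expires);
	return 0;
}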
@@ -410,6 +438,9 @@ static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	cfs_b->runtime = 0;
 	cfs_b->quota = RUNTIME_INF;
 	cfs_b->period = ns_to_ktime(default_cfs_period());
+
+	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	cfs_b->period_timer.function = sched_cfs_period_timer;
 }
 
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -417,8 +448,34 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	cfs_rq->runtime_enabled = 0;
 }
 
+/* requires cfs_b->lock, may release to reprogram timer */
+static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+	/*
+	 * The timer may be active because we're trying to set a new bandwidth
+	 * period or because we're racing with the tear-down path
+	 * (timer_active==0 becomes visible before the hrtimer call-back
+	 * terminates).  In either case we ensure that it's re-programmed
+	 */
+	while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
+		raw_spin_unlock(&cfs_b->lock);
+		/* ensure cfs_b->lock is available while we wait */
+		hrtimer_cancel(&cfs_b->period_timer);
+
+		raw_spin_lock(&cfs_b->lock);
+		/* if someone else restarted the timer then we're done */
+		if (cfs_b->timer_active)
+			return;
+	}
+
+	cfs_b->timer_active = 1;
+	start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+}
+
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
-{}
+{
+	hrtimer_cancel(&cfs_b->period_timer);
+}
 #else
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
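__start_cfs_bandwidth() above also illustrates the usual way to wait out a
timer whose callback takes the same lock: waiting while holding the lock
would deadlock against the callback, so the lock is dropped, the timer
cancelled, the lock re-taken, and the state re-checked in case someone else
re-armed the timer in the window. A minimal pthread rendition of the same
dance, assuming timer_cancel() blocks until the callback finishes; all
names are illustrative (build with cc -pthread).

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool timer_active;	/* protected by `lock` */
static pthread_t cb_thread;
static bool cb_running;		/* only touched by the main thread */

/* stand-in for the hrtimer callback: it takes the same lock */
static void *timer_cb(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	timer_active = false;	/* the callback decided to park the timer */
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* stand-in for hrtimer_cancel(): blocks until the callback finishes */
static void timer_cancel(void)
{
	if (cb_running) {
		pthread_join(cb_thread, NULL);
		cb_running = false;
	}
}

/* mirrors __start_cfs_bandwidth(): called with `lock` held */
static void start_bandwidth(void)
{
	while (cb_running) {
		/* waiting for the callback while holding its lock deadlocks */
		pthread_mutex_unlock(&lock);
		timer_cancel();

		pthread_mutex_lock(&lock);
		if (timer_active)	/* someone re-armed it meanwhile */
			return;
	}
	timer_active = true;
	printf("timer (re)programmed\n");
}

int main(void)
{
	cb_running = true;
	pthread_create(&cb_thread, NULL, timer_cb, NULL);

	pthread_mutex_lock(&lock);
	start_bandwidth();	/* races with the in-flight callback */
	pthread_mutex_unlock(&lock);
	return 0;
}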
@@ -9078,7 +9135,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 {
-	int i, ret = 0;
+	int i, ret = 0, runtime_enabled;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
 
 	if (tg == &root_task_group)
@@ -9105,10 +9162,18 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	if (ret)
 		goto out_unlock;
 
+	runtime_enabled = quota != RUNTIME_INF;
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
 	cfs_b->runtime = quota;
+
+	/* restart the period timer (if active) to handle new period expiry */
+	if (runtime_enabled && cfs_b->timer_active) {
+		/* force a reprogram */
+		cfs_b->timer_active = 0;
+		__start_cfs_bandwidth(cfs_b);
+	}
 	raw_spin_unlock_irq(&cfs_b->lock);
 
 	for_each_possible_cpu(i) {
@@ -9116,7 +9181,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 		struct rq *rq = rq_of(cfs_rq);
 
 		raw_spin_lock_irq(&rq->lock);
-		cfs_rq->runtime_enabled = quota != RUNTIME_INF;
+		cfs_rq->runtime_enabled = runtime_enabled;
 		cfs_rq->runtime_remaining = 0;
 		raw_spin_unlock_irq(&rq->lock);
 	}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9502aa899f73..af73a8a85eef 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1284,9 +1284,16 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	raw_spin_lock(&cfs_b->lock);
 	if (cfs_b->quota == RUNTIME_INF)
 		amount = min_amount;
-	else if (cfs_b->runtime > 0) {
-		amount = min(cfs_b->runtime, min_amount);
-		cfs_b->runtime -= amount;
+	else {
+		/* ensure bandwidth timer remains active under consumption */
+		if (!cfs_b->timer_active)
+			__start_cfs_bandwidth(cfs_b);
+
+		if (cfs_b->runtime > 0) {
+			amount = min(cfs_b->runtime, min_amount);
+			cfs_b->runtime -= amount;
+			cfs_b->idle = 0;
+		}
 	}
 	raw_spin_unlock(&cfs_b->lock);
 
@@ -1315,6 +1322,33 @@ static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 	__account_cfs_rq_runtime(cfs_rq, delta_exec);
 }
 
+/*
+ * Responsible for refilling a task_group's bandwidth and unthrottling its
+ * cfs_rqs as appropriate. If there has been no activity within the last
+ * period the timer is deactivated until scheduling resumes; cfs_b->idle is
+ * used to track this state.
+ */
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
+{
+	int idle = 1;
+
+	raw_spin_lock(&cfs_b->lock);
+	/* no need to continue the timer with no bandwidth constraint */
+	if (cfs_b->quota == RUNTIME_INF)
+		goto out_unlock;
+
+	idle = cfs_b->idle;
+	cfs_b->runtime = cfs_b->quota;
+
+	/* mark as potentially idle for the upcoming period */
+	cfs_b->idle = 1;
+out_unlock:
+	if (idle)
+		cfs_b->timer_active = 0;
+	raw_spin_unlock(&cfs_b->lock);
+
+	return idle;
+}
 #else
 static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 				   unsigned long delta_exec) {}
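Taken together, the idle/timer_active pair lets the timer park itself:
every refresh tentatively marks the pool idle, any consumption clears the
mark and (re)arms the timer, and a full period with no consumption stops
the timer until demand returns. A compact trace of that handshake, with
illustrative names rather than kernel API:

#include <stdbool.h>
#include <stdio.h>

static bool idle = true;
static bool timer_active;

static void consume(void)		/* the assign_cfs_rq_runtime() side */
{
	if (!timer_active) {
		timer_active = true;	/* __start_cfs_bandwidth() */
		printf("timer armed\n");
	}
	idle = false;			/* activity seen this period */
}

static bool period_tick(void)		/* the do_sched_cfs_period_timer() side */
{
	bool was_idle = idle;

	idle = true;			/* tentatively idle for the next period */
	if (was_idle) {
		timer_active = false;	/* a quiet period: park the timer */
		printf("timer parked\n");
	}
	return was_idle;		/* idle => HRTIMER_NORESTART */
}

int main(void)
{
	consume();		/* first consumer arms the timer */
	period_tick();		/* busy period: timer stays armed */
	period_tick();		/* quiet period: timer parks itself */
	consume();		/* next demand re-arms it */
	return 0;
}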