Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--	kernel/sched_rt.c	227
1 file changed, 176 insertions(+), 51 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 0a6d2e516420..c2730a5a4f05 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -62,7 +62,12 @@ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 	if (!rt_rq->tg)
 		return RUNTIME_INF;
 
-	return rt_rq->tg->rt_runtime;
+	return rt_rq->rt_runtime;
+}
+
+static inline u64 sched_rt_period(struct rt_rq *rt_rq)
+{
+	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -127,14 +132,39 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
 	return p->prio != p->normal_prio;
 }
 
+#ifdef CONFIG_SMP
+static inline cpumask_t sched_rt_period_mask(void)
+{
+	return cpu_rq(smp_processor_id())->rd->span;
+}
+#else
+static inline cpumask_t sched_rt_period_mask(void)
+{
+	return cpu_online_map;
+}
+#endif
+
+static inline
+struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
+{
+	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
+}
+
+static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
+{
+	return &rt_rq->tg->rt_bandwidth;
+}
+
 #else
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
-	if (sysctl_sched_rt_runtime == -1)
-		return RUNTIME_INF;
+	return rt_rq->rt_runtime;
+}
 
-	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
+static inline u64 sched_rt_period(struct rt_rq *rt_rq)
+{
+	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -173,6 +203,102 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
 {
 	return rt_rq->rt_throttled;
 }
+
+static inline cpumask_t sched_rt_period_mask(void)
+{
+	return cpu_online_map;
+}
+
+static inline
+struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
+{
+	return &cpu_rq(cpu)->rt;
+}
+
+static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
+{
+	return &def_rt_bandwidth;
+}
+
+#endif
+
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
+{
+	int i, idle = 1;
+	cpumask_t span;
+
+	if (rt_b->rt_runtime == RUNTIME_INF)
+		return 1;
+
+	span = sched_rt_period_mask();
+	for_each_cpu_mask(i, span) {
+		int enqueue = 0;
+		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
+		spin_lock(&rq->lock);
+		if (rt_rq->rt_time) {
+			u64 runtime;
+
+			spin_lock(&rt_rq->rt_runtime_lock);
+			runtime = rt_rq->rt_runtime;
+			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
+			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
+				rt_rq->rt_throttled = 0;
+				enqueue = 1;
+			}
+			if (rt_rq->rt_time || rt_rq->rt_nr_running)
+				idle = 0;
+			spin_unlock(&rt_rq->rt_runtime_lock);
+		}
+
+		if (enqueue)
+			sched_rt_rq_enqueue(rt_rq);
+		spin_unlock(&rq->lock);
+	}
+
+	return idle;
+}
+
+#ifdef CONFIG_SMP
+static int balance_runtime(struct rt_rq *rt_rq)
+{
+	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+	int i, weight, more = 0;
+	u64 rt_period;
+
+	weight = cpus_weight(rd->span);
+
+	spin_lock(&rt_b->rt_runtime_lock);
+	rt_period = ktime_to_ns(rt_b->rt_period);
+	for_each_cpu_mask(i, rd->span) {
+		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+		s64 diff;
+
+		if (iter == rt_rq)
+			continue;
+
+		spin_lock(&iter->rt_runtime_lock);
+		diff = iter->rt_runtime - iter->rt_time;
+		if (diff > 0) {
+			do_div(diff, weight);
+			if (rt_rq->rt_runtime + diff > rt_period)
+				diff = rt_period - rt_rq->rt_runtime;
+			iter->rt_runtime -= diff;
+			rt_rq->rt_runtime += diff;
+			more = 1;
+			if (rt_rq->rt_runtime == rt_period) {
+				spin_unlock(&iter->rt_runtime_lock);
+				break;
+			}
+		}
+		spin_unlock(&iter->rt_runtime_lock);
+	}
+	spin_unlock(&rt_b->rt_runtime_lock);
+
+	return more;
+}
 #endif
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -197,12 +323,24 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (rt_rq->rt_throttled)
 		return rt_rq_throttled(rt_rq);
 
+	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
+		return 0;
+
+#ifdef CONFIG_SMP
 	if (rt_rq->rt_time > runtime) {
-		struct rq *rq = rq_of_rt_rq(rt_rq);
+		int more;
 
-		rq->rt_throttled = 1;
-		rt_rq->rt_throttled = 1;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		more = balance_runtime(rt_rq);
+		spin_lock(&rt_rq->rt_runtime_lock);
 
+		if (more)
+			runtime = sched_rt_runtime(rt_rq);
+	}
+#endif
+
+	if (rt_rq->rt_time > runtime) {
+		rt_rq->rt_throttled = 1;
 		if (rt_rq_throttled(rt_rq)) {
 			sched_rt_rq_dequeue(rt_rq);
 			return 1;
@@ -212,29 +350,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	return 0;
 }
 
-static void update_sched_rt_period(struct rq *rq)
-{
-	struct rt_rq *rt_rq;
-	u64 period;
-
-	while (rq->clock > rq->rt_period_expire) {
-		period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
-		rq->rt_period_expire += period;
-
-		for_each_leaf_rt_rq(rt_rq, rq) {
-			u64 runtime = sched_rt_runtime(rt_rq);
-
-			rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
-			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
-				rt_rq->rt_throttled = 0;
-				sched_rt_rq_enqueue(rt_rq);
-			}
-		}
-
-		rq->rt_throttled = 0;
-	}
-}
-
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -259,9 +374,15 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
-	rt_rq->rt_time += delta_exec;
-	if (sched_rt_runtime_exceeded(rt_rq))
-		resched_task(curr);
+	for_each_sched_rt_entity(rt_se) {
+		rt_rq = rt_rq_of_se(rt_se);
+
+		spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_time += delta_exec;
+		if (sched_rt_runtime_exceeded(rt_rq))
+			resched_task(curr);
+		spin_unlock(&rt_rq->rt_runtime_lock);
+	}
 }
 
 static inline
@@ -284,6 +405,11 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 #ifdef CONFIG_RT_GROUP_SCHED
 	if (rt_se_boosted(rt_se))
 		rt_rq->rt_nr_boosted++;
+
+	if (rt_rq->tg)
+		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
+#else
+	start_rt_bandwidth(&def_rt_bandwidth);
 #endif
 }
 
@@ -353,27 +479,21 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
 /*
  * Because the prio of an upper entry depends on the lower
  * entries, we must remove entries top - down.
- *
- * XXX: O(1/2 h^2) because we can only walk up, not down the chain.
- *      doesn't matter much for now, as h=2 for GROUP_SCHED.
  */
 static void dequeue_rt_stack(struct task_struct *p)
 {
-	struct sched_rt_entity *rt_se, *top_se;
+	struct sched_rt_entity *rt_se, *back = NULL;
 
-	/*
-	 * dequeue all, top - down.
-	 */
-	do {
-		rt_se = &p->rt;
-		top_se = NULL;
-		for_each_sched_rt_entity(rt_se) {
-			if (on_rt_rq(rt_se))
-				top_se = rt_se;
-		}
-		if (top_se)
-			dequeue_rt_entity(top_se);
-	} while (top_se);
+	rt_se = &p->rt;
+	for_each_sched_rt_entity(rt_se) {
+		rt_se->back = back;
+		back = rt_se;
+	}
+
+	for (rt_se = back; rt_se; rt_se = rt_se->back) {
+		if (on_rt_rq(rt_se))
+			dequeue_rt_entity(rt_se);
+	}
 }
 
 /*
@@ -393,6 +513,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 	 */
 	for_each_sched_rt_entity(rt_se)
 		enqueue_rt_entity(rt_se);
+
+	inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -412,6 +534,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 		if (rt_rq && rt_rq->rt_nr_running)
 			enqueue_rt_entity(rt_se);
 	}
+
+	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
@@ -1001,7 +1125,8 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	return 0;
 }
 
-static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
+static void set_cpus_allowed_rt(struct task_struct *p,
+				const cpumask_t *new_mask)
 {
 	int weight = cpus_weight(*new_mask);
 