author          Peter Zijlstra <a.p.zijlstra@chello.nl>        2011-11-15 11:14:39 -0500
committer       Ingo Molnar <mingo@elte.hu>                    2011-11-17 06:20:22 -0500
commit          391e43da797a96aeb65410281891f6d0b0e9611c (patch)
tree            0ce6784525a5a8f75b377170cf1a7d60abccea29 /kernel/sched/rt.c
parent          029632fbb7b7c9d85063cc9eb470de6c54873df3 (diff)
sched: Move all scheduler bits into kernel/sched/
There are too many sched*.[ch] files in kernel/; give them their own
directory.
(No code changed, other than Makefile glue added.)
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched/rt.c')
-rw-r--r--    kernel/sched/rt.c    2045
1 file changed, 2045 insertions, 0 deletions
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
new file mode 100644
index 000000000000..023b35502509
--- /dev/null
+++ b/kernel/sched/rt.c
@@ -0,0 +1,2045 @@
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

#include "sched.h"

#include <linux/slab.h>

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

struct rt_bandwidth def_rt_bandwidth;

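/*
 * Replenishment timer for RT bandwidth: each expiry forwards the timer by
 * whole rt_periods and replenishes runtime for every period that elapsed;
 * it keeps re-arming itself until all runqueues report idle.
 */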
static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
        struct rt_bandwidth *rt_b =
                container_of(timer, struct rt_bandwidth, rt_period_timer);
        ktime_t now;
        int overrun;
        int idle = 0;

        for (;;) {
                now = hrtimer_cb_get_time(timer);
                overrun = hrtimer_forward(timer, now, rt_b->rt_period);

                if (!overrun)
                        break;

                idle = do_sched_rt_period_timer(rt_b, overrun);
        }

        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
        rt_b->rt_period = ns_to_ktime(period);
        rt_b->rt_runtime = runtime;

        raw_spin_lock_init(&rt_b->rt_runtime_lock);

        hrtimer_init(&rt_b->rt_period_timer,
                        CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        rt_b->rt_period_timer.function = sched_rt_period_timer;
}

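/*
 * Arm the period timer lazily: only when bandwidth enforcement is enabled,
 * runtime is finite, and the timer is not already pending.
 */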
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
        if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
                return;

        if (hrtimer_active(&rt_b->rt_period_timer))
                return;

        raw_spin_lock(&rt_b->rt_runtime_lock);
        start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
        raw_spin_unlock(&rt_b->rt_runtime_lock);
}

void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
{
        struct rt_prio_array *array;
        int i;

        array = &rt_rq->active;
        for (i = 0; i < MAX_RT_PRIO; i++) {
                INIT_LIST_HEAD(array->queue + i);
                __clear_bit(i, array->bitmap);
        }
        /* delimiter for bitsearch: */
        __set_bit(MAX_RT_PRIO, array->bitmap);

#if defined CONFIG_SMP
        rt_rq->highest_prio.curr = MAX_RT_PRIO;
        rt_rq->highest_prio.next = MAX_RT_PRIO;
        rt_rq->rt_nr_migratory = 0;
        rt_rq->overloaded = 0;
        plist_head_init(&rt_rq->pushable_tasks);
#endif

        rt_rq->rt_time = 0;
        rt_rq->rt_throttled = 0;
        rt_rq->rt_runtime = 0;
        raw_spin_lock_init(&rt_rq->rt_runtime_lock);
}

#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
        hrtimer_cancel(&rt_b->rt_period_timer);
}

#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_SCHED_DEBUG
        WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
        return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
        return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
        return rt_se->rt_rq;
}

void free_rt_sched_group(struct task_group *tg)
{
        int i;

        if (tg->rt_se)
                destroy_rt_bandwidth(&tg->rt_bandwidth);

        for_each_possible_cpu(i) {
                if (tg->rt_rq)
                        kfree(tg->rt_rq[i]);
                if (tg->rt_se)
                        kfree(tg->rt_se[i]);
        }

        kfree(tg->rt_rq);
        kfree(tg->rt_se);
}

void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
                struct sched_rt_entity *rt_se, int cpu,
                struct sched_rt_entity *parent)
{
        struct rq *rq = cpu_rq(cpu);

        rt_rq->highest_prio.curr = MAX_RT_PRIO;
        rt_rq->rt_nr_boosted = 0;
        rt_rq->rq = rq;
        rt_rq->tg = tg;

        tg->rt_rq[cpu] = rt_rq;
        tg->rt_se[cpu] = rt_se;

        if (!rt_se)
                return;

        if (!parent)
                rt_se->rt_rq = &rq->rt;
        else
                rt_se->rt_rq = parent->my_q;

        rt_se->my_q = rt_rq;
        rt_se->parent = parent;
        INIT_LIST_HEAD(&rt_se->run_list);
}

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
        struct rt_rq *rt_rq;
        struct sched_rt_entity *rt_se;
        int i;

        tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
        if (!tg->rt_rq)
                goto err;
        tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
        if (!tg->rt_se)
                goto err;

        init_rt_bandwidth(&tg->rt_bandwidth,
                        ktime_to_ns(def_rt_bandwidth.rt_period), 0);

        for_each_possible_cpu(i) {
                rt_rq = kzalloc_node(sizeof(struct rt_rq),
                                     GFP_KERNEL, cpu_to_node(i));
                if (!rt_rq)
                        goto err;

                rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
                                     GFP_KERNEL, cpu_to_node(i));
                if (!rt_se)
                        goto err_free_rq;

                init_rt_rq(rt_rq, cpu_rq(i));
                rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
                init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
        }

        return 1;

err_free_rq:
        kfree(rt_rq);
err:
        return 0;
}

#else /* CONFIG_RT_GROUP_SCHED */

#define rt_entity_is_task(rt_se) (1)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
        return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
        return container_of(rt_rq, struct rq, rt);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
        struct task_struct *p = rt_task_of(rt_se);
        struct rq *rq = task_rq(p);

        return &rq->rt;
}

void free_rt_sched_group(struct task_group *tg) { }

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
        return 1;
}
#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static inline int rt_overloaded(struct rq *rq)
{
        return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
        if (!rq->online)
                return;

        cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
        /*
         * Make sure the mask is visible before we set
         * the overload count. That is checked to determine
         * if we should look at the mask. It would be a shame
         * if we looked at the mask, but the mask was not
         * updated yet.
         */
        wmb();
        atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
        if (!rq->online)
                return;

        /* the order here really doesn't matter */
        atomic_dec(&rq->rd->rto_count);
        cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}

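/*
 * An rq is "overloaded" when it has more than one runnable RT task and at
 * least one of them can migrate; keep the root-domain overload state in
 * sync with those counts.
 */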
static void update_rt_migration(struct rt_rq *rt_rq)
{
        if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
                if (!rt_rq->overloaded) {
                        rt_set_overload(rq_of_rt_rq(rt_rq));
                        rt_rq->overloaded = 1;
                }
        } else if (rt_rq->overloaded) {
                rt_clear_overload(rq_of_rt_rq(rt_rq));
                rt_rq->overloaded = 0;
        }
}

static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        if (!rt_entity_is_task(rt_se))
                return;

        rt_rq = &rq_of_rt_rq(rt_rq)->rt;

        rt_rq->rt_nr_total++;
        if (rt_se->nr_cpus_allowed > 1)
                rt_rq->rt_nr_migratory++;

        update_rt_migration(rt_rq);
}

static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        if (!rt_entity_is_task(rt_se))
                return;

        rt_rq = &rq_of_rt_rq(rt_rq)->rt;

        rt_rq->rt_nr_total--;
        if (rt_se->nr_cpus_allowed > 1)
                rt_rq->rt_nr_migratory--;

        update_rt_migration(rt_rq);
}

static inline int has_pushable_tasks(struct rq *rq)
{
        return !plist_head_empty(&rq->rt.pushable_tasks);
}

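/*
 * The pushable list is a priority-sorted plist of runnable RT tasks that
 * may be pushed to other CPUs; deleting and re-inserting a task keeps the
 * list sorted when its priority changes.
 */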
static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
        plist_node_init(&p->pushable_tasks, p->prio);
        plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);

        /* Update the highest prio pushable task */
        if (p->prio < rq->rt.highest_prio.next)
                rq->rt.highest_prio.next = p->prio;
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);

        /* Update the new highest prio pushable task */
        if (has_pushable_tasks(rq)) {
                p = plist_first_entry(&rq->rt.pushable_tasks,
                                      struct task_struct, pushable_tasks);
                rq->rt.highest_prio.next = p->prio;
        } else
                rq->rt.highest_prio.next = MAX_RT_PRIO;
}

#else

static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

#endif /* CONFIG_SMP */

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
        return !list_empty(&rt_se->run_list);
}

#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
        if (!rt_rq->tg)
                return RUNTIME_INF;

        return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
        return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

typedef struct task_group *rt_rq_iter_t;

static inline struct task_group *next_task_group(struct task_group *tg)
{
        do {
                tg = list_entry_rcu(tg->list.next,
                        typeof(struct task_group), list);
        } while (&tg->list != &task_groups && task_group_is_autogroup(tg));

        if (&tg->list == &task_groups)
                tg = NULL;

        return tg;
}

#define for_each_rt_rq(rt_rq, iter, rq)                                 \
        for (iter = container_of(&task_groups, typeof(*iter), list);    \
                (iter = next_task_group(iter)) &&                       \
                (rt_rq = iter->rt_rq[cpu_of(rq)]);)

static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
        list_add_rcu(&rt_rq->leaf_rt_rq_list,
                        &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
}

static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
        list_del_rcu(&rt_rq->leaf_rt_rq_list);
}

#define for_each_leaf_rt_rq(rt_rq, rq) \
        list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)

#define for_each_sched_rt_entity(rt_se) \
        for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
        return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);

static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
        struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
        struct sched_rt_entity *rt_se;

        int cpu = cpu_of(rq_of_rt_rq(rt_rq));

        rt_se = rt_rq->tg->rt_se[cpu];

        if (rt_rq->rt_nr_running) {
                if (rt_se && !on_rt_rq(rt_se))
                        enqueue_rt_entity(rt_se, false);
                if (rt_rq->highest_prio.curr < curr->prio)
                        resched_task(curr);
        }
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
        struct sched_rt_entity *rt_se;
        int cpu = cpu_of(rq_of_rt_rq(rt_rq));

        rt_se = rt_rq->tg->rt_se[cpu];

        if (rt_se && on_rt_rq(rt_se))
                dequeue_rt_entity(rt_se);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
        return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
        struct rt_rq *rt_rq = group_rt_rq(rt_se);
        struct task_struct *p;

        if (rt_rq)
                return !!rt_rq->rt_nr_boosted;

        p = rt_task_of(rt_se);
        return p->prio != p->normal_prio;
}

#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
        return cpu_rq(smp_processor_id())->rd->span;
}
#else
static inline const struct cpumask *sched_rt_period_mask(void)
{
        return cpu_online_mask;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
        return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
        return &rt_rq->tg->rt_bandwidth;
}

#else /* !CONFIG_RT_GROUP_SCHED */

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
        return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
        return ktime_to_ns(def_rt_bandwidth.rt_period);
}

typedef struct rt_rq *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq) \
        for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
}

static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
}

#define for_each_leaf_rt_rq(rt_rq, rq) \
        for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

#define for_each_sched_rt_entity(rt_se) \
        for (; rt_se; rt_se = NULL)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
        return NULL;
}

static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
        if (rt_rq->rt_nr_running)
                resched_task(rq_of_rt_rq(rt_rq)->curr);
}

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
        return rt_rq->rt_throttled;
}

static inline const struct cpumask *sched_rt_period_mask(void)
{
        return cpu_online_mask;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
        return &cpu_rq(cpu)->rt;
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
        return &def_rt_bandwidth;
}

#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP
/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 */
static int do_balance_runtime(struct rt_rq *rt_rq)
{
        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
        struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
        int i, weight, more = 0;
        u64 rt_period;

        weight = cpumask_weight(rd->span);

        raw_spin_lock(&rt_b->rt_runtime_lock);
        rt_period = ktime_to_ns(rt_b->rt_period);
        for_each_cpu(i, rd->span) {
                struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                s64 diff;

                if (iter == rt_rq)
                        continue;

                raw_spin_lock(&iter->rt_runtime_lock);
                /*
                 * Either all rqs have inf runtime and there's nothing to steal
                 * or __disable_runtime() below sets a specific rq to inf to
                 * indicate it's been disabled and disallow stealing.
                 */
                if (iter->rt_runtime == RUNTIME_INF)
                        goto next;

                /*
                 * From runqueues with spare time, take 1/n part of their
                 * spare time, but no more than our period.
                 */
                diff = iter->rt_runtime - iter->rt_time;
                if (diff > 0) {
                        diff = div_u64((u64)diff, weight);
                        if (rt_rq->rt_runtime + diff > rt_period)
                                diff = rt_period - rt_rq->rt_runtime;
                        iter->rt_runtime -= diff;
                        rt_rq->rt_runtime += diff;
                        more = 1;
                        if (rt_rq->rt_runtime == rt_period) {
                                raw_spin_unlock(&iter->rt_runtime_lock);
                                break;
                        }
                }
next:
                raw_spin_unlock(&iter->rt_runtime_lock);
        }
        raw_spin_unlock(&rt_b->rt_runtime_lock);

        return more;
}

/*
 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 */
static void __disable_runtime(struct rq *rq)
{
        struct root_domain *rd = rq->rd;
        rt_rq_iter_t iter;
        struct rt_rq *rt_rq;

        if (unlikely(!scheduler_running))
                return;

        for_each_rt_rq(rt_rq, iter, rq) {
                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
                s64 want;
                int i;

                raw_spin_lock(&rt_b->rt_runtime_lock);
                raw_spin_lock(&rt_rq->rt_runtime_lock);
                /*
                 * Either we're all inf and nobody needs to borrow, or we're
                 * already disabled and thus have nothing to do, or we have
                 * exactly the right amount of runtime to take out.
                 */
                if (rt_rq->rt_runtime == RUNTIME_INF ||
                                rt_rq->rt_runtime == rt_b->rt_runtime)
                        goto balanced;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);

                /*
                 * Calculate the difference between what we started out with
                 * and what we currently have; that's the amount of runtime
                 * we lent and now have to reclaim.
                 */
                want = rt_b->rt_runtime - rt_rq->rt_runtime;

                /*
                 * Greedy reclaim, take back as much as we can.
                 */
                for_each_cpu(i, rd->span) {
                        struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                        s64 diff;

                        /*
                         * Can't reclaim from ourselves or disabled runqueues.
                         */
                        if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
                                continue;

                        raw_spin_lock(&iter->rt_runtime_lock);
                        if (want > 0) {
                                diff = min_t(s64, iter->rt_runtime, want);
                                iter->rt_runtime -= diff;
                                want -= diff;
                        } else {
                                iter->rt_runtime -= want;
                                want -= want;
                        }
                        raw_spin_unlock(&iter->rt_runtime_lock);

                        if (!want)
                                break;
                }

                raw_spin_lock(&rt_rq->rt_runtime_lock);
                /*
                 * We cannot be left wanting - that would mean some runtime
                 * leaked out of the system.
                 */
                BUG_ON(want);
balanced:
                /*
                 * Disable all the borrow logic by pretending we have inf
                 * runtime - in which case borrowing doesn't make sense.
                 */
                rt_rq->rt_runtime = RUNTIME_INF;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                raw_spin_unlock(&rt_b->rt_runtime_lock);
        }
}

static void disable_runtime(struct rq *rq)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&rq->lock, flags);
        __disable_runtime(rq);
        raw_spin_unlock_irqrestore(&rq->lock, flags);
}

static void __enable_runtime(struct rq *rq)
{
        rt_rq_iter_t iter;
        struct rt_rq *rt_rq;

        if (unlikely(!scheduler_running))
                return;

        /*
         * Reset each runqueue's bandwidth settings
         */
        for_each_rt_rq(rt_rq, iter, rq) {
                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

                raw_spin_lock(&rt_b->rt_runtime_lock);
                raw_spin_lock(&rt_rq->rt_runtime_lock);
                rt_rq->rt_runtime = rt_b->rt_runtime;
                rt_rq->rt_time = 0;
                rt_rq->rt_throttled = 0;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                raw_spin_unlock(&rt_b->rt_runtime_lock);
        }
}

static void enable_runtime(struct rq *rq)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&rq->lock, flags);
        __enable_runtime(rq);
        raw_spin_unlock_irqrestore(&rq->lock, flags);
}

int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
        int cpu = (int)(long)hcpu;

        switch (action) {
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                disable_runtime(cpu_rq(cpu));
                return NOTIFY_OK;

        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                enable_runtime(cpu_rq(cpu));
                return NOTIFY_OK;

        default:
                return NOTIFY_DONE;
        }
}

static int balance_runtime(struct rt_rq *rt_rq)
{
        int more = 0;

        if (!sched_feat(RT_RUNTIME_SHARE))
                return more;

        if (rt_rq->rt_time > rt_rq->rt_runtime) {
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
                more = do_balance_runtime(rt_rq);
                raw_spin_lock(&rt_rq->rt_runtime_lock);
        }

        return more;
}
#else /* !CONFIG_SMP */
static inline int balance_runtime(struct rt_rq *rt_rq)
{
        return 0;
}
#endif /* CONFIG_SMP */

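/*
 * Called for each rt_period that elapsed: replenish rt_time on every rt_rq
 * in the period mask, unthrottle queues that are back under budget, and
 * report whether all queues are idle so the period timer can stop.
 */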
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
        int i, idle = 1;
        const struct cpumask *span;

        if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
                return 1;

        span = sched_rt_period_mask();
        for_each_cpu(i, span) {
                int enqueue = 0;
                struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
                struct rq *rq = rq_of_rt_rq(rt_rq);

                raw_spin_lock(&rq->lock);
                if (rt_rq->rt_time) {
                        u64 runtime;

                        raw_spin_lock(&rt_rq->rt_runtime_lock);
                        if (rt_rq->rt_throttled)
                                balance_runtime(rt_rq);
                        runtime = rt_rq->rt_runtime;
                        rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
                        if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
                                rt_rq->rt_throttled = 0;
                                enqueue = 1;

                                /*
                                 * Force a clock update if the CPU was idle,
                                 * lest wakeup -> unthrottle time accumulate.
                                 */
                                if (rt_rq->rt_nr_running && rq->curr == rq->idle)
                                        rq->skip_clock_update = -1;
                        }
                        if (rt_rq->rt_time || rt_rq->rt_nr_running)
                                idle = 0;
                        raw_spin_unlock(&rt_rq->rt_runtime_lock);
                } else if (rt_rq->rt_nr_running) {
                        idle = 0;
                        if (!rt_rq_throttled(rt_rq))
                                enqueue = 1;
                }

                if (enqueue)
                        sched_rt_rq_enqueue(rt_rq);
                raw_spin_unlock(&rq->lock);
        }

        return idle;
}

static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
        struct rt_rq *rt_rq = group_rt_rq(rt_se);

        if (rt_rq)
                return rt_rq->highest_prio.curr;
#endif

        return rt_task_of(rt_se)->prio;
}

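/*
 * Decide whether this rt_rq has used up its runtime for the current period:
 * try to borrow from neighbours first, then throttle (and dequeue) if
 * rt_time still exceeds the budget. Returns 1 when the queue is throttled.
 */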
static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
        u64 runtime = sched_rt_runtime(rt_rq);

        if (rt_rq->rt_throttled)
                return rt_rq_throttled(rt_rq);

        if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
                return 0;

        balance_runtime(rt_rq);
        runtime = sched_rt_runtime(rt_rq);
        if (runtime == RUNTIME_INF)
                return 0;

        if (rt_rq->rt_time > runtime) {
                rt_rq->rt_throttled = 1;
                printk_once(KERN_WARNING "sched: RT throttling activated\n");
                if (rt_rq_throttled(rt_rq)) {
                        sched_rt_rq_dequeue(rt_rq);
                        return 1;
                }
        }

        return 0;
}

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
        struct task_struct *curr = rq->curr;
        struct sched_rt_entity *rt_se = &curr->rt;
        struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
        u64 delta_exec;

        if (curr->sched_class != &rt_sched_class)
                return;

        delta_exec = rq->clock_task - curr->se.exec_start;
        if (unlikely((s64)delta_exec < 0))
                delta_exec = 0;

        schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec));

        curr->se.sum_exec_runtime += delta_exec;
        account_group_exec_runtime(curr, delta_exec);

        curr->se.exec_start = rq->clock_task;
        cpuacct_charge(curr, delta_exec);

        sched_rt_avg_update(rq, delta_exec);

        if (!rt_bandwidth_enabled())
                return;

        for_each_sched_rt_entity(rt_se) {
                rt_rq = rt_rq_of_se(rt_se);

                if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
                        raw_spin_lock(&rt_rq->rt_runtime_lock);
                        rt_rq->rt_time += delta_exec;
                        if (sched_rt_runtime_exceeded(rt_rq))
                                resched_task(curr);
                        raw_spin_unlock(&rt_rq->rt_runtime_lock);
                }
        }
}

#if defined CONFIG_SMP

static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
        struct rq *rq = rq_of_rt_rq(rt_rq);

        if (rq->online && prio < prev_prio)
                cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
}

static void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
        struct rq *rq = rq_of_rt_rq(rt_rq);

        if (rq->online && rt_rq->highest_prio.curr != prev_prio)
                cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}

#else /* CONFIG_SMP */

static inline
void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
static inline
void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}

#endif /* CONFIG_SMP */

#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
        int prev_prio = rt_rq->highest_prio.curr;

        if (prio < prev_prio)
                rt_rq->highest_prio.curr = prio;

        inc_rt_prio_smp(rt_rq, prio, prev_prio);
}

static void
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
        int prev_prio = rt_rq->highest_prio.curr;

        if (rt_rq->rt_nr_running) {

                WARN_ON(prio < prev_prio);

                /*
                 * This may have been our highest task, and therefore
                 * we may have some recomputation to do
                 */
                if (prio == prev_prio) {
                        struct rt_prio_array *array = &rt_rq->active;

                        rt_rq->highest_prio.curr =
                                sched_find_first_bit(array->bitmap);
                }

        } else
                rt_rq->highest_prio.curr = MAX_RT_PRIO;

        dec_rt_prio_smp(rt_rq, prio, prev_prio);
}

#else

static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}

#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_RT_GROUP_SCHED

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        if (rt_se_boosted(rt_se))
                rt_rq->rt_nr_boosted++;

        if (rt_rq->tg)
                start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}

static void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        if (rt_se_boosted(rt_se))
                rt_rq->rt_nr_boosted--;

        WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}

#else /* CONFIG_RT_GROUP_SCHED */

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        start_rt_bandwidth(&def_rt_bandwidth);
}

static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}

#endif /* CONFIG_RT_GROUP_SCHED */

static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        int prio = rt_se_prio(rt_se);

        WARN_ON(!rt_prio(prio));
        rt_rq->rt_nr_running++;

        inc_rt_prio(rt_rq, prio);
        inc_rt_migration(rt_se, rt_rq);
        inc_rt_group(rt_se, rt_rq);
}

static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
        WARN_ON(!rt_prio(rt_se_prio(rt_se)));
        WARN_ON(!rt_rq->rt_nr_running);
        rt_rq->rt_nr_running--;

        dec_rt_prio(rt_rq, rt_se_prio(rt_se));
        dec_rt_migration(rt_se, rt_rq);
        dec_rt_group(rt_se, rt_rq);
}

static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
        struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
        struct rt_prio_array *array = &rt_rq->active;
        struct rt_rq *group_rq = group_rt_rq(rt_se);
        struct list_head *queue = array->queue + rt_se_prio(rt_se);

        /*
         * Don't enqueue the group if it's throttled, or when empty.
         * The latter is a consequence of the former when a child group
         * gets throttled and the current group doesn't have any other
         * active members.
         */
        if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                return;

        if (!rt_rq->rt_nr_running)
                list_add_leaf_rt_rq(rt_rq);

        if (head)
                list_add(&rt_se->run_list, queue);
        else
                list_add_tail(&rt_se->run_list, queue);
        __set_bit(rt_se_prio(rt_se), array->bitmap);

        inc_rt_tasks(rt_se, rt_rq);
}

static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
        struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
        struct rt_prio_array *array = &rt_rq->active;

        list_del_init(&rt_se->run_list);
        if (list_empty(array->queue + rt_se_prio(rt_se)))
                __clear_bit(rt_se_prio(rt_se), array->bitmap);

        dec_rt_tasks(rt_se, rt_rq);
        if (!rt_rq->rt_nr_running)
                list_del_leaf_rt_rq(rt_rq);
}

/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top-down.
 */
static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
{
        struct sched_rt_entity *back = NULL;

        for_each_sched_rt_entity(rt_se) {
                rt_se->back = back;
                back = rt_se;
        }

        for (rt_se = back; rt_se; rt_se = rt_se->back) {
                if (on_rt_rq(rt_se))
                        __dequeue_rt_entity(rt_se);
        }
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
        dequeue_rt_stack(rt_se);
        for_each_sched_rt_entity(rt_se)
                __enqueue_rt_entity(rt_se, head);
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
        dequeue_rt_stack(rt_se);

        for_each_sched_rt_entity(rt_se) {
                struct rt_rq *rt_rq = group_rt_rq(rt_se);

                if (rt_rq && rt_rq->rt_nr_running)
                        __enqueue_rt_entity(rt_se, false);
        }
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void
enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
        struct sched_rt_entity *rt_se = &p->rt;

        if (flags & ENQUEUE_WAKEUP)
                rt_se->timeout = 0;

        enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);

        if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
                enqueue_pushable_task(rq, p);

        inc_nr_running(rq);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
        struct sched_rt_entity *rt_se = &p->rt;

        update_curr_rt(rq);
        dequeue_rt_entity(rt_se);

        dequeue_pushable_task(rq, p);

        dec_nr_running(rq);
}

/*
 * Put task to the head or the end of the run list without the overhead of
 * dequeue followed by enqueue.
 */
static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
        if (on_rt_rq(rt_se)) {
                struct rt_prio_array *array = &rt_rq->active;
                struct list_head *queue = array->queue + rt_se_prio(rt_se);

                if (head)
                        list_move(&rt_se->run_list, queue);
                else
                        list_move_tail(&rt_se->run_list, queue);
        }
}

static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
        struct sched_rt_entity *rt_se = &p->rt;
        struct rt_rq *rt_rq;

        for_each_sched_rt_entity(rt_se) {
                rt_rq = rt_rq_of_se(rt_se);
                requeue_rt_entity(rt_rq, rt_se, head);
        }
}

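/* sched_yield() for RT tasks: requeue current to the tail of its priority list. */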
static void yield_task_rt(struct rq *rq)
{
        requeue_task_rt(rq, rq->curr, 0);
}

#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

static int
select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
{
        struct task_struct *curr;
        struct rq *rq;
        int cpu;

        cpu = task_cpu(p);

        /* For anything but wake ups, just return the task_cpu */
        if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
                goto out;

        rq = cpu_rq(cpu);

        rcu_read_lock();
        curr = ACCESS_ONCE(rq->curr); /* unlocked access */

        /*
         * If the current task on @p's runqueue is an RT task, then
         * try to see if we can wake this RT task up on another
         * runqueue. Otherwise simply start this RT task
         * on its current runqueue.
         *
         * We want to avoid overloading runqueues. If the woken
         * task is a higher priority, then it will stay on this CPU
         * and the lower prio task should be moved to another CPU.
         * Even though this will probably make the lower prio task
         * lose its cache, we do not want to bounce a higher task
         * around just because it gave up its CPU, perhaps for a
         * lock?
         *
         * For equal prio tasks, we just let the scheduler sort it out.
         *
         * Otherwise, just let it ride on the affined RQ and the
         * post-schedule router will push the preempted task away.
         *
         * This test is optimistic, if we get it wrong the load-balancer
         * will have to sort it out.
         */
        if (curr && unlikely(rt_task(curr)) &&
            (curr->rt.nr_cpus_allowed < 2 ||
             curr->prio <= p->prio) &&
            (p->rt.nr_cpus_allowed > 1)) {
                int target = find_lowest_rq(p);

                if (target != -1)
                        cpu = target;
        }
        rcu_read_unlock();

out:
        return cpu;
}

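/*
 * On an equal-priority wakeup, try to move the current task elsewhere if it
 * is migratable and the waking task is not, so the pinned task can run here.
 */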
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
        if (rq->curr->rt.nr_cpus_allowed == 1)
                return;

        if (p->rt.nr_cpus_allowed != 1
            && cpupri_find(&rq->rd->cpupri, p, NULL))
                return;

        if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
                return;

        /*
         * There appear to be other cpus that can accept
         * current and none to run 'p', so let's reschedule
         * to try and push current away:
         */
        requeue_task_rt(rq, p, 1);
        resched_task(rq->curr);
}

#endif /* CONFIG_SMP */

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
{
        if (p->prio < rq->curr->prio) {
                resched_task(rq->curr);
                return;
        }

#ifdef CONFIG_SMP
        /*
         * If:
         *
         * - the newly woken task is of equal priority to the current task
         * - the newly woken task is non-migratable while current is migratable
         * - current will be preempted on the next reschedule
         *
         * we should check to see if current can readily move to a different
         * cpu. If so, we will reschedule to allow the push logic to try
         * to move current somewhere else, making room for our non-migratable
         * task.
         */
        if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
                check_preempt_equal_prio(rq, p);
#endif
}

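/*
 * Pick the first entity on the highest-priority non-empty queue. Callers
 * guarantee the rt_rq has runnable tasks, hence the BUG_ON if the bitmap
 * search only finds the MAX_RT_PRIO delimiter bit.
 */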
static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
                                                   struct rt_rq *rt_rq)
{
        struct rt_prio_array *array = &rt_rq->active;
        struct sched_rt_entity *next = NULL;
        struct list_head *queue;
        int idx;

        idx = sched_find_first_bit(array->bitmap);
        BUG_ON(idx >= MAX_RT_PRIO);

        queue = array->queue + idx;
        next = list_entry(queue->next, struct sched_rt_entity, run_list);

        return next;
}

static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
        struct sched_rt_entity *rt_se;
        struct task_struct *p;
        struct rt_rq *rt_rq;

        rt_rq = &rq->rt;

        if (!rt_rq->rt_nr_running)
                return NULL;

        if (rt_rq_throttled(rt_rq))
                return NULL;

        do {
                rt_se = pick_next_rt_entity(rq, rt_rq);
                BUG_ON(!rt_se);
                rt_rq = group_rt_rq(rt_se);
        } while (rt_rq);

        p = rt_task_of(rt_se);
        p->se.exec_start = rq->clock_task;

        return p;
}

static struct task_struct *pick_next_task_rt(struct rq *rq)
{
        struct task_struct *p = _pick_next_task_rt(rq);

        /* The running task is never eligible for pushing */
        if (p)
                dequeue_pushable_task(rq, p);

#ifdef CONFIG_SMP
        /*
         * We detect this state here so that we can avoid taking the RQ
         * lock again later if there is no need to push
         */
        rq->post_schedule = has_pushable_tasks(rq);
#endif

        return p;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
        update_curr_rt(rq);

        /*
         * The previous task needs to be made eligible for pushing
         * if it is still active
         */
        if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
                enqueue_pushable_task(rq, p);
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define RT_MAX_TRIES 3

static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
        if (!task_running(rq, p) &&
            (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
            (p->rt.nr_cpus_allowed > 1))
                return 1;
        return 0;
}

/* Return the second highest RT task, NULL otherwise */
static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
{
        struct task_struct *next = NULL;
        struct sched_rt_entity *rt_se;
        struct rt_prio_array *array;
        struct rt_rq *rt_rq;
        int idx;

        for_each_leaf_rt_rq(rt_rq, rq) {
                array = &rt_rq->active;
                idx = sched_find_first_bit(array->bitmap);
next_idx:
                if (idx >= MAX_RT_PRIO)
                        continue;
                if (next && next->prio < idx)
                        continue;
                list_for_each_entry(rt_se, array->queue + idx, run_list) {
                        struct task_struct *p;

                        if (!rt_entity_is_task(rt_se))
                                continue;

                        p = rt_task_of(rt_se);
                        if (pick_rt_task(rq, p, cpu)) {
                                next = p;
                                break;
                        }
                }
                if (!next) {
                        idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
                        goto next_idx;
                }
        }

        return next;
}

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

static int find_lowest_rq(struct task_struct *task)
{
        struct sched_domain *sd;
        struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
        int this_cpu = smp_processor_id();
        int cpu = task_cpu(task);

        /* Make sure the mask is initialized first */
        if (unlikely(!lowest_mask))
                return -1;

        if (task->rt.nr_cpus_allowed == 1)
                return -1; /* No other targets possible */

        if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
                return -1; /* No targets found */

        /*
         * At this point we have built a mask of cpus representing the
         * lowest priority tasks in the system.  Now we want to elect
         * the best one based on our affinity and topology.
         *
         * We prioritize the last cpu that the task executed on since
         * it is most likely cache-hot in that location.
         */
        if (cpumask_test_cpu(cpu, lowest_mask))
                return cpu;

        /*
         * Otherwise, we consult the sched_domains span maps to figure
         * out which cpu is logically closest to our hot cache data.
         */
        if (!cpumask_test_cpu(this_cpu, lowest_mask))
                this_cpu = -1; /* Skip this_cpu opt if not among lowest */

        rcu_read_lock();
        for_each_domain(cpu, sd) {
                if (sd->flags & SD_WAKE_AFFINE) {
                        int best_cpu;

                        /*
                         * "this_cpu" is cheaper to preempt than a
                         * remote processor.
                         */
                        if (this_cpu != -1 &&
                            cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
                                rcu_read_unlock();
                                return this_cpu;
                        }

                        best_cpu = cpumask_first_and(lowest_mask,
                                                     sched_domain_span(sd));
                        if (best_cpu < nr_cpu_ids) {
                                rcu_read_unlock();
                                return best_cpu;
                        }
                }
        }
        rcu_read_unlock();

        /*
         * And finally, if there were no matches within the domains
         * just give the caller *something* to work with from the compatible
         * locations.
         */
        if (this_cpu != -1)
                return this_cpu;

        cpu = cpumask_any(lowest_mask);
        if (cpu < nr_cpu_ids)
                return cpu;
        return -1;
}

/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
        struct rq *lowest_rq = NULL;
        int tries;
        int cpu;

        for (tries = 0; tries < RT_MAX_TRIES; tries++) {
                cpu = find_lowest_rq(task);

                if ((cpu == -1) || (cpu == rq->cpu))
                        break;

                lowest_rq = cpu_rq(cpu);

                /* if the prio of this runqueue changed, try again */
                if (double_lock_balance(rq, lowest_rq)) {
                        /*
                         * We had to unlock the run queue. In
                         * the meantime, task could have
                         * migrated already or had its affinity changed.
                         * Also make sure that it wasn't scheduled on its rq.
                         */
                        if (unlikely(task_rq(task) != rq ||
                                     !cpumask_test_cpu(lowest_rq->cpu,
                                                       tsk_cpus_allowed(task)) ||
                                     task_running(rq, task) ||
                                     !task->on_rq)) {

                                raw_spin_unlock(&lowest_rq->lock);
                                lowest_rq = NULL;
                                break;
                        }
                }

                /* If this rq is still suitable use it. */
                if (lowest_rq->rt.highest_prio.curr > task->prio)
                        break;

                /* try again */
                double_unlock_balance(rq, lowest_rq);
                lowest_rq = NULL;
        }

        return lowest_rq;
}

static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
        struct task_struct *p;

        if (!has_pushable_tasks(rq))
                return NULL;

        p = plist_first_entry(&rq->rt.pushable_tasks,
                              struct task_struct, pushable_tasks);

        BUG_ON(rq->cpu != task_cpu(p));
        BUG_ON(task_current(rq, p));
        BUG_ON(p->rt.nr_cpus_allowed <= 1);

        BUG_ON(!p->on_rq);
        BUG_ON(!rt_task(p));

        return p;
}

/*
 * If the current CPU has more than one RT task, see if the non-running
 * task can migrate over to a CPU that is running a task of lesser
 * priority.
 */
static int push_rt_task(struct rq *rq)
{
        struct task_struct *next_task;
        struct rq *lowest_rq;
        int ret = 0;

        if (!rq->rt.overloaded)
                return 0;

        next_task = pick_next_pushable_task(rq);
        if (!next_task)
                return 0;

retry:
        if (unlikely(next_task == rq->curr)) {
                WARN_ON(1);
                return 0;
        }

        /*
         * It's possible that next_task slipped in with a higher priority
         * than current. If that's the case, just reschedule current.
         */
        if (unlikely(next_task->prio < rq->curr->prio)) {
                resched_task(rq->curr);
                return 0;
        }

        /* We might release rq lock */
        get_task_struct(next_task);

        /* find_lock_lowest_rq locks the rq if found */
        lowest_rq = find_lock_lowest_rq(next_task, rq);
        if (!lowest_rq) {
                struct task_struct *task;
                /*
                 * find_lock_lowest_rq releases rq->lock
                 * so it is possible that next_task has migrated.
                 *
                 * We need to make sure that the task is still on the same
                 * run-queue and is also still the next task eligible for
                 * pushing.
                 */
                task = pick_next_pushable_task(rq);
                if (task_cpu(next_task) == rq->cpu && task == next_task) {
                        /*
                         * The task hasn't migrated, and is still the next
                         * eligible task, but we failed to find a run-queue
                         * to push it to.  Do not retry in this case, since
                         * other cpus will pull from us when ready.
                         */
                        goto out;
                }

                if (!task)
                        /* No more tasks, just exit */
                        goto out;

                /*
                 * Something has shifted, try again.
                 */
                put_task_struct(next_task);
                next_task = task;
                goto retry;
        }

        deactivate_task(rq, next_task, 0);
        set_task_cpu(next_task, lowest_rq->cpu);
        activate_task(lowest_rq, next_task, 0);
        ret = 1;

        resched_task(lowest_rq->curr);

        double_unlock_balance(rq, lowest_rq);

out:
        put_task_struct(next_task);

        return ret;
}

static void push_rt_tasks(struct rq *rq)
{
        /* push_rt_task will return true if it moved an RT */
        while (push_rt_task(rq))
                ;
}

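/*
 * The pull side of the balancer: when this rq is about to run a lower
 * priority task, scan the overloaded CPUs in the root domain and steal
 * the highest-priority pushable task that would preempt it.
 */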
static int pull_rt_task(struct rq *this_rq)
{
        int this_cpu = this_rq->cpu, ret = 0, cpu;
        struct task_struct *p;
        struct rq *src_rq;

        if (likely(!rt_overloaded(this_rq)))
                return 0;

        for_each_cpu(cpu, this_rq->rd->rto_mask) {
                if (this_cpu == cpu)
                        continue;

                src_rq = cpu_rq(cpu);

                /*
                 * Don't bother taking the src_rq->lock if the next highest
                 * task is known to be lower-priority than our current task.
                 * This may look racy, but if this value is about to go
                 * logically higher, the src_rq will push this task away.
                 * And if it's going logically lower, we do not care.
                 */
                if (src_rq->rt.highest_prio.next >=
                    this_rq->rt.highest_prio.curr)
                        continue;

                /*
                 * We can potentially drop this_rq's lock in
                 * double_lock_balance, and another CPU could
                 * alter this_rq
                 */
                double_lock_balance(this_rq, src_rq);

                /*
                 * Are there still pullable RT tasks?
                 */
                if (src_rq->rt.rt_nr_running <= 1)
                        goto skip;

                p = pick_next_highest_task_rt(src_rq, this_cpu);

                /*
                 * Do we have an RT task that preempts
                 * the to-be-scheduled task?
                 */
                if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
                        WARN_ON(p == src_rq->curr);
                        WARN_ON(!p->on_rq);

1712 | /* | ||
1713 | * There's a chance that p is higher in priority | ||
1714 | * than what's currently running on its cpu. | ||
1715 | * This happens when p is waking up and hasn't | ||
1716 | * had a chance to schedule yet. We only pull | ||
1717 | * p if it is lower in priority than the task | ||
1718 | * currently running on its run queue. | ||
1719 | */ | ||
1720 | if (p->prio < src_rq->curr->prio) | ||
1721 | goto skip; | ||
1722 | |||
1723 | ret = 1; | ||
1724 | |||
1725 | deactivate_task(src_rq, p, 0); | ||
1726 | set_task_cpu(p, this_cpu); | ||
1727 | activate_task(this_rq, p, 0); | ||
1728 | /* | ||
1729 | * We continue with the search, just in | ||
1730 | * case there's an even higher prio task | ||
1731 | * in another runqueue. (low likelihood | ||
1732 | * but possible) | ||
1733 | */ | ||
1734 | } | ||
1735 | skip: | ||
1736 | double_unlock_balance(this_rq, src_rq); | ||
1737 | } | ||
1738 | |||
1739 | return ret; | ||
1740 | } | ||
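Note that the p->prio comparisons in pull_rt_task() use kernel-internal priorities, where a numerically *lower* value is more urgent; the user-visible rt_priority is inverted on the way in. A small sketch of that mapping (the constant mirrors MAX_RT_PRIO == 100 in this tree; illustrative only, not a kernel export):

	#define MAX_RT_PRIO	100

	/* sched_setscheduler()'s rt_priority 1..99 becomes kernel prio 98..0,
	 * so "p->prio < q->prio" reads as "p is more urgent than q". */
	static inline int rt_user_to_kernel_prio(int rt_priority)
	{
		return MAX_RT_PRIO - 1 - rt_priority;
	}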
1741 | |||
1742 | static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) | ||
1743 | { | ||
1744 | /* Try to pull RT tasks here if we lower this rq's prio */ | ||
1745 | if (rq->rt.highest_prio.curr > prev->prio) | ||
1746 | pull_rt_task(rq); | ||
1747 | } | ||
1748 | |||
1749 | static void post_schedule_rt(struct rq *rq) | ||
1750 | { | ||
1751 | push_rt_tasks(rq); | ||
1752 | } | ||
1753 | |||
1754 | /* | ||
1755 | * If we are not running and we are not going to reschedule soon, we should | ||
1756 | * try to push tasks away now | ||
1757 | */ | ||
1758 | static void task_woken_rt(struct rq *rq, struct task_struct *p) | ||
1759 | { | ||
1760 | if (!task_running(rq, p) && | ||
1761 | !test_tsk_need_resched(rq->curr) && | ||
1762 | has_pushable_tasks(rq) && | ||
1763 | p->rt.nr_cpus_allowed > 1 && | ||
1764 | rt_task(rq->curr) && | ||
1765 | (rq->curr->rt.nr_cpus_allowed < 2 || | ||
1766 | rq->curr->prio <= p->prio)) | ||
1767 | push_rt_tasks(rq); | ||
1768 | } | ||
1769 | |||
1770 | static void set_cpus_allowed_rt(struct task_struct *p, | ||
1771 | const struct cpumask *new_mask) | ||
1772 | { | ||
1773 | int weight = cpumask_weight(new_mask); | ||
1774 | |||
1775 | BUG_ON(!rt_task(p)); | ||
1776 | |||
1777 | /* | ||
1778 | * Update the migration status of the RQ if we have an RT task | ||
1779 | * which is running AND changing its weight value. | ||
1780 | */ | ||
1781 | if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) { | ||
1782 | struct rq *rq = task_rq(p); | ||
1783 | |||
1784 | if (!task_current(rq, p)) { | ||
1785 | /* | ||
1786 | * Make sure we dequeue this task from the pushable list | ||
1787 | * before going further. It will either remain off | ||
1788 | * the list because we are no longer pushable, or it | ||
1789 | * will be requeued. | ||
1790 | */ | ||
1791 | if (p->rt.nr_cpus_allowed > 1) | ||
1792 | dequeue_pushable_task(rq, p); | ||
1793 | |||
1794 | /* | ||
1795 | * Requeue if our weight is changing and still > 1 | ||
1796 | */ | ||
1797 | if (weight > 1) | ||
1798 | enqueue_pushable_task(rq, p); | ||
1799 | |||
1800 | } | ||
1801 | |||
1802 | if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) { | ||
1803 | rq->rt.rt_nr_migratory++; | ||
1804 | } else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) { | ||
1805 | BUG_ON(!rq->rt.rt_nr_migratory); | ||
1806 | rq->rt.rt_nr_migratory--; | ||
1807 | } | ||
1808 | |||
1809 | update_rt_migration(&rq->rt); | ||
1810 | } | ||
1811 | } | ||
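set_cpus_allowed_rt() is what ultimately runs when user space changes an RT task's affinity; shrinking the mask to a single CPU drops the weight to 1, so the task leaves the pushable list and rt_nr_migratory is decremented. A minimal caller (standard glibc API; error handling trimmed):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>

	int main(void)
	{
		cpu_set_t set;

		CPU_ZERO(&set);
		CPU_SET(0, &set);	/* weight == 1: task becomes non-migratory */
		if (sched_setaffinity(0, sizeof(set), &set))
			perror("sched_setaffinity");
		return 0;
	}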
1812 | |||
1813 | /* Assumes rq->lock is held */ | ||
1814 | static void rq_online_rt(struct rq *rq) | ||
1815 | { | ||
1816 | if (rq->rt.overloaded) | ||
1817 | rt_set_overload(rq); | ||
1818 | |||
1819 | __enable_runtime(rq); | ||
1820 | |||
1821 | cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr); | ||
1822 | } | ||
1823 | |||
1824 | /* Assumes rq->lock is held */ | ||
1825 | static void rq_offline_rt(struct rq *rq) | ||
1826 | { | ||
1827 | if (rq->rt.overloaded) | ||
1828 | rt_clear_overload(rq); | ||
1829 | |||
1830 | __disable_runtime(rq); | ||
1831 | |||
1832 | cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); | ||
1833 | } | ||
1834 | |||
1835 | /* | ||
1836 | * When switching away from the RT queue, we may end up in a position | ||
1837 | * where we want to pull RT tasks from other runqueues. | ||
1838 | */ | ||
1839 | static void switched_from_rt(struct rq *rq, struct task_struct *p) | ||
1840 | { | ||
1841 | /* | ||
1842 | * If there are other RT tasks then we will reschedule | ||
1843 | * and the scheduling of the other RT tasks will handle | ||
1844 | * the balancing. But if we are the last RT task | ||
1845 | * we may need to handle the pulling of RT tasks | ||
1846 | * now. | ||
1847 | */ | ||
1848 | if (p->on_rq && !rq->rt.rt_nr_running) | ||
1849 | pull_rt_task(rq); | ||
1850 | } | ||
1851 | |||
1852 | void init_sched_rt_class(void) | ||
1853 | { | ||
1854 | unsigned int i; | ||
1855 | |||
1856 | for_each_possible_cpu(i) { | ||
1857 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), | ||
1858 | GFP_KERNEL, cpu_to_node(i)); | ||
1859 | } | ||
1860 | } | ||
1861 | #endif /* CONFIG_SMP */ | ||
1862 | |||
1863 | /* | ||
1864 | * When switching a task to RT, we may overload the runqueue | ||
1865 | * with RT tasks. In this case we try to push them off to | ||
1866 | * other runqueues. | ||
1867 | */ | ||
1868 | static void switched_to_rt(struct rq *rq, struct task_struct *p) | ||
1869 | { | ||
1870 | int check_resched = 1; | ||
1871 | |||
1872 | /* | ||
1873 | * If we are already running, then there's nothing | ||
1874 | * that needs to be done. But if we are not running, | ||
1875 | * we may need to preempt the currently running task. | ||
1876 | * If that task is also an RT task, then see if we | ||
1877 | * can push the new task off to another run queue. | ||
1878 | */ | ||
1879 | if (p->on_rq && rq->curr != p) { | ||
1880 | #ifdef CONFIG_SMP | ||
1881 | if (rq->rt.overloaded && push_rt_task(rq) && | ||
1882 | /* Don't resched if we changed runqueues */ | ||
1883 | rq != task_rq(p)) | ||
1884 | check_resched = 0; | ||
1885 | #endif /* CONFIG_SMP */ | ||
1886 | if (check_resched && p->prio < rq->curr->prio) | ||
1887 | resched_task(rq->curr); | ||
1888 | } | ||
1889 | } | ||
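switched_to_rt() fires when a task changes class into SCHED_FIFO or SCHED_RR, most commonly via sched_setscheduler(). A minimal trigger from user space (needs CAP_SYS_NICE or a suitable RLIMIT_RTPRIO; the priority value is arbitrary):

	#include <sched.h>
	#include <stdio.h>

	int main(void)
	{
		struct sched_param sp = { .sched_priority = 50 };

		/* moving a CFS task into the RT class ends up in switched_to_rt() */
		if (sched_setscheduler(0, SCHED_FIFO, &sp))
			perror("sched_setscheduler");
		return 0;
	}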
1890 | |||
1891 | /* | ||
1892 | * Priority of the task has changed. This may cause | ||
1893 | * us to initiate a push or pull. | ||
1894 | */ | ||
1895 | static void | ||
1896 | prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) | ||
1897 | { | ||
1898 | if (!p->on_rq) | ||
1899 | return; | ||
1900 | |||
1901 | if (rq->curr == p) { | ||
1902 | #ifdef CONFIG_SMP | ||
1903 | /* | ||
1904 | * If our priority decreases while running, we | ||
1905 | * may need to pull tasks to this runqueue. | ||
1906 | */ | ||
1907 | if (oldprio < p->prio) | ||
1908 | pull_rt_task(rq); | ||
1909 | /* | ||
1910 | * If there's a higher priority task waiting to run | ||
1911 | * then reschedule. Note, the above pull_rt_task | ||
1912 | * can release the rq lock and p could migrate. | ||
1913 | * Only reschedule if p is still on the same runqueue. | ||
1914 | */ | ||
1915 | if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) | ||
1916 | resched_task(p); | ||
1917 | #else | ||
1918 | /* For UP simply resched on drop of prio */ | ||
1919 | if (oldprio < p->prio) | ||
1920 | resched_task(p); | ||
1921 | #endif /* CONFIG_SMP */ | ||
1922 | } else { | ||
1923 | /* | ||
1924 | * This task is not running, but if its | ||
1925 | * priority is higher than that of the currently | ||
1926 | * running task, then reschedule. | ||
1927 | */ | ||
1928 | if (p->prio < rq->curr->prio) | ||
1929 | resched_task(rq->curr); | ||
1930 | } | ||
1931 | } | ||
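prio_changed_rt() runs on a priority change within the RT class, whether initiated from user space or by rt-mutex priority inheritance. The user-space path, sketched (assumes the caller is already SCHED_FIFO/SCHED_RR):

	#include <sched.h>
	#include <stdio.h>

	int main(void)
	{
		struct sched_param sp = { .sched_priority = 30 };

		/* raising or lowering rt_priority reaches prio_changed_rt() */
		if (sched_setparam(0, &sp))
			perror("sched_setparam");
		return 0;
	}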
1932 | |||
1933 | static void watchdog(struct rq *rq, struct task_struct *p) | ||
1934 | { | ||
1935 | unsigned long soft, hard; | ||
1936 | |||
1937 | /* rlim_max may change after rlim_cur was read; this is fixed up on the next tick */ | ||
1938 | soft = task_rlimit(p, RLIMIT_RTTIME); | ||
1939 | hard = task_rlimit_max(p, RLIMIT_RTTIME); | ||
1940 | |||
1941 | if (soft != RLIM_INFINITY) { | ||
1942 | unsigned long next; | ||
1943 | |||
1944 | p->rt.timeout++; | ||
1945 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); | ||
1946 | if (p->rt.timeout > next) | ||
1947 | p->cputime_expires.sched_exp = p->se.sum_exec_runtime; | ||
1948 | } | ||
1949 | } | ||
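The watchdog enforces RLIMIT_RTTIME: a per-task budget, in microseconds, of RT CPU time consumed without a voluntary sleep; once p->rt.timeout crosses the soft limit, the posix-cpu-timer machinery delivers SIGXCPU (SIGKILL at the hard limit). User space opts in like this:

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <sys/resource.h>

	int main(void)
	{
		struct sched_param sp = { .sched_priority = 10 };
		struct rlimit rl = { .rlim_cur = 500000, .rlim_max = 1000000 }; /* usec */

		if (sched_setscheduler(0, SCHED_RR, &sp))
			perror("sched_setscheduler");
		if (setrlimit(RLIMIT_RTTIME, &rl))
			perror("setrlimit");
		/* a busy loop that never blocks now gets SIGXCPU after ~0.5s */
		return 0;
	}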
1950 | |||
1951 | static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) | ||
1952 | { | ||
1953 | update_curr_rt(rq); | ||
1954 | |||
1955 | watchdog(rq, p); | ||
1956 | |||
1957 | /* | ||
1958 | * RR tasks need a special form of timeslice management. | ||
1959 | * FIFO tasks have no timeslices. | ||
1960 | */ | ||
1961 | if (p->policy != SCHED_RR) | ||
1962 | return; | ||
1963 | |||
1964 | if (--p->rt.time_slice) | ||
1965 | return; | ||
1966 | |||
1967 | p->rt.time_slice = DEF_TIMESLICE; | ||
1968 | |||
1969 | /* | ||
1970 | * Requeue to the end of the queue if we are not the only element | ||
1971 | * on the queue: | ||
1972 | */ | ||
1973 | if (p->rt.run_list.prev != p->rt.run_list.next) { | ||
1974 | requeue_task_rt(rq, p, 0); | ||
1975 | set_tsk_need_resched(p); | ||
1976 | } | ||
1977 | } | ||
1978 | |||
1979 | static void set_curr_task_rt(struct rq *rq) | ||
1980 | { | ||
1981 | struct task_struct *p = rq->curr; | ||
1982 | |||
1983 | p->se.exec_start = rq->clock_task; | ||
1984 | |||
1985 | /* The running task is never eligible for pushing */ | ||
1986 | dequeue_pushable_task(rq, p); | ||
1987 | } | ||
1988 | |||
1989 | static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) | ||
1990 | { | ||
1991 | /* | ||
1992 | * Time slice is 0 for SCHED_FIFO tasks | ||
1993 | */ | ||
1994 | if (task->policy == SCHED_RR) | ||
1995 | return DEF_TIMESLICE; | ||
1996 | else | ||
1997 | return 0; | ||
1998 | } | ||
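get_rr_interval_rt() is what backs the sched_rr_get_interval() syscall; DEF_TIMESLICE is defined elsewhere in this tree as 100 * HZ / 1000, i.e. 100 ms worth of ticks. Querying it from user space:

	#include <sched.h>
	#include <stdio.h>
	#include <time.h>

	int main(void)
	{
		struct timespec ts;

		/* reports DEF_TIMESLICE for SCHED_RR callers, 0 for SCHED_FIFO */
		if (sched_rr_get_interval(0, &ts) == 0)
			printf("RR timeslice: %ld.%09ld s\n",
			       (long)ts.tv_sec, ts.tv_nsec);
		return 0;
	}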
1999 | |||
2000 | const struct sched_class rt_sched_class = { | ||
2001 | .next = &fair_sched_class, | ||
2002 | .enqueue_task = enqueue_task_rt, | ||
2003 | .dequeue_task = dequeue_task_rt, | ||
2004 | .yield_task = yield_task_rt, | ||
2005 | |||
2006 | .check_preempt_curr = check_preempt_curr_rt, | ||
2007 | |||
2008 | .pick_next_task = pick_next_task_rt, | ||
2009 | .put_prev_task = put_prev_task_rt, | ||
2010 | |||
2011 | #ifdef CONFIG_SMP | ||
2012 | .select_task_rq = select_task_rq_rt, | ||
2013 | |||
2014 | .set_cpus_allowed = set_cpus_allowed_rt, | ||
2015 | .rq_online = rq_online_rt, | ||
2016 | .rq_offline = rq_offline_rt, | ||
2017 | .pre_schedule = pre_schedule_rt, | ||
2018 | .post_schedule = post_schedule_rt, | ||
2019 | .task_woken = task_woken_rt, | ||
2020 | .switched_from = switched_from_rt, | ||
2021 | #endif | ||
2022 | |||
2023 | .set_curr_task = set_curr_task_rt, | ||
2024 | .task_tick = task_tick_rt, | ||
2025 | |||
2026 | .get_rr_interval = get_rr_interval_rt, | ||
2027 | |||
2028 | .prio_changed = prio_changed_rt, | ||
2029 | .switched_to = switched_to_rt, | ||
2030 | }; | ||
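The .next pointer chains the scheduling classes in priority order (stop, then rt, then fair, then idle in this tree), and the core picks the next task by walking that chain. Roughly, paraphrasing the pick_next_task() loop in kernel/sched/core.c rather than quoting it:

	for_each_class(class) {
		p = class->pick_next_task(rq);
		if (p)
			return p;	/* an RT task beats any fair task */
	}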
2031 | |||
2032 | #ifdef CONFIG_SCHED_DEBUG | ||
2033 | extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); | ||
2034 | |||
2035 | void print_rt_stats(struct seq_file *m, int cpu) | ||
2036 | { | ||
2037 | rt_rq_iter_t iter; | ||
2038 | struct rt_rq *rt_rq; | ||
2039 | |||
2040 | rcu_read_lock(); | ||
2041 | for_each_rt_rq(rt_rq, iter, cpu_rq(cpu)) | ||
2042 | print_rt_rq(m, cpu, rt_rq); | ||
2043 | rcu_read_unlock(); | ||
2044 | } | ||
2045 | #endif /* CONFIG_SCHED_DEBUG */ | ||
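print_rt_stats() feeds the per-CPU rt_rq section of /proc/sched_debug, so the data above is easiest to inspect with a trivial reader (or simply cat the file):

	#include <stdio.h>

	int main(void)
	{
		char buf[4096];
		size_t n;
		FILE *f = fopen("/proc/sched_debug", "r"); /* CONFIG_SCHED_DEBUG only */

		if (!f)
			return 1;
		while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
			fwrite(buf, 1, n, stdout);
		fclose(f);
		return 0;
	}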