Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r-- | kernel/sched_rt.c | 1112 |
1 files changed, 1023 insertions, 89 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 9ba3daa03475..274b40d7bef2 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -3,6 +3,217 @@ | |||
3 | * policies) | 3 | * policies) |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #ifdef CONFIG_SMP | ||
7 | |||
8 | static inline int rt_overloaded(struct rq *rq) | ||
9 | { | ||
10 | return atomic_read(&rq->rd->rto_count); | ||
11 | } | ||
12 | |||
13 | static inline void rt_set_overload(struct rq *rq) | ||
14 | { | ||
15 | cpu_set(rq->cpu, rq->rd->rto_mask); | ||
16 | /* | ||
17 | * Make sure the mask is visible before we set | ||
18 | * the overload count. That is checked to determine | ||
19 | * if we should look at the mask. It would be a shame | ||
20 | * if we looked at the mask, but the mask was not | ||
21 | * updated yet. | ||
22 | */ | ||
23 | wmb(); | ||
24 | atomic_inc(&rq->rd->rto_count); | ||
25 | } | ||
26 | |||
27 | static inline void rt_clear_overload(struct rq *rq) | ||
28 | { | ||
29 | /* the order here really doesn't matter */ | ||
30 | atomic_dec(&rq->rd->rto_count); | ||
31 | cpu_clear(rq->cpu, rq->rd->rto_mask); | ||
32 | } | ||
33 | |||
34 | static void update_rt_migration(struct rq *rq) | ||
35 | { | ||
36 | if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) { | ||
37 | if (!rq->rt.overloaded) { | ||
38 | rt_set_overload(rq); | ||
39 | rq->rt.overloaded = 1; | ||
40 | } | ||
41 | } else if (rq->rt.overloaded) { | ||
42 | rt_clear_overload(rq); | ||
43 | rq->rt.overloaded = 0; | ||
44 | } | ||
45 | } | ||
46 | #endif /* CONFIG_SMP */ | ||
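The rto_count/rto_mask pair published here is consumed on the pull side (pull_rt_task() further down). A minimal reader-side sketch of that pairing, using only the root-domain fields this patch relies on; the helper name and the explicit smp_rmb() are illustrative additions, not part of the patch:

/*
 * Illustrative reader side of the overload protocol: check the cheap
 * counter first, and only then walk the mask.  The wmb() in
 * rt_set_overload() guarantees that a CPU which observes
 * rto_count != 0 also observes the corresponding bit in rto_mask.
 */
static int count_overloaded_peers(struct rq *this_rq)
{
	int cpu, peers = 0;

	if (!rt_overloaded(this_rq))
		return 0;	/* fast path: nobody has pushable RT tasks */

	smp_rmb();		/* pairs with the wmb() in rt_set_overload() */

	for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
		if (cpu != this_rq->cpu)
			peers++;	/* candidate source runqueue for a pull */
	}
	return peers;
}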
47 | |||
48 | static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) | ||
49 | { | ||
50 | return container_of(rt_se, struct task_struct, rt); | ||
51 | } | ||
52 | |||
53 | static inline int on_rt_rq(struct sched_rt_entity *rt_se) | ||
54 | { | ||
55 | return !list_empty(&rt_se->run_list); | ||
56 | } | ||
57 | |||
58 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
59 | |||
60 | static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) | ||
61 | { | ||
62 | if (!rt_rq->tg) | ||
63 | return SCHED_RT_FRAC; | ||
64 | |||
65 | return rt_rq->tg->rt_ratio; | ||
66 | } | ||
67 | |||
68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | ||
69 | list_for_each_entry(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) | ||
70 | |||
71 | static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) | ||
72 | { | ||
73 | return rt_rq->rq; | ||
74 | } | ||
75 | |||
76 | static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) | ||
77 | { | ||
78 | return rt_se->rt_rq; | ||
79 | } | ||
80 | |||
81 | #define for_each_sched_rt_entity(rt_se) \ | ||
82 | for (; rt_se; rt_se = rt_se->parent) | ||
83 | |||
84 | static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | ||
85 | { | ||
86 | return rt_se->my_q; | ||
87 | } | ||
88 | |||
89 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se); | ||
90 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se); | ||
91 | |||
92 | static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | ||
93 | { | ||
94 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | ||
95 | |||
96 | if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) { | ||
97 | struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; | ||
98 | |||
99 | enqueue_rt_entity(rt_se); | ||
100 | if (rt_rq->highest_prio < curr->prio) | ||
101 | resched_task(curr); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | ||
106 | { | ||
107 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | ||
108 | |||
109 | if (rt_se && on_rt_rq(rt_se)) | ||
110 | dequeue_rt_entity(rt_se); | ||
111 | } | ||
112 | |||
113 | #else | ||
114 | |||
115 | static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) | ||
116 | { | ||
117 | return sysctl_sched_rt_ratio; | ||
118 | } | ||
119 | |||
120 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | ||
121 | for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) | ||
122 | |||
123 | static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) | ||
124 | { | ||
125 | return container_of(rt_rq, struct rq, rt); | ||
126 | } | ||
127 | |||
128 | static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) | ||
129 | { | ||
130 | struct task_struct *p = rt_task_of(rt_se); | ||
131 | struct rq *rq = task_rq(p); | ||
132 | |||
133 | return &rq->rt; | ||
134 | } | ||
135 | |||
136 | #define for_each_sched_rt_entity(rt_se) \ | ||
137 | for (; rt_se; rt_se = NULL) | ||
138 | |||
139 | static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | ||
140 | { | ||
141 | return NULL; | ||
142 | } | ||
143 | |||
144 | static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | ||
145 | { | ||
146 | } | ||
147 | |||
148 | static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | ||
149 | { | ||
150 | } | ||
151 | |||
152 | #endif | ||
153 | |||
154 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | ||
155 | { | ||
156 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
157 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | ||
158 | |||
159 | if (rt_rq) | ||
160 | return rt_rq->highest_prio; | ||
161 | #endif | ||
162 | |||
163 | return rt_task_of(rt_se)->prio; | ||
164 | } | ||
165 | |||
166 | static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq) | ||
167 | { | ||
168 | unsigned int rt_ratio = sched_rt_ratio(rt_rq); | ||
169 | u64 period, ratio; | ||
170 | |||
171 | if (rt_ratio == SCHED_RT_FRAC) | ||
172 | return 0; | ||
173 | |||
174 | if (rt_rq->rt_throttled) | ||
175 | return 1; | ||
176 | |||
177 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | ||
178 | ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
179 | |||
180 | if (rt_rq->rt_time > ratio) { | ||
181 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
182 | |||
183 | rq->rt_throttled = 1; | ||
184 | rt_rq->rt_throttled = 1; | ||
185 | |||
186 | sched_rt_ratio_dequeue(rt_rq); | ||
187 | return 1; | ||
188 | } | ||
189 | |||
190 | return 0; | ||
191 | } | ||
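A worked example of the threshold computed above; the period and ratio numbers below, and SCHED_RT_FRAC_SHIFT being 16, are assumptions for illustration rather than values taken from this diff:

/*
 * Assumed, illustrative values:
 *   sysctl_sched_rt_period = 1000   (ms)
 *   rt_ratio               = 62259  (about 0.95 * 2^16, i.e. 95% in
 *                                    16-bit fixed point)
 *
 *   period = 1000 * NSEC_PER_MSEC     = 1,000,000,000 ns
 *   ratio  = (period * 62259) >> 16   ~=  950,000,000 ns
 *
 * Once this rt_rq has accumulated roughly 950 ms of RT runtime inside
 * the 1000 ms period, sched_rt_ratio_exceeded() marks it throttled and
 * sched_rt_ratio_dequeue() takes its entity off the runqueue;
 * update_sched_rt_period() decays rt_time at each period boundary and
 * re-enqueues it once the throttle clears.
 */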
192 | |||
193 | static void update_sched_rt_period(struct rq *rq) | ||
194 | { | ||
195 | struct rt_rq *rt_rq; | ||
196 | u64 period; | ||
197 | |||
198 | while (rq->clock > rq->rt_period_expire) { | ||
199 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | ||
200 | rq->rt_period_expire += period; | ||
201 | |||
202 | for_each_leaf_rt_rq(rt_rq, rq) { | ||
203 | unsigned long rt_ratio = sched_rt_ratio(rt_rq); | ||
204 | u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
205 | |||
206 | rt_rq->rt_time -= min(rt_rq->rt_time, ratio); | ||
207 | if (rt_rq->rt_throttled) { | ||
208 | rt_rq->rt_throttled = 0; | ||
209 | sched_rt_ratio_enqueue(rt_rq); | ||
210 | } | ||
211 | } | ||
212 | |||
213 | rq->rt_throttled = 0; | ||
214 | } | ||
215 | } | ||
216 | |||
6 | /* | 217 | /* |
7 | * Update the current task's runtime statistics. Skip current tasks that | 218 | * Update the current task's runtime statistics. Skip current tasks that |
8 | * are not in our scheduling class. | 219 | * are not in our scheduling class. |
@@ -10,6 +221,8 @@ | |||
10 | static void update_curr_rt(struct rq *rq) | 221 | static void update_curr_rt(struct rq *rq) |
11 | { | 222 | { |
12 | struct task_struct *curr = rq->curr; | 223 | struct task_struct *curr = rq->curr; |
224 | struct sched_rt_entity *rt_se = &curr->rt; | ||
225 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | ||
13 | u64 delta_exec; | 226 | u64 delta_exec; |
14 | 227 | ||
15 | if (!task_has_rt_policy(curr)) | 228 | if (!task_has_rt_policy(curr)) |
@@ -24,47 +237,228 @@ static void update_curr_rt(struct rq *rq) | |||
24 | curr->se.sum_exec_runtime += delta_exec; | 237 | curr->se.sum_exec_runtime += delta_exec; |
25 | curr->se.exec_start = rq->clock; | 238 | curr->se.exec_start = rq->clock; |
26 | cpuacct_charge(curr, delta_exec); | 239 | cpuacct_charge(curr, delta_exec); |
240 | |||
241 | rt_rq->rt_time += delta_exec; | ||
242 | /* | ||
243 | * might make it a tad more accurate: | ||
244 | * | ||
245 | * update_sched_rt_period(rq); | ||
246 | */ | ||
247 | if (sched_rt_ratio_exceeded(rt_rq)) | ||
248 | resched_task(curr); | ||
27 | } | 249 | } |
28 | 250 | ||
29 | static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | 251 | static inline |
252 | void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | ||
253 | { | ||
254 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | ||
255 | rt_rq->rt_nr_running++; | ||
256 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | ||
257 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) | ||
258 | rt_rq->highest_prio = rt_se_prio(rt_se); | ||
259 | #endif | ||
260 | #ifdef CONFIG_SMP | ||
261 | if (rt_se->nr_cpus_allowed > 1) { | ||
262 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
263 | rq->rt.rt_nr_migratory++; | ||
264 | } | ||
265 | |||
266 | update_rt_migration(rq_of_rt_rq(rt_rq)); | ||
267 | #endif | ||
268 | } | ||
269 | |||
270 | static inline | ||
271 | void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | ||
272 | { | ||
273 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | ||
274 | WARN_ON(!rt_rq->rt_nr_running); | ||
275 | rt_rq->rt_nr_running--; | ||
276 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | ||
277 | if (rt_rq->rt_nr_running) { | ||
278 | struct rt_prio_array *array; | ||
279 | |||
280 | WARN_ON(rt_se_prio(rt_se) < rt_rq->highest_prio); | ||
281 | if (rt_se_prio(rt_se) == rt_rq->highest_prio) { | ||
282 | /* recalculate */ | ||
283 | array = &rt_rq->active; | ||
284 | rt_rq->highest_prio = | ||
285 | sched_find_first_bit(array->bitmap); | ||
286 | } /* otherwise leave rq->highest prio alone */ | ||
287 | } else | ||
288 | rt_rq->highest_prio = MAX_RT_PRIO; | ||
289 | #endif | ||
290 | #ifdef CONFIG_SMP | ||
291 | if (rt_se->nr_cpus_allowed > 1) { | ||
292 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
293 | rq->rt.rt_nr_migratory--; | ||
294 | } | ||
295 | |||
296 | update_rt_migration(rq_of_rt_rq(rt_rq)); | ||
297 | #endif /* CONFIG_SMP */ | ||
298 | } | ||
299 | |||
300 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | ||
301 | { | ||
302 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | ||
303 | struct rt_prio_array *array = &rt_rq->active; | ||
304 | struct rt_rq *group_rq = group_rt_rq(rt_se); | ||
305 | |||
306 | if (group_rq && group_rq->rt_throttled) | ||
307 | return; | ||
308 | |||
309 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | ||
310 | __set_bit(rt_se_prio(rt_se), array->bitmap); | ||
311 | |||
312 | inc_rt_tasks(rt_se, rt_rq); | ||
313 | } | ||
314 | |||
315 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se) | ||
30 | { | 316 | { |
31 | struct rt_prio_array *array = &rq->rt.active; | 317 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
318 | struct rt_prio_array *array = &rt_rq->active; | ||
319 | |||
320 | list_del_init(&rt_se->run_list); | ||
321 | if (list_empty(array->queue + rt_se_prio(rt_se))) | ||
322 | __clear_bit(rt_se_prio(rt_se), array->bitmap); | ||
32 | 323 | ||
33 | list_add_tail(&p->run_list, array->queue + p->prio); | 324 | dec_rt_tasks(rt_se, rt_rq); |
34 | __set_bit(p->prio, array->bitmap); | 325 | } |
326 | |||
327 | /* | ||
328 | * Because the prio of an upper entry depends on the lower | ||
329 | * entries, we must remove entries top - down. | ||
330 | * | ||
331 | * XXX: O(1/2 h^2) because we can only walk up, not down the chain. | ||
332 | * doesn't matter much for now, as h=2 for GROUP_SCHED. | ||
333 | */ | ||
334 | static void dequeue_rt_stack(struct task_struct *p) | ||
335 | { | ||
336 | struct sched_rt_entity *rt_se, *top_se; | ||
337 | |||
338 | /* | ||
339 | * dequeue all, top - down. | ||
340 | */ | ||
341 | do { | ||
342 | rt_se = &p->rt; | ||
343 | top_se = NULL; | ||
344 | for_each_sched_rt_entity(rt_se) { | ||
345 | if (on_rt_rq(rt_se)) | ||
346 | top_se = rt_se; | ||
347 | } | ||
348 | if (top_se) | ||
349 | dequeue_rt_entity(top_se); | ||
350 | } while (top_se); | ||
35 | } | 351 | } |
36 | 352 | ||
37 | /* | 353 | /* |
38 | * Adding/removing a task to/from a priority array: | 354 | * Adding/removing a task to/from a priority array: |
39 | */ | 355 | */ |
356 | static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | ||
357 | { | ||
358 | struct sched_rt_entity *rt_se = &p->rt; | ||
359 | |||
360 | if (wakeup) | ||
361 | rt_se->timeout = 0; | ||
362 | |||
363 | dequeue_rt_stack(p); | ||
364 | |||
365 | /* | ||
366 | * enqueue everybody, bottom - up. | ||
367 | */ | ||
368 | for_each_sched_rt_entity(rt_se) | ||
369 | enqueue_rt_entity(rt_se); | ||
370 | |||
371 | inc_cpu_load(rq, p->se.load.weight); | ||
372 | } | ||
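To make the dequeue/enqueue ordering concrete, here is a hypothetical trace for a task p inside a single task group (h = 2), using only the entities defined above:

/*
 * Hypothetical trace, h = 2:
 *
 *   &p->rt  ->parent->  group rt_se  ->parent->  NULL
 *
 * dequeue_rt_stack(p) repeatedly dequeues the top-most entity that is
 * still on a runqueue, so the group rt_se comes off first, then &p->rt.
 * enqueue_task_rt() then walks for_each_sched_rt_entity() bottom-up:
 * &p->rt is queued first, which updates its rt_rq->highest_prio via
 * inc_rt_tasks(), and only then is the group rt_se queued, at which
 * point rt_se_prio() already reflects the freshly enqueued task.
 */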
373 | |||
40 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | 374 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) |
41 | { | 375 | { |
42 | struct rt_prio_array *array = &rq->rt.active; | 376 | struct sched_rt_entity *rt_se = &p->rt; |
377 | struct rt_rq *rt_rq; | ||
43 | 378 | ||
44 | update_curr_rt(rq); | 379 | update_curr_rt(rq); |
45 | 380 | ||
46 | list_del(&p->run_list); | 381 | dequeue_rt_stack(p); |
47 | if (list_empty(array->queue + p->prio)) | 382 | |
48 | __clear_bit(p->prio, array->bitmap); | 383 | /* |
384 | * re-enqueue all non-empty rt_rq entities. | ||
385 | */ | ||
386 | for_each_sched_rt_entity(rt_se) { | ||
387 | rt_rq = group_rt_rq(rt_se); | ||
388 | if (rt_rq && rt_rq->rt_nr_running) | ||
389 | enqueue_rt_entity(rt_se); | ||
390 | } | ||
391 | |||
392 | dec_cpu_load(rq, p->se.load.weight); | ||
49 | } | 393 | } |
50 | 394 | ||
51 | /* | 395 | /* |
52 | * Put task to the end of the run list without the overhead of dequeue | 396 | * Put task to the end of the run list without the overhead of dequeue |
53 | * followed by enqueue. | 397 | * followed by enqueue. |
54 | */ | 398 | */ |
399 | static | ||
400 | void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) | ||
401 | { | ||
402 | struct rt_prio_array *array = &rt_rq->active; | ||
403 | |||
404 | list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | ||
405 | } | ||
406 | |||
55 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) | 407 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) |
56 | { | 408 | { |
57 | struct rt_prio_array *array = &rq->rt.active; | 409 | struct sched_rt_entity *rt_se = &p->rt; |
410 | struct rt_rq *rt_rq; | ||
58 | 411 | ||
59 | list_move_tail(&p->run_list, array->queue + p->prio); | 412 | for_each_sched_rt_entity(rt_se) { |
413 | rt_rq = rt_rq_of_se(rt_se); | ||
414 | requeue_rt_entity(rt_rq, rt_se); | ||
415 | } | ||
60 | } | 416 | } |
61 | 417 | ||
62 | static void | 418 | static void yield_task_rt(struct rq *rq) |
63 | yield_task_rt(struct rq *rq) | ||
64 | { | 419 | { |
65 | requeue_task_rt(rq, rq->curr); | 420 | requeue_task_rt(rq, rq->curr); |
66 | } | 421 | } |
67 | 422 | ||
423 | #ifdef CONFIG_SMP | ||
424 | static int find_lowest_rq(struct task_struct *task); | ||
425 | |||
426 | static int select_task_rq_rt(struct task_struct *p, int sync) | ||
427 | { | ||
428 | struct rq *rq = task_rq(p); | ||
429 | |||
430 | /* | ||
431 | * If the current task is an RT task, then | ||
432 | * try to see if we can wake this RT task up on another | ||
433 | * runqueue. Otherwise simply start this RT task | ||
434 | * on its current runqueue. | ||
435 | * | ||
436 | * We want to avoid overloading runqueues, even if | ||
437 | * the RT task is of higher priority than the current RT task. | ||
438 | * RT tasks behave differently than other tasks. If | ||
439 | * one gets preempted, we try to push it off to another queue. | ||
440 | * So trying to keep a preempting RT task on the same | ||
441 | * cache hot CPU will force the running RT task to | ||
442 | * a cold CPU. So we waste all the cache for the lower | ||
443 | * RT task in hopes of saving some of a RT task | ||
444 | * that is just being woken and probably will have | ||
445 | * cold cache anyway. | ||
446 | */ | ||
447 | if (unlikely(rt_task(rq->curr)) && | ||
448 | (p->rt.nr_cpus_allowed > 1)) { | ||
449 | int cpu = find_lowest_rq(p); | ||
450 | |||
451 | return (cpu == -1) ? task_cpu(p) : cpu; | ||
452 | } | ||
453 | |||
454 | /* | ||
455 | * Otherwise, just let it ride on the affined RQ and the | ||
456 | * post-schedule router will push the preempted task away | ||
457 | */ | ||
458 | return task_cpu(p); | ||
459 | } | ||
460 | #endif /* CONFIG_SMP */ | ||
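A concrete instance of the placement policy described in the comment above (the priorities are illustrative; numerically lower means higher priority):

/*
 * Illustrative scenario: CPU0 is running an RT task at prio 20 when a
 * prio-10 RT task p wakes up with affinity for CPU0-3.  Because
 * rq->curr is an RT task and p may run elsewhere, select_task_rq_rt()
 * asks find_lowest_rq() for a CPU whose top RT priority is numerically
 * above 10, or one running no RT task at all (say an idle CPU2), and
 * wakes p there, leaving CPU0's cache-hot prio-20 task undisturbed.
 */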
461 | |||
68 | /* | 462 | /* |
69 | * Preempt the current task with a newly woken task if needed: | 463 | * Preempt the current task with a newly woken task if needed: |
70 | */ | 464 | */ |
@@ -74,25 +468,48 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) | |||
74 | resched_task(rq->curr); | 468 | resched_task(rq->curr); |
75 | } | 469 | } |
76 | 470 | ||
77 | static struct task_struct *pick_next_task_rt(struct rq *rq) | 471 | static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, |
472 | struct rt_rq *rt_rq) | ||
78 | { | 473 | { |
79 | struct rt_prio_array *array = &rq->rt.active; | 474 | struct rt_prio_array *array = &rt_rq->active; |
80 | struct task_struct *next; | 475 | struct sched_rt_entity *next = NULL; |
81 | struct list_head *queue; | 476 | struct list_head *queue; |
82 | int idx; | 477 | int idx; |
83 | 478 | ||
84 | idx = sched_find_first_bit(array->bitmap); | 479 | idx = sched_find_first_bit(array->bitmap); |
85 | if (idx >= MAX_RT_PRIO) | 480 | BUG_ON(idx >= MAX_RT_PRIO); |
86 | return NULL; | ||
87 | 481 | ||
88 | queue = array->queue + idx; | 482 | queue = array->queue + idx; |
89 | next = list_entry(queue->next, struct task_struct, run_list); | 483 | next = list_entry(queue->next, struct sched_rt_entity, run_list); |
90 | |||
91 | next->se.exec_start = rq->clock; | ||
92 | 484 | ||
93 | return next; | 485 | return next; |
94 | } | 486 | } |
95 | 487 | ||
488 | static struct task_struct *pick_next_task_rt(struct rq *rq) | ||
489 | { | ||
490 | struct sched_rt_entity *rt_se; | ||
491 | struct task_struct *p; | ||
492 | struct rt_rq *rt_rq; | ||
493 | |||
494 | rt_rq = &rq->rt; | ||
495 | |||
496 | if (unlikely(!rt_rq->rt_nr_running)) | ||
497 | return NULL; | ||
498 | |||
499 | if (sched_rt_ratio_exceeded(rt_rq)) | ||
500 | return NULL; | ||
501 | |||
502 | do { | ||
503 | rt_se = pick_next_rt_entity(rq, rt_rq); | ||
504 | BUG_ON(!rt_se); | ||
505 | rt_rq = group_rt_rq(rt_se); | ||
506 | } while (rt_rq); | ||
507 | |||
508 | p = rt_task_of(rt_se); | ||
509 | p->se.exec_start = rq->clock; | ||
510 | return p; | ||
511 | } | ||
512 | |||
96 | static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | 513 | static void put_prev_task_rt(struct rq *rq, struct task_struct *p) |
97 | { | 514 | { |
98 | update_curr_rt(rq); | 515 | update_curr_rt(rq); |
@@ -100,76 +517,448 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | |||
100 | } | 517 | } |
101 | 518 | ||
102 | #ifdef CONFIG_SMP | 519 | #ifdef CONFIG_SMP |
103 | /* | 520 | |
104 | * Load-balancing iterator. Note: while the runqueue stays locked | 521 | /* Only try algorithms three times */ |
105 | * during the whole iteration, the current task might be | 522 | #define RT_MAX_TRIES 3 |
106 | * dequeued so the iterator has to be dequeue-safe. Here we | 523 | |
107 | * achieve that by always pre-iterating before returning | 524 | static int double_lock_balance(struct rq *this_rq, struct rq *busiest); |
108 | * the current task: | 525 | static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); |
109 | */ | 526 | |
110 | static struct task_struct *load_balance_start_rt(void *arg) | 527 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) |
111 | { | 528 | { |
112 | struct rq *rq = arg; | 529 | if (!task_running(rq, p) && |
113 | struct rt_prio_array *array = &rq->rt.active; | 530 | (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) && |
114 | struct list_head *head, *curr; | 531 | (p->rt.nr_cpus_allowed > 1)) |
115 | struct task_struct *p; | 532 | return 1; |
533 | return 0; | ||
534 | } | ||
535 | |||
536 | /* Return the second highest RT task, NULL otherwise */ | ||
537 | static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | ||
538 | { | ||
539 | struct task_struct *next = NULL; | ||
540 | struct sched_rt_entity *rt_se; | ||
541 | struct rt_prio_array *array; | ||
542 | struct rt_rq *rt_rq; | ||
116 | int idx; | 543 | int idx; |
117 | 544 | ||
118 | idx = sched_find_first_bit(array->bitmap); | 545 | for_each_leaf_rt_rq(rt_rq, rq) { |
119 | if (idx >= MAX_RT_PRIO) | 546 | array = &rt_rq->active; |
120 | return NULL; | 547 | idx = sched_find_first_bit(array->bitmap); |
548 | next_idx: | ||
549 | if (idx >= MAX_RT_PRIO) | ||
550 | continue; | ||
551 | if (next && next->prio < idx) | ||
552 | continue; | ||
553 | list_for_each_entry(rt_se, array->queue + idx, run_list) { | ||
554 | struct task_struct *p = rt_task_of(rt_se); | ||
555 | if (pick_rt_task(rq, p, cpu)) { | ||
556 | next = p; | ||
557 | break; | ||
558 | } | ||
559 | } | ||
560 | if (!next) { | ||
561 | idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); | ||
562 | goto next_idx; | ||
563 | } | ||
564 | } | ||
121 | 565 | ||
122 | head = array->queue + idx; | 566 | return next; |
123 | curr = head->prev; | 567 | } |
124 | 568 | ||
125 | p = list_entry(curr, struct task_struct, run_list); | 569 | static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); |
126 | 570 | ||
127 | curr = curr->prev; | 571 | static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask) |
572 | { | ||
573 | int lowest_prio = -1; | ||
574 | int lowest_cpu = -1; | ||
575 | int count = 0; | ||
576 | int cpu; | ||
128 | 577 | ||
129 | rq->rt.rt_load_balance_idx = idx; | 578 | cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed); |
130 | rq->rt.rt_load_balance_head = head; | ||
131 | rq->rt.rt_load_balance_curr = curr; | ||
132 | 579 | ||
133 | return p; | 580 | /* |
581 | * Scan each rq for the lowest prio. | ||
582 | */ | ||
583 | for_each_cpu_mask(cpu, *lowest_mask) { | ||
584 | struct rq *rq = cpu_rq(cpu); | ||
585 | |||
586 | /* We look for lowest RT prio or non-rt CPU */ | ||
587 | if (rq->rt.highest_prio >= MAX_RT_PRIO) { | ||
588 | /* | ||
589 | * If we already found a low RT queue | ||
590 | * and now we find this non-rt queue, | ||
591 | * clear the mask and set our bit. | ||
592 | * Otherwise just return the queue as is | ||
593 | * and the count==1 will cause the algorithm | ||
594 | * to use the first bit found. | ||
595 | */ | ||
596 | if (lowest_cpu != -1) { | ||
597 | cpus_clear(*lowest_mask); | ||
598 | cpu_set(rq->cpu, *lowest_mask); | ||
599 | } | ||
600 | return 1; | ||
601 | } | ||
602 | |||
603 | /* no locking for now */ | ||
604 | if ((rq->rt.highest_prio > task->prio) | ||
605 | && (rq->rt.highest_prio >= lowest_prio)) { | ||
606 | if (rq->rt.highest_prio > lowest_prio) { | ||
607 | /* new low - clear old data */ | ||
608 | lowest_prio = rq->rt.highest_prio; | ||
609 | lowest_cpu = cpu; | ||
610 | count = 0; | ||
611 | } | ||
612 | count++; | ||
613 | } else | ||
614 | cpu_clear(cpu, *lowest_mask); | ||
615 | } | ||
616 | |||
617 | /* | ||
618 | * Clear out all the set bits that represent | ||
619 | * runqueues that were of higher prio than | ||
620 | * the lowest_prio. | ||
621 | */ | ||
622 | if (lowest_cpu > 0) { | ||
623 | /* | ||
624 | * Perhaps we could add another cpumask op to | ||
625 | * zero out bits. Like cpu_zero_bits(cpumask, nrbits); | ||
626 | * Then that could be optimized to use memset and such. | ||
627 | */ | ||
628 | for_each_cpu_mask(cpu, *lowest_mask) { | ||
629 | if (cpu >= lowest_cpu) | ||
630 | break; | ||
631 | cpu_clear(cpu, *lowest_mask); | ||
632 | } | ||
633 | } | ||
634 | |||
635 | return count; | ||
134 | } | 636 | } |
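The two cases above (a CPU with no queued RT work wins outright; otherwise every CPU tied at the lowest eligible RT priority is kept) are easier to follow in a stand-alone model. Everything below is a hypothetical illustration, not kernel API, and it ignores the rq locking and the post-loop mask trimming:

/* Simplified model of the mask filtering done by find_lowest_cpus(). */
#define MODEL_NR_CPUS		4
#define MODEL_MAX_RT_PRIO	100	/* >= this means "no RT task queued" */

static int model_lowest_mask(const int highest_prio[MODEL_NR_CPUS],
			     int task_prio, unsigned long *mask)
{
	int lowest_prio = -1, count = 0, cpu;

	*mask = 0;
	for (cpu = 0; cpu < MODEL_NR_CPUS; cpu++) {
		int prio = highest_prio[cpu];

		if (prio >= MODEL_MAX_RT_PRIO) {
			*mask = 1UL << cpu;	/* non-RT cpu: best possible target */
			return 1;
		}
		if (prio <= task_prio || prio < lowest_prio)
			continue;	/* can't preempt, or busier than best level so far */
		if (prio > lowest_prio) {	/* new lowest-priority level found */
			lowest_prio = prio;
			*mask = 0;
			count = 0;
		}
		*mask |= 1UL << cpu;
		count++;
	}
	return count;
}

For highest_prio = {50, 99, 99, 30} and task_prio = 40 the model keeps CPUs 1 and 2 and returns 2, which mirrors the count/mask pair find_lowest_cpus() hands back to find_lowest_rq().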
135 | 637 | ||
136 | static struct task_struct *load_balance_next_rt(void *arg) | 638 | static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) |
137 | { | 639 | { |
138 | struct rq *rq = arg; | 640 | int first; |
139 | struct rt_prio_array *array = &rq->rt.active; | 641 | |
140 | struct list_head *head, *curr; | 642 | /* "this_cpu" is cheaper to preempt than a remote processor */ |
141 | struct task_struct *p; | 643 | if ((this_cpu != -1) && cpu_isset(this_cpu, *mask)) |
142 | int idx; | 644 | return this_cpu; |
645 | |||
646 | first = first_cpu(*mask); | ||
647 | if (first != NR_CPUS) | ||
648 | return first; | ||
649 | |||
650 | return -1; | ||
651 | } | ||
652 | |||
653 | static int find_lowest_rq(struct task_struct *task) | ||
654 | { | ||
655 | struct sched_domain *sd; | ||
656 | cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); | ||
657 | int this_cpu = smp_processor_id(); | ||
658 | int cpu = task_cpu(task); | ||
659 | int count = find_lowest_cpus(task, lowest_mask); | ||
143 | 660 | ||
144 | idx = rq->rt.rt_load_balance_idx; | 661 | if (!count) |
145 | head = rq->rt.rt_load_balance_head; | 662 | return -1; /* No targets found */ |
146 | curr = rq->rt.rt_load_balance_curr; | ||
147 | 663 | ||
148 | /* | 664 | /* |
149 | * If we arrived back to the head again then | 665 | * There is no sense in performing an optimal search if only one |
150 | * iterate to the next queue (if any): | 666 | * target is found. |
151 | */ | 667 | */ |
152 | if (unlikely(head == curr)) { | 668 | if (count == 1) |
153 | int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1); | 669 | return first_cpu(*lowest_mask); |
154 | 670 | ||
155 | if (next_idx >= MAX_RT_PRIO) | 671 | /* |
156 | return NULL; | 672 | * At this point we have built a mask of cpus representing the |
673 | * lowest priority tasks in the system. Now we want to elect | ||
674 | * the best one based on our affinity and topology. | ||
675 | * | ||
676 | * We prioritize the last cpu that the task executed on since | ||
677 | * it is most likely cache-hot in that location. | ||
678 | */ | ||
679 | if (cpu_isset(cpu, *lowest_mask)) | ||
680 | return cpu; | ||
681 | |||
682 | /* | ||
683 | * Otherwise, we consult the sched_domains span maps to figure | ||
684 | * out which cpu is logically closest to our hot cache data. | ||
685 | */ | ||
686 | if (this_cpu == cpu) | ||
687 | this_cpu = -1; /* Skip this_cpu opt if the same */ | ||
688 | |||
689 | for_each_domain(cpu, sd) { | ||
690 | if (sd->flags & SD_WAKE_AFFINE) { | ||
691 | cpumask_t domain_mask; | ||
692 | int best_cpu; | ||
157 | 693 | ||
158 | idx = next_idx; | 694 | cpus_and(domain_mask, sd->span, *lowest_mask); |
159 | head = array->queue + idx; | ||
160 | curr = head->prev; | ||
161 | 695 | ||
162 | rq->rt.rt_load_balance_idx = idx; | 696 | best_cpu = pick_optimal_cpu(this_cpu, |
163 | rq->rt.rt_load_balance_head = head; | 697 | &domain_mask); |
698 | if (best_cpu != -1) | ||
699 | return best_cpu; | ||
700 | } | ||
164 | } | 701 | } |
165 | 702 | ||
166 | p = list_entry(curr, struct task_struct, run_list); | 703 | /* |
704 | * And finally, if there were no matches within the domains | ||
705 | * just give the caller *something* to work with from the compatible | ||
706 | * locations. | ||
707 | */ | ||
708 | return pick_optimal_cpu(this_cpu, lowest_mask); | ||
709 | } | ||
167 | 710 | ||
168 | curr = curr->prev; | 711 | /* Will lock the rq it finds */ |
712 | static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | ||
713 | { | ||
714 | struct rq *lowest_rq = NULL; | ||
715 | int tries; | ||
716 | int cpu; | ||
169 | 717 | ||
170 | rq->rt.rt_load_balance_curr = curr; | 718 | for (tries = 0; tries < RT_MAX_TRIES; tries++) { |
719 | cpu = find_lowest_rq(task); | ||
171 | 720 | ||
172 | return p; | 721 | if ((cpu == -1) || (cpu == rq->cpu)) |
722 | break; | ||
723 | |||
724 | lowest_rq = cpu_rq(cpu); | ||
725 | |||
726 | /* if the prio of this runqueue changed, try again */ | ||
727 | if (double_lock_balance(rq, lowest_rq)) { | ||
728 | /* | ||
729 | * We had to unlock the run queue. In | ||
730 | * the meantime, the task could have | ||
731 | * migrated already or had its affinity changed. | ||
732 | * Also make sure that it wasn't scheduled on its rq. | ||
733 | */ | ||
734 | if (unlikely(task_rq(task) != rq || | ||
735 | !cpu_isset(lowest_rq->cpu, | ||
736 | task->cpus_allowed) || | ||
737 | task_running(rq, task) || | ||
738 | !task->se.on_rq)) { | ||
739 | |||
740 | spin_unlock(&lowest_rq->lock); | ||
741 | lowest_rq = NULL; | ||
742 | break; | ||
743 | } | ||
744 | } | ||
745 | |||
746 | /* If this rq is still suitable use it. */ | ||
747 | if (lowest_rq->rt.highest_prio > task->prio) | ||
748 | break; | ||
749 | |||
750 | /* try again */ | ||
751 | spin_unlock(&lowest_rq->lock); | ||
752 | lowest_rq = NULL; | ||
753 | } | ||
754 | |||
755 | return lowest_rq; | ||
756 | } | ||
757 | |||
758 | /* | ||
759 | * If the current CPU has more than one RT task, see if the non- | ||
760 | * running task can migrate over to a CPU that is running a task | ||
761 | * of lesser priority. | ||
762 | */ | ||
763 | static int push_rt_task(struct rq *rq) | ||
764 | { | ||
765 | struct task_struct *next_task; | ||
766 | struct rq *lowest_rq; | ||
767 | int ret = 0; | ||
768 | int paranoid = RT_MAX_TRIES; | ||
769 | |||
770 | if (!rq->rt.overloaded) | ||
771 | return 0; | ||
772 | |||
773 | next_task = pick_next_highest_task_rt(rq, -1); | ||
774 | if (!next_task) | ||
775 | return 0; | ||
776 | |||
777 | retry: | ||
778 | if (unlikely(next_task == rq->curr)) { | ||
779 | WARN_ON(1); | ||
780 | return 0; | ||
781 | } | ||
782 | |||
783 | /* | ||
784 | * It's possible that the next_task slipped in of | ||
785 | * higher priority than current. If that's the case | ||
786 | * just reschedule current. | ||
787 | */ | ||
788 | if (unlikely(next_task->prio < rq->curr->prio)) { | ||
789 | resched_task(rq->curr); | ||
790 | return 0; | ||
791 | } | ||
792 | |||
793 | /* We might release rq lock */ | ||
794 | get_task_struct(next_task); | ||
795 | |||
796 | /* find_lock_lowest_rq locks the rq if found */ | ||
797 | lowest_rq = find_lock_lowest_rq(next_task, rq); | ||
798 | if (!lowest_rq) { | ||
799 | struct task_struct *task; | ||
800 | /* | ||
801 | * find_lock_lowest_rq releases rq->lock | ||
802 | * so it is possible that next_task has changed. | ||
803 | * If it has, then try again. | ||
804 | */ | ||
805 | task = pick_next_highest_task_rt(rq, -1); | ||
806 | if (unlikely(task != next_task) && task && paranoid--) { | ||
807 | put_task_struct(next_task); | ||
808 | next_task = task; | ||
809 | goto retry; | ||
810 | } | ||
811 | goto out; | ||
812 | } | ||
813 | |||
814 | deactivate_task(rq, next_task, 0); | ||
815 | set_task_cpu(next_task, lowest_rq->cpu); | ||
816 | activate_task(lowest_rq, next_task, 0); | ||
817 | |||
818 | resched_task(lowest_rq->curr); | ||
819 | |||
820 | spin_unlock(&lowest_rq->lock); | ||
821 | |||
822 | ret = 1; | ||
823 | out: | ||
824 | put_task_struct(next_task); | ||
825 | |||
826 | return ret; | ||
827 | } | ||
828 | |||
829 | /* | ||
830 | * TODO: Currently we just use the second highest prio task on | ||
831 | * the queue, and stop when it can't migrate (or there's | ||
832 | * no more RT tasks). There may be a case where a lower | ||
833 | * priority RT task has a different affinity than the | ||
834 | * higher RT task. In this case the lower RT task could | ||
835 | * possibly be able to migrate whereas the higher priority | ||
836 | * RT task could not. We currently ignore this issue. | ||
837 | * Enhancements are welcome! | ||
838 | */ | ||
839 | static void push_rt_tasks(struct rq *rq) | ||
840 | { | ||
841 | /* push_rt_task will return true if it moved an RT */ | ||
842 | while (push_rt_task(rq)) | ||
843 | ; | ||
844 | } | ||
845 | |||
846 | static int pull_rt_task(struct rq *this_rq) | ||
847 | { | ||
848 | int this_cpu = this_rq->cpu, ret = 0, cpu; | ||
849 | struct task_struct *p, *next; | ||
850 | struct rq *src_rq; | ||
851 | |||
852 | if (likely(!rt_overloaded(this_rq))) | ||
853 | return 0; | ||
854 | |||
855 | next = pick_next_task_rt(this_rq); | ||
856 | |||
857 | for_each_cpu_mask(cpu, this_rq->rd->rto_mask) { | ||
858 | if (this_cpu == cpu) | ||
859 | continue; | ||
860 | |||
861 | src_rq = cpu_rq(cpu); | ||
862 | /* | ||
863 | * We can potentially drop this_rq's lock in | ||
864 | * double_lock_balance, and another CPU could | ||
865 | * steal our next task - hence we must cause | ||
866 | * the caller to recalculate the next task | ||
867 | * in that case: | ||
868 | */ | ||
869 | if (double_lock_balance(this_rq, src_rq)) { | ||
870 | struct task_struct *old_next = next; | ||
871 | |||
872 | next = pick_next_task_rt(this_rq); | ||
873 | if (next != old_next) | ||
874 | ret = 1; | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * Are there still pullable RT tasks? | ||
879 | */ | ||
880 | if (src_rq->rt.rt_nr_running <= 1) | ||
881 | goto skip; | ||
882 | |||
883 | p = pick_next_highest_task_rt(src_rq, this_cpu); | ||
884 | |||
885 | /* | ||
886 | * Do we have an RT task that preempts | ||
887 | * the to-be-scheduled task? | ||
888 | */ | ||
889 | if (p && (!next || (p->prio < next->prio))) { | ||
890 | WARN_ON(p == src_rq->curr); | ||
891 | WARN_ON(!p->se.on_rq); | ||
892 | |||
893 | /* | ||
894 | * There's a chance that p is higher in priority | ||
895 | * than what's currently running on its cpu. | ||
896 | * This is just that p is waking up and hasn't | ||
897 | * had a chance to schedule. We only pull | ||
898 | * p if it is lower in priority than the | ||
899 | * current task on the run queue or | ||
900 | * this_rq next task is lower in prio than | ||
901 | * the current task on that rq. | ||
902 | */ | ||
903 | if (p->prio < src_rq->curr->prio || | ||
904 | (next && next->prio < src_rq->curr->prio)) | ||
905 | goto skip; | ||
906 | |||
907 | ret = 1; | ||
908 | |||
909 | deactivate_task(src_rq, p, 0); | ||
910 | set_task_cpu(p, this_cpu); | ||
911 | activate_task(this_rq, p, 0); | ||
912 | /* | ||
913 | * We continue with the search, just in | ||
914 | * case there's an even higher prio task | ||
915 | * in another runqueue. (low likelihood | ||
916 | * but possible) | ||
917 | * | ||
918 | * Update next so that we won't pick a task | ||
919 | * on another cpu with a priority lower (or equal) | ||
920 | * than the one we just picked. | ||
921 | */ | ||
922 | next = p; | ||
923 | |||
924 | } | ||
925 | skip: | ||
926 | spin_unlock(&src_rq->lock); | ||
927 | } | ||
928 | |||
929 | return ret; | ||
930 | } | ||
931 | |||
932 | static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) | ||
933 | { | ||
934 | /* Try to pull RT tasks here if we lower this rq's prio */ | ||
935 | if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio) | ||
936 | pull_rt_task(rq); | ||
937 | } | ||
938 | |||
939 | static void post_schedule_rt(struct rq *rq) | ||
940 | { | ||
941 | /* | ||
942 | * If we have more than one rt_task queued, then | ||
943 | * see if we can push the other rt_tasks off to other CPUs. | ||
944 | * Note we may release the rq lock, and since | ||
945 | * the lock was owned by prev, we need to release it | ||
946 | * first via finish_lock_switch and then reacquire it here. | ||
947 | */ | ||
948 | if (unlikely(rq->rt.overloaded)) { | ||
949 | spin_lock_irq(&rq->lock); | ||
950 | push_rt_tasks(rq); | ||
951 | spin_unlock_irq(&rq->lock); | ||
952 | } | ||
953 | } | ||
954 | |||
955 | |||
956 | static void task_wake_up_rt(struct rq *rq, struct task_struct *p) | ||
957 | { | ||
958 | if (!task_running(rq, p) && | ||
959 | (p->prio >= rq->rt.highest_prio) && | ||
960 | rq->rt.overloaded) | ||
961 | push_rt_tasks(rq); | ||
173 | } | 962 | } |
174 | 963 | ||
175 | static unsigned long | 964 | static unsigned long |
@@ -178,38 +967,170 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
178 | struct sched_domain *sd, enum cpu_idle_type idle, | 967 | struct sched_domain *sd, enum cpu_idle_type idle, |
179 | int *all_pinned, int *this_best_prio) | 968 | int *all_pinned, int *this_best_prio) |
180 | { | 969 | { |
181 | struct rq_iterator rt_rq_iterator; | 970 | /* don't touch RT tasks */ |
182 | 971 | return 0; | |
183 | rt_rq_iterator.start = load_balance_start_rt; | ||
184 | rt_rq_iterator.next = load_balance_next_rt; | ||
185 | /* pass 'busiest' rq argument into | ||
186 | * load_balance_[start|next]_rt iterators | ||
187 | */ | ||
188 | rt_rq_iterator.arg = busiest; | ||
189 | |||
190 | return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd, | ||
191 | idle, all_pinned, this_best_prio, &rt_rq_iterator); | ||
192 | } | 972 | } |
193 | 973 | ||
194 | static int | 974 | static int |
195 | move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | 975 | move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, |
196 | struct sched_domain *sd, enum cpu_idle_type idle) | 976 | struct sched_domain *sd, enum cpu_idle_type idle) |
197 | { | 977 | { |
198 | struct rq_iterator rt_rq_iterator; | 978 | /* don't touch RT tasks */ |
979 | return 0; | ||
980 | } | ||
981 | |||
982 | static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask) | ||
983 | { | ||
984 | int weight = cpus_weight(*new_mask); | ||
985 | |||
986 | BUG_ON(!rt_task(p)); | ||
199 | 987 | ||
200 | rt_rq_iterator.start = load_balance_start_rt; | 988 | /* |
201 | rt_rq_iterator.next = load_balance_next_rt; | 989 | * Update the migration status of the RQ if we have an RT task |
202 | rt_rq_iterator.arg = busiest; | 990 | * which is running AND changing its weight value. |
991 | */ | ||
992 | if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { | ||
993 | struct rq *rq = task_rq(p); | ||
994 | |||
995 | if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) { | ||
996 | rq->rt.rt_nr_migratory++; | ||
997 | } else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) { | ||
998 | BUG_ON(!rq->rt.rt_nr_migratory); | ||
999 | rq->rt.rt_nr_migratory--; | ||
1000 | } | ||
1001 | |||
1002 | update_rt_migration(rq); | ||
1003 | } | ||
203 | 1004 | ||
204 | return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle, | 1005 | p->cpus_allowed = *new_mask; |
205 | &rt_rq_iterator); | 1006 | p->rt.nr_cpus_allowed = weight; |
206 | } | 1007 | } |
207 | #endif | ||
208 | 1008 | ||
209 | static void task_tick_rt(struct rq *rq, struct task_struct *p) | 1009 | /* Assumes rq->lock is held */ |
1010 | static void join_domain_rt(struct rq *rq) | ||
1011 | { | ||
1012 | if (rq->rt.overloaded) | ||
1013 | rt_set_overload(rq); | ||
1014 | } | ||
1015 | |||
1016 | /* Assumes rq->lock is held */ | ||
1017 | static void leave_domain_rt(struct rq *rq) | ||
1018 | { | ||
1019 | if (rq->rt.overloaded) | ||
1020 | rt_clear_overload(rq); | ||
1021 | } | ||
1022 | |||
1023 | /* | ||
1024 | * When we switch from the rt queue, we bring ourselves to a position | ||
1025 | * where we might want to pull RT tasks from other runqueues. | ||
1026 | */ | ||
1027 | static void switched_from_rt(struct rq *rq, struct task_struct *p, | ||
1028 | int running) | ||
1029 | { | ||
1030 | /* | ||
1031 | * If there are other RT tasks then we will reschedule | ||
1032 | * and the scheduling of the other RT tasks will handle | ||
1033 | * the balancing. But if we are the last RT task | ||
1034 | * we may need to handle the pulling of RT tasks | ||
1035 | * now. | ||
1036 | */ | ||
1037 | if (!rq->rt.rt_nr_running) | ||
1038 | pull_rt_task(rq); | ||
1039 | } | ||
1040 | #endif /* CONFIG_SMP */ | ||
1041 | |||
1042 | /* | ||
1043 | * When switching a task to RT, we may overload the runqueue | ||
1044 | * with RT tasks. In this case we try to push them off to | ||
1045 | * other runqueues. | ||
1046 | */ | ||
1047 | static void switched_to_rt(struct rq *rq, struct task_struct *p, | ||
1048 | int running) | ||
1049 | { | ||
1050 | int check_resched = 1; | ||
1051 | |||
1052 | /* | ||
1053 | * If we are already running, then there's nothing | ||
1054 | * that needs to be done. But if we are not running | ||
1055 | * we may need to preempt the current running task. | ||
1056 | * If that current running task is also an RT task | ||
1057 | * then see if we can move to another run queue. | ||
1058 | */ | ||
1059 | if (!running) { | ||
1060 | #ifdef CONFIG_SMP | ||
1061 | if (rq->rt.overloaded && push_rt_task(rq) && | ||
1062 | /* Don't resched if we changed runqueues */ | ||
1063 | rq != task_rq(p)) | ||
1064 | check_resched = 0; | ||
1065 | #endif /* CONFIG_SMP */ | ||
1066 | if (check_resched && p->prio < rq->curr->prio) | ||
1067 | resched_task(rq->curr); | ||
1068 | } | ||
1069 | } | ||
1070 | |||
1071 | /* | ||
1072 | * Priority of the task has changed. This may cause | ||
1073 | * us to initiate a push or pull. | ||
1074 | */ | ||
1075 | static void prio_changed_rt(struct rq *rq, struct task_struct *p, | ||
1076 | int oldprio, int running) | ||
1077 | { | ||
1078 | if (running) { | ||
1079 | #ifdef CONFIG_SMP | ||
1080 | /* | ||
1081 | * If our priority decreases while running, we | ||
1082 | * may need to pull tasks to this runqueue. | ||
1083 | */ | ||
1084 | if (oldprio < p->prio) | ||
1085 | pull_rt_task(rq); | ||
1086 | /* | ||
1087 | * If there's a higher priority task waiting to run | ||
1088 | * then reschedule. | ||
1089 | */ | ||
1090 | if (p->prio > rq->rt.highest_prio) | ||
1091 | resched_task(p); | ||
1092 | #else | ||
1093 | /* For UP simply resched on drop of prio */ | ||
1094 | if (oldprio < p->prio) | ||
1095 | resched_task(p); | ||
1096 | #endif /* CONFIG_SMP */ | ||
1097 | } else { | ||
1098 | /* | ||
1099 | * This task is not running, but if it is | ||
1100 | * greater than the current running task | ||
1101 | * then reschedule. | ||
1102 | */ | ||
1103 | if (p->prio < rq->curr->prio) | ||
1104 | resched_task(rq->curr); | ||
1105 | } | ||
1106 | } | ||
1107 | |||
1108 | static void watchdog(struct rq *rq, struct task_struct *p) | ||
1109 | { | ||
1110 | unsigned long soft, hard; | ||
1111 | |||
1112 | if (!p->signal) | ||
1113 | return; | ||
1114 | |||
1115 | soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur; | ||
1116 | hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max; | ||
1117 | |||
1118 | if (soft != RLIM_INFINITY) { | ||
1119 | unsigned long next; | ||
1120 | |||
1121 | p->rt.timeout++; | ||
1122 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); | ||
1123 | if (p->rt.timeout > next) | ||
1124 | p->it_sched_expires = p->se.sum_exec_runtime; | ||
1125 | } | ||
1126 | } | ||
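The RLIMIT_RTTIME soft limit is specified in microseconds while p->rt.timeout counts scheduler ticks; a quick conversion, with HZ assumed to be 1000 purely for illustration:

/*
 * Illustrative only, assuming HZ == 1000, so USEC_PER_SEC/HZ is
 * 1000 us per tick.  A soft RLIMIT_RTTIME of 500000 us gives
 *
 *   next = DIV_ROUND_UP(500000, 1000) = 500 ticks
 *
 * Once the task has run for more than 500 ticks without sleeping
 * (enqueue_task_rt() resets rt.timeout on wakeup), it_sched_expires is
 * pulled in to the current sum_exec_runtime, so the per-thread CPU
 * timer path runs promptly and can act on the exceeded limit.
 */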
1127 | |||
1128 | static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) | ||
210 | { | 1129 | { |
211 | update_curr_rt(rq); | 1130 | update_curr_rt(rq); |
212 | 1131 | ||
1132 | watchdog(rq, p); | ||
1133 | |||
213 | /* | 1134 | /* |
214 | * RR tasks need a special form of timeslice management. | 1135 | * RR tasks need a special form of timeslice management. |
215 | * FIFO tasks have no timeslices. | 1136 | * FIFO tasks have no timeslices. |
@@ -217,16 +1138,16 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p) | |||
217 | if (p->policy != SCHED_RR) | 1138 | if (p->policy != SCHED_RR) |
218 | return; | 1139 | return; |
219 | 1140 | ||
220 | if (--p->time_slice) | 1141 | if (--p->rt.time_slice) |
221 | return; | 1142 | return; |
222 | 1143 | ||
223 | p->time_slice = DEF_TIMESLICE; | 1144 | p->rt.time_slice = DEF_TIMESLICE; |
224 | 1145 | ||
225 | /* | 1146 | /* |
226 | * Requeue to the end of queue if we are not the only element | 1147 | * Requeue to the end of queue if we are not the only element |
227 | * on the queue: | 1148 | * on the queue: |
228 | */ | 1149 | */ |
229 | if (p->run_list.prev != p->run_list.next) { | 1150 | if (p->rt.run_list.prev != p->rt.run_list.next) { |
230 | requeue_task_rt(rq, p); | 1151 | requeue_task_rt(rq, p); |
231 | set_tsk_need_resched(p); | 1152 | set_tsk_need_resched(p); |
232 | } | 1153 | } |
@@ -244,6 +1165,9 @@ const struct sched_class rt_sched_class = { | |||
244 | .enqueue_task = enqueue_task_rt, | 1165 | .enqueue_task = enqueue_task_rt, |
245 | .dequeue_task = dequeue_task_rt, | 1166 | .dequeue_task = dequeue_task_rt, |
246 | .yield_task = yield_task_rt, | 1167 | .yield_task = yield_task_rt, |
1168 | #ifdef CONFIG_SMP | ||
1169 | .select_task_rq = select_task_rq_rt, | ||
1170 | #endif /* CONFIG_SMP */ | ||
247 | 1171 | ||
248 | .check_preempt_curr = check_preempt_curr_rt, | 1172 | .check_preempt_curr = check_preempt_curr_rt, |
249 | 1173 | ||
@@ -253,8 +1177,18 @@ const struct sched_class rt_sched_class = { | |||
253 | #ifdef CONFIG_SMP | 1177 | #ifdef CONFIG_SMP |
254 | .load_balance = load_balance_rt, | 1178 | .load_balance = load_balance_rt, |
255 | .move_one_task = move_one_task_rt, | 1179 | .move_one_task = move_one_task_rt, |
1180 | .set_cpus_allowed = set_cpus_allowed_rt, | ||
1181 | .join_domain = join_domain_rt, | ||
1182 | .leave_domain = leave_domain_rt, | ||
1183 | .pre_schedule = pre_schedule_rt, | ||
1184 | .post_schedule = post_schedule_rt, | ||
1185 | .task_wake_up = task_wake_up_rt, | ||
1186 | .switched_from = switched_from_rt, | ||
256 | #endif | 1187 | #endif |
257 | 1188 | ||
258 | .set_curr_task = set_curr_task_rt, | 1189 | .set_curr_task = set_curr_task_rt, |
259 | .task_tick = task_tick_rt, | 1190 | .task_tick = task_tick_rt, |
1191 | |||
1192 | .prio_changed = prio_changed_rt, | ||
1193 | .switched_to = switched_to_rt, | ||
260 | }; | 1194 | }; |