-rw-r--r--  kernel/sched.c    |   8
-rw-r--r--  kernel/sched_rt.c | 225
2 files changed, 231 insertions, 2 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 6185fa080ec8..97cab609fc31 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1952,6 +1952,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+	schedule_tail_balance_rt(rq);
+
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
 		mmdrop(mm);
@@ -2185,11 +2187,13 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 /*
  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
  */
-static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(this_rq->lock)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
+	int ret = 0;
+
 	if (unlikely(!irqs_disabled())) {
 		/* printk() doesn't work good under rq->lock */
 		spin_unlock(&this_rq->lock);
@@ -2200,9 +2204,11 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
 			spin_unlock(&this_rq->lock);
 			spin_lock(&busiest->lock);
 			spin_lock(&this_rq->lock);
+			ret = 1;
 		} else
 			spin_lock(&busiest->lock);
 	}
+	return ret;
 }
 
 /*
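The sched.c change above turns double_lock_balance() into an int-returning helper: it reports whether this_rq->lock had to be dropped in order to take both runqueue locks in address order, so the caller knows any decision made under the old lock may be stale. A rough user-space sketch of that contract, using pthread mutexes; the toy_* names and fields are illustrative assumptions, not part of the patch:

/* Illustrative user-space analogue (not kernel code) of the
 * "take both locks in a fixed order, report whether the first
 * lock was dropped" pattern exposed by double_lock_balance().
 */
#include <pthread.h>
#include <stdio.h>

struct toy_rq {
	pthread_mutex_t lock;
	int highest_prio;	/* state that may change while unlocked */
};

/* Lock both runqueues; return 1 if this_rq->lock had to be dropped. */
static int toy_double_lock(struct toy_rq *this_rq, struct toy_rq *busiest)
{
	int ret = 0;

	if (pthread_mutex_trylock(&busiest->lock) != 0) {
		if (busiest < this_rq) {
			/* Respect lock ordering: drop, then take both. */
			pthread_mutex_unlock(&this_rq->lock);
			pthread_mutex_lock(&busiest->lock);
			pthread_mutex_lock(&this_rq->lock);
			ret = 1;	/* caller must revalidate its state */
		} else {
			pthread_mutex_lock(&busiest->lock);
		}
	}
	return ret;
}

int main(void)
{
	struct toy_rq a = { PTHREAD_MUTEX_INITIALIZER, 10 };
	struct toy_rq b = { PTHREAD_MUTEX_INITIALIZER, 90 };

	pthread_mutex_lock(&a.lock);
	if (toy_double_lock(&a, &b))
		printf("lock was dropped: recheck cached decisions\n");
	else
		printf("locks taken without dropping a.lock\n");
	pthread_mutex_unlock(&b.lock);
	pthread_mutex_unlock(&a.lock);
	return 0;
}

In the sched_rt.c hunk below, find_lock_lowest_rq() relies on exactly this return value to decide whether the task it is about to push must be revalidated.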
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 136c2857a049..7815e90b1147 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -137,6 +137,227 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 }
 
 #ifdef CONFIG_SMP
+/* Only try algorithms three times */
+#define RT_MAX_TRIES 3
+
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
+static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
+
+/* Return the second highest RT task, NULL otherwise */
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+{
+	struct rt_prio_array *array = &rq->rt.active;
+	struct task_struct *next;
+	struct list_head *queue;
+	int idx;
+
+	assert_spin_locked(&rq->lock);
+
+	if (likely(rq->rt.rt_nr_running < 2))
+		return NULL;
+
+	idx = sched_find_first_bit(array->bitmap);
+	if (unlikely(idx >= MAX_RT_PRIO)) {
+		WARN_ON(1); /* rt_nr_running is bad */
+		return NULL;
+	}
+
+	queue = array->queue + idx;
+	next = list_entry(queue->next, struct task_struct, run_list);
+	if (unlikely(next != rq->curr))
+		return next;
+
+	if (queue->next->next != queue) {
+		/* same prio task */
+		next = list_entry(queue->next->next, struct task_struct, run_list);
+		return next;
+	}
+
+	/* slower, but more flexible */
+	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
+	if (unlikely(idx >= MAX_RT_PRIO)) {
+		WARN_ON(1); /* rt_nr_running was 2 and above! */
+		return NULL;
+	}
+
+	queue = array->queue + idx;
+	next = list_entry(queue->next, struct task_struct, run_list);
+
+	return next;
+}
+
+static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
+
+/* Will lock the rq it finds */
+static struct rq *find_lock_lowest_rq(struct task_struct *task,
+				      struct rq *this_rq)
+{
+	struct rq *lowest_rq = NULL;
+	int cpu;
+	int tries;
+	cpumask_t *cpu_mask = &__get_cpu_var(local_cpu_mask);
+
+	cpus_and(*cpu_mask, cpu_online_map, task->cpus_allowed);
+
+	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
+		/*
+		 * Scan each rq for the lowest prio.
+		 */
+		for_each_cpu_mask(cpu, *cpu_mask) {
+			struct rq *rq = &per_cpu(runqueues, cpu);
+
+			if (cpu == this_rq->cpu)
+				continue;
+
+			/* We look for lowest RT prio or non-rt CPU */
+			if (rq->rt.highest_prio >= MAX_RT_PRIO) {
+				lowest_rq = rq;
+				break;
+			}
+
+			/* no locking for now */
+			if (rq->rt.highest_prio > task->prio &&
+			    (!lowest_rq || rq->rt.highest_prio > lowest_rq->rt.highest_prio)) {
+				lowest_rq = rq;
+			}
+		}
+
+		if (!lowest_rq)
+			break;
+
+		/* if the prio of this runqueue changed, try again */
+		if (double_lock_balance(this_rq, lowest_rq)) {
+			/*
+			 * We had to unlock the run queue. In
+			 * the meantime, task could have
+			 * migrated already or had its affinity changed.
+			 * Also make sure that it wasn't scheduled on its rq.
+			 */
+			if (unlikely(task_rq(task) != this_rq ||
+				     !cpu_isset(lowest_rq->cpu, task->cpus_allowed) ||
+				     task_running(this_rq, task) ||
+				     !task->se.on_rq)) {
+				spin_unlock(&lowest_rq->lock);
+				lowest_rq = NULL;
+				break;
+			}
+		}
+
+		/* If this rq is still suitable use it. */
+		if (lowest_rq->rt.highest_prio > task->prio)
+			break;
+
+		/* try again */
+		spin_unlock(&lowest_rq->lock);
+		lowest_rq = NULL;
+	}
+
+	return lowest_rq;
+}
+
+/*
+ * If the current CPU has more than one RT task, see if the non
+ * running task can migrate over to a CPU that is running a task
+ * of lesser priority.
+ */
+static int push_rt_task(struct rq *this_rq)
+{
+	struct task_struct *next_task;
+	struct rq *lowest_rq;
+	int ret = 0;
+	int paranoid = RT_MAX_TRIES;
+
+	assert_spin_locked(&this_rq->lock);
+
+	next_task = pick_next_highest_task_rt(this_rq);
+	if (!next_task)
+		return 0;
+
+ retry:
+	if (unlikely(next_task == this_rq->curr))
+		return 0;
+
+	/*
+	 * It's possible that the next_task slipped in with a
+	 * higher priority than current. If that's the case
+	 * just reschedule current.
+	 */
+	if (unlikely(next_task->prio < this_rq->curr->prio)) {
+		resched_task(this_rq->curr);
+		return 0;
+	}
+
+	/* We might release this_rq lock */
+	get_task_struct(next_task);
+
+	/* find_lock_lowest_rq locks the rq if found */
+	lowest_rq = find_lock_lowest_rq(next_task, this_rq);
+	if (!lowest_rq) {
+		struct task_struct *task;
+		/*
+		 * find_lock_lowest_rq releases this_rq->lock
+		 * so it is possible that next_task has changed.
+		 * If it has, then try again.
+		 */
+		task = pick_next_highest_task_rt(this_rq);
+		if (unlikely(task != next_task) && task && paranoid--) {
+			put_task_struct(next_task);
+			next_task = task;
+			goto retry;
+		}
+		goto out;
+	}
+
+	assert_spin_locked(&lowest_rq->lock);
+
+	deactivate_task(this_rq, next_task, 0);
+	set_task_cpu(next_task, lowest_rq->cpu);
+	activate_task(lowest_rq, next_task, 0);
+
+	resched_task(lowest_rq->curr);
+
+	spin_unlock(&lowest_rq->lock);
+
+	ret = 1;
+out:
+	put_task_struct(next_task);
+
+	return ret;
+}
+
+/*
+ * TODO: Currently we just use the second highest prio task on
+ *       the queue, and stop when it can't migrate (or there are
+ *       no more RT tasks). There may be a case where a lower
+ *       priority RT task has a different affinity than the
+ *       higher RT task. In this case the lower RT task could
+ *       possibly be able to migrate whereas the higher priority
+ *       RT task could not. We currently ignore this issue.
+ *       Enhancements are welcome!
+ */
+static void push_rt_tasks(struct rq *rq)
+{
+	/* push_rt_task will return true if it moved an RT */
+	while (push_rt_task(rq))
+		;
+}
+
+static void schedule_tail_balance_rt(struct rq *rq)
+{
+	/*
+	 * If we have more than one rt_task queued, then
+	 * see if we can push the other rt_tasks off to other CPUS.
+	 * Note we may release the rq lock, and since
+	 * the lock was owned by prev, we need to release it
+	 * first via finish_lock_switch and then reacquire it here.
+	 */
+	if (unlikely(rq->rt.rt_nr_running > 1)) {
+		spin_lock_irq(&rq->lock);
+		push_rt_tasks(rq);
+		spin_unlock_irq(&rq->lock);
+	}
+}
+
 /*
  * Load-balancing iterator. Note: while the runqueue stays locked
  * during the whole iteration, the current task might be
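The core of the hunk above is the target selection in find_lock_lowest_rq(): prefer a CPU with no RT task at all, otherwise the CPU whose top RT priority is numerically largest (i.e. least urgent), and only if that priority is below the task being pushed. A small stand-alone sketch of just that selection step, with hypothetical toy_* names and made-up NR_CPUS/priority values chosen purely for illustration (lower number means higher priority, as in the kernel):

/* Toy, self-contained illustration (not kernel code) of the push
 * target decision: move the next-highest waiting RT task on this
 * CPU to the CPU running the lowest-priority work, if any.
 */
#include <stdio.h>

#define NR_CPUS		4
#define MAX_RT_PRIO	100	/* >= MAX_RT_PRIO means "no RT task" */

/* Pick the target CPU for a task of priority @prio, or -1 if none. */
static int toy_find_lowest_cpu(const int highest_prio[], int this_cpu, int prio)
{
	int cpu, lowest = -1;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (cpu == this_cpu)
			continue;
		/* A CPU with no RT task is always good enough. */
		if (highest_prio[cpu] >= MAX_RT_PRIO)
			return cpu;
		/* Otherwise prefer the CPU with the numerically largest
		 * (least urgent) top priority, but only if it is lower
		 * priority than the task we want to push. */
		if (highest_prio[cpu] > prio &&
		    (lowest < 0 || highest_prio[cpu] > highest_prio[lowest]))
			lowest = cpu;
	}
	return lowest;
}

int main(void)
{
	/* highest_prio[cpu]: priority of the top RT task on each CPU */
	int highest_prio[NR_CPUS] = { 5, 20, 60, 35 };
	int this_cpu = 0;
	int waiting_prio = 30;	/* second-highest task queued on CPU 0 */
	int target = toy_find_lowest_cpu(highest_prio, this_cpu, waiting_prio);

	if (target >= 0)
		printf("push prio-%d task from CPU %d to CPU %d\n",
		       waiting_prio, this_cpu, target);
	else
		printf("no CPU of lesser priority, keep the task queued\n");
	return 0;
}

With these sample values the sketch picks CPU 2 (top priority 60, the least urgent among CPUs running lower-priority work than the prio-30 task); CPU 1 is skipped because its prio-20 task is more urgent than the task being pushed.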
@@ -241,7 +462,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
 				  &rt_rq_iterator);
 }
-#endif
+#else /* CONFIG_SMP */
+# define schedule_tail_balance_rt(rq)	do { } while (0)
+#endif /* CONFIG_SMP */
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)
 {