-rw-r--r--  kernel/sched.c    |   8
-rw-r--r--  kernel/sched_rt.c | 225
2 files changed, 231 insertions, 2 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 6185fa080ec8..97cab609fc31 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1952,6 +1952,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
         prev_state = prev->state;
         finish_arch_switch(prev);
         finish_lock_switch(rq, prev);
+        schedule_tail_balance_rt(rq);
+
         fire_sched_in_preempt_notifiers(current);
         if (mm)
                 mmdrop(mm);
@@ -2185,11 +2187,13 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 /*
  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
  */
-static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(this_rq->lock)
         __acquires(busiest->lock)
         __acquires(this_rq->lock)
 {
+        int ret = 0;
+
         if (unlikely(!irqs_disabled())) {
                 /* printk() doesn't work good under rq->lock */
                 spin_unlock(&this_rq->lock);
@@ -2200,9 +2204,11 @@ static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
                         spin_unlock(&this_rq->lock);
                         spin_lock(&busiest->lock);
                         spin_lock(&this_rq->lock);
+                        ret = 1;
                 } else
                         spin_lock(&busiest->lock);
         }
+        return ret;
 }
 
 /*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 136c2857a049..7815e90b1147 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -137,6 +137,227 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 }
 
 #ifdef CONFIG_SMP
+/* Only try algorithms three times */
+#define RT_MAX_TRIES 3
+
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
+static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
+
+/* Return the second highest RT task, NULL otherwise */
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+{
+        struct rt_prio_array *array = &rq->rt.active;
+        struct task_struct *next;
+        struct list_head *queue;
+        int idx;
+
+        assert_spin_locked(&rq->lock);
+
+        if (likely(rq->rt.rt_nr_running < 2))
+                return NULL;
+
+        idx = sched_find_first_bit(array->bitmap);
+        if (unlikely(idx >= MAX_RT_PRIO)) {
+                WARN_ON(1); /* rt_nr_running is bad */
+                return NULL;
+        }
+
+        queue = array->queue + idx;
+        next = list_entry(queue->next, struct task_struct, run_list);
+        if (unlikely(next != rq->curr))
+                return next;
+
+        if (queue->next->next != queue) {
+                /* same prio task */
+                next = list_entry(queue->next->next, struct task_struct, run_list);
+                return next;
+        }
+
+        /* slower, but more flexible */
+        idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
+        if (unlikely(idx >= MAX_RT_PRIO)) {
+                WARN_ON(1); /* rt_nr_running was 2 and above! */
+                return NULL;
+        }
+
+        queue = array->queue + idx;
+        next = list_entry(queue->next, struct task_struct, run_list);
+
+        return next;
+}
+
+static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
+
+/* Will lock the rq it finds */
+static struct rq *find_lock_lowest_rq(struct task_struct *task,
+                                      struct rq *this_rq)
+{
+        struct rq *lowest_rq = NULL;
+        int cpu;
+        int tries;
+        cpumask_t *cpu_mask = &__get_cpu_var(local_cpu_mask);
+
+        cpus_and(*cpu_mask, cpu_online_map, task->cpus_allowed);
+
+        for (tries = 0; tries < RT_MAX_TRIES; tries++) {
+                /*
+                 * Scan each rq for the lowest prio.
+                 */
+                for_each_cpu_mask(cpu, *cpu_mask) {
+                        struct rq *rq = &per_cpu(runqueues, cpu);
+
+                        if (cpu == this_rq->cpu)
+                                continue;
+
+                        /* We look for lowest RT prio or non-rt CPU */
+                        if (rq->rt.highest_prio >= MAX_RT_PRIO) {
+                                lowest_rq = rq;
+                                break;
+                        }
+
+                        /* no locking for now */
+                        if (rq->rt.highest_prio > task->prio &&
+                            (!lowest_rq || rq->rt.highest_prio > lowest_rq->rt.highest_prio)) {
+                                lowest_rq = rq;
+                        }
+                }
+
+                if (!lowest_rq)
+                        break;
+
+                /* if the prio of this runqueue changed, try again */
+                if (double_lock_balance(this_rq, lowest_rq)) {
+                        /*
+                         * We had to unlock the run queue. In
+                         * the mean time, task could have
+                         * migrated already or had its affinity changed.
+                         * Also make sure that it wasn't scheduled on its rq.
+                         */
+                        if (unlikely(task_rq(task) != this_rq ||
+                                     !cpu_isset(lowest_rq->cpu, task->cpus_allowed) ||
+                                     task_running(this_rq, task) ||
+                                     !task->se.on_rq)) {
+                                spin_unlock(&lowest_rq->lock);
+                                lowest_rq = NULL;
+                                break;
+                        }
+                }
+
+                /* If this rq is still suitable use it. */
+                if (lowest_rq->rt.highest_prio > task->prio)
+                        break;
+
+                /* try again */
+                spin_unlock(&lowest_rq->lock);
+                lowest_rq = NULL;
+        }
+
+        return lowest_rq;
+}
+
+/*
+ * If the current CPU has more than one RT task, see if the non
+ * running task can migrate over to a CPU that is running a task
+ * of lesser priority.
+ */
+static int push_rt_task(struct rq *this_rq)
+{
+        struct task_struct *next_task;
+        struct rq *lowest_rq;
+        int ret = 0;
+        int paranoid = RT_MAX_TRIES;
+
+        assert_spin_locked(&this_rq->lock);
+
+        next_task = pick_next_highest_task_rt(this_rq);
+        if (!next_task)
+                return 0;
+
+ retry:
+        if (unlikely(next_task == this_rq->curr))
+                return 0;
+
+        /*
+         * It's possible that next_task slipped in with a
+         * higher priority than current. If that's the case
+         * just reschedule current.
+         */
+        if (unlikely(next_task->prio < this_rq->curr->prio)) {
+                resched_task(this_rq->curr);
+                return 0;
+        }
+
+        /* We might release this_rq lock */
+        get_task_struct(next_task);
+
+        /* find_lock_lowest_rq locks the rq if found */
+        lowest_rq = find_lock_lowest_rq(next_task, this_rq);
+        if (!lowest_rq) {
+                struct task_struct *task;
+                /*
+                 * find_lock_lowest_rq releases this_rq->lock,
+                 * so it is possible that next_task has changed.
+                 * If it has, then try again.
+                 */
+                task = pick_next_highest_task_rt(this_rq);
+                if (unlikely(task != next_task) && task && paranoid--) {
+                        put_task_struct(next_task);
+                        next_task = task;
+                        goto retry;
+                }
+                goto out;
+        }
+
+        assert_spin_locked(&lowest_rq->lock);
+
+        deactivate_task(this_rq, next_task, 0);
+        set_task_cpu(next_task, lowest_rq->cpu);
+        activate_task(lowest_rq, next_task, 0);
+
+        resched_task(lowest_rq->curr);
+
+        spin_unlock(&lowest_rq->lock);
+
+        ret = 1;
+out:
+        put_task_struct(next_task);
+
+        return ret;
+}
+
+/*
+ * TODO: Currently we just use the second highest prio task on
+ * the queue, and stop when it can't migrate (or there's
+ * no more RT tasks). There may be a case where a lower
+ * priority RT task has a different affinity than the
+ * higher RT task. In this case the lower RT task could
+ * possibly be able to migrate whereas the higher priority
+ * RT task could not. We currently ignore this issue.
+ * Enhancements are welcome!
+ */
+static void push_rt_tasks(struct rq *rq)
+{
+        /* push_rt_task will return true if it moved an RT */
+        while (push_rt_task(rq))
+                ;
+}
+
+static void schedule_tail_balance_rt(struct rq *rq)
+{
+        /*
+         * If we have more than one rt_task queued, then
+         * see if we can push the other rt_tasks off to other CPUs.
+         * Note we may release the rq lock, and since
+         * the lock was owned by prev, we need to release it
+         * first via finish_lock_switch and then reacquire it here.
+         */
+        if (unlikely(rq->rt.rt_nr_running > 1)) {
+                spin_lock_irq(&rq->lock);
+                push_rt_tasks(rq);
+                spin_unlock_irq(&rq->lock);
+        }
+}
+
 /*
  * Load-balancing iterator. Note: while the runqueue stays locked
  * during the whole iteration, the current task might be
@@ -241,7 +462,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
         return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
                                   &rt_rq_iterator);
 }
-#endif
+#else /* CONFIG_SMP */
+# define schedule_tail_balance_rt(rq) do { } while (0)
+#endif /* CONFIG_SMP */
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)
 {