path: root/kernel/sched.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  748
1 file changed, 403 insertions, 345 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index d5e37072ea54..4ee400f9d56b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/completion.h> 31#include <linux/completion.h>
32#include <linux/kernel_stat.h> 32#include <linux/kernel_stat.h>
33#include <linux/debug_locks.h>
33#include <linux/security.h> 34#include <linux/security.h>
34#include <linux/notifier.h> 35#include <linux/notifier.h>
35#include <linux/profile.h> 36#include <linux/profile.h>
@@ -178,20 +179,15 @@ static unsigned int static_prio_timeslice(int static_prio)
178 return SCALE_PRIO(DEF_TIMESLICE, static_prio); 179 return SCALE_PRIO(DEF_TIMESLICE, static_prio);
179} 180}
180 181
181static inline unsigned int task_timeslice(task_t *p) 182static inline unsigned int task_timeslice(struct task_struct *p)
182{ 183{
183 return static_prio_timeslice(p->static_prio); 184 return static_prio_timeslice(p->static_prio);
184} 185}
185 186
186#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
187 < (long long) (sd)->cache_hot_time)
188
189/* 187/*
190 * These are the runqueue data structures: 188 * These are the runqueue data structures:
191 */ 189 */
192 190
193typedef struct runqueue runqueue_t;
194
195struct prio_array { 191struct prio_array {
196 unsigned int nr_active; 192 unsigned int nr_active;
197 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ 193 DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
@@ -205,7 +201,7 @@ struct prio_array {
205 * (such as the load balancing or the thread migration code), lock 201 * (such as the load balancing or the thread migration code), lock
206 * acquire operations must be ordered by ascending &runqueue. 202 * acquire operations must be ordered by ascending &runqueue.
207 */ 203 */
208struct runqueue { 204struct rq {
209 spinlock_t lock; 205 spinlock_t lock;
210 206
211 /* 207 /*
@@ -229,9 +225,9 @@ struct runqueue {
229 225
230 unsigned long expired_timestamp; 226 unsigned long expired_timestamp;
231 unsigned long long timestamp_last_tick; 227 unsigned long long timestamp_last_tick;
232 task_t *curr, *idle; 228 struct task_struct *curr, *idle;
233 struct mm_struct *prev_mm; 229 struct mm_struct *prev_mm;
234 prio_array_t *active, *expired, arrays[2]; 230 struct prio_array *active, *expired, arrays[2];
235 int best_expired_prio; 231 int best_expired_prio;
236 atomic_t nr_iowait; 232 atomic_t nr_iowait;
237 233
@@ -242,7 +238,7 @@ struct runqueue {
242 int active_balance; 238 int active_balance;
243 int push_cpu; 239 int push_cpu;
244 240
245 task_t *migration_thread; 241 struct task_struct *migration_thread;
246 struct list_head migration_queue; 242 struct list_head migration_queue;
247#endif 243#endif
248 244
@@ -265,9 +261,10 @@ struct runqueue {
265 unsigned long ttwu_cnt; 261 unsigned long ttwu_cnt;
266 unsigned long ttwu_local; 262 unsigned long ttwu_local;
267#endif 263#endif
264 struct lock_class_key rq_lock_key;
268}; 265};
269 266
270static DEFINE_PER_CPU(struct runqueue, runqueues); 267static DEFINE_PER_CPU(struct rq, runqueues);
271 268
272/* 269/*
273 * The domain tree (rq->sd) is protected by RCU's quiescent state transition. 270 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@@ -276,8 +273,8 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
276 * The domain tree of any CPU may only be accessed from within 273 * The domain tree of any CPU may only be accessed from within
277 * preempt-disabled sections. 274 * preempt-disabled sections.
278 */ 275 */
279#define for_each_domain(cpu, domain) \ 276#define for_each_domain(cpu, __sd) \
280for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) 277 for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
281 278
282#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) 279#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
283#define this_rq() (&__get_cpu_var(runqueues)) 280#define this_rq() (&__get_cpu_var(runqueues))
@@ -292,26 +289,33 @@ for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
292#endif 289#endif
293 290
294#ifndef __ARCH_WANT_UNLOCKED_CTXSW 291#ifndef __ARCH_WANT_UNLOCKED_CTXSW
295static inline int task_running(runqueue_t *rq, task_t *p) 292static inline int task_running(struct rq *rq, struct task_struct *p)
296{ 293{
297 return rq->curr == p; 294 return rq->curr == p;
298} 295}
299 296
300static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 297static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
301{ 298{
302} 299}
303 300
304static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 301static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
305{ 302{
306#ifdef CONFIG_DEBUG_SPINLOCK 303#ifdef CONFIG_DEBUG_SPINLOCK
307 /* this is a valid case when another task releases the spinlock */ 304 /* this is a valid case when another task releases the spinlock */
308 rq->lock.owner = current; 305 rq->lock.owner = current;
309#endif 306#endif
307 /*
308 * If we are tracking spinlock dependencies then we have to
309 * fix up the runqueue lock - which gets 'carried over' from
310 * prev into current:
311 */
312 spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
313
310 spin_unlock_irq(&rq->lock); 314 spin_unlock_irq(&rq->lock);
311} 315}
312 316
313#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 317#else /* __ARCH_WANT_UNLOCKED_CTXSW */
314static inline int task_running(runqueue_t *rq, task_t *p) 318static inline int task_running(struct rq *rq, struct task_struct *p)
315{ 319{
316#ifdef CONFIG_SMP 320#ifdef CONFIG_SMP
317 return p->oncpu; 321 return p->oncpu;
@@ -320,7 +324,7 @@ static inline int task_running(runqueue_t *rq, task_t *p)
320#endif 324#endif
321} 325}
322 326
323static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) 327static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
324{ 328{
325#ifdef CONFIG_SMP 329#ifdef CONFIG_SMP
326 /* 330 /*
@@ -337,7 +341,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
337#endif 341#endif
338} 342}
339 343
340static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) 344static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
341{ 345{
342#ifdef CONFIG_SMP 346#ifdef CONFIG_SMP
343 /* 347 /*
@@ -358,10 +362,10 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
358 * __task_rq_lock - lock the runqueue a given task resides on. 362 * __task_rq_lock - lock the runqueue a given task resides on.
359 * Must be called interrupts disabled. 363 * Must be called interrupts disabled.
360 */ 364 */
361static inline runqueue_t *__task_rq_lock(task_t *p) 365static inline struct rq *__task_rq_lock(struct task_struct *p)
362 __acquires(rq->lock) 366 __acquires(rq->lock)
363{ 367{
364 struct runqueue *rq; 368 struct rq *rq;
365 369
366repeat_lock_task: 370repeat_lock_task:
367 rq = task_rq(p); 371 rq = task_rq(p);
@@ -378,10 +382,10 @@ repeat_lock_task:
378 * interrupts. Note the ordering: we can safely lookup the task_rq without 382 * interrupts. Note the ordering: we can safely lookup the task_rq without
379 * explicitly disabling preemption. 383 * explicitly disabling preemption.
380 */ 384 */
381static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) 385static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
382 __acquires(rq->lock) 386 __acquires(rq->lock)
383{ 387{
384 struct runqueue *rq; 388 struct rq *rq;
385 389
386repeat_lock_task: 390repeat_lock_task:
387 local_irq_save(*flags); 391 local_irq_save(*flags);
@@ -394,13 +398,13 @@ repeat_lock_task:
394 return rq; 398 return rq;
395} 399}
396 400
397static inline void __task_rq_unlock(runqueue_t *rq) 401static inline void __task_rq_unlock(struct rq *rq)
398 __releases(rq->lock) 402 __releases(rq->lock)
399{ 403{
400 spin_unlock(&rq->lock); 404 spin_unlock(&rq->lock);
401} 405}
402 406
403static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) 407static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
404 __releases(rq->lock) 408 __releases(rq->lock)
405{ 409{
406 spin_unlock_irqrestore(&rq->lock, *flags); 410 spin_unlock_irqrestore(&rq->lock, *flags);
@@ -420,7 +424,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
420 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); 424 seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
421 seq_printf(seq, "timestamp %lu\n", jiffies); 425 seq_printf(seq, "timestamp %lu\n", jiffies);
422 for_each_online_cpu(cpu) { 426 for_each_online_cpu(cpu) {
423 runqueue_t *rq = cpu_rq(cpu); 427 struct rq *rq = cpu_rq(cpu);
424#ifdef CONFIG_SMP 428#ifdef CONFIG_SMP
425 struct sched_domain *sd; 429 struct sched_domain *sd;
426 int dcnt = 0; 430 int dcnt = 0;
@@ -507,10 +511,10 @@ struct file_operations proc_schedstat_operations = {
507/* 511/*
508 * rq_lock - lock a given runqueue and disable interrupts. 512 * rq_lock - lock a given runqueue and disable interrupts.
509 */ 513 */
510static inline runqueue_t *this_rq_lock(void) 514static inline struct rq *this_rq_lock(void)
511 __acquires(rq->lock) 515 __acquires(rq->lock)
512{ 516{
513 runqueue_t *rq; 517 struct rq *rq;
514 518
515 local_irq_disable(); 519 local_irq_disable();
516 rq = this_rq(); 520 rq = this_rq();
@@ -535,7 +539,7 @@ static inline runqueue_t *this_rq_lock(void)
535 * long it was from the *first* time it was queued to the time that it 539 * long it was from the *first* time it was queued to the time that it
536 * finally hit a cpu. 540 * finally hit a cpu.
537 */ 541 */
538static inline void sched_info_dequeued(task_t *t) 542static inline void sched_info_dequeued(struct task_struct *t)
539{ 543{
540 t->sched_info.last_queued = 0; 544 t->sched_info.last_queued = 0;
541} 545}
@@ -545,10 +549,10 @@ static inline void sched_info_dequeued(task_t *t)
545 * long it was waiting to run. We also note when it began so that we 549 * long it was waiting to run. We also note when it began so that we
546 * can keep stats on how long its timeslice is. 550 * can keep stats on how long its timeslice is.
547 */ 551 */
548static void sched_info_arrive(task_t *t) 552static void sched_info_arrive(struct task_struct *t)
549{ 553{
550 unsigned long now = jiffies, diff = 0; 554 unsigned long now = jiffies, diff = 0;
551 struct runqueue *rq = task_rq(t); 555 struct rq *rq = task_rq(t);
552 556
553 if (t->sched_info.last_queued) 557 if (t->sched_info.last_queued)
554 diff = now - t->sched_info.last_queued; 558 diff = now - t->sched_info.last_queued;
@@ -579,7 +583,7 @@ static void sched_info_arrive(task_t *t)
579 * the timestamp if it is already not set. It's assumed that 583 * the timestamp if it is already not set. It's assumed that
580 * sched_info_dequeued() will clear that stamp when appropriate. 584 * sched_info_dequeued() will clear that stamp when appropriate.
581 */ 585 */
582static inline void sched_info_queued(task_t *t) 586static inline void sched_info_queued(struct task_struct *t)
583{ 587{
584 if (!t->sched_info.last_queued) 588 if (!t->sched_info.last_queued)
585 t->sched_info.last_queued = jiffies; 589 t->sched_info.last_queued = jiffies;
@@ -589,9 +593,9 @@ static inline void sched_info_queued(task_t *t)
589 * Called when a process ceases being the active-running process, either 593 * Called when a process ceases being the active-running process, either
590 * voluntarily or involuntarily. Now we can calculate how long we ran. 594 * voluntarily or involuntarily. Now we can calculate how long we ran.
591 */ 595 */
592static inline void sched_info_depart(task_t *t) 596static inline void sched_info_depart(struct task_struct *t)
593{ 597{
594 struct runqueue *rq = task_rq(t); 598 struct rq *rq = task_rq(t);
595 unsigned long diff = jiffies - t->sched_info.last_arrival; 599 unsigned long diff = jiffies - t->sched_info.last_arrival;
596 600
597 t->sched_info.cpu_time += diff; 601 t->sched_info.cpu_time += diff;
@@ -605,9 +609,10 @@ static inline void sched_info_depart(task_t *t)
605 * their time slice. (This may also be called when switching to or from 609 * their time slice. (This may also be called when switching to or from
606 * the idle task.) We are only called when prev != next. 610 * the idle task.) We are only called when prev != next.
607 */ 611 */
608static inline void sched_info_switch(task_t *prev, task_t *next) 612static inline void
613sched_info_switch(struct task_struct *prev, struct task_struct *next)
609{ 614{
610 struct runqueue *rq = task_rq(prev); 615 struct rq *rq = task_rq(prev);
611 616
612 /* 617 /*
613 * prev now departs the cpu. It's not interesting to record 618 * prev now departs the cpu. It's not interesting to record
@@ -628,7 +633,7 @@ static inline void sched_info_switch(task_t *prev, task_t *next)
628/* 633/*
629 * Adding/removing a task to/from a priority array: 634 * Adding/removing a task to/from a priority array:
630 */ 635 */
631static void dequeue_task(struct task_struct *p, prio_array_t *array) 636static void dequeue_task(struct task_struct *p, struct prio_array *array)
632{ 637{
633 array->nr_active--; 638 array->nr_active--;
634 list_del(&p->run_list); 639 list_del(&p->run_list);
@@ -636,7 +641,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array)
636 __clear_bit(p->prio, array->bitmap); 641 __clear_bit(p->prio, array->bitmap);
637} 642}
638 643
639static void enqueue_task(struct task_struct *p, prio_array_t *array) 644static void enqueue_task(struct task_struct *p, struct prio_array *array)
640{ 645{
641 sched_info_queued(p); 646 sched_info_queued(p);
642 list_add_tail(&p->run_list, array->queue + p->prio); 647 list_add_tail(&p->run_list, array->queue + p->prio);
@@ -649,12 +654,13 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array)
649 * Put task to the end of the run list without the overhead of dequeue 654 * Put task to the end of the run list without the overhead of dequeue
650 * followed by enqueue. 655 * followed by enqueue.
651 */ 656 */
652static void requeue_task(struct task_struct *p, prio_array_t *array) 657static void requeue_task(struct task_struct *p, struct prio_array *array)
653{ 658{
654 list_move_tail(&p->run_list, array->queue + p->prio); 659 list_move_tail(&p->run_list, array->queue + p->prio);
655} 660}
656 661
657static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) 662static inline void
663enqueue_task_head(struct task_struct *p, struct prio_array *array)
658{ 664{
659 list_add(&p->run_list, array->queue + p->prio); 665 list_add(&p->run_list, array->queue + p->prio);
660 __set_bit(p->prio, array->bitmap); 666 __set_bit(p->prio, array->bitmap);
@@ -677,7 +683,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
677 * Both properties are important to certain workloads. 683 * Both properties are important to certain workloads.
678 */ 684 */
679 685
680static inline int __normal_prio(task_t *p) 686static inline int __normal_prio(struct task_struct *p)
681{ 687{
682 int bonus, prio; 688 int bonus, prio;
683 689
@@ -713,7 +719,7 @@ static inline int __normal_prio(task_t *p)
713#define RTPRIO_TO_LOAD_WEIGHT(rp) \ 719#define RTPRIO_TO_LOAD_WEIGHT(rp) \
714 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) 720 (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
715 721
716static void set_load_weight(task_t *p) 722static void set_load_weight(struct task_struct *p)
717{ 723{
718 if (has_rt_policy(p)) { 724 if (has_rt_policy(p)) {
719#ifdef CONFIG_SMP 725#ifdef CONFIG_SMP
@@ -731,23 +737,25 @@ static void set_load_weight(task_t *p)
731 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); 737 p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
732} 738}
733 739
734static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p) 740static inline void
741inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
735{ 742{
736 rq->raw_weighted_load += p->load_weight; 743 rq->raw_weighted_load += p->load_weight;
737} 744}
738 745
739static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p) 746static inline void
747dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
740{ 748{
741 rq->raw_weighted_load -= p->load_weight; 749 rq->raw_weighted_load -= p->load_weight;
742} 750}
743 751
744static inline void inc_nr_running(task_t *p, runqueue_t *rq) 752static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
745{ 753{
746 rq->nr_running++; 754 rq->nr_running++;
747 inc_raw_weighted_load(rq, p); 755 inc_raw_weighted_load(rq, p);
748} 756}
749 757
750static inline void dec_nr_running(task_t *p, runqueue_t *rq) 758static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
751{ 759{
752 rq->nr_running--; 760 rq->nr_running--;
753 dec_raw_weighted_load(rq, p); 761 dec_raw_weighted_load(rq, p);
@@ -760,7 +768,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq)
760 * setprio syscalls, and whenever the interactivity 768 * setprio syscalls, and whenever the interactivity
761 * estimator recalculates. 769 * estimator recalculates.
762 */ 770 */
763static inline int normal_prio(task_t *p) 771static inline int normal_prio(struct task_struct *p)
764{ 772{
765 int prio; 773 int prio;
766 774
@@ -778,7 +786,7 @@ static inline int normal_prio(task_t *p)
778 * interactivity modifiers. Will be RT if the task got 786 * interactivity modifiers. Will be RT if the task got
779 * RT-boosted. If not then it returns p->normal_prio. 787 * RT-boosted. If not then it returns p->normal_prio.
780 */ 788 */
781static int effective_prio(task_t *p) 789static int effective_prio(struct task_struct *p)
782{ 790{
783 p->normal_prio = normal_prio(p); 791 p->normal_prio = normal_prio(p);
784 /* 792 /*
@@ -794,9 +802,9 @@ static int effective_prio(task_t *p)
794/* 802/*
795 * __activate_task - move a task to the runqueue. 803 * __activate_task - move a task to the runqueue.
796 */ 804 */
797static void __activate_task(task_t *p, runqueue_t *rq) 805static void __activate_task(struct task_struct *p, struct rq *rq)
798{ 806{
799 prio_array_t *target = rq->active; 807 struct prio_array *target = rq->active;
800 808
801 if (batch_task(p)) 809 if (batch_task(p))
802 target = rq->expired; 810 target = rq->expired;
@@ -807,7 +815,7 @@ static void __activate_task(task_t *p, runqueue_t *rq)
807/* 815/*
808 * __activate_idle_task - move idle task to the _front_ of runqueue. 816 * __activate_idle_task - move idle task to the _front_ of runqueue.
809 */ 817 */
810static inline void __activate_idle_task(task_t *p, runqueue_t *rq) 818static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
811{ 819{
812 enqueue_task_head(p, rq->active); 820 enqueue_task_head(p, rq->active);
813 inc_nr_running(p, rq); 821 inc_nr_running(p, rq);
@@ -817,7 +825,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
817 * Recalculate p->normal_prio and p->prio after having slept, 825 * Recalculate p->normal_prio and p->prio after having slept,
818 * updating the sleep-average too: 826 * updating the sleep-average too:
819 */ 827 */
820static int recalc_task_prio(task_t *p, unsigned long long now) 828static int recalc_task_prio(struct task_struct *p, unsigned long long now)
821{ 829{
822 /* Caller must always ensure 'now >= p->timestamp' */ 830 /* Caller must always ensure 'now >= p->timestamp' */
823 unsigned long sleep_time = now - p->timestamp; 831 unsigned long sleep_time = now - p->timestamp;
@@ -889,7 +897,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
889 * Update all the scheduling statistics stuff. (sleep average 897 * Update all the scheduling statistics stuff. (sleep average
890 * calculation, priority modifiers, etc.) 898 * calculation, priority modifiers, etc.)
891 */ 899 */
892static void activate_task(task_t *p, runqueue_t *rq, int local) 900static void activate_task(struct task_struct *p, struct rq *rq, int local)
893{ 901{
894 unsigned long long now; 902 unsigned long long now;
895 903
@@ -897,7 +905,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
897#ifdef CONFIG_SMP 905#ifdef CONFIG_SMP
898 if (!local) { 906 if (!local) {
899 /* Compensate for drifting sched_clock */ 907 /* Compensate for drifting sched_clock */
900 runqueue_t *this_rq = this_rq(); 908 struct rq *this_rq = this_rq();
901 now = (now - this_rq->timestamp_last_tick) 909 now = (now - this_rq->timestamp_last_tick)
902 + rq->timestamp_last_tick; 910 + rq->timestamp_last_tick;
903 } 911 }
@@ -936,7 +944,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
936/* 944/*
937 * deactivate_task - remove a task from the runqueue. 945 * deactivate_task - remove a task from the runqueue.
938 */ 946 */
939static void deactivate_task(struct task_struct *p, runqueue_t *rq) 947static void deactivate_task(struct task_struct *p, struct rq *rq)
940{ 948{
941 dec_nr_running(p, rq); 949 dec_nr_running(p, rq);
942 dequeue_task(p, p->array); 950 dequeue_task(p, p->array);
@@ -956,7 +964,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
956#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 964#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
957#endif 965#endif
958 966
959static void resched_task(task_t *p) 967static void resched_task(struct task_struct *p)
960{ 968{
961 int cpu; 969 int cpu;
962 970
@@ -977,7 +985,7 @@ static void resched_task(task_t *p)
977 smp_send_reschedule(cpu); 985 smp_send_reschedule(cpu);
978} 986}
979#else 987#else
980static inline void resched_task(task_t *p) 988static inline void resched_task(struct task_struct *p)
981{ 989{
982 assert_spin_locked(&task_rq(p)->lock); 990 assert_spin_locked(&task_rq(p)->lock);
983 set_tsk_need_resched(p); 991 set_tsk_need_resched(p);
@@ -988,7 +996,7 @@ static inline void resched_task(task_t *p)
988 * task_curr - is this task currently executing on a CPU? 996 * task_curr - is this task currently executing on a CPU?
989 * @p: the task in question. 997 * @p: the task in question.
990 */ 998 */
991inline int task_curr(const task_t *p) 999inline int task_curr(const struct task_struct *p)
992{ 1000{
993 return cpu_curr(task_cpu(p)) == p; 1001 return cpu_curr(task_cpu(p)) == p;
994} 1002}
@@ -1000,22 +1008,23 @@ unsigned long weighted_cpuload(const int cpu)
1000} 1008}
1001 1009
1002#ifdef CONFIG_SMP 1010#ifdef CONFIG_SMP
1003typedef struct { 1011struct migration_req {
1004 struct list_head list; 1012 struct list_head list;
1005 1013
1006 task_t *task; 1014 struct task_struct *task;
1007 int dest_cpu; 1015 int dest_cpu;
1008 1016
1009 struct completion done; 1017 struct completion done;
1010} migration_req_t; 1018};
1011 1019
1012/* 1020/*
1013 * The task's runqueue lock must be held. 1021 * The task's runqueue lock must be held.
1014 * Returns true if you have to wait for migration thread. 1022 * Returns true if you have to wait for migration thread.
1015 */ 1023 */
1016static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) 1024static int
1025migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
1017{ 1026{
1018 runqueue_t *rq = task_rq(p); 1027 struct rq *rq = task_rq(p);
1019 1028
1020 /* 1029 /*
1021 * If the task is not on a runqueue (and not running), then 1030 * If the task is not on a runqueue (and not running), then
@@ -1030,6 +1039,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1030 req->task = p; 1039 req->task = p;
1031 req->dest_cpu = dest_cpu; 1040 req->dest_cpu = dest_cpu;
1032 list_add(&req->list, &rq->migration_queue); 1041 list_add(&req->list, &rq->migration_queue);
1042
1033 return 1; 1043 return 1;
1034} 1044}
1035 1045
@@ -1042,10 +1052,10 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
1042 * smp_call_function() if an IPI is sent by the same process we are 1052 * smp_call_function() if an IPI is sent by the same process we are
1043 * waiting to become inactive. 1053 * waiting to become inactive.
1044 */ 1054 */
1045void wait_task_inactive(task_t *p) 1055void wait_task_inactive(struct task_struct *p)
1046{ 1056{
1047 unsigned long flags; 1057 unsigned long flags;
1048 runqueue_t *rq; 1058 struct rq *rq;
1049 int preempted; 1059 int preempted;
1050 1060
1051repeat: 1061repeat:
@@ -1076,7 +1086,7 @@ repeat:
1076 * to another CPU then no harm is done and the purpose has been 1086 * to another CPU then no harm is done and the purpose has been
1077 * achieved as well. 1087 * achieved as well.
1078 */ 1088 */
1079void kick_process(task_t *p) 1089void kick_process(struct task_struct *p)
1080{ 1090{
1081 int cpu; 1091 int cpu;
1082 1092
@@ -1096,7 +1106,7 @@ void kick_process(task_t *p)
1096 */ 1106 */
1097static inline unsigned long source_load(int cpu, int type) 1107static inline unsigned long source_load(int cpu, int type)
1098{ 1108{
1099 runqueue_t *rq = cpu_rq(cpu); 1109 struct rq *rq = cpu_rq(cpu);
1100 1110
1101 if (type == 0) 1111 if (type == 0)
1102 return rq->raw_weighted_load; 1112 return rq->raw_weighted_load;
@@ -1110,7 +1120,7 @@ static inline unsigned long source_load(int cpu, int type)
1110 */ 1120 */
1111static inline unsigned long target_load(int cpu, int type) 1121static inline unsigned long target_load(int cpu, int type)
1112{ 1122{
1113 runqueue_t *rq = cpu_rq(cpu); 1123 struct rq *rq = cpu_rq(cpu);
1114 1124
1115 if (type == 0) 1125 if (type == 0)
1116 return rq->raw_weighted_load; 1126 return rq->raw_weighted_load;
@@ -1123,10 +1133,10 @@ static inline unsigned long target_load(int cpu, int type)
1123 */ 1133 */
1124static inline unsigned long cpu_avg_load_per_task(int cpu) 1134static inline unsigned long cpu_avg_load_per_task(int cpu)
1125{ 1135{
1126 runqueue_t *rq = cpu_rq(cpu); 1136 struct rq *rq = cpu_rq(cpu);
1127 unsigned long n = rq->nr_running; 1137 unsigned long n = rq->nr_running;
1128 1138
1129 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE; 1139 return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
1130} 1140}
1131 1141
1132/* 1142/*
@@ -1279,7 +1289,7 @@ nextlevel:
1279 * Returns the CPU we should wake onto. 1289 * Returns the CPU we should wake onto.
1280 */ 1290 */
1281#if defined(ARCH_HAS_SCHED_WAKE_IDLE) 1291#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1282static int wake_idle(int cpu, task_t *p) 1292static int wake_idle(int cpu, struct task_struct *p)
1283{ 1293{
1284 cpumask_t tmp; 1294 cpumask_t tmp;
1285 struct sched_domain *sd; 1295 struct sched_domain *sd;
@@ -1302,7 +1312,7 @@ static int wake_idle(int cpu, task_t *p)
1302 return cpu; 1312 return cpu;
1303} 1313}
1304#else 1314#else
1305static inline int wake_idle(int cpu, task_t *p) 1315static inline int wake_idle(int cpu, struct task_struct *p)
1306{ 1316{
1307 return cpu; 1317 return cpu;
1308} 1318}
@@ -1322,15 +1332,15 @@ static inline int wake_idle(int cpu, task_t *p)
1322 * 1332 *
1323 * returns failure only if the task is already active. 1333 * returns failure only if the task is already active.
1324 */ 1334 */
1325static int try_to_wake_up(task_t *p, unsigned int state, int sync) 1335static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1326{ 1336{
1327 int cpu, this_cpu, success = 0; 1337 int cpu, this_cpu, success = 0;
1328 unsigned long flags; 1338 unsigned long flags;
1329 long old_state; 1339 long old_state;
1330 runqueue_t *rq; 1340 struct rq *rq;
1331#ifdef CONFIG_SMP 1341#ifdef CONFIG_SMP
1332 unsigned long load, this_load;
1333 struct sched_domain *sd, *this_sd = NULL; 1342 struct sched_domain *sd, *this_sd = NULL;
1343 unsigned long load, this_load;
1334 int new_cpu; 1344 int new_cpu;
1335#endif 1345#endif
1336 1346
@@ -1480,15 +1490,14 @@ out:
1480 return success; 1490 return success;
1481} 1491}
1482 1492
1483int fastcall wake_up_process(task_t *p) 1493int fastcall wake_up_process(struct task_struct *p)
1484{ 1494{
1485 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | 1495 return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
1486 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); 1496 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
1487} 1497}
1488
1489EXPORT_SYMBOL(wake_up_process); 1498EXPORT_SYMBOL(wake_up_process);
1490 1499
1491int fastcall wake_up_state(task_t *p, unsigned int state) 1500int fastcall wake_up_state(struct task_struct *p, unsigned int state)
1492{ 1501{
1493 return try_to_wake_up(p, state, 0); 1502 return try_to_wake_up(p, state, 0);
1494} 1503}
@@ -1497,7 +1506,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
1497 * Perform scheduler related setup for a newly forked process p. 1506 * Perform scheduler related setup for a newly forked process p.
1498 * p is forked by current. 1507 * p is forked by current.
1499 */ 1508 */
1500void fastcall sched_fork(task_t *p, int clone_flags) 1509void fastcall sched_fork(struct task_struct *p, int clone_flags)
1501{ 1510{
1502 int cpu = get_cpu(); 1511 int cpu = get_cpu();
1503 1512
@@ -1565,11 +1574,11 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1565 * that must be done for every newly created context, then puts the task 1574 * that must be done for every newly created context, then puts the task
1566 * on the runqueue and wakes it. 1575 * on the runqueue and wakes it.
1567 */ 1576 */
1568void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) 1577void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1569{ 1578{
1579 struct rq *rq, *this_rq;
1570 unsigned long flags; 1580 unsigned long flags;
1571 int this_cpu, cpu; 1581 int this_cpu, cpu;
1572 runqueue_t *rq, *this_rq;
1573 1582
1574 rq = task_rq_lock(p, &flags); 1583 rq = task_rq_lock(p, &flags);
1575 BUG_ON(p->state != TASK_RUNNING); 1584 BUG_ON(p->state != TASK_RUNNING);
@@ -1649,10 +1658,10 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1649 * artificially, because any timeslice recovered here 1658 * artificially, because any timeslice recovered here
1650 * was given away by the parent in the first place.) 1659 * was given away by the parent in the first place.)
1651 */ 1660 */
1652void fastcall sched_exit(task_t *p) 1661void fastcall sched_exit(struct task_struct *p)
1653{ 1662{
1654 unsigned long flags; 1663 unsigned long flags;
1655 runqueue_t *rq; 1664 struct rq *rq;
1656 1665
1657 /* 1666 /*
1658 * If the child was a (relative-) CPU hog then decrease 1667 * If the child was a (relative-) CPU hog then decrease
@@ -1683,7 +1692,7 @@ void fastcall sched_exit(task_t *p)
1683 * prepare_task_switch sets up locking and calls architecture specific 1692 * prepare_task_switch sets up locking and calls architecture specific
1684 * hooks. 1693 * hooks.
1685 */ 1694 */
1686static inline void prepare_task_switch(runqueue_t *rq, task_t *next) 1695static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
1687{ 1696{
1688 prepare_lock_switch(rq, next); 1697 prepare_lock_switch(rq, next);
1689 prepare_arch_switch(next); 1698 prepare_arch_switch(next);
@@ -1704,7 +1713,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
1704 * with the lock held can cause deadlocks; see schedule() for 1713 * with the lock held can cause deadlocks; see schedule() for
1705 * details.) 1714 * details.)
1706 */ 1715 */
1707static inline void finish_task_switch(runqueue_t *rq, task_t *prev) 1716static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
1708 __releases(rq->lock) 1717 __releases(rq->lock)
1709{ 1718{
1710 struct mm_struct *mm = rq->prev_mm; 1719 struct mm_struct *mm = rq->prev_mm;
@@ -1742,10 +1751,11 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
1742 * schedule_tail - first thing a freshly forked thread must call. 1751 * schedule_tail - first thing a freshly forked thread must call.
1743 * @prev: the thread we just switched away from. 1752 * @prev: the thread we just switched away from.
1744 */ 1753 */
1745asmlinkage void schedule_tail(task_t *prev) 1754asmlinkage void schedule_tail(struct task_struct *prev)
1746 __releases(rq->lock) 1755 __releases(rq->lock)
1747{ 1756{
1748 runqueue_t *rq = this_rq(); 1757 struct rq *rq = this_rq();
1758
1749 finish_task_switch(rq, prev); 1759 finish_task_switch(rq, prev);
1750#ifdef __ARCH_WANT_UNLOCKED_CTXSW 1760#ifdef __ARCH_WANT_UNLOCKED_CTXSW
1751 /* In this case, finish_task_switch does not reenable preemption */ 1761 /* In this case, finish_task_switch does not reenable preemption */
@@ -1759,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev)
1759 * context_switch - switch to the new MM and the new 1769 * context_switch - switch to the new MM and the new
1760 * thread's register state. 1770 * thread's register state.
1761 */ 1771 */
1762static inline 1772static inline struct task_struct *
1763task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) 1773context_switch(struct rq *rq, struct task_struct *prev,
1774 struct task_struct *next)
1764{ 1775{
1765 struct mm_struct *mm = next->mm; 1776 struct mm_struct *mm = next->mm;
1766 struct mm_struct *oldmm = prev->active_mm; 1777 struct mm_struct *oldmm = prev->active_mm;
@@ -1777,6 +1788,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
1777 WARN_ON(rq->prev_mm); 1788 WARN_ON(rq->prev_mm);
1778 rq->prev_mm = oldmm; 1789 rq->prev_mm = oldmm;
1779 } 1790 }
1791 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
1780 1792
1781 /* Here we just switch the register state and the stack. */ 1793 /* Here we just switch the register state and the stack. */
1782 switch_to(prev, next, prev); 1794 switch_to(prev, next, prev);
@@ -1857,12 +1869,21 @@ unsigned long nr_active(void)
1857#ifdef CONFIG_SMP 1869#ifdef CONFIG_SMP
1858 1870
1859/* 1871/*
1872 * Is this task likely cache-hot:
1873 */
1874static inline int
1875task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
1876{
1877 return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
1878}
1879
1880/*
1860 * double_rq_lock - safely lock two runqueues 1881 * double_rq_lock - safely lock two runqueues
1861 * 1882 *
1862 * Note this does not disable interrupts like task_rq_lock, 1883 * Note this does not disable interrupts like task_rq_lock,
1863 * you need to do so manually before calling. 1884 * you need to do so manually before calling.
1864 */ 1885 */
1865static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) 1886static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1866 __acquires(rq1->lock) 1887 __acquires(rq1->lock)
1867 __acquires(rq2->lock) 1888 __acquires(rq2->lock)
1868{ 1889{
@@ -1886,7 +1907,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
1886 * Note this does not restore interrupts like task_rq_unlock, 1907 * Note this does not restore interrupts like task_rq_unlock,
1887 * you need to do so manually after calling. 1908 * you need to do so manually after calling.
1888 */ 1909 */
1889static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) 1910static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1890 __releases(rq1->lock) 1911 __releases(rq1->lock)
1891 __releases(rq2->lock) 1912 __releases(rq2->lock)
1892{ 1913{
@@ -1900,7 +1921,7 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
1900/* 1921/*
1901 * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 1922 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
1902 */ 1923 */
1903static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) 1924static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
1904 __releases(this_rq->lock) 1925 __releases(this_rq->lock)
1905 __acquires(busiest->lock) 1926 __acquires(busiest->lock)
1906 __acquires(this_rq->lock) 1927 __acquires(this_rq->lock)
@@ -1921,11 +1942,11 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
1921 * allow dest_cpu, which will force the cpu onto dest_cpu. Then 1942 * allow dest_cpu, which will force the cpu onto dest_cpu. Then
1922 * the cpu_allowed mask is restored. 1943 * the cpu_allowed mask is restored.
1923 */ 1944 */
1924static void sched_migrate_task(task_t *p, int dest_cpu) 1945static void sched_migrate_task(struct task_struct *p, int dest_cpu)
1925{ 1946{
1926 migration_req_t req; 1947 struct migration_req req;
1927 runqueue_t *rq;
1928 unsigned long flags; 1948 unsigned long flags;
1949 struct rq *rq;
1929 1950
1930 rq = task_rq_lock(p, &flags); 1951 rq = task_rq_lock(p, &flags);
1931 if (!cpu_isset(dest_cpu, p->cpus_allowed) 1952 if (!cpu_isset(dest_cpu, p->cpus_allowed)
@@ -1936,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu)
1936 if (migrate_task(p, dest_cpu, &req)) { 1957 if (migrate_task(p, dest_cpu, &req)) {
1937 /* Need to wait for migration thread (might exit: take ref). */ 1958 /* Need to wait for migration thread (might exit: take ref). */
1938 struct task_struct *mt = rq->migration_thread; 1959 struct task_struct *mt = rq->migration_thread;
1960
1939 get_task_struct(mt); 1961 get_task_struct(mt);
1940 task_rq_unlock(rq, &flags); 1962 task_rq_unlock(rq, &flags);
1941 wake_up_process(mt); 1963 wake_up_process(mt);
1942 put_task_struct(mt); 1964 put_task_struct(mt);
1943 wait_for_completion(&req.done); 1965 wait_for_completion(&req.done);
1966
1944 return; 1967 return;
1945 } 1968 }
1946out: 1969out:
@@ -1964,9 +1987,9 @@ void sched_exec(void)
1964 * pull_task - move a task from a remote runqueue to the local runqueue. 1987 * pull_task - move a task from a remote runqueue to the local runqueue.
1965 * Both runqueues must be locked. 1988 * Both runqueues must be locked.
1966 */ 1989 */
1967static 1990static void pull_task(struct rq *src_rq, struct prio_array *src_array,
1968void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, 1991 struct task_struct *p, struct rq *this_rq,
1969 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) 1992 struct prio_array *this_array, int this_cpu)
1970{ 1993{
1971 dequeue_task(p, src_array); 1994 dequeue_task(p, src_array);
1972 dec_nr_running(p, src_rq); 1995 dec_nr_running(p, src_rq);
@@ -1987,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1987 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? 2010 * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
1988 */ 2011 */
1989static 2012static
1990int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, 2013int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
1991 struct sched_domain *sd, enum idle_type idle, 2014 struct sched_domain *sd, enum idle_type idle,
1992 int *all_pinned) 2015 int *all_pinned)
1993{ 2016{
@@ -2019,6 +2042,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2019} 2042}
2020 2043
2021#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) 2044#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio)
2045
2022/* 2046/*
2023 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted 2047 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
2024 * load from busiest to this_rq, as part of a balancing operation within 2048 * load from busiest to this_rq, as part of a balancing operation within
@@ -2026,18 +2050,17 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
2026 * 2050 *
2027 * Called with both runqueues locked. 2051 * Called with both runqueues locked.
2028 */ 2052 */
2029static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, 2053static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2030 unsigned long max_nr_move, unsigned long max_load_move, 2054 unsigned long max_nr_move, unsigned long max_load_move,
2031 struct sched_domain *sd, enum idle_type idle, 2055 struct sched_domain *sd, enum idle_type idle,
2032 int *all_pinned) 2056 int *all_pinned)
2033{ 2057{
2034 prio_array_t *array, *dst_array; 2058 int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
2059 best_prio_seen, skip_for_load;
2060 struct prio_array *array, *dst_array;
2035 struct list_head *head, *curr; 2061 struct list_head *head, *curr;
2036 int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio; 2062 struct task_struct *tmp;
2037 int busiest_best_prio_seen;
2038 int skip_for_load; /* skip the task based on weighted load issues */
2039 long rem_load_move; 2063 long rem_load_move;
2040 task_t *tmp;
2041 2064
2042 if (max_nr_move == 0 || max_load_move == 0) 2065 if (max_nr_move == 0 || max_load_move == 0)
2043 goto out; 2066 goto out;
@@ -2045,15 +2068,15 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
2045 rem_load_move = max_load_move; 2068 rem_load_move = max_load_move;
2046 pinned = 1; 2069 pinned = 1;
2047 this_best_prio = rq_best_prio(this_rq); 2070 this_best_prio = rq_best_prio(this_rq);
2048 busiest_best_prio = rq_best_prio(busiest); 2071 best_prio = rq_best_prio(busiest);
2049 /* 2072 /*
2050 * Enable handling of the case where there is more than one task 2073 * Enable handling of the case where there is more than one task
2051 * with the best priority. If the current running task is one 2074 * with the best priority. If the current running task is one
2052 * of those with prio==busiest_best_prio we know it won't be moved 2075 * of those with prio==best_prio we know it won't be moved
2053 * and therefore it's safe to override the skip (based on load) of 2076 * and therefore it's safe to override the skip (based on load) of
2054 * any task we find with that prio. 2077 * any task we find with that prio.
2055 */ 2078 */
2056 busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio; 2079 best_prio_seen = best_prio == busiest->curr->prio;
2057 2080
2058 /* 2081 /*
2059 * We first consider expired tasks. Those will likely not be 2082 * We first consider expired tasks. Those will likely not be
@@ -2089,7 +2112,7 @@ skip_bitmap:
2089 head = array->queue + idx; 2112 head = array->queue + idx;
2090 curr = head->prev; 2113 curr = head->prev;
2091skip_queue: 2114skip_queue:
2092 tmp = list_entry(curr, task_t, run_list); 2115 tmp = list_entry(curr, struct task_struct, run_list);
2093 2116
2094 curr = curr->prev; 2117 curr = curr->prev;
2095 2118
@@ -2100,10 +2123,11 @@ skip_queue:
2100 */ 2123 */
2101 skip_for_load = tmp->load_weight > rem_load_move; 2124 skip_for_load = tmp->load_weight > rem_load_move;
2102 if (skip_for_load && idx < this_best_prio) 2125 if (skip_for_load && idx < this_best_prio)
2103 skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio; 2126 skip_for_load = !best_prio_seen && idx == best_prio;
2104 if (skip_for_load || 2127 if (skip_for_load ||
2105 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { 2128 !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
2106 busiest_best_prio_seen |= idx == busiest_best_prio; 2129
2130 best_prio_seen |= idx == best_prio;
2107 if (curr != head) 2131 if (curr != head)
2108 goto skip_queue; 2132 goto skip_queue;
2109 idx++; 2133 idx++;
@@ -2146,8 +2170,8 @@ out:
2146 2170
2147/* 2171/*
2148 * find_busiest_group finds and returns the busiest CPU group within the 2172 * find_busiest_group finds and returns the busiest CPU group within the
2149 * domain. It calculates and returns the amount of weighted load which should be 2173 * domain. It calculates and returns the amount of weighted load which
2150 * moved to restore balance via the imbalance parameter. 2174 * should be moved to restore balance via the imbalance parameter.
2151 */ 2175 */
2152static struct sched_group * 2176static struct sched_group *
2153find_busiest_group(struct sched_domain *sd, int this_cpu, 2177find_busiest_group(struct sched_domain *sd, int this_cpu,
@@ -2188,7 +2212,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2188 sum_weighted_load = sum_nr_running = avg_load = 0; 2212 sum_weighted_load = sum_nr_running = avg_load = 0;
2189 2213
2190 for_each_cpu_mask(i, group->cpumask) { 2214 for_each_cpu_mask(i, group->cpumask) {
2191 runqueue_t *rq = cpu_rq(i); 2215 struct rq *rq = cpu_rq(i);
2192 2216
2193 if (*sd_idle && !idle_cpu(i)) 2217 if (*sd_idle && !idle_cpu(i))
2194 *sd_idle = 0; 2218 *sd_idle = 0;
@@ -2269,7 +2293,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2269 * capacity but still has some space to pick up some load 2293 * capacity but still has some space to pick up some load
2270 * from other group and save more power 2294 * from other group and save more power
2271 */ 2295 */
2272 if (sum_nr_running <= group_capacity - 1) 2296 if (sum_nr_running <= group_capacity - 1) {
2273 if (sum_nr_running > leader_nr_running || 2297 if (sum_nr_running > leader_nr_running ||
2274 (sum_nr_running == leader_nr_running && 2298 (sum_nr_running == leader_nr_running &&
2275 first_cpu(group->cpumask) > 2299 first_cpu(group->cpumask) >
@@ -2277,7 +2301,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2277 group_leader = group; 2301 group_leader = group;
2278 leader_nr_running = sum_nr_running; 2302 leader_nr_running = sum_nr_running;
2279 } 2303 }
2280 2304 }
2281group_next: 2305group_next:
2282#endif 2306#endif
2283 group = group->next; 2307 group = group->next;
@@ -2332,8 +2356,7 @@ group_next:
2332 * moved 2356 * moved
2333 */ 2357 */
2334 if (*imbalance < busiest_load_per_task) { 2358 if (*imbalance < busiest_load_per_task) {
2335 unsigned long pwr_now, pwr_move; 2359 unsigned long tmp, pwr_now, pwr_move;
2336 unsigned long tmp;
2337 unsigned int imbn; 2360 unsigned int imbn;
2338 2361
2339small_imbalance: 2362small_imbalance:
@@ -2405,22 +2428,23 @@ ret:
2405/* 2428/*
2406 * find_busiest_queue - find the busiest runqueue among the cpus in group. 2429 * find_busiest_queue - find the busiest runqueue among the cpus in group.
2407 */ 2430 */
2408static runqueue_t *find_busiest_queue(struct sched_group *group, 2431static struct rq *
2409 enum idle_type idle, unsigned long imbalance) 2432find_busiest_queue(struct sched_group *group, enum idle_type idle,
2433 unsigned long imbalance)
2410{ 2434{
2435 struct rq *busiest = NULL, *rq;
2411 unsigned long max_load = 0; 2436 unsigned long max_load = 0;
2412 runqueue_t *busiest = NULL, *rqi;
2413 int i; 2437 int i;
2414 2438
2415 for_each_cpu_mask(i, group->cpumask) { 2439 for_each_cpu_mask(i, group->cpumask) {
2416 rqi = cpu_rq(i); 2440 rq = cpu_rq(i);
2417 2441
2418 if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance) 2442 if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
2419 continue; 2443 continue;
2420 2444
2421 if (rqi->raw_weighted_load > max_load) { 2445 if (rq->raw_weighted_load > max_load) {
2422 max_load = rqi->raw_weighted_load; 2446 max_load = rq->raw_weighted_load;
2423 busiest = rqi; 2447 busiest = rq;
2424 } 2448 }
2425 } 2449 }
2426 2450
@@ -2433,22 +2457,24 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
2433 */ 2457 */
2434#define MAX_PINNED_INTERVAL 512 2458#define MAX_PINNED_INTERVAL 512
2435 2459
2436#define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0) 2460static inline unsigned long minus_1_or_zero(unsigned long n)
2461{
2462 return n > 0 ? n - 1 : 0;
2463}
2464
2437/* 2465/*
2438 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2466 * Check this_cpu to ensure it is balanced within domain. Attempt to move
2439 * tasks if there is an imbalance. 2467 * tasks if there is an imbalance.
2440 * 2468 *
2441 * Called with this_rq unlocked. 2469 * Called with this_rq unlocked.
2442 */ 2470 */
2443static int load_balance(int this_cpu, runqueue_t *this_rq, 2471static int load_balance(int this_cpu, struct rq *this_rq,
2444 struct sched_domain *sd, enum idle_type idle) 2472 struct sched_domain *sd, enum idle_type idle)
2445{ 2473{
2474 int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
2446 struct sched_group *group; 2475 struct sched_group *group;
2447 runqueue_t *busiest;
2448 unsigned long imbalance; 2476 unsigned long imbalance;
2449 int nr_moved, all_pinned = 0; 2477 struct rq *busiest;
2450 int active_balance = 0;
2451 int sd_idle = 0;
2452 2478
2453 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2479 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
2454 !sched_smt_power_savings) 2480 !sched_smt_power_savings)
@@ -2482,8 +2508,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
2482 */ 2508 */
2483 double_rq_lock(this_rq, busiest); 2509 double_rq_lock(this_rq, busiest);
2484 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2510 nr_moved = move_tasks(this_rq, this_cpu, busiest,
2485 minus_1_or_zero(busiest->nr_running), 2511 minus_1_or_zero(busiest->nr_running),
2486 imbalance, sd, idle, &all_pinned); 2512 imbalance, sd, idle, &all_pinned);
2487 double_rq_unlock(this_rq, busiest); 2513 double_rq_unlock(this_rq, busiest);
2488 2514
2489 /* All tasks on this runqueue were pinned by CPU affinity */ 2515 /* All tasks on this runqueue were pinned by CPU affinity */
@@ -2556,7 +2582,8 @@ out_one_pinned:
2556 (sd->balance_interval < sd->max_interval)) 2582 (sd->balance_interval < sd->max_interval))
2557 sd->balance_interval *= 2; 2583 sd->balance_interval *= 2;
2558 2584
2559 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2585 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2586 !sched_smt_power_savings)
2560 return -1; 2587 return -1;
2561 return 0; 2588 return 0;
2562} 2589}
@@ -2568,11 +2595,11 @@ out_one_pinned:
2568 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). 2595 * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
2569 * this_rq is locked. 2596 * this_rq is locked.
2570 */ 2597 */
2571static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, 2598static int
2572 struct sched_domain *sd) 2599load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2573{ 2600{
2574 struct sched_group *group; 2601 struct sched_group *group;
2575 runqueue_t *busiest = NULL; 2602 struct rq *busiest = NULL;
2576 unsigned long imbalance; 2603 unsigned long imbalance;
2577 int nr_moved = 0; 2604 int nr_moved = 0;
2578 int sd_idle = 0; 2605 int sd_idle = 0;
@@ -2618,9 +2645,11 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
2618 2645
2619out_balanced: 2646out_balanced:
2620 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2647 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
2621 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2648 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2649 !sched_smt_power_savings)
2622 return -1; 2650 return -1;
2623 sd->nr_balance_failed = 0; 2651 sd->nr_balance_failed = 0;
2652
2624 return 0; 2653 return 0;
2625} 2654}
2626 2655
@@ -2628,16 +2657,15 @@ out_balanced:
2628 * idle_balance is called by schedule() if this_cpu is about to become 2657 * idle_balance is called by schedule() if this_cpu is about to become
2629 * idle. Attempts to pull tasks from other CPUs. 2658 * idle. Attempts to pull tasks from other CPUs.
2630 */ 2659 */
2631static void idle_balance(int this_cpu, runqueue_t *this_rq) 2660static void idle_balance(int this_cpu, struct rq *this_rq)
2632{ 2661{
2633 struct sched_domain *sd; 2662 struct sched_domain *sd;
2634 2663
2635 for_each_domain(this_cpu, sd) { 2664 for_each_domain(this_cpu, sd) {
2636 if (sd->flags & SD_BALANCE_NEWIDLE) { 2665 if (sd->flags & SD_BALANCE_NEWIDLE) {
2637 if (load_balance_newidle(this_cpu, this_rq, sd)) { 2666 /* If we've pulled tasks over stop searching: */
2638 /* We've pulled tasks over so stop searching */ 2667 if (load_balance_newidle(this_cpu, this_rq, sd))
2639 break; 2668 break;
2640 }
2641 } 2669 }
2642 } 2670 }
2643} 2671}
@@ -2650,14 +2678,14 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq)
2650 * 2678 *
2651 * Called with busiest_rq locked. 2679 * Called with busiest_rq locked.
2652 */ 2680 */
2653static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) 2681static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
2654{ 2682{
2655 struct sched_domain *sd;
2656 runqueue_t *target_rq;
2657 int target_cpu = busiest_rq->push_cpu; 2683 int target_cpu = busiest_rq->push_cpu;
2684 struct sched_domain *sd;
2685 struct rq *target_rq;
2658 2686
2687 /* Is there any task to move? */
2659 if (busiest_rq->nr_running <= 1) 2688 if (busiest_rq->nr_running <= 1)
2660 /* no task to move */
2661 return; 2689 return;
2662 2690
2663 target_rq = cpu_rq(target_cpu); 2691 target_rq = cpu_rq(target_cpu);
@@ -2675,21 +2703,20 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
2675 /* Search for an sd spanning us and the target CPU. */ 2703 /* Search for an sd spanning us and the target CPU. */
2676 for_each_domain(target_cpu, sd) { 2704 for_each_domain(target_cpu, sd) {
2677 if ((sd->flags & SD_LOAD_BALANCE) && 2705 if ((sd->flags & SD_LOAD_BALANCE) &&
2678 cpu_isset(busiest_cpu, sd->span)) 2706 cpu_isset(busiest_cpu, sd->span))
2679 break; 2707 break;
2680 } 2708 }
2681 2709
2682 if (unlikely(sd == NULL)) 2710 if (likely(sd)) {
2683 goto out; 2711 schedstat_inc(sd, alb_cnt);
2684
2685 schedstat_inc(sd, alb_cnt);
2686 2712
2687 if (move_tasks(target_rq, target_cpu, busiest_rq, 1, 2713 if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
2688 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL)) 2714 RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE,
2689 schedstat_inc(sd, alb_pushed); 2715 NULL))
2690 else 2716 schedstat_inc(sd, alb_pushed);
2691 schedstat_inc(sd, alb_failed); 2717 else
2692out: 2718 schedstat_inc(sd, alb_failed);
2719 }
2693 spin_unlock(&target_rq->lock); 2720 spin_unlock(&target_rq->lock);
2694} 2721}
2695 2722
@@ -2702,23 +2729,27 @@ out:
2702 * Balancing parameters are set up in arch_init_sched_domains. 2729 * Balancing parameters are set up in arch_init_sched_domains.
2703 */ 2730 */
2704 2731
2705/* Don't have all balancing operations going off at once */ 2732/* Don't have all balancing operations going off at once: */
2706#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) 2733static inline unsigned long cpu_offset(int cpu)
2734{
2735 return jiffies + cpu * HZ / NR_CPUS;
2736}
2707 2737
2708static void rebalance_tick(int this_cpu, runqueue_t *this_rq, 2738static void
2709 enum idle_type idle) 2739rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
2710{ 2740{
2711 unsigned long old_load, this_load; 2741 unsigned long this_load, interval, j = cpu_offset(this_cpu);
2712 unsigned long j = jiffies + CPU_OFFSET(this_cpu);
2713 struct sched_domain *sd; 2742 struct sched_domain *sd;
2714 int i; 2743 int i, scale;
2715 2744
2716 this_load = this_rq->raw_weighted_load; 2745 this_load = this_rq->raw_weighted_load;
2717 /* Update our load */ 2746
2718 for (i = 0; i < 3; i++) { 2747 /* Update our load: */
2719 unsigned long new_load = this_load; 2748 for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
2720 int scale = 1 << i; 2749 unsigned long old_load, new_load;
2750
2721 old_load = this_rq->cpu_load[i]; 2751 old_load = this_rq->cpu_load[i];
2752 new_load = this_load;
2722 /* 2753 /*
2723 * Round up the averaging division if load is increasing. This 2754 * Round up the averaging division if load is increasing. This
2724 * prevents us from getting stuck on 9 if the load is 10, for 2755 * prevents us from getting stuck on 9 if the load is 10, for
@@ -2730,8 +2761,6 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2730 } 2761 }
2731 2762
2732 for_each_domain(this_cpu, sd) { 2763 for_each_domain(this_cpu, sd) {
2733 unsigned long interval;
2734
2735 if (!(sd->flags & SD_LOAD_BALANCE)) 2764 if (!(sd->flags & SD_LOAD_BALANCE))
2736 continue; 2765 continue;
2737 2766
@@ -2761,17 +2790,18 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
2761/* 2790/*
2762 * on UP we do not need to balance between CPUs: 2791 * on UP we do not need to balance between CPUs:
2763 */ 2792 */
2764static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) 2793static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)
2765{ 2794{
2766} 2795}
2767static inline void idle_balance(int cpu, runqueue_t *rq) 2796static inline void idle_balance(int cpu, struct rq *rq)
2768{ 2797{
2769} 2798}
2770#endif 2799#endif
2771 2800
2772static inline int wake_priority_sleeper(runqueue_t *rq) 2801static inline int wake_priority_sleeper(struct rq *rq)
2773{ 2802{
2774 int ret = 0; 2803 int ret = 0;
2804
2775#ifdef CONFIG_SCHED_SMT 2805#ifdef CONFIG_SCHED_SMT
2776 spin_lock(&rq->lock); 2806 spin_lock(&rq->lock);
2777 /* 2807 /*
@@ -2795,25 +2825,26 @@ EXPORT_PER_CPU_SYMBOL(kstat);
2795 * This is called on clock ticks and on context switches. 2825 * This is called on clock ticks and on context switches.
2796 * Bank in p->sched_time the ns elapsed since the last tick or switch. 2826 * Bank in p->sched_time the ns elapsed since the last tick or switch.
2797 */ 2827 */
2798static inline void update_cpu_clock(task_t *p, runqueue_t *rq, 2828static inline void
2799 unsigned long long now) 2829update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
2800{ 2830{
2801 unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); 2831 p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
2802 p->sched_time += now - last;
2803} 2832}
2804 2833
2805/* 2834/*
2806 * Return current->sched_time plus any more ns on the sched_clock 2835 * Return current->sched_time plus any more ns on the sched_clock
2807 * that have not yet been banked. 2836 * that have not yet been banked.
2808 */ 2837 */
2809unsigned long long current_sched_time(const task_t *tsk) 2838unsigned long long current_sched_time(const struct task_struct *p)
2810{ 2839{
2811 unsigned long long ns; 2840 unsigned long long ns;
2812 unsigned long flags; 2841 unsigned long flags;
2842
2813 local_irq_save(flags); 2843 local_irq_save(flags);
2814 ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); 2844 ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
2815 ns = tsk->sched_time + (sched_clock() - ns); 2845 ns = p->sched_time + sched_clock() - ns;
2816 local_irq_restore(flags); 2846 local_irq_restore(flags);
2847
2817 return ns; 2848 return ns;
2818} 2849}
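
update_cpu_clock() and current_sched_time() together bank nanoseconds into p->sched_time: the former adds whatever has elapsed since the later of the task's own timestamp and the runqueue's last tick, the latter returns the banked total plus the not-yet-banked remainder. A toy model of that bookkeeping (field names borrowed from the patch, everything else made up for illustration):

#include <stdio.h>

typedef unsigned long long u64;

static u64 max_u64(u64 a, u64 b) { return a > b ? a : b; }

struct toy_task { u64 sched_time; u64 timestamp; };
struct toy_rq   { u64 timestamp_last_tick; };

/* Mirrors update_cpu_clock(): bank the ns elapsed since the last tick or switch. */
static void bank(struct toy_task *p, struct toy_rq *rq, u64 now)
{
    p->sched_time += now - max_u64(p->timestamp, rq->timestamp_last_tick);
}

/* Mirrors current_sched_time(): banked time plus the not-yet-banked remainder. */
static u64 read_sched_time(const struct toy_task *p, const struct toy_rq *rq,
                           u64 clock_now)
{
    u64 ns = max_u64(p->timestamp, rq->timestamp_last_tick);

    return p->sched_time + clock_now - ns;
}

int main(void)
{
    struct toy_task p = { .sched_time = 0, .timestamp = 1000 };
    struct toy_rq rq = { .timestamp_last_tick = 1200 };

    bank(&p, &rq, 2000);                /* banks 2000 - 1200 = 800 ns */
    rq.timestamp_last_tick = 2000;
    printf("banked: %llu, live: %llu\n",
           p.sched_time, read_sched_time(&p, &rq, 2500));   /* 800, 1300 */
    return 0;
}
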
2819 2850
@@ -2827,11 +2858,16 @@ unsigned long long current_sched_time(const task_t *tsk)
2827 * increasing number of running tasks. We also ignore the interactivity 2858 * increasing number of running tasks. We also ignore the interactivity
2828 * if a better static_prio task has expired: 2859 * if a better static_prio task has expired:
2829 */ 2860 */
2830#define EXPIRED_STARVING(rq) \ 2861static inline int expired_starving(struct rq *rq)
2831 ((STARVATION_LIMIT && ((rq)->expired_timestamp && \ 2862{
2832 (jiffies - (rq)->expired_timestamp >= \ 2863 if (rq->curr->static_prio > rq->best_expired_prio)
2833 STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ 2864 return 1;
2834 ((rq)->curr->static_prio > (rq)->best_expired_prio)) 2865 if (!STARVATION_LIMIT || !rq->expired_timestamp)
2866 return 0;
2867 if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
2868 return 1;
2869 return 0;
2870}
2835 2871
2836/* 2872/*
2837 * Account user cpu time to a process. 2873 * Account user cpu time to a process.
@@ -2864,7 +2900,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
2864 cputime_t cputime) 2900 cputime_t cputime)
2865{ 2901{
2866 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2902 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2867 runqueue_t *rq = this_rq(); 2903 struct rq *rq = this_rq();
2868 cputime64_t tmp; 2904 cputime64_t tmp;
2869 2905
2870 p->stime = cputime_add(p->stime, cputime); 2906 p->stime = cputime_add(p->stime, cputime);
@@ -2894,7 +2930,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2894{ 2930{
2895 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 2931 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
2896 cputime64_t tmp = cputime_to_cputime64(steal); 2932 cputime64_t tmp = cputime_to_cputime64(steal);
2897 runqueue_t *rq = this_rq(); 2933 struct rq *rq = this_rq();
2898 2934
2899 if (p == rq->idle) { 2935 if (p == rq->idle) {
2900 p->stime = cputime_add(p->stime, steal); 2936 p->stime = cputime_add(p->stime, steal);
@@ -2915,10 +2951,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
2915 */ 2951 */
2916void scheduler_tick(void) 2952void scheduler_tick(void)
2917{ 2953{
2918 int cpu = smp_processor_id();
2919 runqueue_t *rq = this_rq();
2920 task_t *p = current;
2921 unsigned long long now = sched_clock(); 2954 unsigned long long now = sched_clock();
2955 struct task_struct *p = current;
2956 int cpu = smp_processor_id();
2957 struct rq *rq = cpu_rq(cpu);
2922 2958
2923 update_cpu_clock(p, rq, now); 2959 update_cpu_clock(p, rq, now);
2924 2960
@@ -2968,7 +3004,7 @@ void scheduler_tick(void)
2968 3004
2969 if (!rq->expired_timestamp) 3005 if (!rq->expired_timestamp)
2970 rq->expired_timestamp = jiffies; 3006 rq->expired_timestamp = jiffies;
2971 if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { 3007 if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
2972 enqueue_task(p, rq->expired); 3008 enqueue_task(p, rq->expired);
2973 if (p->static_prio < rq->best_expired_prio) 3009 if (p->static_prio < rq->best_expired_prio)
2974 rq->best_expired_prio = p->static_prio; 3010 rq->best_expired_prio = p->static_prio;
@@ -3007,7 +3043,7 @@ out:
3007} 3043}
3008 3044
3009#ifdef CONFIG_SCHED_SMT 3045#ifdef CONFIG_SCHED_SMT
3010static inline void wakeup_busy_runqueue(runqueue_t *rq) 3046static inline void wakeup_busy_runqueue(struct rq *rq)
3011{ 3047{
3012 /* If an SMT runqueue is sleeping due to priority reasons wake it up */ 3048 /* If an SMT runqueue is sleeping due to priority reasons wake it up */
3013 if (rq->curr == rq->idle && rq->nr_running) 3049 if (rq->curr == rq->idle && rq->nr_running)
@@ -3033,7 +3069,7 @@ static void wake_sleeping_dependent(int this_cpu)
3033 return; 3069 return;
3034 3070
3035 for_each_cpu_mask(i, sd->span) { 3071 for_each_cpu_mask(i, sd->span) {
3036 runqueue_t *smt_rq = cpu_rq(i); 3072 struct rq *smt_rq = cpu_rq(i);
3037 3073
3038 if (i == this_cpu) 3074 if (i == this_cpu)
3039 continue; 3075 continue;
@@ -3050,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu)
3050 * utilize, if another task runs on a sibling. This models the 3086 * utilize, if another task runs on a sibling. This models the
3051 * slowdown effect of other tasks running on siblings: 3087 * slowdown effect of other tasks running on siblings:
3052 */ 3088 */
3053static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) 3089static inline unsigned long
3090smt_slice(struct task_struct *p, struct sched_domain *sd)
3054{ 3091{
3055 return p->time_slice * (100 - sd->per_cpu_gain) / 100; 3092 return p->time_slice * (100 - sd->per_cpu_gain) / 100;
3056} 3093}
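
smt_slice() estimates how much of its timeslice a task effectively keeps when an SMT sibling is also busy: the slice scaled by (100 - per_cpu_gain) percent. A short worked example with illustrative numbers:

#include <stdio.h>

/* Same formula as smt_slice(): the portion of a timeslice a task is
 * expected to effectively get while an SMT sibling also runs. */
static unsigned long toy_smt_slice(unsigned long time_slice, int per_cpu_gain)
{
    return time_slice * (100 - per_cpu_gain) / 100;
}

int main(void)
{
    /* Illustrative numbers: a 100-tick slice and a 25% per-CPU SMT gain. */
    printf("effective slice: %lu ticks\n", toy_smt_slice(100, 25));   /* 75 */
    return 0;
}
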
@@ -3061,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
3061 * acquire their lock. As we only trylock the normal locking order does not 3098 * acquire their lock. As we only trylock the normal locking order does not
3062 * need to be obeyed. 3099 * need to be obeyed.
3063 */ 3100 */
3064static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) 3101static int
3102dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3065{ 3103{
3066 struct sched_domain *tmp, *sd = NULL; 3104 struct sched_domain *tmp, *sd = NULL;
3067 int ret = 0, i; 3105 int ret = 0, i;
@@ -3081,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
3081 return 0; 3119 return 0;
3082 3120
3083 for_each_cpu_mask(i, sd->span) { 3121 for_each_cpu_mask(i, sd->span) {
3084 runqueue_t *smt_rq; 3122 struct task_struct *smt_curr;
3085 task_t *smt_curr; 3123 struct rq *smt_rq;
3086 3124
3087 if (i == this_cpu) 3125 if (i == this_cpu)
3088 continue; 3126 continue;
@@ -3127,9 +3165,8 @@ unlock:
3127static inline void wake_sleeping_dependent(int this_cpu) 3165static inline void wake_sleeping_dependent(int this_cpu)
3128{ 3166{
3129} 3167}
3130 3168static inline int
3131static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, 3169dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
3132 task_t *p)
3133{ 3170{
3134 return 0; 3171 return 0;
3135} 3172}
@@ -3142,12 +3179,13 @@ void fastcall add_preempt_count(int val)
3142 /* 3179 /*
3143 * Underflow? 3180 * Underflow?
3144 */ 3181 */
3145 BUG_ON((preempt_count() < 0)); 3182 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
3183 return;
3146 preempt_count() += val; 3184 preempt_count() += val;
3147 /* 3185 /*
3148 * Spinlock count overflowing soon? 3186 * Spinlock count overflowing soon?
3149 */ 3187 */
3150 BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); 3188 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
3151} 3189}
3152EXPORT_SYMBOL(add_preempt_count); 3190EXPORT_SYMBOL(add_preempt_count);
3153 3191
@@ -3156,11 +3194,15 @@ void fastcall sub_preempt_count(int val)
3156 /* 3194 /*
3157 * Underflow? 3195 * Underflow?
3158 */ 3196 */
3159 BUG_ON(val > preempt_count()); 3197 if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
3198 return;
3160 /* 3199 /*
3161 * Is the spinlock portion underflowing? 3200 * Is the spinlock portion underflowing?
3162 */ 3201 */
3163 BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); 3202 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
3203 !(preempt_count() & PREEMPT_MASK)))
3204 return;
3205
3164 preempt_count() -= val; 3206 preempt_count() -= val;
3165} 3207}
3166EXPORT_SYMBOL(sub_preempt_count); 3208EXPORT_SYMBOL(sub_preempt_count);
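
Switching these checks from BUG_ON() to DEBUG_LOCKS_WARN_ON() turns a bad preempt count into a warning plus an early return rather than a dead machine, and the helper reports whether the condition fired so the caller can bail out before corrupting the count further. A simplified, self-contained stand-in for that warn-and-report pattern (this is not the real <linux/debug_locks.h> implementation, only an illustration of the calling convention; it relies on GCC statement expressions):

#include <stdio.h>

/*
 * Simplified stand-in for a DEBUG_LOCKS_WARN_ON()-style check: evaluate the
 * condition, print a one-line warning the first time it fires, and return
 * the condition so the caller can bail out.
 */
#define warn_on_once(cond)                                              \
({                                                                      \
    static int __warned;                                                \
    int __ret = !!(cond);                                               \
    if (__ret && !__warned) {                                           \
        __warned = 1;                                                   \
        fprintf(stderr, "warning: %s failed at %s:%d\n",                \
                #cond, __FILE__, __LINE__);                             \
    }                                                                   \
    __ret;                                                              \
})

static int preempt_count_val;

static void toy_add_preempt_count(int val)
{
    if (warn_on_once(preempt_count_val < 0))
        return;                 /* bail instead of taking the machine down */
    preempt_count_val += val;
}

int main(void)
{
    preempt_count_val = -1;     /* simulate an underflowed count */
    toy_add_preempt_count(1);   /* warns and returns without touching it */
    printf("count still %d\n", preempt_count_val);
    return 0;
}
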
@@ -3178,14 +3220,14 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
3178 */ 3220 */
3179asmlinkage void __sched schedule(void) 3221asmlinkage void __sched schedule(void)
3180{ 3222{
3181 long *switch_count; 3223 struct task_struct *prev, *next;
3182 task_t *prev, *next; 3224 struct prio_array *array;
3183 runqueue_t *rq;
3184 prio_array_t *array;
3185 struct list_head *queue; 3225 struct list_head *queue;
3186 unsigned long long now; 3226 unsigned long long now;
3187 unsigned long run_time; 3227 unsigned long run_time;
3188 int cpu, idx, new_prio; 3228 int cpu, idx, new_prio;
3229 long *switch_count;
3230 struct rq *rq;
3189 3231
3190 /* 3232 /*
3191 * Test if we are atomic. Since do_exit() needs to call into 3233 * Test if we are atomic. Since do_exit() needs to call into
@@ -3275,7 +3317,7 @@ need_resched_nonpreemptible:
3275 3317
3276 idx = sched_find_first_bit(array->bitmap); 3318 idx = sched_find_first_bit(array->bitmap);
3277 queue = array->queue + idx; 3319 queue = array->queue + idx;
3278 next = list_entry(queue->next, task_t, run_list); 3320 next = list_entry(queue->next, struct task_struct, run_list);
3279 3321
3280 if (!rt_task(next) && interactive_sleep(next->sleep_type)) { 3322 if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
3281 unsigned long long delta = now - next->timestamp; 3323 unsigned long long delta = now - next->timestamp;
@@ -3338,7 +3380,6 @@ switch_tasks:
3338 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3380 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3339 goto need_resched; 3381 goto need_resched;
3340} 3382}
3341
3342EXPORT_SYMBOL(schedule); 3383EXPORT_SYMBOL(schedule);
3343 3384
3344#ifdef CONFIG_PREEMPT 3385#ifdef CONFIG_PREEMPT
@@ -3383,7 +3424,6 @@ need_resched:
3383 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3424 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3384 goto need_resched; 3425 goto need_resched;
3385} 3426}
3386
3387EXPORT_SYMBOL(preempt_schedule); 3427EXPORT_SYMBOL(preempt_schedule);
3388 3428
3389/* 3429/*
@@ -3432,10 +3472,8 @@ need_resched:
3432int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, 3472int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
3433 void *key) 3473 void *key)
3434{ 3474{
3435 task_t *p = curr->private; 3475 return try_to_wake_up(curr->private, mode, sync);
3436 return try_to_wake_up(p, mode, sync);
3437} 3476}
3438
3439EXPORT_SYMBOL(default_wake_function); 3477EXPORT_SYMBOL(default_wake_function);
3440 3478
3441/* 3479/*
@@ -3453,13 +3491,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
3453 struct list_head *tmp, *next; 3491 struct list_head *tmp, *next;
3454 3492
3455 list_for_each_safe(tmp, next, &q->task_list) { 3493 list_for_each_safe(tmp, next, &q->task_list) {
3456 wait_queue_t *curr; 3494 wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
3457 unsigned flags; 3495 unsigned flags = curr->flags;
3458 curr = list_entry(tmp, wait_queue_t, task_list); 3496
3459 flags = curr->flags;
3460 if (curr->func(curr, mode, sync, key) && 3497 if (curr->func(curr, mode, sync, key) &&
3461 (flags & WQ_FLAG_EXCLUSIVE) && 3498 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
3462 !--nr_exclusive)
3463 break; 3499 break;
3464 } 3500 }
3465} 3501}
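
The tidied __wake_up_common() loop keeps the same policy: walk the waiters, invoke each callback, and stop once nr_exclusive waiters marked WQ_FLAG_EXCLUSIVE have been woken; non-exclusive waiters never consume the budget. A stripped-down model of just that counting rule (toy types, and it assumes every wakeup succeeds, whereas the real loop only charges the budget when curr->func() reports success):

#include <stdio.h>

#define TOY_WQ_FLAG_EXCLUSIVE 0x01

struct toy_waiter {
    const char *name;
    unsigned flags;
};

/* Mirrors the loop's policy: wake everyone until nr_exclusive exclusive
 * waiters have been woken, then stop walking the list. */
static void wake_some(struct toy_waiter *w, int n, int nr_exclusive)
{
    int i;

    for (i = 0; i < n; i++) {
        printf("waking %s\n", w[i].name);   /* stands in for curr->func() */
        if ((w[i].flags & TOY_WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
            break;
    }
}

int main(void)
{
    struct toy_waiter q[] = {
        { "poller",  0 },
        { "worker1", TOY_WQ_FLAG_EXCLUSIVE },
        { "worker2", TOY_WQ_FLAG_EXCLUSIVE },
    };

    /* One exclusive wakeup: the poller and worker1 run, worker2 stays asleep. */
    wake_some(q, 3, 1);
    return 0;
}
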
@@ -3480,7 +3516,6 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
3480 __wake_up_common(q, mode, nr_exclusive, 0, key); 3516 __wake_up_common(q, mode, nr_exclusive, 0, key);
3481 spin_unlock_irqrestore(&q->lock, flags); 3517 spin_unlock_irqrestore(&q->lock, flags);
3482} 3518}
3483
3484EXPORT_SYMBOL(__wake_up); 3519EXPORT_SYMBOL(__wake_up);
3485 3520
3486/* 3521/*
@@ -3549,6 +3584,7 @@ EXPORT_SYMBOL(complete_all);
3549void fastcall __sched wait_for_completion(struct completion *x) 3584void fastcall __sched wait_for_completion(struct completion *x)
3550{ 3585{
3551 might_sleep(); 3586 might_sleep();
3587
3552 spin_lock_irq(&x->wait.lock); 3588 spin_lock_irq(&x->wait.lock);
3553 if (!x->done) { 3589 if (!x->done) {
3554 DECLARE_WAITQUEUE(wait, current); 3590 DECLARE_WAITQUEUE(wait, current);
@@ -3693,7 +3729,6 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
3693 schedule(); 3729 schedule();
3694 SLEEP_ON_TAIL 3730 SLEEP_ON_TAIL
3695} 3731}
3696
3697EXPORT_SYMBOL(interruptible_sleep_on); 3732EXPORT_SYMBOL(interruptible_sleep_on);
3698 3733
3699long fastcall __sched 3734long fastcall __sched
@@ -3709,7 +3744,6 @@ interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
3709 3744
3710 return timeout; 3745 return timeout;
3711} 3746}
3712
3713EXPORT_SYMBOL(interruptible_sleep_on_timeout); 3747EXPORT_SYMBOL(interruptible_sleep_on_timeout);
3714 3748
3715void fastcall __sched sleep_on(wait_queue_head_t *q) 3749void fastcall __sched sleep_on(wait_queue_head_t *q)
@@ -3722,7 +3756,6 @@ void fastcall __sched sleep_on(wait_queue_head_t *q)
3722 schedule(); 3756 schedule();
3723 SLEEP_ON_TAIL 3757 SLEEP_ON_TAIL
3724} 3758}
3725
3726EXPORT_SYMBOL(sleep_on); 3759EXPORT_SYMBOL(sleep_on);
3727 3760
3728long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) 3761long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
@@ -3752,11 +3785,11 @@ EXPORT_SYMBOL(sleep_on_timeout);
3752 * 3785 *
3753 * Used by the rt_mutex code to implement priority inheritance logic. 3786 * Used by the rt_mutex code to implement priority inheritance logic.
3754 */ 3787 */
3755void rt_mutex_setprio(task_t *p, int prio) 3788void rt_mutex_setprio(struct task_struct *p, int prio)
3756{ 3789{
3790 struct prio_array *array;
3757 unsigned long flags; 3791 unsigned long flags;
3758 prio_array_t *array; 3792 struct rq *rq;
3759 runqueue_t *rq;
3760 int oldprio; 3793 int oldprio;
3761 3794
3762 BUG_ON(prio < 0 || prio > MAX_PRIO); 3795 BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -3793,12 +3826,12 @@ void rt_mutex_setprio(task_t *p, int prio)
3793 3826
3794#endif 3827#endif
3795 3828
3796void set_user_nice(task_t *p, long nice) 3829void set_user_nice(struct task_struct *p, long nice)
3797{ 3830{
3798 unsigned long flags; 3831 struct prio_array *array;
3799 prio_array_t *array;
3800 runqueue_t *rq;
3801 int old_prio, delta; 3832 int old_prio, delta;
3833 unsigned long flags;
3834 struct rq *rq;
3802 3835
3803 if (TASK_NICE(p) == nice || nice < -20 || nice > 19) 3836 if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
3804 return; 3837 return;
@@ -3849,10 +3882,11 @@ EXPORT_SYMBOL(set_user_nice);
3849 * @p: task 3882 * @p: task
3850 * @nice: nice value 3883 * @nice: nice value
3851 */ 3884 */
3852int can_nice(const task_t *p, const int nice) 3885int can_nice(const struct task_struct *p, const int nice)
3853{ 3886{
3854 /* convert nice value [19,-20] to rlimit style value [1,40] */ 3887 /* convert nice value [19,-20] to rlimit style value [1,40] */
3855 int nice_rlim = 20 - nice; 3888 int nice_rlim = 20 - nice;
3889
3856 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || 3890 return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
3857 capable(CAP_SYS_NICE)); 3891 capable(CAP_SYS_NICE));
3858} 3892}
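
can_nice() maps the requested nice value from the user-visible range [19, -20] onto the rlimit-style range [1, 40] via 20 - nice before comparing it with RLIMIT_NICE. A small arithmetic check of that mapping (the rlimit value is illustrative):

#include <stdio.h>

/* The same mapping can_nice() uses: nice 19 -> 1, nice 0 -> 20, nice -20 -> 40. */
static int nice_to_rlim(int nice)
{
    return 20 - nice;
}

int main(void)
{
    int rlim_cur = 30;  /* illustrative RLIMIT_NICE soft limit */
    int nice = -5;      /* requested nice value */

    printf("nice %d -> rlim %d, allowed without CAP_SYS_NICE: %s\n",
           nice, nice_to_rlim(nice),
           nice_to_rlim(nice) <= rlim_cur ? "yes" : "no");
    return 0;
}
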
@@ -3868,8 +3902,7 @@ int can_nice(const task_t *p, const int nice)
3868 */ 3902 */
3869asmlinkage long sys_nice(int increment) 3903asmlinkage long sys_nice(int increment)
3870{ 3904{
3871 int retval; 3905 long nice, retval;
3872 long nice;
3873 3906
3874 /* 3907 /*
3875 * Setpriority might change our priority at the same moment. 3908 * Setpriority might change our priority at the same moment.
@@ -3908,7 +3941,7 @@ asmlinkage long sys_nice(int increment)
3908 * RT tasks are offset by -200. Normal tasks are centered 3941 * RT tasks are offset by -200. Normal tasks are centered
3909 * around 0, value goes from -16 to +15. 3942 * around 0, value goes from -16 to +15.
3910 */ 3943 */
3911int task_prio(const task_t *p) 3944int task_prio(const struct task_struct *p)
3912{ 3945{
3913 return p->prio - MAX_RT_PRIO; 3946 return p->prio - MAX_RT_PRIO;
3914} 3947}
@@ -3917,7 +3950,7 @@ int task_prio(const task_t *p)
3917 * task_nice - return the nice value of a given task. 3950 * task_nice - return the nice value of a given task.
3918 * @p: the task in question. 3951 * @p: the task in question.
3919 */ 3952 */
3920int task_nice(const task_t *p) 3953int task_nice(const struct task_struct *p)
3921{ 3954{
3922 return TASK_NICE(p); 3955 return TASK_NICE(p);
3923} 3956}
@@ -3936,7 +3969,7 @@ int idle_cpu(int cpu)
3936 * idle_task - return the idle task for a given cpu. 3969 * idle_task - return the idle task for a given cpu.
3937 * @cpu: the processor in question. 3970 * @cpu: the processor in question.
3938 */ 3971 */
3939task_t *idle_task(int cpu) 3972struct task_struct *idle_task(int cpu)
3940{ 3973{
3941 return cpu_rq(cpu)->idle; 3974 return cpu_rq(cpu)->idle;
3942} 3975}
@@ -3945,7 +3978,7 @@ task_t *idle_task(int cpu)
3945 * find_process_by_pid - find a process with a matching PID value. 3978 * find_process_by_pid - find a process with a matching PID value.
3946 * @pid: the pid in question. 3979 * @pid: the pid in question.
3947 */ 3980 */
3948static inline task_t *find_process_by_pid(pid_t pid) 3981static inline struct task_struct *find_process_by_pid(pid_t pid)
3949{ 3982{
3950 return pid ? find_task_by_pid(pid) : current; 3983 return pid ? find_task_by_pid(pid) : current;
3951} 3984}
@@ -3954,6 +3987,7 @@ static inline task_t *find_process_by_pid(pid_t pid)
3954static void __setscheduler(struct task_struct *p, int policy, int prio) 3987static void __setscheduler(struct task_struct *p, int policy, int prio)
3955{ 3988{
3956 BUG_ON(p->array); 3989 BUG_ON(p->array);
3990
3957 p->policy = policy; 3991 p->policy = policy;
3958 p->rt_priority = prio; 3992 p->rt_priority = prio;
3959 p->normal_prio = normal_prio(p); 3993 p->normal_prio = normal_prio(p);
@@ -3977,11 +4011,10 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
3977int sched_setscheduler(struct task_struct *p, int policy, 4011int sched_setscheduler(struct task_struct *p, int policy,
3978 struct sched_param *param) 4012 struct sched_param *param)
3979{ 4013{
3980 int retval; 4014 int retval, oldprio, oldpolicy = -1;
3981 int oldprio, oldpolicy = -1; 4015 struct prio_array *array;
3982 prio_array_t *array;
3983 unsigned long flags; 4016 unsigned long flags;
3984 runqueue_t *rq; 4017 struct rq *rq;
3985 4018
3986 /* may grab non-irq protected spin_locks */ 4019 /* may grab non-irq protected spin_locks */
3987 BUG_ON(in_interrupt()); 4020 BUG_ON(in_interrupt());
@@ -4079,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
4079static int 4112static int
4080do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 4113do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4081{ 4114{
4082 int retval;
4083 struct sched_param lparam; 4115 struct sched_param lparam;
4084 struct task_struct *p; 4116 struct task_struct *p;
4117 int retval;
4085 4118
4086 if (!param || pid < 0) 4119 if (!param || pid < 0)
4087 return -EINVAL; 4120 return -EINVAL;
@@ -4097,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4097 read_unlock_irq(&tasklist_lock); 4130 read_unlock_irq(&tasklist_lock);
4098 retval = sched_setscheduler(p, policy, &lparam); 4131 retval = sched_setscheduler(p, policy, &lparam);
4099 put_task_struct(p); 4132 put_task_struct(p);
4133
4100 return retval; 4134 return retval;
4101} 4135}
4102 4136
@@ -4132,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
4132 */ 4166 */
4133asmlinkage long sys_sched_getscheduler(pid_t pid) 4167asmlinkage long sys_sched_getscheduler(pid_t pid)
4134{ 4168{
4169 struct task_struct *p;
4135 int retval = -EINVAL; 4170 int retval = -EINVAL;
4136 task_t *p;
4137 4171
4138 if (pid < 0) 4172 if (pid < 0)
4139 goto out_nounlock; 4173 goto out_nounlock;
@@ -4160,8 +4194,8 @@ out_nounlock:
4160asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) 4194asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
4161{ 4195{
4162 struct sched_param lp; 4196 struct sched_param lp;
4197 struct task_struct *p;
4163 int retval = -EINVAL; 4198 int retval = -EINVAL;
4164 task_t *p;
4165 4199
4166 if (!param || pid < 0) 4200 if (!param || pid < 0)
4167 goto out_nounlock; 4201 goto out_nounlock;
@@ -4194,9 +4228,9 @@ out_unlock:
4194 4228
4195long sched_setaffinity(pid_t pid, cpumask_t new_mask) 4229long sched_setaffinity(pid_t pid, cpumask_t new_mask)
4196{ 4230{
4197 task_t *p;
4198 int retval;
4199 cpumask_t cpus_allowed; 4231 cpumask_t cpus_allowed;
4232 struct task_struct *p;
4233 int retval;
4200 4234
4201 lock_cpu_hotplug(); 4235 lock_cpu_hotplug();
4202 read_lock(&tasklist_lock); 4236 read_lock(&tasklist_lock);
@@ -4282,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
4282 4316
4283long sched_getaffinity(pid_t pid, cpumask_t *mask) 4317long sched_getaffinity(pid_t pid, cpumask_t *mask)
4284{ 4318{
4319 struct task_struct *p;
4285 int retval; 4320 int retval;
4286 task_t *p;
4287 4321
4288 lock_cpu_hotplug(); 4322 lock_cpu_hotplug();
4289 read_lock(&tasklist_lock); 4323 read_lock(&tasklist_lock);
@@ -4342,9 +4376,8 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
4342 */ 4376 */
4343asmlinkage long sys_sched_yield(void) 4377asmlinkage long sys_sched_yield(void)
4344{ 4378{
4345 runqueue_t *rq = this_rq_lock(); 4379 struct rq *rq = this_rq_lock();
4346 prio_array_t *array = current->array; 4380 struct prio_array *array = current->array, *target = rq->expired;
4347 prio_array_t *target = rq->expired;
4348 4381
4349 schedstat_inc(rq, yld_cnt); 4382 schedstat_inc(rq, yld_cnt);
4350 /* 4383 /*
@@ -4378,6 +4411,7 @@ asmlinkage long sys_sched_yield(void)
4378 * no need to preempt or enable interrupts: 4411 * no need to preempt or enable interrupts:
4379 */ 4412 */
4380 __release(rq->lock); 4413 __release(rq->lock);
4414 spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
4381 _raw_spin_unlock(&rq->lock); 4415 _raw_spin_unlock(&rq->lock);
4382 preempt_enable_no_resched(); 4416 preempt_enable_no_resched();
4383 4417
@@ -4441,6 +4475,7 @@ int cond_resched_lock(spinlock_t *lock)
4441 spin_lock(lock); 4475 spin_lock(lock);
4442 } 4476 }
4443 if (need_resched() && __resched_legal()) { 4477 if (need_resched() && __resched_legal()) {
4478 spin_release(&lock->dep_map, 1, _THIS_IP_);
4444 _raw_spin_unlock(lock); 4479 _raw_spin_unlock(lock);
4445 preempt_enable_no_resched(); 4480 preempt_enable_no_resched();
4446 __cond_resched(); 4481 __cond_resched();
@@ -4456,7 +4491,9 @@ int __sched cond_resched_softirq(void)
4456 BUG_ON(!in_softirq()); 4491 BUG_ON(!in_softirq());
4457 4492
4458 if (need_resched() && __resched_legal()) { 4493 if (need_resched() && __resched_legal()) {
4459 __local_bh_enable(); 4494 raw_local_irq_disable();
4495 _local_bh_enable();
4496 raw_local_irq_enable();
4460 __cond_resched(); 4497 __cond_resched();
4461 local_bh_disable(); 4498 local_bh_disable();
4462 return 1; 4499 return 1;
@@ -4476,7 +4513,6 @@ void __sched yield(void)
4476 set_current_state(TASK_RUNNING); 4513 set_current_state(TASK_RUNNING);
4477 sys_sched_yield(); 4514 sys_sched_yield();
4478} 4515}
4479
4480EXPORT_SYMBOL(yield); 4516EXPORT_SYMBOL(yield);
4481 4517
4482/* 4518/*
@@ -4488,18 +4524,17 @@ EXPORT_SYMBOL(yield);
4488 */ 4524 */
4489void __sched io_schedule(void) 4525void __sched io_schedule(void)
4490{ 4526{
4491 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4527 struct rq *rq = &__raw_get_cpu_var(runqueues);
4492 4528
4493 atomic_inc(&rq->nr_iowait); 4529 atomic_inc(&rq->nr_iowait);
4494 schedule(); 4530 schedule();
4495 atomic_dec(&rq->nr_iowait); 4531 atomic_dec(&rq->nr_iowait);
4496} 4532}
4497
4498EXPORT_SYMBOL(io_schedule); 4533EXPORT_SYMBOL(io_schedule);
4499 4534
4500long __sched io_schedule_timeout(long timeout) 4535long __sched io_schedule_timeout(long timeout)
4501{ 4536{
4502 struct runqueue *rq = &__raw_get_cpu_var(runqueues); 4537 struct rq *rq = &__raw_get_cpu_var(runqueues);
4503 long ret; 4538 long ret;
4504 4539
4505 atomic_inc(&rq->nr_iowait); 4540 atomic_inc(&rq->nr_iowait);
@@ -4566,9 +4601,9 @@ asmlinkage long sys_sched_get_priority_min(int policy)
4566asmlinkage 4601asmlinkage
4567long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) 4602long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
4568{ 4603{
4604 struct task_struct *p;
4569 int retval = -EINVAL; 4605 int retval = -EINVAL;
4570 struct timespec t; 4606 struct timespec t;
4571 task_t *p;
4572 4607
4573 if (pid < 0) 4608 if (pid < 0)
4574 goto out_nounlock; 4609 goto out_nounlock;
@@ -4596,28 +4631,32 @@ out_unlock:
4596 4631
4597static inline struct task_struct *eldest_child(struct task_struct *p) 4632static inline struct task_struct *eldest_child(struct task_struct *p)
4598{ 4633{
4599 if (list_empty(&p->children)) return NULL; 4634 if (list_empty(&p->children))
4635 return NULL;
4600 return list_entry(p->children.next,struct task_struct,sibling); 4636 return list_entry(p->children.next,struct task_struct,sibling);
4601} 4637}
4602 4638
4603static inline struct task_struct *older_sibling(struct task_struct *p) 4639static inline struct task_struct *older_sibling(struct task_struct *p)
4604{ 4640{
4605 if (p->sibling.prev==&p->parent->children) return NULL; 4641 if (p->sibling.prev==&p->parent->children)
4642 return NULL;
4606 return list_entry(p->sibling.prev,struct task_struct,sibling); 4643 return list_entry(p->sibling.prev,struct task_struct,sibling);
4607} 4644}
4608 4645
4609static inline struct task_struct *younger_sibling(struct task_struct *p) 4646static inline struct task_struct *younger_sibling(struct task_struct *p)
4610{ 4647{
4611 if (p->sibling.next==&p->parent->children) return NULL; 4648 if (p->sibling.next==&p->parent->children)
4649 return NULL;
4612 return list_entry(p->sibling.next,struct task_struct,sibling); 4650 return list_entry(p->sibling.next,struct task_struct,sibling);
4613} 4651}
4614 4652
4615static void show_task(task_t *p) 4653static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
4654
4655static void show_task(struct task_struct *p)
4616{ 4656{
4617 task_t *relative; 4657 struct task_struct *relative;
4618 unsigned state;
4619 unsigned long free = 0; 4658 unsigned long free = 0;
4620 static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; 4659 unsigned state;
4621 4660
4622 printk("%-13.13s ", p->comm); 4661 printk("%-13.13s ", p->comm);
4623 state = p->state ? __ffs(p->state) + 1 : 0; 4662 state = p->state ? __ffs(p->state) + 1 : 0;
@@ -4668,7 +4707,7 @@ static void show_task(task_t *p)
4668 4707
4669void show_state(void) 4708void show_state(void)
4670{ 4709{
4671 task_t *g, *p; 4710 struct task_struct *g, *p;
4672 4711
4673#if (BITS_PER_LONG == 32) 4712#if (BITS_PER_LONG == 32)
4674 printk("\n" 4713 printk("\n"
@@ -4690,7 +4729,7 @@ void show_state(void)
4690 } while_each_thread(g, p); 4729 } while_each_thread(g, p);
4691 4730
4692 read_unlock(&tasklist_lock); 4731 read_unlock(&tasklist_lock);
4693 mutex_debug_show_all_locks(); 4732 debug_show_all_locks();
4694} 4733}
4695 4734
4696/** 4735/**
@@ -4701,9 +4740,9 @@ void show_state(void)
4701 * NOTE: this function does not set the idle thread's NEED_RESCHED 4740 * NOTE: this function does not set the idle thread's NEED_RESCHED
4702 * flag, to make booting more robust. 4741 * flag, to make booting more robust.
4703 */ 4742 */
4704void __devinit init_idle(task_t *idle, int cpu) 4743void __devinit init_idle(struct task_struct *idle, int cpu)
4705{ 4744{
4706 runqueue_t *rq = cpu_rq(cpu); 4745 struct rq *rq = cpu_rq(cpu);
4707 unsigned long flags; 4746 unsigned long flags;
4708 4747
4709 idle->timestamp = sched_clock(); 4748 idle->timestamp = sched_clock();
@@ -4742,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4742/* 4781/*
4743 * This is how migration works: 4782 * This is how migration works:
4744 * 4783 *
4745 * 1) we queue a migration_req_t structure in the source CPU's 4784 * 1) we queue a struct migration_req structure in the source CPU's
4746 * runqueue and wake up that CPU's migration thread. 4785 * runqueue and wake up that CPU's migration thread.
4747 * 2) we down() the locked semaphore => thread blocks. 4786 * 2) we down() the locked semaphore => thread blocks.
4748 * 3) migration thread wakes up (implicitly it forces the migrated 4787 * 3) migration thread wakes up (implicitly it forces the migrated
@@ -4764,12 +4803,12 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
4764 * task must not exit() & deallocate itself prematurely. The 4803 * task must not exit() & deallocate itself prematurely. The
4765 * call is not atomic; no spinlocks may be held. 4804 * call is not atomic; no spinlocks may be held.
4766 */ 4805 */
4767int set_cpus_allowed(task_t *p, cpumask_t new_mask) 4806int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
4768{ 4807{
4808 struct migration_req req;
4769 unsigned long flags; 4809 unsigned long flags;
4810 struct rq *rq;
4770 int ret = 0; 4811 int ret = 0;
4771 migration_req_t req;
4772 runqueue_t *rq;
4773 4812
4774 rq = task_rq_lock(p, &flags); 4813 rq = task_rq_lock(p, &flags);
4775 if (!cpus_intersects(new_mask, cpu_online_map)) { 4814 if (!cpus_intersects(new_mask, cpu_online_map)) {
@@ -4792,9 +4831,9 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
4792 } 4831 }
4793out: 4832out:
4794 task_rq_unlock(rq, &flags); 4833 task_rq_unlock(rq, &flags);
4834
4795 return ret; 4835 return ret;
4796} 4836}
4797
4798EXPORT_SYMBOL_GPL(set_cpus_allowed); 4837EXPORT_SYMBOL_GPL(set_cpus_allowed);
4799 4838
4800/* 4839/*
@@ -4810,7 +4849,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
4810 */ 4849 */
4811static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) 4850static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4812{ 4851{
4813 runqueue_t *rq_dest, *rq_src; 4852 struct rq *rq_dest, *rq_src;
4814 int ret = 0; 4853 int ret = 0;
4815 4854
4816 if (unlikely(cpu_is_offline(dest_cpu))) 4855 if (unlikely(cpu_is_offline(dest_cpu)))
@@ -4855,16 +4894,16 @@ out:
4855 */ 4894 */
4856static int migration_thread(void *data) 4895static int migration_thread(void *data)
4857{ 4896{
4858 runqueue_t *rq;
4859 int cpu = (long)data; 4897 int cpu = (long)data;
4898 struct rq *rq;
4860 4899
4861 rq = cpu_rq(cpu); 4900 rq = cpu_rq(cpu);
4862 BUG_ON(rq->migration_thread != current); 4901 BUG_ON(rq->migration_thread != current);
4863 4902
4864 set_current_state(TASK_INTERRUPTIBLE); 4903 set_current_state(TASK_INTERRUPTIBLE);
4865 while (!kthread_should_stop()) { 4904 while (!kthread_should_stop()) {
4905 struct migration_req *req;
4866 struct list_head *head; 4906 struct list_head *head;
4867 migration_req_t *req;
4868 4907
4869 try_to_freeze(); 4908 try_to_freeze();
4870 4909
@@ -4888,7 +4927,7 @@ static int migration_thread(void *data)
4888 set_current_state(TASK_INTERRUPTIBLE); 4927 set_current_state(TASK_INTERRUPTIBLE);
4889 continue; 4928 continue;
4890 } 4929 }
4891 req = list_entry(head->next, migration_req_t, list); 4930 req = list_entry(head->next, struct migration_req, list);
4892 list_del_init(head->next); 4931 list_del_init(head->next);
4893 4932
4894 spin_unlock(&rq->lock); 4933 spin_unlock(&rq->lock);
@@ -4913,28 +4952,28 @@ wait_to_die:
4913 4952
4914#ifdef CONFIG_HOTPLUG_CPU 4953#ifdef CONFIG_HOTPLUG_CPU
4915/* Figure out where task on dead CPU should go, use force if necessary. */ 4954

4916static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) 4955static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
4917{ 4956{
4918 runqueue_t *rq;
4919 unsigned long flags; 4957 unsigned long flags;
4920 int dest_cpu;
4921 cpumask_t mask; 4958 cpumask_t mask;
4959 struct rq *rq;
4960 int dest_cpu;
4922 4961
4923restart: 4962restart:
4924 /* On same node? */ 4963 /* On same node? */
4925 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 4964 mask = node_to_cpumask(cpu_to_node(dead_cpu));
4926 cpus_and(mask, mask, tsk->cpus_allowed); 4965 cpus_and(mask, mask, p->cpus_allowed);
4927 dest_cpu = any_online_cpu(mask); 4966 dest_cpu = any_online_cpu(mask);
4928 4967
4929 /* On any allowed CPU? */ 4968 /* On any allowed CPU? */
4930 if (dest_cpu == NR_CPUS) 4969 if (dest_cpu == NR_CPUS)
4931 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4970 dest_cpu = any_online_cpu(p->cpus_allowed);
4932 4971
4933 /* No more Mr. Nice Guy. */ 4972 /* No more Mr. Nice Guy. */
4934 if (dest_cpu == NR_CPUS) { 4973 if (dest_cpu == NR_CPUS) {
4935 rq = task_rq_lock(tsk, &flags); 4974 rq = task_rq_lock(p, &flags);
4936 cpus_setall(tsk->cpus_allowed); 4975 cpus_setall(p->cpus_allowed);
4937 dest_cpu = any_online_cpu(tsk->cpus_allowed); 4976 dest_cpu = any_online_cpu(p->cpus_allowed);
4938 task_rq_unlock(rq, &flags); 4977 task_rq_unlock(rq, &flags);
4939 4978
4940 /* 4979 /*
@@ -4942,12 +4981,12 @@ restart:
4942 * kernel threads (both mm NULL), since they never 4981 * kernel threads (both mm NULL), since they never
4943 * leave kernel. 4982 * leave kernel.
4944 */ 4983 */
4945 if (tsk->mm && printk_ratelimit()) 4984 if (p->mm && printk_ratelimit())
4946 printk(KERN_INFO "process %d (%s) no " 4985 printk(KERN_INFO "process %d (%s) no "
4947 "longer affine to cpu%d\n", 4986 "longer affine to cpu%d\n",
4948 tsk->pid, tsk->comm, dead_cpu); 4987 p->pid, p->comm, dead_cpu);
4949 } 4988 }
4950 if (!__migrate_task(tsk, dead_cpu, dest_cpu)) 4989 if (!__migrate_task(p, dead_cpu, dest_cpu))
4951 goto restart; 4990 goto restart;
4952} 4991}
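
move_task_off_dead_cpu() picks a destination in three steps of decreasing pickiness: an online CPU on the dead CPU's node, then any online CPU still in the task's affinity mask, and finally it widens the mask to all CPUs (warning if the task has a user mm). A compact sketch of just that selection order on toy data, not the real cpumask API:

#include <stdio.h>

#define TOY_CPUS 4
#define NO_CPU   (-1)

/* Toy stand-in for the cpumask machinery: first CPU that is online, allowed,
 * and (optionally) on the requested node. */
static int pick(const int *online, const int *allowed, int node, const int *node_of)
{
    int cpu;

    for (cpu = 0; cpu < TOY_CPUS; cpu++) {
        if (!online[cpu] || !allowed[cpu])
            continue;
        if (node >= 0 && node_of[cpu] != node)
            continue;
        return cpu;
    }
    return NO_CPU;
}

int main(void)
{
    int online[TOY_CPUS]  = { 0, 0, 1, 1 }; /* cpus 0-1 are going offline */
    int allowed[TOY_CPUS] = { 1, 1, 0, 1 }; /* task was never allowed on cpu2 */
    int node_of[TOY_CPUS] = { 0, 0, 1, 1 }; /* two toy NUMA nodes */
    int dead_cpu = 0, dest, i;

    /* 1) same node, 2) any allowed online cpu, 3) give up on affinity. */
    dest = pick(online, allowed, node_of[dead_cpu], node_of);
    if (dest == NO_CPU)
        dest = pick(online, allowed, -1, node_of);
    if (dest == NO_CPU) {
        for (i = 0; i < TOY_CPUS; i++)
            allowed[i] = 1;     /* cpus_setall(): no more Mr. Nice Guy */
        dest = pick(online, allowed, -1, node_of);
    }
    printf("task moved from cpu%d to cpu%d\n", dead_cpu, dest);
    return 0;
}
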
4953 4992
@@ -4958,9 +4997,9 @@ restart:
4958 * their home CPUs. So we just add the counter to another CPU's counter, 4997 * their home CPUs. So we just add the counter to another CPU's counter,
4959 * to keep the global sum constant after CPU-down: 4998 * to keep the global sum constant after CPU-down:
4960 */ 4999 */
4961static void migrate_nr_uninterruptible(runqueue_t *rq_src) 5000static void migrate_nr_uninterruptible(struct rq *rq_src)
4962{ 5001{
4963 runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); 5002 struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
4964 unsigned long flags; 5003 unsigned long flags;
4965 5004
4966 local_irq_save(flags); 5005 local_irq_save(flags);
@@ -4974,48 +5013,51 @@ static void migrate_nr_uninterruptible(runqueue_t *rq_src)
4974/* Run through task list and migrate tasks from the dead cpu. */ 5013/* Run through task list and migrate tasks from the dead cpu. */
4975static void migrate_live_tasks(int src_cpu) 5014static void migrate_live_tasks(int src_cpu)
4976{ 5015{
4977 struct task_struct *tsk, *t; 5016 struct task_struct *p, *t;
4978 5017
4979 write_lock_irq(&tasklist_lock); 5018 write_lock_irq(&tasklist_lock);
4980 5019
4981 do_each_thread(t, tsk) { 5020 do_each_thread(t, p) {
4982 if (tsk == current) 5021 if (p == current)
4983 continue; 5022 continue;
4984 5023
4985 if (task_cpu(tsk) == src_cpu) 5024 if (task_cpu(p) == src_cpu)
4986 move_task_off_dead_cpu(src_cpu, tsk); 5025 move_task_off_dead_cpu(src_cpu, p);
4987 } while_each_thread(t, tsk); 5026 } while_each_thread(t, p);
4988 5027
4989 write_unlock_irq(&tasklist_lock); 5028 write_unlock_irq(&tasklist_lock);
4990} 5029}
4991 5030
4992/* Schedules idle task to be the next runnable task on current CPU. 5031/* Schedules idle task to be the next runnable task on current CPU.
4993 * It does so by boosting its priority to highest possible and adding it to 5032 * It does so by boosting its priority to highest possible and adding it to
4994 * the _front_ of runqueue. Used by CPU offline code. 5033 * the _front_ of the runqueue. Used by CPU offline code.
4995 */ 5034 */
4996void sched_idle_next(void) 5035void sched_idle_next(void)
4997{ 5036{
4998 int cpu = smp_processor_id(); 5037 int this_cpu = smp_processor_id();
4999 runqueue_t *rq = this_rq(); 5038 struct rq *rq = cpu_rq(this_cpu);
5000 struct task_struct *p = rq->idle; 5039 struct task_struct *p = rq->idle;
5001 unsigned long flags; 5040 unsigned long flags;
5002 5041
5003 /* cpu has to be offline */ 5042 /* cpu has to be offline */
5004 BUG_ON(cpu_online(cpu)); 5043 BUG_ON(cpu_online(this_cpu));
5005 5044
5006 /* Strictly not necessary since rest of the CPUs are stopped by now 5045 /*
5007 * and interrupts disabled on current cpu. 5046 * Strictly not necessary since rest of the CPUs are stopped by now
5047 * and interrupts disabled on the current cpu.
5008 */ 5048 */
5009 spin_lock_irqsave(&rq->lock, flags); 5049 spin_lock_irqsave(&rq->lock, flags);
5010 5050
5011 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); 5051 __setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
5012 /* Add idle task to _front_ of it's priority queue */ 5052
5053 /* Add idle task to the _front_ of its priority queue: */
5013 __activate_idle_task(p, rq); 5054 __activate_idle_task(p, rq);
5014 5055
5015 spin_unlock_irqrestore(&rq->lock, flags); 5056 spin_unlock_irqrestore(&rq->lock, flags);
5016} 5057}
5017 5058
5018/* Ensures that the idle task is using init_mm right before its cpu goes 5059/*
5060 * Ensures that the idle task is using init_mm right before its cpu goes
5019 * offline. 5061 * offline.
5020 */ 5062 */
5021void idle_task_exit(void) 5063void idle_task_exit(void)
@@ -5029,17 +5071,17 @@ void idle_task_exit(void)
5029 mmdrop(mm); 5071 mmdrop(mm);
5030} 5072}
5031 5073
5032static void migrate_dead(unsigned int dead_cpu, task_t *tsk) 5074static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
5033{ 5075{
5034 struct runqueue *rq = cpu_rq(dead_cpu); 5076 struct rq *rq = cpu_rq(dead_cpu);
5035 5077
5036 /* Must be exiting, otherwise would be on tasklist. */ 5078 /* Must be exiting, otherwise would be on tasklist. */
5037 BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); 5079 BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
5038 5080
5039 /* Cannot have done final schedule yet: would have vanished. */ 5081 /* Cannot have done final schedule yet: would have vanished. */
5040 BUG_ON(tsk->flags & PF_DEAD); 5082 BUG_ON(p->flags & PF_DEAD);
5041 5083
5042 get_task_struct(tsk); 5084 get_task_struct(p);
5043 5085
5044 /* 5086 /*
5045 * Drop lock around migration; if someone else moves it, 5087 * Drop lock around migration; if someone else moves it,
@@ -5047,25 +5089,25 @@ static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
5047 * fine. 5089 * fine.
5048 */ 5090 */
5049 spin_unlock_irq(&rq->lock); 5091 spin_unlock_irq(&rq->lock);
5050 move_task_off_dead_cpu(dead_cpu, tsk); 5092 move_task_off_dead_cpu(dead_cpu, p);
5051 spin_lock_irq(&rq->lock); 5093 spin_lock_irq(&rq->lock);
5052 5094
5053 put_task_struct(tsk); 5095 put_task_struct(p);
5054} 5096}
5055 5097
5056/* release_task() removes task from tasklist, so we won't find dead tasks. */ 5098/* release_task() removes task from tasklist, so we won't find dead tasks. */
5057static void migrate_dead_tasks(unsigned int dead_cpu) 5099static void migrate_dead_tasks(unsigned int dead_cpu)
5058{ 5100{
5059 unsigned arr, i; 5101 struct rq *rq = cpu_rq(dead_cpu);
5060 struct runqueue *rq = cpu_rq(dead_cpu); 5102 unsigned int arr, i;
5061 5103
5062 for (arr = 0; arr < 2; arr++) { 5104 for (arr = 0; arr < 2; arr++) {
5063 for (i = 0; i < MAX_PRIO; i++) { 5105 for (i = 0; i < MAX_PRIO; i++) {
5064 struct list_head *list = &rq->arrays[arr].queue[i]; 5106 struct list_head *list = &rq->arrays[arr].queue[i];
5107
5065 while (!list_empty(list)) 5108 while (!list_empty(list))
5066 migrate_dead(dead_cpu, 5109 migrate_dead(dead_cpu, list_entry(list->next,
5067 list_entry(list->next, task_t, 5110 struct task_struct, run_list));
5068 run_list));
5069 } 5111 }
5070 } 5112 }
5071} 5113}
@@ -5075,14 +5117,13 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
5075 * migration_call - callback that gets triggered when a CPU is added. 5117 * migration_call - callback that gets triggered when a CPU is added.
5076 * Here we can start up the necessary migration thread for the new CPU. 5118 * Here we can start up the necessary migration thread for the new CPU.
5077 */ 5119 */
5078static int __cpuinit migration_call(struct notifier_block *nfb, 5120static int __cpuinit
5079 unsigned long action, 5121migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5080 void *hcpu)
5081{ 5122{
5082 int cpu = (long)hcpu;
5083 struct task_struct *p; 5123 struct task_struct *p;
5084 struct runqueue *rq; 5124 int cpu = (long)hcpu;
5085 unsigned long flags; 5125 unsigned long flags;
5126 struct rq *rq;
5086 5127
5087 switch (action) { 5128 switch (action) {
5088 case CPU_UP_PREPARE: 5129 case CPU_UP_PREPARE:
@@ -5097,10 +5138,12 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5097 task_rq_unlock(rq, &flags); 5138 task_rq_unlock(rq, &flags);
5098 cpu_rq(cpu)->migration_thread = p; 5139 cpu_rq(cpu)->migration_thread = p;
5099 break; 5140 break;
5141
5100 case CPU_ONLINE: 5142 case CPU_ONLINE:
5101 /* Strictly unnecessary, as first user will wake it. */ 5143 /* Strictly unnecessary, as first user will wake it. */
5102 wake_up_process(cpu_rq(cpu)->migration_thread); 5144 wake_up_process(cpu_rq(cpu)->migration_thread);
5103 break; 5145 break;
5146
5104#ifdef CONFIG_HOTPLUG_CPU 5147#ifdef CONFIG_HOTPLUG_CPU
5105 case CPU_UP_CANCELED: 5148 case CPU_UP_CANCELED:
5106 if (!cpu_rq(cpu)->migration_thread) 5149 if (!cpu_rq(cpu)->migration_thread)
@@ -5111,6 +5154,7 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5111 kthread_stop(cpu_rq(cpu)->migration_thread); 5154 kthread_stop(cpu_rq(cpu)->migration_thread);
5112 cpu_rq(cpu)->migration_thread = NULL; 5155 cpu_rq(cpu)->migration_thread = NULL;
5113 break; 5156 break;
5157
5114 case CPU_DEAD: 5158 case CPU_DEAD:
5115 migrate_live_tasks(cpu); 5159 migrate_live_tasks(cpu);
5116 rq = cpu_rq(cpu); 5160 rq = cpu_rq(cpu);
@@ -5131,9 +5175,10 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
5131 * the requestors. */ 5175 * the requestors. */
5132 spin_lock_irq(&rq->lock); 5176 spin_lock_irq(&rq->lock);
5133 while (!list_empty(&rq->migration_queue)) { 5177 while (!list_empty(&rq->migration_queue)) {
5134 migration_req_t *req; 5178 struct migration_req *req;
5179
5135 req = list_entry(rq->migration_queue.next, 5180 req = list_entry(rq->migration_queue.next,
5136 migration_req_t, list); 5181 struct migration_req, list);
5137 list_del_init(&req->list); 5182 list_del_init(&req->list);
5138 complete(&req->done); 5183 complete(&req->done);
5139 } 5184 }
@@ -5155,10 +5200,12 @@ static struct notifier_block __cpuinitdata migration_notifier = {
5155int __init migration_init(void) 5200int __init migration_init(void)
5156{ 5201{
5157 void *cpu = (void *)(long)smp_processor_id(); 5202 void *cpu = (void *)(long)smp_processor_id();
5158 /* Start one for boot CPU. */ 5203
5204 /* Start one for the boot CPU: */
5159 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); 5205 migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
5160 migration_call(&migration_notifier, CPU_ONLINE, cpu); 5206 migration_call(&migration_notifier, CPU_ONLINE, cpu);
5161 register_cpu_notifier(&migration_notifier); 5207 register_cpu_notifier(&migration_notifier);
5208
5162 return 0; 5209 return 0;
5163} 5210}
5164#endif 5211#endif
@@ -5254,7 +5301,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
5254 } while (sd); 5301 } while (sd);
5255} 5302}
5256#else 5303#else
5257#define sched_domain_debug(sd, cpu) {} 5304# define sched_domain_debug(sd, cpu) do { } while (0)
5258#endif 5305#endif
5259 5306
5260static int sd_degenerate(struct sched_domain *sd) 5307static int sd_degenerate(struct sched_domain *sd)
@@ -5280,8 +5327,8 @@ static int sd_degenerate(struct sched_domain *sd)
5280 return 1; 5327 return 1;
5281} 5328}
5282 5329
5283static int sd_parent_degenerate(struct sched_domain *sd, 5330static int
5284 struct sched_domain *parent) 5331sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
5285{ 5332{
5286 unsigned long cflags = sd->flags, pflags = parent->flags; 5333 unsigned long cflags = sd->flags, pflags = parent->flags;
5287 5334
@@ -5314,7 +5361,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,
5314 */ 5361 */
5315static void cpu_attach_domain(struct sched_domain *sd, int cpu) 5362static void cpu_attach_domain(struct sched_domain *sd, int cpu)
5316{ 5363{
5317 runqueue_t *rq = cpu_rq(cpu); 5364 struct rq *rq = cpu_rq(cpu);
5318 struct sched_domain *tmp; 5365 struct sched_domain *tmp;
5319 5366
5320 /* Remove the sched domains which do not contribute to scheduling. */ 5367 /* Remove the sched domains which do not contribute to scheduling. */
@@ -5576,8 +5623,8 @@ static void touch_cache(void *__cache, unsigned long __size)
5576/* 5623/*
5577 * Measure the cache-cost of one task migration. Returns in units of nsec. 5624 * Measure the cache-cost of one task migration. Returns in units of nsec.
5578 */ 5625 */
5579static unsigned long long measure_one(void *cache, unsigned long size, 5626static unsigned long long
5580 int source, int target) 5627measure_one(void *cache, unsigned long size, int source, int target)
5581{ 5628{
5582 cpumask_t mask, saved_mask; 5629 cpumask_t mask, saved_mask;
5583 unsigned long long t0, t1, t2, t3, cost; 5630 unsigned long long t0, t1, t2, t3, cost;
@@ -5927,9 +5974,9 @@ static int find_next_best_node(int node, unsigned long *used_nodes)
5927 */ 5974 */
5928static cpumask_t sched_domain_node_span(int node) 5975static cpumask_t sched_domain_node_span(int node)
5929{ 5976{
5930 int i;
5931 cpumask_t span, nodemask;
5932 DECLARE_BITMAP(used_nodes, MAX_NUMNODES); 5977 DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
5978 cpumask_t span, nodemask;
5979 int i;
5933 5980
5934 cpus_clear(span); 5981 cpus_clear(span);
5935 bitmap_zero(used_nodes, MAX_NUMNODES); 5982 bitmap_zero(used_nodes, MAX_NUMNODES);
@@ -5940,6 +5987,7 @@ static cpumask_t sched_domain_node_span(int node)
5940 5987
5941 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { 5988 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
5942 int next_node = find_next_best_node(node, used_nodes); 5989 int next_node = find_next_best_node(node, used_nodes);
5990
5943 nodemask = node_to_cpumask(next_node); 5991 nodemask = node_to_cpumask(next_node);
5944 cpus_or(span, span, nodemask); 5992 cpus_or(span, span, nodemask);
5945 } 5993 }
@@ -5949,19 +5997,23 @@ static cpumask_t sched_domain_node_span(int node)
5949#endif 5997#endif
5950 5998
5951int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 5999int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
6000
5952/* 6001/*
5953 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we 6002 * SMT sched-domains:
5954 * can switch it on easily if needed.
5955 */ 6003 */
5956#ifdef CONFIG_SCHED_SMT 6004#ifdef CONFIG_SCHED_SMT
5957static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 6005static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
5958static struct sched_group sched_group_cpus[NR_CPUS]; 6006static struct sched_group sched_group_cpus[NR_CPUS];
6007
5959static int cpu_to_cpu_group(int cpu) 6008static int cpu_to_cpu_group(int cpu)
5960{ 6009{
5961 return cpu; 6010 return cpu;
5962} 6011}
5963#endif 6012#endif
5964 6013
6014/*
6015 * multi-core sched-domains:
6016 */
5965#ifdef CONFIG_SCHED_MC 6017#ifdef CONFIG_SCHED_MC
5966static DEFINE_PER_CPU(struct sched_domain, core_domains); 6018static DEFINE_PER_CPU(struct sched_domain, core_domains);
5967static struct sched_group *sched_group_core_bycpu[NR_CPUS]; 6019static struct sched_group *sched_group_core_bycpu[NR_CPUS];
@@ -5981,9 +6033,10 @@ static int cpu_to_core_group(int cpu)
5981 6033
5982static DEFINE_PER_CPU(struct sched_domain, phys_domains); 6034static DEFINE_PER_CPU(struct sched_domain, phys_domains);
5983static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; 6035static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
6036
5984static int cpu_to_phys_group(int cpu) 6037static int cpu_to_phys_group(int cpu)
5985{ 6038{
5986#if defined(CONFIG_SCHED_MC) 6039#ifdef CONFIG_SCHED_MC
5987 cpumask_t mask = cpu_coregroup_map(cpu); 6040 cpumask_t mask = cpu_coregroup_map(cpu);
5988 return first_cpu(mask); 6041 return first_cpu(mask);
5989#elif defined(CONFIG_SCHED_SMT) 6042#elif defined(CONFIG_SCHED_SMT)
@@ -6529,6 +6582,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
6529int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) 6582int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
6530{ 6583{
6531 int err = 0; 6584 int err = 0;
6585
6532#ifdef CONFIG_SCHED_SMT 6586#ifdef CONFIG_SCHED_SMT
6533 if (smt_capable()) 6587 if (smt_capable())
6534 err = sysfs_create_file(&cls->kset.kobj, 6588 err = sysfs_create_file(&cls->kset.kobj,
@@ -6548,7 +6602,8 @@ static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
6548{ 6602{
6549 return sprintf(page, "%u\n", sched_mc_power_savings); 6603 return sprintf(page, "%u\n", sched_mc_power_savings);
6550} 6604}
6551static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6605static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
6606 const char *buf, size_t count)
6552{ 6607{
6553 return sched_power_savings_store(buf, count, 0); 6608 return sched_power_savings_store(buf, count, 0);
6554} 6609}
@@ -6561,7 +6616,8 @@ static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
6561{ 6616{
6562 return sprintf(page, "%u\n", sched_smt_power_savings); 6617 return sprintf(page, "%u\n", sched_smt_power_savings);
6563} 6618}
6564static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) 6619static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
6620 const char *buf, size_t count)
6565{ 6621{
6566 return sched_power_savings_store(buf, count, 1); 6622 return sched_power_savings_store(buf, count, 1);
6567} 6623}
@@ -6623,6 +6679,7 @@ int in_sched_functions(unsigned long addr)
6623{ 6679{
6624 /* Linker adds these: start and end of __sched functions */ 6680 /* Linker adds these: start and end of __sched functions */
6625 extern char __sched_text_start[], __sched_text_end[]; 6681 extern char __sched_text_start[], __sched_text_end[];
6682
6626 return in_lock_functions(addr) || 6683 return in_lock_functions(addr) ||
6627 (addr >= (unsigned long)__sched_text_start 6684 (addr >= (unsigned long)__sched_text_start
6628 && addr < (unsigned long)__sched_text_end); 6685 && addr < (unsigned long)__sched_text_end);
@@ -6630,14 +6687,15 @@ int in_sched_functions(unsigned long addr)
6630 6687
6631void __init sched_init(void) 6688void __init sched_init(void)
6632{ 6689{
6633 runqueue_t *rq;
6634 int i, j, k; 6690 int i, j, k;
6635 6691
6636 for_each_possible_cpu(i) { 6692 for_each_possible_cpu(i) {
6637 prio_array_t *array; 6693 struct prio_array *array;
6694 struct rq *rq;
6638 6695
6639 rq = cpu_rq(i); 6696 rq = cpu_rq(i);
6640 spin_lock_init(&rq->lock); 6697 spin_lock_init(&rq->lock);
6698 lockdep_set_class(&rq->lock, &rq->rq_lock_key);
6641 rq->nr_running = 0; 6699 rq->nr_running = 0;
6642 rq->active = rq->arrays; 6700 rq->active = rq->arrays;
6643 rq->expired = rq->arrays + 1; 6701 rq->expired = rq->arrays + 1;
@@ -6684,7 +6742,7 @@ void __init sched_init(void)
6684#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6742#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
6685void __might_sleep(char *file, int line) 6743void __might_sleep(char *file, int line)
6686{ 6744{
6687#if defined(in_atomic) 6745#ifdef in_atomic
6688 static unsigned long prev_jiffy; /* ratelimiting */ 6746 static unsigned long prev_jiffy; /* ratelimiting */
6689 6747
6690 if ((in_atomic() || irqs_disabled()) && 6748 if ((in_atomic() || irqs_disabled()) &&
@@ -6706,10 +6764,10 @@ EXPORT_SYMBOL(__might_sleep);
6706#ifdef CONFIG_MAGIC_SYSRQ 6764#ifdef CONFIG_MAGIC_SYSRQ
6707void normalize_rt_tasks(void) 6765void normalize_rt_tasks(void)
6708{ 6766{
6767 struct prio_array *array;
6709 struct task_struct *p; 6768 struct task_struct *p;
6710 prio_array_t *array;
6711 unsigned long flags; 6769 unsigned long flags;
6712 runqueue_t *rq; 6770 struct rq *rq;
6713 6771
6714 read_lock_irq(&tasklist_lock); 6772 read_lock_irq(&tasklist_lock);
6715 for_each_process(p) { 6773 for_each_process(p) {
@@ -6753,7 +6811,7 @@ void normalize_rt_tasks(void)
6753 * 6811 *
6754 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6812 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6755 */ 6813 */
6756task_t *curr_task(int cpu) 6814struct task_struct *curr_task(int cpu)
6757{ 6815{
6758 return cpu_curr(cpu); 6816 return cpu_curr(cpu);
6759} 6817}
@@ -6773,7 +6831,7 @@ task_t *curr_task(int cpu)
6773 * 6831 *
6774 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 6832 * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
6775 */ 6833 */
6776void set_curr_task(int cpu, task_t *p) 6834void set_curr_task(int cpu, struct task_struct *p)
6777{ 6835{
6778 cpu_curr(cpu) = p; 6836 cpu_curr(cpu) = p;
6779} 6837}