path: root/kernel/sched.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	339
1 file changed, 178 insertions, 161 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 72bb9483d949..b0afd8db1396 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -318,15 +318,19 @@ static inline int cpu_of(struct rq *rq)
 }
 
 /*
- * Per-runqueue clock, as finegrained as the platform can give us:
+ * Update the per-runqueue clock, as finegrained as the platform can give
+ * us, but without assuming monotonicity, etc.:
  */
-static unsigned long long __rq_clock(struct rq *rq)
+static void __update_rq_clock(struct rq *rq)
 {
 	u64 prev_raw = rq->prev_clock_raw;
 	u64 now = sched_clock();
 	s64 delta = now - prev_raw;
 	u64 clock = rq->clock;
 
+#ifdef CONFIG_SCHED_DEBUG
+	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+#endif
 	/*
 	 * Protect against sched_clock() occasionally going backwards:
 	 */
@@ -349,18 +353,12 @@ static unsigned long long __rq_clock(struct rq *rq)
 
 	rq->prev_clock_raw = now;
 	rq->clock = clock;
-
-	return clock;
 }
 
-static inline unsigned long long rq_clock(struct rq *rq)
+static void update_rq_clock(struct rq *rq)
 {
-	int this_cpu = smp_processor_id();
-
-	if (this_cpu == cpu_of(rq))
-		return __rq_clock(rq);
-
-	return rq->clock;
+	if (likely(smp_processor_id() == cpu_of(rq)))
+		__update_rq_clock(rq);
 }
 
 /*
@@ -386,9 +384,12 @@ unsigned long long cpu_clock(int cpu)
 {
 	unsigned long long now;
 	unsigned long flags;
+	struct rq *rq;
 
 	local_irq_save(flags);
-	now = rq_clock(cpu_rq(cpu));
+	rq = cpu_rq(cpu);
+	update_rq_clock(rq);
+	now = rq->clock;
 	local_irq_restore(flags);
 
 	return now;
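
The hunks above replace the on-demand __rq_clock()/rq_clock() readers with
explicit __update_rq_clock()/update_rq_clock() calls that cook rq->clock from
sched_clock() without trusting it to be monotonic. A minimal standalone sketch
of that clamping idea follows; it is an illustrative assumption, not the
kernel code (the elided middle of __update_rq_clock() contains the actual
delta handling).

/*
 * Illustrative userspace sketch (an assumption, not the kernel code) of
 * the idea behind __update_rq_clock(): consume a raw, possibly
 * non-monotonic timestamp and advance a cooked clock only on forward
 * motion, so the cooked value never goes backwards.
 */
#include <stdint.h>
#include <stdio.h>

struct simple_rq_clock {
	uint64_t prev_raw;	/* last raw timestamp seen */
	uint64_t clock;		/* cooked, monotonic clock */
};

static void simple_update_clock(struct simple_rq_clock *c, uint64_t raw_now)
{
	int64_t delta = (int64_t)(raw_now - c->prev_raw);

	if (delta > 0)			/* ignore backward jumps */
		c->clock += (uint64_t)delta;

	c->prev_raw = raw_now;		/* always resync the raw reference */
}

int main(void)
{
	struct simple_rq_clock c = { .prev_raw = 1000, .clock = 0 };

	simple_update_clock(&c, 1500);	/* +500 */
	simple_update_clock(&c, 1400);	/* raw went backwards: ignored */
	simple_update_clock(&c, 1600);	/* +200 from the new reference */
	printf("cooked clock: %llu\n", (unsigned long long)c.clock);
	return 0;
}
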
@@ -637,6 +638,11 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
 
 #define WMULT_SHIFT	32
 
+/*
+ * Shift right and round:
+ */
+#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 		struct load_weight *lw)
@@ -644,18 +650,17 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 	u64 tmp;
 
 	if (unlikely(!lw->inv_weight))
-		lw->inv_weight = WMULT_CONST / lw->weight;
+		lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
 
 	tmp = (u64)delta_exec * weight;
 	/*
 	 * Check whether we'd overflow the 64-bit multiplication:
 	 */
-	if (unlikely(tmp > WMULT_CONST)) {
-		tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight)
-			>> (WMULT_SHIFT/2);
-	} else {
-		tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
-	}
+	if (unlikely(tmp > WMULT_CONST))
+		tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+			WMULT_SHIFT/2);
+	else
+		tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
 
 	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
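
The new RSR() macro rounds a right shift to the nearest value instead of
truncating, and lw->inv_weight becomes a rounded 2^32-scaled inverse of the
queue weight. Below is a quick standalone sketch of how this fixed-point
arithmetic approximates delta_exec * weight / lw->weight; WMULT_CONST is
assumed to be 2^32 here, which may differ from the kernel's per-word-size
choice, so treat the constants as illustrative only.

/*
 * Standalone sketch (assumption) of the rounded fixed-point math used by
 * calc_delta_mine(); WMULT_CONST is taken to be 2^32 for illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define WMULT_SHIFT	32
#define WMULT_CONST	(1ULL << WMULT_SHIFT)
#define RSR(x, y)	(((x) + (1ULL << ((y) - 1))) >> (y))

int main(void)
{
	uint64_t delta_exec = 1000000;	/* 1 ms of runtime, in ns */
	uint64_t weight     = 1024;	/* NICE_0_LOAD */
	uint64_t lw_weight  = 3121;	/* nice -5, per the new table below */

	/* Rounded inverse, roughly 2^32 / lw_weight: */
	uint64_t inv = (WMULT_CONST - lw_weight / 2) / lw_weight + 1;

	/* tmp stays below WMULT_CONST, so the single-RSR branch applies: */
	uint64_t tmp = delta_exec * weight;
	uint64_t scaled = RSR(tmp * inv, WMULT_SHIFT);

	printf("scaled: %llu, exact: %.1f\n",
	       (unsigned long long)scaled,
	       (double)delta_exec * weight / lw_weight);
	return 0;
}
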
@@ -703,11 +708,14 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec)
  * the relative distance between them is ~25%.)
  */
 static const int prio_to_weight[40] = {
-/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
-/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280,
-/* 0 */ NICE_0_LOAD /* 1024 */,
-/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137,
-/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
+ /* -20 */ 88761, 71755, 56483, 46273, 36291,
+ /* -15 */ 29154, 23254, 18705, 14949, 11916,
+ /* -10 */ 9548, 7620, 6100, 4904, 3906,
+ /* -5 */ 3121, 2501, 1991, 1586, 1277,
+ /* 0 */ 1024, 820, 655, 526, 423,
+ /* 5 */ 335, 272, 215, 172, 137,
+ /* 10 */ 110, 87, 70, 56, 45,
+ /* 15 */ 36, 29, 23, 18, 15,
 };
 
 /*
@@ -718,14 +726,14 @@ static const int prio_to_weight[40] = {
  * into multiplications:
  */
 static const u32 prio_to_wmult[40] = {
-/* -20 */ 48356, 60446, 75558, 94446, 118058,
-/* -15 */ 147573, 184467, 230589, 288233, 360285,
-/* -10 */ 450347, 562979, 703746, 879575, 1099582,
-/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443,
-/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518,
-/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126,
-/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717,
-/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+ /* -20 */ 48388, 59856, 76040, 92818, 118348,
+ /* -15 */ 147320, 184698, 229616, 287308, 360437,
+ /* -10 */ 449829, 563644, 704093, 875809, 1099582,
+ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
+ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
+ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
+ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
+ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
 };
 
 static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
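
Both replacement tables keep the property described in the comment above
them: adjacent nice levels differ by roughly 25% in weight, and
prio_to_wmult[] stays approximately 2^32 / prio_to_weight[] so that
calc_delta_mine() can turn divisions into multiplications. A small
standalone check of both properties, with values copied from the new
tables above:

/*
 * Quick check of the two properties the new tables are built around:
 *  - each nice level is ~25% heavier than the next one,
 *  - prio_to_wmult[i] is roughly 2^32 / prio_to_weight[i].
 * Values are copied from the hunks above (nice 0..4 and the nice-0 wmult).
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const int weight[5] = { 1024, 820, 655, 526, 423 };	/* nice 0..4 */
	const uint32_t wmult0 = 4194304;			/* nice 0 entry */

	for (int i = 0; i + 1 < 5; i++)
		printf("weight[%d] / weight[%d] = %.3f\n",
		       i, i + 1, (double)weight[i] / weight[i + 1]);

	printf("2^32 / 1024 = %llu (table entry: %u)\n",
	       (unsigned long long)((1ULL << 32) / 1024), wmult0);
	return 0;
}
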
@@ -745,8 +753,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		      unsigned long max_nr_move, unsigned long max_load_move,
 		      struct sched_domain *sd, enum cpu_idle_type idle,
 		      int *all_pinned, unsigned long *load_moved,
-		      int this_best_prio, int best_prio, int best_prio_seen,
-		      struct rq_iterator *iterator);
+		      int *this_best_prio, struct rq_iterator *iterator);
 
 #include "sched_stats.h"
 #include "sched_rt.c"
@@ -782,14 +789,14 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls)
  * This function is called /before/ updating rq->ls.load
  * and when switching tasks.
  */
-static void update_curr_load(struct rq *rq, u64 now)
+static void update_curr_load(struct rq *rq)
 {
 	struct load_stat *ls = &rq->ls;
 	u64 start;
 
 	start = ls->load_update_start;
-	ls->load_update_start = now;
-	ls->delta_stat += now - start;
+	ls->load_update_start = rq->clock;
+	ls->delta_stat += rq->clock - start;
 	/*
 	 * Stagger updates to ls->delta_fair. Very frequent updates
 	 * can be expensive.
@@ -798,30 +805,28 @@ static void update_curr_load(struct rq *rq, u64 now)
 		__update_curr_load(rq, ls);
 }
 
-static inline void
-inc_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void inc_load(struct rq *rq, const struct task_struct *p)
 {
-	update_curr_load(rq, now);
+	update_curr_load(rq);
 	update_load_add(&rq->ls.load, p->se.load.weight);
 }
 
-static inline void
-dec_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void dec_load(struct rq *rq, const struct task_struct *p)
 {
-	update_curr_load(rq, now);
+	update_curr_load(rq);
 	update_load_sub(&rq->ls.load, p->se.load.weight);
 }
 
-static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void inc_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running++;
-	inc_load(rq, p, now);
+	inc_load(rq, p);
 }
 
-static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void dec_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running--;
-	dec_load(rq, p, now);
+	dec_load(rq, p);
 }
 
 static void set_load_weight(struct task_struct *p)
@@ -848,18 +853,16 @@ static void set_load_weight(struct task_struct *p)
 	p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
 }
 
-static void
-enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	sched_info_queued(p);
-	p->sched_class->enqueue_task(rq, p, wakeup, now);
+	p->sched_class->enqueue_task(rq, p, wakeup);
 	p->se.on_rq = 1;
 }
 
-static void
-dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	p->sched_class->dequeue_task(rq, p, sleep, now);
+	p->sched_class->dequeue_task(rq, p, sleep);
 	p->se.on_rq = 0;
 }
 
@@ -914,13 +917,11 @@ static int effective_prio(struct task_struct *p)
  */
 static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
 {
-	u64 now = rq_clock(rq);
-
 	if (p->state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible--;
 
-	enqueue_task(rq, p, wakeup, now);
-	inc_nr_running(p, rq, now);
+	enqueue_task(rq, p, wakeup);
+	inc_nr_running(p, rq);
 }
 
 /*
@@ -928,13 +929,13 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
  */
 static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
 {
-	u64 now = rq_clock(rq);
+	update_rq_clock(rq);
 
 	if (p->state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible--;
 
-	enqueue_task(rq, p, 0, now);
-	inc_nr_running(p, rq, now);
+	enqueue_task(rq, p, 0);
+	inc_nr_running(p, rq);
 }
 
 /*
@@ -942,13 +943,11 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
  */
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	u64 now = rq_clock(rq);
-
 	if (p->state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible++;
 
-	dequeue_task(rq, p, sleep, now);
-	dec_nr_running(p, rq, now);
+	dequeue_task(rq, p, sleep);
+	dec_nr_running(p, rq);
 }
 
 /**
@@ -1516,6 +1515,7 @@ out_set_cpu:
 
 out_activate:
 #endif /* CONFIG_SMP */
+	update_rq_clock(rq);
 	activate_task(rq, p, 1);
 	/*
 	 * Sync wakeups (i.e. those types of wakeups where the waker
@@ -1647,12 +1647,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	unsigned long flags;
 	struct rq *rq;
 	int this_cpu;
-	u64 now;
 
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
 	this_cpu = smp_processor_id(); /* parent's CPU */
-	now = rq_clock(rq);
+	update_rq_clock(rq);
 
 	p->prio = effective_prio(p);
 
@@ -1666,8 +1665,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		 * Let the scheduling class do new task startup
 		 * management (if any):
 		 */
-		p->sched_class->task_new(rq, p, now);
-		inc_nr_running(p, rq, now);
+		p->sched_class->task_new(rq, p);
+		inc_nr_running(p, rq);
 	}
 	check_preempt_curr(rq, p);
 	task_rq_unlock(rq, &flags);
@@ -1954,7 +1953,6 @@ static void update_cpu_load(struct rq *this_rq)
 	unsigned long total_load = this_rq->ls.load.weight;
 	unsigned long this_load = total_load;
 	struct load_stat *ls = &this_rq->ls;
-	u64 now = __rq_clock(this_rq);
 	int i, scale;
 
 	this_rq->nr_load_updates++;
@@ -1962,7 +1960,7 @@ static void update_cpu_load(struct rq *this_rq)
 		goto do_avg;
 
 	/* Update delta_fair/delta_exec fields first */
-	update_curr_load(this_rq, now);
+	update_curr_load(this_rq);
 
 	fair_delta64 = ls->delta_fair + 1;
 	ls->delta_fair = 0;
@@ -1970,8 +1968,8 @@ static void update_cpu_load(struct rq *this_rq)
 	exec_delta64 = ls->delta_exec + 1;
 	ls->delta_exec = 0;
 
-	sample_interval64 = now - ls->load_update_last;
-	ls->load_update_last = now;
+	sample_interval64 = this_rq->clock - ls->load_update_last;
+	ls->load_update_last = this_rq->clock;
 
 	if ((s64)sample_interval64 < (s64)TICK_NSEC)
 		sample_interval64 = TICK_NSEC;
@@ -2026,6 +2024,8 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
 			spin_lock(&rq1->lock);
 		}
 	}
+	update_rq_clock(rq1);
+	update_rq_clock(rq2);
 }
 
 /*
@@ -2166,8 +2166,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		      unsigned long max_nr_move, unsigned long max_load_move,
 		      struct sched_domain *sd, enum cpu_idle_type idle,
 		      int *all_pinned, unsigned long *load_moved,
-		      int this_best_prio, int best_prio, int best_prio_seen,
-		      struct rq_iterator *iterator)
+		      int *this_best_prio, struct rq_iterator *iterator)
 {
 	int pulled = 0, pinned = 0, skip_for_load;
 	struct task_struct *p;
@@ -2192,12 +2191,8 @@ next:
 	 */
 	skip_for_load = (p->se.load.weight >> 1) > rem_load_move +
						    SCHED_LOAD_SCALE_FUZZ;
-	if (skip_for_load && p->prio < this_best_prio)
-		skip_for_load = !best_prio_seen && p->prio == best_prio;
-	if (skip_for_load ||
+	if ((skip_for_load && p->prio >= *this_best_prio) ||
 	    !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
-
-		best_prio_seen |= p->prio == best_prio;
 		p = iterator->next(iterator->arg);
 		goto next;
 	}
@@ -2211,8 +2206,8 @@ next:
 	 * and the prescribed amount of weighted load.
 	 */
 	if (pulled < max_nr_move && rem_load_move > 0) {
-		if (p->prio < this_best_prio)
-			this_best_prio = p->prio;
+		if (p->prio < *this_best_prio)
+			*this_best_prio = p->prio;
 		p = iterator->next(iterator->arg);
 		goto next;
 	}
@@ -2231,32 +2226,52 @@ out:
 }
 
 /*
- * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
- * load from busiest to this_rq, as part of a balancing operation within
- * "domain". Returns the number of tasks moved.
+ * move_tasks tries to move up to max_load_move weighted load from busiest to
+ * this_rq, as part of a balancing operation within domain "sd".
+ * Returns 1 if successful and 0 otherwise.
  *
  * Called with both runqueues locked.
  */
 static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
+		      unsigned long max_load_move,
 		      struct sched_domain *sd, enum cpu_idle_type idle,
 		      int *all_pinned)
 {
 	struct sched_class *class = sched_class_highest;
-	unsigned long load_moved, total_nr_moved = 0, nr_moved;
-	long rem_load_move = max_load_move;
+	unsigned long total_load_moved = 0;
+	int this_best_prio = this_rq->curr->prio;
 
 	do {
-		nr_moved = class->load_balance(this_rq, this_cpu, busiest,
-				max_nr_move, (unsigned long)rem_load_move,
-				sd, idle, all_pinned, &load_moved);
-		total_nr_moved += nr_moved;
-		max_nr_move -= nr_moved;
-		rem_load_move -= load_moved;
+		total_load_moved +=
+			class->load_balance(this_rq, this_cpu, busiest,
+				ULONG_MAX, max_load_move - total_load_moved,
+				sd, idle, all_pinned, &this_best_prio);
 		class = class->next;
-	} while (class && max_nr_move && rem_load_move > 0);
+	} while (class && max_load_move > total_load_moved);
 
-	return total_nr_moved;
+	return total_load_moved > 0;
+}
+
+/*
+ * move_one_task tries to move exactly one task from busiest to this_rq, as
+ * part of active balancing operations within "domain".
+ * Returns 1 if successful and 0 otherwise.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+			 struct sched_domain *sd, enum cpu_idle_type idle)
+{
+	struct sched_class *class;
+	int this_best_prio = MAX_PRIO;
+
+	for (class = sched_class_highest; class; class = class->next)
+		if (class->load_balance(this_rq, this_cpu, busiest,
+					1, ULONG_MAX, sd, idle, NULL,
+					&this_best_prio))
+			return 1;
+
+	return 0;
 }
 
 /*
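
move_one_task() above walks the scheduling classes from highest to lowest
priority and stops at the first class whose load_balance() hook manages to
move a task; move_tasks() uses the same walk but accumulates moved load.
A stripped-down sketch of that walk with stand-in types follows (an
assumption for illustration only; the real hook takes the full argument
list shown in the hunk).

/*
 * Stand-in sketch of the "try each scheduling class in priority order"
 * pattern used by move_one_task(); the types and the hook signature are
 * simplified assumptions, not the kernel's.
 */
#include <stdio.h>

struct fake_class {
	const char *name;
	const struct fake_class *next;		/* next lower-priority class */
	int (*balance_one)(void);		/* returns 1 if it moved a task */
};

static int try_move_one(const struct fake_class *highest)
{
	const struct fake_class *class;

	for (class = highest; class; class = class->next)
		if (class->balance_one())
			return 1;		/* first success wins */
	return 0;
}

static int rt_none(void)  { return 0; }		/* RT class: nothing to move */
static int fair_one(void) { return 1; }		/* fair class: moved a task */

int main(void)
{
	struct fake_class fair = { "fair", NULL, fair_one };
	struct fake_class rt   = { "rt", &fair, rt_none };

	printf("moved: %d\n", try_move_one(&rt));	/* prints "moved: 1" */
	return 0;
}
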
@@ -2588,11 +2603,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 #define MAX_PINNED_INTERVAL	512
 
-static inline unsigned long minus_1_or_zero(unsigned long n)
-{
-	return n > 0 ? n - 1 : 0;
-}
-
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
@@ -2601,7 +2611,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *balance)
 {
-	int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
+	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
@@ -2642,18 +2652,17 @@ redo:
 
 	schedstat_add(sd, lb_imbalance[idle], imbalance);
 
-	nr_moved = 0;
+	ld_moved = 0;
 	if (busiest->nr_running > 1) {
 		/*
 		 * Attempt to move tasks. If find_busiest_group has found
 		 * an imbalance but busiest->nr_running <= 1, the group is
-		 * still unbalanced. nr_moved simply stays zero, so it is
+		 * still unbalanced. ld_moved simply stays zero, so it is
 		 * correctly treated as an imbalance.
 		 */
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
-		nr_moved = move_tasks(this_rq, this_cpu, busiest,
-				minus_1_or_zero(busiest->nr_running),
+		ld_moved = move_tasks(this_rq, this_cpu, busiest,
 				imbalance, sd, idle, &all_pinned);
 		double_rq_unlock(this_rq, busiest);
 		local_irq_restore(flags);
@@ -2661,7 +2670,7 @@ redo:
 		/*
 		 * some other cpu did the load balance for us.
 		 */
-		if (nr_moved && this_cpu != smp_processor_id())
+		if (ld_moved && this_cpu != smp_processor_id())
 			resched_cpu(this_cpu);
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
@@ -2673,7 +2682,7 @@ redo:
 		}
 	}
 
-	if (!nr_moved) {
+	if (!ld_moved) {
 		schedstat_inc(sd, lb_failed[idle]);
 		sd->nr_balance_failed++;
 
@@ -2722,10 +2731,10 @@ redo:
 			sd->balance_interval *= 2;
 	}
 
-	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+	if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		return -1;
-	return nr_moved;
+	return ld_moved;
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[idle]);
@@ -2757,7 +2766,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	struct sched_group *group;
 	struct rq *busiest = NULL;
 	unsigned long imbalance;
-	int nr_moved = 0;
+	int ld_moved = 0;
 	int sd_idle = 0;
 	int all_pinned = 0;
 	cpumask_t cpus = CPU_MASK_ALL;
@@ -2792,12 +2801,13 @@ redo:
 
 	schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance);
 
-	nr_moved = 0;
+	ld_moved = 0;
 	if (busiest->nr_running > 1) {
 		/* Attempt to move tasks */
 		double_lock_balance(this_rq, busiest);
-		nr_moved = move_tasks(this_rq, this_cpu, busiest,
-				minus_1_or_zero(busiest->nr_running),
+		/* this_rq->clock is already updated */
+		update_rq_clock(busiest);
+		ld_moved = move_tasks(this_rq, this_cpu, busiest,
 					imbalance, sd, CPU_NEWLY_IDLE,
 					&all_pinned);
 		spin_unlock(&busiest->lock);
@@ -2809,7 +2819,7 @@ redo:
 		}
 	}
 
-	if (!nr_moved) {
+	if (!ld_moved) {
 		schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
 		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 		    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
@@ -2817,7 +2827,7 @@ redo:
 	} else
 		sd->nr_balance_failed = 0;
 
-	return nr_moved;
+	return ld_moved;
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]);
@@ -2894,6 +2904,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 
 	/* move a task from busiest_rq to target_rq */
 	double_lock_balance(busiest_rq, target_rq);
+	update_rq_clock(busiest_rq);
+	update_rq_clock(target_rq);
 
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd) {
@@ -2905,8 +2917,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 	if (likely(sd)) {
 		schedstat_inc(sd, alb_cnt);
 
-		if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
-			       ULONG_MAX, sd, CPU_IDLE, NULL))
+		if (move_one_task(target_rq, target_cpu, busiest_rq,
+				  sd, CPU_IDLE))
 			schedstat_inc(sd, alb_pushed);
 		else
 			schedstat_inc(sd, alb_failed);
@@ -3175,8 +3187,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		      unsigned long max_nr_move, unsigned long max_load_move,
 		      struct sched_domain *sd, enum cpu_idle_type idle,
 		      int *all_pinned, unsigned long *load_moved,
-		      int this_best_prio, int best_prio, int best_prio_seen,
-		      struct rq_iterator *iterator)
+		      int *this_best_prio, struct rq_iterator *iterator)
 {
 	*load_moved = 0;
 
@@ -3202,7 +3213,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 	rq = task_rq_lock(p, &flags);
 	ns = p->se.sum_exec_runtime;
 	if (rq->curr == p) {
-		delta_exec = rq_clock(rq) - p->se.exec_start;
+		update_rq_clock(rq);
+		delta_exec = rq->clock - p->se.exec_start;
 		if ((s64)delta_exec > 0)
 			ns += delta_exec;
 	}
@@ -3298,9 +3310,10 @@ void scheduler_tick(void)
 	struct task_struct *curr = rq->curr;
 
 	spin_lock(&rq->lock);
+	__update_rq_clock(rq);
+	update_cpu_load(rq);
 	if (curr != rq->idle) /* FIXME: needed? */
 		curr->sched_class->task_tick(rq, curr);
-	update_cpu_load(rq);
 	spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
@@ -3382,7 +3395,7 @@ static inline void schedule_debug(struct task_struct *prev)
  * Pick up the highest-prio task:
  */
 static inline struct task_struct *
-pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
+pick_next_task(struct rq *rq, struct task_struct *prev)
 {
 	struct sched_class *class;
 	struct task_struct *p;
@@ -3392,14 +3405,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
 	 * the fair class we can call that function directly:
 	 */
 	if (likely(rq->nr_running == rq->cfs.nr_running)) {
-		p = fair_sched_class.pick_next_task(rq, now);
+		p = fair_sched_class.pick_next_task(rq);
 		if (likely(p))
 			return p;
 	}
 
 	class = sched_class_highest;
 	for ( ; ; ) {
-		p = class->pick_next_task(rq, now);
+		p = class->pick_next_task(rq);
 		if (p)
 			return p;
 		/*
@@ -3418,7 +3431,6 @@ asmlinkage void __sched schedule(void)
 	struct task_struct *prev, *next;
 	long *switch_count;
 	struct rq *rq;
-	u64 now;
 	int cpu;
 
 need_resched:
@@ -3436,6 +3448,7 @@ need_resched_nonpreemptible:
 
 	spin_lock_irq(&rq->lock);
 	clear_tsk_need_resched(prev);
+	__update_rq_clock(rq);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
@@ -3450,9 +3463,8 @@ need_resched_nonpreemptible:
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
 
-	now = __rq_clock(rq);
-	prev->sched_class->put_prev_task(rq, prev, now);
-	next = pick_next_task(rq, prev, now);
+	prev->sched_class->put_prev_task(rq, prev);
+	next = pick_next_task(rq, prev);
 
 	sched_info_switch(prev, next);
 
@@ -3895,17 +3907,16 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	unsigned long flags;
 	int oldprio, on_rq;
 	struct rq *rq;
-	u64 now;
 
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
 
 	rq = task_rq_lock(p, &flags);
-	now = rq_clock(rq);
+	update_rq_clock(rq);
 
 	oldprio = p->prio;
 	on_rq = p->se.on_rq;
 	if (on_rq)
-		dequeue_task(rq, p, 0, now);
+		dequeue_task(rq, p, 0);
 
 	if (rt_prio(prio))
 		p->sched_class = &rt_sched_class;
@@ -3915,7 +3926,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	p->prio = prio;
 
 	if (on_rq) {
-		enqueue_task(rq, p, 0, now);
+		enqueue_task(rq, p, 0);
 		/*
 		 * Reschedule if we are currently running on this runqueue and
 		 * our priority decreased, or if we are not currently running on
@@ -3938,7 +3949,6 @@ void set_user_nice(struct task_struct *p, long nice)
 	int old_prio, delta, on_rq;
 	unsigned long flags;
 	struct rq *rq;
-	u64 now;
 
 	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
 		return;
@@ -3947,7 +3957,7 @@ void set_user_nice(struct task_struct *p, long nice)
 	 * the task might be in the middle of scheduling on another CPU.
 	 */
 	rq = task_rq_lock(p, &flags);
-	now = rq_clock(rq);
+	update_rq_clock(rq);
 	/*
 	 * The RT priorities are set via sched_setscheduler(), but we still
 	 * allow the 'normal' nice value to be set - but as expected
@@ -3960,8 +3970,8 @@ void set_user_nice(struct task_struct *p, long nice)
 	}
 	on_rq = p->se.on_rq;
 	if (on_rq) {
-		dequeue_task(rq, p, 0, now);
-		dec_load(rq, p, now);
+		dequeue_task(rq, p, 0);
+		dec_load(rq, p);
 	}
 
 	p->static_prio = NICE_TO_PRIO(nice);
@@ -3971,8 +3981,8 @@ void set_user_nice(struct task_struct *p, long nice)
 	delta = p->prio - old_prio;
 
 	if (on_rq) {
-		enqueue_task(rq, p, 0, now);
-		inc_load(rq, p, now);
+		enqueue_task(rq, p, 0);
+		inc_load(rq, p);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -4208,6 +4218,7 @@ recheck:
 		spin_unlock_irqrestore(&p->pi_lock, flags);
 		goto recheck;
 	}
+	update_rq_clock(rq);
 	on_rq = p->se.on_rq;
 	if (on_rq)
 		deactivate_task(rq, p, 0);
@@ -4463,10 +4474,8 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
 out_unlock:
 	read_unlock(&tasklist_lock);
 	mutex_unlock(&sched_hotcpu_mutex);
-	if (retval)
-		return retval;
 
-	return 0;
+	return retval;
 }
 
 /**
@@ -4966,6 +4975,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	on_rq = p->se.on_rq;
 	if (on_rq)
 		deactivate_task(rq_src, p, 0);
+
 	set_task_cpu(p, dest_cpu);
 	if (on_rq) {
 		activate_task(rq_dest, p, 0);
@@ -5198,7 +5208,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 	for ( ; ; ) {
 		if (!rq->nr_running)
 			break;
-		next = pick_next_task(rq, rq->curr, rq_clock(rq));
+		update_rq_clock(rq);
+		next = pick_next_task(rq, rq->curr);
 		if (!next)
 			break;
 		migrate_dead(dead_cpu, next);
@@ -5210,12 +5221,19 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
 
 static struct ctl_table sd_ctl_dir[] = {
-	{CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, },
+	{
+		.procname	= "sched_domain",
+		.mode		= 0755,
+	},
 	{0,},
 };
 
 static struct ctl_table sd_ctl_root[] = {
-	{CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, },
+	{
+		.procname	= "kernel",
+		.mode		= 0755,
+		.child		= sd_ctl_dir,
+	},
 	{0,},
 };
 
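
The sysctl hunks above drop the numeric ctl_name/CTL_UNNUMBERED field and
switch the static tables to C99 designated initializers, so only the fields
that matter are spelled out and everything else is zeroed. A tiny standalone
illustration of that initializer style (the struct is a simplified stand-in,
an assumption rather than the real ctl_table layout):

/*
 * Designated initializers name the fields they set and implicitly zero
 * the rest, which is the style the sd_ctl_dir[]/sd_ctl_root[] hunks
 * above switch to. This struct is a stand-in, not the real ctl_table.
 */
#include <stdio.h>

struct fake_ctl_table {
	int ctl_name;			/* stays 0, like the removed field */
	const char *procname;
	int mode;
	struct fake_ctl_table *child;
};

static struct fake_ctl_table fake_dir[] = {
	{
		.procname	= "sched_domain",
		.mode		= 0755,
	},
	{ 0, },				/* table terminator */
};

int main(void)
{
	printf("%s mode=%o ctl_name=%d\n",
	       fake_dir[0].procname, fake_dir[0].mode, fake_dir[0].ctl_name);
	return 0;
}
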
@@ -5231,11 +5249,10 @@ static struct ctl_table *sd_alloc_ctl_entry(int n)
 }
 
 static void
-set_table_entry(struct ctl_table *entry, int ctl_name,
+set_table_entry(struct ctl_table *entry,
 		const char *procname, void *data, int maxlen,
 		mode_t mode, proc_handler *proc_handler)
 {
-	entry->ctl_name = ctl_name;
 	entry->procname = procname;
 	entry->data = data;
 	entry->maxlen = maxlen;
@@ -5248,28 +5265,28 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
 {
 	struct ctl_table *table = sd_alloc_ctl_entry(14);
 
-	set_table_entry(&table[0], 1, "min_interval", &sd->min_interval,
+	set_table_entry(&table[0], "min_interval", &sd->min_interval,
 		sizeof(long), 0644, proc_doulongvec_minmax);
-	set_table_entry(&table[1], 2, "max_interval", &sd->max_interval,
+	set_table_entry(&table[1], "max_interval", &sd->max_interval,
 		sizeof(long), 0644, proc_doulongvec_minmax);
-	set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx,
+	set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
 		sizeof(int), 0644, proc_dointvec_minmax);
-	set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx,
+	set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
 		sizeof(int), 0644, proc_dointvec_minmax);
-	set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx,
+	set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
 		sizeof(int), 0644, proc_dointvec_minmax);
-	set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx,
+	set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
 		sizeof(int), 0644, proc_dointvec_minmax);
-	set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx,
+	set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
 		sizeof(int), 0644, proc_dointvec_minmax);
-	set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor,
+	set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
 		sizeof(int), 0644, proc_dointvec_minmax);
-	set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct,
+	set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
 		sizeof(int), 0644, proc_dointvec_minmax);
-	set_table_entry(&table[10], 11, "cache_nice_tries",
+	set_table_entry(&table[10], "cache_nice_tries",
 		&sd->cache_nice_tries,
 		sizeof(int), 0644, proc_dointvec_minmax);
-	set_table_entry(&table[12], 13, "flags", &sd->flags,
+	set_table_entry(&table[12], "flags", &sd->flags,
 		sizeof(int), 0644, proc_dointvec_minmax);
 
 	return table;
@@ -5289,7 +5306,6 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 	i = 0;
 	for_each_domain(cpu, sd) {
 		snprintf(buf, 32, "domain%d", i);
-		entry->ctl_name = i + 1;
 		entry->procname = kstrdup(buf, GFP_KERNEL);
 		entry->mode = 0755;
 		entry->child = sd_alloc_ctl_domain_table(sd);
@@ -5310,7 +5326,6 @@ static void init_sched_domain_sysctl(void)
 
 	for (i = 0; i < cpu_num; i++, entry++) {
 		snprintf(buf, 32, "cpu%d", i);
-		entry->ctl_name = i + 1;
 		entry->procname = kstrdup(buf, GFP_KERNEL);
 		entry->mode = 0755;
 		entry->child = sd_alloc_ctl_cpu_table(i);
@@ -5379,6 +5394,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		rq->migration_thread = NULL;
 		/* Idle task back to normal (off runqueue, low prio) */
 		rq = task_rq_lock(rq->idle, &flags);
+		update_rq_clock(rq);
 		deactivate_task(rq, rq->idle, 0);
 		rq->idle->static_prio = MAX_PRIO;
 		__setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
@@ -6616,12 +6632,13 @@ void normalize_rt_tasks(void)
 		goto out_unlock;
 #endif
 
+		update_rq_clock(rq);
 		on_rq = p->se.on_rq;
 		if (on_rq)
-			deactivate_task(task_rq(p), p, 0);
+			deactivate_task(rq, p, 0);
 		__setscheduler(rq, p, SCHED_NORMAL, 0);
 		if (on_rq) {
-			activate_task(task_rq(p), p, 0);
+			activate_task(rq, p, 0);
 			resched_task(rq->curr);
 		}
 #ifdef CONFIG_SMP