Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	160
1 file changed, 131 insertions, 29 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 3ce26954be12..b6506671b2be 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -206,6 +206,7 @@ struct runqueue {
 	 */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
+	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -659,13 +660,68 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
+#ifdef CONFIG_SMP
+static inline void inc_prio_bias(runqueue_t *rq, int prio)
+{
+	rq->prio_bias += MAX_PRIO - prio;
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int prio)
+{
+	rq->prio_bias -= MAX_PRIO - prio;
+}
+
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running++;
+	if (rt_task(p)) {
+		if (p != rq->migration_thread)
+			/*
+			 * The migration thread does the actual balancing. Do
+			 * not bias by its priority as the ultra high priority
+			 * will skew balancing adversely.
+			 */
+			inc_prio_bias(rq, p->prio);
+	} else
+		inc_prio_bias(rq, p->static_prio);
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running--;
+	if (rt_task(p)) {
+		if (p != rq->migration_thread)
+			dec_prio_bias(rq, p->prio);
+	} else
+		dec_prio_bias(rq, p->static_prio);
+}
+#else
+static inline void inc_prio_bias(runqueue_t *rq, int prio)
+{
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int prio)
+{
+}
+
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running++;
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running--;
+}
+#endif
+
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	rq->nr_running++;
+	inc_nr_running(p, rq);
 }
 
 /*
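
A note on the weighting above: each queued non-RT task adds MAX_PRIO - static_prio to prio_bias, so heavily niced tasks count for less. A standalone sketch of that arithmetic, assuming the 2.6-era constants MAX_RT_PRIO = 100 and MAX_PRIO = 140 (so NICE_TO_PRIO(0) = 120); none of this is kernel code:

/* Illustration only: the prio_bias weight per queued task. */
#include <stdio.h>

#define MAX_RT_PRIO		100
#define MAX_PRIO		140
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)

int main(void)
{
	int nice;

	/* nice -20 weighs 40, nice 0 weighs 20, nice 19 weighs 1, so a
	 * queue full of niced tasks looks lighter to the balancer. */
	for (nice = -20; nice <= 19; nice += 13)
		printf("nice %3d -> prio %3d -> weight %2d\n", nice,
		       NICE_TO_PRIO(nice), MAX_PRIO - NICE_TO_PRIO(nice));
	return 0;
}
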
@@ -674,7 +730,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
-	rq->nr_running++;
+	inc_nr_running(p, rq);
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -759,7 +815,8 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
 	}
 #endif
 
-	p->prio = recalc_task_prio(p, now);
+	if (!rt_task(p))
+		p->prio = recalc_task_prio(p, now);
 
 	/*
 	 * This checks to make sure it's not an uninterruptible task
@@ -793,7 +850,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	rq->nr_running--;
+	dec_nr_running(p, rq);
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
@@ -808,21 +865,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 #ifdef CONFIG_SMP
 static void resched_task(task_t *p)
 {
-	int need_resched, nrpolling;
+	int cpu;
 
 	assert_spin_locked(&task_rq(p)->lock);
 
-	/* minimise the chance of sending an interrupt to poll_idle() */
-	nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
-	need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
-	nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
+	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+		return;
+
+	set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+
+	cpu = task_cpu(p);
+	if (cpu == smp_processor_id())
+		return;
 
-	if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
-		smp_send_reschedule(task_cpu(p));
+	/* NEED_RESCHED must be visible before we test POLLING_NRFLAG */
+	smp_mb();
+	if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
+		smp_send_reschedule(cpu);
 }
 #else
 static inline void resched_task(task_t *p)
 {
+	assert_spin_locked(&task_rq(p)->lock);
 	set_tsk_need_resched(p);
 }
 #endif
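
The rewritten resched_task() leans on ordering: the write of TIF_NEED_RESCHED must be visible before the read of TIF_POLLING_NRFLAG, otherwise the waker can observe a stale POLLING_NRFLAG, skip the IPI, and leave a CPU idle with a runnable task queued. A userspace sketch of that pairing using C11 atomics; the names mirror the kernel flags but nothing here is kernel API:

/* Sketch of the flag protocol; illustration only. */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool need_resched, polling;

static void waker(void)		/* resched_task() side */
{
	atomic_store_explicit(&need_resched, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the smp_mb() */
	if (!atomic_load_explicit(&polling, memory_order_relaxed))
		;	/* smp_send_reschedule(cpu) would go here */
}

static void idler(void)		/* poll_idle()-style loop */
{
	atomic_store_explicit(&polling, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	while (!atomic_load_explicit(&need_resched, memory_order_relaxed))
		;	/* cpu_relax() */
	atomic_store_explicit(&polling, false, memory_order_relaxed);
}

int main(void)
{
	waker();	/* idler() would spin until need_resched is set */
	return 0;
}
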
@@ -930,27 +994,61 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu, int type)
+static inline unsigned long __source_load(int cpu, int type, enum idle_type idle)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	unsigned long running = rq->nr_running;
+	unsigned long source_load, cpu_load = rq->cpu_load[type-1],
+		load_now = running * SCHED_LOAD_SCALE;
+
 	if (type == 0)
-		return load_now;
+		source_load = load_now;
+	else
+		source_load = min(cpu_load, load_now);
 
-	return min(rq->cpu_load[type-1], load_now);
+	if (running > 1 || (idle == NOT_IDLE && running))
+		/*
+		 * If we are busy rebalancing the load is biased by
+		 * priority to create 'nice' support across cpus. When
+		 * idle rebalancing we should only bias the source_load if
+		 * there is more than one task running on that queue to
+		 * prevent idle rebalance from trying to pull tasks from a
+		 * queue with only one running task.
+		 */
+		source_load = source_load * rq->prio_bias / running;
+
+	return source_load;
+}
+
+static inline unsigned long source_load(int cpu, int type)
+{
+	return __source_load(cpu, type, NOT_IDLE);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long target_load(int cpu, int type)
+static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	unsigned long running = rq->nr_running;
+	unsigned long target_load, cpu_load = rq->cpu_load[type-1],
+		load_now = running * SCHED_LOAD_SCALE;
+
 	if (type == 0)
-		return load_now;
+		target_load = load_now;
+	else
+		target_load = max(cpu_load, load_now);
+
+	if (running > 1 || (idle == NOT_IDLE && running))
+		target_load = target_load * rq->prio_bias / running;
 
-	return max(rq->cpu_load[type-1], load_now);
+	return target_load;
+}
+
+static inline unsigned long target_load(int cpu, int type)
+{
+	return __target_load(cpu, type, NOT_IDLE);
 }
 
 /*
@@ -1411,7 +1509,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 			list_add_tail(&p->run_list, &current->run_list);
 			p->array = current->array;
 			p->array->nr_active++;
-			rq->nr_running++;
+			inc_nr_running(p, rq);
 		}
 		set_need_resched();
 	} else
@@ -1756,9 +1854,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
-	src_rq->nr_running--;
+	dec_nr_running(p, src_rq);
 	set_task_cpu(p, this_cpu);
-	this_rq->nr_running++;
+	inc_nr_running(p, this_rq);
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
@@ -1937,9 +2035,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 		/* Bias balancing toward cpus of our domain */
 		if (local_group)
-			load = target_load(i, load_idx);
+			load = __target_load(i, load_idx, idle);
 		else
-			load = source_load(i, load_idx);
+			load = __source_load(i, load_idx, idle);
 
 		avg_load += load;
 	}
@@ -2044,14 +2142,15 @@ out_balanced:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *find_busiest_queue(struct sched_group *group)
+static runqueue_t *find_busiest_queue(struct sched_group *group,
+	enum idle_type idle)
 {
 	unsigned long load, max_load = 0;
 	runqueue_t *busiest = NULL;
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = source_load(i, 0);
+		load = __source_load(i, 0, idle);
 
 		if (load > max_load) {
 			max_load = load;
@@ -2095,7 +2194,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
+	busiest = find_busiest_queue(group, idle);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -2218,7 +2317,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
+	busiest = find_busiest_queue(group, NEWLY_IDLE);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
@@ -3451,8 +3550,10 @@ void set_user_nice(task_t *p, long nice)
 		goto out_unlock;
 	}
 	array = p->array;
-	if (array)
+	if (array) {
 		dequeue_task(p, array);
+		dec_prio_bias(rq, p->static_prio);
+	}
 
 	old_prio = p->prio;
 	new_prio = NICE_TO_PRIO(nice);
@@ -3462,6 +3563,7 @@ void set_user_nice(task_t *p, long nice)
 
 	if (array) {
 		enqueue_task(p, array);
+		inc_prio_bias(rq, p->static_prio);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
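
The set_user_nice() hunks keep the invariant that prio_bias equals the summed weights of the queued tasks: the old weight comes off while the task is dequeued, static_prio is updated, and the new weight goes back on at enqueue. A toy check of that bookkeeping with stand-in types, not the kernel's:

/* Toy invariant check for the renice bookkeeping; illustration only. */
#include <assert.h>

#define MAX_PRIO	140

struct toy_rq {
	unsigned long prio_bias;
};

static void dec_bias(struct toy_rq *rq, int prio)
{
	rq->prio_bias -= MAX_PRIO - prio;
}

static void inc_bias(struct toy_rq *rq, int prio)
{
	rq->prio_bias += MAX_PRIO - prio;
}

int main(void)
{
	struct toy_rq rq = { .prio_bias = 20 };	/* one queued nice-0 task */
	int static_prio = 120;			/* NICE_TO_PRIO(0) */

	/* Renice to 19 the way set_user_nice() does it. */
	dec_bias(&rq, static_prio);
	static_prio = 139;			/* NICE_TO_PRIO(19) */
	inc_bias(&rq, static_prio);
	assert(rq.prio_bias == 1);		/* weight of a nice-19 task */
	return 0;
}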