diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 152 |
1 files changed, 31 insertions, 121 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index bc38804e40dd..12d291bf3379 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -215,7 +215,6 @@ struct runqueue { | |||
215 | */ | 215 | */ |
216 | unsigned long nr_running; | 216 | unsigned long nr_running; |
217 | #ifdef CONFIG_SMP | 217 | #ifdef CONFIG_SMP |
218 | unsigned long prio_bias; | ||
219 | unsigned long cpu_load[3]; | 218 | unsigned long cpu_load[3]; |
220 | #endif | 219 | #endif |
221 | unsigned long long nr_switches; | 220 | unsigned long long nr_switches; |
@@ -669,68 +668,13 @@ static int effective_prio(task_t *p) | |||
669 | return prio; | 668 | return prio; |
670 | } | 669 | } |
671 | 670 | ||
672 | #ifdef CONFIG_SMP | ||
673 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
674 | { | ||
675 | rq->prio_bias += MAX_PRIO - prio; | ||
676 | } | ||
677 | |||
678 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
679 | { | ||
680 | rq->prio_bias -= MAX_PRIO - prio; | ||
681 | } | ||
682 | |||
683 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
684 | { | ||
685 | rq->nr_running++; | ||
686 | if (rt_task(p)) { | ||
687 | if (p != rq->migration_thread) | ||
688 | /* | ||
689 | * The migration thread does the actual balancing. Do | ||
690 | * not bias by its priority as the ultra high priority | ||
691 | * will skew balancing adversely. | ||
692 | */ | ||
693 | inc_prio_bias(rq, p->prio); | ||
694 | } else | ||
695 | inc_prio_bias(rq, p->static_prio); | ||
696 | } | ||
697 | |||
698 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
699 | { | ||
700 | rq->nr_running--; | ||
701 | if (rt_task(p)) { | ||
702 | if (p != rq->migration_thread) | ||
703 | dec_prio_bias(rq, p->prio); | ||
704 | } else | ||
705 | dec_prio_bias(rq, p->static_prio); | ||
706 | } | ||
707 | #else | ||
708 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
709 | { | ||
710 | } | ||
711 | |||
712 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
713 | { | ||
714 | } | ||
715 | |||
716 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
717 | { | ||
718 | rq->nr_running++; | ||
719 | } | ||
720 | |||
721 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
722 | { | ||
723 | rq->nr_running--; | ||
724 | } | ||
725 | #endif | ||
726 | |||
727 | /* | 671 | /* |
728 | * __activate_task - move a task to the runqueue. | 672 | * __activate_task - move a task to the runqueue. |
729 | */ | 673 | */ |
730 | static inline void __activate_task(task_t *p, runqueue_t *rq) | 674 | static inline void __activate_task(task_t *p, runqueue_t *rq) |
731 | { | 675 | { |
732 | enqueue_task(p, rq->active); | 676 | enqueue_task(p, rq->active); |
733 | inc_nr_running(p, rq); | 677 | rq->nr_running++; |
734 | } | 678 | } |
735 | 679 | ||
736 | /* | 680 | /* |
@@ -739,7 +683,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) | |||
739 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) | 683 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) |
740 | { | 684 | { |
741 | enqueue_task_head(p, rq->active); | 685 | enqueue_task_head(p, rq->active); |
742 | inc_nr_running(p, rq); | 686 | rq->nr_running++; |
743 | } | 687 | } |
744 | 688 | ||
745 | static int recalc_task_prio(task_t *p, unsigned long long now) | 689 | static int recalc_task_prio(task_t *p, unsigned long long now) |
@@ -863,7 +807,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
863 | */ | 807 | */ |
864 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) | 808 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) |
865 | { | 809 | { |
866 | dec_nr_running(p, rq); | 810 | rq->nr_running--; |
867 | dequeue_task(p, p->array); | 811 | dequeue_task(p, p->array); |
868 | p->array = NULL; | 812 | p->array = NULL; |
869 | } | 813 | } |
@@ -1007,61 +951,27 @@ void kick_process(task_t *p) | |||
1007 | * We want to under-estimate the load of migration sources, to | 951 | * We want to under-estimate the load of migration sources, to |
1008 | * balance conservatively. | 952 | * balance conservatively. |
1009 | */ | 953 | */ |
1010 | static unsigned long __source_load(int cpu, int type, enum idle_type idle) | 954 | static inline unsigned long source_load(int cpu, int type) |
1011 | { | 955 | { |
1012 | runqueue_t *rq = cpu_rq(cpu); | 956 | runqueue_t *rq = cpu_rq(cpu); |
1013 | unsigned long running = rq->nr_running; | 957 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; |
1014 | unsigned long source_load, cpu_load = rq->cpu_load[type-1], | ||
1015 | load_now = running * SCHED_LOAD_SCALE; | ||
1016 | |||
1017 | if (type == 0) | 958 | if (type == 0) |
1018 | source_load = load_now; | 959 | return load_now; |
1019 | else | ||
1020 | source_load = min(cpu_load, load_now); | ||
1021 | |||
1022 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
1023 | /* | ||
1024 | * If we are busy rebalancing the load is biased by | ||
1025 | * priority to create 'nice' support across cpus. When | ||
1026 | * idle rebalancing we should only bias the source_load if | ||
1027 | * there is more than one task running on that queue to | ||
1028 | * prevent idle rebalance from trying to pull tasks from a | ||
1029 | * queue with only one running task. | ||
1030 | */ | ||
1031 | source_load = source_load * rq->prio_bias / running; | ||
1032 | |||
1033 | return source_load; | ||
1034 | } | ||
1035 | 960 | ||
1036 | static inline unsigned long source_load(int cpu, int type) | 961 | return min(rq->cpu_load[type-1], load_now); |
1037 | { | ||
1038 | return __source_load(cpu, type, NOT_IDLE); | ||
1039 | } | 962 | } |
1040 | 963 | ||
1041 | /* | 964 | /* |
1042 | * Return a high guess at the load of a migration-target cpu | 965 | * Return a high guess at the load of a migration-target cpu |
1043 | */ | 966 | */ |
1044 | static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) | 967 | static inline unsigned long target_load(int cpu, int type) |
1045 | { | 968 | { |
1046 | runqueue_t *rq = cpu_rq(cpu); | 969 | runqueue_t *rq = cpu_rq(cpu); |
1047 | unsigned long running = rq->nr_running; | 970 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; |
1048 | unsigned long target_load, cpu_load = rq->cpu_load[type-1], | ||
1049 | load_now = running * SCHED_LOAD_SCALE; | ||
1050 | |||
1051 | if (type == 0) | 971 | if (type == 0) |
1052 | target_load = load_now; | 972 | return load_now; |
1053 | else | ||
1054 | target_load = max(cpu_load, load_now); | ||
1055 | |||
1056 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
1057 | target_load = target_load * rq->prio_bias / running; | ||
1058 | |||
1059 | return target_load; | ||
1060 | } | ||
1061 | 973 | ||
1062 | static inline unsigned long target_load(int cpu, int type) | 974 | return max(rq->cpu_load[type-1], load_now); |
1063 | { | ||
1064 | return __target_load(cpu, type, NOT_IDLE); | ||
1065 | } | 975 | } |
1066 | 976 | ||
1067 | /* | 977 | /* |
@@ -1294,9 +1204,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync) | |||
1294 | } | 1204 | } |
1295 | } | 1205 | } |
1296 | 1206 | ||
1297 | if (p->last_waker_cpu != this_cpu) | ||
1298 | goto out_set_cpu; | ||
1299 | |||
1300 | if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) | 1207 | if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) |
1301 | goto out_set_cpu; | 1208 | goto out_set_cpu; |
1302 | 1209 | ||
@@ -1367,8 +1274,6 @@ out_set_cpu: | |||
1367 | cpu = task_cpu(p); | 1274 | cpu = task_cpu(p); |
1368 | } | 1275 | } |
1369 | 1276 | ||
1370 | p->last_waker_cpu = this_cpu; | ||
1371 | |||
1372 | out_activate: | 1277 | out_activate: |
1373 | #endif /* CONFIG_SMP */ | 1278 | #endif /* CONFIG_SMP */ |
1374 | if (old_state == TASK_UNINTERRUPTIBLE) { | 1279 | if (old_state == TASK_UNINTERRUPTIBLE) { |
@@ -1450,12 +1355,9 @@ void fastcall sched_fork(task_t *p, int clone_flags) | |||
1450 | #ifdef CONFIG_SCHEDSTATS | 1355 | #ifdef CONFIG_SCHEDSTATS |
1451 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 1356 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
1452 | #endif | 1357 | #endif |
1453 | #if defined(CONFIG_SMP) | 1358 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
1454 | p->last_waker_cpu = cpu; | ||
1455 | #if defined(__ARCH_WANT_UNLOCKED_CTXSW) | ||
1456 | p->oncpu = 0; | 1359 | p->oncpu = 0; |
1457 | #endif | 1360 | #endif |
1458 | #endif | ||
1459 | #ifdef CONFIG_PREEMPT | 1361 | #ifdef CONFIG_PREEMPT |
1460 | /* Want to start with kernel preemption disabled. */ | 1362 | /* Want to start with kernel preemption disabled. */ |
1461 | task_thread_info(p)->preempt_count = 1; | 1363 | task_thread_info(p)->preempt_count = 1; |
@@ -1530,7 +1432,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) | |||
1530 | list_add_tail(&p->run_list, &current->run_list); | 1432 | list_add_tail(&p->run_list, &current->run_list); |
1531 | p->array = current->array; | 1433 | p->array = current->array; |
1532 | p->array->nr_active++; | 1434 | p->array->nr_active++; |
1533 | inc_nr_running(p, rq); | 1435 | rq->nr_running++; |
1534 | } | 1436 | } |
1535 | set_need_resched(); | 1437 | set_need_resched(); |
1536 | } else | 1438 | } else |
@@ -1875,9 +1777,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, | |||
1875 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) | 1777 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) |
1876 | { | 1778 | { |
1877 | dequeue_task(p, src_array); | 1779 | dequeue_task(p, src_array); |
1878 | dec_nr_running(p, src_rq); | 1780 | src_rq->nr_running--; |
1879 | set_task_cpu(p, this_cpu); | 1781 | set_task_cpu(p, this_cpu); |
1880 | inc_nr_running(p, this_rq); | 1782 | this_rq->nr_running++; |
1881 | enqueue_task(p, this_array); | 1783 | enqueue_task(p, this_array); |
1882 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 1784 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) |
1883 | + this_rq->timestamp_last_tick; | 1785 | + this_rq->timestamp_last_tick; |
@@ -2056,9 +1958,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2056 | 1958 | ||
2057 | /* Bias balancing toward cpus of our domain */ | 1959 | /* Bias balancing toward cpus of our domain */ |
2058 | if (local_group) | 1960 | if (local_group) |
2059 | load = __target_load(i, load_idx, idle); | 1961 | load = target_load(i, load_idx); |
2060 | else | 1962 | else |
2061 | load = __source_load(i, load_idx, idle); | 1963 | load = source_load(i, load_idx); |
2062 | 1964 | ||
2063 | avg_load += load; | 1965 | avg_load += load; |
2064 | } | 1966 | } |
@@ -2171,7 +2073,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group, | |||
2171 | int i; | 2073 | int i; |
2172 | 2074 | ||
2173 | for_each_cpu_mask(i, group->cpumask) { | 2075 | for_each_cpu_mask(i, group->cpumask) { |
2174 | load = __source_load(i, 0, idle); | 2076 | load = source_load(i, 0); |
2175 | 2077 | ||
2176 | if (load > max_load) { | 2078 | if (load > max_load) { |
2177 | max_load = load; | 2079 | max_load = load; |
@@ -3571,10 +3473,8 @@ void set_user_nice(task_t *p, long nice) | |||
3571 | goto out_unlock; | 3473 | goto out_unlock; |
3572 | } | 3474 | } |
3573 | array = p->array; | 3475 | array = p->array; |
3574 | if (array) { | 3476 | if (array) |
3575 | dequeue_task(p, array); | 3477 | dequeue_task(p, array); |
3576 | dec_prio_bias(rq, p->static_prio); | ||
3577 | } | ||
3578 | 3478 | ||
3579 | old_prio = p->prio; | 3479 | old_prio = p->prio; |
3580 | new_prio = NICE_TO_PRIO(nice); | 3480 | new_prio = NICE_TO_PRIO(nice); |
@@ -3584,7 +3484,6 @@ void set_user_nice(task_t *p, long nice) | |||
3584 | 3484 | ||
3585 | if (array) { | 3485 | if (array) { |
3586 | enqueue_task(p, array); | 3486 | enqueue_task(p, array); |
3587 | inc_prio_bias(rq, p->static_prio); | ||
3588 | /* | 3487 | /* |
3589 | * If the task increased its priority or is running and | 3488 | * If the task increased its priority or is running and |
3590 | * lowered its priority, then reschedule its CPU: | 3489 | * lowered its priority, then reschedule its CPU: |
@@ -5159,7 +5058,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, | |||
5159 | #define MAX_DOMAIN_DISTANCE 32 | 5058 | #define MAX_DOMAIN_DISTANCE 32 |
5160 | 5059 | ||
5161 | static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = | 5060 | static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = |
5162 | { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL }; | 5061 | { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = |
5062 | /* | ||
5063 | * Architectures may override the migration cost and thus avoid | ||
5064 | * boot-time calibration. Unit is nanoseconds. Mostly useful for | ||
5065 | * virtualized hardware: | ||
5066 | */ | ||
5067 | #ifdef CONFIG_DEFAULT_MIGRATION_COST | ||
5068 | CONFIG_DEFAULT_MIGRATION_COST | ||
5069 | #else | ||
5070 | -1LL | ||
5071 | #endif | ||
5072 | }; | ||
5163 | 5073 | ||
5164 | /* | 5074 | /* |
5165 | * Allow override of migration cost - in units of microseconds. | 5075 | * Allow override of migration cost - in units of microseconds. |