Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	162
1 file changed, 34 insertions(+), 128 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index bc38804e40dd..4d46e90f59c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -178,13 +178,6 @@ static unsigned int task_timeslice(task_t *p)
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)	\
 				< (long long) (sd)->cache_hot_time)
 
-void __put_task_struct_cb(struct rcu_head *rhp)
-{
-	__put_task_struct(container_of(rhp, struct task_struct, rcu));
-}
-
-EXPORT_SYMBOL_GPL(__put_task_struct_cb);
-
 /*
  * These are the runqueue data structures:
  */
@@ -215,7 +208,6 @@ struct runqueue {
 	 */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
-	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -669,68 +661,13 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
-#ifdef CONFIG_SMP
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias += MAX_PRIO - prio;
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias -= MAX_PRIO - prio;
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			/*
-			 * The migration thread does the actual balancing. Do
-			 * not bias by its priority as the ultra high priority
-			 * will skew balancing adversely.
-			 */
-			inc_prio_bias(rq, p->prio);
-	} else
-		inc_prio_bias(rq, p->static_prio);
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			dec_prio_bias(rq, p->prio);
-	} else
-		dec_prio_bias(rq, p->static_prio);
-}
-#else
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-}
-#endif
-
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 /*
@@ -739,7 +676,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +800,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	dec_nr_running(p, rq);
+	rq->nr_running--;
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
@@ -1007,61 +944,27 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long source_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long source_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		source_load = load_now;
-	else
-		source_load = min(cpu_load, load_now);
-
-	if (running > 1 || (idle == NOT_IDLE && running))
-		/*
-		 * If we are busy rebalancing the load is biased by
-		 * priority to create 'nice' support across cpus. When
-		 * idle rebalancing we should only bias the source_load if
-		 * there is more than one task running on that queue to
-		 * prevent idle rebalance from trying to pull tasks from a
-		 * queue with only one running task.
-		 */
-		source_load = source_load * rq->prio_bias / running;
+		return load_now;
 
-	return source_load;
-}
-
-static inline unsigned long source_load(int cpu, int type)
-{
-	return __source_load(cpu, type, NOT_IDLE);
+	return min(rq->cpu_load[type-1], load_now);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long target_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long target_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		target_load = load_now;
-	else
-		target_load = max(cpu_load, load_now);
-
-	if (running > 1 || (idle == NOT_IDLE && running))
-		target_load = target_load * rq->prio_bias / running;
+		return load_now;
 
-	return target_load;
-}
-
-static inline unsigned long target_load(int cpu, int type)
-{
-	return __target_load(cpu, type, NOT_IDLE);
+	return max(rq->cpu_load[type-1], load_now);
 }
 
 /*
@@ -1294,9 +1197,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 		}
 	}
 
-	if (p->last_waker_cpu != this_cpu)
-		goto out_set_cpu;
-
 	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
@@ -1367,8 +1267,6 @@ out_set_cpu:
 		cpu = task_cpu(p);
 	}
 
-	p->last_waker_cpu = this_cpu;
-
 out_activate:
 #endif /* CONFIG_SMP */
 	if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1450,12 +1348,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
-#if defined(CONFIG_SMP)
-	p->last_waker_cpu = cpu;
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	p->oncpu = 0;
 #endif
-#endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
@@ -1530,7 +1425,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 				list_add_tail(&p->run_list, &current->run_list);
 				p->array = current->array;
 				p->array->nr_active++;
-				inc_nr_running(p, rq);
+				rq->nr_running++;
 			}
 			set_need_resched();
 		} else
@@ -1875,9 +1770,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
-	dec_nr_running(p, src_rq);
+	src_rq->nr_running--;
 	set_task_cpu(p, this_cpu);
-	inc_nr_running(p, this_rq);
+	this_rq->nr_running++;
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
@@ -2056,9 +1951,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = __target_load(i, load_idx, idle);
+				load = target_load(i, load_idx);
 			else
-				load = __source_load(i, load_idx, idle);
+				load = source_load(i, load_idx);
 
 			avg_load += load;
 		}
@@ -2171,7 +2066,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = __source_load(i, 0, idle);
+		load = source_load(i, 0);
 
 		if (load > max_load) {
 			max_load = load;
@@ -3571,10 +3466,8 @@ void set_user_nice(task_t *p, long nice)
 		goto out_unlock;
 	}
 	array = p->array;
-	if (array) {
+	if (array)
 		dequeue_task(p, array);
-		dec_prio_bias(rq, p->static_prio);
-	}
 
 	old_prio = p->prio;
 	new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3477,6 @@ void set_user_nice(task_t *p, long nice)
 
 	if (array) {
 		enqueue_task(p, array);
-		inc_prio_bias(rq, p->static_prio);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -4129,6 +4021,8 @@ static inline void __cond_resched(void)
 	 */
 	if (unlikely(preempt_count()))
 		return;
+	if (unlikely(system_state != SYSTEM_RUNNING))
+		return;
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		schedule();
@@ -4434,6 +4328,7 @@ void __devinit init_idle(task_t *idle, int cpu)
 	runqueue_t *rq = cpu_rq(cpu);
 	unsigned long flags;
 
+	idle->timestamp = sched_clock();
 	idle->sleep_avg = 0;
 	idle->array = NULL;
 	idle->prio = MAX_PRIO;
@@ -5159,7 +5054,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
 #define MAX_DOMAIN_DISTANCE 32
 
 static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
-	{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL };
+	{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
+/*
+ * Architectures may override the migration cost and thus avoid
+ * boot-time calibration. Unit is nanoseconds. Mostly useful for
+ * virtualized hardware:
+ */
+#ifdef CONFIG_DEFAULT_MIGRATION_COST
+		CONFIG_DEFAULT_MIGRATION_COST
+#else
+		-1LL
+#endif
+};
 
 /*
  * Allow override of migration cost - in units of microseconds.
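
For reference, once the reverted prio_bias logic is gone the two load estimators left in kernel/sched.c reduce to the following (a minimal sketch assembled from the added lines above, with explanatory comments added here; runqueue_t, cpu_rq(), cpu_load[] and SCHED_LOAD_SCALE are the existing scheduler definitions, nothing new):

    static inline unsigned long source_load(int cpu, int type)
    {
            runqueue_t *rq = cpu_rq(cpu);
            /* Instantaneous load: runnable task count scaled by SCHED_LOAD_SCALE. */
            unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;

            if (type == 0)
                    return load_now;

            /* Under-estimate the migration source: take the lower of the
             * decaying cpu_load[] average and the instantaneous load. */
            return min(rq->cpu_load[type-1], load_now);
    }

    static inline unsigned long target_load(int cpu, int type)
    {
            runqueue_t *rq = cpu_rq(cpu);
            unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;

            if (type == 0)
                    return load_now;

            /* Over-estimate the migration target: take the higher of the two. */
            return max(rq->cpu_load[type-1], load_now);
    }

find_busiest_group() and find_busiest_queue() call these helpers directly, so the idle/non-idle distinction that the removed __source_load()/__target_load() variants carried no longer affects how per-CPU load is computed.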