Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  162
1 file changed, 34 insertions(+), 128 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index bc38804e40dd..4d46e90f59c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -178,13 +178,6 @@ static unsigned int task_timeslice(task_t *p)
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)	\
 				< (long long) (sd)->cache_hot_time)
 
-void __put_task_struct_cb(struct rcu_head *rhp)
-{
-	__put_task_struct(container_of(rhp, struct task_struct, rcu));
-}
-
-EXPORT_SYMBOL_GPL(__put_task_struct_cb);
-
 /*
  * These are the runqueue data structures:
  */
@@ -215,7 +208,6 @@ struct runqueue {
  */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
-	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -669,68 +661,13 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
-#ifdef CONFIG_SMP
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias += MAX_PRIO - prio;
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias -= MAX_PRIO - prio;
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			/*
-			 * The migration thread does the actual balancing. Do
-			 * not bias by its priority as the ultra high priority
-			 * will skew balancing adversely.
-			 */
-			inc_prio_bias(rq, p->prio);
-	} else
-		inc_prio_bias(rq, p->static_prio);
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			dec_prio_bias(rq, p->prio);
-	} else
-		dec_prio_bias(rq, p->static_prio);
-}
-#else
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-}
-#endif
-
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 /*
@@ -739,7 +676,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +800,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	dec_nr_running(p, rq);
+	rq->nr_running--;
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
@@ -1007,61 +944,27 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long source_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long source_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		source_load = load_now;
-	else
-		source_load = min(cpu_load, load_now);
-
-	if (running > 1 || (idle == NOT_IDLE && running))
-		/*
-		 * If we are busy rebalancing the load is biased by
-		 * priority to create 'nice' support across cpus. When
-		 * idle rebalancing we should only bias the source_load if
-		 * there is more than one task running on that queue to
-		 * prevent idle rebalance from trying to pull tasks from a
-		 * queue with only one running task.
-		 */
-		source_load = source_load * rq->prio_bias / running;
+		return load_now;
 
-	return source_load;
-}
-
-static inline unsigned long source_load(int cpu, int type)
-{
-	return __source_load(cpu, type, NOT_IDLE);
+	return min(rq->cpu_load[type-1], load_now);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long target_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long target_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		target_load = load_now;
-	else
-		target_load = max(cpu_load, load_now);
-
-	if (running > 1 || (idle == NOT_IDLE && running))
-		target_load = target_load * rq->prio_bias / running;
+		return load_now;
 
-	return target_load;
-}
-
-static inline unsigned long target_load(int cpu, int type)
-{
-	return __target_load(cpu, type, NOT_IDLE);
+	return max(rq->cpu_load[type-1], load_now);
 }
 
 /*
@@ -1294,9 +1197,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 		}
 	}
 
-	if (p->last_waker_cpu != this_cpu)
-		goto out_set_cpu;
-
 	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
@@ -1367,8 +1267,6 @@ out_set_cpu:
 		cpu = task_cpu(p);
 	}
 
-	p->last_waker_cpu = this_cpu;
-
 out_activate:
 #endif /* CONFIG_SMP */
 	if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1450,12 +1348,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
-#if defined(CONFIG_SMP)
-	p->last_waker_cpu = cpu;
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	p->oncpu = 0;
 #endif
-#endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
@@ -1530,7 +1425,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 			list_add_tail(&p->run_list, &current->run_list);
 			p->array = current->array;
 			p->array->nr_active++;
-			inc_nr_running(p, rq);
+			rq->nr_running++;
 		}
 		set_need_resched();
 	} else
@@ -1875,9 +1770,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
-	dec_nr_running(p, src_rq);
+	src_rq->nr_running--;
 	set_task_cpu(p, this_cpu);
-	inc_nr_running(p, this_rq);
+	this_rq->nr_running++;
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
@@ -2056,9 +1951,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 		/* Bias balancing toward cpus of our domain */
 		if (local_group)
-			load = __target_load(i, load_idx, idle);
+			load = target_load(i, load_idx);
 		else
-			load = __source_load(i, load_idx, idle);
+			load = source_load(i, load_idx);
 
 		avg_load += load;
 	}
@@ -2171,7 +2066,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = __source_load(i, 0, idle);
+		load = source_load(i, 0);
 
 		if (load > max_load) {
 			max_load = load;
@@ -3571,10 +3466,8 @@ void set_user_nice(task_t *p, long nice)
 		goto out_unlock;
 	}
 	array = p->array;
-	if (array) {
+	if (array)
 		dequeue_task(p, array);
-		dec_prio_bias(rq, p->static_prio);
-	}
 
 	old_prio = p->prio;
 	new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3477,6 @@ void set_user_nice(task_t *p, long nice)
 
 	if (array) {
 		enqueue_task(p, array);
-		inc_prio_bias(rq, p->static_prio);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -4129,6 +4021,8 @@ static inline void __cond_resched(void)
 	 */
 	if (unlikely(preempt_count()))
 		return;
+	if (unlikely(system_state != SYSTEM_RUNNING))
+		return;
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		schedule();
@@ -4434,6 +4328,7 @@ void __devinit init_idle(task_t *idle, int cpu)
 	runqueue_t *rq = cpu_rq(cpu);
 	unsigned long flags;
 
+	idle->timestamp = sched_clock();
 	idle->sleep_avg = 0;
 	idle->array = NULL;
 	idle->prio = MAX_PRIO;
@@ -5159,7 +5054,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
 #define MAX_DOMAIN_DISTANCE 32
 
 static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
-	{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL };
+	{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
+/*
+ * Architectures may override the migration cost and thus avoid
+ * boot-time calibration. Unit is nanoseconds. Mostly useful for
+ * virtualized hardware:
+ */
+#ifdef CONFIG_DEFAULT_MIGRATION_COST
+			CONFIG_DEFAULT_MIGRATION_COST
+#else
+			-1LL
+#endif
+};
 
 /*
  * Allow override of migration cost - in units of microseconds.
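Note: a minimal sketch (not from this commit) of how the range-designated initializer added above behaves. [ 0 ... N ] is a GNU C extension that fills every slot of migration_cost[] with the same default; -1LL marks an entry for boot-time calibration, while an architecture-supplied CONFIG_DEFAULT_MIGRATION_COST skips calibration. The constant value below is a hypothetical stand-in.

/* Illustration only; the CONFIG_* value is a made-up stand-in. */
#include <stdio.h>

#define MAX_DOMAIN_DISTANCE 32
#define CONFIG_DEFAULT_MIGRATION_COST 500000LL	/* hypothetical: 0.5 ms, in ns */

static long long migration_cost[MAX_DOMAIN_DISTANCE] =
	{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =	/* GNU C range designator */
#ifdef CONFIG_DEFAULT_MIGRATION_COST
		CONFIG_DEFAULT_MIGRATION_COST	/* preset: skip calibration */
#else
		-1LL				/* -1: calibrate at boot */
#endif
	};

int main(void)
{
	printf("migration_cost[0]  = %lld ns\n", migration_cost[0]);
	printf("migration_cost[%d] = %lld ns\n",
	       MAX_DOMAIN_DISTANCE - 1, migration_cost[MAX_DOMAIN_DISTANCE - 1]);
	return 0;
}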