Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	179
1 file changed, 139 insertions(+), 40 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 1f31a528fdba..6f46c94cc29e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -206,6 +206,7 @@ struct runqueue {
  */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
+	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -659,13 +660,68 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
+#ifdef CONFIG_SMP
+static inline void inc_prio_bias(runqueue_t *rq, int prio)
+{
+	rq->prio_bias += MAX_PRIO - prio;
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int prio)
+{
+	rq->prio_bias -= MAX_PRIO - prio;
+}
+
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running++;
+	if (rt_task(p)) {
+		if (p != rq->migration_thread)
+			/*
+			 * The migration thread does the actual balancing. Do
+			 * not bias by its priority as the ultra high priority
+			 * will skew balancing adversely.
+			 */
+			inc_prio_bias(rq, p->prio);
+	} else
+		inc_prio_bias(rq, p->static_prio);
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running--;
+	if (rt_task(p)) {
+		if (p != rq->migration_thread)
+			dec_prio_bias(rq, p->prio);
+	} else
+		dec_prio_bias(rq, p->static_prio);
+}
+#else
+static inline void inc_prio_bias(runqueue_t *rq, int prio)
+{
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int prio)
+{
+}
+
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running++;
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running--;
+}
+#endif
+
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	rq->nr_running++;
+	inc_nr_running(p, rq);
 }
 
 /*
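The weight each queued task adds to prio_bias grows with how far its priority sits above the lowest nice level. A minimal userspace sketch (not part of the patch; it assumes the usual 2.6-era constants MAX_PRIO = 140 and NICE_TO_PRIO(nice) = 120 + nice) of the per-task contribution:

/* Illustrative only: mirrors inc_prio_bias() with assumed 2.6-era constants. */
#include <stdio.h>

#define MAX_PRIO	140		/* assumed: MAX_RT_PRIO (100) + 40 nice levels */
#define NICE_TO_PRIO(n)	(120 + (n))	/* assumed nice -> static_prio mapping */

int main(void)
{
	int nice;

	/* Each queued non-RT task adds MAX_PRIO - static_prio to rq->prio_bias. */
	for (nice = -20; nice <= 19; nice += 13)
		printf("nice %3d -> static_prio %d -> bias contribution %d\n",
		       nice, NICE_TO_PRIO(nice), MAX_PRIO - NICE_TO_PRIO(nice));
	return 0;
}

With those assumed constants a nice -20 task counts for 40, a nice 19 task for 1, and the rt_task() special case above keeps the migration thread's very high priority from dominating the bias.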
@@ -674,7 +730,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
-	rq->nr_running++;
+	inc_nr_running(p, rq);
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -759,7 +815,8 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
 	}
 #endif
 
-	p->prio = recalc_task_prio(p, now);
+	if (!rt_task(p))
+		p->prio = recalc_task_prio(p, now);
 
 	/*
 	 * This checks to make sure it's not an uninterruptible task
@@ -793,7 +850,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	rq->nr_running--;
+	dec_nr_running(p, rq);
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
@@ -808,21 +865,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 #ifdef CONFIG_SMP
 static void resched_task(task_t *p)
 {
-	int need_resched, nrpolling;
+	int cpu;
 
 	assert_spin_locked(&task_rq(p)->lock);
 
-	/* minimise the chance of sending an interrupt to poll_idle() */
-	nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
-	need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
-	nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
+	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+		return;
+
+	set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+
+	cpu = task_cpu(p);
+	if (cpu == smp_processor_id())
+		return;
 
-	if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
-		smp_send_reschedule(task_cpu(p));
+	/* NEED_RESCHED must be visible before we test POLLING_NRFLAG */
+	smp_mb();
+	if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
+		smp_send_reschedule(cpu);
 }
 #else
 static inline void resched_task(task_t *p)
 {
+	assert_spin_locked(&task_rq(p)->lock);
 	set_tsk_need_resched(p);
 }
 #endif
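The smp_mb() above orders "set NEED_RESCHED" against "read POLLING_NRFLAG", so a polling idle CPU is never left with neither an IPI nor a visible flag. A userspace sketch of that store/fence/load pattern, using C11 atomics rather than the kernel's flag helpers (names here are made up):

/* Illustrative only: the waker publishes need_resched, then checks whether
 * the target advertised that it is polling; the idle side (not shown) sets
 * polling first and then spins on need_resched. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool need_resched_flag;	/* stands in for TIF_NEED_RESCHED */
static atomic_bool polling_flag;	/* stands in for TIF_POLLING_NRFLAG */

static bool resched_needs_ipi(void)
{
	atomic_store_explicit(&need_resched_flag, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* plays the role of smp_mb() */
	/* Only send the (simulated) IPI if the target is not busy polling. */
	return !atomic_load_explicit(&polling_flag, memory_order_relaxed);
}

int main(void)
{
	atomic_store(&polling_flag, true);	/* pretend the target CPU is in poll_idle() */
	printf("IPI needed: %s\n", resched_needs_ipi() ? "yes" : "no");
	return 0;
}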
@@ -930,27 +994,61 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu, int type)
+static inline unsigned long __source_load(int cpu, int type, enum idle_type idle)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	unsigned long running = rq->nr_running;
+	unsigned long source_load, cpu_load = rq->cpu_load[type-1],
+		load_now = running * SCHED_LOAD_SCALE;
+
 	if (type == 0)
-		return load_now;
+		source_load = load_now;
+	else
+		source_load = min(cpu_load, load_now);
+
+	if (running > 1 || (idle == NOT_IDLE && running))
+		/*
+		 * If we are busy rebalancing the load is biased by
+		 * priority to create 'nice' support across cpus. When
+		 * idle rebalancing we should only bias the source_load if
+		 * there is more than one task running on that queue to
+		 * prevent idle rebalance from trying to pull tasks from a
+		 * queue with only one running task.
+		 */
+		source_load = source_load * rq->prio_bias / running;
+
+	return source_load;
+}
 
-	return min(rq->cpu_load[type-1], load_now);
+static inline unsigned long source_load(int cpu, int type)
+{
+	return __source_load(cpu, type, NOT_IDLE);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long target_load(int cpu, int type)
+static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	unsigned long running = rq->nr_running;
+	unsigned long target_load, cpu_load = rq->cpu_load[type-1],
+		load_now = running * SCHED_LOAD_SCALE;
+
 	if (type == 0)
-		return load_now;
+		target_load = load_now;
+	else
+		target_load = max(cpu_load, load_now);
+
+	if (running > 1 || (idle == NOT_IDLE && running))
+		target_load = target_load * rq->prio_bias / running;
+
+	return target_load;
+}
 
-	return max(rq->cpu_load[type-1], load_now);
+static inline unsigned long target_load(int cpu, int type)
+{
+	return __target_load(cpu, type, NOT_IDLE);
 }
 
 /*
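To put numbers on the biasing (illustrative only; assumes SCHED_LOAD_SCALE = 128 and the static_prio values from the earlier sketch): with two tasks queued, a runqueue full of nice 0 tasks now reports roughly twenty times the load of one full of nice 19 tasks, which is what pushes the balancer to spread high-priority tasks across physical CPUs first.

/* Illustrative only: how prio_bias skews the estimate __source_load() and
 * __target_load() return.  Constants are assumed 2.6-era values. */
#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL
#define MAX_PRIO	 140

static unsigned long biased_load(unsigned long running, unsigned long prio_bias)
{
	unsigned long load = running * SCHED_LOAD_SCALE;

	if (running > 1)	/* mirrors the "running > 1 || NOT_IDLE" test above */
		load = load * prio_bias / running;
	return load;
}

int main(void)
{
	/* Two nice 0 tasks:  prio_bias = 2 * (140 - 120) = 40. */
	printf("2 x nice 0 : %lu\n", biased_load(2, 40));	/* 256 * 40 / 2 = 5120 */
	/* Two nice 19 tasks: prio_bias = 2 * (140 - 139) = 2.  */
	printf("2 x nice 19: %lu\n", biased_load(2, 2));	/* 256 *  2 / 2 =  256 */
	return 0;
}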
@@ -1339,7 +1437,7 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
-	p->thread_info->preempt_count = 1;
+	task_thread_info(p)->preempt_count = 1;
 #endif
 	/*
 	 * Share the timeslice between parent and child, thus the
@@ -1411,7 +1509,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 				list_add_tail(&p->run_list, &current->run_list);
 				p->array = current->array;
 				p->array->nr_active++;
-				rq->nr_running++;
+				inc_nr_running(p, rq);
 			}
 			set_need_resched();
 		} else
@@ -1468,7 +1566,7 @@ void fastcall sched_exit(task_t *p)
 	 * the sleep_avg of the parent as well.
 	 */
 	rq = task_rq_lock(p->parent, &flags);
-	if (p->first_time_slice) {
+	if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) {
 		p->parent->time_slice += p->time_slice;
 		if (unlikely(p->parent->time_slice > task_timeslice(p)))
 			p->parent->time_slice = task_timeslice(p);
@@ -1756,9 +1854,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
-	src_rq->nr_running--;
+	dec_nr_running(p, src_rq);
 	set_task_cpu(p, this_cpu);
-	this_rq->nr_running++;
+	inc_nr_running(p, this_rq);
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
@@ -1937,9 +2035,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = target_load(i, load_idx);
+				load = __target_load(i, load_idx, idle);
 			else
-				load = source_load(i, load_idx);
+				load = __source_load(i, load_idx, idle);
 
 			avg_load += load;
 		}
@@ -2044,14 +2142,15 @@ out_balanced:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *find_busiest_queue(struct sched_group *group)
+static runqueue_t *find_busiest_queue(struct sched_group *group,
+						enum idle_type idle)
 {
 	unsigned long load, max_load = 0;
 	runqueue_t *busiest = NULL;
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = source_load(i, 0);
+		load = __source_load(i, 0, idle);
 
 		if (load > max_load) {
 			max_load = load;
@@ -2095,7 +2194,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
+	busiest = find_busiest_queue(group, idle);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -2218,7 +2317,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
+	busiest = find_busiest_queue(group, NEWLY_IDLE);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
@@ -2511,8 +2610,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
 	/* Account for system time used */
 	acct_update_integrals(p);
-	/* Update rss highwater mark */
-	update_mem_hiwater(p);
 }
 
 /*
@@ -3453,8 +3550,10 @@ void set_user_nice(task_t *p, long nice)
 		goto out_unlock;
 	}
 	array = p->array;
-	if (array)
+	if (array) {
 		dequeue_task(p, array);
+		dec_prio_bias(rq, p->static_prio);
+	}
 
 	old_prio = p->prio;
 	new_prio = NICE_TO_PRIO(nice);
@@ -3464,6 +3563,7 @@ void set_user_nice(task_t *p, long nice)
 
 	if (array) {
 		enqueue_task(p, array);
+		inc_prio_bias(rq, p->static_prio);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
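The two set_user_nice() hunks above keep rq->prio_bias equal to the sum of MAX_PRIO - static_prio over the queued non-RT tasks: the old weight comes out before the priority changes and the new weight goes back in afterwards. A userspace sketch of that invariant (hypothetical names, not the patch's code):

/* Illustrative only: drop the old weight, change static_prio, add the new one. */
#include <stdio.h>

#define MAX_PRIO 140

struct fake_rq { unsigned long prio_bias; };

static void renice_queued(struct fake_rq *rq, int *static_prio, int new_static_prio)
{
	rq->prio_bias -= MAX_PRIO - *static_prio;	/* dec_prio_bias(rq, old) */
	*static_prio = new_static_prio;
	rq->prio_bias += MAX_PRIO - *static_prio;	/* inc_prio_bias(rq, new) */
}

int main(void)
{
	struct fake_rq rq = { .prio_bias = 20 };	/* one queued nice 0 task */
	int static_prio = 120;				/* nice 0 */

	renice_queued(&rq, &static_prio, 139);		/* renice to nice 19 */
	printf("prio_bias after renice: %lu\n", rq.prio_bias);	/* 20 - 20 + 1 = 1 */
	return 0;
}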
@@ -3565,8 +3665,6 @@ int idle_cpu(int cpu)
 	return cpu_curr(cpu) == cpu_rq(cpu)->idle;
 }
 
-EXPORT_SYMBOL_GPL(idle_cpu);
-
 /**
  * idle_task - return the idle task for a given cpu.
  * @cpu: the processor in question.
@@ -4229,10 +4327,10 @@ static void show_task(task_t *p)
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	{
-		unsigned long *n = (unsigned long *) (p->thread_info+1);
+		unsigned long *n = end_of_stack(p);
 		while (!*n)
 			n++;
-		free = (unsigned long) n - (unsigned long)(p->thread_info+1);
+		free = (unsigned long)n - (unsigned long)end_of_stack(p);
 	}
 #endif
 	printk("%5lu %5d %6d ", free, p->pid, p->parent->pid);
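For reference, the CONFIG_DEBUG_STACK_USAGE block scans from the unused end of the (assumed zero-filled) thread stack for the first word that was ever written; as the removed line shows, end_of_stack() is just the address right past the thread_info. A userspace sketch of the same scan (buffer and sizes are made up):

/* Illustrative only: estimate untouched stack by scanning for the first
 * nonzero word, the same loop show_task() uses. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned long stack[1024] = { 0 };	/* stands in for the thread stack */
	unsigned long *n = stack;		/* "end_of_stack()": the never-used end */
	unsigned long free_bytes;

	/* Pretend the deepest call chain dirtied the top 424 words. */
	memset(&stack[600], 0xff, 424 * sizeof(unsigned long));

	while (!*n)				/* skip the never-touched zeroes */
		n++;
	free_bytes = (unsigned long)n - (unsigned long)stack;
	printf("untouched stack: %lu bytes\n", free_bytes);	/* 600 words' worth */
	return 0;
}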
@@ -4312,9 +4410,9 @@ void __devinit init_idle(task_t *idle, int cpu)
 
 	/* Set the preempt count _outside_ the spinlocks! */
 #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
-	idle->thread_info->preempt_count = (idle->lock_depth >= 0);
+	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
 #else
-	idle->thread_info->preempt_count = 0;
+	task_thread_info(idle)->preempt_count = 0;
 #endif
 }
 
@@ -4682,7 +4780,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:
 		/* Unbind it from offline cpu so it can run. Fall thru. */
-		kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id());
+		kthread_bind(cpu_rq(cpu)->migration_thread,
+			     any_online_cpu(cpu_online_map));
 		kthread_stop(cpu_rq(cpu)->migration_thread);
 		cpu_rq(cpu)->migration_thread = NULL;
 		break;