diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 179 |
1 files changed, 139 insertions, 40 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 1f31a528fdba..6f46c94cc29e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -206,6 +206,7 @@ struct runqueue { | |||
206 | */ | 206 | */ |
207 | unsigned long nr_running; | 207 | unsigned long nr_running; |
208 | #ifdef CONFIG_SMP | 208 | #ifdef CONFIG_SMP |
209 | unsigned long prio_bias; | ||
209 | unsigned long cpu_load[3]; | 210 | unsigned long cpu_load[3]; |
210 | #endif | 211 | #endif |
211 | unsigned long long nr_switches; | 212 | unsigned long long nr_switches; |
@@ -659,13 +660,68 @@ static int effective_prio(task_t *p) | |||
659 | return prio; | 660 | return prio; |
660 | } | 661 | } |
661 | 662 | ||
663 | #ifdef CONFIG_SMP | ||
664 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
665 | { | ||
666 | rq->prio_bias += MAX_PRIO - prio; | ||
667 | } | ||
668 | |||
669 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
670 | { | ||
671 | rq->prio_bias -= MAX_PRIO - prio; | ||
672 | } | ||
673 | |||
674 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
675 | { | ||
676 | rq->nr_running++; | ||
677 | if (rt_task(p)) { | ||
678 | if (p != rq->migration_thread) | ||
679 | /* | ||
680 | * The migration thread does the actual balancing. Do | ||
681 | * not bias by its priority as the ultra high priority | ||
682 | * will skew balancing adversely. | ||
683 | */ | ||
684 | inc_prio_bias(rq, p->prio); | ||
685 | } else | ||
686 | inc_prio_bias(rq, p->static_prio); | ||
687 | } | ||
688 | |||
689 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
690 | { | ||
691 | rq->nr_running--; | ||
692 | if (rt_task(p)) { | ||
693 | if (p != rq->migration_thread) | ||
694 | dec_prio_bias(rq, p->prio); | ||
695 | } else | ||
696 | dec_prio_bias(rq, p->static_prio); | ||
697 | } | ||
698 | #else | ||
699 | static inline void inc_prio_bias(runqueue_t *rq, int prio) | ||
700 | { | ||
701 | } | ||
702 | |||
703 | static inline void dec_prio_bias(runqueue_t *rq, int prio) | ||
704 | { | ||
705 | } | ||
706 | |||
707 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
708 | { | ||
709 | rq->nr_running++; | ||
710 | } | ||
711 | |||
712 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
713 | { | ||
714 | rq->nr_running--; | ||
715 | } | ||
716 | #endif | ||
717 | |||
662 | /* | 718 | /* |
663 | * __activate_task - move a task to the runqueue. | 719 | * __activate_task - move a task to the runqueue. |
664 | */ | 720 | */ |
665 | static inline void __activate_task(task_t *p, runqueue_t *rq) | 721 | static inline void __activate_task(task_t *p, runqueue_t *rq) |
666 | { | 722 | { |
667 | enqueue_task(p, rq->active); | 723 | enqueue_task(p, rq->active); |
668 | rq->nr_running++; | 724 | inc_nr_running(p, rq); |
669 | } | 725 | } |
670 | 726 | ||
671 | /* | 727 | /* |
@@ -674,7 +730,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) | |||
674 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) | 730 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) |
675 | { | 731 | { |
676 | enqueue_task_head(p, rq->active); | 732 | enqueue_task_head(p, rq->active); |
677 | rq->nr_running++; | 733 | inc_nr_running(p, rq); |
678 | } | 734 | } |
679 | 735 | ||
680 | static int recalc_task_prio(task_t *p, unsigned long long now) | 736 | static int recalc_task_prio(task_t *p, unsigned long long now) |
@@ -759,7 +815,8 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
759 | } | 815 | } |
760 | #endif | 816 | #endif |
761 | 817 | ||
762 | p->prio = recalc_task_prio(p, now); | 818 | if (!rt_task(p)) |
819 | p->prio = recalc_task_prio(p, now); | ||
763 | 820 | ||
764 | /* | 821 | /* |
765 | * This checks to make sure it's not an uninterruptible task | 822 | * This checks to make sure it's not an uninterruptible task |
@@ -793,7 +850,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
793 | */ | 850 | */ |
794 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) | 851 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) |
795 | { | 852 | { |
796 | rq->nr_running--; | 853 | dec_nr_running(p, rq); |
797 | dequeue_task(p, p->array); | 854 | dequeue_task(p, p->array); |
798 | p->array = NULL; | 855 | p->array = NULL; |
799 | } | 856 | } |
@@ -808,21 +865,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) | |||
808 | #ifdef CONFIG_SMP | 865 | #ifdef CONFIG_SMP |
809 | static void resched_task(task_t *p) | 866 | static void resched_task(task_t *p) |
810 | { | 867 | { |
811 | int need_resched, nrpolling; | 868 | int cpu; |
812 | 869 | ||
813 | assert_spin_locked(&task_rq(p)->lock); | 870 | assert_spin_locked(&task_rq(p)->lock); |
814 | 871 | ||
815 | /* minimise the chance of sending an interrupt to poll_idle() */ | 872 | if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) |
816 | nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 873 | return; |
817 | need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); | 874 | |
818 | nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); | 875 | set_tsk_thread_flag(p, TIF_NEED_RESCHED); |
876 | |||
877 | cpu = task_cpu(p); | ||
878 | if (cpu == smp_processor_id()) | ||
879 | return; | ||
819 | 880 | ||
820 | if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) | 881 | /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ |
821 | smp_send_reschedule(task_cpu(p)); | 882 | smp_mb(); |
883 | if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) | ||
884 | smp_send_reschedule(cpu); | ||
822 | } | 885 | } |
823 | #else | 886 | #else |
824 | static inline void resched_task(task_t *p) | 887 | static inline void resched_task(task_t *p) |
825 | { | 888 | { |
889 | assert_spin_locked(&task_rq(p)->lock); | ||
826 | set_tsk_need_resched(p); | 890 | set_tsk_need_resched(p); |
827 | } | 891 | } |
828 | #endif | 892 | #endif |
@@ -930,27 +994,61 @@ void kick_process(task_t *p) | |||
930 | * We want to under-estimate the load of migration sources, to | 994 | * We want to under-estimate the load of migration sources, to |
931 | * balance conservatively. | 995 | * balance conservatively. |
932 | */ | 996 | */ |
933 | static inline unsigned long source_load(int cpu, int type) | 997 | static inline unsigned long __source_load(int cpu, int type, enum idle_type idle) |
934 | { | 998 | { |
935 | runqueue_t *rq = cpu_rq(cpu); | 999 | runqueue_t *rq = cpu_rq(cpu); |
936 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 1000 | unsigned long running = rq->nr_running; |
1001 | unsigned long source_load, cpu_load = rq->cpu_load[type-1], | ||
1002 | load_now = running * SCHED_LOAD_SCALE; | ||
1003 | |||
937 | if (type == 0) | 1004 | if (type == 0) |
938 | return load_now; | 1005 | source_load = load_now; |
1006 | else | ||
1007 | source_load = min(cpu_load, load_now); | ||
1008 | |||
1009 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
1010 | /* | ||
1011 | * If we are busy rebalancing the load is biased by | ||
1012 | * priority to create 'nice' support across cpus. When | ||
1013 | * idle rebalancing we should only bias the source_load if | ||
1014 | * there is more than one task running on that queue to | ||
1015 | * prevent idle rebalance from trying to pull tasks from a | ||
1016 | * queue with only one running task. | ||
1017 | */ | ||
1018 | source_load = source_load * rq->prio_bias / running; | ||
1019 | |||
1020 | return source_load; | ||
1021 | } | ||
939 | 1022 | ||
940 | return min(rq->cpu_load[type-1], load_now); | 1023 | static inline unsigned long source_load(int cpu, int type) |
1024 | { | ||
1025 | return __source_load(cpu, type, NOT_IDLE); | ||
941 | } | 1026 | } |
942 | 1027 | ||
943 | /* | 1028 | /* |
944 | * Return a high guess at the load of a migration-target cpu | 1029 | * Return a high guess at the load of a migration-target cpu |
945 | */ | 1030 | */ |
946 | static inline unsigned long target_load(int cpu, int type) | 1031 | static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) |
947 | { | 1032 | { |
948 | runqueue_t *rq = cpu_rq(cpu); | 1033 | runqueue_t *rq = cpu_rq(cpu); |
949 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 1034 | unsigned long running = rq->nr_running; |
1035 | unsigned long target_load, cpu_load = rq->cpu_load[type-1], | ||
1036 | load_now = running * SCHED_LOAD_SCALE; | ||
1037 | |||
950 | if (type == 0) | 1038 | if (type == 0) |
951 | return load_now; | 1039 | target_load = load_now; |
1040 | else | ||
1041 | target_load = max(cpu_load, load_now); | ||
1042 | |||
1043 | if (running > 1 || (idle == NOT_IDLE && running)) | ||
1044 | target_load = target_load * rq->prio_bias / running; | ||
1045 | |||
1046 | return target_load; | ||
1047 | } | ||
952 | 1048 | ||
953 | return max(rq->cpu_load[type-1], load_now); | 1049 | static inline unsigned long target_load(int cpu, int type) |
1050 | { | ||
1051 | return __target_load(cpu, type, NOT_IDLE); | ||
954 | } | 1052 | } |
955 | 1053 | ||
956 | /* | 1054 | /* |
@@ -1339,7 +1437,7 @@ void fastcall sched_fork(task_t *p, int clone_flags) | |||
1339 | #endif | 1437 | #endif |
1340 | #ifdef CONFIG_PREEMPT | 1438 | #ifdef CONFIG_PREEMPT |
1341 | /* Want to start with kernel preemption disabled. */ | 1439 | /* Want to start with kernel preemption disabled. */ |
1342 | p->thread_info->preempt_count = 1; | 1440 | task_thread_info(p)->preempt_count = 1; |
1343 | #endif | 1441 | #endif |
1344 | /* | 1442 | /* |
1345 | * Share the timeslice between parent and child, thus the | 1443 | * Share the timeslice between parent and child, thus the |
@@ -1411,7 +1509,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) | |||
1411 | list_add_tail(&p->run_list, ¤t->run_list); | 1509 | list_add_tail(&p->run_list, ¤t->run_list); |
1412 | p->array = current->array; | 1510 | p->array = current->array; |
1413 | p->array->nr_active++; | 1511 | p->array->nr_active++; |
1414 | rq->nr_running++; | 1512 | inc_nr_running(p, rq); |
1415 | } | 1513 | } |
1416 | set_need_resched(); | 1514 | set_need_resched(); |
1417 | } else | 1515 | } else |
@@ -1468,7 +1566,7 @@ void fastcall sched_exit(task_t *p) | |||
1468 | * the sleep_avg of the parent as well. | 1566 | * the sleep_avg of the parent as well. |
1469 | */ | 1567 | */ |
1470 | rq = task_rq_lock(p->parent, &flags); | 1568 | rq = task_rq_lock(p->parent, &flags); |
1471 | if (p->first_time_slice) { | 1569 | if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) { |
1472 | p->parent->time_slice += p->time_slice; | 1570 | p->parent->time_slice += p->time_slice; |
1473 | if (unlikely(p->parent->time_slice > task_timeslice(p))) | 1571 | if (unlikely(p->parent->time_slice > task_timeslice(p))) |
1474 | p->parent->time_slice = task_timeslice(p); | 1572 | p->parent->time_slice = task_timeslice(p); |
@@ -1756,9 +1854,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, | |||
1756 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) | 1854 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) |
1757 | { | 1855 | { |
1758 | dequeue_task(p, src_array); | 1856 | dequeue_task(p, src_array); |
1759 | src_rq->nr_running--; | 1857 | dec_nr_running(p, src_rq); |
1760 | set_task_cpu(p, this_cpu); | 1858 | set_task_cpu(p, this_cpu); |
1761 | this_rq->nr_running++; | 1859 | inc_nr_running(p, this_rq); |
1762 | enqueue_task(p, this_array); | 1860 | enqueue_task(p, this_array); |
1763 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 1861 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) |
1764 | + this_rq->timestamp_last_tick; | 1862 | + this_rq->timestamp_last_tick; |
@@ -1937,9 +2035,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
1937 | 2035 | ||
1938 | /* Bias balancing toward cpus of our domain */ | 2036 | /* Bias balancing toward cpus of our domain */ |
1939 | if (local_group) | 2037 | if (local_group) |
1940 | load = target_load(i, load_idx); | 2038 | load = __target_load(i, load_idx, idle); |
1941 | else | 2039 | else |
1942 | load = source_load(i, load_idx); | 2040 | load = __source_load(i, load_idx, idle); |
1943 | 2041 | ||
1944 | avg_load += load; | 2042 | avg_load += load; |
1945 | } | 2043 | } |
@@ -2044,14 +2142,15 @@ out_balanced: | |||
2044 | /* | 2142 | /* |
2045 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 2143 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
2046 | */ | 2144 | */ |
2047 | static runqueue_t *find_busiest_queue(struct sched_group *group) | 2145 | static runqueue_t *find_busiest_queue(struct sched_group *group, |
2146 | enum idle_type idle) | ||
2048 | { | 2147 | { |
2049 | unsigned long load, max_load = 0; | 2148 | unsigned long load, max_load = 0; |
2050 | runqueue_t *busiest = NULL; | 2149 | runqueue_t *busiest = NULL; |
2051 | int i; | 2150 | int i; |
2052 | 2151 | ||
2053 | for_each_cpu_mask(i, group->cpumask) { | 2152 | for_each_cpu_mask(i, group->cpumask) { |
2054 | load = source_load(i, 0); | 2153 | load = __source_load(i, 0, idle); |
2055 | 2154 | ||
2056 | if (load > max_load) { | 2155 | if (load > max_load) { |
2057 | max_load = load; | 2156 | max_load = load; |
@@ -2095,7 +2194,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
2095 | goto out_balanced; | 2194 | goto out_balanced; |
2096 | } | 2195 | } |
2097 | 2196 | ||
2098 | busiest = find_busiest_queue(group); | 2197 | busiest = find_busiest_queue(group, idle); |
2099 | if (!busiest) { | 2198 | if (!busiest) { |
2100 | schedstat_inc(sd, lb_nobusyq[idle]); | 2199 | schedstat_inc(sd, lb_nobusyq[idle]); |
2101 | goto out_balanced; | 2200 | goto out_balanced; |
@@ -2218,7 +2317,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | |||
2218 | goto out_balanced; | 2317 | goto out_balanced; |
2219 | } | 2318 | } |
2220 | 2319 | ||
2221 | busiest = find_busiest_queue(group); | 2320 | busiest = find_busiest_queue(group, NEWLY_IDLE); |
2222 | if (!busiest) { | 2321 | if (!busiest) { |
2223 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); | 2322 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); |
2224 | goto out_balanced; | 2323 | goto out_balanced; |
@@ -2511,8 +2610,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
2511 | cpustat->idle = cputime64_add(cpustat->idle, tmp); | 2610 | cpustat->idle = cputime64_add(cpustat->idle, tmp); |
2512 | /* Account for system time used */ | 2611 | /* Account for system time used */ |
2513 | acct_update_integrals(p); | 2612 | acct_update_integrals(p); |
2514 | /* Update rss highwater mark */ | ||
2515 | update_mem_hiwater(p); | ||
2516 | } | 2613 | } |
2517 | 2614 | ||
2518 | /* | 2615 | /* |
@@ -3453,8 +3550,10 @@ void set_user_nice(task_t *p, long nice) | |||
3453 | goto out_unlock; | 3550 | goto out_unlock; |
3454 | } | 3551 | } |
3455 | array = p->array; | 3552 | array = p->array; |
3456 | if (array) | 3553 | if (array) { |
3457 | dequeue_task(p, array); | 3554 | dequeue_task(p, array); |
3555 | dec_prio_bias(rq, p->static_prio); | ||
3556 | } | ||
3458 | 3557 | ||
3459 | old_prio = p->prio; | 3558 | old_prio = p->prio; |
3460 | new_prio = NICE_TO_PRIO(nice); | 3559 | new_prio = NICE_TO_PRIO(nice); |
@@ -3464,6 +3563,7 @@ void set_user_nice(task_t *p, long nice) | |||
3464 | 3563 | ||
3465 | if (array) { | 3564 | if (array) { |
3466 | enqueue_task(p, array); | 3565 | enqueue_task(p, array); |
3566 | inc_prio_bias(rq, p->static_prio); | ||
3467 | /* | 3567 | /* |
3468 | * If the task increased its priority or is running and | 3568 | * If the task increased its priority or is running and |
3469 | * lowered its priority, then reschedule its CPU: | 3569 | * lowered its priority, then reschedule its CPU: |
@@ -3565,8 +3665,6 @@ int idle_cpu(int cpu) | |||
3565 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; | 3665 | return cpu_curr(cpu) == cpu_rq(cpu)->idle; |
3566 | } | 3666 | } |
3567 | 3667 | ||
3568 | EXPORT_SYMBOL_GPL(idle_cpu); | ||
3569 | |||
3570 | /** | 3668 | /** |
3571 | * idle_task - return the idle task for a given cpu. | 3669 | * idle_task - return the idle task for a given cpu. |
3572 | * @cpu: the processor in question. | 3670 | * @cpu: the processor in question. |
@@ -4229,10 +4327,10 @@ static void show_task(task_t *p) | |||
4229 | #endif | 4327 | #endif |
4230 | #ifdef CONFIG_DEBUG_STACK_USAGE | 4328 | #ifdef CONFIG_DEBUG_STACK_USAGE |
4231 | { | 4329 | { |
4232 | unsigned long *n = (unsigned long *) (p->thread_info+1); | 4330 | unsigned long *n = end_of_stack(p); |
4233 | while (!*n) | 4331 | while (!*n) |
4234 | n++; | 4332 | n++; |
4235 | free = (unsigned long) n - (unsigned long)(p->thread_info+1); | 4333 | free = (unsigned long)n - (unsigned long)end_of_stack(p); |
4236 | } | 4334 | } |
4237 | #endif | 4335 | #endif |
4238 | printk("%5lu %5d %6d ", free, p->pid, p->parent->pid); | 4336 | printk("%5lu %5d %6d ", free, p->pid, p->parent->pid); |
@@ -4312,9 +4410,9 @@ void __devinit init_idle(task_t *idle, int cpu) | |||
4312 | 4410 | ||
4313 | /* Set the preempt count _outside_ the spinlocks! */ | 4411 | /* Set the preempt count _outside_ the spinlocks! */ |
4314 | #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL) | 4412 | #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL) |
4315 | idle->thread_info->preempt_count = (idle->lock_depth >= 0); | 4413 | task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0); |
4316 | #else | 4414 | #else |
4317 | idle->thread_info->preempt_count = 0; | 4415 | task_thread_info(idle)->preempt_count = 0; |
4318 | #endif | 4416 | #endif |
4319 | } | 4417 | } |
4320 | 4418 | ||
@@ -4682,7 +4780,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, | |||
4682 | #ifdef CONFIG_HOTPLUG_CPU | 4780 | #ifdef CONFIG_HOTPLUG_CPU |
4683 | case CPU_UP_CANCELED: | 4781 | case CPU_UP_CANCELED: |
4684 | /* Unbind it from offline cpu so it can run. Fall thru. */ | 4782 | /* Unbind it from offline cpu so it can run. Fall thru. */ |
4685 | kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); | 4783 | kthread_bind(cpu_rq(cpu)->migration_thread, |
4784 | any_online_cpu(cpu_online_map)); | ||
4686 | kthread_stop(cpu_rq(cpu)->migration_thread); | 4785 | kthread_stop(cpu_rq(cpu)->migration_thread); |
4687 | cpu_rq(cpu)->migration_thread = NULL; | 4786 | cpu_rq(cpu)->migration_thread = NULL; |
4688 | break; | 4787 | break; |