Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 1179
1 file changed, 709 insertions, 470 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 4e2f60335656..6acf749d3336 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -70,10 +70,13 @@ | |||
70 | #include <linux/bootmem.h> | 70 | #include <linux/bootmem.h> |
71 | #include <linux/debugfs.h> | 71 | #include <linux/debugfs.h> |
72 | #include <linux/ctype.h> | 72 | #include <linux/ctype.h> |
73 | #include <linux/ftrace.h> | ||
73 | 74 | ||
74 | #include <asm/tlb.h> | 75 | #include <asm/tlb.h> |
75 | #include <asm/irq_regs.h> | 76 | #include <asm/irq_regs.h> |
76 | 77 | ||
78 | #include "sched_cpupri.h" | ||
79 | |||
77 | /* | 80 | /* |
78 | * Convert user-nice values [ -20 ... 0 ... 19 ] | 81 | * Convert user-nice values [ -20 ... 0 ... 19 ] |
79 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], | 82 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], |
@@ -289,15 +292,15 @@ struct task_group root_task_group; | |||
289 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); | 292 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); |
290 | /* Default task group's cfs_rq on each cpu */ | 293 | /* Default task group's cfs_rq on each cpu */ |
291 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; | 294 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; |
292 | #endif | 295 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
293 | 296 | ||
294 | #ifdef CONFIG_RT_GROUP_SCHED | 297 | #ifdef CONFIG_RT_GROUP_SCHED |
295 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | 298 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); |
296 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | 299 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; |
297 | #endif | 300 | #endif /* CONFIG_RT_GROUP_SCHED */ |
298 | #else | 301 | #else /* !CONFIG_FAIR_GROUP_SCHED */ |
299 | #define root_task_group init_task_group | 302 | #define root_task_group init_task_group |
300 | #endif | 303 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
301 | 304 | ||
302 | /* task_group_lock serializes add/remove of task groups and also changes to | 305 | /* task_group_lock serializes add/remove of task groups and also changes to |
303 | * a task group's cpu shares. | 306 | * a task group's cpu shares. |
@@ -307,9 +310,9 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
307 | #ifdef CONFIG_FAIR_GROUP_SCHED | 310 | #ifdef CONFIG_FAIR_GROUP_SCHED |
308 | #ifdef CONFIG_USER_SCHED | 311 | #ifdef CONFIG_USER_SCHED |
309 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | 312 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) |
310 | #else | 313 | #else /* !CONFIG_USER_SCHED */ |
311 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | 314 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD |
312 | #endif | 315 | #endif /* CONFIG_USER_SCHED */ |
313 | 316 | ||
314 | /* | 317 | /* |
315 | * A weight of 0 or 1 can cause arithmetics problems. | 318 | * A weight of 0 or 1 can cause arithmetics problems. |
@@ -363,6 +366,10 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | |||
363 | #else | 366 | #else |
364 | 367 | ||
365 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | 368 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } |
369 | static inline struct task_group *task_group(struct task_struct *p) | ||
370 | { | ||
371 | return NULL; | ||
372 | } | ||
366 | 373 | ||
367 | #endif /* CONFIG_GROUP_SCHED */ | 374 | #endif /* CONFIG_GROUP_SCHED */ |
368 | 375 | ||
@@ -373,6 +380,7 @@ struct cfs_rq { | |||
373 | 380 | ||
374 | u64 exec_clock; | 381 | u64 exec_clock; |
375 | u64 min_vruntime; | 382 | u64 min_vruntime; |
383 | u64 pair_start; | ||
376 | 384 | ||
377 | struct rb_root tasks_timeline; | 385 | struct rb_root tasks_timeline; |
378 | struct rb_node *rb_leftmost; | 386 | struct rb_node *rb_leftmost; |
@@ -401,6 +409,31 @@ struct cfs_rq { | |||
401 | */ | 409 | */ |
402 | struct list_head leaf_cfs_rq_list; | 410 | struct list_head leaf_cfs_rq_list; |
403 | struct task_group *tg; /* group that "owns" this runqueue */ | 411 | struct task_group *tg; /* group that "owns" this runqueue */ |
412 | |||
413 | #ifdef CONFIG_SMP | ||
414 | /* | ||
415 | * the part of load.weight contributed by tasks | ||
416 | */ | ||
417 | unsigned long task_weight; | ||
418 | |||
419 | /* | ||
420 | * h_load = weight * f(tg) | ||
421 | * | ||
422 | * Where f(tg) is the recursive weight fraction assigned to | ||
423 | * this group. | ||
424 | */ | ||
425 | unsigned long h_load; | ||
426 | |||
427 | /* | ||
428 | * this cpu's part of tg->shares | ||
429 | */ | ||
430 | unsigned long shares; | ||
431 | |||
432 | /* | ||
433 | * load.weight at the time we set shares | ||
434 | */ | ||
435 | unsigned long rq_weight; | ||
436 | #endif | ||
404 | #endif | 437 | #endif |
405 | }; | 438 | }; |
406 | 439 | ||
@@ -452,6 +485,9 @@ struct root_domain { | |||
452 | */ | 485 | */ |
453 | cpumask_t rto_mask; | 486 | cpumask_t rto_mask; |
454 | atomic_t rto_count; | 487 | atomic_t rto_count; |
488 | #ifdef CONFIG_SMP | ||
489 | struct cpupri cpupri; | ||
490 | #endif | ||
455 | }; | 491 | }; |
456 | 492 | ||
457 | /* | 493 | /* |
@@ -526,14 +562,19 @@ struct rq { | |||
526 | int push_cpu; | 562 | int push_cpu; |
527 | /* cpu of this runqueue: */ | 563 | /* cpu of this runqueue: */ |
528 | int cpu; | 564 | int cpu; |
565 | int online; | ||
566 | |||
567 | unsigned long avg_load_per_task; | ||
529 | 568 | ||
530 | struct task_struct *migration_thread; | 569 | struct task_struct *migration_thread; |
531 | struct list_head migration_queue; | 570 | struct list_head migration_queue; |
532 | #endif | 571 | #endif |
533 | 572 | ||
534 | #ifdef CONFIG_SCHED_HRTICK | 573 | #ifdef CONFIG_SCHED_HRTICK |
535 | unsigned long hrtick_flags; | 574 | #ifdef CONFIG_SMP |
536 | ktime_t hrtick_expire; | 575 | int hrtick_csd_pending; |
576 | struct call_single_data hrtick_csd; | ||
577 | #endif | ||
537 | struct hrtimer hrtick_timer; | 578 | struct hrtimer hrtick_timer; |
538 | #endif | 579 | #endif |
539 | 580 | ||
@@ -607,6 +648,24 @@ static inline void update_rq_clock(struct rq *rq) | |||
607 | # define const_debug static const | 648 | # define const_debug static const |
608 | #endif | 649 | #endif |
609 | 650 | ||
651 | /** | ||
652 | * runqueue_is_locked | ||
653 | * | ||
654 | * Returns true if the current cpu runqueue is locked. | ||
655 | * This interface allows printk to be called with the runqueue lock | ||
656 | * held and know whether or not it is OK to wake up the klogd. | ||
657 | */ | ||
658 | int runqueue_is_locked(void) | ||
659 | { | ||
660 | int cpu = get_cpu(); | ||
661 | struct rq *rq = cpu_rq(cpu); | ||
662 | int ret; | ||
663 | |||
664 | ret = spin_is_locked(&rq->lock); | ||
665 | put_cpu(); | ||
666 | return ret; | ||
667 | } | ||
668 | |||
610 | /* | 669 | /* |
611 | * Debugging: various feature bits | 670 | * Debugging: various feature bits |
612 | */ | 671 | */ |
@@ -749,6 +808,12 @@ late_initcall(sched_init_debug); | |||
749 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | 808 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
750 | 809 | ||
751 | /* | 810 | /* |
811 | * ratelimit for updating the group shares. | ||
812 | * default: 0.5ms | ||
813 | */ | ||
814 | const_debug unsigned int sysctl_sched_shares_ratelimit = 500000; | ||
815 | |||
816 | /* | ||
752 | * period over which we measure -rt task cpu usage in us. | 817 | * period over which we measure -rt task cpu usage in us. |
753 | * default: 1s | 818 | * default: 1s |
754 | */ | 819 | */ |
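
Note that the sysctl_sched_shares_ratelimit value added above is in nanoseconds: update_shares(), introduced further down in this patch, compares it against a cpu_clock() delta, so the default of 500000 corresponds to the 0.5 ms mentioned in its comment.
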
@@ -775,82 +840,6 @@ static inline u64 global_rt_runtime(void) | |||
775 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | 840 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; |
776 | } | 841 | } |
777 | 842 | ||
778 | unsigned long long time_sync_thresh = 100000; | ||
779 | |||
780 | static DEFINE_PER_CPU(unsigned long long, time_offset); | ||
781 | static DEFINE_PER_CPU(unsigned long long, prev_cpu_time); | ||
782 | |||
783 | /* | ||
784 | * Global lock which we take every now and then to synchronize | ||
785 | * the CPUs time. This method is not warp-safe, but it's good | ||
786 | * enough to synchronize slowly diverging time sources and thus | ||
787 | * it's good enough for tracing: | ||
788 | */ | ||
789 | static DEFINE_SPINLOCK(time_sync_lock); | ||
790 | static unsigned long long prev_global_time; | ||
791 | |||
792 | static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu) | ||
793 | { | ||
794 | /* | ||
795 | * We want this inlined, to not get tracer function calls | ||
796 | * in this critical section: | ||
797 | */ | ||
798 | spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_); | ||
799 | __raw_spin_lock(&time_sync_lock.raw_lock); | ||
800 | |||
801 | if (time < prev_global_time) { | ||
802 | per_cpu(time_offset, cpu) += prev_global_time - time; | ||
803 | time = prev_global_time; | ||
804 | } else { | ||
805 | prev_global_time = time; | ||
806 | } | ||
807 | |||
808 | __raw_spin_unlock(&time_sync_lock.raw_lock); | ||
809 | spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_); | ||
810 | |||
811 | return time; | ||
812 | } | ||
813 | |||
814 | static unsigned long long __cpu_clock(int cpu) | ||
815 | { | ||
816 | unsigned long long now; | ||
817 | |||
818 | /* | ||
819 | * Only call sched_clock() if the scheduler has already been | ||
820 | * initialized (some code might call cpu_clock() very early): | ||
821 | */ | ||
822 | if (unlikely(!scheduler_running)) | ||
823 | return 0; | ||
824 | |||
825 | now = sched_clock_cpu(cpu); | ||
826 | |||
827 | return now; | ||
828 | } | ||
829 | |||
830 | /* | ||
831 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | ||
832 | * clock constructed from sched_clock(): | ||
833 | */ | ||
834 | unsigned long long cpu_clock(int cpu) | ||
835 | { | ||
836 | unsigned long long prev_cpu_time, time, delta_time; | ||
837 | unsigned long flags; | ||
838 | |||
839 | local_irq_save(flags); | ||
840 | prev_cpu_time = per_cpu(prev_cpu_time, cpu); | ||
841 | time = __cpu_clock(cpu) + per_cpu(time_offset, cpu); | ||
842 | delta_time = time-prev_cpu_time; | ||
843 | |||
844 | if (unlikely(delta_time > time_sync_thresh)) { | ||
845 | time = __sync_cpu_clock(time, cpu); | ||
846 | per_cpu(prev_cpu_time, cpu) = time; | ||
847 | } | ||
848 | local_irq_restore(flags); | ||
849 | |||
850 | return time; | ||
851 | } | ||
852 | EXPORT_SYMBOL_GPL(cpu_clock); | ||
853 | |||
854 | #ifndef prepare_arch_switch | 843 | #ifndef prepare_arch_switch |
855 | # define prepare_arch_switch(next) do { } while (0) | 844 | # define prepare_arch_switch(next) do { } while (0) |
856 | #endif | 845 | #endif |
@@ -996,13 +985,6 @@ static struct rq *this_rq_lock(void) | |||
996 | return rq; | 985 | return rq; |
997 | } | 986 | } |
998 | 987 | ||
999 | static void __resched_task(struct task_struct *p, int tif_bit); | ||
1000 | |||
1001 | static inline void resched_task(struct task_struct *p) | ||
1002 | { | ||
1003 | __resched_task(p, TIF_NEED_RESCHED); | ||
1004 | } | ||
1005 | |||
1006 | #ifdef CONFIG_SCHED_HRTICK | 988 | #ifdef CONFIG_SCHED_HRTICK |
1007 | /* | 989 | /* |
1008 | * Use HR-timers to deliver accurate preemption points. | 990 | * Use HR-timers to deliver accurate preemption points. |
@@ -1014,25 +996,6 @@ static inline void resched_task(struct task_struct *p) | |||
1014 | * When we get rescheduled we reprogram the hrtick_timer outside of the | 996 | * When we get rescheduled we reprogram the hrtick_timer outside of the |
1015 | * rq->lock. | 997 | * rq->lock. |
1016 | */ | 998 | */ |
1017 | static inline void resched_hrt(struct task_struct *p) | ||
1018 | { | ||
1019 | __resched_task(p, TIF_HRTICK_RESCHED); | ||
1020 | } | ||
1021 | |||
1022 | static inline void resched_rq(struct rq *rq) | ||
1023 | { | ||
1024 | unsigned long flags; | ||
1025 | |||
1026 | spin_lock_irqsave(&rq->lock, flags); | ||
1027 | resched_task(rq->curr); | ||
1028 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1029 | } | ||
1030 | |||
1031 | enum { | ||
1032 | HRTICK_SET, /* re-programm hrtick_timer */ | ||
1033 | HRTICK_RESET, /* not a new slice */ | ||
1034 | HRTICK_BLOCK, /* stop hrtick operations */ | ||
1035 | }; | ||
1036 | 999 | ||
1037 | /* | 1000 | /* |
1038 | * Use hrtick when: | 1001 | * Use hrtick when: |
@@ -1043,40 +1006,11 @@ static inline int hrtick_enabled(struct rq *rq) | |||
1043 | { | 1006 | { |
1044 | if (!sched_feat(HRTICK)) | 1007 | if (!sched_feat(HRTICK)) |
1045 | return 0; | 1008 | return 0; |
1046 | if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags))) | 1009 | if (!cpu_active(cpu_of(rq))) |
1047 | return 0; | 1010 | return 0; |
1048 | return hrtimer_is_hres_active(&rq->hrtick_timer); | 1011 | return hrtimer_is_hres_active(&rq->hrtick_timer); |
1049 | } | 1012 | } |
1050 | 1013 | ||
1051 | /* | ||
1052 | * Called to set the hrtick timer state. | ||
1053 | * | ||
1054 | * called with rq->lock held and irqs disabled | ||
1055 | */ | ||
1056 | static void hrtick_start(struct rq *rq, u64 delay, int reset) | ||
1057 | { | ||
1058 | assert_spin_locked(&rq->lock); | ||
1059 | |||
1060 | /* | ||
1061 | * preempt at: now + delay | ||
1062 | */ | ||
1063 | rq->hrtick_expire = | ||
1064 | ktime_add_ns(rq->hrtick_timer.base->get_time(), delay); | ||
1065 | /* | ||
1066 | * indicate we need to program the timer | ||
1067 | */ | ||
1068 | __set_bit(HRTICK_SET, &rq->hrtick_flags); | ||
1069 | if (reset) | ||
1070 | __set_bit(HRTICK_RESET, &rq->hrtick_flags); | ||
1071 | |||
1072 | /* | ||
1073 | * New slices are called from the schedule path and don't need a | ||
1074 | * forced reschedule. | ||
1075 | */ | ||
1076 | if (reset) | ||
1077 | resched_hrt(rq->curr); | ||
1078 | } | ||
1079 | |||
1080 | static void hrtick_clear(struct rq *rq) | 1014 | static void hrtick_clear(struct rq *rq) |
1081 | { | 1015 | { |
1082 | if (hrtimer_active(&rq->hrtick_timer)) | 1016 | if (hrtimer_active(&rq->hrtick_timer)) |
@@ -1084,32 +1018,6 @@ static void hrtick_clear(struct rq *rq) | |||
1084 | } | 1018 | } |
1085 | 1019 | ||
1086 | /* | 1020 | /* |
1087 | * Update the timer from the possible pending state. | ||
1088 | */ | ||
1089 | static void hrtick_set(struct rq *rq) | ||
1090 | { | ||
1091 | ktime_t time; | ||
1092 | int set, reset; | ||
1093 | unsigned long flags; | ||
1094 | |||
1095 | WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); | ||
1096 | |||
1097 | spin_lock_irqsave(&rq->lock, flags); | ||
1098 | set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags); | ||
1099 | reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags); | ||
1100 | time = rq->hrtick_expire; | ||
1101 | clear_thread_flag(TIF_HRTICK_RESCHED); | ||
1102 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1103 | |||
1104 | if (set) { | ||
1105 | hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS); | ||
1106 | if (reset && !hrtimer_active(&rq->hrtick_timer)) | ||
1107 | resched_rq(rq); | ||
1108 | } else | ||
1109 | hrtick_clear(rq); | ||
1110 | } | ||
1111 | |||
1112 | /* | ||
1113 | * High-resolution timer tick. | 1021 | * High-resolution timer tick. |
1114 | * Runs from hardirq context with interrupts disabled. | 1022 | * Runs from hardirq context with interrupts disabled. |
1115 | */ | 1023 | */ |
@@ -1128,27 +1036,37 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
1128 | } | 1036 | } |
1129 | 1037 | ||
1130 | #ifdef CONFIG_SMP | 1038 | #ifdef CONFIG_SMP |
1131 | static void hotplug_hrtick_disable(int cpu) | 1039 | /* |
1040 | * called from hardirq (IPI) context | ||
1041 | */ | ||
1042 | static void __hrtick_start(void *arg) | ||
1132 | { | 1043 | { |
1133 | struct rq *rq = cpu_rq(cpu); | 1044 | struct rq *rq = arg; |
1134 | unsigned long flags; | ||
1135 | |||
1136 | spin_lock_irqsave(&rq->lock, flags); | ||
1137 | rq->hrtick_flags = 0; | ||
1138 | __set_bit(HRTICK_BLOCK, &rq->hrtick_flags); | ||
1139 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1140 | 1045 | ||
1141 | hrtick_clear(rq); | 1046 | spin_lock(&rq->lock); |
1047 | hrtimer_restart(&rq->hrtick_timer); | ||
1048 | rq->hrtick_csd_pending = 0; | ||
1049 | spin_unlock(&rq->lock); | ||
1142 | } | 1050 | } |
1143 | 1051 | ||
1144 | static void hotplug_hrtick_enable(int cpu) | 1052 | /* |
1053 | * Called to set the hrtick timer state. | ||
1054 | * | ||
1055 | * called with rq->lock held and irqs disabled | ||
1056 | */ | ||
1057 | static void hrtick_start(struct rq *rq, u64 delay) | ||
1145 | { | 1058 | { |
1146 | struct rq *rq = cpu_rq(cpu); | 1059 | struct hrtimer *timer = &rq->hrtick_timer; |
1147 | unsigned long flags; | 1060 | ktime_t time = ktime_add_ns(timer->base->get_time(), delay); |
1148 | 1061 | ||
1149 | spin_lock_irqsave(&rq->lock, flags); | 1062 | timer->expires = time; |
1150 | __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags); | 1063 | |
1151 | spin_unlock_irqrestore(&rq->lock, flags); | 1064 | if (rq == this_rq()) { |
1065 | hrtimer_restart(timer); | ||
1066 | } else if (!rq->hrtick_csd_pending) { | ||
1067 | __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); | ||
1068 | rq->hrtick_csd_pending = 1; | ||
1069 | } | ||
1152 | } | 1070 | } |
1153 | 1071 | ||
1154 | static int | 1072 | static int |
@@ -1163,16 +1081,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
1163 | case CPU_DOWN_PREPARE_FROZEN: | 1081 | case CPU_DOWN_PREPARE_FROZEN: |
1164 | case CPU_DEAD: | 1082 | case CPU_DEAD: |
1165 | case CPU_DEAD_FROZEN: | 1083 | case CPU_DEAD_FROZEN: |
1166 | hotplug_hrtick_disable(cpu); | 1084 | hrtick_clear(cpu_rq(cpu)); |
1167 | return NOTIFY_OK; | ||
1168 | |||
1169 | case CPU_UP_PREPARE: | ||
1170 | case CPU_UP_PREPARE_FROZEN: | ||
1171 | case CPU_DOWN_FAILED: | ||
1172 | case CPU_DOWN_FAILED_FROZEN: | ||
1173 | case CPU_ONLINE: | ||
1174 | case CPU_ONLINE_FROZEN: | ||
1175 | hotplug_hrtick_enable(cpu); | ||
1176 | return NOTIFY_OK; | 1085 | return NOTIFY_OK; |
1177 | } | 1086 | } |
1178 | 1087 | ||
@@ -1183,46 +1092,45 @@ static void init_hrtick(void) | |||
1183 | { | 1092 | { |
1184 | hotcpu_notifier(hotplug_hrtick, 0); | 1093 | hotcpu_notifier(hotplug_hrtick, 0); |
1185 | } | 1094 | } |
1186 | #endif /* CONFIG_SMP */ | 1095 | #else |
1096 | /* | ||
1097 | * Called to set the hrtick timer state. | ||
1098 | * | ||
1099 | * called with rq->lock held and irqs disabled | ||
1100 | */ | ||
1101 | static void hrtick_start(struct rq *rq, u64 delay) | ||
1102 | { | ||
1103 | hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL); | ||
1104 | } | ||
1187 | 1105 | ||
1188 | static void init_rq_hrtick(struct rq *rq) | 1106 | static void init_hrtick(void) |
1189 | { | 1107 | { |
1190 | rq->hrtick_flags = 0; | ||
1191 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
1192 | rq->hrtick_timer.function = hrtick; | ||
1193 | rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | ||
1194 | } | 1108 | } |
1109 | #endif /* CONFIG_SMP */ | ||
1195 | 1110 | ||
1196 | void hrtick_resched(void) | 1111 | static void init_rq_hrtick(struct rq *rq) |
1197 | { | 1112 | { |
1198 | struct rq *rq; | 1113 | #ifdef CONFIG_SMP |
1199 | unsigned long flags; | 1114 | rq->hrtick_csd_pending = 0; |
1200 | 1115 | ||
1201 | if (!test_thread_flag(TIF_HRTICK_RESCHED)) | 1116 | rq->hrtick_csd.flags = 0; |
1202 | return; | 1117 | rq->hrtick_csd.func = __hrtick_start; |
1118 | rq->hrtick_csd.info = rq; | ||
1119 | #endif | ||
1203 | 1120 | ||
1204 | local_irq_save(flags); | 1121 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
1205 | rq = cpu_rq(smp_processor_id()); | 1122 | rq->hrtick_timer.function = hrtick; |
1206 | hrtick_set(rq); | 1123 | rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; |
1207 | local_irq_restore(flags); | ||
1208 | } | 1124 | } |
1209 | #else | 1125 | #else |
1210 | static inline void hrtick_clear(struct rq *rq) | 1126 | static inline void hrtick_clear(struct rq *rq) |
1211 | { | 1127 | { |
1212 | } | 1128 | } |
1213 | 1129 | ||
1214 | static inline void hrtick_set(struct rq *rq) | ||
1215 | { | ||
1216 | } | ||
1217 | |||
1218 | static inline void init_rq_hrtick(struct rq *rq) | 1130 | static inline void init_rq_hrtick(struct rq *rq) |
1219 | { | 1131 | { |
1220 | } | 1132 | } |
1221 | 1133 | ||
1222 | void hrtick_resched(void) | ||
1223 | { | ||
1224 | } | ||
1225 | |||
1226 | static inline void init_hrtick(void) | 1134 | static inline void init_hrtick(void) |
1227 | { | 1135 | { |
1228 | } | 1136 | } |
@@ -1241,16 +1149,16 @@ static inline void init_hrtick(void) | |||
1241 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) | 1149 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) |
1242 | #endif | 1150 | #endif |
1243 | 1151 | ||
1244 | static void __resched_task(struct task_struct *p, int tif_bit) | 1152 | static void resched_task(struct task_struct *p) |
1245 | { | 1153 | { |
1246 | int cpu; | 1154 | int cpu; |
1247 | 1155 | ||
1248 | assert_spin_locked(&task_rq(p)->lock); | 1156 | assert_spin_locked(&task_rq(p)->lock); |
1249 | 1157 | ||
1250 | if (unlikely(test_tsk_thread_flag(p, tif_bit))) | 1158 | if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) |
1251 | return; | 1159 | return; |
1252 | 1160 | ||
1253 | set_tsk_thread_flag(p, tif_bit); | 1161 | set_tsk_thread_flag(p, TIF_NEED_RESCHED); |
1254 | 1162 | ||
1255 | cpu = task_cpu(p); | 1163 | cpu = task_cpu(p); |
1256 | if (cpu == smp_processor_id()) | 1164 | if (cpu == smp_processor_id()) |
@@ -1313,15 +1221,15 @@ void wake_up_idle_cpu(int cpu) | |||
1313 | if (!tsk_is_polling(rq->idle)) | 1221 | if (!tsk_is_polling(rq->idle)) |
1314 | smp_send_reschedule(cpu); | 1222 | smp_send_reschedule(cpu); |
1315 | } | 1223 | } |
1316 | #endif | 1224 | #endif /* CONFIG_NO_HZ */ |
1317 | 1225 | ||
1318 | #else | 1226 | #else /* !CONFIG_SMP */ |
1319 | static void __resched_task(struct task_struct *p, int tif_bit) | 1227 | static void resched_task(struct task_struct *p) |
1320 | { | 1228 | { |
1321 | assert_spin_locked(&task_rq(p)->lock); | 1229 | assert_spin_locked(&task_rq(p)->lock); |
1322 | set_tsk_thread_flag(p, tif_bit); | 1230 | set_tsk_need_resched(p); |
1323 | } | 1231 | } |
1324 | #endif | 1232 | #endif /* CONFIG_SMP */ |
1325 | 1233 | ||
1326 | #if BITS_PER_LONG == 32 | 1234 | #if BITS_PER_LONG == 32 |
1327 | # define WMULT_CONST (~0UL) | 1235 | # define WMULT_CONST (~0UL) |
@@ -1336,6 +1244,9 @@ static void __resched_task(struct task_struct *p, int tif_bit) | |||
1336 | */ | 1244 | */ |
1337 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | 1245 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) |
1338 | 1246 | ||
1247 | /* | ||
1248 | * delta *= weight / lw | ||
1249 | */ | ||
1339 | static unsigned long | 1250 | static unsigned long |
1340 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | 1251 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, |
1341 | struct load_weight *lw) | 1252 | struct load_weight *lw) |
@@ -1363,12 +1274,6 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
1363 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | 1274 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); |
1364 | } | 1275 | } |
1365 | 1276 | ||
1366 | static inline unsigned long | ||
1367 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | ||
1368 | { | ||
1369 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | ||
1370 | } | ||
1371 | |||
1372 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | 1277 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
1373 | { | 1278 | { |
1374 | lw->weight += inc; | 1279 | lw->weight += inc; |
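
The new one-line comment above calc_delta_mine() summarizes what the fixed-point arithmetic computes: the return value is roughly delta_exec * weight / lw->weight. As a made-up example, a 1,000,000 ns slice scaled by weight = NICE_0_LOAD = 1024 against a load of lw->weight = 2048 comes out to about 500,000 ns.
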
@@ -1479,17 +1384,211 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) | |||
1479 | #ifdef CONFIG_SMP | 1384 | #ifdef CONFIG_SMP |
1480 | static unsigned long source_load(int cpu, int type); | 1385 | static unsigned long source_load(int cpu, int type); |
1481 | static unsigned long target_load(int cpu, int type); | 1386 | static unsigned long target_load(int cpu, int type); |
1482 | static unsigned long cpu_avg_load_per_task(int cpu); | ||
1483 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1387 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
1484 | #else /* CONFIG_SMP */ | 1388 | |
1389 | static unsigned long cpu_avg_load_per_task(int cpu) | ||
1390 | { | ||
1391 | struct rq *rq = cpu_rq(cpu); | ||
1392 | |||
1393 | if (rq->nr_running) | ||
1394 | rq->avg_load_per_task = rq->load.weight / rq->nr_running; | ||
1395 | |||
1396 | return rq->avg_load_per_task; | ||
1397 | } | ||
1485 | 1398 | ||
1486 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1399 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1487 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | 1400 | |
1401 | typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *); | ||
1402 | |||
1403 | /* | ||
1404 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
1405 | * leaving it for the final time. | ||
1406 | */ | ||
1407 | static void | ||
1408 | walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd) | ||
1409 | { | ||
1410 | struct task_group *parent, *child; | ||
1411 | |||
1412 | rcu_read_lock(); | ||
1413 | parent = &root_task_group; | ||
1414 | down: | ||
1415 | (*down)(parent, cpu, sd); | ||
1416 | list_for_each_entry_rcu(child, &parent->children, siblings) { | ||
1417 | parent = child; | ||
1418 | goto down; | ||
1419 | |||
1420 | up: | ||
1421 | continue; | ||
1422 | } | ||
1423 | (*up)(parent, cpu, sd); | ||
1424 | |||
1425 | child = parent; | ||
1426 | parent = parent->parent; | ||
1427 | if (parent) | ||
1428 | goto up; | ||
1429 | rcu_read_unlock(); | ||
1430 | } | ||
1431 | |||
1432 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | ||
1433 | |||
1434 | /* | ||
1435 | * Calculate and set the cpu's group shares. | ||
1436 | */ | ||
1437 | static void | ||
1438 | __update_group_shares_cpu(struct task_group *tg, int cpu, | ||
1439 | unsigned long sd_shares, unsigned long sd_rq_weight) | ||
1440 | { | ||
1441 | int boost = 0; | ||
1442 | unsigned long shares; | ||
1443 | unsigned long rq_weight; | ||
1444 | |||
1445 | if (!tg->se[cpu]) | ||
1446 | return; | ||
1447 | |||
1448 | rq_weight = tg->cfs_rq[cpu]->load.weight; | ||
1449 | |||
1450 | /* | ||
1451 | * If there are currently no tasks on the cpu pretend there is one of | ||
1452 | * average load so that when a new task gets to run here it will not | ||
1453 | * get delayed by group starvation. | ||
1454 | */ | ||
1455 | if (!rq_weight) { | ||
1456 | boost = 1; | ||
1457 | rq_weight = NICE_0_LOAD; | ||
1458 | } | ||
1459 | |||
1460 | if (unlikely(rq_weight > sd_rq_weight)) | ||
1461 | rq_weight = sd_rq_weight; | ||
1462 | |||
1463 | /* | ||
1464 | * \Sum shares * rq_weight | ||
1465 | * shares = ----------------------- | ||
1466 | * \Sum rq_weight | ||
1467 | * | ||
1468 | */ | ||
1469 | shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); | ||
1470 | |||
1471 | /* | ||
1472 | * record the actual number of shares, not the boosted amount. | ||
1473 | */ | ||
1474 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | ||
1475 | tg->cfs_rq[cpu]->rq_weight = rq_weight; | ||
1476 | |||
1477 | if (shares < MIN_SHARES) | ||
1478 | shares = MIN_SHARES; | ||
1479 | else if (shares > MAX_SHARES) | ||
1480 | shares = MAX_SHARES; | ||
1481 | |||
1482 | __set_se_shares(tg->se[cpu], shares); | ||
1483 | } | ||
1484 | |||
1485 | /* | ||
1486 | * Re-compute the task group their per cpu shares over the given domain. | ||
1487 | * This needs to be done in a bottom-up fashion because the rq weight of a | ||
1488 | * parent group depends on the shares of its child groups. | ||
1489 | */ | ||
1490 | static void | ||
1491 | tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd) | ||
1492 | { | ||
1493 | unsigned long rq_weight = 0; | ||
1494 | unsigned long shares = 0; | ||
1495 | int i; | ||
1496 | |||
1497 | for_each_cpu_mask(i, sd->span) { | ||
1498 | rq_weight += tg->cfs_rq[i]->load.weight; | ||
1499 | shares += tg->cfs_rq[i]->shares; | ||
1500 | } | ||
1501 | |||
1502 | if ((!shares && rq_weight) || shares > tg->shares) | ||
1503 | shares = tg->shares; | ||
1504 | |||
1505 | if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) | ||
1506 | shares = tg->shares; | ||
1507 | |||
1508 | if (!rq_weight) | ||
1509 | rq_weight = cpus_weight(sd->span) * NICE_0_LOAD; | ||
1510 | |||
1511 | for_each_cpu_mask(i, sd->span) { | ||
1512 | struct rq *rq = cpu_rq(i); | ||
1513 | unsigned long flags; | ||
1514 | |||
1515 | spin_lock_irqsave(&rq->lock, flags); | ||
1516 | __update_group_shares_cpu(tg, i, shares, rq_weight); | ||
1517 | spin_unlock_irqrestore(&rq->lock, flags); | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | /* | ||
1522 | * Compute the cpu's hierarchical load factor for each task group. | ||
1523 | * This needs to be done in a top-down fashion because the load of a child | ||
1524 | * group is a fraction of its parents load. | ||
1525 | */ | ||
1526 | static void | ||
1527 | tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd) | ||
1528 | { | ||
1529 | unsigned long load; | ||
1530 | |||
1531 | if (!tg->parent) { | ||
1532 | load = cpu_rq(cpu)->load.weight; | ||
1533 | } else { | ||
1534 | load = tg->parent->cfs_rq[cpu]->h_load; | ||
1535 | load *= tg->cfs_rq[cpu]->shares; | ||
1536 | load /= tg->parent->cfs_rq[cpu]->load.weight + 1; | ||
1537 | } | ||
1538 | |||
1539 | tg->cfs_rq[cpu]->h_load = load; | ||
1540 | } | ||
1541 | |||
1542 | static void | ||
1543 | tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd) | ||
1544 | { | ||
1545 | } | ||
1546 | |||
1547 | static void update_shares(struct sched_domain *sd) | ||
1548 | { | ||
1549 | u64 now = cpu_clock(raw_smp_processor_id()); | ||
1550 | s64 elapsed = now - sd->last_update; | ||
1551 | |||
1552 | if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) { | ||
1553 | sd->last_update = now; | ||
1554 | walk_tg_tree(tg_nop, tg_shares_up, 0, sd); | ||
1555 | } | ||
1556 | } | ||
1557 | |||
1558 | static void update_shares_locked(struct rq *rq, struct sched_domain *sd) | ||
1559 | { | ||
1560 | spin_unlock(&rq->lock); | ||
1561 | update_shares(sd); | ||
1562 | spin_lock(&rq->lock); | ||
1563 | } | ||
1564 | |||
1565 | static void update_h_load(int cpu) | ||
1566 | { | ||
1567 | walk_tg_tree(tg_load_down, tg_nop, cpu, NULL); | ||
1568 | } | ||
1569 | |||
1570 | #else | ||
1571 | |||
1572 | static inline void update_shares(struct sched_domain *sd) | ||
1573 | { | ||
1574 | } | ||
1575 | |||
1576 | static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) | ||
1488 | { | 1577 | { |
1489 | } | 1578 | } |
1579 | |||
1490 | #endif | 1580 | #endif |
1491 | 1581 | ||
1492 | #endif /* CONFIG_SMP */ | 1582 | #endif |
1583 | |||
1584 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1585 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | ||
1586 | { | ||
1587 | #ifdef CONFIG_SMP | ||
1588 | cfs_rq->shares = shares; | ||
1589 | #endif | ||
1590 | } | ||
1591 | #endif | ||
1493 | 1592 | ||
1494 | #include "sched_stats.h" | 1593 | #include "sched_stats.h" |
1495 | #include "sched_idletask.c" | 1594 | #include "sched_idletask.c" |
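
The share-distribution comment in __update_group_shares_cpu() above can be made concrete with a small standalone userspace sketch (not kernel code; the two runqueue weights are invented for illustration, while 1024 corresponds to the kernel's NICE_0_LOAD and the divide by (sum + 1) mirrors the patch):

/* Rough userspace illustration of the per-cpu share split:
 *   shares[i] = sd_shares * rq_weight[i] / (\Sum_j rq_weight[j])
 */
#include <stdio.h>

int main(void)
{
	unsigned long sd_shares = 2048;			/* group shares for this domain */
	unsigned long rq_weight[2] = { 3072, 1024 };	/* e.g. three vs. one nice-0 tasks */
	unsigned long sum = rq_weight[0] + rq_weight[1];
	int i;

	for (i = 0; i < 2; i++)
		printf("cpu%d shares = %lu\n", i,
		       sd_shares * rq_weight[i] / (sum + 1));
	/* prints 1535 and 511: the busier cpu receives roughly 3/4 of the shares */
	return 0;
}

In the kernel the result is additionally clamped to [MIN_SHARES, MAX_SHARES], and a cpu with no runnable tasks is pretended to carry NICE_0_LOAD so that a task waking up there is not starved by group weighting.
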
@@ -1500,27 +1599,17 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1500 | #endif | 1599 | #endif |
1501 | 1600 | ||
1502 | #define sched_class_highest (&rt_sched_class) | 1601 | #define sched_class_highest (&rt_sched_class) |
1602 | #define for_each_class(class) \ | ||
1603 | for (class = sched_class_highest; class; class = class->next) | ||
1503 | 1604 | ||
1504 | static inline void inc_load(struct rq *rq, const struct task_struct *p) | 1605 | static void inc_nr_running(struct rq *rq) |
1505 | { | ||
1506 | update_load_add(&rq->load, p->se.load.weight); | ||
1507 | } | ||
1508 | |||
1509 | static inline void dec_load(struct rq *rq, const struct task_struct *p) | ||
1510 | { | ||
1511 | update_load_sub(&rq->load, p->se.load.weight); | ||
1512 | } | ||
1513 | |||
1514 | static void inc_nr_running(struct task_struct *p, struct rq *rq) | ||
1515 | { | 1606 | { |
1516 | rq->nr_running++; | 1607 | rq->nr_running++; |
1517 | inc_load(rq, p); | ||
1518 | } | 1608 | } |
1519 | 1609 | ||
1520 | static void dec_nr_running(struct task_struct *p, struct rq *rq) | 1610 | static void dec_nr_running(struct rq *rq) |
1521 | { | 1611 | { |
1522 | rq->nr_running--; | 1612 | rq->nr_running--; |
1523 | dec_load(rq, p); | ||
1524 | } | 1613 | } |
1525 | 1614 | ||
1526 | static void set_load_weight(struct task_struct *p) | 1615 | static void set_load_weight(struct task_struct *p) |
@@ -1544,6 +1633,12 @@ static void set_load_weight(struct task_struct *p) | |||
1544 | p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; | 1633 | p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; |
1545 | } | 1634 | } |
1546 | 1635 | ||
1636 | static void update_avg(u64 *avg, u64 sample) | ||
1637 | { | ||
1638 | s64 diff = sample - *avg; | ||
1639 | *avg += diff >> 3; | ||
1640 | } | ||
1641 | |||
1547 | static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) | 1642 | static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) |
1548 | { | 1643 | { |
1549 | sched_info_queued(p); | 1644 | sched_info_queued(p); |
@@ -1553,6 +1648,13 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
1553 | 1648 | ||
1554 | static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) | 1649 | static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) |
1555 | { | 1650 | { |
1651 | if (sleep && p->se.last_wakeup) { | ||
1652 | update_avg(&p->se.avg_overlap, | ||
1653 | p->se.sum_exec_runtime - p->se.last_wakeup); | ||
1654 | p->se.last_wakeup = 0; | ||
1655 | } | ||
1656 | |||
1657 | sched_info_dequeued(p); | ||
1556 | p->sched_class->dequeue_task(rq, p, sleep); | 1658 | p->sched_class->dequeue_task(rq, p, sleep); |
1557 | p->se.on_rq = 0; | 1659 | p->se.on_rq = 0; |
1558 | } | 1660 | } |
@@ -1612,7 +1714,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
1612 | rq->nr_uninterruptible--; | 1714 | rq->nr_uninterruptible--; |
1613 | 1715 | ||
1614 | enqueue_task(rq, p, wakeup); | 1716 | enqueue_task(rq, p, wakeup); |
1615 | inc_nr_running(p, rq); | 1717 | inc_nr_running(rq); |
1616 | } | 1718 | } |
1617 | 1719 | ||
1618 | /* | 1720 | /* |
@@ -1624,7 +1726,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | |||
1624 | rq->nr_uninterruptible++; | 1726 | rq->nr_uninterruptible++; |
1625 | 1727 | ||
1626 | dequeue_task(rq, p, sleep); | 1728 | dequeue_task(rq, p, sleep); |
1627 | dec_nr_running(p, rq); | 1729 | dec_nr_running(rq); |
1628 | } | 1730 | } |
1629 | 1731 | ||
1630 | /** | 1732 | /** |
@@ -1636,12 +1738,6 @@ inline int task_curr(const struct task_struct *p) | |||
1636 | return cpu_curr(task_cpu(p)) == p; | 1738 | return cpu_curr(task_cpu(p)) == p; |
1637 | } | 1739 | } |
1638 | 1740 | ||
1639 | /* Used instead of source_load when we know the type == 0 */ | ||
1640 | unsigned long weighted_cpuload(const int cpu) | ||
1641 | { | ||
1642 | return cpu_rq(cpu)->load.weight; | ||
1643 | } | ||
1644 | |||
1645 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | 1741 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) |
1646 | { | 1742 | { |
1647 | set_task_rq(p, cpu); | 1743 | set_task_rq(p, cpu); |
@@ -1670,6 +1766,12 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1670 | 1766 | ||
1671 | #ifdef CONFIG_SMP | 1767 | #ifdef CONFIG_SMP |
1672 | 1768 | ||
1769 | /* Used instead of source_load when we know the type == 0 */ | ||
1770 | static unsigned long weighted_cpuload(const int cpu) | ||
1771 | { | ||
1772 | return cpu_rq(cpu)->load.weight; | ||
1773 | } | ||
1774 | |||
1673 | /* | 1775 | /* |
1674 | * Is this task likely cache-hot: | 1776 | * Is this task likely cache-hot: |
1675 | */ | 1777 | */ |
@@ -1880,7 +1982,7 @@ static unsigned long source_load(int cpu, int type) | |||
1880 | struct rq *rq = cpu_rq(cpu); | 1982 | struct rq *rq = cpu_rq(cpu); |
1881 | unsigned long total = weighted_cpuload(cpu); | 1983 | unsigned long total = weighted_cpuload(cpu); |
1882 | 1984 | ||
1883 | if (type == 0) | 1985 | if (type == 0 || !sched_feat(LB_BIAS)) |
1884 | return total; | 1986 | return total; |
1885 | 1987 | ||
1886 | return min(rq->cpu_load[type-1], total); | 1988 | return min(rq->cpu_load[type-1], total); |
@@ -1895,25 +1997,13 @@ static unsigned long target_load(int cpu, int type) | |||
1895 | struct rq *rq = cpu_rq(cpu); | 1997 | struct rq *rq = cpu_rq(cpu); |
1896 | unsigned long total = weighted_cpuload(cpu); | 1998 | unsigned long total = weighted_cpuload(cpu); |
1897 | 1999 | ||
1898 | if (type == 0) | 2000 | if (type == 0 || !sched_feat(LB_BIAS)) |
1899 | return total; | 2001 | return total; |
1900 | 2002 | ||
1901 | return max(rq->cpu_load[type-1], total); | 2003 | return max(rq->cpu_load[type-1], total); |
1902 | } | 2004 | } |
1903 | 2005 | ||
1904 | /* | 2006 | /* |
1905 | * Return the average load per task on the cpu's run queue | ||
1906 | */ | ||
1907 | static unsigned long cpu_avg_load_per_task(int cpu) | ||
1908 | { | ||
1909 | struct rq *rq = cpu_rq(cpu); | ||
1910 | unsigned long total = weighted_cpuload(cpu); | ||
1911 | unsigned long n = rq->nr_running; | ||
1912 | |||
1913 | return n ? total / n : SCHED_LOAD_SCALE; | ||
1914 | } | ||
1915 | |||
1916 | /* | ||
1917 | * find_idlest_group finds and returns the least busy CPU group within the | 2007 | * find_idlest_group finds and returns the least busy CPU group within the |
1918 | * domain. | 2008 | * domain. |
1919 | */ | 2009 | */ |
@@ -1939,7 +2029,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | |||
1939 | /* Tally up the load of all CPUs in the group */ | 2029 | /* Tally up the load of all CPUs in the group */ |
1940 | avg_load = 0; | 2030 | avg_load = 0; |
1941 | 2031 | ||
1942 | for_each_cpu_mask(i, group->cpumask) { | 2032 | for_each_cpu_mask_nr(i, group->cpumask) { |
1943 | /* Bias balancing toward cpus of our domain */ | 2033 | /* Bias balancing toward cpus of our domain */ |
1944 | if (local_group) | 2034 | if (local_group) |
1945 | load = source_load(i, load_idx); | 2035 | load = source_load(i, load_idx); |
@@ -1981,7 +2071,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, | |||
1981 | /* Traverse only the allowed CPUs */ | 2071 | /* Traverse only the allowed CPUs */ |
1982 | cpus_and(*tmp, group->cpumask, p->cpus_allowed); | 2072 | cpus_and(*tmp, group->cpumask, p->cpus_allowed); |
1983 | 2073 | ||
1984 | for_each_cpu_mask(i, *tmp) { | 2074 | for_each_cpu_mask_nr(i, *tmp) { |
1985 | load = weighted_cpuload(i); | 2075 | load = weighted_cpuload(i); |
1986 | 2076 | ||
1987 | if (load < min_load || (load == min_load && i == this_cpu)) { | 2077 | if (load < min_load || (load == min_load && i == this_cpu)) { |
@@ -2019,6 +2109,9 @@ static int sched_balance_self(int cpu, int flag) | |||
2019 | sd = tmp; | 2109 | sd = tmp; |
2020 | } | 2110 | } |
2021 | 2111 | ||
2112 | if (sd) | ||
2113 | update_shares(sd); | ||
2114 | |||
2022 | while (sd) { | 2115 | while (sd) { |
2023 | cpumask_t span, tmpmask; | 2116 | cpumask_t span, tmpmask; |
2024 | struct sched_group *group; | 2117 | struct sched_group *group; |
@@ -2085,6 +2178,22 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
2085 | if (!sched_feat(SYNC_WAKEUPS)) | 2178 | if (!sched_feat(SYNC_WAKEUPS)) |
2086 | sync = 0; | 2179 | sync = 0; |
2087 | 2180 | ||
2181 | #ifdef CONFIG_SMP | ||
2182 | if (sched_feat(LB_WAKEUP_UPDATE)) { | ||
2183 | struct sched_domain *sd; | ||
2184 | |||
2185 | this_cpu = raw_smp_processor_id(); | ||
2186 | cpu = task_cpu(p); | ||
2187 | |||
2188 | for_each_domain(this_cpu, sd) { | ||
2189 | if (cpu_isset(cpu, sd->span)) { | ||
2190 | update_shares(sd); | ||
2191 | break; | ||
2192 | } | ||
2193 | } | ||
2194 | } | ||
2195 | #endif | ||
2196 | |||
2088 | smp_wmb(); | 2197 | smp_wmb(); |
2089 | rq = task_rq_lock(p, &flags); | 2198 | rq = task_rq_lock(p, &flags); |
2090 | old_state = p->state; | 2199 | old_state = p->state; |
@@ -2131,7 +2240,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
2131 | } | 2240 | } |
2132 | } | 2241 | } |
2133 | } | 2242 | } |
2134 | #endif | 2243 | #endif /* CONFIG_SCHEDSTATS */ |
2135 | 2244 | ||
2136 | out_activate: | 2245 | out_activate: |
2137 | #endif /* CONFIG_SMP */ | 2246 | #endif /* CONFIG_SMP */ |
@@ -2149,6 +2258,9 @@ out_activate: | |||
2149 | success = 1; | 2258 | success = 1; |
2150 | 2259 | ||
2151 | out_running: | 2260 | out_running: |
2261 | trace_mark(kernel_sched_wakeup, | ||
2262 | "pid %d state %ld ## rq %p task %p rq->curr %p", | ||
2263 | p->pid, p->state, rq, p, rq->curr); | ||
2152 | check_preempt_curr(rq, p); | 2264 | check_preempt_curr(rq, p); |
2153 | 2265 | ||
2154 | p->state = TASK_RUNNING; | 2266 | p->state = TASK_RUNNING; |
@@ -2157,6 +2269,8 @@ out_running: | |||
2157 | p->sched_class->task_wake_up(rq, p); | 2269 | p->sched_class->task_wake_up(rq, p); |
2158 | #endif | 2270 | #endif |
2159 | out: | 2271 | out: |
2272 | current->se.last_wakeup = current->se.sum_exec_runtime; | ||
2273 | |||
2160 | task_rq_unlock(rq, &flags); | 2274 | task_rq_unlock(rq, &flags); |
2161 | 2275 | ||
2162 | return success; | 2276 | return success; |
@@ -2277,8 +2391,11 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2277 | * management (if any): | 2391 | * management (if any): |
2278 | */ | 2392 | */ |
2279 | p->sched_class->task_new(rq, p); | 2393 | p->sched_class->task_new(rq, p); |
2280 | inc_nr_running(p, rq); | 2394 | inc_nr_running(rq); |
2281 | } | 2395 | } |
2396 | trace_mark(kernel_sched_wakeup_new, | ||
2397 | "pid %d state %ld ## rq %p task %p rq->curr %p", | ||
2398 | p->pid, p->state, rq, p, rq->curr); | ||
2282 | check_preempt_curr(rq, p); | 2399 | check_preempt_curr(rq, p); |
2283 | #ifdef CONFIG_SMP | 2400 | #ifdef CONFIG_SMP |
2284 | if (p->sched_class->task_wake_up) | 2401 | if (p->sched_class->task_wake_up) |
@@ -2331,7 +2448,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, | |||
2331 | notifier->ops->sched_out(notifier, next); | 2448 | notifier->ops->sched_out(notifier, next); |
2332 | } | 2449 | } |
2333 | 2450 | ||
2334 | #else | 2451 | #else /* !CONFIG_PREEMPT_NOTIFIERS */ |
2335 | 2452 | ||
2336 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) | 2453 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) |
2337 | { | 2454 | { |
@@ -2343,7 +2460,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, | |||
2343 | { | 2460 | { |
2344 | } | 2461 | } |
2345 | 2462 | ||
2346 | #endif | 2463 | #endif /* CONFIG_PREEMPT_NOTIFIERS */ |
2347 | 2464 | ||
2348 | /** | 2465 | /** |
2349 | * prepare_task_switch - prepare to switch tasks | 2466 | * prepare_task_switch - prepare to switch tasks |
@@ -2451,6 +2568,11 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2451 | struct mm_struct *mm, *oldmm; | 2568 | struct mm_struct *mm, *oldmm; |
2452 | 2569 | ||
2453 | prepare_task_switch(rq, prev, next); | 2570 | prepare_task_switch(rq, prev, next); |
2571 | trace_mark(kernel_sched_schedule, | ||
2572 | "prev_pid %d next_pid %d prev_state %ld " | ||
2573 | "## rq %p prev %p next %p", | ||
2574 | prev->pid, next->pid, prev->state, | ||
2575 | rq, prev, next); | ||
2454 | mm = next->mm; | 2576 | mm = next->mm; |
2455 | oldmm = prev->active_mm; | 2577 | oldmm = prev->active_mm; |
2456 | /* | 2578 | /* |
@@ -2680,7 +2802,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) | |||
2680 | 2802 | ||
2681 | rq = task_rq_lock(p, &flags); | 2803 | rq = task_rq_lock(p, &flags); |
2682 | if (!cpu_isset(dest_cpu, p->cpus_allowed) | 2804 | if (!cpu_isset(dest_cpu, p->cpus_allowed) |
2683 | || unlikely(cpu_is_offline(dest_cpu))) | 2805 | || unlikely(!cpu_active(dest_cpu))) |
2684 | goto out; | 2806 | goto out; |
2685 | 2807 | ||
2686 | /* force the process onto the specified CPU */ | 2808 | /* force the process onto the specified CPU */ |
@@ -2785,7 +2907,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2785 | enum cpu_idle_type idle, int *all_pinned, | 2907 | enum cpu_idle_type idle, int *all_pinned, |
2786 | int *this_best_prio, struct rq_iterator *iterator) | 2908 | int *this_best_prio, struct rq_iterator *iterator) |
2787 | { | 2909 | { |
2788 | int loops = 0, pulled = 0, pinned = 0, skip_for_load; | 2910 | int loops = 0, pulled = 0, pinned = 0; |
2789 | struct task_struct *p; | 2911 | struct task_struct *p; |
2790 | long rem_load_move = max_load_move; | 2912 | long rem_load_move = max_load_move; |
2791 | 2913 | ||
@@ -2801,14 +2923,8 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2801 | next: | 2923 | next: |
2802 | if (!p || loops++ > sysctl_sched_nr_migrate) | 2924 | if (!p || loops++ > sysctl_sched_nr_migrate) |
2803 | goto out; | 2925 | goto out; |
2804 | /* | 2926 | |
2805 | * To help distribute high priority tasks across CPUs we don't | 2927 | if ((p->se.load.weight >> 1) > rem_load_move || |
2806 | * skip a task if it will be the highest priority task (i.e. smallest | ||
2807 | * prio value) on its new queue regardless of its load weight | ||
2808 | */ | ||
2809 | skip_for_load = (p->se.load.weight >> 1) > rem_load_move + | ||
2810 | SCHED_LOAD_SCALE_FUZZ; | ||
2811 | if ((skip_for_load && p->prio >= *this_best_prio) || | ||
2812 | !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { | 2928 | !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { |
2813 | p = iterator->next(iterator->arg); | 2929 | p = iterator->next(iterator->arg); |
2814 | goto next; | 2930 | goto next; |
@@ -2863,6 +2979,10 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2863 | max_load_move - total_load_moved, | 2979 | max_load_move - total_load_moved, |
2864 | sd, idle, all_pinned, &this_best_prio); | 2980 | sd, idle, all_pinned, &this_best_prio); |
2865 | class = class->next; | 2981 | class = class->next; |
2982 | |||
2983 | if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) | ||
2984 | break; | ||
2985 | |||
2866 | } while (class && max_load_move > total_load_moved); | 2986 | } while (class && max_load_move > total_load_moved); |
2867 | 2987 | ||
2868 | return total_load_moved > 0; | 2988 | return total_load_moved > 0; |
@@ -2939,6 +3059,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2939 | max_load = this_load = total_load = total_pwr = 0; | 3059 | max_load = this_load = total_load = total_pwr = 0; |
2940 | busiest_load_per_task = busiest_nr_running = 0; | 3060 | busiest_load_per_task = busiest_nr_running = 0; |
2941 | this_load_per_task = this_nr_running = 0; | 3061 | this_load_per_task = this_nr_running = 0; |
3062 | |||
2942 | if (idle == CPU_NOT_IDLE) | 3063 | if (idle == CPU_NOT_IDLE) |
2943 | load_idx = sd->busy_idx; | 3064 | load_idx = sd->busy_idx; |
2944 | else if (idle == CPU_NEWLY_IDLE) | 3065 | else if (idle == CPU_NEWLY_IDLE) |
@@ -2953,6 +3074,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2953 | int __group_imb = 0; | 3074 | int __group_imb = 0; |
2954 | unsigned int balance_cpu = -1, first_idle_cpu = 0; | 3075 | unsigned int balance_cpu = -1, first_idle_cpu = 0; |
2955 | unsigned long sum_nr_running, sum_weighted_load; | 3076 | unsigned long sum_nr_running, sum_weighted_load; |
3077 | unsigned long sum_avg_load_per_task; | ||
3078 | unsigned long avg_load_per_task; | ||
2956 | 3079 | ||
2957 | local_group = cpu_isset(this_cpu, group->cpumask); | 3080 | local_group = cpu_isset(this_cpu, group->cpumask); |
2958 | 3081 | ||
@@ -2961,10 +3084,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2961 | 3084 | ||
2962 | /* Tally up the load of all CPUs in the group */ | 3085 | /* Tally up the load of all CPUs in the group */ |
2963 | sum_weighted_load = sum_nr_running = avg_load = 0; | 3086 | sum_weighted_load = sum_nr_running = avg_load = 0; |
3087 | sum_avg_load_per_task = avg_load_per_task = 0; | ||
3088 | |||
2964 | max_cpu_load = 0; | 3089 | max_cpu_load = 0; |
2965 | min_cpu_load = ~0UL; | 3090 | min_cpu_load = ~0UL; |
2966 | 3091 | ||
2967 | for_each_cpu_mask(i, group->cpumask) { | 3092 | for_each_cpu_mask_nr(i, group->cpumask) { |
2968 | struct rq *rq; | 3093 | struct rq *rq; |
2969 | 3094 | ||
2970 | if (!cpu_isset(i, *cpus)) | 3095 | if (!cpu_isset(i, *cpus)) |
@@ -2994,6 +3119,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2994 | avg_load += load; | 3119 | avg_load += load; |
2995 | sum_nr_running += rq->nr_running; | 3120 | sum_nr_running += rq->nr_running; |
2996 | sum_weighted_load += weighted_cpuload(i); | 3121 | sum_weighted_load += weighted_cpuload(i); |
3122 | |||
3123 | sum_avg_load_per_task += cpu_avg_load_per_task(i); | ||
2997 | } | 3124 | } |
2998 | 3125 | ||
2999 | /* | 3126 | /* |
@@ -3015,7 +3142,20 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3015 | avg_load = sg_div_cpu_power(group, | 3142 | avg_load = sg_div_cpu_power(group, |
3016 | avg_load * SCHED_LOAD_SCALE); | 3143 | avg_load * SCHED_LOAD_SCALE); |
3017 | 3144 | ||
3018 | if ((max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE) | 3145 | |
3146 | /* | ||
3147 | * Consider the group unbalanced when the imbalance is larger | ||
3148 | * than the average weight of two tasks. | ||
3149 | * | ||
3150 | * APZ: with cgroup the avg task weight can vary wildly and | ||
3151 | * might not be a suitable number - should we keep a | ||
3152 | * normalized nr_running number somewhere that negates | ||
3153 | * the hierarchy? | ||
3154 | */ | ||
3155 | avg_load_per_task = sg_div_cpu_power(group, | ||
3156 | sum_avg_load_per_task * SCHED_LOAD_SCALE); | ||
3157 | |||
3158 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) | ||
3019 | __group_imb = 1; | 3159 | __group_imb = 1; |
3020 | 3160 | ||
3021 | group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; | 3161 | group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; |
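
The group-imbalance test above now scales with the tasks actually running: a group is flagged internally imbalanced when the spread between its busiest and idlest cpu exceeds twice the cpu_power-scaled average task weight, instead of the old fixed SCHED_LOAD_SCALE. Roughly, assuming only nice-0 tasks (weight 1024) on a two-cpu group at full cpu_power, avg_load_per_task works out to about 1024, so loads of 4096 vs. 1024 (spread 3072 > 2048) still set __group_imb, while 2560 vs. 1024 (spread 1536) no longer does even though it exceeded the previous 1024 threshold.
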
@@ -3156,9 +3296,9 @@ small_imbalance: | |||
3156 | if (busiest_load_per_task > this_load_per_task) | 3296 | if (busiest_load_per_task > this_load_per_task) |
3157 | imbn = 1; | 3297 | imbn = 1; |
3158 | } else | 3298 | } else |
3159 | this_load_per_task = SCHED_LOAD_SCALE; | 3299 | this_load_per_task = cpu_avg_load_per_task(this_cpu); |
3160 | 3300 | ||
3161 | if (max_load - this_load + SCHED_LOAD_SCALE_FUZZ >= | 3301 | if (max_load - this_load + 2*busiest_load_per_task >= |
3162 | busiest_load_per_task * imbn) { | 3302 | busiest_load_per_task * imbn) { |
3163 | *imbalance = busiest_load_per_task; | 3303 | *imbalance = busiest_load_per_task; |
3164 | return busiest; | 3304 | return busiest; |
@@ -3228,7 +3368,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, | |||
3228 | unsigned long max_load = 0; | 3368 | unsigned long max_load = 0; |
3229 | int i; | 3369 | int i; |
3230 | 3370 | ||
3231 | for_each_cpu_mask(i, group->cpumask) { | 3371 | for_each_cpu_mask_nr(i, group->cpumask) { |
3232 | unsigned long wl; | 3372 | unsigned long wl; |
3233 | 3373 | ||
3234 | if (!cpu_isset(i, *cpus)) | 3374 | if (!cpu_isset(i, *cpus)) |
@@ -3284,6 +3424,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
3284 | schedstat_inc(sd, lb_count[idle]); | 3424 | schedstat_inc(sd, lb_count[idle]); |
3285 | 3425 | ||
3286 | redo: | 3426 | redo: |
3427 | update_shares(sd); | ||
3287 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 3428 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, |
3288 | cpus, balance); | 3429 | cpus, balance); |
3289 | 3430 | ||
@@ -3386,8 +3527,9 @@ redo: | |||
3386 | 3527 | ||
3387 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3528 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
3388 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3529 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
3389 | return -1; | 3530 | ld_moved = -1; |
3390 | return ld_moved; | 3531 | |
3532 | goto out; | ||
3391 | 3533 | ||
3392 | out_balanced: | 3534 | out_balanced: |
3393 | schedstat_inc(sd, lb_balanced[idle]); | 3535 | schedstat_inc(sd, lb_balanced[idle]); |
@@ -3402,8 +3544,13 @@ out_one_pinned: | |||
3402 | 3544 | ||
3403 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3545 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
3404 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3546 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
3405 | return -1; | 3547 | ld_moved = -1; |
3406 | return 0; | 3548 | else |
3549 | ld_moved = 0; | ||
3550 | out: | ||
3551 | if (ld_moved) | ||
3552 | update_shares(sd); | ||
3553 | return ld_moved; | ||
3407 | } | 3554 | } |
3408 | 3555 | ||
3409 | /* | 3556 | /* |
@@ -3438,6 +3585,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, | |||
3438 | 3585 | ||
3439 | schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); | 3586 | schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); |
3440 | redo: | 3587 | redo: |
3588 | update_shares_locked(this_rq, sd); | ||
3441 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, | 3589 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, |
3442 | &sd_idle, cpus, NULL); | 3590 | &sd_idle, cpus, NULL); |
3443 | if (!group) { | 3591 | if (!group) { |
@@ -3481,6 +3629,7 @@ redo: | |||
3481 | } else | 3629 | } else |
3482 | sd->nr_balance_failed = 0; | 3630 | sd->nr_balance_failed = 0; |
3483 | 3631 | ||
3632 | update_shares_locked(this_rq, sd); | ||
3484 | return ld_moved; | 3633 | return ld_moved; |
3485 | 3634 | ||
3486 | out_balanced: | 3635 | out_balanced: |
@@ -3621,7 +3770,7 @@ int select_nohz_load_balancer(int stop_tick) | |||
3621 | /* | 3770 | /* |
3622 | * If we are going offline and still the leader, give up! | 3771 | * If we are going offline and still the leader, give up! |
3623 | */ | 3772 | */ |
3624 | if (cpu_is_offline(cpu) && | 3773 | if (!cpu_active(cpu) && |
3625 | atomic_read(&nohz.load_balancer) == cpu) { | 3774 | atomic_read(&nohz.load_balancer) == cpu) { |
3626 | if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) | 3775 | if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) |
3627 | BUG(); | 3776 | BUG(); |
@@ -3672,6 +3821,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3672 | /* Earliest time when we have to do rebalance again */ | 3821 | /* Earliest time when we have to do rebalance again */ |
3673 | unsigned long next_balance = jiffies + 60*HZ; | 3822 | unsigned long next_balance = jiffies + 60*HZ; |
3674 | int update_next_balance = 0; | 3823 | int update_next_balance = 0; |
3824 | int need_serialize; | ||
3675 | cpumask_t tmp; | 3825 | cpumask_t tmp; |
3676 | 3826 | ||
3677 | for_each_domain(cpu, sd) { | 3827 | for_each_domain(cpu, sd) { |
@@ -3689,8 +3839,9 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3689 | if (interval > HZ*NR_CPUS/10) | 3839 | if (interval > HZ*NR_CPUS/10) |
3690 | interval = HZ*NR_CPUS/10; | 3840 | interval = HZ*NR_CPUS/10; |
3691 | 3841 | ||
3842 | need_serialize = sd->flags & SD_SERIALIZE; | ||
3692 | 3843 | ||
3693 | if (sd->flags & SD_SERIALIZE) { | 3844 | if (need_serialize) { |
3694 | if (!spin_trylock(&balancing)) | 3845 | if (!spin_trylock(&balancing)) |
3695 | goto out; | 3846 | goto out; |
3696 | } | 3847 | } |
@@ -3706,7 +3857,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3706 | } | 3857 | } |
3707 | sd->last_balance = jiffies; | 3858 | sd->last_balance = jiffies; |
3708 | } | 3859 | } |
3709 | if (sd->flags & SD_SERIALIZE) | 3860 | if (need_serialize) |
3710 | spin_unlock(&balancing); | 3861 | spin_unlock(&balancing); |
3711 | out: | 3862 | out: |
3712 | if (time_after(next_balance, sd->last_balance + interval)) { | 3863 | if (time_after(next_balance, sd->last_balance + interval)) { |
@@ -3759,7 +3910,7 @@ static void run_rebalance_domains(struct softirq_action *h) | |||
3759 | int balance_cpu; | 3910 | int balance_cpu; |
3760 | 3911 | ||
3761 | cpu_clear(this_cpu, cpus); | 3912 | cpu_clear(this_cpu, cpus); |
3762 | for_each_cpu_mask(balance_cpu, cpus) { | 3913 | for_each_cpu_mask_nr(balance_cpu, cpus) { |
3763 | /* | 3914 | /* |
3764 | * If this cpu gets work to do, stop the load balancing | 3915 | * If this cpu gets work to do, stop the load balancing |
3765 | * work being done for other cpus. Next load | 3916 | * work being done for other cpus. Next load |
@@ -4021,26 +4172,44 @@ void scheduler_tick(void) | |||
4021 | #endif | 4172 | #endif |
4022 | } | 4173 | } |
4023 | 4174 | ||
4024 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) | 4175 | #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ |
4176 | defined(CONFIG_PREEMPT_TRACER)) | ||
4177 | |||
4178 | static inline unsigned long get_parent_ip(unsigned long addr) | ||
4179 | { | ||
4180 | if (in_lock_functions(addr)) { | ||
4181 | addr = CALLER_ADDR2; | ||
4182 | if (in_lock_functions(addr)) | ||
4183 | addr = CALLER_ADDR3; | ||
4184 | } | ||
4185 | return addr; | ||
4186 | } | ||
4025 | 4187 | ||
4026 | void __kprobes add_preempt_count(int val) | 4188 | void __kprobes add_preempt_count(int val) |
4027 | { | 4189 | { |
4190 | #ifdef CONFIG_DEBUG_PREEMPT | ||
4028 | /* | 4191 | /* |
4029 | * Underflow? | 4192 | * Underflow? |
4030 | */ | 4193 | */ |
4031 | if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) | 4194 | if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) |
4032 | return; | 4195 | return; |
4196 | #endif | ||
4033 | preempt_count() += val; | 4197 | preempt_count() += val; |
4198 | #ifdef CONFIG_DEBUG_PREEMPT | ||
4034 | /* | 4199 | /* |
4035 | * Spinlock count overflowing soon? | 4200 | * Spinlock count overflowing soon? |
4036 | */ | 4201 | */ |
4037 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= | 4202 | DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= |
4038 | PREEMPT_MASK - 10); | 4203 | PREEMPT_MASK - 10); |
4204 | #endif | ||
4205 | if (preempt_count() == val) | ||
4206 | trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | ||
4039 | } | 4207 | } |
4040 | EXPORT_SYMBOL(add_preempt_count); | 4208 | EXPORT_SYMBOL(add_preempt_count); |
4041 | 4209 | ||
4042 | void __kprobes sub_preempt_count(int val) | 4210 | void __kprobes sub_preempt_count(int val) |
4043 | { | 4211 | { |
4212 | #ifdef CONFIG_DEBUG_PREEMPT | ||
4044 | /* | 4213 | /* |
4045 | * Underflow? | 4214 | * Underflow? |
4046 | */ | 4215 | */ |
@@ -4052,7 +4221,10 @@ void __kprobes sub_preempt_count(int val) | |||
4052 | if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && | 4221 | if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && |
4053 | !(preempt_count() & PREEMPT_MASK))) | 4222 | !(preempt_count() & PREEMPT_MASK))) |
4054 | return; | 4223 | return; |
4224 | #endif | ||
4055 | 4225 | ||
4226 | if (preempt_count() == val) | ||
4227 | trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | ||
4056 | preempt_count() -= val; | 4228 | preempt_count() -= val; |
4057 | } | 4229 | } |
4058 | EXPORT_SYMBOL(sub_preempt_count); | 4230 | EXPORT_SYMBOL(sub_preempt_count); |
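The tracer hooks added above fire only on the outermost transition of the preempt count: add_preempt_count() calls trace_preempt_off() when the increment took the count away from zero, and sub_preempt_count() calls trace_preempt_on() just before the count returns to zero, with get_parent_ip() skipping lock wrappers so the recorded address is the real caller. A minimal sketch of the resulting behaviour, assuming CONFIG_PREEMPT_TRACER is enabled:

	preempt_disable();	/* count 0 -> 1: trace_preempt_off() records this caller */
	preempt_disable();	/* count 1 -> 2: nested, no trace event */
	preempt_enable();	/* count 2 -> 1: still nested, no trace event */
	preempt_enable();	/* count 1 -> 0: trace_preempt_on() closes the off section */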
@@ -4070,6 +4242,7 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
4070 | prev->comm, prev->pid, preempt_count()); | 4242 | prev->comm, prev->pid, preempt_count()); |
4071 | 4243 | ||
4072 | debug_show_held_locks(prev); | 4244 | debug_show_held_locks(prev); |
4245 | print_modules(); | ||
4073 | if (irqs_disabled()) | 4246 | if (irqs_disabled()) |
4074 | print_irqtrace_events(prev); | 4247 | print_irqtrace_events(prev); |
4075 | 4248 | ||
@@ -4158,7 +4331,8 @@ need_resched_nonpreemptible: | |||
4158 | 4331 | ||
4159 | schedule_debug(prev); | 4332 | schedule_debug(prev); |
4160 | 4333 | ||
4161 | hrtick_clear(rq); | 4334 | if (sched_feat(HRTICK)) |
4335 | hrtick_clear(rq); | ||
4162 | 4336 | ||
4163 | /* | 4337 | /* |
4164 | * Do the rq-clock update outside the rq lock: | 4338 | * Do the rq-clock update outside the rq lock: |
@@ -4204,8 +4378,6 @@ need_resched_nonpreemptible: | |||
4204 | } else | 4378 | } else |
4205 | spin_unlock_irq(&rq->lock); | 4379 | spin_unlock_irq(&rq->lock); |
4206 | 4380 | ||
4207 | hrtick_set(rq); | ||
4208 | |||
4209 | if (unlikely(reacquire_kernel_lock(current) < 0)) | 4381 | if (unlikely(reacquire_kernel_lock(current) < 0)) |
4210 | goto need_resched_nonpreemptible; | 4382 | goto need_resched_nonpreemptible; |
4211 | 4383 | ||
@@ -4586,10 +4758,8 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4586 | goto out_unlock; | 4758 | goto out_unlock; |
4587 | } | 4759 | } |
4588 | on_rq = p->se.on_rq; | 4760 | on_rq = p->se.on_rq; |
4589 | if (on_rq) { | 4761 | if (on_rq) |
4590 | dequeue_task(rq, p, 0); | 4762 | dequeue_task(rq, p, 0); |
4591 | dec_load(rq, p); | ||
4592 | } | ||
4593 | 4763 | ||
4594 | p->static_prio = NICE_TO_PRIO(nice); | 4764 | p->static_prio = NICE_TO_PRIO(nice); |
4595 | set_load_weight(p); | 4765 | set_load_weight(p); |
@@ -4599,7 +4769,6 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4599 | 4769 | ||
4600 | if (on_rq) { | 4770 | if (on_rq) { |
4601 | enqueue_task(rq, p, 0); | 4771 | enqueue_task(rq, p, 0); |
4602 | inc_load(rq, p); | ||
4603 | /* | 4772 | /* |
4604 | * If the task increased its priority or is running and | 4773 | * If the task increased its priority or is running and |
4605 | * lowered its priority, then reschedule its CPU: | 4774 | * lowered its priority, then reschedule its CPU: |
@@ -4744,16 +4913,8 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
4744 | set_load_weight(p); | 4913 | set_load_weight(p); |
4745 | } | 4914 | } |
4746 | 4915 | ||
4747 | /** | 4916 | static int __sched_setscheduler(struct task_struct *p, int policy, |
4748 | * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. | 4917 | struct sched_param *param, bool user) |
4749 | * @p: the task in question. | ||
4750 | * @policy: new policy. | ||
4751 | * @param: structure containing the new RT priority. | ||
4752 | * | ||
4753 | * NOTE that the task may be already dead. | ||
4754 | */ | ||
4755 | int sched_setscheduler(struct task_struct *p, int policy, | ||
4756 | struct sched_param *param) | ||
4757 | { | 4918 | { |
4758 | int retval, oldprio, oldpolicy = -1, on_rq, running; | 4919 | int retval, oldprio, oldpolicy = -1, on_rq, running; |
4759 | unsigned long flags; | 4920 | unsigned long flags; |
@@ -4785,7 +4946,7 @@ recheck: | |||
4785 | /* | 4946 | /* |
4786 | * Allow unprivileged RT tasks to decrease priority: | 4947 | * Allow unprivileged RT tasks to decrease priority: |
4787 | */ | 4948 | */ |
4788 | if (!capable(CAP_SYS_NICE)) { | 4949 | if (user && !capable(CAP_SYS_NICE)) { |
4789 | if (rt_policy(policy)) { | 4950 | if (rt_policy(policy)) { |
4790 | unsigned long rlim_rtprio; | 4951 | unsigned long rlim_rtprio; |
4791 | 4952 | ||
@@ -4821,7 +4982,8 @@ recheck: | |||
4821 | * Do not allow realtime tasks into groups that have no runtime | 4982 | * Do not allow realtime tasks into groups that have no runtime |
4822 | * assigned. | 4983 | * assigned. |
4823 | */ | 4984 | */ |
4824 | if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) | 4985 | if (user |
4986 | && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0) | ||
4825 | return -EPERM; | 4987 | return -EPERM; |
4826 | #endif | 4988 | #endif |
4827 | 4989 | ||
@@ -4870,8 +5032,39 @@ recheck: | |||
4870 | 5032 | ||
4871 | return 0; | 5033 | return 0; |
4872 | } | 5034 | } |
5035 | |||
5036 | /** | ||
5037 | * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. | ||
5038 | * @p: the task in question. | ||
5039 | * @policy: new policy. | ||
5040 | * @param: structure containing the new RT priority. | ||
5041 | * | ||
5042 | * NOTE that the task may be already dead. | ||
5043 | */ | ||
5044 | int sched_setscheduler(struct task_struct *p, int policy, | ||
5045 | struct sched_param *param) | ||
5046 | { | ||
5047 | return __sched_setscheduler(p, policy, param, true); | ||
5048 | } | ||
4873 | EXPORT_SYMBOL_GPL(sched_setscheduler); | 5049 | EXPORT_SYMBOL_GPL(sched_setscheduler); |
4874 | 5050 | ||
5051 | /** | ||
5052 | * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. | ||
5053 | * @p: the task in question. | ||
5054 | * @policy: new policy. | ||
5055 | * @param: structure containing the new RT priority. | ||
5056 | * | ||
5057 | * Just like sched_setscheduler, only don't bother checking if the | ||
5058 | * current context has permission. For example, this is needed in | ||
5059 | * stop_machine(): we create temporary high priority worker threads, | ||
5060 | * but our caller might not have that capability. | ||
5061 | */ | ||
5062 | int sched_setscheduler_nocheck(struct task_struct *p, int policy, | ||
5063 | struct sched_param *param) | ||
5064 | { | ||
5065 | return __sched_setscheduler(p, policy, param, false); | ||
5066 | } | ||
5067 | |||
4875 | static int | 5068 | static int |
4876 | do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | 5069 | do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) |
4877 | { | 5070 | { |
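A minimal sketch of the intended use of the new sched_setscheduler_nocheck() above, assuming a kthread pointer 'worker' that the caller owns (the name is illustrative): kernel code such as stop_machine() can boost its helper threads even when the originating user context lacks CAP_SYS_NICE.

	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };

	/*
	 * 'worker' is a kthread created by this caller; skip the permission
	 * and rt-bandwidth checks that sched_setscheduler() would apply on
	 * behalf of the (possibly unprivileged) current task.
	 */
	sched_setscheduler_nocheck(worker, SCHED_FIFO, &param);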
@@ -5070,24 +5263,6 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, | |||
5070 | return sched_setaffinity(pid, &new_mask); | 5263 | return sched_setaffinity(pid, &new_mask); |
5071 | } | 5264 | } |
5072 | 5265 | ||
5073 | /* | ||
5074 | * Represents all cpu's present in the system | ||
5075 | * In systems capable of hotplug, this map could dynamically grow | ||
5076 | * as new cpu's are detected in the system via any platform specific | ||
5077 | * method, such as ACPI for e.g. | ||
5078 | */ | ||
5079 | |||
5080 | cpumask_t cpu_present_map __read_mostly; | ||
5081 | EXPORT_SYMBOL(cpu_present_map); | ||
5082 | |||
5083 | #ifndef CONFIG_SMP | ||
5084 | cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; | ||
5085 | EXPORT_SYMBOL(cpu_online_map); | ||
5086 | |||
5087 | cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; | ||
5088 | EXPORT_SYMBOL(cpu_possible_map); | ||
5089 | #endif | ||
5090 | |||
5091 | long sched_getaffinity(pid_t pid, cpumask_t *mask) | 5266 | long sched_getaffinity(pid_t pid, cpumask_t *mask) |
5092 | { | 5267 | { |
5093 | struct task_struct *p; | 5268 | struct task_struct *p; |
@@ -5384,7 +5559,7 @@ out_unlock: | |||
5384 | return retval; | 5559 | return retval; |
5385 | } | 5560 | } |
5386 | 5561 | ||
5387 | static const char stat_nam[] = "RSDTtZX"; | 5562 | static const char stat_nam[] = TASK_STATE_TO_CHAR_STR; |
5388 | 5563 | ||
5389 | void sched_show_task(struct task_struct *p) | 5564 | void sched_show_task(struct task_struct *p) |
5390 | { | 5565 | { |
@@ -5571,6 +5746,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) | |||
5571 | goto out; | 5746 | goto out; |
5572 | } | 5747 | } |
5573 | 5748 | ||
5749 | if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && | ||
5750 | !cpus_equal(p->cpus_allowed, *new_mask))) { | ||
5751 | ret = -EINVAL; | ||
5752 | goto out; | ||
5753 | } | ||
5754 | |||
5574 | if (p->sched_class->set_cpus_allowed) | 5755 | if (p->sched_class->set_cpus_allowed) |
5575 | p->sched_class->set_cpus_allowed(p, new_mask); | 5756 | p->sched_class->set_cpus_allowed(p, new_mask); |
5576 | else { | 5757 | else { |
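The new PF_THREAD_BOUND check above means a kthread pinned with kthread_bind() keeps its single-CPU mask unless the thread itself asks for a change. A hedged sketch of the rejected path; 'worker_fn', the thread name and the CPU number are illustrative, and error handling is elided:

	struct task_struct *worker;
	cpumask_t wider = CPU_MASK_ALL;

	worker = kthread_create(worker_fn, NULL, "bound_worker");
	kthread_bind(worker, 2);		/* marks the thread PF_THREAD_BOUND */
	wake_up_process(worker);

	/* a different task now tries to widen the mask */
	if (set_cpus_allowed_ptr(worker, &wider) == -EINVAL)
		printk(KERN_INFO "bound kthread kept its affinity\n");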
@@ -5613,7 +5794,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5613 | struct rq *rq_dest, *rq_src; | 5794 | struct rq *rq_dest, *rq_src; |
5614 | int ret = 0, on_rq; | 5795 | int ret = 0, on_rq; |
5615 | 5796 | ||
5616 | if (unlikely(cpu_is_offline(dest_cpu))) | 5797 | if (unlikely(!cpu_active(dest_cpu))) |
5617 | return ret; | 5798 | return ret; |
5618 | 5799 | ||
5619 | rq_src = cpu_rq(src_cpu); | 5800 | rq_src = cpu_rq(src_cpu); |
@@ -6060,6 +6241,36 @@ static void unregister_sched_domain_sysctl(void) | |||
6060 | } | 6241 | } |
6061 | #endif | 6242 | #endif |
6062 | 6243 | ||
6244 | static void set_rq_online(struct rq *rq) | ||
6245 | { | ||
6246 | if (!rq->online) { | ||
6247 | const struct sched_class *class; | ||
6248 | |||
6249 | cpu_set(rq->cpu, rq->rd->online); | ||
6250 | rq->online = 1; | ||
6251 | |||
6252 | for_each_class(class) { | ||
6253 | if (class->rq_online) | ||
6254 | class->rq_online(rq); | ||
6255 | } | ||
6256 | } | ||
6257 | } | ||
6258 | |||
6259 | static void set_rq_offline(struct rq *rq) | ||
6260 | { | ||
6261 | if (rq->online) { | ||
6262 | const struct sched_class *class; | ||
6263 | |||
6264 | for_each_class(class) { | ||
6265 | if (class->rq_offline) | ||
6266 | class->rq_offline(rq); | ||
6267 | } | ||
6268 | |||
6269 | cpu_clear(rq->cpu, rq->rd->online); | ||
6270 | rq->online = 0; | ||
6271 | } | ||
6272 | } | ||
6273 | |||
6063 | /* | 6274 | /* |
6064 | * migration_call - callback that gets triggered when a CPU is added. | 6275 | * migration_call - callback that gets triggered when a CPU is added. |
6065 | * Here we can start up the necessary migration thread for the new CPU. | 6276 | * Here we can start up the necessary migration thread for the new CPU. |
@@ -6097,7 +6308,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6097 | spin_lock_irqsave(&rq->lock, flags); | 6308 | spin_lock_irqsave(&rq->lock, flags); |
6098 | if (rq->rd) { | 6309 | if (rq->rd) { |
6099 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | 6310 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); |
6100 | cpu_set(cpu, rq->rd->online); | 6311 | |
6312 | set_rq_online(rq); | ||
6101 | } | 6313 | } |
6102 | spin_unlock_irqrestore(&rq->lock, flags); | 6314 | spin_unlock_irqrestore(&rq->lock, flags); |
6103 | break; | 6315 | break; |
@@ -6158,7 +6370,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6158 | spin_lock_irqsave(&rq->lock, flags); | 6370 | spin_lock_irqsave(&rq->lock, flags); |
6159 | if (rq->rd) { | 6371 | if (rq->rd) { |
6160 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | 6372 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); |
6161 | cpu_clear(cpu, rq->rd->online); | 6373 | set_rq_offline(rq); |
6162 | } | 6374 | } |
6163 | spin_unlock_irqrestore(&rq->lock, flags); | 6375 | spin_unlock_irqrestore(&rq->lock, flags); |
6164 | break; | 6376 | break; |
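set_rq_online()/set_rq_offline() (added above) replace the old per-class join_domain/leave_domain callbacks: root-domain membership is toggled once per runqueue, and every scheduling class that cares gets an rq_online/rq_offline hook. A hedged sketch of how a class wires these up; the bodies and the class name are placeholders, not the real sched_rt.c code:

	static void example_rq_online(struct rq *rq)
	{
		/* e.g. advertise rq->cpu to this class' push/pull machinery */
	}

	static void example_rq_offline(struct rq *rq)
	{
		/* e.g. withdraw rq->cpu before it leaves the root domain */
	}

	static const struct sched_class example_sched_class = {
		.rq_online	= example_rq_online,
		.rq_offline	= example_rq_offline,
		/* the remaining mandatory callbacks are omitted in this sketch */
	};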
@@ -6192,6 +6404,28 @@ void __init migration_init(void) | |||
6192 | 6404 | ||
6193 | #ifdef CONFIG_SCHED_DEBUG | 6405 | #ifdef CONFIG_SCHED_DEBUG |
6194 | 6406 | ||
6407 | static inline const char *sd_level_to_string(enum sched_domain_level lvl) | ||
6408 | { | ||
6409 | switch (lvl) { | ||
6410 | case SD_LV_NONE: | ||
6411 | return "NONE"; | ||
6412 | case SD_LV_SIBLING: | ||
6413 | return "SIBLING"; | ||
6414 | case SD_LV_MC: | ||
6415 | return "MC"; | ||
6416 | case SD_LV_CPU: | ||
6417 | return "CPU"; | ||
6418 | case SD_LV_NODE: | ||
6419 | return "NODE"; | ||
6420 | case SD_LV_ALLNODES: | ||
6421 | return "ALLNODES"; | ||
6422 | case SD_LV_MAX: | ||
6423 | return "MAX"; | ||
6424 | |||
6425 | } | ||
6426 | return "MAX"; | ||
6427 | } | ||
6428 | |||
6195 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 6429 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
6196 | cpumask_t *groupmask) | 6430 | cpumask_t *groupmask) |
6197 | { | 6431 | { |
@@ -6211,7 +6445,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
6211 | return -1; | 6445 | return -1; |
6212 | } | 6446 | } |
6213 | 6447 | ||
6214 | printk(KERN_CONT "span %s\n", str); | 6448 | printk(KERN_CONT "span %s level %s\n", |
6449 | str, sd_level_to_string(sd->level)); | ||
6215 | 6450 | ||
6216 | if (!cpu_isset(cpu, sd->span)) { | 6451 | if (!cpu_isset(cpu, sd->span)) { |
6217 | printk(KERN_ERR "ERROR: domain->span does not contain " | 6452 | printk(KERN_ERR "ERROR: domain->span does not contain " |
@@ -6295,9 +6530,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
6295 | } | 6530 | } |
6296 | kfree(groupmask); | 6531 | kfree(groupmask); |
6297 | } | 6532 | } |
6298 | #else | 6533 | #else /* !CONFIG_SCHED_DEBUG */ |
6299 | # define sched_domain_debug(sd, cpu) do { } while (0) | 6534 | # define sched_domain_debug(sd, cpu) do { } while (0) |
6300 | #endif | 6535 | #endif /* CONFIG_SCHED_DEBUG */ |
6301 | 6536 | ||
6302 | static int sd_degenerate(struct sched_domain *sd) | 6537 | static int sd_degenerate(struct sched_domain *sd) |
6303 | { | 6538 | { |
@@ -6357,20 +6592,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
6357 | static void rq_attach_root(struct rq *rq, struct root_domain *rd) | 6592 | static void rq_attach_root(struct rq *rq, struct root_domain *rd) |
6358 | { | 6593 | { |
6359 | unsigned long flags; | 6594 | unsigned long flags; |
6360 | const struct sched_class *class; | ||
6361 | 6595 | ||
6362 | spin_lock_irqsave(&rq->lock, flags); | 6596 | spin_lock_irqsave(&rq->lock, flags); |
6363 | 6597 | ||
6364 | if (rq->rd) { | 6598 | if (rq->rd) { |
6365 | struct root_domain *old_rd = rq->rd; | 6599 | struct root_domain *old_rd = rq->rd; |
6366 | 6600 | ||
6367 | for (class = sched_class_highest; class; class = class->next) { | 6601 | if (cpu_isset(rq->cpu, old_rd->online)) |
6368 | if (class->leave_domain) | 6602 | set_rq_offline(rq); |
6369 | class->leave_domain(rq); | ||
6370 | } | ||
6371 | 6603 | ||
6372 | cpu_clear(rq->cpu, old_rd->span); | 6604 | cpu_clear(rq->cpu, old_rd->span); |
6373 | cpu_clear(rq->cpu, old_rd->online); | ||
6374 | 6605 | ||
6375 | if (atomic_dec_and_test(&old_rd->refcount)) | 6606 | if (atomic_dec_and_test(&old_rd->refcount)) |
6376 | kfree(old_rd); | 6607 | kfree(old_rd); |
@@ -6381,12 +6612,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
6381 | 6612 | ||
6382 | cpu_set(rq->cpu, rd->span); | 6613 | cpu_set(rq->cpu, rd->span); |
6383 | if (cpu_isset(rq->cpu, cpu_online_map)) | 6614 | if (cpu_isset(rq->cpu, cpu_online_map)) |
6384 | cpu_set(rq->cpu, rd->online); | 6615 | set_rq_online(rq); |
6385 | |||
6386 | for (class = sched_class_highest; class; class = class->next) { | ||
6387 | if (class->join_domain) | ||
6388 | class->join_domain(rq); | ||
6389 | } | ||
6390 | 6616 | ||
6391 | spin_unlock_irqrestore(&rq->lock, flags); | 6617 | spin_unlock_irqrestore(&rq->lock, flags); |
6392 | } | 6618 | } |
@@ -6397,6 +6623,8 @@ static void init_rootdomain(struct root_domain *rd) | |||
6397 | 6623 | ||
6398 | cpus_clear(rd->span); | 6624 | cpus_clear(rd->span); |
6399 | cpus_clear(rd->online); | 6625 | cpus_clear(rd->online); |
6626 | |||
6627 | cpupri_init(&rd->cpupri); | ||
6400 | } | 6628 | } |
6401 | 6629 | ||
6402 | static void init_defrootdomain(void) | 6630 | static void init_defrootdomain(void) |
@@ -6458,7 +6686,8 @@ static cpumask_t cpu_isolated_map = CPU_MASK_NONE; | |||
6458 | /* Setup the mask of cpus configured for isolated domains */ | 6686 | /* Setup the mask of cpus configured for isolated domains */ |
6459 | static int __init isolated_cpu_setup(char *str) | 6687 | static int __init isolated_cpu_setup(char *str) |
6460 | { | 6688 | { |
6461 | int ints[NR_CPUS], i; | 6689 | static int __initdata ints[NR_CPUS]; |
6690 | int i; | ||
6462 | 6691 | ||
6463 | str = get_options(str, ARRAY_SIZE(ints), ints); | 6692 | str = get_options(str, ARRAY_SIZE(ints), ints); |
6464 | cpus_clear(cpu_isolated_map); | 6693 | cpus_clear(cpu_isolated_map); |
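The switch to a static __initdata buffer avoids an on-stack array that scales with the build-time CPU limit: with NR_CPUS = 4096, int ints[NR_CPUS] is 4096 * 4 = 16 KB, which would not fit comfortably on an 8 KB kernel stack (the exact sizes here are illustrative); the static copy lives in init memory and is discarded after boot.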
@@ -6492,7 +6721,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, | |||
6492 | 6721 | ||
6493 | cpus_clear(*covered); | 6722 | cpus_clear(*covered); |
6494 | 6723 | ||
6495 | for_each_cpu_mask(i, *span) { | 6724 | for_each_cpu_mask_nr(i, *span) { |
6496 | struct sched_group *sg; | 6725 | struct sched_group *sg; |
6497 | int group = group_fn(i, cpu_map, &sg, tmpmask); | 6726 | int group = group_fn(i, cpu_map, &sg, tmpmask); |
6498 | int j; | 6727 | int j; |
@@ -6503,7 +6732,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, | |||
6503 | cpus_clear(sg->cpumask); | 6732 | cpus_clear(sg->cpumask); |
6504 | sg->__cpu_power = 0; | 6733 | sg->__cpu_power = 0; |
6505 | 6734 | ||
6506 | for_each_cpu_mask(j, *span) { | 6735 | for_each_cpu_mask_nr(j, *span) { |
6507 | if (group_fn(j, cpu_map, NULL, tmpmask) != group) | 6736 | if (group_fn(j, cpu_map, NULL, tmpmask) != group) |
6508 | continue; | 6737 | continue; |
6509 | 6738 | ||
@@ -6539,9 +6768,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) | |||
6539 | 6768 | ||
6540 | min_val = INT_MAX; | 6769 | min_val = INT_MAX; |
6541 | 6770 | ||
6542 | for (i = 0; i < MAX_NUMNODES; i++) { | 6771 | for (i = 0; i < nr_node_ids; i++) { |
6543 | /* Start at @node */ | 6772 | /* Start at @node */ |
6544 | n = (node + i) % MAX_NUMNODES; | 6773 | n = (node + i) % nr_node_ids; |
6545 | 6774 | ||
6546 | if (!nr_cpus_node(n)) | 6775 | if (!nr_cpus_node(n)) |
6547 | continue; | 6776 | continue; |
@@ -6591,7 +6820,7 @@ static void sched_domain_node_span(int node, cpumask_t *span) | |||
6591 | cpus_or(*span, *span, *nodemask); | 6820 | cpus_or(*span, *span, *nodemask); |
6592 | } | 6821 | } |
6593 | } | 6822 | } |
6594 | #endif | 6823 | #endif /* CONFIG_NUMA */ |
6595 | 6824 | ||
6596 | int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | 6825 | int sched_smt_power_savings = 0, sched_mc_power_savings = 0; |
6597 | 6826 | ||
@@ -6610,7 +6839,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, | |||
6610 | *sg = &per_cpu(sched_group_cpus, cpu); | 6839 | *sg = &per_cpu(sched_group_cpus, cpu); |
6611 | return cpu; | 6840 | return cpu; |
6612 | } | 6841 | } |
6613 | #endif | 6842 | #endif /* CONFIG_SCHED_SMT */ |
6614 | 6843 | ||
6615 | /* | 6844 | /* |
6616 | * multi-core sched-domains: | 6845 | * multi-core sched-domains: |
@@ -6618,7 +6847,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, | |||
6618 | #ifdef CONFIG_SCHED_MC | 6847 | #ifdef CONFIG_SCHED_MC |
6619 | static DEFINE_PER_CPU(struct sched_domain, core_domains); | 6848 | static DEFINE_PER_CPU(struct sched_domain, core_domains); |
6620 | static DEFINE_PER_CPU(struct sched_group, sched_group_core); | 6849 | static DEFINE_PER_CPU(struct sched_group, sched_group_core); |
6621 | #endif | 6850 | #endif /* CONFIG_SCHED_MC */ |
6622 | 6851 | ||
6623 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | 6852 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) |
6624 | static int | 6853 | static int |
@@ -6703,7 +6932,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) | |||
6703 | if (!sg) | 6932 | if (!sg) |
6704 | return; | 6933 | return; |
6705 | do { | 6934 | do { |
6706 | for_each_cpu_mask(j, sg->cpumask) { | 6935 | for_each_cpu_mask_nr(j, sg->cpumask) { |
6707 | struct sched_domain *sd; | 6936 | struct sched_domain *sd; |
6708 | 6937 | ||
6709 | sd = &per_cpu(phys_domains, j); | 6938 | sd = &per_cpu(phys_domains, j); |
@@ -6720,7 +6949,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) | |||
6720 | sg = sg->next; | 6949 | sg = sg->next; |
6721 | } while (sg != group_head); | 6950 | } while (sg != group_head); |
6722 | } | 6951 | } |
6723 | #endif | 6952 | #endif /* CONFIG_NUMA */ |
6724 | 6953 | ||
6725 | #ifdef CONFIG_NUMA | 6954 | #ifdef CONFIG_NUMA |
6726 | /* Free memory allocated for various sched_group structures */ | 6955 | /* Free memory allocated for various sched_group structures */ |
@@ -6728,14 +6957,14 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) | |||
6728 | { | 6957 | { |
6729 | int cpu, i; | 6958 | int cpu, i; |
6730 | 6959 | ||
6731 | for_each_cpu_mask(cpu, *cpu_map) { | 6960 | for_each_cpu_mask_nr(cpu, *cpu_map) { |
6732 | struct sched_group **sched_group_nodes | 6961 | struct sched_group **sched_group_nodes |
6733 | = sched_group_nodes_bycpu[cpu]; | 6962 | = sched_group_nodes_bycpu[cpu]; |
6734 | 6963 | ||
6735 | if (!sched_group_nodes) | 6964 | if (!sched_group_nodes) |
6736 | continue; | 6965 | continue; |
6737 | 6966 | ||
6738 | for (i = 0; i < MAX_NUMNODES; i++) { | 6967 | for (i = 0; i < nr_node_ids; i++) { |
6739 | struct sched_group *oldsg, *sg = sched_group_nodes[i]; | 6968 | struct sched_group *oldsg, *sg = sched_group_nodes[i]; |
6740 | 6969 | ||
6741 | *nodemask = node_to_cpumask(i); | 6970 | *nodemask = node_to_cpumask(i); |
@@ -6757,11 +6986,11 @@ next_sg: | |||
6757 | sched_group_nodes_bycpu[cpu] = NULL; | 6986 | sched_group_nodes_bycpu[cpu] = NULL; |
6758 | } | 6987 | } |
6759 | } | 6988 | } |
6760 | #else | 6989 | #else /* !CONFIG_NUMA */ |
6761 | static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) | 6990 | static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) |
6762 | { | 6991 | { |
6763 | } | 6992 | } |
6764 | #endif | 6993 | #endif /* CONFIG_NUMA */ |
6765 | 6994 | ||
6766 | /* | 6995 | /* |
6767 | * Initialize sched groups cpu_power. | 6996 | * Initialize sched groups cpu_power. |
@@ -6928,7 +7157,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
6928 | /* | 7157 | /* |
6929 | * Allocate the per-node list of sched groups | 7158 | * Allocate the per-node list of sched groups |
6930 | */ | 7159 | */ |
6931 | sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), | 7160 | sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *), |
6932 | GFP_KERNEL); | 7161 | GFP_KERNEL); |
6933 | if (!sched_group_nodes) { | 7162 | if (!sched_group_nodes) { |
6934 | printk(KERN_WARNING "Can not alloc sched group node list\n"); | 7163 | printk(KERN_WARNING "Can not alloc sched group node list\n"); |
@@ -6967,7 +7196,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
6967 | /* | 7196 | /* |
6968 | * Set up domains for cpus specified by the cpu_map. | 7197 | * Set up domains for cpus specified by the cpu_map. |
6969 | */ | 7198 | */ |
6970 | for_each_cpu_mask(i, *cpu_map) { | 7199 | for_each_cpu_mask_nr(i, *cpu_map) { |
6971 | struct sched_domain *sd = NULL, *p; | 7200 | struct sched_domain *sd = NULL, *p; |
6972 | SCHED_CPUMASK_VAR(nodemask, allmasks); | 7201 | SCHED_CPUMASK_VAR(nodemask, allmasks); |
6973 | 7202 | ||
@@ -7034,7 +7263,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7034 | 7263 | ||
7035 | #ifdef CONFIG_SCHED_SMT | 7264 | #ifdef CONFIG_SCHED_SMT |
7036 | /* Set up CPU (sibling) groups */ | 7265 | /* Set up CPU (sibling) groups */ |
7037 | for_each_cpu_mask(i, *cpu_map) { | 7266 | for_each_cpu_mask_nr(i, *cpu_map) { |
7038 | SCHED_CPUMASK_VAR(this_sibling_map, allmasks); | 7267 | SCHED_CPUMASK_VAR(this_sibling_map, allmasks); |
7039 | SCHED_CPUMASK_VAR(send_covered, allmasks); | 7268 | SCHED_CPUMASK_VAR(send_covered, allmasks); |
7040 | 7269 | ||
@@ -7051,7 +7280,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7051 | 7280 | ||
7052 | #ifdef CONFIG_SCHED_MC | 7281 | #ifdef CONFIG_SCHED_MC |
7053 | /* Set up multi-core groups */ | 7282 | /* Set up multi-core groups */ |
7054 | for_each_cpu_mask(i, *cpu_map) { | 7283 | for_each_cpu_mask_nr(i, *cpu_map) { |
7055 | SCHED_CPUMASK_VAR(this_core_map, allmasks); | 7284 | SCHED_CPUMASK_VAR(this_core_map, allmasks); |
7056 | SCHED_CPUMASK_VAR(send_covered, allmasks); | 7285 | SCHED_CPUMASK_VAR(send_covered, allmasks); |
7057 | 7286 | ||
@@ -7067,7 +7296,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7067 | #endif | 7296 | #endif |
7068 | 7297 | ||
7069 | /* Set up physical groups */ | 7298 | /* Set up physical groups */ |
7070 | for (i = 0; i < MAX_NUMNODES; i++) { | 7299 | for (i = 0; i < nr_node_ids; i++) { |
7071 | SCHED_CPUMASK_VAR(nodemask, allmasks); | 7300 | SCHED_CPUMASK_VAR(nodemask, allmasks); |
7072 | SCHED_CPUMASK_VAR(send_covered, allmasks); | 7301 | SCHED_CPUMASK_VAR(send_covered, allmasks); |
7073 | 7302 | ||
@@ -7091,7 +7320,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7091 | send_covered, tmpmask); | 7320 | send_covered, tmpmask); |
7092 | } | 7321 | } |
7093 | 7322 | ||
7094 | for (i = 0; i < MAX_NUMNODES; i++) { | 7323 | for (i = 0; i < nr_node_ids; i++) { |
7095 | /* Set up node groups */ | 7324 | /* Set up node groups */ |
7096 | struct sched_group *sg, *prev; | 7325 | struct sched_group *sg, *prev; |
7097 | SCHED_CPUMASK_VAR(nodemask, allmasks); | 7326 | SCHED_CPUMASK_VAR(nodemask, allmasks); |
@@ -7118,7 +7347,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7118 | goto error; | 7347 | goto error; |
7119 | } | 7348 | } |
7120 | sched_group_nodes[i] = sg; | 7349 | sched_group_nodes[i] = sg; |
7121 | for_each_cpu_mask(j, *nodemask) { | 7350 | for_each_cpu_mask_nr(j, *nodemask) { |
7122 | struct sched_domain *sd; | 7351 | struct sched_domain *sd; |
7123 | 7352 | ||
7124 | sd = &per_cpu(node_domains, j); | 7353 | sd = &per_cpu(node_domains, j); |
@@ -7130,9 +7359,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7130 | cpus_or(*covered, *covered, *nodemask); | 7359 | cpus_or(*covered, *covered, *nodemask); |
7131 | prev = sg; | 7360 | prev = sg; |
7132 | 7361 | ||
7133 | for (j = 0; j < MAX_NUMNODES; j++) { | 7362 | for (j = 0; j < nr_node_ids; j++) { |
7134 | SCHED_CPUMASK_VAR(notcovered, allmasks); | 7363 | SCHED_CPUMASK_VAR(notcovered, allmasks); |
7135 | int n = (i + j) % MAX_NUMNODES; | 7364 | int n = (i + j) % nr_node_ids; |
7136 | node_to_cpumask_ptr(pnodemask, n); | 7365 | node_to_cpumask_ptr(pnodemask, n); |
7137 | 7366 | ||
7138 | cpus_complement(*notcovered, *covered); | 7367 | cpus_complement(*notcovered, *covered); |
@@ -7164,28 +7393,28 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7164 | 7393 | ||
7165 | /* Calculate CPU power for physical packages and nodes */ | 7394 | /* Calculate CPU power for physical packages and nodes */ |
7166 | #ifdef CONFIG_SCHED_SMT | 7395 | #ifdef CONFIG_SCHED_SMT |
7167 | for_each_cpu_mask(i, *cpu_map) { | 7396 | for_each_cpu_mask_nr(i, *cpu_map) { |
7168 | struct sched_domain *sd = &per_cpu(cpu_domains, i); | 7397 | struct sched_domain *sd = &per_cpu(cpu_domains, i); |
7169 | 7398 | ||
7170 | init_sched_groups_power(i, sd); | 7399 | init_sched_groups_power(i, sd); |
7171 | } | 7400 | } |
7172 | #endif | 7401 | #endif |
7173 | #ifdef CONFIG_SCHED_MC | 7402 | #ifdef CONFIG_SCHED_MC |
7174 | for_each_cpu_mask(i, *cpu_map) { | 7403 | for_each_cpu_mask_nr(i, *cpu_map) { |
7175 | struct sched_domain *sd = &per_cpu(core_domains, i); | 7404 | struct sched_domain *sd = &per_cpu(core_domains, i); |
7176 | 7405 | ||
7177 | init_sched_groups_power(i, sd); | 7406 | init_sched_groups_power(i, sd); |
7178 | } | 7407 | } |
7179 | #endif | 7408 | #endif |
7180 | 7409 | ||
7181 | for_each_cpu_mask(i, *cpu_map) { | 7410 | for_each_cpu_mask_nr(i, *cpu_map) { |
7182 | struct sched_domain *sd = &per_cpu(phys_domains, i); | 7411 | struct sched_domain *sd = &per_cpu(phys_domains, i); |
7183 | 7412 | ||
7184 | init_sched_groups_power(i, sd); | 7413 | init_sched_groups_power(i, sd); |
7185 | } | 7414 | } |
7186 | 7415 | ||
7187 | #ifdef CONFIG_NUMA | 7416 | #ifdef CONFIG_NUMA |
7188 | for (i = 0; i < MAX_NUMNODES; i++) | 7417 | for (i = 0; i < nr_node_ids; i++) |
7189 | init_numa_sched_groups_power(sched_group_nodes[i]); | 7418 | init_numa_sched_groups_power(sched_group_nodes[i]); |
7190 | 7419 | ||
7191 | if (sd_allnodes) { | 7420 | if (sd_allnodes) { |
@@ -7198,7 +7427,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
7198 | #endif | 7427 | #endif |
7199 | 7428 | ||
7200 | /* Attach the domains */ | 7429 | /* Attach the domains */ |
7201 | for_each_cpu_mask(i, *cpu_map) { | 7430 | for_each_cpu_mask_nr(i, *cpu_map) { |
7202 | struct sched_domain *sd; | 7431 | struct sched_domain *sd; |
7203 | #ifdef CONFIG_SCHED_SMT | 7432 | #ifdef CONFIG_SCHED_SMT |
7204 | sd = &per_cpu(cpu_domains, i); | 7433 | sd = &per_cpu(cpu_domains, i); |
@@ -7243,18 +7472,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void) | |||
7243 | } | 7472 | } |
7244 | 7473 | ||
7245 | /* | 7474 | /* |
7246 | * Free current domain masks. | ||
7247 | * Called after all cpus are attached to NULL domain. | ||
7248 | */ | ||
7249 | static void free_sched_domains(void) | ||
7250 | { | ||
7251 | ndoms_cur = 0; | ||
7252 | if (doms_cur != &fallback_doms) | ||
7253 | kfree(doms_cur); | ||
7254 | doms_cur = &fallback_doms; | ||
7255 | } | ||
7256 | |||
7257 | /* | ||
7258 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 7475 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
7259 | * For now this just excludes isolated cpus, but could be used to | 7476 | * For now this just excludes isolated cpus, but could be used to |
7260 | * exclude other special cases in the future. | 7477 | * exclude other special cases in the future. |
@@ -7293,7 +7510,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map) | |||
7293 | 7510 | ||
7294 | unregister_sched_domain_sysctl(); | 7511 | unregister_sched_domain_sysctl(); |
7295 | 7512 | ||
7296 | for_each_cpu_mask(i, *cpu_map) | 7513 | for_each_cpu_mask_nr(i, *cpu_map) |
7297 | cpu_attach_domain(NULL, &def_root_domain, i); | 7514 | cpu_attach_domain(NULL, &def_root_domain, i); |
7298 | synchronize_sched(); | 7515 | synchronize_sched(); |
7299 | arch_destroy_sched_domains(cpu_map, &tmpmask); | 7516 | arch_destroy_sched_domains(cpu_map, &tmpmask); |
@@ -7332,7 +7549,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
7332 | * ownership of it and will kfree it when done with it. If the caller | 7549 | * ownership of it and will kfree it when done with it. If the caller |
7333 | * failed the kmalloc call, then it can pass in doms_new == NULL, | 7550 | * failed the kmalloc call, then it can pass in doms_new == NULL, |
7334 | * and partition_sched_domains() will fallback to the single partition | 7551 | * and partition_sched_domains() will fallback to the single partition |
7335 | * 'fallback_doms'. | 7552 | * 'fallback_doms'; it also forces the domains to be rebuilt. |
7336 | * | 7553 | * |
7337 | * Call with hotplug lock held | 7554 | * Call with hotplug lock held |
7338 | */ | 7555 | */ |
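A hedged sketch of the two call patterns the updated comment describes; the mask value is illustrative and the caller is assumed to hold the hotplug lock. The non-NULL case hands ownership of a kmalloc()'ed cpumask array to the scheduler, while the NULL case (used by the hotplug notifier further down) now forces a rebuild of the single fallback domain:

	cpumask_t *doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);

	if (doms) {
		*doms = cpu_online_map;			/* one partition spanning the online CPUs */
		partition_sched_domains(1, doms, NULL);	/* the scheduler kfree()s doms when done */
	} else {
		/* failed allocation: NULL falls back to 'fallback_doms' and rebuilds */
		partition_sched_domains(0, NULL, NULL);
	}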
@@ -7346,12 +7563,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, | |||
7346 | /* always unregister in case we don't destroy any domains */ | 7563 | /* always unregister in case we don't destroy any domains */ |
7347 | unregister_sched_domain_sysctl(); | 7564 | unregister_sched_domain_sysctl(); |
7348 | 7565 | ||
7349 | if (doms_new == NULL) { | 7566 | if (doms_new == NULL) |
7350 | ndoms_new = 1; | 7567 | ndoms_new = 0; |
7351 | doms_new = &fallback_doms; | ||
7352 | cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); | ||
7353 | dattr_new = NULL; | ||
7354 | } | ||
7355 | 7568 | ||
7356 | /* Destroy deleted domains */ | 7569 | /* Destroy deleted domains */ |
7357 | for (i = 0; i < ndoms_cur; i++) { | 7570 | for (i = 0; i < ndoms_cur; i++) { |
@@ -7366,6 +7579,14 @@ match1: | |||
7366 | ; | 7579 | ; |
7367 | } | 7580 | } |
7368 | 7581 | ||
7582 | if (doms_new == NULL) { | ||
7583 | ndoms_cur = 0; | ||
7584 | ndoms_new = 1; | ||
7585 | doms_new = &fallback_doms; | ||
7586 | cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); | ||
7587 | dattr_new = NULL; | ||
7588 | } | ||
7589 | |||
7369 | /* Build new domains */ | 7590 | /* Build new domains */ |
7370 | for (i = 0; i < ndoms_new; i++) { | 7591 | for (i = 0; i < ndoms_new; i++) { |
7371 | for (j = 0; j < ndoms_cur; j++) { | 7592 | for (j = 0; j < ndoms_cur; j++) { |
@@ -7396,17 +7617,10 @@ match2: | |||
7396 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 7617 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
7397 | int arch_reinit_sched_domains(void) | 7618 | int arch_reinit_sched_domains(void) |
7398 | { | 7619 | { |
7399 | int err; | ||
7400 | |||
7401 | get_online_cpus(); | 7620 | get_online_cpus(); |
7402 | mutex_lock(&sched_domains_mutex); | 7621 | rebuild_sched_domains(); |
7403 | detach_destroy_domains(&cpu_online_map); | ||
7404 | free_sched_domains(); | ||
7405 | err = arch_init_sched_domains(&cpu_online_map); | ||
7406 | mutex_unlock(&sched_domains_mutex); | ||
7407 | put_online_cpus(); | 7622 | put_online_cpus(); |
7408 | 7623 | return 0; | |
7409 | return err; | ||
7410 | } | 7624 | } |
7411 | 7625 | ||
7412 | static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) | 7626 | static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) |
@@ -7427,11 +7641,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) | |||
7427 | } | 7641 | } |
7428 | 7642 | ||
7429 | #ifdef CONFIG_SCHED_MC | 7643 | #ifdef CONFIG_SCHED_MC |
7430 | static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) | 7644 | static ssize_t sched_mc_power_savings_show(struct sys_device *dev, |
7645 | struct sysdev_attribute *attr, char *page) | ||
7431 | { | 7646 | { |
7432 | return sprintf(page, "%u\n", sched_mc_power_savings); | 7647 | return sprintf(page, "%u\n", sched_mc_power_savings); |
7433 | } | 7648 | } |
7434 | static ssize_t sched_mc_power_savings_store(struct sys_device *dev, | 7649 | static ssize_t sched_mc_power_savings_store(struct sys_device *dev, |
7650 | struct sysdev_attribute *attr, | ||
7435 | const char *buf, size_t count) | 7651 | const char *buf, size_t count) |
7436 | { | 7652 | { |
7437 | return sched_power_savings_store(buf, count, 0); | 7653 | return sched_power_savings_store(buf, count, 0); |
@@ -7441,11 +7657,13 @@ static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, | |||
7441 | #endif | 7657 | #endif |
7442 | 7658 | ||
7443 | #ifdef CONFIG_SCHED_SMT | 7659 | #ifdef CONFIG_SCHED_SMT |
7444 | static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page) | 7660 | static ssize_t sched_smt_power_savings_show(struct sys_device *dev, |
7661 | struct sysdev_attribute *attr, char *page) | ||
7445 | { | 7662 | { |
7446 | return sprintf(page, "%u\n", sched_smt_power_savings); | 7663 | return sprintf(page, "%u\n", sched_smt_power_savings); |
7447 | } | 7664 | } |
7448 | static ssize_t sched_smt_power_savings_store(struct sys_device *dev, | 7665 | static ssize_t sched_smt_power_savings_store(struct sys_device *dev, |
7666 | struct sysdev_attribute *attr, | ||
7449 | const char *buf, size_t count) | 7667 | const char *buf, size_t count) |
7450 | { | 7668 | { |
7451 | return sched_power_savings_store(buf, count, 1); | 7669 | return sched_power_savings_store(buf, count, 1); |
@@ -7470,54 +7688,51 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) | |||
7470 | #endif | 7688 | #endif |
7471 | return err; | 7689 | return err; |
7472 | } | 7690 | } |
7473 | #endif | 7691 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ |
7474 | 7692 | ||
7693 | #ifndef CONFIG_CPUSETS | ||
7475 | /* | 7694 | /* |
7476 | * Force a reinitialization of the sched domains hierarchy. The domains | 7695 | * Add online and remove offline CPUs from the scheduler domains. |
7477 | * and groups cannot be updated in place without racing with the balancing | 7696 | * When cpusets are enabled they take over this function. |
7478 | * code, so we temporarily attach all running cpus to the NULL domain | ||
7479 | * which will prevent rebalancing while the sched domains are recalculated. | ||
7480 | */ | 7697 | */ |
7481 | static int update_sched_domains(struct notifier_block *nfb, | 7698 | static int update_sched_domains(struct notifier_block *nfb, |
7482 | unsigned long action, void *hcpu) | 7699 | unsigned long action, void *hcpu) |
7483 | { | 7700 | { |
7484 | switch (action) { | 7701 | switch (action) { |
7485 | case CPU_UP_PREPARE: | 7702 | case CPU_ONLINE: |
7486 | case CPU_UP_PREPARE_FROZEN: | 7703 | case CPU_ONLINE_FROZEN: |
7704 | case CPU_DEAD: | ||
7705 | case CPU_DEAD_FROZEN: | ||
7706 | partition_sched_domains(0, NULL, NULL); | ||
7707 | return NOTIFY_OK; | ||
7708 | |||
7709 | default: | ||
7710 | return NOTIFY_DONE; | ||
7711 | } | ||
7712 | } | ||
7713 | #endif | ||
7714 | |||
7715 | static int update_runtime(struct notifier_block *nfb, | ||
7716 | unsigned long action, void *hcpu) | ||
7717 | { | ||
7718 | int cpu = (int)(long)hcpu; | ||
7719 | |||
7720 | switch (action) { | ||
7487 | case CPU_DOWN_PREPARE: | 7721 | case CPU_DOWN_PREPARE: |
7488 | case CPU_DOWN_PREPARE_FROZEN: | 7722 | case CPU_DOWN_PREPARE_FROZEN: |
7489 | detach_destroy_domains(&cpu_online_map); | 7723 | disable_runtime(cpu_rq(cpu)); |
7490 | free_sched_domains(); | ||
7491 | return NOTIFY_OK; | 7724 | return NOTIFY_OK; |
7492 | 7725 | ||
7493 | case CPU_UP_CANCELED: | ||
7494 | case CPU_UP_CANCELED_FROZEN: | ||
7495 | case CPU_DOWN_FAILED: | 7726 | case CPU_DOWN_FAILED: |
7496 | case CPU_DOWN_FAILED_FROZEN: | 7727 | case CPU_DOWN_FAILED_FROZEN: |
7497 | case CPU_ONLINE: | 7728 | case CPU_ONLINE: |
7498 | case CPU_ONLINE_FROZEN: | 7729 | case CPU_ONLINE_FROZEN: |
7499 | case CPU_DEAD: | 7730 | enable_runtime(cpu_rq(cpu)); |
7500 | case CPU_DEAD_FROZEN: | 7731 | return NOTIFY_OK; |
7501 | /* | 7732 | |
7502 | * Fall through and re-initialise the domains. | ||
7503 | */ | ||
7504 | break; | ||
7505 | default: | 7733 | default: |
7506 | return NOTIFY_DONE; | 7734 | return NOTIFY_DONE; |
7507 | } | 7735 | } |
7508 | |||
7509 | #ifndef CONFIG_CPUSETS | ||
7510 | /* | ||
7511 | * Create default domain partitioning if cpusets are disabled. | ||
7512 | * Otherwise we let cpusets rebuild the domains based on the | ||
7513 | * current setup. | ||
7514 | */ | ||
7515 | |||
7516 | /* The hotplug lock is already held by cpu_up/cpu_down */ | ||
7517 | arch_init_sched_domains(&cpu_online_map); | ||
7518 | #endif | ||
7519 | |||
7520 | return NOTIFY_OK; | ||
7521 | } | 7736 | } |
7522 | 7737 | ||
7523 | void __init sched_init_smp(void) | 7738 | void __init sched_init_smp(void) |
@@ -7537,8 +7752,15 @@ void __init sched_init_smp(void) | |||
7537 | cpu_set(smp_processor_id(), non_isolated_cpus); | 7752 | cpu_set(smp_processor_id(), non_isolated_cpus); |
7538 | mutex_unlock(&sched_domains_mutex); | 7753 | mutex_unlock(&sched_domains_mutex); |
7539 | put_online_cpus(); | 7754 | put_online_cpus(); |
7755 | |||
7756 | #ifndef CONFIG_CPUSETS | ||
7540 | /* XXX: Theoretical race here - CPU may be hotplugged now */ | 7757 | /* XXX: Theoretical race here - CPU may be hotplugged now */ |
7541 | hotcpu_notifier(update_sched_domains, 0); | 7758 | hotcpu_notifier(update_sched_domains, 0); |
7759 | #endif | ||
7760 | |||
7761 | /* RT runtime code needs to handle some hotplug events */ | ||
7762 | hotcpu_notifier(update_runtime, 0); | ||
7763 | |||
7542 | init_hrtick(); | 7764 | init_hrtick(); |
7543 | 7765 | ||
7544 | /* Move init over to a non-isolated CPU */ | 7766 | /* Move init over to a non-isolated CPU */ |
@@ -7695,8 +7917,8 @@ void __init sched_init(void) | |||
7695 | 7917 | ||
7696 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; | 7918 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; |
7697 | ptr += nr_cpu_ids * sizeof(void **); | 7919 | ptr += nr_cpu_ids * sizeof(void **); |
7698 | #endif | 7920 | #endif /* CONFIG_USER_SCHED */ |
7699 | #endif | 7921 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
7700 | #ifdef CONFIG_RT_GROUP_SCHED | 7922 | #ifdef CONFIG_RT_GROUP_SCHED |
7701 | init_task_group.rt_se = (struct sched_rt_entity **)ptr; | 7923 | init_task_group.rt_se = (struct sched_rt_entity **)ptr; |
7702 | ptr += nr_cpu_ids * sizeof(void **); | 7924 | ptr += nr_cpu_ids * sizeof(void **); |
@@ -7710,8 +7932,8 @@ void __init sched_init(void) | |||
7710 | 7932 | ||
7711 | root_task_group.rt_rq = (struct rt_rq **)ptr; | 7933 | root_task_group.rt_rq = (struct rt_rq **)ptr; |
7712 | ptr += nr_cpu_ids * sizeof(void **); | 7934 | ptr += nr_cpu_ids * sizeof(void **); |
7713 | #endif | 7935 | #endif /* CONFIG_USER_SCHED */ |
7714 | #endif | 7936 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7715 | } | 7937 | } |
7716 | 7938 | ||
7717 | #ifdef CONFIG_SMP | 7939 | #ifdef CONFIG_SMP |
@@ -7727,8 +7949,8 @@ void __init sched_init(void) | |||
7727 | #ifdef CONFIG_USER_SCHED | 7949 | #ifdef CONFIG_USER_SCHED |
7728 | init_rt_bandwidth(&root_task_group.rt_bandwidth, | 7950 | init_rt_bandwidth(&root_task_group.rt_bandwidth, |
7729 | global_rt_period(), RUNTIME_INF); | 7951 | global_rt_period(), RUNTIME_INF); |
7730 | #endif | 7952 | #endif /* CONFIG_USER_SCHED */ |
7731 | #endif | 7953 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7732 | 7954 | ||
7733 | #ifdef CONFIG_GROUP_SCHED | 7955 | #ifdef CONFIG_GROUP_SCHED |
7734 | list_add(&init_task_group.list, &task_groups); | 7956 | list_add(&init_task_group.list, &task_groups); |
@@ -7738,8 +7960,8 @@ void __init sched_init(void) | |||
7738 | INIT_LIST_HEAD(&root_task_group.children); | 7960 | INIT_LIST_HEAD(&root_task_group.children); |
7739 | init_task_group.parent = &root_task_group; | 7961 | init_task_group.parent = &root_task_group; |
7740 | list_add(&init_task_group.siblings, &root_task_group.children); | 7962 | list_add(&init_task_group.siblings, &root_task_group.children); |
7741 | #endif | 7963 | #endif /* CONFIG_USER_SCHED */ |
7742 | #endif | 7964 | #endif /* CONFIG_GROUP_SCHED */ |
7743 | 7965 | ||
7744 | for_each_possible_cpu(i) { | 7966 | for_each_possible_cpu(i) { |
7745 | struct rq *rq; | 7967 | struct rq *rq; |
@@ -7819,6 +8041,7 @@ void __init sched_init(void) | |||
7819 | rq->next_balance = jiffies; | 8041 | rq->next_balance = jiffies; |
7820 | rq->push_cpu = 0; | 8042 | rq->push_cpu = 0; |
7821 | rq->cpu = i; | 8043 | rq->cpu = i; |
8044 | rq->online = 0; | ||
7822 | rq->migration_thread = NULL; | 8045 | rq->migration_thread = NULL; |
7823 | INIT_LIST_HEAD(&rq->migration_queue); | 8046 | INIT_LIST_HEAD(&rq->migration_queue); |
7824 | rq_attach_root(rq, &def_root_domain); | 8047 | rq_attach_root(rq, &def_root_domain); |
@@ -7834,7 +8057,7 @@ void __init sched_init(void) | |||
7834 | #endif | 8057 | #endif |
7835 | 8058 | ||
7836 | #ifdef CONFIG_SMP | 8059 | #ifdef CONFIG_SMP |
7837 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); | 8060 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); |
7838 | #endif | 8061 | #endif |
7839 | 8062 | ||
7840 | #ifdef CONFIG_RT_MUTEXES | 8063 | #ifdef CONFIG_RT_MUTEXES |
@@ -8058,7 +8281,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | |||
8058 | { | 8281 | { |
8059 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); | 8282 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); |
8060 | } | 8283 | } |
8061 | #else | 8284 | #else /* !CONFIG_FAIR_GROUP_SCHED */ |
8062 | static inline void free_fair_sched_group(struct task_group *tg) | 8285 | static inline void free_fair_sched_group(struct task_group *tg) |
8063 | { | 8286 | { |
8064 | } | 8287 | } |
@@ -8076,7 +8299,7 @@ static inline void register_fair_sched_group(struct task_group *tg, int cpu) | |||
8076 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | 8299 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) |
8077 | { | 8300 | { |
8078 | } | 8301 | } |
8079 | #endif | 8302 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8080 | 8303 | ||
8081 | #ifdef CONFIG_RT_GROUP_SCHED | 8304 | #ifdef CONFIG_RT_GROUP_SCHED |
8082 | static void free_rt_sched_group(struct task_group *tg) | 8305 | static void free_rt_sched_group(struct task_group *tg) |
@@ -8147,7 +8370,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | |||
8147 | { | 8370 | { |
8148 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); | 8371 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); |
8149 | } | 8372 | } |
8150 | #else | 8373 | #else /* !CONFIG_RT_GROUP_SCHED */ |
8151 | static inline void free_rt_sched_group(struct task_group *tg) | 8374 | static inline void free_rt_sched_group(struct task_group *tg) |
8152 | { | 8375 | { |
8153 | } | 8376 | } |
@@ -8165,7 +8388,7 @@ static inline void register_rt_sched_group(struct task_group *tg, int cpu) | |||
8165 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | 8388 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) |
8166 | { | 8389 | { |
8167 | } | 8390 | } |
8168 | #endif | 8391 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8169 | 8392 | ||
8170 | #ifdef CONFIG_GROUP_SCHED | 8393 | #ifdef CONFIG_GROUP_SCHED |
8171 | static void free_sched_group(struct task_group *tg) | 8394 | static void free_sched_group(struct task_group *tg) |
@@ -8276,17 +8499,14 @@ void sched_move_task(struct task_struct *tsk) | |||
8276 | 8499 | ||
8277 | task_rq_unlock(rq, &flags); | 8500 | task_rq_unlock(rq, &flags); |
8278 | } | 8501 | } |
8279 | #endif | 8502 | #endif /* CONFIG_GROUP_SCHED */ |
8280 | 8503 | ||
8281 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8504 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8282 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 8505 | static void __set_se_shares(struct sched_entity *se, unsigned long shares) |
8283 | { | 8506 | { |
8284 | struct cfs_rq *cfs_rq = se->cfs_rq; | 8507 | struct cfs_rq *cfs_rq = se->cfs_rq; |
8285 | struct rq *rq = cfs_rq->rq; | ||
8286 | int on_rq; | 8508 | int on_rq; |
8287 | 8509 | ||
8288 | spin_lock_irq(&rq->lock); | ||
8289 | |||
8290 | on_rq = se->on_rq; | 8510 | on_rq = se->on_rq; |
8291 | if (on_rq) | 8511 | if (on_rq) |
8292 | dequeue_entity(cfs_rq, se, 0); | 8512 | dequeue_entity(cfs_rq, se, 0); |
@@ -8296,8 +8516,17 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) | |||
8296 | 8516 | ||
8297 | if (on_rq) | 8517 | if (on_rq) |
8298 | enqueue_entity(cfs_rq, se, 0); | 8518 | enqueue_entity(cfs_rq, se, 0); |
8519 | } | ||
8299 | 8520 | ||
8300 | spin_unlock_irq(&rq->lock); | 8521 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
8522 | { | ||
8523 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
8524 | struct rq *rq = cfs_rq->rq; | ||
8525 | unsigned long flags; | ||
8526 | |||
8527 | spin_lock_irqsave(&rq->lock, flags); | ||
8528 | __set_se_shares(se, shares); | ||
8529 | spin_unlock_irqrestore(&rq->lock, flags); | ||
8301 | } | 8530 | } |
8302 | 8531 | ||
8303 | static DEFINE_MUTEX(shares_mutex); | 8532 | static DEFINE_MUTEX(shares_mutex); |
@@ -8336,8 +8565,13 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
8336 | * w/o tripping rebalance_share or load_balance_fair. | 8565 | * w/o tripping rebalance_share or load_balance_fair. |
8337 | */ | 8566 | */ |
8338 | tg->shares = shares; | 8567 | tg->shares = shares; |
8339 | for_each_possible_cpu(i) | 8568 | for_each_possible_cpu(i) { |
8569 | /* | ||
8570 | * force a rebalance | ||
8571 | */ | ||
8572 | cfs_rq_set_shares(tg->cfs_rq[i], 0); | ||
8340 | set_se_shares(tg->se[i], shares); | 8573 | set_se_shares(tg->se[i], shares); |
8574 | } | ||
8341 | 8575 | ||
8342 | /* | 8576 | /* |
8343 | * Enable load balance activity on this group, by inserting it back on | 8577 | * Enable load balance activity on this group, by inserting it back on |
@@ -8376,7 +8610,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
8376 | #ifdef CONFIG_CGROUP_SCHED | 8610 | #ifdef CONFIG_CGROUP_SCHED |
8377 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 8611 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) |
8378 | { | 8612 | { |
8379 | struct task_group *tgi, *parent = tg ? tg->parent : NULL; | 8613 | struct task_group *tgi, *parent = tg->parent; |
8380 | unsigned long total = 0; | 8614 | unsigned long total = 0; |
8381 | 8615 | ||
8382 | if (!parent) { | 8616 | if (!parent) { |
@@ -8400,7 +8634,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | |||
8400 | } | 8634 | } |
8401 | rcu_read_unlock(); | 8635 | rcu_read_unlock(); |
8402 | 8636 | ||
8403 | return total + to_ratio(period, runtime) < | 8637 | return total + to_ratio(period, runtime) <= |
8404 | to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), | 8638 | to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), |
8405 | parent->rt_bandwidth.rt_runtime); | 8639 | parent->rt_bandwidth.rt_runtime); |
8406 | } | 8640 | } |
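The relaxed comparison ('<=' instead of '<') matters exactly when child groups together ask for all of the parent's RT bandwidth. A hedged worked example in plain microsecond ratios; the kernel compares fixed-point to_ratio() values, but the boundary behaviour is the same:

	/* parent group: 950000us of RT runtime per 1000000us period (ratio 0.95) */
	/* two children request 450000/1000000 and 500000/1000000 (sum 0.95)      */
	int rejected_before = !((450000 + 500000) <  950000);	/* 1: old test refused the exact fit */
	int accepted_now    =  ((450000 + 500000) <= 950000);	/* 1: the exact fit is now admitted  */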
@@ -8520,16 +8754,21 @@ long sched_group_rt_period(struct task_group *tg) | |||
8520 | 8754 | ||
8521 | static int sched_rt_global_constraints(void) | 8755 | static int sched_rt_global_constraints(void) |
8522 | { | 8756 | { |
8757 | struct task_group *tg = &root_task_group; | ||
8758 | u64 rt_runtime, rt_period; | ||
8523 | int ret = 0; | 8759 | int ret = 0; |
8524 | 8760 | ||
8761 | rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period); | ||
8762 | rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
8763 | |||
8525 | mutex_lock(&rt_constraints_mutex); | 8764 | mutex_lock(&rt_constraints_mutex); |
8526 | if (!__rt_schedulable(NULL, 1, 0)) | 8765 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) |
8527 | ret = -EINVAL; | 8766 | ret = -EINVAL; |
8528 | mutex_unlock(&rt_constraints_mutex); | 8767 | mutex_unlock(&rt_constraints_mutex); |
8529 | 8768 | ||
8530 | return ret; | 8769 | return ret; |
8531 | } | 8770 | } |
8532 | #else | 8771 | #else /* !CONFIG_RT_GROUP_SCHED */ |
8533 | static int sched_rt_global_constraints(void) | 8772 | static int sched_rt_global_constraints(void) |
8534 | { | 8773 | { |
8535 | unsigned long flags; | 8774 | unsigned long flags; |
@@ -8547,7 +8786,7 @@ static int sched_rt_global_constraints(void) | |||
8547 | 8786 | ||
8548 | return 0; | 8787 | return 0; |
8549 | } | 8788 | } |
8550 | #endif | 8789 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8551 | 8790 | ||
8552 | int sched_rt_handler(struct ctl_table *table, int write, | 8791 | int sched_rt_handler(struct ctl_table *table, int write, |
8553 | struct file *filp, void __user *buffer, size_t *lenp, | 8792 | struct file *filp, void __user *buffer, size_t *lenp, |
@@ -8655,7 +8894,7 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) | |||
8655 | 8894 | ||
8656 | return (u64) tg->shares; | 8895 | return (u64) tg->shares; |
8657 | } | 8896 | } |
8658 | #endif | 8897 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8659 | 8898 | ||
8660 | #ifdef CONFIG_RT_GROUP_SCHED | 8899 | #ifdef CONFIG_RT_GROUP_SCHED |
8661 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, | 8900 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, |
@@ -8679,7 +8918,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft) | |||
8679 | { | 8918 | { |
8680 | return sched_group_rt_period(cgroup_tg(cgrp)); | 8919 | return sched_group_rt_period(cgroup_tg(cgrp)); |
8681 | } | 8920 | } |
8682 | #endif | 8921 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8683 | 8922 | ||
8684 | static struct cftype cpu_files[] = { | 8923 | static struct cftype cpu_files[] = { |
8685 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8924 | #ifdef CONFIG_FAIR_GROUP_SCHED |