Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 1444
1 file changed, 831 insertions(+), 613 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 6c10fa796ca0..bba57adb9504 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -96,7 +96,7 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
96 | /* | 96 | /* |
97 | * Some helpers for converting nanosecond timing to jiffy resolution | 97 | * Some helpers for converting nanosecond timing to jiffy resolution |
98 | */ | 98 | */ |
99 | #define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ)) | 99 | #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (1000000000 / HZ)) |
100 | #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) | 100 | #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) |
101 | 101 | ||
102 | #define NICE_0_LOAD SCHED_LOAD_SCALE | 102 | #define NICE_0_LOAD SCHED_LOAD_SCALE |
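
A quick user-space sketch of what the NS_TO_JIFFIES()/JIFFIES_TO_NS() pair above computes. The added cast keeps the division in unsigned long arithmetic, which presumably is the point of the change on 32-bit builds, at the cost of truncating values that do not fit in unsigned long. The DEMO_ names, the fixed HZ of 1000 and the sample values are mine, not from the patch:

	/* Illustration only: NS_TO_JIFFIES()/JIFFIES_TO_NS() in user space. */
	#include <stdio.h>

	#define DEMO_HZ 1000UL
	#define DEMO_NS_TO_JIFFIES(ns) ((unsigned long)(ns) / (1000000000UL / DEMO_HZ))
	#define DEMO_JIFFIES_TO_NS(j)  ((j) * (1000000000UL / DEMO_HZ))

	int main(void)
	{
		unsigned long long ns = 2500000000ULL;	/* 2.5 seconds */

		/*
		 * The cast keeps the division in unsigned long arithmetic;
		 * on a 32-bit build that truncates values above ~4.29 seconds.
		 */
		printf("%llu ns -> %lu jiffies\n", ns, DEMO_NS_TO_JIFFIES(ns));
		printf("250 jiffies -> %lu ns\n", DEMO_JIFFIES_TO_NS(250UL));
		return 0;
	}
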
@@ -105,11 +105,9 @@ unsigned long long __attribute__((weak)) sched_clock(void) | |||
105 | /* | 105 | /* |
106 | * These are the 'tuning knobs' of the scheduler: | 106 | * These are the 'tuning knobs' of the scheduler: |
107 | * | 107 | * |
108 | * Minimum timeslice is 5 msecs (or 1 jiffy, whichever is larger), | 108 | * default timeslice is 100 msecs (used only for SCHED_RR tasks). |
109 | * default timeslice is 100 msecs, maximum timeslice is 800 msecs. | ||
110 | * Timeslices get refilled after they expire. | 109 | * Timeslices get refilled after they expire. |
111 | */ | 110 | */ |
112 | #define MIN_TIMESLICE max(5 * HZ / 1000, 1) | ||
113 | #define DEF_TIMESLICE (100 * HZ / 1000) | 111 | #define DEF_TIMESLICE (100 * HZ / 1000) |
114 | 112 | ||
115 | #ifdef CONFIG_SMP | 113 | #ifdef CONFIG_SMP |
@@ -133,24 +131,6 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val) | |||
133 | } | 131 | } |
134 | #endif | 132 | #endif |
135 | 133 | ||
136 | #define SCALE_PRIO(x, prio) \ | ||
137 | max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE) | ||
138 | |||
139 | /* | ||
140 | * static_prio_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] | ||
141 | * to time slice values: [800ms ... 100ms ... 5ms] | ||
142 | */ | ||
143 | static unsigned int static_prio_timeslice(int static_prio) | ||
144 | { | ||
145 | if (static_prio == NICE_TO_PRIO(19)) | ||
146 | return 1; | ||
147 | |||
148 | if (static_prio < NICE_TO_PRIO(0)) | ||
149 | return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio); | ||
150 | else | ||
151 | return SCALE_PRIO(DEF_TIMESLICE, static_prio); | ||
152 | } | ||
153 | |||
154 | static inline int rt_policy(int policy) | 134 | static inline int rt_policy(int policy) |
155 | { | 135 | { |
156 | if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR)) | 136 | if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR)) |
@@ -171,31 +151,91 @@ struct rt_prio_array { | |||
171 | struct list_head queue[MAX_RT_PRIO]; | 151 | struct list_head queue[MAX_RT_PRIO]; |
172 | }; | 152 | }; |
173 | 153 | ||
174 | struct load_stat { | 154 | #ifdef CONFIG_FAIR_GROUP_SCHED |
175 | struct load_weight load; | 155 | |
176 | u64 load_update_start, load_update_last; | 156 | struct cfs_rq; |
177 | unsigned long delta_fair, delta_exec, delta_stat; | 157 | |
158 | /* task group related information */ | ||
159 | struct task_group { | ||
160 | /* schedulable entities of this group on each cpu */ | ||
161 | struct sched_entity **se; | ||
162 | /* runqueue "owned" by this group on each cpu */ | ||
163 | struct cfs_rq **cfs_rq; | ||
164 | unsigned long shares; | ||
165 | /* spinlock to serialize modification to shares */ | ||
166 | spinlock_t lock; | ||
167 | }; | ||
168 | |||
169 | /* Default task group's sched entity on each cpu */ | ||
170 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); | ||
171 | /* Default task group's cfs_rq on each cpu */ | ||
172 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; | ||
173 | |||
174 | static struct sched_entity *init_sched_entity_p[NR_CPUS]; | ||
175 | static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; | ||
176 | |||
177 | /* Default task group. | ||
178 | * Every task in the system belongs to this group at bootup. | ||
179 | */ | ||
180 | struct task_group init_task_group = { | ||
181 | .se = init_sched_entity_p, | ||
182 | .cfs_rq = init_cfs_rq_p, | ||
178 | }; | 183 | }; |
179 | 184 | ||
185 | #ifdef CONFIG_FAIR_USER_SCHED | ||
186 | # define INIT_TASK_GRP_LOAD 2*NICE_0_LOAD | ||
187 | #else | ||
188 | # define INIT_TASK_GRP_LOAD NICE_0_LOAD | ||
189 | #endif | ||
190 | |||
191 | static int init_task_group_load = INIT_TASK_GRP_LOAD; | ||
192 | |||
193 | /* return group to which a task belongs */ | ||
194 | static inline struct task_group *task_group(struct task_struct *p) | ||
195 | { | ||
196 | struct task_group *tg; | ||
197 | |||
198 | #ifdef CONFIG_FAIR_USER_SCHED | ||
199 | tg = p->user->tg; | ||
200 | #else | ||
201 | tg = &init_task_group; | ||
202 | #endif | ||
203 | |||
204 | return tg; | ||
205 | } | ||
206 | |||
207 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | ||
208 | static inline void set_task_cfs_rq(struct task_struct *p) | ||
209 | { | ||
210 | p->se.cfs_rq = task_group(p)->cfs_rq[task_cpu(p)]; | ||
211 | p->se.parent = task_group(p)->se[task_cpu(p)]; | ||
212 | } | ||
213 | |||
214 | #else | ||
215 | |||
216 | static inline void set_task_cfs_rq(struct task_struct *p) { } | ||
217 | |||
218 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
219 | |||
180 | /* CFS-related fields in a runqueue */ | 220 | /* CFS-related fields in a runqueue */ |
181 | struct cfs_rq { | 221 | struct cfs_rq { |
182 | struct load_weight load; | 222 | struct load_weight load; |
183 | unsigned long nr_running; | 223 | unsigned long nr_running; |
184 | 224 | ||
185 | s64 fair_clock; | ||
186 | u64 exec_clock; | 225 | u64 exec_clock; |
187 | s64 wait_runtime; | 226 | u64 min_vruntime; |
188 | u64 sleeper_bonus; | ||
189 | unsigned long wait_runtime_overruns, wait_runtime_underruns; | ||
190 | 227 | ||
191 | struct rb_root tasks_timeline; | 228 | struct rb_root tasks_timeline; |
192 | struct rb_node *rb_leftmost; | 229 | struct rb_node *rb_leftmost; |
193 | struct rb_node *rb_load_balance_curr; | 230 | struct rb_node *rb_load_balance_curr; |
194 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
195 | /* 'curr' points to currently running entity on this cfs_rq. | 231 | /* 'curr' points to currently running entity on this cfs_rq. |
196 | * It is set to NULL otherwise (i.e when none are currently running). | 232 | * It is set to NULL otherwise (i.e when none are currently running). |
197 | */ | 233 | */ |
198 | struct sched_entity *curr; | 234 | struct sched_entity *curr; |
235 | |||
236 | unsigned long nr_spread_over; | ||
237 | |||
238 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
199 | struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ | 239 | struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ |
200 | 240 | ||
201 | /* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in | 241 | /* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in |
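
The new task_group keeps one sched_entity and one cfs_rq per CPU, and set_task_cfs_rq() above simply indexes both arrays with task_cpu(p). A stand-alone sketch of that indexing, using invented demo_* types rather than the kernel structures:

	/* Sketch of the per-group, per-cpu indexing behind set_task_cfs_rq(). */
	#include <stdio.h>

	#define DEMO_NR_CPUS 4

	struct demo_cfs_rq { unsigned long nr_running; };

	struct demo_task_group {
		struct demo_cfs_rq *cfs_rq[DEMO_NR_CPUS];	/* one runqueue per cpu */
	};

	struct demo_task {
		struct demo_task_group *group;
		int cpu;
		struct demo_cfs_rq *cfs_rq;	/* cached: group's runqueue on this cpu */
	};

	/* Analogue of set_task_cfs_rq(): pick the group's runqueue for the task's CPU. */
	static void demo_set_task_cfs_rq(struct demo_task *p)
	{
		p->cfs_rq = p->group->cfs_rq[p->cpu];
	}

	int main(void)
	{
		static struct demo_cfs_rq per_cpu_rq[DEMO_NR_CPUS];
		static struct demo_task_group init_group;
		struct demo_task t = { .group = &init_group, .cpu = 2 };
		int i;

		for (i = 0; i < DEMO_NR_CPUS; i++)
			init_group.cfs_rq[i] = &per_cpu_rq[i];

		demo_set_task_cfs_rq(&t);
		t.cfs_rq->nr_running++;
		printf("task accounted on cpu %d, nr_running=%lu\n",
		       t.cpu, t.cfs_rq->nr_running);
		return 0;
	}
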
@@ -206,6 +246,8 @@ struct cfs_rq { | |||
206 | * list is used during load balance. | 246 | * list is used during load balance. |
207 | */ | 247 | */ |
208 | struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */ | 248 | struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */ |
249 | struct task_group *tg; /* group that "owns" this runqueue */ | ||
250 | struct rcu_head rcu; | ||
209 | #endif | 251 | #endif |
210 | }; | 252 | }; |
211 | 253 | ||
@@ -237,7 +279,7 @@ struct rq { | |||
237 | #ifdef CONFIG_NO_HZ | 279 | #ifdef CONFIG_NO_HZ |
238 | unsigned char in_nohz_recently; | 280 | unsigned char in_nohz_recently; |
239 | #endif | 281 | #endif |
240 | struct load_stat ls; /* capture load from *all* tasks on this cpu */ | 282 | struct load_weight load; /* capture load from *all* tasks on this cpu */ |
241 | unsigned long nr_load_updates; | 283 | unsigned long nr_load_updates; |
242 | u64 nr_switches; | 284 | u64 nr_switches; |
243 | 285 | ||
@@ -289,16 +331,19 @@ struct rq { | |||
289 | unsigned long yld_exp_empty; | 331 | unsigned long yld_exp_empty; |
290 | unsigned long yld_act_empty; | 332 | unsigned long yld_act_empty; |
291 | unsigned long yld_both_empty; | 333 | unsigned long yld_both_empty; |
292 | unsigned long yld_cnt; | 334 | unsigned long yld_count; |
293 | 335 | ||
294 | /* schedule() stats */ | 336 | /* schedule() stats */ |
295 | unsigned long sched_switch; | 337 | unsigned long sched_switch; |
296 | unsigned long sched_cnt; | 338 | unsigned long sched_count; |
297 | unsigned long sched_goidle; | 339 | unsigned long sched_goidle; |
298 | 340 | ||
299 | /* try_to_wake_up() stats */ | 341 | /* try_to_wake_up() stats */ |
300 | unsigned long ttwu_cnt; | 342 | unsigned long ttwu_count; |
301 | unsigned long ttwu_local; | 343 | unsigned long ttwu_local; |
344 | |||
345 | /* BKL stats */ | ||
346 | unsigned long bkl_count; | ||
302 | #endif | 347 | #endif |
303 | struct lock_class_key rq_lock_key; | 348 | struct lock_class_key rq_lock_key; |
304 | }; | 349 | }; |
@@ -383,6 +428,37 @@ static void update_rq_clock(struct rq *rq) | |||
383 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 428 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
384 | 429 | ||
385 | /* | 430 | /* |
431 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: | ||
432 | */ | ||
433 | #ifdef CONFIG_SCHED_DEBUG | ||
434 | # define const_debug __read_mostly | ||
435 | #else | ||
436 | # define const_debug static const | ||
437 | #endif | ||
438 | |||
439 | /* | ||
440 | * Debugging: various feature bits | ||
441 | */ | ||
442 | enum { | ||
443 | SCHED_FEAT_NEW_FAIR_SLEEPERS = 1, | ||
444 | SCHED_FEAT_START_DEBIT = 2, | ||
445 | SCHED_FEAT_TREE_AVG = 4, | ||
446 | SCHED_FEAT_APPROX_AVG = 8, | ||
447 | SCHED_FEAT_WAKEUP_PREEMPT = 16, | ||
448 | SCHED_FEAT_PREEMPT_RESTRICT = 32, | ||
449 | }; | ||
450 | |||
451 | const_debug unsigned int sysctl_sched_features = | ||
452 | SCHED_FEAT_NEW_FAIR_SLEEPERS *1 | | ||
453 | SCHED_FEAT_START_DEBIT *1 | | ||
454 | SCHED_FEAT_TREE_AVG *0 | | ||
455 | SCHED_FEAT_APPROX_AVG *0 | | ||
456 | SCHED_FEAT_WAKEUP_PREEMPT *1 | | ||
457 | SCHED_FEAT_PREEMPT_RESTRICT *1; | ||
458 | |||
459 | #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) | ||
460 | |||
461 | /* | ||
386 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | 462 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu |
387 | * clock constructed from sched_clock(): | 463 | * clock constructed from sched_clock(): |
388 | */ | 464 | */ |
@@ -400,18 +476,7 @@ unsigned long long cpu_clock(int cpu) | |||
400 | 476 | ||
401 | return now; | 477 | return now; |
402 | } | 478 | } |
403 | 479 | EXPORT_SYMBOL_GPL(cpu_clock); | |
404 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
405 | /* Change a task's ->cfs_rq if it moves across CPUs */ | ||
406 | static inline void set_task_cfs_rq(struct task_struct *p) | ||
407 | { | ||
408 | p->se.cfs_rq = &task_rq(p)->cfs; | ||
409 | } | ||
410 | #else | ||
411 | static inline void set_task_cfs_rq(struct task_struct *p) | ||
412 | { | ||
413 | } | ||
414 | #endif | ||
415 | 480 | ||
416 | #ifndef prepare_arch_switch | 481 | #ifndef prepare_arch_switch |
417 | # define prepare_arch_switch(next) do { } while (0) | 482 | # define prepare_arch_switch(next) do { } while (0) |
@@ -497,16 +562,13 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
497 | static inline struct rq *__task_rq_lock(struct task_struct *p) | 562 | static inline struct rq *__task_rq_lock(struct task_struct *p) |
498 | __acquires(rq->lock) | 563 | __acquires(rq->lock) |
499 | { | 564 | { |
500 | struct rq *rq; | 565 | for (;;) { |
501 | 566 | struct rq *rq = task_rq(p); | |
502 | repeat_lock_task: | 567 | spin_lock(&rq->lock); |
503 | rq = task_rq(p); | 568 | if (likely(rq == task_rq(p))) |
504 | spin_lock(&rq->lock); | 569 | return rq; |
505 | if (unlikely(rq != task_rq(p))) { | ||
506 | spin_unlock(&rq->lock); | 570 | spin_unlock(&rq->lock); |
507 | goto repeat_lock_task; | ||
508 | } | 571 | } |
509 | return rq; | ||
510 | } | 572 | } |
511 | 573 | ||
512 | /* | 574 | /* |
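
The rewritten __task_rq_lock()/task_rq_lock() follow a lock-then-revalidate loop: read the runqueue the task currently appears to be on, lock it, and retry if the task migrated in between. A generic user-space rendition of that pattern; struct item/struct home, the pthread mutex and the GCC atomic builtins are illustrative choices, not scheduler code:

	#include <pthread.h>

	struct home {
		pthread_mutex_t lock;
	};

	struct item {
		struct home *home;	/* may be changed by other threads */
	};

	static struct home *item_home_lock(struct item *it)
	{
		for (;;) {
			struct home *h = __atomic_load_n(&it->home, __ATOMIC_ACQUIRE);

			pthread_mutex_lock(&h->lock);
			/* still the right home now that we hold its lock? */
			if (h == __atomic_load_n(&it->home, __ATOMIC_ACQUIRE))
				return h;	/* caller unlocks h->lock */
			pthread_mutex_unlock(&h->lock);
			/* the item moved between the read and the lock: retry */
		}
	}

	int main(void)
	{
		struct home h = { .lock = PTHREAD_MUTEX_INITIALIZER };
		struct item it = { .home = &h };
		struct home *locked = item_home_lock(&it);

		pthread_mutex_unlock(&locked->lock);
		return 0;
	}
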
@@ -519,18 +581,17 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | |||
519 | { | 581 | { |
520 | struct rq *rq; | 582 | struct rq *rq; |
521 | 583 | ||
522 | repeat_lock_task: | 584 | for (;;) { |
523 | local_irq_save(*flags); | 585 | local_irq_save(*flags); |
524 | rq = task_rq(p); | 586 | rq = task_rq(p); |
525 | spin_lock(&rq->lock); | 587 | spin_lock(&rq->lock); |
526 | if (unlikely(rq != task_rq(p))) { | 588 | if (likely(rq == task_rq(p))) |
589 | return rq; | ||
527 | spin_unlock_irqrestore(&rq->lock, *flags); | 590 | spin_unlock_irqrestore(&rq->lock, *flags); |
528 | goto repeat_lock_task; | ||
529 | } | 591 | } |
530 | return rq; | ||
531 | } | 592 | } |
532 | 593 | ||
533 | static inline void __task_rq_unlock(struct rq *rq) | 594 | static void __task_rq_unlock(struct rq *rq) |
534 | __releases(rq->lock) | 595 | __releases(rq->lock) |
535 | { | 596 | { |
536 | spin_unlock(&rq->lock); | 597 | spin_unlock(&rq->lock); |
@@ -545,7 +606,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | |||
545 | /* | 606 | /* |
546 | * this_rq_lock - lock this runqueue and disable interrupts. | 607 | * this_rq_lock - lock this runqueue and disable interrupts. |
547 | */ | 608 | */ |
548 | static inline struct rq *this_rq_lock(void) | 609 | static struct rq *this_rq_lock(void) |
549 | __acquires(rq->lock) | 610 | __acquires(rq->lock) |
550 | { | 611 | { |
551 | struct rq *rq; | 612 | struct rq *rq; |
@@ -645,19 +706,6 @@ static inline void resched_task(struct task_struct *p) | |||
645 | } | 706 | } |
646 | #endif | 707 | #endif |
647 | 708 | ||
648 | static u64 div64_likely32(u64 divident, unsigned long divisor) | ||
649 | { | ||
650 | #if BITS_PER_LONG == 32 | ||
651 | if (likely(divident <= 0xffffffffULL)) | ||
652 | return (u32)divident / divisor; | ||
653 | do_div(divident, divisor); | ||
654 | |||
655 | return divident; | ||
656 | #else | ||
657 | return divident / divisor; | ||
658 | #endif | ||
659 | } | ||
660 | |||
661 | #if BITS_PER_LONG == 32 | 709 | #if BITS_PER_LONG == 32 |
662 | # define WMULT_CONST (~0UL) | 710 | # define WMULT_CONST (~0UL) |
663 | #else | 711 | #else |
@@ -699,16 +747,14 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | |||
699 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | 747 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); |
700 | } | 748 | } |
701 | 749 | ||
702 | static void update_load_add(struct load_weight *lw, unsigned long inc) | 750 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
703 | { | 751 | { |
704 | lw->weight += inc; | 752 | lw->weight += inc; |
705 | lw->inv_weight = 0; | ||
706 | } | 753 | } |
707 | 754 | ||
708 | static void update_load_sub(struct load_weight *lw, unsigned long dec) | 755 | static inline void update_load_sub(struct load_weight *lw, unsigned long dec) |
709 | { | 756 | { |
710 | lw->weight -= dec; | 757 | lw->weight -= dec; |
711 | lw->inv_weight = 0; | ||
712 | } | 758 | } |
713 | 759 | ||
714 | /* | 760 | /* |
@@ -784,29 +830,20 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
784 | int *this_best_prio, struct rq_iterator *iterator); | 830 | int *this_best_prio, struct rq_iterator *iterator); |
785 | 831 | ||
786 | #include "sched_stats.h" | 832 | #include "sched_stats.h" |
787 | #include "sched_rt.c" | ||
788 | #include "sched_fair.c" | ||
789 | #include "sched_idletask.c" | 833 | #include "sched_idletask.c" |
834 | #include "sched_fair.c" | ||
835 | #include "sched_rt.c" | ||
790 | #ifdef CONFIG_SCHED_DEBUG | 836 | #ifdef CONFIG_SCHED_DEBUG |
791 | # include "sched_debug.c" | 837 | # include "sched_debug.c" |
792 | #endif | 838 | #endif |
793 | 839 | ||
794 | #define sched_class_highest (&rt_sched_class) | 840 | #define sched_class_highest (&rt_sched_class) |
795 | 841 | ||
796 | static void __update_curr_load(struct rq *rq, struct load_stat *ls) | ||
797 | { | ||
798 | if (rq->curr != rq->idle && ls->load.weight) { | ||
799 | ls->delta_exec += ls->delta_stat; | ||
800 | ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load); | ||
801 | ls->delta_stat = 0; | ||
802 | } | ||
803 | } | ||
804 | |||
805 | /* | 842 | /* |
806 | * Update delta_exec, delta_fair fields for rq. | 843 | * Update delta_exec, delta_fair fields for rq. |
807 | * | 844 | * |
808 | * delta_fair clock advances at a rate inversely proportional to | 845 | * delta_fair clock advances at a rate inversely proportional to |
809 | * total load (rq->ls.load.weight) on the runqueue, while | 846 | * total load (rq->load.weight) on the runqueue, while |
810 | * delta_exec advances at the same rate as wall-clock (provided | 847 | * delta_exec advances at the same rate as wall-clock (provided |
811 | * cpu is not idle). | 848 | * cpu is not idle). |
812 | * | 849 | * |
@@ -814,35 +851,17 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls) | |||
814 | * runqueue over any given interval. This (smoothened) load is used | 851 | * runqueue over any given interval. This (smoothened) load is used |
815 | * during load balance. | 852 | * during load balance. |
816 | * | 853 | * |
817 | * This function is called /before/ updating rq->ls.load | 854 | * This function is called /before/ updating rq->load |
818 | * and when switching tasks. | 855 | * and when switching tasks. |
819 | */ | 856 | */ |
820 | static void update_curr_load(struct rq *rq) | ||
821 | { | ||
822 | struct load_stat *ls = &rq->ls; | ||
823 | u64 start; | ||
824 | |||
825 | start = ls->load_update_start; | ||
826 | ls->load_update_start = rq->clock; | ||
827 | ls->delta_stat += rq->clock - start; | ||
828 | /* | ||
829 | * Stagger updates to ls->delta_fair. Very frequent updates | ||
830 | * can be expensive. | ||
831 | */ | ||
832 | if (ls->delta_stat >= sysctl_sched_stat_granularity) | ||
833 | __update_curr_load(rq, ls); | ||
834 | } | ||
835 | |||
836 | static inline void inc_load(struct rq *rq, const struct task_struct *p) | 857 | static inline void inc_load(struct rq *rq, const struct task_struct *p) |
837 | { | 858 | { |
838 | update_curr_load(rq); | 859 | update_load_add(&rq->load, p->se.load.weight); |
839 | update_load_add(&rq->ls.load, p->se.load.weight); | ||
840 | } | 860 | } |
841 | 861 | ||
842 | static inline void dec_load(struct rq *rq, const struct task_struct *p) | 862 | static inline void dec_load(struct rq *rq, const struct task_struct *p) |
843 | { | 863 | { |
844 | update_curr_load(rq); | 864 | update_load_sub(&rq->load, p->se.load.weight); |
845 | update_load_sub(&rq->ls.load, p->se.load.weight); | ||
846 | } | 865 | } |
847 | 866 | ||
848 | static void inc_nr_running(struct task_struct *p, struct rq *rq) | 867 | static void inc_nr_running(struct task_struct *p, struct rq *rq) |
@@ -859,8 +878,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq) | |||
859 | 878 | ||
860 | static void set_load_weight(struct task_struct *p) | 879 | static void set_load_weight(struct task_struct *p) |
861 | { | 880 | { |
862 | p->se.wait_runtime = 0; | ||
863 | |||
864 | if (task_has_rt_policy(p)) { | 881 | if (task_has_rt_policy(p)) { |
865 | p->se.load.weight = prio_to_weight[0] * 2; | 882 | p->se.load.weight = prio_to_weight[0] * 2; |
866 | p->se.load.inv_weight = prio_to_wmult[0] >> 1; | 883 | p->se.load.inv_weight = prio_to_wmult[0] >> 1; |
@@ -952,20 +969,6 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
952 | } | 969 | } |
953 | 970 | ||
954 | /* | 971 | /* |
955 | * activate_idle_task - move idle task to the _front_ of runqueue. | ||
956 | */ | ||
957 | static inline void activate_idle_task(struct task_struct *p, struct rq *rq) | ||
958 | { | ||
959 | update_rq_clock(rq); | ||
960 | |||
961 | if (p->state == TASK_UNINTERRUPTIBLE) | ||
962 | rq->nr_uninterruptible--; | ||
963 | |||
964 | enqueue_task(rq, p, 0); | ||
965 | inc_nr_running(p, rq); | ||
966 | } | ||
967 | |||
968 | /* | ||
969 | * deactivate_task - remove a task from the runqueue. | 972 | * deactivate_task - remove a task from the runqueue. |
970 | */ | 973 | */ |
971 | static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | 974 | static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) |
@@ -989,32 +992,50 @@ inline int task_curr(const struct task_struct *p) | |||
989 | /* Used instead of source_load when we know the type == 0 */ | 992 | /* Used instead of source_load when we know the type == 0 */ |
990 | unsigned long weighted_cpuload(const int cpu) | 993 | unsigned long weighted_cpuload(const int cpu) |
991 | { | 994 | { |
992 | return cpu_rq(cpu)->ls.load.weight; | 995 | return cpu_rq(cpu)->load.weight; |
993 | } | 996 | } |
994 | 997 | ||
995 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | 998 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) |
996 | { | 999 | { |
997 | #ifdef CONFIG_SMP | 1000 | #ifdef CONFIG_SMP |
998 | task_thread_info(p)->cpu = cpu; | 1001 | task_thread_info(p)->cpu = cpu; |
999 | set_task_cfs_rq(p); | ||
1000 | #endif | 1002 | #endif |
1003 | set_task_cfs_rq(p); | ||
1001 | } | 1004 | } |
1002 | 1005 | ||
1003 | #ifdef CONFIG_SMP | 1006 | #ifdef CONFIG_SMP |
1004 | 1007 | ||
1008 | /* | ||
1009 | * Is this task likely cache-hot: | ||
1010 | */ | ||
1011 | static inline int | ||
1012 | task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | ||
1013 | { | ||
1014 | s64 delta; | ||
1015 | |||
1016 | if (p->sched_class != &fair_sched_class) | ||
1017 | return 0; | ||
1018 | |||
1019 | if (sysctl_sched_migration_cost == -1) | ||
1020 | return 1; | ||
1021 | if (sysctl_sched_migration_cost == 0) | ||
1022 | return 0; | ||
1023 | |||
1024 | delta = now - p->se.exec_start; | ||
1025 | |||
1026 | return delta < (s64)sysctl_sched_migration_cost; | ||
1027 | } | ||
1028 | |||
1029 | |||
1005 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 1030 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
1006 | { | 1031 | { |
1007 | int old_cpu = task_cpu(p); | 1032 | int old_cpu = task_cpu(p); |
1008 | struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); | 1033 | struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); |
1009 | u64 clock_offset, fair_clock_offset; | 1034 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), |
1035 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); | ||
1036 | u64 clock_offset; | ||
1010 | 1037 | ||
1011 | clock_offset = old_rq->clock - new_rq->clock; | 1038 | clock_offset = old_rq->clock - new_rq->clock; |
1012 | fair_clock_offset = old_rq->cfs.fair_clock - new_rq->cfs.fair_clock; | ||
1013 | |||
1014 | if (p->se.wait_start_fair) | ||
1015 | p->se.wait_start_fair -= fair_clock_offset; | ||
1016 | if (p->se.sleep_start_fair) | ||
1017 | p->se.sleep_start_fair -= fair_clock_offset; | ||
1018 | 1039 | ||
1019 | #ifdef CONFIG_SCHEDSTATS | 1040 | #ifdef CONFIG_SCHEDSTATS |
1020 | if (p->se.wait_start) | 1041 | if (p->se.wait_start) |
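
task_hot() above calls a task cache-hot when it last ran less than sysctl_sched_migration_cost nanoseconds ago, with -1 meaning "always hot" and 0 "never hot". A stand-alone version of the same decision; the 500000 ns figure is only a value picked for the demo:

	#include <stdio.h>
	#include <stdint.h>

	static long long migration_cost_ns = 500000;	/* -1: always hot, 0: never hot */

	static int demo_task_hot(uint64_t now_ns, uint64_t exec_start_ns)
	{
		if (migration_cost_ns == -1)
			return 1;
		if (migration_cost_ns == 0)
			return 0;

		return (int64_t)(now_ns - exec_start_ns) < migration_cost_ns;
	}

	int main(void)
	{
		uint64_t now = 10000000;	/* 10 ms on some monotonic clock */

		printf("ran 0.1 ms ago -> hot? %d\n", demo_task_hot(now, now - 100000));  /* 1 */
		printf("ran 5 ms ago   -> hot? %d\n", demo_task_hot(now, now - 5000000)); /* 0 */
		return 0;
	}
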
@@ -1023,7 +1044,14 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
1023 | p->se.sleep_start -= clock_offset; | 1044 | p->se.sleep_start -= clock_offset; |
1024 | if (p->se.block_start) | 1045 | if (p->se.block_start) |
1025 | p->se.block_start -= clock_offset; | 1046 | p->se.block_start -= clock_offset; |
1047 | if (old_cpu != new_cpu) { | ||
1048 | schedstat_inc(p, se.nr_migrations); | ||
1049 | if (task_hot(p, old_rq->clock, NULL)) | ||
1050 | schedstat_inc(p, se.nr_forced2_migrations); | ||
1051 | } | ||
1026 | #endif | 1052 | #endif |
1053 | p->se.vruntime -= old_cfsrq->min_vruntime - | ||
1054 | new_cfsrq->min_vruntime; | ||
1027 | 1055 | ||
1028 | __set_task_cpu(p, new_cpu); | 1056 | __set_task_cpu(p, new_cpu); |
1029 | } | 1057 | } |
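
The new vruntime adjustment in set_task_cpu() re-bases a migrating task's vruntime from the old runqueue's min_vruntime to the new one's, so what is preserved is the task's lag relative to its queue rather than an absolute clock value. A small arithmetic check (numbers are arbitrary; the unsigned subtraction wraps the same way it would in the kernel):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t old_min = 1000000;	/* min_vruntime of the old cfs_rq */
		uint64_t new_min = 9000000;	/* min_vruntime of the new cfs_rq */
		uint64_t vruntime = 1200000;	/* 200000 ns above the old queue's min */

		/* same adjustment as in set_task_cpu() above */
		vruntime -= old_min - new_min;

		printf("new vruntime %llu, lag on new queue %llu\n",
		       (unsigned long long)vruntime,
		       (unsigned long long)(vruntime - new_min));	/* lag stays 200000 */
		return 0;
	}
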
@@ -1078,69 +1106,71 @@ void wait_task_inactive(struct task_struct *p) | |||
1078 | int running, on_rq; | 1106 | int running, on_rq; |
1079 | struct rq *rq; | 1107 | struct rq *rq; |
1080 | 1108 | ||
1081 | repeat: | 1109 | for (;;) { |
1082 | /* | 1110 | /* |
1083 | * We do the initial early heuristics without holding | 1111 | * We do the initial early heuristics without holding |
1084 | * any task-queue locks at all. We'll only try to get | 1112 | * any task-queue locks at all. We'll only try to get |
1085 | * the runqueue lock when things look like they will | 1113 | * the runqueue lock when things look like they will |
1086 | * work out! | 1114 | * work out! |
1087 | */ | 1115 | */ |
1088 | rq = task_rq(p); | 1116 | rq = task_rq(p); |
1089 | 1117 | ||
1090 | /* | 1118 | /* |
1091 | * If the task is actively running on another CPU | 1119 | * If the task is actively running on another CPU |
1092 | * still, just relax and busy-wait without holding | 1120 | * still, just relax and busy-wait without holding |
1093 | * any locks. | 1121 | * any locks. |
1094 | * | 1122 | * |
1095 | * NOTE! Since we don't hold any locks, it's not | 1123 | * NOTE! Since we don't hold any locks, it's not |
1096 | * even sure that "rq" stays as the right runqueue! | 1124 | * even sure that "rq" stays as the right runqueue! |
1097 | * But we don't care, since "task_running()" will | 1125 | * But we don't care, since "task_running()" will |
1098 | * return false if the runqueue has changed and p | 1126 | * return false if the runqueue has changed and p |
1099 | * is actually now running somewhere else! | 1127 | * is actually now running somewhere else! |
1100 | */ | 1128 | */ |
1101 | while (task_running(rq, p)) | 1129 | while (task_running(rq, p)) |
1102 | cpu_relax(); | 1130 | cpu_relax(); |
1103 | 1131 | ||
1104 | /* | 1132 | /* |
1105 | * Ok, time to look more closely! We need the rq | 1133 | * Ok, time to look more closely! We need the rq |
1106 | * lock now, to be *sure*. If we're wrong, we'll | 1134 | * lock now, to be *sure*. If we're wrong, we'll |
1107 | * just go back and repeat. | 1135 | * just go back and repeat. |
1108 | */ | 1136 | */ |
1109 | rq = task_rq_lock(p, &flags); | 1137 | rq = task_rq_lock(p, &flags); |
1110 | running = task_running(rq, p); | 1138 | running = task_running(rq, p); |
1111 | on_rq = p->se.on_rq; | 1139 | on_rq = p->se.on_rq; |
1112 | task_rq_unlock(rq, &flags); | 1140 | task_rq_unlock(rq, &flags); |
1113 | 1141 | ||
1114 | /* | 1142 | /* |
1115 | * Was it really running after all now that we | 1143 | * Was it really running after all now that we |
1116 | * checked with the proper locks actually held? | 1144 | * checked with the proper locks actually held? |
1117 | * | 1145 | * |
1118 | * Oops. Go back and try again.. | 1146 | * Oops. Go back and try again.. |
1119 | */ | 1147 | */ |
1120 | if (unlikely(running)) { | 1148 | if (unlikely(running)) { |
1121 | cpu_relax(); | 1149 | cpu_relax(); |
1122 | goto repeat; | 1150 | continue; |
1123 | } | 1151 | } |
1124 | 1152 | ||
1125 | /* | 1153 | /* |
1126 | * It's not enough that it's not actively running, | 1154 | * It's not enough that it's not actively running, |
1127 | * it must be off the runqueue _entirely_, and not | 1155 | * it must be off the runqueue _entirely_, and not |
1128 | * preempted! | 1156 | * preempted! |
1129 | * | 1157 | * |
1130 | * So if it wa still runnable (but just not actively | 1158 | * So if it wa still runnable (but just not actively |
1131 | * running right now), it's preempted, and we should | 1159 | * running right now), it's preempted, and we should |
1132 | * yield - it could be a while. | 1160 | * yield - it could be a while. |
1133 | */ | 1161 | */ |
1134 | if (unlikely(on_rq)) { | 1162 | if (unlikely(on_rq)) { |
1135 | yield(); | 1163 | schedule_timeout_uninterruptible(1); |
1136 | goto repeat; | 1164 | continue; |
1137 | } | 1165 | } |
1138 | 1166 | ||
1139 | /* | 1167 | /* |
1140 | * Ahh, all good. It wasn't running, and it wasn't | 1168 | * Ahh, all good. It wasn't running, and it wasn't |
1141 | * runnable, which means that it will never become | 1169 | * runnable, which means that it will never become |
1142 | * running in the future either. We're all done! | 1170 | * running in the future either. We're all done! |
1143 | */ | 1171 | */ |
1172 | break; | ||
1173 | } | ||
1144 | } | 1174 | } |
1145 | 1175 | ||
1146 | /*** | 1176 | /*** |
@@ -1174,7 +1204,7 @@ void kick_process(struct task_struct *p) | |||
1174 | * We want to under-estimate the load of migration sources, to | 1204 | * We want to under-estimate the load of migration sources, to |
1175 | * balance conservatively. | 1205 | * balance conservatively. |
1176 | */ | 1206 | */ |
1177 | static inline unsigned long source_load(int cpu, int type) | 1207 | static unsigned long source_load(int cpu, int type) |
1178 | { | 1208 | { |
1179 | struct rq *rq = cpu_rq(cpu); | 1209 | struct rq *rq = cpu_rq(cpu); |
1180 | unsigned long total = weighted_cpuload(cpu); | 1210 | unsigned long total = weighted_cpuload(cpu); |
@@ -1189,7 +1219,7 @@ static inline unsigned long source_load(int cpu, int type) | |||
1189 | * Return a high guess at the load of a migration-target cpu weighted | 1219 | * Return a high guess at the load of a migration-target cpu weighted |
1190 | * according to the scheduling class and "nice" value. | 1220 | * according to the scheduling class and "nice" value. |
1191 | */ | 1221 | */ |
1192 | static inline unsigned long target_load(int cpu, int type) | 1222 | static unsigned long target_load(int cpu, int type) |
1193 | { | 1223 | { |
1194 | struct rq *rq = cpu_rq(cpu); | 1224 | struct rq *rq = cpu_rq(cpu); |
1195 | unsigned long total = weighted_cpuload(cpu); | 1225 | unsigned long total = weighted_cpuload(cpu); |
@@ -1231,7 +1261,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | |||
1231 | 1261 | ||
1232 | /* Skip over this group if it has no CPUs allowed */ | 1262 | /* Skip over this group if it has no CPUs allowed */ |
1233 | if (!cpus_intersects(group->cpumask, p->cpus_allowed)) | 1263 | if (!cpus_intersects(group->cpumask, p->cpus_allowed)) |
1234 | goto nextgroup; | 1264 | continue; |
1235 | 1265 | ||
1236 | local_group = cpu_isset(this_cpu, group->cpumask); | 1266 | local_group = cpu_isset(this_cpu, group->cpumask); |
1237 | 1267 | ||
@@ -1259,9 +1289,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | |||
1259 | min_load = avg_load; | 1289 | min_load = avg_load; |
1260 | idlest = group; | 1290 | idlest = group; |
1261 | } | 1291 | } |
1262 | nextgroup: | 1292 | } while (group = group->next, group != sd->groups); |
1263 | group = group->next; | ||
1264 | } while (group != sd->groups); | ||
1265 | 1293 | ||
1266 | if (!idlest || 100*this_load < imbalance*min_load) | 1294 | if (!idlest || 100*this_load < imbalance*min_load) |
1267 | return NULL; | 1295 | return NULL; |
@@ -1393,8 +1421,13 @@ static int wake_idle(int cpu, struct task_struct *p) | |||
1393 | if (sd->flags & SD_WAKE_IDLE) { | 1421 | if (sd->flags & SD_WAKE_IDLE) { |
1394 | cpus_and(tmp, sd->span, p->cpus_allowed); | 1422 | cpus_and(tmp, sd->span, p->cpus_allowed); |
1395 | for_each_cpu_mask(i, tmp) { | 1423 | for_each_cpu_mask(i, tmp) { |
1396 | if (idle_cpu(i)) | 1424 | if (idle_cpu(i)) { |
1425 | if (i != task_cpu(p)) { | ||
1426 | schedstat_inc(p, | ||
1427 | se.nr_wakeups_idle); | ||
1428 | } | ||
1397 | return i; | 1429 | return i; |
1430 | } | ||
1398 | } | 1431 | } |
1399 | } else { | 1432 | } else { |
1400 | break; | 1433 | break; |
@@ -1425,7 +1458,7 @@ static inline int wake_idle(int cpu, struct task_struct *p) | |||
1425 | */ | 1458 | */ |
1426 | static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | 1459 | static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) |
1427 | { | 1460 | { |
1428 | int cpu, this_cpu, success = 0; | 1461 | int cpu, orig_cpu, this_cpu, success = 0; |
1429 | unsigned long flags; | 1462 | unsigned long flags; |
1430 | long old_state; | 1463 | long old_state; |
1431 | struct rq *rq; | 1464 | struct rq *rq; |
@@ -1444,6 +1477,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1444 | goto out_running; | 1477 | goto out_running; |
1445 | 1478 | ||
1446 | cpu = task_cpu(p); | 1479 | cpu = task_cpu(p); |
1480 | orig_cpu = cpu; | ||
1447 | this_cpu = smp_processor_id(); | 1481 | this_cpu = smp_processor_id(); |
1448 | 1482 | ||
1449 | #ifdef CONFIG_SMP | 1483 | #ifdef CONFIG_SMP |
@@ -1452,7 +1486,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1452 | 1486 | ||
1453 | new_cpu = cpu; | 1487 | new_cpu = cpu; |
1454 | 1488 | ||
1455 | schedstat_inc(rq, ttwu_cnt); | 1489 | schedstat_inc(rq, ttwu_count); |
1456 | if (cpu == this_cpu) { | 1490 | if (cpu == this_cpu) { |
1457 | schedstat_inc(rq, ttwu_local); | 1491 | schedstat_inc(rq, ttwu_local); |
1458 | goto out_set_cpu; | 1492 | goto out_set_cpu; |
@@ -1487,6 +1521,13 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1487 | unsigned long tl = this_load; | 1521 | unsigned long tl = this_load; |
1488 | unsigned long tl_per_task; | 1522 | unsigned long tl_per_task; |
1489 | 1523 | ||
1524 | /* | ||
1525 | * Attract cache-cold tasks on sync wakeups: | ||
1526 | */ | ||
1527 | if (sync && !task_hot(p, rq->clock, this_sd)) | ||
1528 | goto out_set_cpu; | ||
1529 | |||
1530 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | ||
1490 | tl_per_task = cpu_avg_load_per_task(this_cpu); | 1531 | tl_per_task = cpu_avg_load_per_task(this_cpu); |
1491 | 1532 | ||
1492 | /* | 1533 | /* |
@@ -1506,6 +1547,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1506 | * there is no bad imbalance. | 1547 | * there is no bad imbalance. |
1507 | */ | 1548 | */ |
1508 | schedstat_inc(this_sd, ttwu_move_affine); | 1549 | schedstat_inc(this_sd, ttwu_move_affine); |
1550 | schedstat_inc(p, se.nr_wakeups_affine); | ||
1509 | goto out_set_cpu; | 1551 | goto out_set_cpu; |
1510 | } | 1552 | } |
1511 | } | 1553 | } |
@@ -1517,6 +1559,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
1517 | if (this_sd->flags & SD_WAKE_BALANCE) { | 1559 | if (this_sd->flags & SD_WAKE_BALANCE) { |
1518 | if (imbalance*this_load <= 100*load) { | 1560 | if (imbalance*this_load <= 100*load) { |
1519 | schedstat_inc(this_sd, ttwu_move_balance); | 1561 | schedstat_inc(this_sd, ttwu_move_balance); |
1562 | schedstat_inc(p, se.nr_wakeups_passive); | ||
1520 | goto out_set_cpu; | 1563 | goto out_set_cpu; |
1521 | } | 1564 | } |
1522 | } | 1565 | } |
@@ -1542,18 +1585,18 @@ out_set_cpu: | |||
1542 | 1585 | ||
1543 | out_activate: | 1586 | out_activate: |
1544 | #endif /* CONFIG_SMP */ | 1587 | #endif /* CONFIG_SMP */ |
1588 | schedstat_inc(p, se.nr_wakeups); | ||
1589 | if (sync) | ||
1590 | schedstat_inc(p, se.nr_wakeups_sync); | ||
1591 | if (orig_cpu != cpu) | ||
1592 | schedstat_inc(p, se.nr_wakeups_migrate); | ||
1593 | if (cpu == this_cpu) | ||
1594 | schedstat_inc(p, se.nr_wakeups_local); | ||
1595 | else | ||
1596 | schedstat_inc(p, se.nr_wakeups_remote); | ||
1545 | update_rq_clock(rq); | 1597 | update_rq_clock(rq); |
1546 | activate_task(rq, p, 1); | 1598 | activate_task(rq, p, 1); |
1547 | /* | 1599 | check_preempt_curr(rq, p); |
1548 | * Sync wakeups (i.e. those types of wakeups where the waker | ||
1549 | * has indicated that it will leave the CPU in short order) | ||
1550 | * don't trigger a preemption, if the woken up task will run on | ||
1551 | * this cpu. (in this case the 'I will reschedule' promise of | ||
1552 | * the waker guarantees that the freshly woken up task is going | ||
1553 | * to be considered on this CPU.) | ||
1554 | */ | ||
1555 | if (!sync || cpu != this_cpu) | ||
1556 | check_preempt_curr(rq, p); | ||
1557 | success = 1; | 1600 | success = 1; |
1558 | 1601 | ||
1559 | out_running: | 1602 | out_running: |
@@ -1584,28 +1627,20 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state) | |||
1584 | */ | 1627 | */ |
1585 | static void __sched_fork(struct task_struct *p) | 1628 | static void __sched_fork(struct task_struct *p) |
1586 | { | 1629 | { |
1587 | p->se.wait_start_fair = 0; | ||
1588 | p->se.exec_start = 0; | 1630 | p->se.exec_start = 0; |
1589 | p->se.sum_exec_runtime = 0; | 1631 | p->se.sum_exec_runtime = 0; |
1590 | p->se.prev_sum_exec_runtime = 0; | 1632 | p->se.prev_sum_exec_runtime = 0; |
1591 | p->se.delta_exec = 0; | ||
1592 | p->se.delta_fair_run = 0; | ||
1593 | p->se.delta_fair_sleep = 0; | ||
1594 | p->se.wait_runtime = 0; | ||
1595 | p->se.sleep_start_fair = 0; | ||
1596 | 1633 | ||
1597 | #ifdef CONFIG_SCHEDSTATS | 1634 | #ifdef CONFIG_SCHEDSTATS |
1598 | p->se.wait_start = 0; | 1635 | p->se.wait_start = 0; |
1599 | p->se.sum_wait_runtime = 0; | ||
1600 | p->se.sum_sleep_runtime = 0; | 1636 | p->se.sum_sleep_runtime = 0; |
1601 | p->se.sleep_start = 0; | 1637 | p->se.sleep_start = 0; |
1602 | p->se.block_start = 0; | 1638 | p->se.block_start = 0; |
1603 | p->se.sleep_max = 0; | 1639 | p->se.sleep_max = 0; |
1604 | p->se.block_max = 0; | 1640 | p->se.block_max = 0; |
1605 | p->se.exec_max = 0; | 1641 | p->se.exec_max = 0; |
1642 | p->se.slice_max = 0; | ||
1606 | p->se.wait_max = 0; | 1643 | p->se.wait_max = 0; |
1607 | p->se.wait_runtime_overruns = 0; | ||
1608 | p->se.wait_runtime_underruns = 0; | ||
1609 | #endif | 1644 | #endif |
1610 | 1645 | ||
1611 | INIT_LIST_HEAD(&p->run_list); | 1646 | INIT_LIST_HEAD(&p->run_list); |
@@ -1636,12 +1671,14 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
1636 | #ifdef CONFIG_SMP | 1671 | #ifdef CONFIG_SMP |
1637 | cpu = sched_balance_self(cpu, SD_BALANCE_FORK); | 1672 | cpu = sched_balance_self(cpu, SD_BALANCE_FORK); |
1638 | #endif | 1673 | #endif |
1639 | __set_task_cpu(p, cpu); | 1674 | set_task_cpu(p, cpu); |
1640 | 1675 | ||
1641 | /* | 1676 | /* |
1642 | * Make sure we do not leak PI boosting priority to the child: | 1677 | * Make sure we do not leak PI boosting priority to the child: |
1643 | */ | 1678 | */ |
1644 | p->prio = current->normal_prio; | 1679 | p->prio = current->normal_prio; |
1680 | if (!rt_prio(p->prio)) | ||
1681 | p->sched_class = &fair_sched_class; | ||
1645 | 1682 | ||
1646 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 1683 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
1647 | if (likely(sched_info_on())) | 1684 | if (likely(sched_info_on())) |
@@ -1658,12 +1695,6 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
1658 | } | 1695 | } |
1659 | 1696 | ||
1660 | /* | 1697 | /* |
1661 | * After fork, child runs first. (default) If set to 0 then | ||
1662 | * parent will (try to) run first. | ||
1663 | */ | ||
1664 | unsigned int __read_mostly sysctl_sched_child_runs_first = 1; | ||
1665 | |||
1666 | /* | ||
1667 | * wake_up_new_task - wake up a newly created task for the first time. | 1698 | * wake_up_new_task - wake up a newly created task for the first time. |
1668 | * | 1699 | * |
1669 | * This function will do some initial scheduler statistics housekeeping | 1700 | * This function will do some initial scheduler statistics housekeeping |
@@ -1674,24 +1705,14 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
1674 | { | 1705 | { |
1675 | unsigned long flags; | 1706 | unsigned long flags; |
1676 | struct rq *rq; | 1707 | struct rq *rq; |
1677 | int this_cpu; | ||
1678 | 1708 | ||
1679 | rq = task_rq_lock(p, &flags); | 1709 | rq = task_rq_lock(p, &flags); |
1680 | BUG_ON(p->state != TASK_RUNNING); | 1710 | BUG_ON(p->state != TASK_RUNNING); |
1681 | this_cpu = smp_processor_id(); /* parent's CPU */ | ||
1682 | update_rq_clock(rq); | 1711 | update_rq_clock(rq); |
1683 | 1712 | ||
1684 | p->prio = effective_prio(p); | 1713 | p->prio = effective_prio(p); |
1685 | 1714 | ||
1686 | if (rt_prio(p->prio)) | 1715 | if (!p->sched_class->task_new || !current->se.on_rq || !rq->cfs.curr) { |
1687 | p->sched_class = &rt_sched_class; | ||
1688 | else | ||
1689 | p->sched_class = &fair_sched_class; | ||
1690 | |||
1691 | if (!p->sched_class->task_new || !sysctl_sched_child_runs_first || | ||
1692 | (clone_flags & CLONE_VM) || task_cpu(p) != this_cpu || | ||
1693 | !current->se.on_rq) { | ||
1694 | |||
1695 | activate_task(rq, p, 0); | 1716 | activate_task(rq, p, 0); |
1696 | } else { | 1717 | } else { |
1697 | /* | 1718 | /* |
@@ -1800,7 +1821,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, | |||
1800 | * with the lock held can cause deadlocks; see schedule() for | 1821 | * with the lock held can cause deadlocks; see schedule() for |
1801 | * details.) | 1822 | * details.) |
1802 | */ | 1823 | */ |
1803 | static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) | 1824 | static void finish_task_switch(struct rq *rq, struct task_struct *prev) |
1804 | __releases(rq->lock) | 1825 | __releases(rq->lock) |
1805 | { | 1826 | { |
1806 | struct mm_struct *mm = rq->prev_mm; | 1827 | struct mm_struct *mm = rq->prev_mm; |
@@ -1982,42 +2003,10 @@ unsigned long nr_active(void) | |||
1982 | */ | 2003 | */ |
1983 | static void update_cpu_load(struct rq *this_rq) | 2004 | static void update_cpu_load(struct rq *this_rq) |
1984 | { | 2005 | { |
1985 | u64 fair_delta64, exec_delta64, idle_delta64, sample_interval64, tmp64; | 2006 | unsigned long this_load = this_rq->load.weight; |
1986 | unsigned long total_load = this_rq->ls.load.weight; | ||
1987 | unsigned long this_load = total_load; | ||
1988 | struct load_stat *ls = &this_rq->ls; | ||
1989 | int i, scale; | 2007 | int i, scale; |
1990 | 2008 | ||
1991 | this_rq->nr_load_updates++; | 2009 | this_rq->nr_load_updates++; |
1992 | if (unlikely(!(sysctl_sched_features & SCHED_FEAT_PRECISE_CPU_LOAD))) | ||
1993 | goto do_avg; | ||
1994 | |||
1995 | /* Update delta_fair/delta_exec fields first */ | ||
1996 | update_curr_load(this_rq); | ||
1997 | |||
1998 | fair_delta64 = ls->delta_fair + 1; | ||
1999 | ls->delta_fair = 0; | ||
2000 | |||
2001 | exec_delta64 = ls->delta_exec + 1; | ||
2002 | ls->delta_exec = 0; | ||
2003 | |||
2004 | sample_interval64 = this_rq->clock - ls->load_update_last; | ||
2005 | ls->load_update_last = this_rq->clock; | ||
2006 | |||
2007 | if ((s64)sample_interval64 < (s64)TICK_NSEC) | ||
2008 | sample_interval64 = TICK_NSEC; | ||
2009 | |||
2010 | if (exec_delta64 > sample_interval64) | ||
2011 | exec_delta64 = sample_interval64; | ||
2012 | |||
2013 | idle_delta64 = sample_interval64 - exec_delta64; | ||
2014 | |||
2015 | tmp64 = div64_64(SCHED_LOAD_SCALE * exec_delta64, fair_delta64); | ||
2016 | tmp64 = div64_64(tmp64 * exec_delta64, sample_interval64); | ||
2017 | |||
2018 | this_load = (unsigned long)tmp64; | ||
2019 | |||
2020 | do_avg: | ||
2021 | 2010 | ||
2022 | /* Update our load: */ | 2011 | /* Update our load: */ |
2023 | for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { | 2012 | for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) { |
@@ -2027,7 +2016,13 @@ do_avg: | |||
2027 | 2016 | ||
2028 | old_load = this_rq->cpu_load[i]; | 2017 | old_load = this_rq->cpu_load[i]; |
2029 | new_load = this_load; | 2018 | new_load = this_load; |
2030 | 2019 | /* | |
2020 | * Round up the averaging division if load is increasing. This | ||
2021 | * prevents us from getting stuck on 9 if the load is 10, for | ||
2022 | * example. | ||
2023 | */ | ||
2024 | if (new_load > old_load) | ||
2025 | new_load += scale-1; | ||
2031 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; | 2026 | this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; |
2032 | } | 2027 | } |
2033 | } | 2028 | } |
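
The cpu_load[] update above is a decaying average with a power-of-two window, and the new comment explains the round-up: with plain truncation a rising load converges to one below its target. Iterating the same formula in user space shows the difference (decay() and the constants are mine):

	#include <stdio.h>

	static unsigned long decay(unsigned long old_load, unsigned long target,
				   unsigned long scale, int shift, int round_up)
	{
		unsigned long new_load = target;

		/* the "+ scale - 1" added by the patch, applied only when rising */
		if (round_up && new_load > old_load)
			new_load += scale - 1;

		return (old_load * (scale - 1) + new_load) >> shift;
	}

	int main(void)
	{
		/* i = 1 in the kernel loop: scale = 2, shift = 1 */
		unsigned long truncated = 0, rounded = 0;
		int step;

		for (step = 0; step < 16; step++) {
			truncated = decay(truncated, 10, 2, 1, 0);
			rounded   = decay(rounded,   10, 2, 1, 1);
		}
		printf("truncating: %lu, with round-up: %lu\n",
		       truncated, rounded);	/* 9 vs 10 */
		return 0;
	}
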
@@ -2179,13 +2174,38 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
2179 | * 2) cannot be migrated to this CPU due to cpus_allowed, or | 2174 | * 2) cannot be migrated to this CPU due to cpus_allowed, or |
2180 | * 3) are cache-hot on their current CPU. | 2175 | * 3) are cache-hot on their current CPU. |
2181 | */ | 2176 | */ |
2182 | if (!cpu_isset(this_cpu, p->cpus_allowed)) | 2177 | if (!cpu_isset(this_cpu, p->cpus_allowed)) { |
2178 | schedstat_inc(p, se.nr_failed_migrations_affine); | ||
2183 | return 0; | 2179 | return 0; |
2180 | } | ||
2184 | *all_pinned = 0; | 2181 | *all_pinned = 0; |
2185 | 2182 | ||
2186 | if (task_running(rq, p)) | 2183 | if (task_running(rq, p)) { |
2184 | schedstat_inc(p, se.nr_failed_migrations_running); | ||
2187 | return 0; | 2185 | return 0; |
2186 | } | ||
2187 | |||
2188 | /* | ||
2189 | * Aggressive migration if: | ||
2190 | * 1) task is cache cold, or | ||
2191 | * 2) too many balance attempts have failed. | ||
2192 | */ | ||
2193 | |||
2194 | if (!task_hot(p, rq->clock, sd) || | ||
2195 | sd->nr_balance_failed > sd->cache_nice_tries) { | ||
2196 | #ifdef CONFIG_SCHEDSTATS | ||
2197 | if (task_hot(p, rq->clock, sd)) { | ||
2198 | schedstat_inc(sd, lb_hot_gained[idle]); | ||
2199 | schedstat_inc(p, se.nr_forced_migrations); | ||
2200 | } | ||
2201 | #endif | ||
2202 | return 1; | ||
2203 | } | ||
2188 | 2204 | ||
2205 | if (task_hot(p, rq->clock, sd)) { | ||
2206 | schedstat_inc(p, se.nr_failed_migrations_hot); | ||
2207 | return 0; | ||
2208 | } | ||
2189 | return 1; | 2209 | return 1; |
2190 | } | 2210 | } |
2191 | 2211 | ||
@@ -2264,7 +2284,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2264 | struct sched_domain *sd, enum cpu_idle_type idle, | 2284 | struct sched_domain *sd, enum cpu_idle_type idle, |
2265 | int *all_pinned) | 2285 | int *all_pinned) |
2266 | { | 2286 | { |
2267 | struct sched_class *class = sched_class_highest; | 2287 | const struct sched_class *class = sched_class_highest; |
2268 | unsigned long total_load_moved = 0; | 2288 | unsigned long total_load_moved = 0; |
2269 | int this_best_prio = this_rq->curr->prio; | 2289 | int this_best_prio = this_rq->curr->prio; |
2270 | 2290 | ||
@@ -2289,7 +2309,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2289 | static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | 2309 | static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, |
2290 | struct sched_domain *sd, enum cpu_idle_type idle) | 2310 | struct sched_domain *sd, enum cpu_idle_type idle) |
2291 | { | 2311 | { |
2292 | struct sched_class *class; | 2312 | const struct sched_class *class; |
2293 | int this_best_prio = MAX_PRIO; | 2313 | int this_best_prio = MAX_PRIO; |
2294 | 2314 | ||
2295 | for (class = sched_class_highest; class; class = class->next) | 2315 | for (class = sched_class_highest; class; class = class->next) |
@@ -2653,7 +2673,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
2653 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 2673 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
2654 | sd_idle = 1; | 2674 | sd_idle = 1; |
2655 | 2675 | ||
2656 | schedstat_inc(sd, lb_cnt[idle]); | 2676 | schedstat_inc(sd, lb_count[idle]); |
2657 | 2677 | ||
2658 | redo: | 2678 | redo: |
2659 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 2679 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, |
@@ -2806,7 +2826,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
2806 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 2826 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
2807 | sd_idle = 1; | 2827 | sd_idle = 1; |
2808 | 2828 | ||
2809 | schedstat_inc(sd, lb_cnt[CPU_NEWLY_IDLE]); | 2829 | schedstat_inc(sd, lb_count[CPU_NEWLY_IDLE]); |
2810 | redo: | 2830 | redo: |
2811 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, | 2831 | group = find_busiest_group(sd, this_cpu, &imbalance, CPU_NEWLY_IDLE, |
2812 | &sd_idle, &cpus, NULL); | 2832 | &sd_idle, &cpus, NULL); |
@@ -2940,7 +2960,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
2940 | } | 2960 | } |
2941 | 2961 | ||
2942 | if (likely(sd)) { | 2962 | if (likely(sd)) { |
2943 | schedstat_inc(sd, alb_cnt); | 2963 | schedstat_inc(sd, alb_count); |
2944 | 2964 | ||
2945 | if (move_one_task(target_rq, target_cpu, busiest_rq, | 2965 | if (move_one_task(target_rq, target_cpu, busiest_rq, |
2946 | sd, CPU_IDLE)) | 2966 | sd, CPU_IDLE)) |
@@ -3033,7 +3053,7 @@ static DEFINE_SPINLOCK(balancing); | |||
3033 | * | 3053 | * |
3034 | * Balancing parameters are set up in arch_init_sched_domains. | 3054 | * Balancing parameters are set up in arch_init_sched_domains. |
3035 | */ | 3055 | */ |
3036 | static inline void rebalance_domains(int cpu, enum cpu_idle_type idle) | 3056 | static void rebalance_domains(int cpu, enum cpu_idle_type idle) |
3037 | { | 3057 | { |
3038 | int balance = 1; | 3058 | int balance = 1; |
3039 | struct rq *rq = cpu_rq(cpu); | 3059 | struct rq *rq = cpu_rq(cpu); |
@@ -3280,6 +3300,25 @@ void account_user_time(struct task_struct *p, cputime_t cputime) | |||
3280 | } | 3300 | } |
3281 | 3301 | ||
3282 | /* | 3302 | /* |
3303 | * Account guest cpu time to a process. | ||
3304 | * @p: the process that the cpu time gets accounted to | ||
3305 | * @cputime: the cpu time spent in virtual machine since the last update | ||
3306 | */ | ||
3307 | void account_guest_time(struct task_struct *p, cputime_t cputime) | ||
3308 | { | ||
3309 | cputime64_t tmp; | ||
3310 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | ||
3311 | |||
3312 | tmp = cputime_to_cputime64(cputime); | ||
3313 | |||
3314 | p->utime = cputime_add(p->utime, cputime); | ||
3315 | p->gtime = cputime_add(p->gtime, cputime); | ||
3316 | |||
3317 | cpustat->user = cputime64_add(cpustat->user, tmp); | ||
3318 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | ||
3319 | } | ||
3320 | |||
3321 | /* | ||
3283 | * Account system cpu time to a process. | 3322 | * Account system cpu time to a process. |
3284 | * @p: the process that the cpu time gets accounted to | 3323 | * @p: the process that the cpu time gets accounted to |
3285 | * @hardirq_offset: the offset to subtract from hardirq_count() | 3324 | * @hardirq_offset: the offset to subtract from hardirq_count() |
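
account_guest_time() charges the tick to the task's utime and gtime and to both the user and guest fields of the per-cpu stats, and account_system_time() below diverts to it when PF_VCPU is set. A mock of that bookkeeping with plain integers standing in for cputime_t (all demo_* names are invented):

	#include <stdio.h>

	struct demo_task {
		unsigned long long utime, stime, gtime;
		int vcpu;			/* stands in for the PF_VCPU flag */
	};

	struct demo_cpustat {
		unsigned long long user, system, guest;
	};

	static struct demo_cpustat cpustat;

	static void demo_account_guest_time(struct demo_task *p, unsigned long long t)
	{
		p->utime += t;			/* guest time also counts as user time */
		p->gtime += t;
		cpustat.user  += t;
		cpustat.guest += t;
	}

	static void demo_account_system_time(struct demo_task *p, unsigned long long t)
	{
		if (p->vcpu) {			/* tick hit while running guest code */
			demo_account_guest_time(p, t);
			p->vcpu = 0;
			return;
		}
		p->stime += t;
		cpustat.system += t;
	}

	int main(void)
	{
		struct demo_task vcpu_thread = { .vcpu = 1 };

		demo_account_system_time(&vcpu_thread, 4);	/* one "tick" */
		printf("utime=%llu gtime=%llu stime=%llu cpustat.guest=%llu\n",
		       vcpu_thread.utime, vcpu_thread.gtime,
		       vcpu_thread.stime, cpustat.guest);	/* 4 4 0 4 */
		return 0;
	}
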
@@ -3292,6 +3331,12 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
3292 | struct rq *rq = this_rq(); | 3331 | struct rq *rq = this_rq(); |
3293 | cputime64_t tmp; | 3332 | cputime64_t tmp; |
3294 | 3333 | ||
3334 | if (p->flags & PF_VCPU) { | ||
3335 | account_guest_time(p, cputime); | ||
3336 | p->flags &= ~PF_VCPU; | ||
3337 | return; | ||
3338 | } | ||
3339 | |||
3295 | p->stime = cputime_add(p->stime, cputime); | 3340 | p->stime = cputime_add(p->stime, cputime); |
3296 | 3341 | ||
3297 | /* Add system time to cpustat. */ | 3342 | /* Add system time to cpustat. */ |
@@ -3430,7 +3475,13 @@ static inline void schedule_debug(struct task_struct *prev) | |||
3430 | 3475 | ||
3431 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 3476 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
3432 | 3477 | ||
3433 | schedstat_inc(this_rq(), sched_cnt); | 3478 | schedstat_inc(this_rq(), sched_count); |
3479 | #ifdef CONFIG_SCHEDSTATS | ||
3480 | if (unlikely(prev->lock_depth >= 0)) { | ||
3481 | schedstat_inc(this_rq(), bkl_count); | ||
3482 | schedstat_inc(prev, sched_info.bkl_count); | ||
3483 | } | ||
3484 | #endif | ||
3434 | } | 3485 | } |
3435 | 3486 | ||
3436 | /* | 3487 | /* |
@@ -3439,7 +3490,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
3439 | static inline struct task_struct * | 3490 | static inline struct task_struct * |
3440 | pick_next_task(struct rq *rq, struct task_struct *prev) | 3491 | pick_next_task(struct rq *rq, struct task_struct *prev) |
3441 | { | 3492 | { |
3442 | struct sched_class *class; | 3493 | const struct sched_class *class; |
3443 | struct task_struct *p; | 3494 | struct task_struct *p; |
3444 | 3495 | ||
3445 | /* | 3496 | /* |
@@ -3488,9 +3539,13 @@ need_resched_nonpreemptible: | |||
3488 | 3539 | ||
3489 | schedule_debug(prev); | 3540 | schedule_debug(prev); |
3490 | 3541 | ||
3491 | spin_lock_irq(&rq->lock); | 3542 | /* |
3492 | clear_tsk_need_resched(prev); | 3543 | * Do the rq-clock update outside the rq lock: |
3544 | */ | ||
3545 | local_irq_disable(); | ||
3493 | __update_rq_clock(rq); | 3546 | __update_rq_clock(rq); |
3547 | spin_lock(&rq->lock); | ||
3548 | clear_tsk_need_resched(prev); | ||
3494 | 3549 | ||
3495 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 3550 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
3496 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && | 3551 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && |
@@ -3550,27 +3605,30 @@ asmlinkage void __sched preempt_schedule(void) | |||
3550 | if (likely(ti->preempt_count || irqs_disabled())) | 3605 | if (likely(ti->preempt_count || irqs_disabled())) |
3551 | return; | 3606 | return; |
3552 | 3607 | ||
3553 | need_resched: | 3608 | do { |
3554 | add_preempt_count(PREEMPT_ACTIVE); | 3609 | add_preempt_count(PREEMPT_ACTIVE); |
3555 | /* | 3610 | |
3556 | * We keep the big kernel semaphore locked, but we | 3611 | /* |
3557 | * clear ->lock_depth so that schedule() doesnt | 3612 | * We keep the big kernel semaphore locked, but we |
3558 | * auto-release the semaphore: | 3613 | * clear ->lock_depth so that schedule() doesnt |
3559 | */ | 3614 | * auto-release the semaphore: |
3615 | */ | ||
3560 | #ifdef CONFIG_PREEMPT_BKL | 3616 | #ifdef CONFIG_PREEMPT_BKL |
3561 | saved_lock_depth = task->lock_depth; | 3617 | saved_lock_depth = task->lock_depth; |
3562 | task->lock_depth = -1; | 3618 | task->lock_depth = -1; |
3563 | #endif | 3619 | #endif |
3564 | schedule(); | 3620 | schedule(); |
3565 | #ifdef CONFIG_PREEMPT_BKL | 3621 | #ifdef CONFIG_PREEMPT_BKL |
3566 | task->lock_depth = saved_lock_depth; | 3622 | task->lock_depth = saved_lock_depth; |
3567 | #endif | 3623 | #endif |
3568 | sub_preempt_count(PREEMPT_ACTIVE); | 3624 | sub_preempt_count(PREEMPT_ACTIVE); |
3569 | 3625 | ||
3570 | /* we could miss a preemption opportunity between schedule and now */ | 3626 | /* |
3571 | barrier(); | 3627 | * Check again in case we missed a preemption opportunity |
3572 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) | 3628 | * between schedule and now. |
3573 | goto need_resched; | 3629 | */ |
3630 | barrier(); | ||
3631 | } while (unlikely(test_thread_flag(TIF_NEED_RESCHED))); | ||
3574 | } | 3632 | } |
3575 | EXPORT_SYMBOL(preempt_schedule); | 3633 | EXPORT_SYMBOL(preempt_schedule); |
3576 | 3634 | ||
@@ -3590,29 +3648,32 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
3590 | /* Catch callers which need to be fixed */ | 3648 | /* Catch callers which need to be fixed */ |
3591 | BUG_ON(ti->preempt_count || !irqs_disabled()); | 3649 | BUG_ON(ti->preempt_count || !irqs_disabled()); |
3592 | 3650 | ||
3593 | need_resched: | 3651 | do { |
3594 | add_preempt_count(PREEMPT_ACTIVE); | 3652 | add_preempt_count(PREEMPT_ACTIVE); |
3595 | /* | 3653 | |
3596 | * We keep the big kernel semaphore locked, but we | 3654 | /* |
3597 | * clear ->lock_depth so that schedule() doesnt | 3655 | * We keep the big kernel semaphore locked, but we |
3598 | * auto-release the semaphore: | 3656 | * clear ->lock_depth so that schedule() doesnt |
3599 | */ | 3657 | * auto-release the semaphore: |
3658 | */ | ||
3600 | #ifdef CONFIG_PREEMPT_BKL | 3659 | #ifdef CONFIG_PREEMPT_BKL |
3601 | saved_lock_depth = task->lock_depth; | 3660 | saved_lock_depth = task->lock_depth; |
3602 | task->lock_depth = -1; | 3661 | task->lock_depth = -1; |
3603 | #endif | 3662 | #endif |
3604 | local_irq_enable(); | 3663 | local_irq_enable(); |
3605 | schedule(); | 3664 | schedule(); |
3606 | local_irq_disable(); | 3665 | local_irq_disable(); |
3607 | #ifdef CONFIG_PREEMPT_BKL | 3666 | #ifdef CONFIG_PREEMPT_BKL |
3608 | task->lock_depth = saved_lock_depth; | 3667 | task->lock_depth = saved_lock_depth; |
3609 | #endif | 3668 | #endif |
3610 | sub_preempt_count(PREEMPT_ACTIVE); | 3669 | sub_preempt_count(PREEMPT_ACTIVE); |
3611 | 3670 | ||
3612 | /* we could miss a preemption opportunity between schedule and now */ | 3671 | /* |
3613 | barrier(); | 3672 | * Check again in case we missed a preemption opportunity |
3614 | if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) | 3673 | * between schedule and now. |
3615 | goto need_resched; | 3674 | */ |
3675 | barrier(); | ||
3676 | } while (unlikely(test_thread_flag(TIF_NEED_RESCHED))); | ||
3616 | } | 3677 | } |
3617 | 3678 | ||
3618 | #endif /* CONFIG_PREEMPT */ | 3679 | #endif /* CONFIG_PREEMPT */ |
@@ -3636,10 +3697,9 @@ EXPORT_SYMBOL(default_wake_function); | |||
3636 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | 3697 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, |
3637 | int nr_exclusive, int sync, void *key) | 3698 | int nr_exclusive, int sync, void *key) |
3638 | { | 3699 | { |
3639 | struct list_head *tmp, *next; | 3700 | wait_queue_t *curr, *next; |
3640 | 3701 | ||
3641 | list_for_each_safe(tmp, next, &q->task_list) { | 3702 | list_for_each_entry_safe(curr, next, &q->task_list, task_list) { |
3642 | wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); | ||
3643 | unsigned flags = curr->flags; | 3703 | unsigned flags = curr->flags; |
3644 | 3704 | ||
3645 | if (curr->func(curr, mode, sync, key) && | 3705 | if (curr->func(curr, mode, sync, key) && |
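The wakeup loop switches from list_for_each_safe() plus a manual list_entry() to the typed list_for_each_entry_safe() iterator. The sketch below re-implements just enough of the list helpers in userspace (GNU C, like the kernel) to show why the explicit list_entry() line disappears; the macro bodies are simplified stand-ins, not the kernel's list.h:

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)

/* old style: untyped cursor, caller does list_entry() in the body */
#define list_for_each_safe(pos, n, head) \
	for (pos = (head)->next, n = pos->next; pos != (head); pos = n, n = pos->next)

/* new style: typed cursor, no list_entry() needed in the body */
#define list_for_each_entry_safe(pos, n, head, member)                     \
	for (pos = list_entry((head)->next, typeof(*pos), member),         \
	     n = list_entry(pos->member.next, typeof(*pos), member);       \
	     &pos->member != (head);                                       \
	     pos = n, n = list_entry(n->member.next, typeof(*n), member))

struct waiter { int id; struct list_head link; };

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev; new->next = head;
	head->prev->next = new; head->prev = new;
}

int main(void)
{
	struct list_head q = { &q, &q };
	struct waiter a = { 1 }, b = { 2 }, c = { 3 };
	struct waiter *curr, *next;

	list_add_tail(&a.link, &q);
	list_add_tail(&b.link, &q);
	list_add_tail(&c.link, &q);

	list_for_each_entry_safe(curr, next, &q, link)
		printf("waking waiter %d\n", curr->id);

	return 0;
}
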
@@ -3729,206 +3789,116 @@ void fastcall complete_all(struct completion *x) | |||
3729 | } | 3789 | } |
3730 | EXPORT_SYMBOL(complete_all); | 3790 | EXPORT_SYMBOL(complete_all); |
3731 | 3791 | ||
3732 | void fastcall __sched wait_for_completion(struct completion *x) | 3792 | static inline long __sched |
3733 | { | 3793 | do_wait_for_common(struct completion *x, long timeout, int state) |
3734 | might_sleep(); | ||
3735 | |||
3736 | spin_lock_irq(&x->wait.lock); | ||
3737 | if (!x->done) { | ||
3738 | DECLARE_WAITQUEUE(wait, current); | ||
3739 | |||
3740 | wait.flags |= WQ_FLAG_EXCLUSIVE; | ||
3741 | __add_wait_queue_tail(&x->wait, &wait); | ||
3742 | do { | ||
3743 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
3744 | spin_unlock_irq(&x->wait.lock); | ||
3745 | schedule(); | ||
3746 | spin_lock_irq(&x->wait.lock); | ||
3747 | } while (!x->done); | ||
3748 | __remove_wait_queue(&x->wait, &wait); | ||
3749 | } | ||
3750 | x->done--; | ||
3751 | spin_unlock_irq(&x->wait.lock); | ||
3752 | } | ||
3753 | EXPORT_SYMBOL(wait_for_completion); | ||
3754 | |||
3755 | unsigned long fastcall __sched | ||
3756 | wait_for_completion_timeout(struct completion *x, unsigned long timeout) | ||
3757 | { | 3794 | { |
3758 | might_sleep(); | ||
3759 | |||
3760 | spin_lock_irq(&x->wait.lock); | ||
3761 | if (!x->done) { | 3795 | if (!x->done) { |
3762 | DECLARE_WAITQUEUE(wait, current); | 3796 | DECLARE_WAITQUEUE(wait, current); |
3763 | 3797 | ||
3764 | wait.flags |= WQ_FLAG_EXCLUSIVE; | 3798 | wait.flags |= WQ_FLAG_EXCLUSIVE; |
3765 | __add_wait_queue_tail(&x->wait, &wait); | 3799 | __add_wait_queue_tail(&x->wait, &wait); |
3766 | do { | 3800 | do { |
3767 | __set_current_state(TASK_UNINTERRUPTIBLE); | 3801 | if (state == TASK_INTERRUPTIBLE && |
3802 | signal_pending(current)) { | ||
3803 | __remove_wait_queue(&x->wait, &wait); | ||
3804 | return -ERESTARTSYS; | ||
3805 | } | ||
3806 | __set_current_state(state); | ||
3768 | spin_unlock_irq(&x->wait.lock); | 3807 | spin_unlock_irq(&x->wait.lock); |
3769 | timeout = schedule_timeout(timeout); | 3808 | timeout = schedule_timeout(timeout); |
3770 | spin_lock_irq(&x->wait.lock); | 3809 | spin_lock_irq(&x->wait.lock); |
3771 | if (!timeout) { | 3810 | if (!timeout) { |
3772 | __remove_wait_queue(&x->wait, &wait); | 3811 | __remove_wait_queue(&x->wait, &wait); |
3773 | goto out; | 3812 | return timeout; |
3774 | } | 3813 | } |
3775 | } while (!x->done); | 3814 | } while (!x->done); |
3776 | __remove_wait_queue(&x->wait, &wait); | 3815 | __remove_wait_queue(&x->wait, &wait); |
3777 | } | 3816 | } |
3778 | x->done--; | 3817 | x->done--; |
3779 | out: | ||
3780 | spin_unlock_irq(&x->wait.lock); | ||
3781 | return timeout; | 3818 | return timeout; |
3782 | } | 3819 | } |
3783 | EXPORT_SYMBOL(wait_for_completion_timeout); | ||
3784 | 3820 | ||
3785 | int fastcall __sched wait_for_completion_interruptible(struct completion *x) | 3821 | static long __sched |
3822 | wait_for_common(struct completion *x, long timeout, int state) | ||
3786 | { | 3823 | { |
3787 | int ret = 0; | ||
3788 | |||
3789 | might_sleep(); | 3824 | might_sleep(); |
3790 | 3825 | ||
3791 | spin_lock_irq(&x->wait.lock); | 3826 | spin_lock_irq(&x->wait.lock); |
3792 | if (!x->done) { | 3827 | timeout = do_wait_for_common(x, timeout, state); |
3793 | DECLARE_WAITQUEUE(wait, current); | ||
3794 | |||
3795 | wait.flags |= WQ_FLAG_EXCLUSIVE; | ||
3796 | __add_wait_queue_tail(&x->wait, &wait); | ||
3797 | do { | ||
3798 | if (signal_pending(current)) { | ||
3799 | ret = -ERESTARTSYS; | ||
3800 | __remove_wait_queue(&x->wait, &wait); | ||
3801 | goto out; | ||
3802 | } | ||
3803 | __set_current_state(TASK_INTERRUPTIBLE); | ||
3804 | spin_unlock_irq(&x->wait.lock); | ||
3805 | schedule(); | ||
3806 | spin_lock_irq(&x->wait.lock); | ||
3807 | } while (!x->done); | ||
3808 | __remove_wait_queue(&x->wait, &wait); | ||
3809 | } | ||
3810 | x->done--; | ||
3811 | out: | ||
3812 | spin_unlock_irq(&x->wait.lock); | 3828 | spin_unlock_irq(&x->wait.lock); |
3829 | return timeout; | ||
3830 | } | ||
3813 | 3831 | ||
3814 | return ret; | 3832 | void fastcall __sched wait_for_completion(struct completion *x) |
3833 | { | ||
3834 | wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); | ||
3815 | } | 3835 | } |
3816 | EXPORT_SYMBOL(wait_for_completion_interruptible); | 3836 | EXPORT_SYMBOL(wait_for_completion); |
3817 | 3837 | ||
3818 | unsigned long fastcall __sched | 3838 | unsigned long fastcall __sched |
3819 | wait_for_completion_interruptible_timeout(struct completion *x, | 3839 | wait_for_completion_timeout(struct completion *x, unsigned long timeout) |
3820 | unsigned long timeout) | ||
3821 | { | 3840 | { |
3822 | might_sleep(); | 3841 | return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE); |
3823 | |||
3824 | spin_lock_irq(&x->wait.lock); | ||
3825 | if (!x->done) { | ||
3826 | DECLARE_WAITQUEUE(wait, current); | ||
3827 | |||
3828 | wait.flags |= WQ_FLAG_EXCLUSIVE; | ||
3829 | __add_wait_queue_tail(&x->wait, &wait); | ||
3830 | do { | ||
3831 | if (signal_pending(current)) { | ||
3832 | timeout = -ERESTARTSYS; | ||
3833 | __remove_wait_queue(&x->wait, &wait); | ||
3834 | goto out; | ||
3835 | } | ||
3836 | __set_current_state(TASK_INTERRUPTIBLE); | ||
3837 | spin_unlock_irq(&x->wait.lock); | ||
3838 | timeout = schedule_timeout(timeout); | ||
3839 | spin_lock_irq(&x->wait.lock); | ||
3840 | if (!timeout) { | ||
3841 | __remove_wait_queue(&x->wait, &wait); | ||
3842 | goto out; | ||
3843 | } | ||
3844 | } while (!x->done); | ||
3845 | __remove_wait_queue(&x->wait, &wait); | ||
3846 | } | ||
3847 | x->done--; | ||
3848 | out: | ||
3849 | spin_unlock_irq(&x->wait.lock); | ||
3850 | return timeout; | ||
3851 | } | 3842 | } |
3852 | EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); | 3843 | EXPORT_SYMBOL(wait_for_completion_timeout); |
3853 | 3844 | ||
3854 | static inline void | 3845 | int __sched wait_for_completion_interruptible(struct completion *x) |
3855 | sleep_on_head(wait_queue_head_t *q, wait_queue_t *wait, unsigned long *flags) | ||
3856 | { | 3846 | { |
3857 | spin_lock_irqsave(&q->lock, *flags); | 3847 | return wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE); |
3858 | __add_wait_queue(q, wait); | ||
3859 | spin_unlock(&q->lock); | ||
3860 | } | 3848 | } |
3849 | EXPORT_SYMBOL(wait_for_completion_interruptible); | ||
3861 | 3850 | ||
3862 | static inline void | 3851 | unsigned long fastcall __sched |
3863 | sleep_on_tail(wait_queue_head_t *q, wait_queue_t *wait, unsigned long *flags) | 3852 | wait_for_completion_interruptible_timeout(struct completion *x, |
3853 | unsigned long timeout) | ||
3864 | { | 3854 | { |
3865 | spin_lock_irq(&q->lock); | 3855 | return wait_for_common(x, timeout, TASK_INTERRUPTIBLE); |
3866 | __remove_wait_queue(q, wait); | ||
3867 | spin_unlock_irqrestore(&q->lock, *flags); | ||
3868 | } | 3856 | } |
3857 | EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); | ||
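The four public completion waiters are collapsed into thin wrappers around one helper that takes the timeout and the sleep state as parameters. The following userspace sketch shows only that wrapper shape; the names mirror the kernel's for readability, but the bodies are placeholders rather than real blocking code:

#include <stdio.h>

enum state { UNINTERRUPTIBLE, INTERRUPTIBLE };
#define MAX_TIMEOUT	((long)(~0UL >> 1))

static long wait_for_common(long timeout, enum state s)
{
	/* stand-in for the locked wait loop in do_wait_for_common() */
	printf("waiting: timeout=%ld interruptible=%d\n", timeout, s);
	return timeout;
}

static void wait_for_completion(void)
{ wait_for_common(MAX_TIMEOUT, UNINTERRUPTIBLE); }

static long wait_for_completion_timeout(long t)
{ return wait_for_common(t, UNINTERRUPTIBLE); }

static long wait_for_completion_interruptible(void)
{ return wait_for_common(MAX_TIMEOUT, INTERRUPTIBLE); }

static long wait_for_completion_interruptible_timeout(long t)
{ return wait_for_common(t, INTERRUPTIBLE); }

int main(void)
{
	wait_for_completion();
	wait_for_completion_timeout(100);
	wait_for_completion_interruptible();
	wait_for_completion_interruptible_timeout(100);
	return 0;
}
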
3869 | 3858 | ||
3870 | void __sched interruptible_sleep_on(wait_queue_head_t *q) | 3859 | static long __sched |
3860 | sleep_on_common(wait_queue_head_t *q, int state, long timeout) | ||
3871 | { | 3861 | { |
3872 | unsigned long flags; | 3862 | unsigned long flags; |
3873 | wait_queue_t wait; | 3863 | wait_queue_t wait; |
3874 | 3864 | ||
3875 | init_waitqueue_entry(&wait, current); | 3865 | init_waitqueue_entry(&wait, current); |
3876 | 3866 | ||
3877 | current->state = TASK_INTERRUPTIBLE; | 3867 | __set_current_state(state); |
3878 | 3868 | ||
3879 | sleep_on_head(q, &wait, &flags); | 3869 | spin_lock_irqsave(&q->lock, flags); |
3880 | schedule(); | 3870 | __add_wait_queue(q, &wait); |
3881 | sleep_on_tail(q, &wait, &flags); | 3871 | spin_unlock(&q->lock); |
3872 | timeout = schedule_timeout(timeout); | ||
3873 | spin_lock_irq(&q->lock); | ||
3874 | __remove_wait_queue(q, &wait); | ||
3875 | spin_unlock_irqrestore(&q->lock, flags); | ||
3876 | |||
3877 | return timeout; | ||
3878 | } | ||
3879 | |||
3880 | void __sched interruptible_sleep_on(wait_queue_head_t *q) | ||
3881 | { | ||
3882 | sleep_on_common(q, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); | ||
3882 | } | 3883 | } |
3883 | EXPORT_SYMBOL(interruptible_sleep_on); | 3884 | EXPORT_SYMBOL(interruptible_sleep_on); |
3884 | 3885 | ||
3885 | long __sched | 3886 | long __sched |
3886 | interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) | 3887 | interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) |
3887 | { | 3888 | { |
3888 | unsigned long flags; | 3889 | return sleep_on_common(q, TASK_INTERRUPTIBLE, timeout); |
3889 | wait_queue_t wait; | ||
3890 | |||
3891 | init_waitqueue_entry(&wait, current); | ||
3892 | |||
3893 | current->state = TASK_INTERRUPTIBLE; | ||
3894 | |||
3895 | sleep_on_head(q, &wait, &flags); | ||
3896 | timeout = schedule_timeout(timeout); | ||
3897 | sleep_on_tail(q, &wait, &flags); | ||
3898 | |||
3899 | return timeout; | ||
3900 | } | 3890 | } |
3901 | EXPORT_SYMBOL(interruptible_sleep_on_timeout); | 3891 | EXPORT_SYMBOL(interruptible_sleep_on_timeout); |
3902 | 3892 | ||
3903 | void __sched sleep_on(wait_queue_head_t *q) | 3893 | void __sched sleep_on(wait_queue_head_t *q) |
3904 | { | 3894 | { |
3905 | unsigned long flags; | 3895 | sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); |
3906 | wait_queue_t wait; | ||
3907 | |||
3908 | init_waitqueue_entry(&wait, current); | ||
3909 | |||
3910 | current->state = TASK_UNINTERRUPTIBLE; | ||
3911 | |||
3912 | sleep_on_head(q, &wait, &flags); | ||
3913 | schedule(); | ||
3914 | sleep_on_tail(q, &wait, &flags); | ||
3915 | } | 3896 | } |
3916 | EXPORT_SYMBOL(sleep_on); | 3897 | EXPORT_SYMBOL(sleep_on); |
3917 | 3898 | ||
3918 | long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) | 3899 | long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) |
3919 | { | 3900 | { |
3920 | unsigned long flags; | 3901 | return sleep_on_common(q, TASK_UNINTERRUPTIBLE, timeout); |
3921 | wait_queue_t wait; | ||
3922 | |||
3923 | init_waitqueue_entry(&wait, current); | ||
3924 | |||
3925 | current->state = TASK_UNINTERRUPTIBLE; | ||
3926 | |||
3927 | sleep_on_head(q, &wait, &flags); | ||
3928 | timeout = schedule_timeout(timeout); | ||
3929 | sleep_on_tail(q, &wait, &flags); | ||
3930 | |||
3931 | return timeout; | ||
3932 | } | 3902 | } |
3933 | EXPORT_SYMBOL(sleep_on_timeout); | 3903 | EXPORT_SYMBOL(sleep_on_timeout); |
3934 | 3904 | ||
@@ -3947,7 +3917,7 @@ EXPORT_SYMBOL(sleep_on_timeout); | |||
3947 | void rt_mutex_setprio(struct task_struct *p, int prio) | 3917 | void rt_mutex_setprio(struct task_struct *p, int prio) |
3948 | { | 3918 | { |
3949 | unsigned long flags; | 3919 | unsigned long flags; |
3950 | int oldprio, on_rq; | 3920 | int oldprio, on_rq, running; |
3951 | struct rq *rq; | 3921 | struct rq *rq; |
3952 | 3922 | ||
3953 | BUG_ON(prio < 0 || prio > MAX_PRIO); | 3923 | BUG_ON(prio < 0 || prio > MAX_PRIO); |
@@ -3957,8 +3927,12 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
3957 | 3927 | ||
3958 | oldprio = p->prio; | 3928 | oldprio = p->prio; |
3959 | on_rq = p->se.on_rq; | 3929 | on_rq = p->se.on_rq; |
3960 | if (on_rq) | 3930 | running = task_running(rq, p); |
3931 | if (on_rq) { | ||
3961 | dequeue_task(rq, p, 0); | 3932 | dequeue_task(rq, p, 0); |
3933 | if (running) | ||
3934 | p->sched_class->put_prev_task(rq, p); | ||
3935 | } | ||
3962 | 3936 | ||
3963 | if (rt_prio(prio)) | 3937 | if (rt_prio(prio)) |
3964 | p->sched_class = &rt_sched_class; | 3938 | p->sched_class = &rt_sched_class; |
@@ -3968,13 +3942,15 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
3968 | p->prio = prio; | 3942 | p->prio = prio; |
3969 | 3943 | ||
3970 | if (on_rq) { | 3944 | if (on_rq) { |
3945 | if (running) | ||
3946 | p->sched_class->set_curr_task(rq); | ||
3971 | enqueue_task(rq, p, 0); | 3947 | enqueue_task(rq, p, 0); |
3972 | /* | 3948 | /* |
3973 | * Reschedule if we are currently running on this runqueue and | 3949 | * Reschedule if we are currently running on this runqueue and |
3974 | * our priority decreased, or if we are not currently running on | 3950 | * our priority decreased, or if we are not currently running on |
3975 | * this runqueue and our priority is higher than the current's | 3951 | * this runqueue and our priority is higher than the current's |
3976 | */ | 3952 | */ |
3977 | if (task_running(rq, p)) { | 3953 | if (running) { |
3978 | if (p->prio > oldprio) | 3954 | if (p->prio > oldprio) |
3979 | resched_task(rq->curr); | 3955 | resched_task(rq->curr); |
3980 | } else { | 3956 | } else { |
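When the task being reprioritised is currently running, the new code brackets the priority/class switch with put_prev_task() on the old class and set_curr_task() on the new one, in addition to the usual dequeue/enqueue. A small sketch of that ordering, with made-up stand-in classes and a rt_prio()-style cutoff chosen only for illustration:

#include <stdio.h>

struct task;

struct sched_class {
	const char *name;
	void (*put_prev_task)(struct task *p);
	void (*set_curr_task)(struct task *p);
};

struct task {
	const struct sched_class *class;
	int prio, on_rq, running;
};

static void generic_put_prev(struct task *p) { printf("%s: put_prev_task\n", p->class->name); }
static void generic_set_curr(struct task *p) { printf("%s: set_curr_task\n", p->class->name); }

static const struct sched_class fair_class = { "fair", generic_put_prev, generic_set_curr };
static const struct sched_class rt_class   = { "rt",   generic_put_prev, generic_set_curr };

static void setprio(struct task *p, int prio)
{
	int on_rq = p->on_rq, running = p->running;

	if (on_rq) {
		p->on_rq = 0;				/* dequeue_task() */
		if (running)
			p->class->put_prev_task(p);	/* old class bookkeeping */
	}

	p->class = prio < 100 ? &rt_class : &fair_class;	/* illustrative cutoff */
	p->prio = prio;

	if (on_rq) {
		if (running)
			p->class->set_curr_task(p);	/* new class takes over curr */
		p->on_rq = 1;				/* enqueue_task() */
	}
}

int main(void)
{
	struct task t = { &fair_class, 120, 1, 1 };

	setprio(&t, 50);	/* boost to an RT-range priority */
	printf("now on class %s, prio %d\n", t.class->name, t.prio);
	return 0;
}
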
@@ -4138,7 +4114,7 @@ struct task_struct *idle_task(int cpu) | |||
4138 | * find_process_by_pid - find a process with a matching PID value. | 4114 | * find_process_by_pid - find a process with a matching PID value. |
4139 | * @pid: the pid in question. | 4115 | * @pid: the pid in question. |
4140 | */ | 4116 | */ |
4141 | static inline struct task_struct *find_process_by_pid(pid_t pid) | 4117 | static struct task_struct *find_process_by_pid(pid_t pid) |
4142 | { | 4118 | { |
4143 | return pid ? find_task_by_pid(pid) : current; | 4119 | return pid ? find_task_by_pid(pid) : current; |
4144 | } | 4120 | } |
@@ -4180,7 +4156,7 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
4180 | int sched_setscheduler(struct task_struct *p, int policy, | 4156 | int sched_setscheduler(struct task_struct *p, int policy, |
4181 | struct sched_param *param) | 4157 | struct sched_param *param) |
4182 | { | 4158 | { |
4183 | int retval, oldprio, oldpolicy = -1, on_rq; | 4159 | int retval, oldprio, oldpolicy = -1, on_rq, running; |
4184 | unsigned long flags; | 4160 | unsigned long flags; |
4185 | struct rq *rq; | 4161 | struct rq *rq; |
4186 | 4162 | ||
@@ -4262,18 +4238,26 @@ recheck: | |||
4262 | } | 4238 | } |
4263 | update_rq_clock(rq); | 4239 | update_rq_clock(rq); |
4264 | on_rq = p->se.on_rq; | 4240 | on_rq = p->se.on_rq; |
4265 | if (on_rq) | 4241 | running = task_running(rq, p); |
4242 | if (on_rq) { | ||
4266 | deactivate_task(rq, p, 0); | 4243 | deactivate_task(rq, p, 0); |
4244 | if (running) | ||
4245 | p->sched_class->put_prev_task(rq, p); | ||
4246 | } | ||
4247 | |||
4267 | oldprio = p->prio; | 4248 | oldprio = p->prio; |
4268 | __setscheduler(rq, p, policy, param->sched_priority); | 4249 | __setscheduler(rq, p, policy, param->sched_priority); |
4250 | |||
4269 | if (on_rq) { | 4251 | if (on_rq) { |
4252 | if (running) | ||
4253 | p->sched_class->set_curr_task(rq); | ||
4270 | activate_task(rq, p, 0); | 4254 | activate_task(rq, p, 0); |
4271 | /* | 4255 | /* |
4272 | * Reschedule if we are currently running on this runqueue and | 4256 | * Reschedule if we are currently running on this runqueue and |
4273 | * our priority decreased, or if we are not currently running on | 4257 | * our priority decreased, or if we are not currently running on |
4274 | * this runqueue and our priority is higher than the current's | 4258 | * this runqueue and our priority is higher than the current's |
4275 | */ | 4259 | */ |
4276 | if (task_running(rq, p)) { | 4260 | if (running) { |
4277 | if (p->prio > oldprio) | 4261 | if (p->prio > oldprio) |
4278 | resched_task(rq->curr); | 4262 | resched_task(rq->curr); |
4279 | } else { | 4263 | } else { |
@@ -4344,10 +4328,10 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) | |||
4344 | asmlinkage long sys_sched_getscheduler(pid_t pid) | 4328 | asmlinkage long sys_sched_getscheduler(pid_t pid) |
4345 | { | 4329 | { |
4346 | struct task_struct *p; | 4330 | struct task_struct *p; |
4347 | int retval = -EINVAL; | 4331 | int retval; |
4348 | 4332 | ||
4349 | if (pid < 0) | 4333 | if (pid < 0) |
4350 | goto out_nounlock; | 4334 | return -EINVAL; |
4351 | 4335 | ||
4352 | retval = -ESRCH; | 4336 | retval = -ESRCH; |
4353 | read_lock(&tasklist_lock); | 4337 | read_lock(&tasklist_lock); |
@@ -4358,8 +4342,6 @@ asmlinkage long sys_sched_getscheduler(pid_t pid) | |||
4358 | retval = p->policy; | 4342 | retval = p->policy; |
4359 | } | 4343 | } |
4360 | read_unlock(&tasklist_lock); | 4344 | read_unlock(&tasklist_lock); |
4361 | |||
4362 | out_nounlock: | ||
4363 | return retval; | 4345 | return retval; |
4364 | } | 4346 | } |
4365 | 4347 | ||
@@ -4372,10 +4354,10 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) | |||
4372 | { | 4354 | { |
4373 | struct sched_param lp; | 4355 | struct sched_param lp; |
4374 | struct task_struct *p; | 4356 | struct task_struct *p; |
4375 | int retval = -EINVAL; | 4357 | int retval; |
4376 | 4358 | ||
4377 | if (!param || pid < 0) | 4359 | if (!param || pid < 0) |
4378 | goto out_nounlock; | 4360 | return -EINVAL; |
4379 | 4361 | ||
4380 | read_lock(&tasklist_lock); | 4362 | read_lock(&tasklist_lock); |
4381 | p = find_process_by_pid(pid); | 4363 | p = find_process_by_pid(pid); |
@@ -4395,7 +4377,6 @@ asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) | |||
4395 | */ | 4377 | */ |
4396 | retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; | 4378 | retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; |
4397 | 4379 | ||
4398 | out_nounlock: | ||
4399 | return retval; | 4380 | return retval; |
4400 | 4381 | ||
4401 | out_unlock: | 4382 | out_unlock: |
@@ -4555,8 +4536,8 @@ asmlinkage long sys_sched_yield(void) | |||
4555 | { | 4536 | { |
4556 | struct rq *rq = this_rq_lock(); | 4537 | struct rq *rq = this_rq_lock(); |
4557 | 4538 | ||
4558 | schedstat_inc(rq, yld_cnt); | 4539 | schedstat_inc(rq, yld_count); |
4559 | current->sched_class->yield_task(rq, current); | 4540 | current->sched_class->yield_task(rq); |
4560 | 4541 | ||
4561 | /* | 4542 | /* |
4562 | * Since we are going to call schedule() anyway, there's | 4543 | * Since we are going to call schedule() anyway, there's |
@@ -4750,11 +4731,12 @@ asmlinkage | |||
4750 | long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) | 4731 | long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) |
4751 | { | 4732 | { |
4752 | struct task_struct *p; | 4733 | struct task_struct *p; |
4753 | int retval = -EINVAL; | 4734 | unsigned int time_slice; |
4735 | int retval; | ||
4754 | struct timespec t; | 4736 | struct timespec t; |
4755 | 4737 | ||
4756 | if (pid < 0) | 4738 | if (pid < 0) |
4757 | goto out_nounlock; | 4739 | return -EINVAL; |
4758 | 4740 | ||
4759 | retval = -ESRCH; | 4741 | retval = -ESRCH; |
4760 | read_lock(&tasklist_lock); | 4742 | read_lock(&tasklist_lock); |
@@ -4766,12 +4748,24 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) | |||
4766 | if (retval) | 4748 | if (retval) |
4767 | goto out_unlock; | 4749 | goto out_unlock; |
4768 | 4750 | ||
4769 | jiffies_to_timespec(p->policy == SCHED_FIFO ? | 4751 | if (p->policy == SCHED_FIFO) |
4770 | 0 : static_prio_timeslice(p->static_prio), &t); | 4752 | time_slice = 0; |
4753 | else if (p->policy == SCHED_RR) | ||
4754 | time_slice = DEF_TIMESLICE; | ||
4755 | else { | ||
4756 | struct sched_entity *se = &p->se; | ||
4757 | unsigned long flags; | ||
4758 | struct rq *rq; | ||
4759 | |||
4760 | rq = task_rq_lock(p, &flags); | ||
4761 | time_slice = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se)); | ||
4762 | task_rq_unlock(rq, &flags); | ||
4763 | } | ||
4771 | read_unlock(&tasklist_lock); | 4764 | read_unlock(&tasklist_lock); |
4765 | jiffies_to_timespec(time_slice, &t); | ||
4772 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 4766 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
4773 | out_nounlock: | ||
4774 | return retval; | 4767 | return retval; |
4768 | |||
4775 | out_unlock: | 4769 | out_unlock: |
4776 | read_unlock(&tasklist_lock); | 4770 | read_unlock(&tasklist_lock); |
4777 | return retval; | 4771 | return retval; |
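For SCHED_NORMAL tasks the reported round-robin interval is now taken from the CFS slice in nanoseconds, converted to jiffies, and only then to a timespec. A standalone sketch of that conversion path, assuming HZ=1000 and an arbitrary slice value instead of a real sched_slice() result:

#include <stdio.h>

#define HZ 1000UL	/* assumption for this sketch */

struct timespec_sim { long tv_sec, tv_nsec; };

static unsigned long ns_to_jiffies(unsigned long long ns)
{
	return (unsigned long)(ns / (1000000000UL / HZ));
}

static void jiffies_to_timespec_sim(unsigned long j, struct timespec_sim *t)
{
	unsigned long long ns = (unsigned long long)j * (1000000000UL / HZ);

	t->tv_sec  = ns / 1000000000ULL;
	t->tv_nsec = ns % 1000000000ULL;
}

int main(void)
{
	unsigned long long slice_ns = 13500000ULL;	/* 13.5 ms, arbitrary */
	struct timespec_sim t;

	jiffies_to_timespec_sim(ns_to_jiffies(slice_ns), &t);
	printf("reported interval: %ld.%09ld s\n", t.tv_sec, t.tv_nsec);
	return 0;
}
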
@@ -4900,32 +4894,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
4900 | */ | 4894 | */ |
4901 | cpumask_t nohz_cpu_mask = CPU_MASK_NONE; | 4895 | cpumask_t nohz_cpu_mask = CPU_MASK_NONE; |
4902 | 4896 | ||
4903 | /* | ||
4904 | * Increase the granularity value when there are more CPUs, | ||
4905 | * because with more CPUs the 'effective latency' as visible | ||
4906 | * to users decreases. But the relationship is not linear, | ||
4907 | * so pick a second-best guess by going with the log2 of the | ||
4908 | * number of CPUs. | ||
4909 | * | ||
4910 | * This idea comes from the SD scheduler of Con Kolivas: | ||
4911 | */ | ||
4912 | static inline void sched_init_granularity(void) | ||
4913 | { | ||
4914 | unsigned int factor = 1 + ilog2(num_online_cpus()); | ||
4915 | const unsigned long limit = 100000000; | ||
4916 | |||
4917 | sysctl_sched_min_granularity *= factor; | ||
4918 | if (sysctl_sched_min_granularity > limit) | ||
4919 | sysctl_sched_min_granularity = limit; | ||
4920 | |||
4921 | sysctl_sched_latency *= factor; | ||
4922 | if (sysctl_sched_latency > limit) | ||
4923 | sysctl_sched_latency = limit; | ||
4924 | |||
4925 | sysctl_sched_runtime_limit = sysctl_sched_latency; | ||
4926 | sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2; | ||
4927 | } | ||
4928 | |||
4929 | #ifdef CONFIG_SMP | 4897 | #ifdef CONFIG_SMP |
4930 | /* | 4898 | /* |
4931 | * This is how migration works: | 4899 | * This is how migration works: |
@@ -5103,35 +5071,34 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
5103 | struct rq *rq; | 5071 | struct rq *rq; |
5104 | int dest_cpu; | 5072 | int dest_cpu; |
5105 | 5073 | ||
5106 | restart: | 5074 | do { |
5107 | /* On same node? */ | 5075 | /* On same node? */ |
5108 | mask = node_to_cpumask(cpu_to_node(dead_cpu)); | 5076 | mask = node_to_cpumask(cpu_to_node(dead_cpu)); |
5109 | cpus_and(mask, mask, p->cpus_allowed); | 5077 | cpus_and(mask, mask, p->cpus_allowed); |
5110 | dest_cpu = any_online_cpu(mask); | 5078 | dest_cpu = any_online_cpu(mask); |
5111 | 5079 | ||
5112 | /* On any allowed CPU? */ | 5080 | /* On any allowed CPU? */ |
5113 | if (dest_cpu == NR_CPUS) | 5081 | if (dest_cpu == NR_CPUS) |
5114 | dest_cpu = any_online_cpu(p->cpus_allowed); | 5082 | dest_cpu = any_online_cpu(p->cpus_allowed); |
5115 | 5083 | ||
5116 | /* No more Mr. Nice Guy. */ | 5084 | /* No more Mr. Nice Guy. */ |
5117 | if (dest_cpu == NR_CPUS) { | 5085 | if (dest_cpu == NR_CPUS) { |
5118 | rq = task_rq_lock(p, &flags); | 5086 | rq = task_rq_lock(p, &flags); |
5119 | cpus_setall(p->cpus_allowed); | 5087 | cpus_setall(p->cpus_allowed); |
5120 | dest_cpu = any_online_cpu(p->cpus_allowed); | 5088 | dest_cpu = any_online_cpu(p->cpus_allowed); |
5121 | task_rq_unlock(rq, &flags); | 5089 | task_rq_unlock(rq, &flags); |
5122 | 5090 | ||
5123 | /* | 5091 | /* |
5124 | * Don't tell them about moving exiting tasks or | 5092 | * Don't tell them about moving exiting tasks or |
5125 | * kernel threads (both mm NULL), since they never | 5093 | * kernel threads (both mm NULL), since they never |
5126 | * leave kernel. | 5094 | * leave kernel. |
5127 | */ | 5095 | */ |
5128 | if (p->mm && printk_ratelimit()) | 5096 | if (p->mm && printk_ratelimit()) |
5129 | printk(KERN_INFO "process %d (%s) no " | 5097 | printk(KERN_INFO "process %d (%s) no " |
5130 | "longer affine to cpu%d\n", | 5098 | "longer affine to cpu%d\n", |
5131 | p->pid, p->comm, dead_cpu); | 5099 | p->pid, p->comm, dead_cpu); |
5132 | } | 5100 | } |
5133 | if (!__migrate_task(p, dead_cpu, dest_cpu)) | 5101 | } while (!__migrate_task(p, dead_cpu, dest_cpu)); |
5134 | goto restart; | ||
5135 | } | 5102 | } |
5136 | 5103 | ||
5137 | /* | 5104 | /* |
@@ -5173,6 +5140,20 @@ static void migrate_live_tasks(int src_cpu) | |||
5173 | } | 5140 | } |
5174 | 5141 | ||
5175 | /* | 5142 | /* |
5143 | * activate_idle_task - move idle task to the _front_ of runqueue. | ||
5144 | */ | ||
5145 | static void activate_idle_task(struct task_struct *p, struct rq *rq) | ||
5146 | { | ||
5147 | update_rq_clock(rq); | ||
5148 | |||
5149 | if (p->state == TASK_UNINTERRUPTIBLE) | ||
5150 | rq->nr_uninterruptible--; | ||
5151 | |||
5152 | enqueue_task(rq, p, 0); | ||
5153 | inc_nr_running(p, rq); | ||
5154 | } | ||
5155 | |||
5156 | /* | ||
5176 | * Schedules idle task to be the next runnable task on current CPU. | 5157 | * Schedules idle task to be the next runnable task on current CPU. |
5177 | * It does so by boosting its priority to highest possible and adding it to | 5158 | * It does so by boosting its priority to highest possible and adding it to |
5178 | * the _front_ of the runqueue. Used by CPU offline code. | 5159 | * the _front_ of the runqueue. Used by CPU offline code. |
@@ -5284,14 +5265,23 @@ static struct ctl_table sd_ctl_root[] = { | |||
5284 | static struct ctl_table *sd_alloc_ctl_entry(int n) | 5265 | static struct ctl_table *sd_alloc_ctl_entry(int n) |
5285 | { | 5266 | { |
5286 | struct ctl_table *entry = | 5267 | struct ctl_table *entry = |
5287 | kmalloc(n * sizeof(struct ctl_table), GFP_KERNEL); | 5268 | kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); |
5288 | |||
5289 | BUG_ON(!entry); | ||
5290 | memset(entry, 0, n * sizeof(struct ctl_table)); | ||
5291 | 5269 | ||
5292 | return entry; | 5270 | return entry; |
5293 | } | 5271 | } |
5294 | 5272 | ||
5273 | static void sd_free_ctl_entry(struct ctl_table **tablep) | ||
5274 | { | ||
5275 | struct ctl_table *entry = *tablep; | ||
5276 | |||
5277 | for (entry = *tablep; entry->procname; entry++) | ||
5278 | if (entry->child) | ||
5279 | sd_free_ctl_entry(&entry->child); | ||
5280 | |||
5281 | kfree(*tablep); | ||
5282 | *tablep = NULL; | ||
5283 | } | ||
5284 | |||
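sd_alloc_ctl_entry() now zero-allocates with kcalloc() and returns NULL on failure instead of BUG(), and the new sd_free_ctl_entry() walks child tables recursively, relying on a zeroed terminator entry. A userspace analogue using calloc/free (the struct and field names are mine):

#include <stdio.h>
#include <stdlib.h>

struct ctl_entry {
	const char *procname;		/* NULL terminates a table */
	struct ctl_entry *child;
};

static struct ctl_entry *alloc_table(int n)
{
	/* zero-allocated; the caller leaves the last entry as terminator */
	return calloc(n, sizeof(struct ctl_entry));
}

static void free_table(struct ctl_entry **tablep)
{
	struct ctl_entry *entry;

	for (entry = *tablep; entry && entry->procname; entry++)
		if (entry->child)
			free_table(&entry->child);

	free(*tablep);
	*tablep = NULL;
}

int main(void)
{
	struct ctl_entry *root = alloc_table(2);	/* 1 entry + terminator */

	root[0].procname = "cpu0";
	root[0].child = alloc_table(3);			/* 2 entries + terminator */
	root[0].child[0].procname = "min_interval";
	root[0].child[1].procname = "max_interval";

	free_table(&root);
	printf("root is now %p\n", (void *)root);
	return 0;
}
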
5295 | static void | 5285 | static void |
5296 | set_table_entry(struct ctl_table *entry, | 5286 | set_table_entry(struct ctl_table *entry, |
5297 | const char *procname, void *data, int maxlen, | 5287 | const char *procname, void *data, int maxlen, |
@@ -5307,7 +5297,10 @@ set_table_entry(struct ctl_table *entry, | |||
5307 | static struct ctl_table * | 5297 | static struct ctl_table * |
5308 | sd_alloc_ctl_domain_table(struct sched_domain *sd) | 5298 | sd_alloc_ctl_domain_table(struct sched_domain *sd) |
5309 | { | 5299 | { |
5310 | struct ctl_table *table = sd_alloc_ctl_entry(14); | 5300 | struct ctl_table *table = sd_alloc_ctl_entry(12); |
5301 | |||
5302 | if (table == NULL) | ||
5303 | return NULL; | ||
5311 | 5304 | ||
5312 | set_table_entry(&table[0], "min_interval", &sd->min_interval, | 5305 | set_table_entry(&table[0], "min_interval", &sd->min_interval, |
5313 | sizeof(long), 0644, proc_doulongvec_minmax); | 5306 | sizeof(long), 0644, proc_doulongvec_minmax); |
@@ -5327,11 +5320,12 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) | |||
5327 | sizeof(int), 0644, proc_dointvec_minmax); | 5320 | sizeof(int), 0644, proc_dointvec_minmax); |
5328 | set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, | 5321 | set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, |
5329 | sizeof(int), 0644, proc_dointvec_minmax); | 5322 | sizeof(int), 0644, proc_dointvec_minmax); |
5330 | set_table_entry(&table[10], "cache_nice_tries", | 5323 | set_table_entry(&table[9], "cache_nice_tries", |
5331 | &sd->cache_nice_tries, | 5324 | &sd->cache_nice_tries, |
5332 | sizeof(int), 0644, proc_dointvec_minmax); | 5325 | sizeof(int), 0644, proc_dointvec_minmax); |
5333 | set_table_entry(&table[12], "flags", &sd->flags, | 5326 | set_table_entry(&table[10], "flags", &sd->flags, |
5334 | sizeof(int), 0644, proc_dointvec_minmax); | 5327 | sizeof(int), 0644, proc_dointvec_minmax); |
5328 | /* &table[11] is terminator */ | ||
5335 | 5329 | ||
5336 | return table; | 5330 | return table; |
5337 | } | 5331 | } |
@@ -5346,6 +5340,8 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) | |||
5346 | for_each_domain(cpu, sd) | 5340 | for_each_domain(cpu, sd) |
5347 | domain_num++; | 5341 | domain_num++; |
5348 | entry = table = sd_alloc_ctl_entry(domain_num + 1); | 5342 | entry = table = sd_alloc_ctl_entry(domain_num + 1); |
5343 | if (table == NULL) | ||
5344 | return NULL; | ||
5349 | 5345 | ||
5350 | i = 0; | 5346 | i = 0; |
5351 | for_each_domain(cpu, sd) { | 5347 | for_each_domain(cpu, sd) { |
@@ -5360,24 +5356,38 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) | |||
5360 | } | 5356 | } |
5361 | 5357 | ||
5362 | static struct ctl_table_header *sd_sysctl_header; | 5358 | static struct ctl_table_header *sd_sysctl_header; |
5363 | static void init_sched_domain_sysctl(void) | 5359 | static void register_sched_domain_sysctl(void) |
5364 | { | 5360 | { |
5365 | int i, cpu_num = num_online_cpus(); | 5361 | int i, cpu_num = num_online_cpus(); |
5366 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); | 5362 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); |
5367 | char buf[32]; | 5363 | char buf[32]; |
5368 | 5364 | ||
5365 | if (entry == NULL) | ||
5366 | return; | ||
5367 | |||
5369 | sd_ctl_dir[0].child = entry; | 5368 | sd_ctl_dir[0].child = entry; |
5370 | 5369 | ||
5371 | for (i = 0; i < cpu_num; i++, entry++) { | 5370 | for_each_online_cpu(i) { |
5372 | snprintf(buf, 32, "cpu%d", i); | 5371 | snprintf(buf, 32, "cpu%d", i); |
5373 | entry->procname = kstrdup(buf, GFP_KERNEL); | 5372 | entry->procname = kstrdup(buf, GFP_KERNEL); |
5374 | entry->mode = 0555; | 5373 | entry->mode = 0555; |
5375 | entry->child = sd_alloc_ctl_cpu_table(i); | 5374 | entry->child = sd_alloc_ctl_cpu_table(i); |
5375 | entry++; | ||
5376 | } | 5376 | } |
5377 | sd_sysctl_header = register_sysctl_table(sd_ctl_root); | 5377 | sd_sysctl_header = register_sysctl_table(sd_ctl_root); |
5378 | } | 5378 | } |
5379 | |||
5380 | static void unregister_sched_domain_sysctl(void) | ||
5381 | { | ||
5382 | unregister_sysctl_table(sd_sysctl_header); | ||
5383 | sd_sysctl_header = NULL; | ||
5384 | sd_free_ctl_entry(&sd_ctl_dir[0].child); | ||
5385 | } | ||
5379 | #else | 5386 | #else |
5380 | static void init_sched_domain_sysctl(void) | 5387 | static void register_sched_domain_sysctl(void) |
5388 | { | ||
5389 | } | ||
5390 | static void unregister_sched_domain_sysctl(void) | ||
5381 | { | 5391 | { |
5382 | } | 5392 | } |
5383 | #endif | 5393 | #endif |
@@ -5499,8 +5509,7 @@ int __init migration_init(void) | |||
5499 | int nr_cpu_ids __read_mostly = NR_CPUS; | 5509 | int nr_cpu_ids __read_mostly = NR_CPUS; |
5500 | EXPORT_SYMBOL(nr_cpu_ids); | 5510 | EXPORT_SYMBOL(nr_cpu_ids); |
5501 | 5511 | ||
5502 | #undef SCHED_DOMAIN_DEBUG | 5512 | #ifdef CONFIG_SCHED_DEBUG |
5503 | #ifdef SCHED_DOMAIN_DEBUG | ||
5504 | static void sched_domain_debug(struct sched_domain *sd, int cpu) | 5513 | static void sched_domain_debug(struct sched_domain *sd, int cpu) |
5505 | { | 5514 | { |
5506 | int level = 0; | 5515 | int level = 0; |
@@ -5558,16 +5567,19 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
5558 | printk("\n"); | 5567 | printk("\n"); |
5559 | printk(KERN_ERR "ERROR: domain->cpu_power not " | 5568 | printk(KERN_ERR "ERROR: domain->cpu_power not " |
5560 | "set\n"); | 5569 | "set\n"); |
5570 | break; | ||
5561 | } | 5571 | } |
5562 | 5572 | ||
5563 | if (!cpus_weight(group->cpumask)) { | 5573 | if (!cpus_weight(group->cpumask)) { |
5564 | printk("\n"); | 5574 | printk("\n"); |
5565 | printk(KERN_ERR "ERROR: empty group\n"); | 5575 | printk(KERN_ERR "ERROR: empty group\n"); |
5576 | break; | ||
5566 | } | 5577 | } |
5567 | 5578 | ||
5568 | if (cpus_intersects(groupmask, group->cpumask)) { | 5579 | if (cpus_intersects(groupmask, group->cpumask)) { |
5569 | printk("\n"); | 5580 | printk("\n"); |
5570 | printk(KERN_ERR "ERROR: repeated CPUs\n"); | 5581 | printk(KERN_ERR "ERROR: repeated CPUs\n"); |
5582 | break; | ||
5571 | } | 5583 | } |
5572 | 5584 | ||
5573 | cpus_or(groupmask, groupmask, group->cpumask); | 5585 | cpus_or(groupmask, groupmask, group->cpumask); |
@@ -5701,7 +5713,7 @@ static int __init isolated_cpu_setup(char *str) | |||
5701 | return 1; | 5713 | return 1; |
5702 | } | 5714 | } |
5703 | 5715 | ||
5704 | __setup ("isolcpus=", isolated_cpu_setup); | 5716 | __setup("isolcpus=", isolated_cpu_setup); |
5705 | 5717 | ||
5706 | /* | 5718 | /* |
5707 | * init_sched_build_groups takes the cpumask we wish to span, and a pointer | 5719 | * init_sched_build_groups takes the cpumask we wish to span, and a pointer |
@@ -5930,24 +5942,23 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) | |||
5930 | 5942 | ||
5931 | if (!sg) | 5943 | if (!sg) |
5932 | return; | 5944 | return; |
5933 | next_sg: | 5945 | do { |
5934 | for_each_cpu_mask(j, sg->cpumask) { | 5946 | for_each_cpu_mask(j, sg->cpumask) { |
5935 | struct sched_domain *sd; | 5947 | struct sched_domain *sd; |
5936 | 5948 | ||
5937 | sd = &per_cpu(phys_domains, j); | 5949 | sd = &per_cpu(phys_domains, j); |
5938 | if (j != first_cpu(sd->groups->cpumask)) { | 5950 | if (j != first_cpu(sd->groups->cpumask)) { |
5939 | /* | 5951 | /* |
5940 | * Only add "power" once for each | 5952 | * Only add "power" once for each |
5941 | * physical package. | 5953 | * physical package. |
5942 | */ | 5954 | */ |
5943 | continue; | 5955 | continue; |
5944 | } | 5956 | } |
5945 | 5957 | ||
5946 | sg_inc_cpu_power(sg, sd->groups->__cpu_power); | 5958 | sg_inc_cpu_power(sg, sd->groups->__cpu_power); |
5947 | } | 5959 | } |
5948 | sg = sg->next; | 5960 | sg = sg->next; |
5949 | if (sg != group_head) | 5961 | } while (sg != group_head); |
5950 | goto next_sg; | ||
5951 | } | 5962 | } |
5952 | #endif | 5963 | #endif |
5953 | 5964 | ||
@@ -6058,7 +6069,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6058 | /* | 6069 | /* |
6059 | * Allocate the per-node list of sched groups | 6070 | * Allocate the per-node list of sched groups |
6060 | */ | 6071 | */ |
6061 | sched_group_nodes = kzalloc(sizeof(struct sched_group *)*MAX_NUMNODES, | 6072 | sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *), |
6062 | GFP_KERNEL); | 6073 | GFP_KERNEL); |
6063 | if (!sched_group_nodes) { | 6074 | if (!sched_group_nodes) { |
6064 | printk(KERN_WARNING "Can not alloc sched group node list\n"); | 6075 | printk(KERN_WARNING "Can not alloc sched group node list\n"); |
@@ -6311,6 +6322,8 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map) | |||
6311 | 6322 | ||
6312 | err = build_sched_domains(&cpu_default_map); | 6323 | err = build_sched_domains(&cpu_default_map); |
6313 | 6324 | ||
6325 | register_sched_domain_sysctl(); | ||
6326 | |||
6314 | return err; | 6327 | return err; |
6315 | } | 6328 | } |
6316 | 6329 | ||
@@ -6327,6 +6340,8 @@ static void detach_destroy_domains(const cpumask_t *cpu_map) | |||
6327 | { | 6340 | { |
6328 | int i; | 6341 | int i; |
6329 | 6342 | ||
6343 | unregister_sched_domain_sysctl(); | ||
6344 | |||
6330 | for_each_cpu_mask(i, *cpu_map) | 6345 | for_each_cpu_mask(i, *cpu_map) |
6331 | cpu_attach_domain(NULL, i); | 6346 | cpu_attach_domain(NULL, i); |
6332 | synchronize_sched(); | 6347 | synchronize_sched(); |
@@ -6357,6 +6372,8 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2) | |||
6357 | if (!err && !cpus_empty(*partition2)) | 6372 | if (!err && !cpus_empty(*partition2)) |
6358 | err = build_sched_domains(partition2); | 6373 | err = build_sched_domains(partition2); |
6359 | 6374 | ||
6375 | register_sched_domain_sysctl(); | ||
6376 | |||
6360 | return err; | 6377 | return err; |
6361 | } | 6378 | } |
6362 | 6379 | ||
@@ -6488,17 +6505,13 @@ void __init sched_init_smp(void) | |||
6488 | /* XXX: Theoretical race here - CPU may be hotplugged now */ | 6505 | /* XXX: Theoretical race here - CPU may be hotplugged now */ |
6489 | hotcpu_notifier(update_sched_domains, 0); | 6506 | hotcpu_notifier(update_sched_domains, 0); |
6490 | 6507 | ||
6491 | init_sched_domain_sysctl(); | ||
6492 | |||
6493 | /* Move init over to a non-isolated CPU */ | 6508 | /* Move init over to a non-isolated CPU */ |
6494 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) | 6509 | if (set_cpus_allowed(current, non_isolated_cpus) < 0) |
6495 | BUG(); | 6510 | BUG(); |
6496 | sched_init_granularity(); | ||
6497 | } | 6511 | } |
6498 | #else | 6512 | #else |
6499 | void __init sched_init_smp(void) | 6513 | void __init sched_init_smp(void) |
6500 | { | 6514 | { |
6501 | sched_init_granularity(); | ||
6502 | } | 6515 | } |
6503 | #endif /* CONFIG_SMP */ | 6516 | #endif /* CONFIG_SMP */ |
6504 | 6517 | ||
@@ -6512,28 +6525,20 @@ int in_sched_functions(unsigned long addr) | |||
6512 | && addr < (unsigned long)__sched_text_end); | 6525 | && addr < (unsigned long)__sched_text_end); |
6513 | } | 6526 | } |
6514 | 6527 | ||
6515 | static inline void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) | 6528 | static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) |
6516 | { | 6529 | { |
6517 | cfs_rq->tasks_timeline = RB_ROOT; | 6530 | cfs_rq->tasks_timeline = RB_ROOT; |
6518 | cfs_rq->fair_clock = 1; | ||
6519 | #ifdef CONFIG_FAIR_GROUP_SCHED | 6531 | #ifdef CONFIG_FAIR_GROUP_SCHED |
6520 | cfs_rq->rq = rq; | 6532 | cfs_rq->rq = rq; |
6521 | #endif | 6533 | #endif |
6534 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | ||
6522 | } | 6535 | } |
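init_cfs_rq() now seeds min_vruntime just below the u64 wraparound point. One plausible reading is that this makes any vruntime comparison that is not wraparound-safe misbehave almost immediately, while the signed-offset comparisons CFS relies on keep working. A standalone sketch of that comparison idiom (the helper name is mine):

#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe ordering on u64 "virtual runtime" values: compare via
 * a signed difference instead of '<', so values that have wrapped past
 * 2^64 still order correctly as long as they are within 2^63 of each
 * other.  Starting the clock near the top of the range, as the
 * -(1LL << 20) initialisation does, exposes code that gets this wrong. */

static int vruntime_before(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) < 0;
}

int main(void)
{
	uint64_t base = (uint64_t)(-(1LL << 20));	/* just below wraparound */
	uint64_t before_wrap = base + 100;
	uint64_t after_wrap  = base + (1ULL << 21);	/* has wrapped past 0 */

	printf("naive '<'      : %d\n", before_wrap < after_wrap);		   /* 0: wrong */
	printf("signed compare : %d\n", vruntime_before(before_wrap, after_wrap)); /* 1: right */
	return 0;
}
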
6523 | 6536 | ||
6524 | void __init sched_init(void) | 6537 | void __init sched_init(void) |
6525 | { | 6538 | { |
6526 | u64 now = sched_clock(); | ||
6527 | int highest_cpu = 0; | 6539 | int highest_cpu = 0; |
6528 | int i, j; | 6540 | int i, j; |
6529 | 6541 | ||
6530 | /* | ||
6531 | * Link up the scheduling class hierarchy: | ||
6532 | */ | ||
6533 | rt_sched_class.next = &fair_sched_class; | ||
6534 | fair_sched_class.next = &idle_sched_class; | ||
6535 | idle_sched_class.next = NULL; | ||
6536 | |||
6537 | for_each_possible_cpu(i) { | 6542 | for_each_possible_cpu(i) { |
6538 | struct rt_prio_array *array; | 6543 | struct rt_prio_array *array; |
6539 | struct rq *rq; | 6544 | struct rq *rq; |
@@ -6546,10 +6551,28 @@ void __init sched_init(void) | |||
6546 | init_cfs_rq(&rq->cfs, rq); | 6551 | init_cfs_rq(&rq->cfs, rq); |
6547 | #ifdef CONFIG_FAIR_GROUP_SCHED | 6552 | #ifdef CONFIG_FAIR_GROUP_SCHED |
6548 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); | 6553 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); |
6549 | list_add(&rq->cfs.leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | 6554 | { |
6555 | struct cfs_rq *cfs_rq = &per_cpu(init_cfs_rq, i); | ||
6556 | struct sched_entity *se = | ||
6557 | &per_cpu(init_sched_entity, i); | ||
6558 | |||
6559 | init_cfs_rq_p[i] = cfs_rq; | ||
6560 | init_cfs_rq(cfs_rq, rq); | ||
6561 | cfs_rq->tg = &init_task_group; | ||
6562 | list_add(&cfs_rq->leaf_cfs_rq_list, | ||
6563 | &rq->leaf_cfs_rq_list); | ||
6564 | |||
6565 | init_sched_entity_p[i] = se; | ||
6566 | se->cfs_rq = &rq->cfs; | ||
6567 | se->my_q = cfs_rq; | ||
6568 | se->load.weight = init_task_group_load; | ||
6569 | se->load.inv_weight = | ||
6570 | div64_64(1ULL<<32, init_task_group_load); | ||
6571 | se->parent = NULL; | ||
6572 | } | ||
6573 | init_task_group.shares = init_task_group_load; | ||
6574 | spin_lock_init(&init_task_group.lock); | ||
6550 | #endif | 6575 | #endif |
6551 | rq->ls.load_update_last = now; | ||
6552 | rq->ls.load_update_start = now; | ||
6553 | 6576 | ||
6554 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) | 6577 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) |
6555 | rq->cpu_load[j] = 0; | 6578 | rq->cpu_load[j] = 0; |
@@ -6634,26 +6657,40 @@ EXPORT_SYMBOL(__might_sleep); | |||
6634 | #endif | 6657 | #endif |
6635 | 6658 | ||
6636 | #ifdef CONFIG_MAGIC_SYSRQ | 6659 | #ifdef CONFIG_MAGIC_SYSRQ |
6660 | static void normalize_task(struct rq *rq, struct task_struct *p) | ||
6661 | { | ||
6662 | int on_rq; | ||
6663 | update_rq_clock(rq); | ||
6664 | on_rq = p->se.on_rq; | ||
6665 | if (on_rq) | ||
6666 | deactivate_task(rq, p, 0); | ||
6667 | __setscheduler(rq, p, SCHED_NORMAL, 0); | ||
6668 | if (on_rq) { | ||
6669 | activate_task(rq, p, 0); | ||
6670 | resched_task(rq->curr); | ||
6671 | } | ||
6672 | } | ||
6673 | |||
6637 | void normalize_rt_tasks(void) | 6674 | void normalize_rt_tasks(void) |
6638 | { | 6675 | { |
6639 | struct task_struct *g, *p; | 6676 | struct task_struct *g, *p; |
6640 | unsigned long flags; | 6677 | unsigned long flags; |
6641 | struct rq *rq; | 6678 | struct rq *rq; |
6642 | int on_rq; | ||
6643 | 6679 | ||
6644 | read_lock_irq(&tasklist_lock); | 6680 | read_lock_irq(&tasklist_lock); |
6645 | do_each_thread(g, p) { | 6681 | do_each_thread(g, p) { |
6646 | p->se.fair_key = 0; | 6682 | /* |
6647 | p->se.wait_runtime = 0; | 6683 | * Only normalize user tasks: |
6684 | */ | ||
6685 | if (!p->mm) | ||
6686 | continue; | ||
6687 | |||
6648 | p->se.exec_start = 0; | 6688 | p->se.exec_start = 0; |
6649 | p->se.wait_start_fair = 0; | ||
6650 | p->se.sleep_start_fair = 0; | ||
6651 | #ifdef CONFIG_SCHEDSTATS | 6689 | #ifdef CONFIG_SCHEDSTATS |
6652 | p->se.wait_start = 0; | 6690 | p->se.wait_start = 0; |
6653 | p->se.sleep_start = 0; | 6691 | p->se.sleep_start = 0; |
6654 | p->se.block_start = 0; | 6692 | p->se.block_start = 0; |
6655 | #endif | 6693 | #endif |
6656 | task_rq(p)->cfs.fair_clock = 0; | ||
6657 | task_rq(p)->clock = 0; | 6694 | task_rq(p)->clock = 0; |
6658 | 6695 | ||
6659 | if (!rt_task(p)) { | 6696 | if (!rt_task(p)) { |
@@ -6668,26 +6705,9 @@ void normalize_rt_tasks(void) | |||
6668 | 6705 | ||
6669 | spin_lock_irqsave(&p->pi_lock, flags); | 6706 | spin_lock_irqsave(&p->pi_lock, flags); |
6670 | rq = __task_rq_lock(p); | 6707 | rq = __task_rq_lock(p); |
6671 | #ifdef CONFIG_SMP | ||
6672 | /* | ||
6673 | * Do not touch the migration thread: | ||
6674 | */ | ||
6675 | if (p == rq->migration_thread) | ||
6676 | goto out_unlock; | ||
6677 | #endif | ||
6678 | 6708 | ||
6679 | update_rq_clock(rq); | 6709 | normalize_task(rq, p); |
6680 | on_rq = p->se.on_rq; | 6710 | |
6681 | if (on_rq) | ||
6682 | deactivate_task(rq, p, 0); | ||
6683 | __setscheduler(rq, p, SCHED_NORMAL, 0); | ||
6684 | if (on_rq) { | ||
6685 | activate_task(rq, p, 0); | ||
6686 | resched_task(rq->curr); | ||
6687 | } | ||
6688 | #ifdef CONFIG_SMP | ||
6689 | out_unlock: | ||
6690 | #endif | ||
6691 | __task_rq_unlock(rq); | 6711 | __task_rq_unlock(rq); |
6692 | spin_unlock_irqrestore(&p->pi_lock, flags); | 6712 | spin_unlock_irqrestore(&p->pi_lock, flags); |
6693 | } while_each_thread(g, p); | 6713 | } while_each_thread(g, p); |
@@ -6740,3 +6760,201 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
6740 | } | 6760 | } |
6741 | 6761 | ||
6742 | #endif | 6762 | #endif |
6763 | |||
6764 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
6765 | |||
6766 | /* allocate runqueue etc for a new task group */ | ||
6767 | struct task_group *sched_create_group(void) | ||
6768 | { | ||
6769 | struct task_group *tg; | ||
6770 | struct cfs_rq *cfs_rq; | ||
6771 | struct sched_entity *se; | ||
6772 | struct rq *rq; | ||
6773 | int i; | ||
6774 | |||
6775 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | ||
6776 | if (!tg) | ||
6777 | return ERR_PTR(-ENOMEM); | ||
6778 | |||
6779 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); | ||
6780 | if (!tg->cfs_rq) | ||
6781 | goto err; | ||
6782 | tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); | ||
6783 | if (!tg->se) | ||
6784 | goto err; | ||
6785 | |||
6786 | for_each_possible_cpu(i) { | ||
6787 | rq = cpu_rq(i); | ||
6788 | |||
6789 | cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, | ||
6790 | cpu_to_node(i)); | ||
6791 | if (!cfs_rq) | ||
6792 | goto err; | ||
6793 | |||
6794 | se = kmalloc_node(sizeof(struct sched_entity), GFP_KERNEL, | ||
6795 | cpu_to_node(i)); | ||
6796 | if (!se) | ||
6797 | goto err; | ||
6798 | |||
6799 | memset(cfs_rq, 0, sizeof(struct cfs_rq)); | ||
6800 | memset(se, 0, sizeof(struct sched_entity)); | ||
6801 | |||
6802 | tg->cfs_rq[i] = cfs_rq; | ||
6803 | init_cfs_rq(cfs_rq, rq); | ||
6804 | cfs_rq->tg = tg; | ||
6805 | |||
6806 | tg->se[i] = se; | ||
6807 | se->cfs_rq = &rq->cfs; | ||
6808 | se->my_q = cfs_rq; | ||
6809 | se->load.weight = NICE_0_LOAD; | ||
6810 | se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD); | ||
6811 | se->parent = NULL; | ||
6812 | } | ||
6813 | |||
6814 | for_each_possible_cpu(i) { | ||
6815 | rq = cpu_rq(i); | ||
6816 | cfs_rq = tg->cfs_rq[i]; | ||
6817 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | ||
6818 | } | ||
6819 | |||
6820 | tg->shares = NICE_0_LOAD; | ||
6821 | spin_lock_init(&tg->lock); | ||
6822 | |||
6823 | return tg; | ||
6824 | |||
6825 | err: | ||
6826 | for_each_possible_cpu(i) { | ||
6827 | if (tg->cfs_rq) | ||
6828 | kfree(tg->cfs_rq[i]); | ||
6829 | if (tg->se) | ||
6830 | kfree(tg->se[i]); | ||
6831 | } | ||
6832 | kfree(tg->cfs_rq); | ||
6833 | kfree(tg->se); | ||
6834 | kfree(tg); | ||
6835 | |||
6836 | return ERR_PTR(-ENOMEM); | ||
6837 | } | ||
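sched_create_group() allocates one cfs_rq and one sched_entity per possible CPU and rolls everything back through a single err label; because kfree(NULL) is a no-op, the cleanup loop does not need to track how far construction got. A userspace sketch of the same allocate-or-unwind shape (sizes and field names are placeholders):

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS_SIM 4

struct group {
	int **per_cpu_rq;	/* stand-ins for cfs_rq / sched_entity arrays */
	int **per_cpu_se;
};

static struct group *create_group(void)
{
	struct group *g = calloc(1, sizeof(*g));
	int i;

	if (!g)
		return NULL;

	g->per_cpu_rq = calloc(NR_CPUS_SIM, sizeof(*g->per_cpu_rq));
	g->per_cpu_se = calloc(NR_CPUS_SIM, sizeof(*g->per_cpu_se));
	if (!g->per_cpu_rq || !g->per_cpu_se)
		goto err;

	for (i = 0; i < NR_CPUS_SIM; i++) {
		g->per_cpu_rq[i] = calloc(1, sizeof(int));
		g->per_cpu_se[i] = calloc(1, sizeof(int));
		if (!g->per_cpu_rq[i] || !g->per_cpu_se[i])
			goto err;
	}
	return g;

err:
	/* free(NULL) is harmless, so no bookkeeping of progress is needed */
	for (i = 0; i < NR_CPUS_SIM; i++) {
		if (g->per_cpu_rq)
			free(g->per_cpu_rq[i]);
		if (g->per_cpu_se)
			free(g->per_cpu_se[i]);
	}
	free(g->per_cpu_rq);
	free(g->per_cpu_se);
	free(g);
	return NULL;
}

int main(void)
{
	struct group *g = create_group();

	printf("group %screated\n", g ? "" : "not ");
	return 0;
}
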
6838 | |||
6839 | /* rcu callback to free various structures associated with a task group */ | ||
6840 | static void free_sched_group(struct rcu_head *rhp) | ||
6841 | { | ||
6842 | struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu); | ||
6843 | struct task_group *tg = cfs_rq->tg; | ||
6844 | struct sched_entity *se; | ||
6845 | int i; | ||
6846 | |||
6847 | /* now it should be safe to free those cfs_rqs */ | ||
6848 | for_each_possible_cpu(i) { | ||
6849 | cfs_rq = tg->cfs_rq[i]; | ||
6850 | kfree(cfs_rq); | ||
6851 | |||
6852 | se = tg->se[i]; | ||
6853 | kfree(se); | ||
6854 | } | ||
6855 | |||
6856 | kfree(tg->cfs_rq); | ||
6857 | kfree(tg->se); | ||
6858 | kfree(tg); | ||
6859 | } | ||
6860 | |||
6861 | /* Destroy runqueue etc associated with a task group */ | ||
6862 | void sched_destroy_group(struct task_group *tg) | ||
6863 | { | ||
6864 | struct cfs_rq *cfs_rq; | ||
6865 | int i; | ||
6866 | |||
6867 | for_each_possible_cpu(i) { | ||
6868 | cfs_rq = tg->cfs_rq[i]; | ||
6869 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | ||
6870 | } | ||
6871 | |||
6872 | cfs_rq = tg->cfs_rq[0]; | ||
6873 | |||
6874 | /* wait for possible concurrent references to cfs_rqs complete */ | ||
6875 | call_rcu(&cfs_rq->rcu, free_sched_group); | ||
6876 | } | ||
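Teardown unlinks every per-CPU cfs_rq from the RCU-traversed list first and then defers the actual freeing through call_rcu() on a single embedded rcu_head; the callback frees all per-CPU objects plus the group. The sketch below models only that ordering, with a plain function pointer standing in for the RCU grace period:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS_SIM 2

struct rcu_head_sim { void (*func)(struct rcu_head_sim *); };

struct cfs_rq_sim {
	struct group_sim *tg;
	struct rcu_head_sim rcu;	/* embedded, like the kernel's rcu_head */
};

struct group_sim { struct cfs_rq_sim *per_cpu[NR_CPUS_SIM]; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void free_group_cb(struct rcu_head_sim *rhp)
{
	struct cfs_rq_sim *cfs_rq = container_of(rhp, struct cfs_rq_sim, rcu);
	struct group_sim *tg = cfs_rq->tg;
	int i;

	for (i = 0; i < NR_CPUS_SIM; i++)
		free(tg->per_cpu[i]);
	free(tg);
	printf("group freed after grace period\n");
}

int main(void)
{
	struct group_sim *tg = calloc(1, sizeof(*tg));
	struct rcu_head_sim *pending;
	int i;

	for (i = 0; i < NR_CPUS_SIM; i++) {
		tg->per_cpu[i] = calloc(1, sizeof(*tg->per_cpu[i]));
		tg->per_cpu[i]->tg = tg;
	}

	/* "sched_destroy_group": unlink (not modelled), then defer the free */
	tg->per_cpu[0]->rcu.func = free_group_cb;
	pending = &tg->per_cpu[0]->rcu;

	/* ... readers finish; the deferred callback runs ... */
	pending->func(pending);
	return 0;
}
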
6877 | |||
6878 | /* change task's runqueue when it moves between groups. | ||
6879 | * The caller of this function should have put the task in its new group | ||
6880 | * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to | ||
6881 | * reflect its new group. | ||
6882 | */ | ||
6883 | void sched_move_task(struct task_struct *tsk) | ||
6884 | { | ||
6885 | int on_rq, running; | ||
6886 | unsigned long flags; | ||
6887 | struct rq *rq; | ||
6888 | |||
6889 | rq = task_rq_lock(tsk, &flags); | ||
6890 | |||
6891 | if (tsk->sched_class != &fair_sched_class) | ||
6892 | goto done; | ||
6893 | |||
6894 | update_rq_clock(rq); | ||
6895 | |||
6896 | running = task_running(rq, tsk); | ||
6897 | on_rq = tsk->se.on_rq; | ||
6898 | |||
6899 | if (on_rq) { | ||
6900 | dequeue_task(rq, tsk, 0); | ||
6901 | if (unlikely(running)) | ||
6902 | tsk->sched_class->put_prev_task(rq, tsk); | ||
6903 | } | ||
6904 | |||
6905 | set_task_cfs_rq(tsk); | ||
6906 | |||
6907 | if (on_rq) { | ||
6908 | if (unlikely(running)) | ||
6909 | tsk->sched_class->set_curr_task(rq); | ||
6910 | enqueue_task(rq, tsk, 0); | ||
6911 | } | ||
6912 | |||
6913 | done: | ||
6914 | task_rq_unlock(rq, &flags); | ||
6915 | } | ||
6916 | |||
6917 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | ||
6918 | { | ||
6919 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
6920 | struct rq *rq = cfs_rq->rq; | ||
6921 | int on_rq; | ||
6922 | |||
6923 | spin_lock_irq(&rq->lock); | ||
6924 | |||
6925 | on_rq = se->on_rq; | ||
6926 | if (on_rq) | ||
6927 | dequeue_entity(cfs_rq, se, 0); | ||
6928 | |||
6929 | se->load.weight = shares; | ||
6930 | se->load.inv_weight = div64_64((1ULL<<32), shares); | ||
6931 | |||
6932 | if (on_rq) | ||
6933 | enqueue_entity(cfs_rq, se, 0); | ||
6934 | |||
6935 | spin_unlock_irq(&rq->lock); | ||
6936 | } | ||
6937 | |||
6938 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) | ||
6939 | { | ||
6940 | int i; | ||
6941 | |||
6942 | spin_lock(&tg->lock); | ||
6943 | if (tg->shares == shares) | ||
6944 | goto done; | ||
6945 | |||
6946 | tg->shares = shares; | ||
6947 | for_each_possible_cpu(i) | ||
6948 | set_se_shares(tg->se[i], shares); | ||
6949 | |||
6950 | done: | ||
6951 | spin_unlock(&tg->lock); | ||
6952 | return 0; | ||
6953 | } | ||
6954 | |||
6955 | unsigned long sched_group_shares(struct task_group *tg) | ||
6956 | { | ||
6957 | return tg->shares; | ||
6958 | } | ||
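set_se_shares() stores the new weight together with its precomputed reciprocal, div64_64(1ULL<<32, shares), presumably so that later "divide by weight" operations can be done as a multiply and a shift instead of a 64-bit division. A short sketch of that fixed-point trick with arbitrary numbers:

#include <stdint.h>
#include <stdio.h>

static uint64_t div_by_weight(uint64_t delta, uint32_t inv_weight)
{
	/* multiply by the 32.32 fixed-point reciprocal, then shift */
	return (delta * inv_weight) >> 32;
}

int main(void)
{
	uint32_t weight = 1024;				/* NICE_0_LOAD-style value */
	uint32_t inv_weight = (uint32_t)((1ULL << 32) / weight);
	uint64_t delta = 3000000;			/* e.g. 3 ms in ns, arbitrary */

	printf("exact     : %llu\n", (unsigned long long)(delta / weight));
	printf("reciprocal: %llu\n",
	       (unsigned long long)div_by_weight(delta, inv_weight));
	return 0;
}
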
6959 | |||
6960 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||