author	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 19:45:02 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 19:45:02 -0400
commit	bdc7ccfc0631797636837b10df7f87bc1e2e4ae3 (patch)
tree	70f09f8ffee07486d41ca254b8abb05692713d1e /kernel
parent	4d4abdcb1dee03a4f9d6d2021622ed07e14dfd17 (diff)
parent	0f3171438fc917b9f6b8b60dbb7a3fff9a0f68fd (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (24 commits)
  sched: Cleanup duplicate local variable in [enqueue|dequeue]_task_fair
  sched: Replace use of entity_key()
  sched: Separate group-scheduling code more clearly
  sched: Reorder root_domain to remove 64 bit alignment padding
  sched: Do not attempt to destroy uninitialized rt_bandwidth
  sched: Remove unused function cpu_cfs_rq()
  sched: Fix (harmless) typo 'CONFG_FAIR_GROUP_SCHED'
  sched, cgroup: Optimize load_balance_fair()
  sched: Don't update shares twice on on_rq parent
  sched: update correct entity's runtime in check_preempt_wakeup()
  xtensa: Use generic config PREEMPT definition
  h8300: Use generic config PREEMPT definition
  m32r: Use generic PREEMPT config
  sched: Skip autogroup when looking for all rt sched groups
  sched: Simplify mutex_spin_on_owner()
  sched: Remove rcu_read_lock() from wake_affine()
  sched: Generalize sleep inside spinlock detection
  sched: Make sleeping inside spinlock detection working in !CONFIG_PREEMPT
  sched: Isolate preempt counting in its own config option
  sched: Remove pointless in_atomic() definition check
  ...
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/Kconfig.preempt	3
-rw-r--r--	kernel/sched.c	117
-rw-r--r--	kernel/sched_autogroup.h	1
-rw-r--r--	kernel/sched_fair.c	72
-rw-r--r--	kernel/sched_rt.c	26
5 files changed, 101 insertions(+), 118 deletions(-)
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf987b95b356..24e7cb0ba26a 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -35,6 +35,7 @@ config PREEMPT_VOLUNTARY
 
 config PREEMPT
 	bool "Preemptible Kernel (Low-Latency Desktop)"
+	select PREEMPT_COUNT
 	help
 	  This option reduces the latency of the kernel by making
 	  all kernel code (that is not executing in a critical section)
@@ -52,3 +53,5 @@ config PREEMPT
 
 endchoice
 
+config PREEMPT_COUNT
+	bool
\ No newline at end of file
diff --git a/kernel/sched.c b/kernel/sched.c
index 84b9e076812e..9aaf567c5da5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -124,7 +124,7 @@
 
 static inline int rt_policy(int policy)
 {
-	if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
+	if (policy == SCHED_FIFO || policy == SCHED_RR)
 		return 1;
 	return 0;
 }
@@ -422,6 +422,7 @@ struct rt_rq {
  */
 struct root_domain {
 	atomic_t refcount;
+	atomic_t rto_count;
 	struct rcu_head rcu;
 	cpumask_var_t span;
 	cpumask_var_t online;
@@ -431,7 +432,6 @@ struct root_domain {
 	 * one runnable RT task.
 	 */
 	cpumask_var_t rto_mask;
-	atomic_t rto_count;
 	struct cpupri cpupri;
 };
 
@@ -1568,38 +1568,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 	return rq->avg_load_per_task;
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-/*
- * Compute the cpu's hierarchical load factor for each task group.
- * This needs to be done in a top-down fashion because the load of a child
- * group is a fraction of its parents load.
- */
-static int tg_load_down(struct task_group *tg, void *data)
-{
-	unsigned long load;
-	long cpu = (long)data;
-
-	if (!tg->parent) {
-		load = cpu_rq(cpu)->load.weight;
-	} else {
-		load = tg->parent->cfs_rq[cpu]->h_load;
-		load *= tg->se[cpu]->load.weight;
-		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
-	}
-
-	tg->cfs_rq[cpu]->h_load = load;
-
-	return 0;
-}
-
-static void update_h_load(long cpu)
-{
-	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-}
-
-#endif
-
 #ifdef CONFIG_PREEMPT
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -2497,7 +2465,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 	if (p->sched_class->task_woken)
 		p->sched_class->task_woken(rq, p);
 
-	if (unlikely(rq->idle_stamp)) {
+	if (rq->idle_stamp) {
 		u64 delta = rq->clock - rq->idle_stamp;
 		u64 max = 2*sysctl_sched_migration_cost;
 
@@ -2886,7 +2854,7 @@ void sched_fork(struct task_struct *p)
 #if defined(CONFIG_SMP)
 	p->on_cpu = 0;
 #endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPT_COUNT
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
@@ -4338,11 +4306,8 @@ EXPORT_SYMBOL(schedule);
 
 static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
 {
-	bool ret = false;
-
-	rcu_read_lock();
 	if (lock->owner != owner)
-		goto fail;
+		return false;
 
 	/*
 	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
@@ -4352,11 +4317,7 @@ static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
 	 */
 	barrier();
 
-	ret = owner->on_cpu;
-fail:
-	rcu_read_unlock();
-
-	return ret;
+	return owner->on_cpu;
 }
 
 /*
@@ -4368,21 +4329,21 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 	if (!sched_feat(OWNER_SPIN))
 		return 0;
 
+	rcu_read_lock();
 	while (owner_running(lock, owner)) {
 		if (need_resched())
-			return 0;
+			break;
 
 		arch_mutex_cpu_relax();
 	}
+	rcu_read_unlock();
 
 	/*
-	 * If the owner changed to another task there is likely
-	 * heavy contention, stop spinning.
+	 * We break out the loop above on need_resched() and when the
+	 * owner changed, which is a sign for heavy contention. Return
+	 * success only when lock->owner is NULL.
 	 */
-	if (lock->owner)
-		return 0;
-
-	return 1;
+	return lock->owner == NULL;
 }
 #endif
 
@@ -7898,17 +7859,10 @@ int in_sched_functions(unsigned long addr)
 		&& addr < (unsigned long)__sched_text_end);
 }
 
-static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
+static void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	cfs_rq->tasks_timeline = RB_ROOT;
 	INIT_LIST_HEAD(&cfs_rq->tasks);
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	cfs_rq->rq = rq;
-	/* allow initial update_cfs_load() to truncate */
-#ifdef CONFIG_SMP
-	cfs_rq->load_stamp = 1;
-#endif
-#endif
 	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 #ifndef CONFIG_64BIT
 	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
@@ -7928,13 +7882,9 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	/* delimiter for bitsearch: */
 	__set_bit(MAX_RT_PRIO, array->bitmap);
 
-#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+#if defined CONFIG_SMP
 	rt_rq->highest_prio.curr = MAX_RT_PRIO;
-#ifdef CONFIG_SMP
 	rt_rq->highest_prio.next = MAX_RT_PRIO;
-#endif
-#endif
-#ifdef CONFIG_SMP
 	rt_rq->rt_nr_migratory = 0;
 	rt_rq->overloaded = 0;
 	plist_head_init(&rt_rq->pushable_tasks);
@@ -7944,11 +7894,6 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	rt_rq->rt_throttled = 0;
 	rt_rq->rt_runtime = 0;
 	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
-
-#ifdef CONFIG_RT_GROUP_SCHED
-	rt_rq->rt_nr_boosted = 0;
-	rt_rq->rq = rq;
-#endif
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -7957,11 +7902,17 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 				struct sched_entity *parent)
 {
 	struct rq *rq = cpu_rq(cpu);
-	tg->cfs_rq[cpu] = cfs_rq;
-	init_cfs_rq(cfs_rq, rq);
+
 	cfs_rq->tg = tg;
+	cfs_rq->rq = rq;
+#ifdef CONFIG_SMP
+	/* allow initial update_cfs_load() to truncate */
+	cfs_rq->load_stamp = 1;
+#endif
 
+	tg->cfs_rq[cpu] = cfs_rq;
 	tg->se[cpu] = se;
+
 	/* se could be NULL for root_task_group */
 	if (!se)
 		return;
@@ -7984,12 +7935,14 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 {
 	struct rq *rq = cpu_rq(cpu);
 
-	tg->rt_rq[cpu] = rt_rq;
-	init_rt_rq(rt_rq, rq);
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+	rt_rq->rt_nr_boosted = 0;
+	rt_rq->rq = rq;
 	rt_rq->tg = tg;
-	rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
 
+	tg->rt_rq[cpu] = rt_rq;
 	tg->rt_se[cpu] = rt_se;
+
 	if (!rt_se)
 		return;
 
@@ -8071,7 +8024,7 @@ void __init sched_init(void)
 		rq->nr_running = 0;
 		rq->calc_load_active = 0;
 		rq->calc_load_update = jiffies + LOAD_FREQ;
-		init_cfs_rq(&rq->cfs, rq);
+		init_cfs_rq(&rq->cfs);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		root_task_group.shares = root_task_group_load;
@@ -8185,7 +8138,7 @@ void __init sched_init(void)
 	scheduler_running = 1;
 }
 
-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 static inline int preempt_count_equals(int preempt_offset)
 {
 	int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
@@ -8195,7 +8148,6 @@ static inline int preempt_count_equals(int preempt_offset)
 
 void __might_sleep(const char *file, int line, int preempt_offset)
 {
-#ifdef in_atomic
 	static unsigned long prev_jiffy;	/* ratelimiting */
 
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
@@ -8217,7 +8169,6 @@ void __might_sleep(const char *file, int line, int preempt_offset)
 	if (irqs_disabled())
 		print_irqtrace_events(current);
 	dump_stack();
-#endif
 }
 EXPORT_SYMBOL(__might_sleep);
 #endif
@@ -8376,6 +8327,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		if (!se)
 			goto err_free_rq;
 
+		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 	}
 
@@ -8403,7 +8355,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
 		list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
-#else /* !CONFG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_FAIR_GROUP_SCHED */
 static inline void free_fair_sched_group(struct task_group *tg)
 {
 }
@@ -8424,7 +8376,8 @@ static void free_rt_sched_group(struct task_group *tg)
 {
 	int i;
 
-	destroy_rt_bandwidth(&tg->rt_bandwidth);
+	if (tg->rt_se)
+		destroy_rt_bandwidth(&tg->rt_bandwidth);
 
 	for_each_possible_cpu(i) {
 		if (tg->rt_rq)
@@ -8465,6 +8418,8 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 		if (!rt_se)
 			goto err_free_rq;
 
+		init_rt_rq(rt_rq, cpu_rq(i));
+		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
 		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
 	}
 
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 05577055cfca..c2f0e7248dca 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -13,6 +13,7 @@ struct autogroup {
 	int			nice;
 };
 
+static inline bool task_group_is_autogroup(struct task_group *tg);
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg);
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c768588e180b..bc8ee9993814 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -135,14 +135,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 	return grp->my_q;
 }
 
-/* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
- * another cpu ('this_cpu')
- */
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
-	return cfs_rq->tg->cfs_rq[this_cpu];
-}
-
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	if (!cfs_rq->on_list) {
@@ -271,11 +263,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 	return NULL;
 }
 
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
-	return &cpu_rq(this_cpu)->cfs;
-}
-
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 }
@@ -334,11 +321,6 @@ static inline int entity_before(struct sched_entity *a,
 	return (s64)(a->vruntime - b->vruntime) < 0;
 }
 
-static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	return se->vruntime - cfs_rq->min_vruntime;
-}
-
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
 	u64 vruntime = cfs_rq->min_vruntime;
@@ -372,7 +354,6 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
 	struct rb_node *parent = NULL;
 	struct sched_entity *entry;
-	s64 key = entity_key(cfs_rq, se);
 	int leftmost = 1;
 
 	/*
@@ -385,7 +366,7 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 * We dont care about collisions. Nodes with
 		 * the same key stay together.
 		 */
-		if (key < entity_key(cfs_rq, entry)) {
+		if (entity_before(se, entry)) {
 			link = &parent->rb_left;
 		} else {
 			link = &parent->rb_right;
@@ -1336,7 +1317,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	}
 
 	for_each_sched_entity(se) {
-		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+		cfs_rq = cfs_rq_of(se);
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
@@ -1370,13 +1351,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 			 */
 			if (task_sleep && parent_entity(se))
 				set_next_buddy(parent_entity(se));
+
+			/* avoid re-evaluating load for this entity */
+			se = parent_entity(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
 	}
 
 	for_each_sched_entity(se) {
-		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+		cfs_rq = cfs_rq_of(se);
 
 		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
@@ -1481,7 +1465,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	 * effect of the currently running task from the load
 	 * of the current CPU:
 	 */
-	rcu_read_lock();
 	if (sync) {
 		tg = task_group(current);
 		weight = current->se.load.weight;
@@ -1517,7 +1500,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 		balanced = this_eff_load <= prev_eff_load;
 	} else
 		balanced = true;
-	rcu_read_unlock();
 
 	/*
 	 * If the currently running task will sleep within
@@ -1921,8 +1903,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
-	update_curr(cfs_rq);
 	find_matching_se(&se, &pse);
+	update_curr(cfs_rq_of(se));
 	BUG_ON(!pse);
 	if (wakeup_preempt_entity(se, pse) == 1) {
 		/*
@@ -2231,11 +2213,43 @@ static void update_shares(int cpu)
 	struct rq *rq = cpu_rq(cpu);
 
 	rcu_read_lock();
+	/*
+	 * Iterates the task_group tree in a bottom up fashion, see
+	 * list_add_leaf_cfs_rq() for details.
+	 */
 	for_each_leaf_cfs_rq(rq, cfs_rq)
 		update_shares_cpu(cfs_rq->tg, cpu);
 	rcu_read_unlock();
 }
 
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+	unsigned long load;
+	long cpu = (long)data;
+
+	if (!tg->parent) {
+		load = cpu_rq(cpu)->load.weight;
+	} else {
+		load = tg->parent->cfs_rq[cpu]->h_load;
+		load *= tg->se[cpu]->load.weight;
+		load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+	}
+
+	tg->cfs_rq[cpu]->h_load = load;
+
+	return 0;
+}
+
+static void update_h_load(long cpu)
+{
+	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -2243,14 +2257,12 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  int *all_pinned)
 {
 	long rem_load_move = max_load_move;
-	int busiest_cpu = cpu_of(busiest);
-	struct task_group *tg;
+	struct cfs_rq *busiest_cfs_rq;
 
 	rcu_read_lock();
-	update_h_load(busiest_cpu);
+	update_h_load(cpu_of(busiest));
 
-	list_for_each_entry_rcu(tg, &task_groups, list) {
-		struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+	for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
 		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
 		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
 		u64 rem_load, moved_load;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 10d018212bab..97540f0c9e47 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -185,11 +185,23 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 
 typedef struct task_group *rt_rq_iter_t;
 
-#define for_each_rt_rq(rt_rq, iter, rq) \
-	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
-	     (&iter->list != &task_groups) && \
-	     (rt_rq = iter->rt_rq[cpu_of(rq)]); \
-	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
+static inline struct task_group *next_task_group(struct task_group *tg)
+{
+	do {
+		tg = list_entry_rcu(tg->list.next,
+			typeof(struct task_group), list);
+	} while (&tg->list != &task_groups && task_group_is_autogroup(tg));
+
+	if (&tg->list == &task_groups)
+		tg = NULL;
+
+	return tg;
+}
+
+#define for_each_rt_rq(rt_rq, iter, rq)					\
+	for (iter = container_of(&task_groups, typeof(*iter), list);	\
+		(iter = next_task_group(iter)) &&			\
+		(rt_rq = iter->rt_rq[cpu_of(rq)]);)
 
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
@@ -1126,7 +1138,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 
 	rt_rq = &rq->rt;
 
-	if (unlikely(!rt_rq->rt_nr_running))
+	if (!rt_rq->rt_nr_running)
 		return NULL;
 
 	if (rt_rq_throttled(rt_rq))
@@ -1548,7 +1560,7 @@ skip:
 static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
 {
 	/* Try to pull RT tasks here if we lower this rq's prio */
-	if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
+	if (rq->rt.highest_prio.curr > prev->prio)
 		pull_rt_task(rq);
 }
 