author     Linus Torvalds <torvalds@linux-foundation.org>   2011-07-22 19:45:02 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-07-22 19:45:02 -0400
commit     bdc7ccfc0631797636837b10df7f87bc1e2e4ae3 (patch)
tree       70f09f8ffee07486d41ca254b8abb05692713d1e /kernel
parent     4d4abdcb1dee03a4f9d6d2021622ed07e14dfd17 (diff)
parent     0f3171438fc917b9f6b8b60dbb7a3fff9a0f68fd (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (24 commits)
sched: Cleanup duplicate local variable in [enqueue|dequeue]_task_fair
sched: Replace use of entity_key()
sched: Separate group-scheduling code more clearly
sched: Reorder root_domain to remove 64 bit alignment padding
sched: Do not attempt to destroy uninitialized rt_bandwidth
sched: Remove unused function cpu_cfs_rq()
sched: Fix (harmless) typo 'CONFG_FAIR_GROUP_SCHED'
sched, cgroup: Optimize load_balance_fair()
sched: Don't update shares twice on on_rq parent
sched: update correct entity's runtime in check_preempt_wakeup()
xtensa: Use generic config PREEMPT definition
h8300: Use generic config PREEMPT definition
m32r: Use generic PREEMPT config
sched: Skip autogroup when looking for all rt sched groups
sched: Simplify mutex_spin_on_owner()
sched: Remove rcu_read_lock() from wake_affine()
sched: Generalize sleep inside spinlock detection
sched: Make sleeping inside spinlock detection working in !CONFIG_PREEMPT
sched: Isolate preempt counting in its own config option
sched: Remove pointless in_atomic() definition check
...
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/Kconfig.preempt   |   3
-rw-r--r--   kernel/sched.c           | 117
-rw-r--r--   kernel/sched_autogroup.h |   1
-rw-r--r--   kernel/sched_fair.c      |  72
-rw-r--r--   kernel/sched_rt.c        |  26
5 files changed, 101 insertions, 118 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf987b95b356..24e7cb0ba26a 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -35,6 +35,7 @@ config PREEMPT_VOLUNTARY
 
 config PREEMPT
         bool "Preemptible Kernel (Low-Latency Desktop)"
+        select PREEMPT_COUNT
         help
           This option reduces the latency of the kernel by making
           all kernel code (that is not executing in a critical section)
@@ -52,3 +53,5 @@ config PREEMPT
 
 endchoice
 
+config PREEMPT_COUNT
+        bool
\ No newline at end of file
diff --git a/kernel/sched.c b/kernel/sched.c
index 84b9e076812e..9aaf567c5da5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -124,7 +124,7 @@
 
 static inline int rt_policy(int policy)
 {
-        if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
+        if (policy == SCHED_FIFO || policy == SCHED_RR)
                 return 1;
         return 0;
 }
@@ -422,6 +422,7 @@ struct rt_rq {
  */
 struct root_domain {
         atomic_t refcount;
+        atomic_t rto_count;
         struct rcu_head rcu;
         cpumask_var_t span;
         cpumask_var_t online;
@@ -431,7 +432,6 @@ struct root_domain {
          * one runnable RT task.
          */
         cpumask_var_t rto_mask;
-        atomic_t rto_count;
         struct cpupri cpupri;
 };
 
@@ -1568,38 +1568,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
         return rq->avg_load_per_task;
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-/*
- * Compute the cpu's hierarchical load factor for each task group.
- * This needs to be done in a top-down fashion because the load of a child
- * group is a fraction of its parents load.
- */
-static int tg_load_down(struct task_group *tg, void *data)
-{
-        unsigned long load;
-        long cpu = (long)data;
-
-        if (!tg->parent) {
-                load = cpu_rq(cpu)->load.weight;
-        } else {
-                load = tg->parent->cfs_rq[cpu]->h_load;
-                load *= tg->se[cpu]->load.weight;
-                load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
-        }
-
-        tg->cfs_rq[cpu]->h_load = load;
-
-        return 0;
-}
-
-static void update_h_load(long cpu)
-{
-        walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-}
-
-#endif
-
 #ifdef CONFIG_PREEMPT
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -2497,7 +2465,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
         if (p->sched_class->task_woken)
                 p->sched_class->task_woken(rq, p);
 
-        if (unlikely(rq->idle_stamp)) {
+        if (rq->idle_stamp) {
                 u64 delta = rq->clock - rq->idle_stamp;
                 u64 max = 2*sysctl_sched_migration_cost;
 
@@ -2886,7 +2854,7 @@ void sched_fork(struct task_struct *p)
 #if defined(CONFIG_SMP)
         p->on_cpu = 0;
 #endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPT_COUNT
         /* Want to start with kernel preemption disabled. */
         task_thread_info(p)->preempt_count = 1;
 #endif
@@ -4338,11 +4306,8 @@ EXPORT_SYMBOL(schedule);
 
 static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
 {
-        bool ret = false;
-
-        rcu_read_lock();
         if (lock->owner != owner)
-                goto fail;
+                return false;
 
         /*
          * Ensure we emit the owner->on_cpu, dereference _after_ checking
@@ -4352,11 +4317,7 @@ static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
          */
         barrier();
 
-        ret = owner->on_cpu;
-fail:
-        rcu_read_unlock();
-
-        return ret;
+        return owner->on_cpu;
 }
 
 /*
@@ -4368,21 +4329,21 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
         if (!sched_feat(OWNER_SPIN))
                 return 0;
 
+        rcu_read_lock();
         while (owner_running(lock, owner)) {
                 if (need_resched())
-                        return 0;
+                        break;
 
                 arch_mutex_cpu_relax();
         }
+        rcu_read_unlock();
 
         /*
-         * If the owner changed to another task there is likely
-         * heavy contention, stop spinning.
+         * We break out the loop above on need_resched() and when the
+         * owner changed, which is a sign for heavy contention. Return
+         * success only when lock->owner is NULL.
          */
-        if (lock->owner)
-                return 0;
-
-        return 1;
+        return lock->owner == NULL;
 }
 #endif
 
@@ -7898,17 +7859,10 @@ int in_sched_functions(unsigned long addr)
                 && addr < (unsigned long)__sched_text_end);
 }
 
-static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
+static void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
         cfs_rq->tasks_timeline = RB_ROOT;
         INIT_LIST_HEAD(&cfs_rq->tasks);
-#ifdef CONFIG_FAIR_GROUP_SCHED
-        cfs_rq->rq = rq;
-        /* allow initial update_cfs_load() to truncate */
-#ifdef CONFIG_SMP
-        cfs_rq->load_stamp = 1;
-#endif
-#endif
         cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 #ifndef CONFIG_64BIT
         cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
@@ -7928,13 +7882,9 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
         /* delimiter for bitsearch: */
         __set_bit(MAX_RT_PRIO, array->bitmap);
 
-#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+#if defined CONFIG_SMP
         rt_rq->highest_prio.curr = MAX_RT_PRIO;
-#ifdef CONFIG_SMP
         rt_rq->highest_prio.next = MAX_RT_PRIO;
-#endif
-#endif
-#ifdef CONFIG_SMP
         rt_rq->rt_nr_migratory = 0;
         rt_rq->overloaded = 0;
         plist_head_init(&rt_rq->pushable_tasks);
@@ -7944,11 +7894,6 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
         rt_rq->rt_throttled = 0;
         rt_rq->rt_runtime = 0;
         raw_spin_lock_init(&rt_rq->rt_runtime_lock);
-
-#ifdef CONFIG_RT_GROUP_SCHED
-        rt_rq->rt_nr_boosted = 0;
-        rt_rq->rq = rq;
-#endif
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -7957,11 +7902,17 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
                         struct sched_entity *parent)
 {
         struct rq *rq = cpu_rq(cpu);
-        tg->cfs_rq[cpu] = cfs_rq;
-        init_cfs_rq(cfs_rq, rq);
+
         cfs_rq->tg = tg;
+        cfs_rq->rq = rq;
+#ifdef CONFIG_SMP
+        /* allow initial update_cfs_load() to truncate */
+        cfs_rq->load_stamp = 1;
+#endif
 
+        tg->cfs_rq[cpu] = cfs_rq;
         tg->se[cpu] = se;
+
         /* se could be NULL for root_task_group */
         if (!se)
                 return;
@@ -7984,12 +7935,14 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 {
         struct rq *rq = cpu_rq(cpu);
 
-        tg->rt_rq[cpu] = rt_rq;
-        init_rt_rq(rt_rq, rq);
+        rt_rq->highest_prio.curr = MAX_RT_PRIO;
+        rt_rq->rt_nr_boosted = 0;
+        rt_rq->rq = rq;
         rt_rq->tg = tg;
-        rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
 
+        tg->rt_rq[cpu] = rt_rq;
         tg->rt_se[cpu] = rt_se;
+
         if (!rt_se)
                 return;
 
@@ -8071,7 +8024,7 @@ void __init sched_init(void)
                 rq->nr_running = 0;
                 rq->calc_load_active = 0;
                 rq->calc_load_update = jiffies + LOAD_FREQ;
-                init_cfs_rq(&rq->cfs, rq);
+                init_cfs_rq(&rq->cfs);
                 init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
                 root_task_group.shares = root_task_group_load;
@@ -8185,7 +8138,7 @@ void __init sched_init(void)
         scheduler_running = 1;
 }
 
-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 static inline int preempt_count_equals(int preempt_offset)
 {
         int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
@@ -8195,7 +8148,6 @@ static inline int preempt_count_equals(int preempt_offset)
 
 void __might_sleep(const char *file, int line, int preempt_offset)
 {
-#ifdef in_atomic
         static unsigned long prev_jiffy;        /* ratelimiting */
 
         if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
@@ -8217,7 +8169,6 @@ void __might_sleep(const char *file, int line, int preempt_offset)
         if (irqs_disabled())
                 print_irqtrace_events(current);
         dump_stack();
-#endif
 }
 EXPORT_SYMBOL(__might_sleep);
 #endif
@@ -8376,6 +8327,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
                 if (!se)
                         goto err_free_rq;
 
+                init_cfs_rq(cfs_rq);
                 init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
         }
 
@@ -8403,7 +8355,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
         list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
         raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
-#else /* !CONFG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_FAIR_GROUP_SCHED */
 static inline void free_fair_sched_group(struct task_group *tg)
 {
 }
@@ -8424,7 +8376,8 @@ static void free_rt_sched_group(struct task_group *tg)
 {
         int i;
 
-        destroy_rt_bandwidth(&tg->rt_bandwidth);
+        if (tg->rt_se)
+                destroy_rt_bandwidth(&tg->rt_bandwidth);
 
         for_each_possible_cpu(i) {
                 if (tg->rt_rq)
@@ -8465,6 +8418,8 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
                 if (!rt_se)
                         goto err_free_rq;
 
+                init_rt_rq(rt_rq, cpu_rq(i));
+                rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
                 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
         }
 
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 05577055cfca..c2f0e7248dca 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -13,6 +13,7 @@ struct autogroup {
         int nice;
 };
 
+static inline bool task_group_is_autogroup(struct task_group *tg);
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg);
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c768588e180b..bc8ee9993814 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -135,14 +135,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
         return grp->my_q;
 }
 
-/* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
- * another cpu ('this_cpu')
- */
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
-        return cfs_rq->tg->cfs_rq[this_cpu];
-}
-
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
         if (!cfs_rq->on_list) {
@@ -271,11 +263,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
         return NULL;
 }
 
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
-        return &cpu_rq(this_cpu)->cfs;
-}
-
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 }
@@ -334,11 +321,6 @@ static inline int entity_before(struct sched_entity *a,
         return (s64)(a->vruntime - b->vruntime) < 0;
 }
 
-static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-        return se->vruntime - cfs_rq->min_vruntime;
-}
-
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
         u64 vruntime = cfs_rq->min_vruntime;
@@ -372,7 +354,6 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
         struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
         struct rb_node *parent = NULL;
         struct sched_entity *entry;
-        s64 key = entity_key(cfs_rq, se);
         int leftmost = 1;
 
         /*
@@ -385,7 +366,7 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
                  * We dont care about collisions. Nodes with
                  * the same key stay together.
                  */
-                if (key < entity_key(cfs_rq, entry)) {
+                if (entity_before(se, entry)) {
                         link = &parent->rb_left;
                 } else {
                         link = &parent->rb_right;
@@ -1336,7 +1317,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
         }
 
         for_each_sched_entity(se) {
-                struct cfs_rq *cfs_rq = cfs_rq_of(se);
+                cfs_rq = cfs_rq_of(se);
 
                 update_cfs_load(cfs_rq, 0);
                 update_cfs_shares(cfs_rq);
@@ -1370,13 +1351,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                          */
                         if (task_sleep && parent_entity(se))
                                 set_next_buddy(parent_entity(se));
+
+                        /* avoid re-evaluating load for this entity */
+                        se = parent_entity(se);
                         break;
                 }
                 flags |= DEQUEUE_SLEEP;
         }
 
         for_each_sched_entity(se) {
-                struct cfs_rq *cfs_rq = cfs_rq_of(se);
+                cfs_rq = cfs_rq_of(se);
 
                 update_cfs_load(cfs_rq, 0);
                 update_cfs_shares(cfs_rq);
@@ -1481,7 +1465,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
          * effect of the currently running task from the load
          * of the current CPU:
          */
-        rcu_read_lock();
         if (sync) {
                 tg = task_group(current);
                 weight = current->se.load.weight;
@@ -1517,7 +1500,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
                 balanced = this_eff_load <= prev_eff_load;
         } else
                 balanced = true;
-        rcu_read_unlock();
 
         /*
          * If the currently running task will sleep within
@@ -1921,8 +1903,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
         if (!sched_feat(WAKEUP_PREEMPT))
                 return;
 
-        update_curr(cfs_rq);
         find_matching_se(&se, &pse);
+        update_curr(cfs_rq_of(se));
         BUG_ON(!pse);
         if (wakeup_preempt_entity(se, pse) == 1) {
                 /*
@@ -2231,11 +2213,43 @@ static void update_shares(int cpu)
         struct rq *rq = cpu_rq(cpu);
 
         rcu_read_lock();
+        /*
+         * Iterates the task_group tree in a bottom up fashion, see
+         * list_add_leaf_cfs_rq() for details.
+         */
         for_each_leaf_cfs_rq(rq, cfs_rq)
                 update_shares_cpu(cfs_rq->tg, cpu);
         rcu_read_unlock();
 }
 
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+        unsigned long load;
+        long cpu = (long)data;
+
+        if (!tg->parent) {
+                load = cpu_rq(cpu)->load.weight;
+        } else {
+                load = tg->parent->cfs_rq[cpu]->h_load;
+                load *= tg->se[cpu]->load.weight;
+                load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+        }
+
+        tg->cfs_rq[cpu]->h_load = load;
+
+        return 0;
+}
+
+static void update_h_load(long cpu)
+{
+        walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                   unsigned long max_load_move,
@@ -2243,14 +2257,12 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                   int *all_pinned)
 {
         long rem_load_move = max_load_move;
-        int busiest_cpu = cpu_of(busiest);
-        struct task_group *tg;
+        struct cfs_rq *busiest_cfs_rq;
 
         rcu_read_lock();
-        update_h_load(busiest_cpu);
+        update_h_load(cpu_of(busiest));
 
-        list_for_each_entry_rcu(tg, &task_groups, list) {
-                struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+        for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
                 unsigned long busiest_h_load = busiest_cfs_rq->h_load;
                 unsigned long busiest_weight = busiest_cfs_rq->load.weight;
                 u64 rem_load, moved_load;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 10d018212bab..97540f0c9e47 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -185,11 +185,23 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 
 typedef struct task_group *rt_rq_iter_t;
 
-#define for_each_rt_rq(rt_rq, iter, rq) \
-        for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
-             (&iter->list != &task_groups) && \
-             (rt_rq = iter->rt_rq[cpu_of(rq)]); \
-             iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
+static inline struct task_group *next_task_group(struct task_group *tg)
+{
+        do {
+                tg = list_entry_rcu(tg->list.next,
+                        typeof(struct task_group), list);
+        } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
+
+        if (&tg->list == &task_groups)
+                tg = NULL;
+
+        return tg;
+}
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+        for (iter = container_of(&task_groups, typeof(*iter), list); \
+                (iter = next_task_group(iter)) && \
+                (rt_rq = iter->rt_rq[cpu_of(rq)]);)
 
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
@@ -1126,7 +1138,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 
         rt_rq = &rq->rt;
 
-        if (unlikely(!rt_rq->rt_nr_running))
+        if (!rt_rq->rt_nr_running)
                 return NULL;
 
         if (rt_rq_throttled(rt_rq))
@@ -1548,7 +1560,7 @@ skip:
 static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
 {
         /* Try to pull RT tasks here if we lower this rq's prio */
-        if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
+        if (rq->rt.highest_prio.curr > prev->prio)
                 pull_rt_task(rq);
 }
 