author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-11-15 18:47:01 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-11-18 07:27:47 -0500
commit	3d4b47b4b040c9d77dd68104cfc1055d89a55afd (patch)
tree	a4b39b5d7c89a319b81543c1b26778d6220e772b /kernel/sched.c
parent	2069dd75c7d0f49355939e5586daf5a9ab216db7 (diff)
sched: Implement on-demand (active) cfs_rq list
Make certain load-balance actions scale per number of active cgroups
instead of the number of existing cgroups.

This makes wakeup/sleep paths more expensive, but is a win for systems
where the vast majority of existing cgroups are idle.

Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20101115234937.666535048@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c | 105
1 file changed, 28 insertions(+), 77 deletions(-)
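The core of the change is that a cfs_rq is only linked onto its cpu's leaf_cfs_rq_list while it actually has load, tracked by the new on_list flag. The helpers that do this live in kernel/sched_fair.c and are therefore not part of the diff below (only list_del_leaf_cfs_rq() is visible here); the following is a minimal sketch of that gating, assuming the usual rq_of() accessor, simplified for illustration rather than quoted from the patch:

static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
	/* Register with the leaf list only on the idle -> active transition. */
	if (!cfs_rq->on_list) {
		list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
				&rq_of(cfs_rq)->leaf_cfs_rq_list);
		cfs_rq->on_list = 1;
	}
}

static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
	/* Drop an idle cfs_rq so load balancing no longer walks it. */
	if (cfs_rq->on_list) {
		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
		cfs_rq->on_list = 0;
	}
}

Because registration now happens on demand from the scheduler's enqueue/dequeue paths, the per-cpu list only ever contains active cgroups, which is why the unconditional register_fair_sched_group()/register_rt_sched_group() calls are removed below.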
diff --git a/kernel/sched.c b/kernel/sched.c
index e2f1a3024a99..22436dd2e19f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -274,9 +274,7 @@ struct task_group {
 
 #define root_task_group init_task_group
 
-/* task_group_lock serializes add/remove of task groups and also changes to
- * a task group's cpu shares.
- */
+/* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -344,6 +342,7 @@ struct cfs_rq {
 	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
 	 * list is used during load balance.
 	 */
+	int on_list;
 	struct list_head leaf_cfs_rq_list;
 	struct task_group *tg;	/* group that "owns" this runqueue */
 
@@ -1547,7 +1546,7 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static void update_cfs_load(struct cfs_rq *cfs_rq);
+static void update_cfs_load(struct cfs_rq *cfs_rq, int lb);
 static void update_cfs_shares(struct cfs_rq *cfs_rq);
 
 /*
@@ -1570,7 +1569,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	raw_spin_lock_irqsave(&rq->lock, flags);
 
 	update_rq_clock(rq);
-	update_cfs_load(cfs_rq);
+	update_cfs_load(cfs_rq, 1);
 
 	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
 	load_avg -= cfs_rq->load_contribution;
@@ -7688,15 +7687,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
-				struct sched_entity *se, int cpu, int add,
+				struct sched_entity *se, int cpu,
 				struct sched_entity *parent)
 {
 	struct rq *rq = cpu_rq(cpu);
 	tg->cfs_rq[cpu] = cfs_rq;
 	init_cfs_rq(cfs_rq, rq);
 	cfs_rq->tg = tg;
-	if (add)
-		list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 
 	tg->se[cpu] = se;
 	/* se could be NULL for init_task_group */
@@ -7716,7 +7713,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 
 #ifdef CONFIG_RT_GROUP_SCHED
 static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
-		struct sched_rt_entity *rt_se, int cpu, int add,
+		struct sched_rt_entity *rt_se, int cpu,
 		struct sched_rt_entity *parent)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -7725,8 +7722,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 	init_rt_rq(rt_rq, rq);
 	rt_rq->tg = tg;
 	rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
-	if (add)
-		list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
 
 	tg->rt_se[cpu] = rt_se;
 	if (!rt_se)
@@ -7835,7 +7830,7 @@ void __init sched_init(void)
 		 * We achieve this by letting init_task_group's tasks sit
 		 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
 		 */
-		init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
+		init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL);
 #endif
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
@@ -7843,7 +7838,7 @@ void __init sched_init(void)
 #ifdef CONFIG_RT_GROUP_SCHED
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
 #ifdef CONFIG_CGROUP_SCHED
-		init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
+		init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL);
 #endif
 #endif
 
@@ -8119,7 +8114,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		if (!se)
 			goto err_free_rq;
 
-		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
+		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 	}
 
 	return 1;
@@ -8130,15 +8125,22 @@ err:
 	return 0;
 }
 
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
-	list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
-			&cpu_rq(cpu)->leaf_cfs_rq_list);
-}
-
 static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
 {
-	list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+	int i;
+
+	/*
+	 * Only empty task groups can be destroyed; so we can speculatively
+	 * check on_list without danger of it being re-added.
+	 */
+	if (!tg->cfs_rq[cpu]->on_list)
+		return;
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	list_del_leaf_cfs_rq(tg->cfs_rq[i]);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 #else /* !CONFG_FAIR_GROUP_SCHED */
 static inline void free_fair_sched_group(struct task_group *tg)
@@ -8151,10 +8153,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	return 1;
 }
 
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
-}
-
 static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
 {
 }
@@ -8209,7 +8207,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 		if (!rt_se)
 			goto err_free_rq;
 
-		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
+		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
 	}
 
 	return 1;
@@ -8219,17 +8217,6 @@ err_free_rq:
 err:
 	return 0;
 }
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
-	list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
-			&cpu_rq(cpu)->leaf_rt_rq_list);
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
-	list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
-}
 #else /* !CONFIG_RT_GROUP_SCHED */
 static inline void free_rt_sched_group(struct task_group *tg)
 {
@@ -8240,14 +8227,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	return 1;
 }
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_SCHED
@@ -8263,7 +8242,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 {
 	struct task_group *tg;
 	unsigned long flags;
-	int i;
 
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
@@ -8276,10 +8254,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 		goto err;
 
 	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i) {
-		register_fair_sched_group(tg, i);
-		register_rt_sched_group(tg, i);
-	}
 	list_add_rcu(&tg->list, &task_groups);
 
 	WARN_ON(!parent);	/* root should already exist */
@@ -8309,11 +8283,11 @@ void sched_destroy_group(struct task_group *tg)
 	unsigned long flags;
 	int i;
 
-	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i) {
+	/* end participation in shares distribution */
+	for_each_possible_cpu(i)
 		unregister_fair_sched_group(tg, i);
-		unregister_rt_sched_group(tg, i);
-	}
+
+	spin_lock_irqsave(&task_group_lock, flags);
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
 	spin_unlock_irqrestore(&task_group_lock, flags);
@@ -8391,7 +8365,6 @@ static DEFINE_MUTEX(shares_mutex);
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
 	int i;
-	unsigned long flags;
 
 	/*
 	 * We can't change the weight of the root cgroup.
@@ -8408,19 +8381,6 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	if (tg->shares == shares)
 		goto done;
 
-	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i)
-		unregister_fair_sched_group(tg, i);
-	list_del_rcu(&tg->siblings);
-	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	/* wait for any ongoing reference to this group to finish */
-	synchronize_sched();
-
-	/*
-	 * Now we are free to modify the group's share on each cpu
-	 * w/o tripping rebalance_share or load_balance_fair.
-	 */
 	tg->shares = shares;
 	for_each_possible_cpu(i) {
 		/*
@@ -8429,15 +8389,6 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 		set_se_shares(tg->se[i], shares);
 	}
 
-	/*
-	 * Enable load balance activity on this group, by inserting it back on
-	 * each cpu's rq->leaf_cfs_rq_list.
-	 */
-	spin_lock_irqsave(&task_group_lock, flags);
-	for_each_possible_cpu(i)
-		register_fair_sched_group(tg, i);
-	list_add_rcu(&tg->siblings, &tg->parent->children);
-	spin_unlock_irqrestore(&task_group_lock, flags);
 done:
 	mutex_unlock(&shares_mutex);
 	return 0;