author     Peter Zijlstra <a.p.zijlstra@chello.nl>  2008-01-25 15:08:30 -0500
committer  Ingo Molnar <mingo@elte.hu>              2008-01-25 15:08:30 -0500
commit     6f505b16425a51270058e4a93441fe64de3dd435 (patch)
tree       be21e711d93bc4d088b97c4a4f585a5044dbaa7d /kernel/sched.c
parent     fa85ae2418e6843953107cd6a06f645752829bc0 (diff)
sched: rt group scheduling
Extend group scheduling to also cover the realtime classes. It uses the time limiting introduced by the previous patch to allow multiple realtime groups.

The hard time limit is required to keep behaviour deterministic.

The algorithms used make the realtime scheduler O(tg), i.e. linear in the number of task groups. This is the worst-case behaviour I can't seem to get out of; the average case of the algorithms can be improved, but I focused on correctness and the worst case.

[ akpm@linux-foundation.org: move side-effects out of BUG_ON(). ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
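The rt_ratio values that appear throughout the diff are fixed-point fractions, not percentages: the new default of 62259 set below corresponds to roughly 95% of the period when the scale is SCHED_RT_FRAC = 1 << SCHED_RT_FRAC_SHIFT with a shift of 16. That scale and the millisecond unit of sysctl_sched_rt_period come from the preceding time-limit patch and are assumptions here, not part of this diff. A minimal userspace sketch of the conversion:

/* sketch only (not kernel code): how an rt_ratio value maps onto the rt period */
#include <stdio.h>

#define SCHED_RT_FRAC_SHIFT	16			/* assumed, from the rt time-limit patch */
#define SCHED_RT_FRAC		(1UL << SCHED_RT_FRAC_SHIFT)

int main(void)
{
	unsigned long rt_ratio  = 62259;	/* new default for sysctl_sched_rt_ratio */
	unsigned long period_ms = 1000;		/* sysctl_sched_rt_period default, assumed to be ms */

	/* budget of rt time the groups may consume out of each period */
	unsigned long budget_ms = (period_ms * rt_ratio) >> SCHED_RT_FRAC_SHIFT;

	printf("rt_ratio %lu ~ %.1f%%: %lu ms of every %lu ms period\n",
	       rt_ratio, 100.0 * rt_ratio / SCHED_RT_FRAC, budget_ms, period_ms);
	return 0;
}

sched_group_set_rt_ratio(), added further down, keeps the sum of all group ratios at or below sysctl_sched_rt_ratio, so the per-group budgets cannot over-commit this global realtime budget.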
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 283
1 file changed, 200 insertions(+), 83 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index e9a7beee9b79..5ea2c533b432 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -161,6 +161,8 @@ struct rt_prio_array {
 
 struct cfs_rq;
 
+static LIST_HEAD(task_groups);
+
 /* task group related information */
 struct task_group {
 #ifdef CONFIG_FAIR_CGROUP_SCHED
@@ -171,6 +173,11 @@ struct task_group {
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
 
+	struct sched_rt_entity **rt_se;
+	struct rt_rq **rt_rq;
+
+	unsigned int rt_ratio;
+
 	/*
 	 * shares assigned to a task group governs how much of cpu bandwidth
 	 * is allocated to the group. The more shares a group has, the more is
@@ -208,6 +215,7 @@ struct task_group {
 	unsigned long shares;
 
 	struct rcu_head rcu;
+	struct list_head list;
 };
 
 /* Default task group's sched entity on each cpu */
@@ -215,9 +223,15 @@ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 
+static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
+static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
+
 static struct sched_entity *init_sched_entity_p[NR_CPUS];
 static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
 
+static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
+static struct rt_rq *init_rt_rq_p[NR_CPUS];
+
 /* task_group_mutex serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
  */
@@ -240,6 +254,9 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares);
 struct task_group init_task_group = {
 	.se = init_sched_entity_p,
 	.cfs_rq = init_cfs_rq_p,
+
+	.rt_se = init_sched_rt_entity_p,
+	.rt_rq = init_rt_rq_p,
 };
 
 #ifdef CONFIG_FAIR_USER_SCHED
@@ -269,10 +286,13 @@ static inline struct task_group *task_group(struct task_struct *p)
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu)
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 {
 	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
 	p->se.parent = task_group(p)->se[cpu];
+
+	p->rt.rt_rq = task_group(p)->rt_rq[cpu];
+	p->rt.parent = task_group(p)->rt_se[cpu];
 }
 
 static inline void lock_task_group_list(void)
@@ -297,7 +317,7 @@ static inline void unlock_doms_cur(void)
 
 #else
 
-static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { }
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
 static inline void lock_task_group_list(void) { }
 static inline void unlock_task_group_list(void) { }
 static inline void lock_doms_cur(void) { }
@@ -343,13 +363,22 @@ struct cfs_rq {
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+	int highest_prio; /* highest queued rt task prio */
+#endif
 #ifdef CONFIG_SMP
 	unsigned long rt_nr_migratory;
-	int highest_prio; /* highest queued rt task prio */
 	int overloaded;
 #endif
+	int rt_throttled;
 	u64 rt_time;
-	u64 rt_throttled;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	struct rq *rq;
+	struct list_head leaf_rt_rq_list;
+	struct task_group *tg;
+	struct sched_rt_entity *rt_se;
+#endif
 };
 
 #ifdef CONFIG_SMP
@@ -411,12 +440,14 @@ struct rq {
 	u64 nr_switches;
 
 	struct cfs_rq cfs;
+	struct rt_rq rt;
+	u64 rt_period_expire;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
+	struct list_head leaf_rt_rq_list;
 #endif
-	struct rt_rq rt;
-	u64 rt_period_expire;
 
 	/*
 	 * This is part of a global counter where only the total sum
@@ -613,9 +644,9 @@ const_debug unsigned int sysctl_sched_rt_period = 1000;
 
 /*
  * ratio of time -rt tasks may consume.
- * default: 100%
+ * default: 95%
  */
-const_debug unsigned int sysctl_sched_rt_ratio = SCHED_RT_FRAC;
+const_debug unsigned int sysctl_sched_rt_ratio = 62259;
 
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -1337,7 +1368,7 @@ unsigned long weighted_cpuload(const int cpu)
 
 static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
-	set_task_cfs_rq(p, cpu);
+	set_task_rq(p, cpu);
 #ifdef CONFIG_SMP
 	/*
 	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
@@ -5281,7 +5312,7 @@ int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 		p->sched_class->set_cpus_allowed(p, &new_mask);
 	else {
 		p->cpus_allowed = new_mask;
-		p->nr_cpus_allowed = cpus_weight(new_mask);
+		p->rt.nr_cpus_allowed = cpus_weight(new_mask);
 	}
 
 	/* Can the task run on the task's current CPU? If so, we're done */
@@ -7079,8 +7110,50 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 
 	rt_rq->rt_time = 0;
 	rt_rq->rt_throttled = 0;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	rt_rq->rq = rq;
+#endif
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg,
+		struct cfs_rq *cfs_rq, struct sched_entity *se,
+		int cpu, int add)
+{
+	tg->cfs_rq[cpu] = cfs_rq;
+	init_cfs_rq(cfs_rq, rq);
+	cfs_rq->tg = tg;
+	if (add)
+		list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+
+	tg->se[cpu] = se;
+	se->cfs_rq = &rq->cfs;
+	se->my_q = cfs_rq;
+	se->load.weight = tg->shares;
+	se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
+	se->parent = NULL;
+}
+
+static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
+		struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
+		int cpu, int add)
+{
+	tg->rt_rq[cpu] = rt_rq;
+	init_rt_rq(rt_rq, rq);
+	rt_rq->tg = tg;
+	rt_rq->rt_se = rt_se;
+	if (add)
+		list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+
+	tg->rt_se[cpu] = rt_se;
+	rt_se->rt_rq = &rq->rt;
+	rt_se->my_q = rt_rq;
+	rt_se->parent = NULL;
+	INIT_LIST_HEAD(&rt_se->run_list);
+}
+#endif
+
 void __init sched_init(void)
 {
 	int highest_cpu = 0;
@@ -7090,6 +7163,10 @@ void __init sched_init(void)
 	init_defrootdomain();
 #endif
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	list_add(&init_task_group.list, &task_groups);
+#endif
+
 	for_each_possible_cpu(i) {
 		struct rq *rq;
 
@@ -7099,30 +7176,20 @@
 		rq->nr_running = 0;
 		rq->clock = 1;
 		init_cfs_rq(&rq->cfs, rq);
+		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
-		{
-			struct cfs_rq *cfs_rq = &per_cpu(init_cfs_rq, i);
-			struct sched_entity *se =
-					&per_cpu(init_sched_entity, i);
-
-			init_cfs_rq_p[i] = cfs_rq;
-			init_cfs_rq(cfs_rq, rq);
-			cfs_rq->tg = &init_task_group;
-			list_add(&cfs_rq->leaf_cfs_rq_list,
-							&rq->leaf_cfs_rq_list);
-
-			init_sched_entity_p[i] = se;
-			se->cfs_rq = &rq->cfs;
-			se->my_q = cfs_rq;
-			se->load.weight = init_task_group_load;
-			se->load.inv_weight =
-				div64_64(1ULL<<32, init_task_group_load);
-			se->parent = NULL;
-		}
 		init_task_group.shares = init_task_group_load;
+		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
+		init_tg_cfs_entry(rq, &init_task_group,
+				&per_cpu(init_cfs_rq, i),
+				&per_cpu(init_sched_entity, i), i, 1);
+
+		init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
+		init_tg_rt_entry(rq, &init_task_group,
+				&per_cpu(init_rt_rq, i),
+				&per_cpu(init_sched_rt_entity, i), i, 1);
 #endif
-		init_rt_rq(&rq->rt, rq);
 		rq->rt_period_expire = 0;
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -7460,12 +7527,36 @@ static int load_balance_monitor(void *unused)
 }
 #endif /* CONFIG_SMP */
 
+static void free_sched_group(struct task_group *tg)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (tg->cfs_rq)
+			kfree(tg->cfs_rq[i]);
+		if (tg->se)
+			kfree(tg->se[i]);
+		if (tg->rt_rq)
+			kfree(tg->rt_rq[i]);
+		if (tg->rt_se)
+			kfree(tg->rt_se[i]);
+	}
+
+	kfree(tg->cfs_rq);
+	kfree(tg->se);
+	kfree(tg->rt_rq);
+	kfree(tg->rt_se);
+	kfree(tg);
+}
+
 /* allocate runqueue etc for a new task group */
 struct task_group *sched_create_group(void)
 {
 	struct task_group *tg;
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+	struct rt_rq *rt_rq;
+	struct sched_rt_entity *rt_se;
 	struct rq *rq;
 	int i;
 
@@ -7479,100 +7570,89 @@ struct task_group *sched_create_group(void)
 	tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
 	if (!tg->se)
 		goto err;
+	tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+	if (!tg->rt_rq)
+		goto err;
+	tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+	if (!tg->rt_se)
+		goto err;
+
+	tg->shares = NICE_0_LOAD;
+	tg->rt_ratio = 0; /* XXX */
 
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-		cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
-					cpu_to_node(i));
+		cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
+				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 		if (!cfs_rq)
 			goto err;
 
-		se = kmalloc_node(sizeof(struct sched_entity), GFP_KERNEL,
-					cpu_to_node(i));
+		se = kmalloc_node(sizeof(struct sched_entity),
+				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 		if (!se)
 			goto err;
 
-		memset(cfs_rq, 0, sizeof(struct cfs_rq));
-		memset(se, 0, sizeof(struct sched_entity));
+		rt_rq = kmalloc_node(sizeof(struct rt_rq),
+				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		if (!rt_rq)
+			goto err;
 
-		tg->cfs_rq[i] = cfs_rq;
-		init_cfs_rq(cfs_rq, rq);
-		cfs_rq->tg = tg;
+		rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
+				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		if (!rt_se)
+			goto err;
 
-		tg->se[i] = se;
-		se->cfs_rq = &rq->cfs;
-		se->my_q = cfs_rq;
-		se->load.weight = NICE_0_LOAD;
-		se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
-		se->parent = NULL;
+		init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+		init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
 	}
 
-	tg->shares = NICE_0_LOAD;
-
 	lock_task_group_list();
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 		cfs_rq = tg->cfs_rq[i];
 		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+		rt_rq = tg->rt_rq[i];
+		list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
 	}
+	list_add_rcu(&tg->list, &task_groups);
 	unlock_task_group_list();
 
 	return tg;
 
 err:
-	for_each_possible_cpu(i) {
-		if (tg->cfs_rq)
-			kfree(tg->cfs_rq[i]);
-		if (tg->se)
-			kfree(tg->se[i]);
-	}
-	kfree(tg->cfs_rq);
-	kfree(tg->se);
-	kfree(tg);
-
+	free_sched_group(tg);
 	return ERR_PTR(-ENOMEM);
 }
 
 /* rcu callback to free various structures associated with a task group */
-static void free_sched_group(struct rcu_head *rhp)
+static void free_sched_group_rcu(struct rcu_head *rhp)
 {
-	struct task_group *tg = container_of(rhp, struct task_group, rcu);
-	struct cfs_rq *cfs_rq;
-	struct sched_entity *se;
-	int i;
-
 	/* now it should be safe to free those cfs_rqs */
-	for_each_possible_cpu(i) {
-		cfs_rq = tg->cfs_rq[i];
-		kfree(cfs_rq);
-
-		se = tg->se[i];
-		kfree(se);
-	}
-
-	kfree(tg->cfs_rq);
-	kfree(tg->se);
-	kfree(tg);
+	free_sched_group(container_of(rhp, struct task_group, rcu));
 }
 
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
 	struct cfs_rq *cfs_rq = NULL;
+	struct rt_rq *rt_rq = NULL;
 	int i;
 
 	lock_task_group_list();
 	for_each_possible_cpu(i) {
 		cfs_rq = tg->cfs_rq[i];
 		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+		rt_rq = tg->rt_rq[i];
+		list_del_rcu(&rt_rq->leaf_rt_rq_list);
 	}
+	list_del_rcu(&tg->list);
 	unlock_task_group_list();
 
 	BUG_ON(!cfs_rq);
 
 	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group);
+	call_rcu(&tg->rcu, free_sched_group_rcu);
 }
 
 /* change task's runqueue when it moves between groups.
@@ -7588,11 +7668,6 @@ void sched_move_task(struct task_struct *tsk)
 
 	rq = task_rq_lock(tsk, &flags);
 
-	if (tsk->sched_class != &fair_sched_class) {
-		set_task_cfs_rq(tsk, task_cpu(tsk));
-		goto done;
-	}
-
 	update_rq_clock(rq);
 
 	running = task_current(rq, tsk);
@@ -7604,7 +7679,7 @@ void sched_move_task(struct task_struct *tsk)
 		tsk->sched_class->put_prev_task(rq, tsk);
 	}
 
-	set_task_cfs_rq(tsk, task_cpu(tsk));
+	set_task_rq(tsk, task_cpu(tsk));
 
 	if (on_rq) {
 		if (unlikely(running))
@@ -7612,7 +7687,6 @@ void sched_move_task(struct task_struct *tsk)
 			enqueue_task(rq, tsk, 0);
 	}
 
-done:
 	task_rq_unlock(rq, &flags);
 }
 
@@ -7697,6 +7771,31 @@ unsigned long sched_group_shares(struct task_group *tg)
 	return tg->shares;
 }
 
+/*
+ * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ */
+int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+{
+	struct task_group *tgi;
+	unsigned long total = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tgi, &task_groups, list)
+		total += tgi->rt_ratio;
+	rcu_read_unlock();
+
+	if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
+		return -EINVAL;
+
+	tg->rt_ratio = rt_ratio;
+	return 0;
+}
+
+unsigned long sched_group_rt_ratio(struct task_group *tg)
+{
+	return tg->rt_ratio;
+}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_FAIR_CGROUP_SCHED
@@ -7772,12 +7871,30 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
 	return (u64) tg->shares;
 }
 
+static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
+		u64 rt_ratio_val)
+{
+	return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+}
+
+static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+
+	return (u64) tg->rt_ratio;
+}
+
 static struct cftype cpu_files[] = {
 	{
 		.name = "shares",
 		.read_uint = cpu_shares_read_uint,
 		.write_uint = cpu_shares_write_uint,
 	},
+	{
+		.name = "rt_ratio",
+		.read_uint = cpu_rt_ratio_read_uint,
+		.write_uint = cpu_rt_ratio_write_uint,
+	},
 };
 
 static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
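With CONFIG_FAIR_CGROUP_SCHED the new knob is exposed to userspace as a per-group "cpu.rt_ratio" file, and the write handler above rejects values that would push the sum of all group ratios past sysctl_sched_rt_ratio. A hedged userspace sketch of setting it; the /dev/cgroup mount point and the "rtgroup" group name are illustrative assumptions, not something this diff defines:

/* sketch only: give one group roughly 25% of the rt period via the cgroup file */
#include <stdio.h>

int main(void)
{
	/* 0.25 * (1 << 16) = 16384, in the same fixed-point units as sysctl_sched_rt_ratio */
	FILE *f = fopen("/dev/cgroup/rtgroup/cpu.rt_ratio", "w");

	if (!f) {
		perror("cpu.rt_ratio");
		return 1;
	}
	if (fprintf(f, "%u\n", 16384) < 0 || fclose(f) != 0) {
		/* the kernel refuses the write (-EINVAL) if the sum of all
		 * group ratios would exceed sysctl_sched_rt_ratio */
		perror("cpu.rt_ratio");
		return 1;
	}
	return 0;
}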