about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-13 11:22:41 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-13 11:22:41 -0500
commit: 3174ffaa939d8f771019f83761c668b1d5c1973b (patch)
tree: bcc73b265f225c33983d8935250f61b8ccadd51e /kernel
parent: d7ab95f8c54aed896ad739f261f79ed945472aca (diff)
parent: b68aa2300cabeb96801369a4bb37a4f19f59ed84 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
  sched: rt-group: refuse unrunnable tasks
  sched: rt-group: clean up the ifdeffery
  sched: rt-group: make rt groups scheduling configurable
  sched: rt-group: interface
  sched: rt-group: deal with PI
  sched: fix incorrect irq lock usage in normalize_rt_tasks()
  sched: fair-group: separate tg->shares from task_group_lock
  hrtimer: more hrtimer_init_sleeper() fallout.
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/rtmutex.c 5
-rw-r--r-- kernel/sched.c 494
-rw-r--r-- kernel/sched_rt.c 102
-rw-r--r-- kernel/sysctl.c 32
-rw-r--r-- kernel/user.c 50
5 files changed, 487 insertions, 196 deletions
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 0deef71ff8d..6522ae5b14a 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -630,9 +630,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
630 set_current_state(state); 630 set_current_state(state);
631 631
632 /* Setup the timer, when timeout != NULL */ 632 /* Setup the timer, when timeout != NULL */
633 if (unlikely(timeout)) 633 if (unlikely(timeout)) {
634 hrtimer_start(&timeout->timer, timeout->timer.expires, 634 hrtimer_start(&timeout->timer, timeout->timer.expires,
635 HRTIMER_MODE_ABS); 635 HRTIMER_MODE_ABS);
636 if (!hrtimer_active(&timeout->timer))
637 timeout->task = NULL;
638 }
636 639
637 for (;;) { 640 for (;;) {
638 /* Try to acquire the lock: */ 641 /* Try to acquire the lock: */
diff --git a/kernel/sched.c b/kernel/sched.c
index 3eedd526090..f28f19e65b5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -155,7 +155,7 @@ struct rt_prio_array {
155 struct list_head queue[MAX_RT_PRIO]; 155 struct list_head queue[MAX_RT_PRIO];
156}; 156};
157 157
158#ifdef CONFIG_FAIR_GROUP_SCHED 158#ifdef CONFIG_GROUP_SCHED
159 159
160#include <linux/cgroup.h> 160#include <linux/cgroup.h>
161 161
@@ -165,19 +165,16 @@ static LIST_HEAD(task_groups);
165 165
166/* task group related information */ 166/* task group related information */
167struct task_group { 167struct task_group {
168#ifdef CONFIG_FAIR_CGROUP_SCHED 168#ifdef CONFIG_CGROUP_SCHED
169 struct cgroup_subsys_state css; 169 struct cgroup_subsys_state css;
170#endif 170#endif
171
172#ifdef CONFIG_FAIR_GROUP_SCHED
171 /* schedulable entities of this group on each cpu */ 173 /* schedulable entities of this group on each cpu */
172 struct sched_entity **se; 174 struct sched_entity **se;
173 /* runqueue "owned" by this group on each cpu */ 175 /* runqueue "owned" by this group on each cpu */
174 struct cfs_rq **cfs_rq; 176 struct cfs_rq **cfs_rq;
175 177
176 struct sched_rt_entity **rt_se;
177 struct rt_rq **rt_rq;
178
179 unsigned int rt_ratio;
180
181 /* 178 /*
182 * shares assigned to a task group governs how much of cpu bandwidth 179 * shares assigned to a task group governs how much of cpu bandwidth
183 * is allocated to the group. The more shares a group has, the more is 180 * is allocated to the group. The more shares a group has, the more is
@@ -213,33 +210,46 @@ struct task_group {
213 * 210 *
214 */ 211 */
215 unsigned long shares; 212 unsigned long shares;
213#endif
214
215#ifdef CONFIG_RT_GROUP_SCHED
216 struct sched_rt_entity **rt_se;
217 struct rt_rq **rt_rq;
218
219 u64 rt_runtime;
220#endif
216 221
217 struct rcu_head rcu; 222 struct rcu_head rcu;
218 struct list_head list; 223 struct list_head list;
219}; 224};
220 225
226#ifdef CONFIG_FAIR_GROUP_SCHED
221/* Default task group's sched entity on each cpu */ 227/* Default task group's sched entity on each cpu */
222static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); 228static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
223/* Default task group's cfs_rq on each cpu */ 229/* Default task group's cfs_rq on each cpu */
224static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; 230static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
225 231
226static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
227static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
228
229static struct sched_entity *init_sched_entity_p[NR_CPUS]; 232static struct sched_entity *init_sched_entity_p[NR_CPUS];
230static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; 233static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
234#endif
235
236#ifdef CONFIG_RT_GROUP_SCHED
237static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
238static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
231 239
232static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; 240static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
233static struct rt_rq *init_rt_rq_p[NR_CPUS]; 241static struct rt_rq *init_rt_rq_p[NR_CPUS];
242#endif
234 243
235/* task_group_mutex serializes add/remove of task groups and also changes to 244/* task_group_lock serializes add/remove of task groups and also changes to
236 * a task group's cpu shares. 245 * a task group's cpu shares.
237 */ 246 */
238static DEFINE_MUTEX(task_group_mutex); 247static DEFINE_SPINLOCK(task_group_lock);
239 248
240/* doms_cur_mutex serializes access to doms_cur[] array */ 249/* doms_cur_mutex serializes access to doms_cur[] array */
241static DEFINE_MUTEX(doms_cur_mutex); 250static DEFINE_MUTEX(doms_cur_mutex);
242 251
252#ifdef CONFIG_FAIR_GROUP_SCHED
243#ifdef CONFIG_SMP 253#ifdef CONFIG_SMP
244/* kernel thread that runs rebalance_shares() periodically */ 254/* kernel thread that runs rebalance_shares() periodically */
245static struct task_struct *lb_monitor_task; 255static struct task_struct *lb_monitor_task;
@@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused);
248 258
249static void set_se_shares(struct sched_entity *se, unsigned long shares); 259static void set_se_shares(struct sched_entity *se, unsigned long shares);
250 260
261#ifdef CONFIG_USER_SCHED
262# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
263#else
264# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
265#endif
266
267#define MIN_GROUP_SHARES 2
268
269static int init_task_group_load = INIT_TASK_GROUP_LOAD;
270#endif
271
251/* Default task group. 272/* Default task group.
252 * Every task in system belong to this group at bootup. 273 * Every task in system belong to this group at bootup.
253 */ 274 */
254struct task_group init_task_group = { 275struct task_group init_task_group = {
276#ifdef CONFIG_FAIR_GROUP_SCHED
255 .se = init_sched_entity_p, 277 .se = init_sched_entity_p,
256 .cfs_rq = init_cfs_rq_p, 278 .cfs_rq = init_cfs_rq_p,
279#endif
257 280
281#ifdef CONFIG_RT_GROUP_SCHED
258 .rt_se = init_sched_rt_entity_p, 282 .rt_se = init_sched_rt_entity_p,
259 .rt_rq = init_rt_rq_p, 283 .rt_rq = init_rt_rq_p,
260};
261
262#ifdef CONFIG_FAIR_USER_SCHED
263# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
264#else
265# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
266#endif 284#endif
267 285};
268#define MIN_GROUP_SHARES 2
269
270static int init_task_group_load = INIT_TASK_GROUP_LOAD;
271 286
272/* return group to which a task belongs */ 287/* return group to which a task belongs */
273static inline struct task_group *task_group(struct task_struct *p) 288static inline struct task_group *task_group(struct task_struct *p)
274{ 289{
275 struct task_group *tg; 290 struct task_group *tg;
276 291
277#ifdef CONFIG_FAIR_USER_SCHED 292#ifdef CONFIG_USER_SCHED
278 tg = p->user->tg; 293 tg = p->user->tg;
279#elif defined(CONFIG_FAIR_CGROUP_SCHED) 294#elif defined(CONFIG_CGROUP_SCHED)
280 tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), 295 tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
281 struct task_group, css); 296 struct task_group, css);
282#else 297#else
@@ -288,21 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p)
288/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ 303/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
289static inline void set_task_rq(struct task_struct *p, unsigned int cpu) 304static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
290{ 305{
306#ifdef CONFIG_FAIR_GROUP_SCHED
291 p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; 307 p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
292 p->se.parent = task_group(p)->se[cpu]; 308 p->se.parent = task_group(p)->se[cpu];
309#endif
293 310
311#ifdef CONFIG_RT_GROUP_SCHED
294 p->rt.rt_rq = task_group(p)->rt_rq[cpu]; 312 p->rt.rt_rq = task_group(p)->rt_rq[cpu];
295 p->rt.parent = task_group(p)->rt_se[cpu]; 313 p->rt.parent = task_group(p)->rt_se[cpu];
296} 314#endif
297
298static inline void lock_task_group_list(void)
299{
300 mutex_lock(&task_group_mutex);
301}
302
303static inline void unlock_task_group_list(void)
304{
305 mutex_unlock(&task_group_mutex);
306} 315}
307 316
308static inline void lock_doms_cur(void) 317static inline void lock_doms_cur(void)
@@ -318,12 +327,10 @@ static inline void unlock_doms_cur(void)
318#else 327#else
319 328
320static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } 329static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
321static inline void lock_task_group_list(void) { }
322static inline void unlock_task_group_list(void) { }
323static inline void lock_doms_cur(void) { } 330static inline void lock_doms_cur(void) { }
324static inline void unlock_doms_cur(void) { } 331static inline void unlock_doms_cur(void) { }
325 332
326#endif /* CONFIG_FAIR_GROUP_SCHED */ 333#endif /* CONFIG_GROUP_SCHED */
327 334
328/* CFS-related fields in a runqueue */ 335/* CFS-related fields in a runqueue */
329struct cfs_rq { 336struct cfs_rq {
@@ -363,7 +370,7 @@ struct cfs_rq {
363struct rt_rq { 370struct rt_rq {
364 struct rt_prio_array active; 371 struct rt_prio_array active;
365 unsigned long rt_nr_running; 372 unsigned long rt_nr_running;
366#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED 373#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
367 int highest_prio; /* highest queued rt task prio */ 374 int highest_prio; /* highest queued rt task prio */
368#endif 375#endif
369#ifdef CONFIG_SMP 376#ifdef CONFIG_SMP
@@ -373,7 +380,9 @@ struct rt_rq {
373 int rt_throttled; 380 int rt_throttled;
374 u64 rt_time; 381 u64 rt_time;
375 382
376#ifdef CONFIG_FAIR_GROUP_SCHED 383#ifdef CONFIG_RT_GROUP_SCHED
384 unsigned long rt_nr_boosted;
385
377 struct rq *rq; 386 struct rq *rq;
378 struct list_head leaf_rt_rq_list; 387 struct list_head leaf_rt_rq_list;
379 struct task_group *tg; 388 struct task_group *tg;
@@ -447,6 +456,8 @@ struct rq {
447#ifdef CONFIG_FAIR_GROUP_SCHED 456#ifdef CONFIG_FAIR_GROUP_SCHED
448 /* list of leaf cfs_rq on this cpu: */ 457 /* list of leaf cfs_rq on this cpu: */
449 struct list_head leaf_cfs_rq_list; 458 struct list_head leaf_cfs_rq_list;
459#endif
460#ifdef CONFIG_RT_GROUP_SCHED
450 struct list_head leaf_rt_rq_list; 461 struct list_head leaf_rt_rq_list;
451#endif 462#endif
452 463
@@ -652,19 +663,21 @@ const_debug unsigned int sysctl_sched_features =
652const_debug unsigned int sysctl_sched_nr_migrate = 32; 663const_debug unsigned int sysctl_sched_nr_migrate = 32;
653 664
654/* 665/*
655 * period over which we measure -rt task cpu usage in ms. 666 * period over which we measure -rt task cpu usage in us.
656 * default: 1s 667 * default: 1s
657 */ 668 */
658const_debug unsigned int sysctl_sched_rt_period = 1000; 669unsigned int sysctl_sched_rt_period = 1000000;
659 670
660#define SCHED_RT_FRAC_SHIFT 16 671/*
661#define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT) 672 * part of the period that we allow rt tasks to run in us.
673 * default: 0.95s
674 */
675int sysctl_sched_rt_runtime = 950000;
662 676
663/* 677/*
664 * ratio of time -rt tasks may consume. 678 * single value that denotes runtime == period, ie unlimited time.
665 * default: 95%
666 */ 679 */
667const_debug unsigned int sysctl_sched_rt_ratio = 62259; 680#define RUNTIME_INF ((u64)~0ULL)
668 681
669/* 682/*
670 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu 683 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -4571,6 +4584,15 @@ recheck:
4571 return -EPERM; 4584 return -EPERM;
4572 } 4585 }
4573 4586
4587#ifdef CONFIG_RT_GROUP_SCHED
4588 /*
4589 * Do not allow realtime tasks into groups that have no runtime
4590 * assigned.
4591 */
4592 if (rt_policy(policy) && task_group(p)->rt_runtime == 0)
4593 return -EPERM;
4594#endif
4595
4574 retval = security_task_setscheduler(p, policy, param); 4596 retval = security_task_setscheduler(p, policy, param);
4575 if (retval) 4597 if (retval)
4576 return retval; 4598 return retval;
@@ -7112,7 +7134,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
7112 /* delimiter for bitsearch: */ 7134 /* delimiter for bitsearch: */
7113 __set_bit(MAX_RT_PRIO, array->bitmap); 7135 __set_bit(MAX_RT_PRIO, array->bitmap);
7114 7136
7115#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED 7137#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
7116 rt_rq->highest_prio = MAX_RT_PRIO; 7138 rt_rq->highest_prio = MAX_RT_PRIO;
7117#endif 7139#endif
7118#ifdef CONFIG_SMP 7140#ifdef CONFIG_SMP
@@ -7123,7 +7145,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
7123 rt_rq->rt_time = 0; 7145 rt_rq->rt_time = 0;
7124 rt_rq->rt_throttled = 0; 7146 rt_rq->rt_throttled = 0;
7125 7147
7126#ifdef CONFIG_FAIR_GROUP_SCHED 7148#ifdef CONFIG_RT_GROUP_SCHED
7149 rt_rq->rt_nr_boosted = 0;
7127 rt_rq->rq = rq; 7150 rt_rq->rq = rq;
7128#endif 7151#endif
7129} 7152}
@@ -7146,7 +7169,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg,
7146 se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); 7169 se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
7147 se->parent = NULL; 7170 se->parent = NULL;
7148} 7171}
7172#endif
7149 7173
7174#ifdef CONFIG_RT_GROUP_SCHED
7150static void init_tg_rt_entry(struct rq *rq, struct task_group *tg, 7175static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
7151 struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, 7176 struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
7152 int cpu, int add) 7177 int cpu, int add)
@@ -7175,7 +7200,7 @@ void __init sched_init(void)
7175 init_defrootdomain(); 7200 init_defrootdomain();
7176#endif 7201#endif
7177 7202
7178#ifdef CONFIG_FAIR_GROUP_SCHED 7203#ifdef CONFIG_GROUP_SCHED
7179 list_add(&init_task_group.list, &task_groups); 7204 list_add(&init_task_group.list, &task_groups);
7180#endif 7205#endif
7181 7206
@@ -7196,7 +7221,10 @@ void __init sched_init(void)
7196 &per_cpu(init_cfs_rq, i), 7221 &per_cpu(init_cfs_rq, i),
7197 &per_cpu(init_sched_entity, i), i, 1); 7222 &per_cpu(init_sched_entity, i), i, 1);
7198 7223
7199 init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */ 7224#endif
7225#ifdef CONFIG_RT_GROUP_SCHED
7226 init_task_group.rt_runtime =
7227 sysctl_sched_rt_runtime * NSEC_PER_USEC;
7200 INIT_LIST_HEAD(&rq->leaf_rt_rq_list); 7228 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
7201 init_tg_rt_entry(rq, &init_task_group, 7229 init_tg_rt_entry(rq, &init_task_group,
7202 &per_cpu(init_rt_rq, i), 7230 &per_cpu(init_rt_rq, i),
@@ -7303,7 +7331,7 @@ void normalize_rt_tasks(void)
7303 unsigned long flags; 7331 unsigned long flags;
7304 struct rq *rq; 7332 struct rq *rq;
7305 7333
7306 read_lock_irq(&tasklist_lock); 7334 read_lock_irqsave(&tasklist_lock, flags);
7307 do_each_thread(g, p) { 7335 do_each_thread(g, p) {
7308 /* 7336 /*
7309 * Only normalize user tasks: 7337 * Only normalize user tasks:
@@ -7329,16 +7357,16 @@ void normalize_rt_tasks(void)
7329 continue; 7357 continue;
7330 } 7358 }
7331 7359
7332 spin_lock_irqsave(&p->pi_lock, flags); 7360 spin_lock(&p->pi_lock);
7333 rq = __task_rq_lock(p); 7361 rq = __task_rq_lock(p);
7334 7362
7335 normalize_task(rq, p); 7363 normalize_task(rq, p);
7336 7364
7337 __task_rq_unlock(rq); 7365 __task_rq_unlock(rq);
7338 spin_unlock_irqrestore(&p->pi_lock, flags); 7366 spin_unlock(&p->pi_lock);
7339 } while_each_thread(g, p); 7367 } while_each_thread(g, p);
7340 7368
7341 read_unlock_irq(&tasklist_lock); 7369 read_unlock_irqrestore(&tasklist_lock, flags);
7342} 7370}
7343 7371
7344#endif /* CONFIG_MAGIC_SYSRQ */ 7372#endif /* CONFIG_MAGIC_SYSRQ */
@@ -7387,9 +7415,9 @@ void set_curr_task(int cpu, struct task_struct *p)
7387 7415
7388#endif 7416#endif
7389 7417
7390#ifdef CONFIG_FAIR_GROUP_SCHED 7418#ifdef CONFIG_GROUP_SCHED
7391 7419
7392#ifdef CONFIG_SMP 7420#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
7393/* 7421/*
7394 * distribute shares of all task groups among their schedulable entities, 7422 * distribute shares of all task groups among their schedulable entities,
7395 * to reflect load distribution across cpus. 7423 * to reflect load distribution across cpus.
@@ -7540,7 +7568,8 @@ static int load_balance_monitor(void *unused)
7540} 7568}
7541#endif /* CONFIG_SMP */ 7569#endif /* CONFIG_SMP */
7542 7570
7543static void free_sched_group(struct task_group *tg) 7571#ifdef CONFIG_FAIR_GROUP_SCHED
7572static void free_fair_sched_group(struct task_group *tg)
7544{ 7573{
7545 int i; 7574 int i;
7546 7575
@@ -7549,49 +7578,27 @@ static void free_sched_group(struct task_group *tg)
7549 kfree(tg->cfs_rq[i]); 7578 kfree(tg->cfs_rq[i]);
7550 if (tg->se) 7579 if (tg->se)
7551 kfree(tg->se[i]); 7580 kfree(tg->se[i]);
7552 if (tg->rt_rq)
7553 kfree(tg->rt_rq[i]);
7554 if (tg->rt_se)
7555 kfree(tg->rt_se[i]);
7556 } 7581 }
7557 7582
7558 kfree(tg->cfs_rq); 7583 kfree(tg->cfs_rq);
7559 kfree(tg->se); 7584 kfree(tg->se);
7560 kfree(tg->rt_rq);
7561 kfree(tg->rt_se);
7562 kfree(tg);
7563} 7585}
7564 7586
7565/* allocate runqueue etc for a new task group */ 7587static int alloc_fair_sched_group(struct task_group *tg)
7566struct task_group *sched_create_group(void)
7567{ 7588{
7568 struct task_group *tg;
7569 struct cfs_rq *cfs_rq; 7589 struct cfs_rq *cfs_rq;
7570 struct sched_entity *se; 7590 struct sched_entity *se;
7571 struct rt_rq *rt_rq;
7572 struct sched_rt_entity *rt_se;
7573 struct rq *rq; 7591 struct rq *rq;
7574 int i; 7592 int i;
7575 7593
7576 tg = kzalloc(sizeof(*tg), GFP_KERNEL);
7577 if (!tg)
7578 return ERR_PTR(-ENOMEM);
7579
7580 tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); 7594 tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
7581 if (!tg->cfs_rq) 7595 if (!tg->cfs_rq)
7582 goto err; 7596 goto err;
7583 tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); 7597 tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
7584 if (!tg->se) 7598 if (!tg->se)
7585 goto err; 7599 goto err;
7586 tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
7587 if (!tg->rt_rq)
7588 goto err;
7589 tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
7590 if (!tg->rt_se)
7591 goto err;
7592 7600
7593 tg->shares = NICE_0_LOAD; 7601 tg->shares = NICE_0_LOAD;
7594 tg->rt_ratio = 0; /* XXX */
7595 7602
7596 for_each_possible_cpu(i) { 7603 for_each_possible_cpu(i) {
7597 rq = cpu_rq(i); 7604 rq = cpu_rq(i);
@@ -7606,6 +7613,79 @@ struct task_group *sched_create_group(void)
7606 if (!se) 7613 if (!se)
7607 goto err; 7614 goto err;
7608 7615
7616 init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
7617 }
7618
7619 return 1;
7620
7621 err:
7622 return 0;
7623}
7624
7625static inline void register_fair_sched_group(struct task_group *tg, int cpu)
7626{
7627 list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
7628 &cpu_rq(cpu)->leaf_cfs_rq_list);
7629}
7630
7631static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
7632{
7633 list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
7634}
7635#else
7636static inline void free_fair_sched_group(struct task_group *tg)
7637{
7638}
7639
7640static inline int alloc_fair_sched_group(struct task_group *tg)
7641{
7642 return 1;
7643}
7644
7645static inline void register_fair_sched_group(struct task_group *tg, int cpu)
7646{
7647}
7648
7649static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
7650{
7651}
7652#endif
7653
7654#ifdef CONFIG_RT_GROUP_SCHED
7655static void free_rt_sched_group(struct task_group *tg)
7656{
7657 int i;
7658
7659 for_each_possible_cpu(i) {
7660 if (tg->rt_rq)
7661 kfree(tg->rt_rq[i]);
7662 if (tg->rt_se)
7663 kfree(tg->rt_se[i]);
7664 }
7665
7666 kfree(tg->rt_rq);
7667 kfree(tg->rt_se);
7668}
7669
7670static int alloc_rt_sched_group(struct task_group *tg)
7671{
7672 struct rt_rq *rt_rq;
7673 struct sched_rt_entity *rt_se;
7674 struct rq *rq;
7675 int i;
7676
7677 tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
7678 if (!tg->rt_rq)
7679 goto err;
7680 tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
7681 if (!tg->rt_se)
7682 goto err;
7683
7684 tg->rt_runtime = 0;
7685
7686 for_each_possible_cpu(i) {
7687 rq = cpu_rq(i);
7688
7609 rt_rq = kmalloc_node(sizeof(struct rt_rq), 7689 rt_rq = kmalloc_node(sizeof(struct rt_rq),
7610 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); 7690 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
7611 if (!rt_rq) 7691 if (!rt_rq)
@@ -7616,20 +7696,75 @@ struct task_group *sched_create_group(void)
7616 if (!rt_se) 7696 if (!rt_se)
7617 goto err; 7697 goto err;
7618 7698
7619 init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
7620 init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); 7699 init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
7621 } 7700 }
7622 7701
7623 lock_task_group_list(); 7702 return 1;
7703
7704 err:
7705 return 0;
7706}
7707
7708static inline void register_rt_sched_group(struct task_group *tg, int cpu)
7709{
7710 list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
7711 &cpu_rq(cpu)->leaf_rt_rq_list);
7712}
7713
7714static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
7715{
7716 list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
7717}
7718#else
7719static inline void free_rt_sched_group(struct task_group *tg)
7720{
7721}
7722
7723static inline int alloc_rt_sched_group(struct task_group *tg)
7724{
7725 return 1;
7726}
7727
7728static inline void register_rt_sched_group(struct task_group *tg, int cpu)
7729{
7730}
7731
7732static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
7733{
7734}
7735#endif
7736
7737static void free_sched_group(struct task_group *tg)
7738{
7739 free_fair_sched_group(tg);
7740 free_rt_sched_group(tg);
7741 kfree(tg);
7742}
7743
7744/* allocate runqueue etc for a new task group */
7745struct task_group *sched_create_group(void)
7746{
7747 struct task_group *tg;
7748 unsigned long flags;
7749 int i;
7750
7751 tg = kzalloc(sizeof(*tg), GFP_KERNEL);
7752 if (!tg)
7753 return ERR_PTR(-ENOMEM);
7754
7755 if (!alloc_fair_sched_group(tg))
7756 goto err;
7757
7758 if (!alloc_rt_sched_group(tg))
7759 goto err;
7760
7761 spin_lock_irqsave(&task_group_lock, flags);
7624 for_each_possible_cpu(i) { 7762 for_each_possible_cpu(i) {
7625 rq = cpu_rq(i); 7763 register_fair_sched_group(tg, i);
7626 cfs_rq = tg->cfs_rq[i]; 7764 register_rt_sched_group(tg, i);
7627 list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
7628 rt_rq = tg->rt_rq[i];
7629 list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
7630 } 7765 }
7631 list_add_rcu(&tg->list, &task_groups); 7766 list_add_rcu(&tg->list, &task_groups);
7632 unlock_task_group_list(); 7767 spin_unlock_irqrestore(&task_group_lock, flags);
7633 7768
7634 return tg; 7769 return tg;
7635 7770
@@ -7648,21 +7783,16 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
7648/* Destroy runqueue etc associated with a task group */ 7783/* Destroy runqueue etc associated with a task group */
7649void sched_destroy_group(struct task_group *tg) 7784void sched_destroy_group(struct task_group *tg)
7650{ 7785{
7651 struct cfs_rq *cfs_rq = NULL; 7786 unsigned long flags;
7652 struct rt_rq *rt_rq = NULL;
7653 int i; 7787 int i;
7654 7788
7655 lock_task_group_list(); 7789 spin_lock_irqsave(&task_group_lock, flags);
7656 for_each_possible_cpu(i) { 7790 for_each_possible_cpu(i) {
7657 cfs_rq = tg->cfs_rq[i]; 7791 unregister_fair_sched_group(tg, i);
7658 list_del_rcu(&cfs_rq->leaf_cfs_rq_list); 7792 unregister_rt_sched_group(tg, i);
7659 rt_rq = tg->rt_rq[i];
7660 list_del_rcu(&rt_rq->leaf_rt_rq_list);
7661 } 7793 }
7662 list_del_rcu(&tg->list); 7794 list_del_rcu(&tg->list);
7663 unlock_task_group_list(); 7795 spin_unlock_irqrestore(&task_group_lock, flags);
7664
7665 BUG_ON(!cfs_rq);
7666 7796
7667 /* wait for possible concurrent references to cfs_rqs complete */ 7797 /* wait for possible concurrent references to cfs_rqs complete */
7668 call_rcu(&tg->rcu, free_sched_group_rcu); 7798 call_rcu(&tg->rcu, free_sched_group_rcu);
@@ -7703,6 +7833,7 @@ void sched_move_task(struct task_struct *tsk)
7703 task_rq_unlock(rq, &flags); 7833 task_rq_unlock(rq, &flags);
7704} 7834}
7705 7835
7836#ifdef CONFIG_FAIR_GROUP_SCHED
7706/* rq->lock to be locked by caller */ 7837/* rq->lock to be locked by caller */
7707static void set_se_shares(struct sched_entity *se, unsigned long shares) 7838static void set_se_shares(struct sched_entity *se, unsigned long shares)
7708{ 7839{
@@ -7728,13 +7859,14 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
7728 } 7859 }
7729} 7860}
7730 7861
7862static DEFINE_MUTEX(shares_mutex);
7863
7731int sched_group_set_shares(struct task_group *tg, unsigned long shares) 7864int sched_group_set_shares(struct task_group *tg, unsigned long shares)
7732{ 7865{
7733 int i; 7866 int i;
7734 struct cfs_rq *cfs_rq; 7867 unsigned long flags;
7735 struct rq *rq;
7736 7868
7737 lock_task_group_list(); 7869 mutex_lock(&shares_mutex);
7738 if (tg->shares == shares) 7870 if (tg->shares == shares)
7739 goto done; 7871 goto done;
7740 7872
@@ -7746,10 +7878,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
7746 * load_balance_fair) from referring to this group first, 7878 * load_balance_fair) from referring to this group first,
7747 * by taking it off the rq->leaf_cfs_rq_list on each cpu. 7879 * by taking it off the rq->leaf_cfs_rq_list on each cpu.
7748 */ 7880 */
7749 for_each_possible_cpu(i) { 7881 spin_lock_irqsave(&task_group_lock, flags);
7750 cfs_rq = tg->cfs_rq[i]; 7882 for_each_possible_cpu(i)
7751 list_del_rcu(&cfs_rq->leaf_cfs_rq_list); 7883 unregister_fair_sched_group(tg, i);
7752 } 7884 spin_unlock_irqrestore(&task_group_lock, flags);
7753 7885
7754 /* wait for any ongoing reference to this group to finish */ 7886 /* wait for any ongoing reference to this group to finish */
7755 synchronize_sched(); 7887 synchronize_sched();
@@ -7769,13 +7901,12 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
7769 * Enable load balance activity on this group, by inserting it back on 7901 * Enable load balance activity on this group, by inserting it back on
7770 * each cpu's rq->leaf_cfs_rq_list. 7902 * each cpu's rq->leaf_cfs_rq_list.
7771 */ 7903 */
7772 for_each_possible_cpu(i) { 7904 spin_lock_irqsave(&task_group_lock, flags);
7773 rq = cpu_rq(i); 7905 for_each_possible_cpu(i)
7774 cfs_rq = tg->cfs_rq[i]; 7906 register_fair_sched_group(tg, i);
7775 list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); 7907 spin_unlock_irqrestore(&task_group_lock, flags);
7776 }
7777done: 7908done:
7778 unlock_task_group_list(); 7909 mutex_unlock(&shares_mutex);
7779 return 0; 7910 return 0;
7780} 7911}
7781 7912
@@ -7783,35 +7914,84 @@ unsigned long sched_group_shares(struct task_group *tg)
7783{ 7914{
7784 return tg->shares; 7915 return tg->shares;
7785} 7916}
7917#endif
7786 7918
7919#ifdef CONFIG_RT_GROUP_SCHED
7787/* 7920/*
7788 * Ensure the total rt_ratio <= sysctl_sched_rt_ratio 7921 * Ensure that the real time constraints are schedulable.
7789 */ 7922 */
7790int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio) 7923static DEFINE_MUTEX(rt_constraints_mutex);
7924
7925static unsigned long to_ratio(u64 period, u64 runtime)
7926{
7927 if (runtime == RUNTIME_INF)
7928 return 1ULL << 16;
7929
7930 runtime *= (1ULL << 16);
7931 div64_64(runtime, period);
7932 return runtime;
7933}
7934
7935static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
7791{ 7936{
7792 struct task_group *tgi; 7937 struct task_group *tgi;
7793 unsigned long total = 0; 7938 unsigned long total = 0;
7939 unsigned long global_ratio =
7940 to_ratio(sysctl_sched_rt_period,
7941 sysctl_sched_rt_runtime < 0 ?
7942 RUNTIME_INF : sysctl_sched_rt_runtime);
7794 7943
7795 rcu_read_lock(); 7944 rcu_read_lock();
7796 list_for_each_entry_rcu(tgi, &task_groups, list) 7945 list_for_each_entry_rcu(tgi, &task_groups, list) {
7797 total += tgi->rt_ratio; 7946 if (tgi == tg)
7798 rcu_read_unlock(); 7947 continue;
7799 7948
7800 if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio) 7949 total += to_ratio(period, tgi->rt_runtime);
7801 return -EINVAL; 7950 }
7951 rcu_read_unlock();
7802 7952
7803 tg->rt_ratio = rt_ratio; 7953 return total + to_ratio(period, runtime) < global_ratio;
7804 return 0;
7805} 7954}
7806 7955
7807unsigned long sched_group_rt_ratio(struct task_group *tg) 7956int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
7808{ 7957{
7809 return tg->rt_ratio; 7958 u64 rt_runtime, rt_period;
7959 int err = 0;
7960
7961 rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
7962 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
7963 if (rt_runtime_us == -1)
7964 rt_runtime = rt_period;
7965
7966 mutex_lock(&rt_constraints_mutex);
7967 if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
7968 err = -EINVAL;
7969 goto unlock;
7970 }
7971 if (rt_runtime_us == -1)
7972 rt_runtime = RUNTIME_INF;
7973 tg->rt_runtime = rt_runtime;
7974 unlock:
7975 mutex_unlock(&rt_constraints_mutex);
7976
7977 return err;
7810} 7978}
7811 7979
7812#endif /* CONFIG_FAIR_GROUP_SCHED */ 7980long sched_group_rt_runtime(struct task_group *tg)
7981{
7982 u64 rt_runtime_us;
7983
7984 if (tg->rt_runtime == RUNTIME_INF)
7985 return -1;
7986
7987 rt_runtime_us = tg->rt_runtime;
7988 do_div(rt_runtime_us, NSEC_PER_USEC);
7989 return rt_runtime_us;
7990}
7991#endif
7992#endif /* CONFIG_GROUP_SCHED */
7813 7993
7814#ifdef CONFIG_FAIR_CGROUP_SCHED 7994#ifdef CONFIG_CGROUP_SCHED
7815 7995
7816/* return corresponding task_group object of a cgroup */ 7996/* return corresponding task_group object of a cgroup */
7817static inline struct task_group *cgroup_tg(struct cgroup *cgrp) 7997static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
@@ -7857,9 +8037,15 @@ static int
7857cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 8037cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7858 struct task_struct *tsk) 8038 struct task_struct *tsk)
7859{ 8039{
8040#ifdef CONFIG_RT_GROUP_SCHED
8041 /* Don't accept realtime tasks when there is no way for them to run */
8042 if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0)
8043 return -EINVAL;
8044#else
7860 /* We don't support RT-tasks being in separate groups */ 8045 /* We don't support RT-tasks being in separate groups */
7861 if (tsk->sched_class != &fair_sched_class) 8046 if (tsk->sched_class != &fair_sched_class)
7862 return -EINVAL; 8047 return -EINVAL;
8048#endif
7863 8049
7864 return 0; 8050 return 0;
7865} 8051}
@@ -7871,6 +8057,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7871 sched_move_task(tsk); 8057 sched_move_task(tsk);
7872} 8058}
7873 8059
8060#ifdef CONFIG_FAIR_GROUP_SCHED
7874static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, 8061static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype,
7875 u64 shareval) 8062 u64 shareval)
7876{ 8063{
@@ -7883,31 +8070,70 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
7883 8070
7884 return (u64) tg->shares; 8071 return (u64) tg->shares;
7885} 8072}
8073#endif
7886 8074
7887static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype, 8075#ifdef CONFIG_RT_GROUP_SCHED
7888 u64 rt_ratio_val) 8076static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
8077 struct file *file,
8078 const char __user *userbuf,
8079 size_t nbytes, loff_t *unused_ppos)
7889{ 8080{
7890 return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val); 8081 char buffer[64];
8082 int retval = 0;
8083 s64 val;
8084 char *end;
8085
8086 if (!nbytes)
8087 return -EINVAL;
8088 if (nbytes >= sizeof(buffer))
8089 return -E2BIG;
8090 if (copy_from_user(buffer, userbuf, nbytes))
8091 return -EFAULT;
8092
8093 buffer[nbytes] = 0; /* nul-terminate */
8094
8095 /* strip newline if necessary */
8096 if (nbytes && (buffer[nbytes-1] == '\n'))
8097 buffer[nbytes-1] = 0;
8098 val = simple_strtoll(buffer, &end, 0);
8099 if (*end)
8100 return -EINVAL;
8101
8102 /* Pass to subsystem */
8103 retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
8104 if (!retval)
8105 retval = nbytes;
8106 return retval;
7891} 8107}
7892 8108
7893static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft) 8109static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
8110 struct file *file,
8111 char __user *buf, size_t nbytes,
8112 loff_t *ppos)
7894{ 8113{
7895 struct task_group *tg = cgroup_tg(cgrp); 8114 char tmp[64];
8115 long val = sched_group_rt_runtime(cgroup_tg(cgrp));
8116 int len = sprintf(tmp, "%ld\n", val);
7896 8117
7897 return (u64) tg->rt_ratio; 8118 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
7898} 8119}
8120#endif
7899 8121
7900static struct cftype cpu_files[] = { 8122static struct cftype cpu_files[] = {
8123#ifdef CONFIG_FAIR_GROUP_SCHED
7901 { 8124 {
7902 .name = "shares", 8125 .name = "shares",
7903 .read_uint = cpu_shares_read_uint, 8126 .read_uint = cpu_shares_read_uint,
7904 .write_uint = cpu_shares_write_uint, 8127 .write_uint = cpu_shares_write_uint,
7905 }, 8128 },
8129#endif
8130#ifdef CONFIG_RT_GROUP_SCHED
7906 { 8131 {
7907 .name = "rt_ratio", 8132 .name = "rt_runtime_us",
7908 .read_uint = cpu_rt_ratio_read_uint, 8133 .read = cpu_rt_runtime_read,
7909 .write_uint = cpu_rt_ratio_write_uint, 8134 .write = cpu_rt_runtime_write,
7910 }, 8135 },
8136#endif
7911}; 8137};
7912 8138
7913static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) 8139static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
@@ -7926,7 +8152,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
7926 .early_init = 1, 8152 .early_init = 1,
7927}; 8153};
7928 8154
7929#endif /* CONFIG_FAIR_CGROUP_SCHED */ 8155#endif /* CONFIG_CGROUP_SCHED */
7930 8156
7931#ifdef CONFIG_CGROUP_CPUACCT 8157#ifdef CONFIG_CGROUP_CPUACCT
7932 8158
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 274b40d7bef..f54792b175b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -55,14 +55,14 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
55 return !list_empty(&rt_se->run_list); 55 return !list_empty(&rt_se->run_list);
56} 56}
57 57
58#ifdef CONFIG_FAIR_GROUP_SCHED 58#ifdef CONFIG_RT_GROUP_SCHED
59 59
60static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) 60static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
61{ 61{
62 if (!rt_rq->tg) 62 if (!rt_rq->tg)
63 return SCHED_RT_FRAC; 63 return RUNTIME_INF;
64 64
65 return rt_rq->tg->rt_ratio; 65 return rt_rq->tg->rt_runtime;
66} 66}
67 67
68#define for_each_leaf_rt_rq(rt_rq, rq) \ 68#define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -89,7 +89,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
89static void enqueue_rt_entity(struct sched_rt_entity *rt_se); 89static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
90static void dequeue_rt_entity(struct sched_rt_entity *rt_se); 90static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
91 91
92static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) 92static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
93{ 93{
94 struct sched_rt_entity *rt_se = rt_rq->rt_se; 94 struct sched_rt_entity *rt_se = rt_rq->rt_se;
95 95
@@ -102,7 +102,7 @@ static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
102 } 102 }
103} 103}
104 104
105static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) 105static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
106{ 106{
107 struct sched_rt_entity *rt_se = rt_rq->rt_se; 107 struct sched_rt_entity *rt_se = rt_rq->rt_se;
108 108
@@ -110,11 +110,31 @@ static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
110 dequeue_rt_entity(rt_se); 110 dequeue_rt_entity(rt_se);
111} 111}
112 112
113static inline int rt_rq_throttled(struct rt_rq *rt_rq)
114{
115 return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
116}
117
118static int rt_se_boosted(struct sched_rt_entity *rt_se)
119{
120 struct rt_rq *rt_rq = group_rt_rq(rt_se);
121 struct task_struct *p;
122
123 if (rt_rq)
124 return !!rt_rq->rt_nr_boosted;
125
126 p = rt_task_of(rt_se);
127 return p->prio != p->normal_prio;
128}
129
113#else 130#else
114 131
115static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) 132static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
116{ 133{
117 return sysctl_sched_rt_ratio; 134 if (sysctl_sched_rt_runtime == -1)
135 return RUNTIME_INF;
136
137 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
118} 138}
119 139
120#define for_each_leaf_rt_rq(rt_rq, rq) \ 140#define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -141,19 +161,23 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
141 return NULL; 161 return NULL;
142} 162}
143 163
144static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) 164static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
145{ 165{
146} 166}
147 167
148static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) 168static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
149{ 169{
150} 170}
151 171
172static inline int rt_rq_throttled(struct rt_rq *rt_rq)
173{
174 return rt_rq->rt_throttled;
175}
152#endif 176#endif
153 177
154static inline int rt_se_prio(struct sched_rt_entity *rt_se) 178static inline int rt_se_prio(struct sched_rt_entity *rt_se)
155{ 179{
156#ifdef CONFIG_FAIR_GROUP_SCHED 180#ifdef CONFIG_RT_GROUP_SCHED
157 struct rt_rq *rt_rq = group_rt_rq(rt_se); 181 struct rt_rq *rt_rq = group_rt_rq(rt_se);
158 182
159 if (rt_rq) 183 if (rt_rq)
@@ -163,28 +187,26 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
163 return rt_task_of(rt_se)->prio; 187 return rt_task_of(rt_se)->prio;
164} 188}
165 189
166static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq) 190static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
167{ 191{
168 unsigned int rt_ratio = sched_rt_ratio(rt_rq); 192 u64 runtime = sched_rt_runtime(rt_rq);
169 u64 period, ratio;
170 193
171 if (rt_ratio == SCHED_RT_FRAC) 194 if (runtime == RUNTIME_INF)
172 return 0; 195 return 0;
173 196
174 if (rt_rq->rt_throttled) 197 if (rt_rq->rt_throttled)
175 return 1; 198 return rt_rq_throttled(rt_rq);
176
177 period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
178 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
179 199
180 if (rt_rq->rt_time > ratio) { 200 if (rt_rq->rt_time > runtime) {
181 struct rq *rq = rq_of_rt_rq(rt_rq); 201 struct rq *rq = rq_of_rt_rq(rt_rq);
182 202
183 rq->rt_throttled = 1; 203 rq->rt_throttled = 1;
184 rt_rq->rt_throttled = 1; 204 rt_rq->rt_throttled = 1;
185 205
186 sched_rt_ratio_dequeue(rt_rq); 206 if (rt_rq_throttled(rt_rq)) {
187 return 1; 207 sched_rt_rq_dequeue(rt_rq);
208 return 1;
209 }
188 } 210 }
189 211
190 return 0; 212 return 0;
@@ -196,17 +218,16 @@ static void update_sched_rt_period(struct rq *rq)
196 u64 period; 218 u64 period;
197 219
198 while (rq->clock > rq->rt_period_expire) { 220 while (rq->clock > rq->rt_period_expire) {
199 period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; 221 period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
200 rq->rt_period_expire += period; 222 rq->rt_period_expire += period;
201 223
202 for_each_leaf_rt_rq(rt_rq, rq) { 224 for_each_leaf_rt_rq(rt_rq, rq) {
203 unsigned long rt_ratio = sched_rt_ratio(rt_rq); 225 u64 runtime = sched_rt_runtime(rt_rq);
204 u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
205 226
206 rt_rq->rt_time -= min(rt_rq->rt_time, ratio); 227 rt_rq->rt_time -= min(rt_rq->rt_time, runtime);
207 if (rt_rq->rt_throttled) { 228 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
208 rt_rq->rt_throttled = 0; 229 rt_rq->rt_throttled = 0;
209 sched_rt_ratio_enqueue(rt_rq); 230 sched_rt_rq_enqueue(rt_rq);
210 } 231 }
211 } 232 }
212 233
@@ -239,12 +260,7 @@ static void update_curr_rt(struct rq *rq)
239 cpuacct_charge(curr, delta_exec); 260 cpuacct_charge(curr, delta_exec);
240 261
241 rt_rq->rt_time += delta_exec; 262 rt_rq->rt_time += delta_exec;
242 /* 263 if (sched_rt_runtime_exceeded(rt_rq))
243 * might make it a tad more accurate:
244 *
245 * update_sched_rt_period(rq);
246 */
247 if (sched_rt_ratio_exceeded(rt_rq))
248 resched_task(curr); 264 resched_task(curr);
249} 265}
250 266
@@ -253,7 +269,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
253{ 269{
254 WARN_ON(!rt_prio(rt_se_prio(rt_se))); 270 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
255 rt_rq->rt_nr_running++; 271 rt_rq->rt_nr_running++;
256#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED 272#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
257 if (rt_se_prio(rt_se) < rt_rq->highest_prio) 273 if (rt_se_prio(rt_se) < rt_rq->highest_prio)
258 rt_rq->highest_prio = rt_se_prio(rt_se); 274 rt_rq->highest_prio = rt_se_prio(rt_se);
259#endif 275#endif
@@ -265,6 +281,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
265 281
266 update_rt_migration(rq_of_rt_rq(rt_rq)); 282 update_rt_migration(rq_of_rt_rq(rt_rq));
267#endif 283#endif
284#ifdef CONFIG_RT_GROUP_SCHED
285 if (rt_se_boosted(rt_se))
286 rt_rq->rt_nr_boosted++;
287#endif
268} 288}
269 289
270static inline 290static inline
@@ -273,7 +293,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
273 WARN_ON(!rt_prio(rt_se_prio(rt_se))); 293 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
274 WARN_ON(!rt_rq->rt_nr_running); 294 WARN_ON(!rt_rq->rt_nr_running);
275 rt_rq->rt_nr_running--; 295 rt_rq->rt_nr_running--;
276#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED 296#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
277 if (rt_rq->rt_nr_running) { 297 if (rt_rq->rt_nr_running) {
278 struct rt_prio_array *array; 298 struct rt_prio_array *array;
279 299
@@ -295,6 +315,12 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
295 315
296 update_rt_migration(rq_of_rt_rq(rt_rq)); 316 update_rt_migration(rq_of_rt_rq(rt_rq));
297#endif /* CONFIG_SMP */ 317#endif /* CONFIG_SMP */
318#ifdef CONFIG_RT_GROUP_SCHED
319 if (rt_se_boosted(rt_se))
320 rt_rq->rt_nr_boosted--;
321
322 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
323#endif
298} 324}
299 325
300static void enqueue_rt_entity(struct sched_rt_entity *rt_se) 326static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
@@ -303,7 +329,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
303 struct rt_prio_array *array = &rt_rq->active; 329 struct rt_prio_array *array = &rt_rq->active;
304 struct rt_rq *group_rq = group_rt_rq(rt_se); 330 struct rt_rq *group_rq = group_rt_rq(rt_se);
305 331
306 if (group_rq && group_rq->rt_throttled) 332 if (group_rq && rt_rq_throttled(group_rq))
307 return; 333 return;
308 334
309 list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); 335 list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
@@ -496,7 +522,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
496 if (unlikely(!rt_rq->rt_nr_running)) 522 if (unlikely(!rt_rq->rt_nr_running))
497 return NULL; 523 return NULL;
498 524
499 if (sched_rt_ratio_exceeded(rt_rq)) 525 if (rt_rq_throttled(rt_rq))
500 return NULL; 526 return NULL;
501 527
502 do { 528 do {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d41ef6b4cf7..924c674b76e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -311,22 +311,6 @@ static struct ctl_table kern_table[] = {
311 .mode = 0644, 311 .mode = 0644,
312 .proc_handler = &proc_dointvec, 312 .proc_handler = &proc_dointvec,
313 }, 313 },
314 {
315 .ctl_name = CTL_UNNUMBERED,
316 .procname = "sched_rt_period_ms",
317 .data = &sysctl_sched_rt_period,
318 .maxlen = sizeof(unsigned int),
319 .mode = 0644,
320 .proc_handler = &proc_dointvec,
321 },
322 {
323 .ctl_name = CTL_UNNUMBERED,
324 .procname = "sched_rt_ratio",
325 .data = &sysctl_sched_rt_ratio,
326 .maxlen = sizeof(unsigned int),
327 .mode = 0644,
328 .proc_handler = &proc_dointvec,
329 },
330#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) 314#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
331 { 315 {
332 .ctl_name = CTL_UNNUMBERED, 316 .ctl_name = CTL_UNNUMBERED,
@@ -348,6 +332,22 @@ static struct ctl_table kern_table[] = {
348#endif 332#endif
349 { 333 {
350 .ctl_name = CTL_UNNUMBERED, 334 .ctl_name = CTL_UNNUMBERED,
335 .procname = "sched_rt_period_us",
336 .data = &sysctl_sched_rt_period,
337 .maxlen = sizeof(unsigned int),
338 .mode = 0644,
339 .proc_handler = &proc_dointvec,
340 },
341 {
342 .ctl_name = CTL_UNNUMBERED,
343 .procname = "sched_rt_runtime_us",
344 .data = &sysctl_sched_rt_runtime,
345 .maxlen = sizeof(int),
346 .mode = 0644,
347 .proc_handler = &proc_dointvec,
348 },
349 {
350 .ctl_name = CTL_UNNUMBERED,
351 .procname = "sched_compat_yield", 351 .procname = "sched_compat_yield",
352 .data = &sysctl_sched_compat_yield, 352 .data = &sysctl_sched_compat_yield,
353 .maxlen = sizeof(unsigned int), 353 .maxlen = sizeof(unsigned int),
diff --git a/kernel/user.c b/kernel/user.c
index 7d7900c5a1f..7132022a040 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -57,7 +57,7 @@ struct user_struct root_user = {
57 .uid_keyring = &root_user_keyring, 57 .uid_keyring = &root_user_keyring,
58 .session_keyring = &root_session_keyring, 58 .session_keyring = &root_session_keyring,
59#endif 59#endif
60#ifdef CONFIG_FAIR_USER_SCHED 60#ifdef CONFIG_USER_SCHED
61 .tg = &init_task_group, 61 .tg = &init_task_group,
62#endif 62#endif
63}; 63};
@@ -90,7 +90,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
90 return NULL; 90 return NULL;
91} 91}
92 92
93#ifdef CONFIG_FAIR_USER_SCHED 93#ifdef CONFIG_USER_SCHED
94 94
95static void sched_destroy_user(struct user_struct *up) 95static void sched_destroy_user(struct user_struct *up)
96{ 96{
@@ -113,15 +113,15 @@ static void sched_switch_user(struct task_struct *p)
113 sched_move_task(p); 113 sched_move_task(p);
114} 114}
115 115
116#else /* CONFIG_FAIR_USER_SCHED */ 116#else /* CONFIG_USER_SCHED */
117 117
118static void sched_destroy_user(struct user_struct *up) { } 118static void sched_destroy_user(struct user_struct *up) { }
119static int sched_create_user(struct user_struct *up) { return 0; } 119static int sched_create_user(struct user_struct *up) { return 0; }
120static void sched_switch_user(struct task_struct *p) { } 120static void sched_switch_user(struct task_struct *p) { }
121 121
122#endif /* CONFIG_FAIR_USER_SCHED */ 122#endif /* CONFIG_USER_SCHED */
123 123
124#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS) 124#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
125 125
126static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ 126static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
127static DEFINE_MUTEX(uids_mutex); 127static DEFINE_MUTEX(uids_mutex);
@@ -137,6 +137,7 @@ static inline void uids_mutex_unlock(void)
137} 137}
138 138
139/* uid directory attributes */ 139/* uid directory attributes */
140#ifdef CONFIG_FAIR_GROUP_SCHED
140static ssize_t cpu_shares_show(struct kobject *kobj, 141static ssize_t cpu_shares_show(struct kobject *kobj,
141 struct kobj_attribute *attr, 142 struct kobj_attribute *attr,
142 char *buf) 143 char *buf)
@@ -163,10 +164,45 @@ static ssize_t cpu_shares_store(struct kobject *kobj,
163 164
164static struct kobj_attribute cpu_share_attr = 165static struct kobj_attribute cpu_share_attr =
165 __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); 166 __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
167#endif
168
169#ifdef CONFIG_RT_GROUP_SCHED
170static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
171 struct kobj_attribute *attr,
172 char *buf)
173{
174 struct user_struct *up = container_of(kobj, struct user_struct, kobj);
175
176 return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
177}
178
179static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
180 struct kobj_attribute *attr,
181 const char *buf, size_t size)
182{
183 struct user_struct *up = container_of(kobj, struct user_struct, kobj);
184 unsigned long rt_runtime;
185 int rc;
186
187 sscanf(buf, "%lu", &rt_runtime);
188
189 rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
190
191 return (rc ? rc : size);
192}
193
/*
 * sysfs attribute named "cpu_rt_runtime" with mode 0644, bound to
 * cpu_rt_runtime_show() for reads and cpu_rt_runtime_store() for writes.
 */
194static struct kobj_attribute cpu_rt_runtime_attr =
195 __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
196#endif
166 197
167/* default attributes per uid directory */ 198/* default attributes per uid directory */
168static struct attribute *uids_attributes[] = { 199static struct attribute *uids_attributes[] = {
200#ifdef CONFIG_FAIR_GROUP_SCHED
169 &cpu_share_attr.attr, 201 &cpu_share_attr.attr,
202#endif
203#ifdef CONFIG_RT_GROUP_SCHED
204 &cpu_rt_runtime_attr.attr,
205#endif
170 NULL 206 NULL
171}; 207};
172 208
@@ -269,7 +305,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags)
269 schedule_work(&up->work); 305 schedule_work(&up->work);
270} 306}
271 307
272#else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */ 308#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */
273 309
274int uids_sysfs_init(void) { return 0; } 310int uids_sysfs_init(void) { return 0; }
275static inline int uids_user_create(struct user_struct *up) { return 0; } 311static inline int uids_user_create(struct user_struct *up) { return 0; }
@@ -373,7 +409,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
373 spin_lock_irq(&uidhash_lock); 409 spin_lock_irq(&uidhash_lock);
374 up = uid_hash_find(uid, hashent); 410 up = uid_hash_find(uid, hashent);
375 if (up) { 411 if (up) {
376 /* This case is not possible when CONFIG_FAIR_USER_SCHED 412 /* This case is not possible when CONFIG_USER_SCHED
377 * is defined, since we serialize alloc_uid() using 413 * is defined, since we serialize alloc_uid() using
378 * uids_mutex. Hence no need to call 414 * uids_mutex. Hence no need to call
379 * sched_destroy_user() or remove_user_sysfs_dir(). 415 * sched_destroy_user() or remove_user_sysfs_dir().