author     Linus Torvalds <torvalds@linux-foundation.org>   2010-06-28 15:18:30 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2010-06-28 15:18:30 -0400
commit     f014d937d61f47761f961eba903feb2ffa1793aa (patch)
tree       4a6a9441b21711e34d567a8066950548935b9b3a
parent     cf91b415c8419513ada650a932bfb32a526d4d98 (diff)
parent     0d98bb2656e9bd2dfda2d089db1fe1dbdab41504 (diff)
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Prevent compiler from optimising the sched_avg_update() loop
  sched: Fix over-scheduling bug
  sched: Fix PROVE_RCU vs cpu_cgroup
-rw-r--r--  include/linux/cgroup.h |  20
-rw-r--r--  kernel/sched.c         | 124
2 files changed, 79 insertions(+), 65 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0c621604baa1..e3d00fdb858d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -525,13 +525,21 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
 	return cgrp->subsys[subsys_id];
 }
 
-static inline struct cgroup_subsys_state *task_subsys_state(
-	struct task_struct *task, int subsys_id)
+/*
+ * function to get the cgroup_subsys_state which allows for extra
+ * rcu_dereference_check() conditions, such as locks used during the
+ * cgroup_subsys::attach() methods.
+ */
+#define task_subsys_state_check(task, subsys_id, __c)			\
+	rcu_dereference_check(task->cgroups->subsys[subsys_id],	\
+			      rcu_read_lock_held() ||			\
+			      lockdep_is_held(&task->alloc_lock) ||	\
+			      cgroup_lock_is_held() || (__c))
+
+static inline struct cgroup_subsys_state *
+task_subsys_state(struct task_struct *task, int subsys_id)
 {
-	return rcu_dereference_check(task->cgroups->subsys[subsys_id],
-				     rcu_read_lock_held() ||
-				     lockdep_is_held(&task->alloc_lock) ||
-				     cgroup_lock_is_held());
+	return task_subsys_state_check(task, subsys_id, false);
 }
 
 static inline struct cgroup* task_cgroup(struct task_struct *task,
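
The task_subsys_state_check() macro added above lets a caller extend the RCU check with its own lockdep condition. A minimal sketch of how another controller could use it, assuming a hypothetical subsystem id my_subsys_id, a state type struct my_state embedding a cgroup_subsys_state member named css, and a lock my_attach_lock taken in that subsystem's ->attach() method (none of these names appear in this merge; the real user is task_group() in kernel/sched.c below):

/* Sketch only: my_subsys_id, struct my_state and my_attach_lock are
 * hypothetical stand-ins for a controller's own identifiers. */
static inline struct my_state *task_my_state(struct task_struct *tsk)
{
	struct cgroup_subsys_state *css;

	css = task_subsys_state_check(tsk, my_subsys_id,
				      lockdep_is_held(&my_attach_lock));
	return container_of(css, struct my_state, css);
}
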
diff --git a/kernel/sched.c b/kernel/sched.c
index a2d215d132f6..cb816e36cc8b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
  */
 struct task_group init_task_group;
 
-/* return group to which a task belongs */
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	struct task_group *tg;
-
-#ifdef CONFIG_CGROUP_SCHED
-	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
-				struct task_group, css);
-#else
-	tg = &init_task_group;
-#endif
-	return tg;
-}
-
-/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
-{
-	/*
-	 * Strictly speaking this rcu_read_lock() is not needed since the
-	 * task_group is tied to the cgroup, which in turn can never go away
-	 * as long as there are tasks attached to it.
-	 *
-	 * However since task_group() uses task_subsys_state() which is an
-	 * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
-	 */
-	rcu_read_lock();
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
-	p->se.parent = task_group(p)->se[cpu];
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
-	p->rt.parent = task_group(p)->rt_se[cpu];
-#endif
-	rcu_read_unlock();
-}
-
-#else
-
-static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline struct task_group *task_group(struct task_struct *p)
-{
-	return NULL;
-}
-
 #endif /* CONFIG_CGROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
@@ -644,6 +598,49 @@ static inline int cpu_of(struct rq *rq)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		(&__raw_get_cpu_var(runqueues))
 
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this tasks belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification
+ * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * holds that lock for each task it moves into the cgroup. Therefore
+ * by holding that lock, we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	struct cgroup_subsys_state *css;
+
+	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+			lockdep_is_held(&task_rq(p)->lock));
+	return container_of(css, struct task_group, css);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
+	p->se.parent = task_group(p)->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
+	p->rt.parent = task_group(p)->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
 inline void update_rq_clock(struct rq *rq)
 {
 	if (!rq->skip_clock_update)
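
As the new comment explains, cpu_cgroup_attach() holds the destination task's runqueue lock while moving it, so holding task_rq(p)->lock pins p to its current cgroup. A minimal sketch of the access pattern this enables inside kernel/sched.c (assuming CONFIG_RT_GROUP_SCHED; task_group_rt_runtime() is a hypothetical helper, while task_rq_lock()/task_rq_unlock() are the existing helpers in this file):

/* Hypothetical helper, for illustration only. */
static u64 task_group_rt_runtime(struct task_struct *p)
{
	unsigned long flags;
	struct rq *rq;
	u64 runtime;

	rq = task_rq_lock(p, &flags);	/* pins p to its current cgroup */
	/* holding the rq lock satisfies the lockdep_is_held(&task_rq(p)->lock)
	 * condition above, so no rcu_read_lock() is needed here */
	runtime = task_group(p)->rt_bandwidth.rt_runtime;
	task_rq_unlock(rq, &flags);

	return runtime;
}
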
@@ -1257,6 +1254,12 @@ static void sched_avg_update(struct rq *rq)
 	s64 period = sched_avg_period();
 
 	while ((s64)(rq->clock - rq->age_stamp) > period) {
+		/*
+		 * Inline assembly required to prevent the compiler
+		 * optimising this loop into a divmod call.
+		 * See __iter_div_u64_rem() for another example of this.
+		 */
+		asm("" : "+rm" (rq->age_stamp));
 		rq->age_stamp += period;
 		rq->rt_avg /= 2;
 	}
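
The empty asm("" : "+rm" (rq->age_stamp)) acts as a compiler barrier on that one variable: the "+rm" constraint tells GCC the value is read and may be modified, so the repeated-subtraction loop cannot be collapsed into a single division/modulo. The same trick is used by __iter_div_u64_rem() in include/linux/math64.h. A standalone userspace sketch of the technique (not kernel code; iter_div() is a made-up name), compilable with e.g. gcc -O2:

#include <stdint.h>
#include <stdio.h>

/* Division by repeated subtraction; the empty asm keeps the optimiser from
 * rewriting the loop as a hardware divide, mirroring sched_avg_update(). */
static uint64_t iter_div(uint64_t dividend, uint32_t divisor, uint64_t *rem)
{
	uint64_t quotient = 0;

	while (dividend >= divisor) {
		asm("" : "+rm" (dividend));	/* barrier on dividend only */
		dividend -= divisor;
		quotient++;
	}
	*rem = dividend;
	return quotient;
}

int main(void)
{
	uint64_t rem;
	uint64_t q = iter_div(1000003, 7, &rem);

	/* prints q=142857 rem=4 */
	printf("q=%llu rem=%llu\n", (unsigned long long)q, (unsigned long long)rem);
	return 0;
}
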
@@ -1660,9 +1663,6 @@ static void update_shares(struct sched_domain *sd)
 
 static void update_h_load(long cpu)
 {
-	if (root_task_group_empty())
-		return;
-
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
@@ -4474,16 +4474,6 @@ recheck:
 	}
 
 	if (user) {
-#ifdef CONFIG_RT_GROUP_SCHED
-		/*
-		 * Do not allow realtime tasks into groups that have no runtime
-		 * assigned.
-		 */
-		if (rt_bandwidth_enabled() && rt_policy(policy) &&
-				task_group(p)->rt_bandwidth.rt_runtime == 0)
-			return -EPERM;
-#endif
-
 		retval = security_task_setscheduler(p, policy, param);
 		if (retval)
 			return retval;
@@ -4499,6 +4489,22 @@ recheck:
 	 * runqueue lock must be held.
 	 */
 	rq = __task_rq_lock(p);
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	if (user) {
+		/*
+		 * Do not allow realtime tasks into groups that have no runtime
+		 * assigned.
+		 */
+		if (rt_bandwidth_enabled() && rt_policy(policy) &&
+				task_group(p)->rt_bandwidth.rt_runtime == 0) {
+			__task_rq_unlock(rq);
+			raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+			return -EPERM;
+		}
+	}
+#endif
+
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
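
Moving the RT-bandwidth check below __task_rq_lock() ties it to the lock that now pins task_group(p), which is why the error path has to drop both the runqueue lock and p->pi_lock before returning. A standalone userspace analogue of that shape (hypothetical names throughout: group_rt_runtime, set_rt_policy; build with cc -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long group_rt_runtime;	/* 0 means "no runtime assigned" */

static int set_rt_policy(void)
{
	/* take the lock that pins the state we are about to check */
	pthread_mutex_lock(&group_lock);

	if (group_rt_runtime == 0) {
		/* every early return must drop the lock again */
		pthread_mutex_unlock(&group_lock);
		return -EPERM;
	}

	/* ... apply the policy while the state is still pinned ... */
	pthread_mutex_unlock(&group_lock);
	return 0;
}

int main(void)
{
	printf("no runtime assigned: %d\n", set_rt_policy());	/* -EPERM */
	group_rt_runtime = 950000;
	printf("runtime assigned:    %d\n", set_rt_policy());	/* 0 */
	return 0;
}
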