diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-06-28 15:18:30 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-06-28 15:18:30 -0400 |
commit | f014d937d61f47761f961eba903feb2ffa1793aa (patch) | |
tree | 4a6a9441b21711e34d567a8066950548935b9b3a | |
parent | cf91b415c8419513ada650a932bfb32a526d4d98 (diff) | |
parent | 0d98bb2656e9bd2dfda2d089db1fe1dbdab41504 (diff) |
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
sched: Prevent compiler from optimising the sched_avg_update() loop
sched: Fix over-scheduling bug
sched: Fix PROVE_RCU vs cpu_cgroup
-rw-r--r-- | include/linux/cgroup.h | 20 | ||||
-rw-r--r-- | kernel/sched.c | 124 |
2 files changed, 79 insertions, 65 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0c621604baa1..e3d00fdb858d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -525,13 +525,21 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( | |||
525 | return cgrp->subsys[subsys_id]; | 525 | return cgrp->subsys[subsys_id]; |
526 | } | 526 | } |
527 | 527 | ||
528 | static inline struct cgroup_subsys_state *task_subsys_state( | 528 | /* |
529 | struct task_struct *task, int subsys_id) | 529 | * function to get the cgroup_subsys_state which allows for extra |
530 | * rcu_dereference_check() conditions, such as locks used during the | ||
531 | * cgroup_subsys::attach() methods. | ||
532 | */ | ||
533 | #define task_subsys_state_check(task, subsys_id, __c) \ | ||
534 | rcu_dereference_check(task->cgroups->subsys[subsys_id], \ | ||
535 | rcu_read_lock_held() || \ | ||
536 | lockdep_is_held(&task->alloc_lock) || \ | ||
537 | cgroup_lock_is_held() || (__c)) | ||
538 | |||
539 | static inline struct cgroup_subsys_state * | ||
540 | task_subsys_state(struct task_struct *task, int subsys_id) | ||
530 | { | 541 | { |
531 | return rcu_dereference_check(task->cgroups->subsys[subsys_id], | 542 | return task_subsys_state_check(task, subsys_id, false); |
532 | rcu_read_lock_held() || | ||
533 | lockdep_is_held(&task->alloc_lock) || | ||
534 | cgroup_lock_is_held()); | ||
535 | } | 543 | } |
536 | 544 | ||
537 | static inline struct cgroup* task_cgroup(struct task_struct *task, | 545 | static inline struct cgroup* task_cgroup(struct task_struct *task, |
diff --git a/kernel/sched.c b/kernel/sched.c index a2d215d132f6..cb816e36cc8b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -306,52 +306,6 @@ static int init_task_group_load = INIT_TASK_GROUP_LOAD; | |||
306 | */ | 306 | */ |
307 | struct task_group init_task_group; | 307 | struct task_group init_task_group; |
308 | 308 | ||
309 | /* return group to which a task belongs */ | ||
310 | static inline struct task_group *task_group(struct task_struct *p) | ||
311 | { | ||
312 | struct task_group *tg; | ||
313 | |||
314 | #ifdef CONFIG_CGROUP_SCHED | ||
315 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), | ||
316 | struct task_group, css); | ||
317 | #else | ||
318 | tg = &init_task_group; | ||
319 | #endif | ||
320 | return tg; | ||
321 | } | ||
322 | |||
323 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | ||
324 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | ||
325 | { | ||
326 | /* | ||
327 | * Strictly speaking this rcu_read_lock() is not needed since the | ||
328 | * task_group is tied to the cgroup, which in turn can never go away | ||
329 | * as long as there are tasks attached to it. | ||
330 | * | ||
331 | * However since task_group() uses task_subsys_state() which is an | ||
332 | * rcu_dereference() user, this quiets CONFIG_PROVE_RCU. | ||
333 | */ | ||
334 | rcu_read_lock(); | ||
335 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
336 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; | ||
337 | p->se.parent = task_group(p)->se[cpu]; | ||
338 | #endif | ||
339 | |||
340 | #ifdef CONFIG_RT_GROUP_SCHED | ||
341 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; | ||
342 | p->rt.parent = task_group(p)->rt_se[cpu]; | ||
343 | #endif | ||
344 | rcu_read_unlock(); | ||
345 | } | ||
346 | |||
347 | #else | ||
348 | |||
349 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | ||
350 | static inline struct task_group *task_group(struct task_struct *p) | ||
351 | { | ||
352 | return NULL; | ||
353 | } | ||
354 | |||
355 | #endif /* CONFIG_CGROUP_SCHED */ | 309 | #endif /* CONFIG_CGROUP_SCHED */ |
356 | 310 | ||
357 | /* CFS-related fields in a runqueue */ | 311 | /* CFS-related fields in a runqueue */ |
@@ -644,6 +598,49 @@ static inline int cpu_of(struct rq *rq) | |||
644 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 598 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
645 | #define raw_rq() (&__raw_get_cpu_var(runqueues)) | 599 | #define raw_rq() (&__raw_get_cpu_var(runqueues)) |
646 | 600 | ||
601 | #ifdef CONFIG_CGROUP_SCHED | ||
602 | |||
603 | /* | ||
604 | * Return the group to which this task belongs. | ||
605 | * | ||
606 | * We use task_subsys_state_check() and extend the RCU verification | ||
607 | * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() | ||
608 | * holds that lock for each task it moves into the cgroup. Therefore | ||
609 | * by holding that lock, we pin the task to the current cgroup. | ||
610 | */ | ||
611 | static inline struct task_group *task_group(struct task_struct *p) | ||
612 | { | ||
613 | struct cgroup_subsys_state *css; | ||
614 | |||
615 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | ||
616 | lockdep_is_held(&task_rq(p)->lock)); | ||
617 | return container_of(css, struct task_group, css); | ||
618 | } | ||
619 | |||
620 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | ||
621 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | ||
622 | { | ||
623 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
624 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; | ||
625 | p->se.parent = task_group(p)->se[cpu]; | ||
626 | #endif | ||
627 | |||
628 | #ifdef CONFIG_RT_GROUP_SCHED | ||
629 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; | ||
630 | p->rt.parent = task_group(p)->rt_se[cpu]; | ||
631 | #endif | ||
632 | } | ||
633 | |||
634 | #else /* CONFIG_CGROUP_SCHED */ | ||
635 | |||
636 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | ||
637 | static inline struct task_group *task_group(struct task_struct *p) | ||
638 | { | ||
639 | return NULL; | ||
640 | } | ||
641 | |||
642 | #endif /* CONFIG_CGROUP_SCHED */ | ||
643 | |||
647 | inline void update_rq_clock(struct rq *rq) | 644 | inline void update_rq_clock(struct rq *rq) |
648 | { | 645 | { |
649 | if (!rq->skip_clock_update) | 646 | if (!rq->skip_clock_update) |
@@ -1257,6 +1254,12 @@ static void sched_avg_update(struct rq *rq) | |||
1257 | s64 period = sched_avg_period(); | 1254 | s64 period = sched_avg_period(); |
1258 | 1255 | ||
1259 | while ((s64)(rq->clock - rq->age_stamp) > period) { | 1256 | while ((s64)(rq->clock - rq->age_stamp) > period) { |
1257 | /* | ||
1258 | * Inline assembly required to prevent the compiler | ||
1259 | * optimising this loop into a divmod call. | ||
1260 | * See __iter_div_u64_rem() for another example of this. | ||
1261 | */ | ||
1262 | asm("" : "+rm" (rq->age_stamp)); | ||
1260 | rq->age_stamp += period; | 1263 | rq->age_stamp += period; |
1261 | rq->rt_avg /= 2; | 1264 | rq->rt_avg /= 2; |
1262 | } | 1265 | } |
@@ -1660,9 +1663,6 @@ static void update_shares(struct sched_domain *sd) | |||
1660 | 1663 | ||
1661 | static void update_h_load(long cpu) | 1664 | static void update_h_load(long cpu) |
1662 | { | 1665 | { |
1663 | if (root_task_group_empty()) | ||
1664 | return; | ||
1665 | |||
1666 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | 1666 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); |
1667 | } | 1667 | } |
1668 | 1668 | ||
@@ -4474,16 +4474,6 @@ recheck: | |||
4474 | } | 4474 | } |
4475 | 4475 | ||
4476 | if (user) { | 4476 | if (user) { |
4477 | #ifdef CONFIG_RT_GROUP_SCHED | ||
4478 | /* | ||
4479 | * Do not allow realtime tasks into groups that have no runtime | ||
4480 | * assigned. | ||
4481 | */ | ||
4482 | if (rt_bandwidth_enabled() && rt_policy(policy) && | ||
4483 | task_group(p)->rt_bandwidth.rt_runtime == 0) | ||
4484 | return -EPERM; | ||
4485 | #endif | ||
4486 | |||
4487 | retval = security_task_setscheduler(p, policy, param); | 4477 | retval = security_task_setscheduler(p, policy, param); |
4488 | if (retval) | 4478 | if (retval) |
4489 | return retval; | 4479 | return retval; |
@@ -4499,6 +4489,22 @@ recheck: | |||
4499 | * runqueue lock must be held. | 4489 | * runqueue lock must be held. |
4500 | */ | 4490 | */ |
4501 | rq = __task_rq_lock(p); | 4491 | rq = __task_rq_lock(p); |
4492 | |||
4493 | #ifdef CONFIG_RT_GROUP_SCHED | ||
4494 | if (user) { | ||
4495 | /* | ||
4496 | * Do not allow realtime tasks into groups that have no runtime | ||
4497 | * assigned. | ||
4498 | */ | ||
4499 | if (rt_bandwidth_enabled() && rt_policy(policy) && | ||
4500 | task_group(p)->rt_bandwidth.rt_runtime == 0) { | ||
4501 | __task_rq_unlock(rq); | ||
4502 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
4503 | return -EPERM; | ||
4504 | } | ||
4505 | } | ||
4506 | #endif | ||
4507 | |||
4502 | /* recheck policy now with rq lock held */ | 4508 | /* recheck policy now with rq lock held */ |
4503 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 4509 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
4504 | policy = oldpolicy = -1; | 4510 | policy = oldpolicy = -1; |