author    Peter Zijlstra <peterz@infradead.org>    2012-02-11 00:05:00 -0500
committer Ingo Molnar <mingo@kernel.org>           2014-02-10 10:17:19 -0500
commit    678d5718d8d099421b0dd54c01b0528f4aaf5919
tree      e2e31b84b5baee6e5496d6304f93c4a22e575846    /kernel/sched/fair.c
parent    f10447998a59b97747c16258a9c6e6a1512f27f3
sched/fair: Optimize cgroup pick_next_task_fair()
Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt path") it
is likely we pick a new task from the same cgroup; doing a put and then a set
on all intermediate entities is a waste of time, so try to avoid this.

Measured using:

  mount nodev /cgroup -t cgroup -o cpu
  cd /cgroup
  mkdir a; cd a
  mkdir b; cd b
  mkdir c; cd c
  echo $$ > tasks
  perf stat --repeat 10 -- taskset 1 perf bench sched pipe

  PRE : 4.542422684 seconds time elapsed  ( +- 0.33% )
  POST: 4.389409991 seconds time elapsed  ( +- 0.32% )

Which shows a significant improvement of ~3.5%.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1328936700.2476.17.camel@laptop
Signed-off-by: Ingo Molnar <mingo@kernel.org>
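[Editor's note] The heart of the pick_next_entity() change in the first hunk of the diff below is that cfs_rq->curr, which has not been put back into the rbtree on this path, must now be compared against the leftmost queued entity. The following is a minimal user-space sketch of that selection rule only; the struct entity, entity_before() and pick() here are simplified stand-ins for illustration, not the kernel definitions.

  #include <stdio.h>
  #include <stddef.h>

  struct entity {
          unsigned long long vruntime;
          const char *name;
  };

  /* Simplified vruntime comparison: the entity with the smaller vruntime is "before". */
  static int entity_before(const struct entity *a, const struct entity *b)
  {
          return (long long)(a->vruntime - b->vruntime) < 0;
  }

  /*
   * leftmost may be NULL (empty tree); curr may be NULL (current entity no
   * longer runnable). Whichever has the smaller vruntime is picked.
   */
  static const struct entity *pick(const struct entity *leftmost,
                                   const struct entity *curr)
  {
          const struct entity *left = leftmost;

          if (!left || (curr && entity_before(curr, left)))
                  left = curr;

          return left; /* ideally we run the leftmost entity */
  }

  int main(void)
  {
          struct entity queued = { .vruntime = 200, .name = "queued" };
          struct entity curr   = { .vruntime = 100, .name = "curr"   };

          printf("%s\n", pick(&queued, &curr)->name); /* curr: smaller vruntime */
          printf("%s\n", pick(&queued, NULL)->name);  /* queued: no runnable curr */
          return 0;
  }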
Diffstat (limited to 'kernel/sched/fair.c')
 kernel/sched/fair.c | 122 +++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 110 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 846172107ba5..a81b241ff70f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2907,17 +2907,36 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
  * 3) pick the "last" process, for cache locality
  * 4) do not run the "skip" process, if something else is available
  */
-static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *
+pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-        struct sched_entity *se = __pick_first_entity(cfs_rq);
-        struct sched_entity *left = se;
+        struct sched_entity *left = __pick_first_entity(cfs_rq);
+        struct sched_entity *se;
+
+        /*
+         * If curr is set we have to see if its left of the leftmost entity
+         * still in the tree, provided there was anything in the tree at all.
+         */
+        if (!left || (curr && entity_before(curr, left)))
+                left = curr;
+
+        se = left; /* ideally we run the leftmost entity */
 
         /*
          * Avoid running the skip buddy, if running something else can
          * be done without getting too unfair.
          */
         if (cfs_rq->skip == se) {
-                struct sched_entity *second = __pick_next_entity(se);
+                struct sched_entity *second;
+
+                if (se == curr) {
+                        second = __pick_first_entity(cfs_rq);
+                } else {
+                        second = __pick_next_entity(se);
+                        if (!second || (curr && entity_before(curr, second)))
+                                second = curr;
+                }
+
                 if (second && wakeup_preempt_entity(second, left) < 1)
                         se = second;
         }
@@ -2939,7 +2958,7 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
         return se;
 }
 
-static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 
 static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 {
@@ -3594,22 +3613,23 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 }
 
 /* conditionally throttle active cfs_rq's from put_prev_entity() */
-static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
         if (!cfs_bandwidth_used())
-                return;
+                return false;
 
         if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0))
-                return;
+                return false;
 
         /*
          * it's possible for a throttled entity to be forced into a running
          * state (e.g. set_curr_task), in this case we're finished.
          */
         if (cfs_rq_throttled(cfs_rq))
-                return;
+                return true;
 
         throttle_cfs_rq(cfs_rq);
+        return true;
 }
 
 static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
@@ -3719,7 +3739,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
 }
 
 static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
-static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 
@@ -4658,9 +4678,86 @@ preempt:
 static struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev)
 {
-        struct task_struct *p;
         struct cfs_rq *cfs_rq = &rq->cfs;
         struct sched_entity *se;
+        struct task_struct *p;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+        if (!cfs_rq->nr_running)
+                return NULL;
+
+        if (!prev || prev->sched_class != &fair_sched_class)
+                goto simple;
+
+        /*
+         * Because of the set_next_buddy() in dequeue_task_fair() it is rather
+         * likely that a next task is from the same cgroup as the current.
+         *
+         * Therefore attempt to avoid putting and setting the entire cgroup
+         * hierarchy, only change the part that actually changes.
+         */
+
+        do {
+                struct sched_entity *curr = cfs_rq->curr;
+
+                /*
+                 * Since we got here without doing put_prev_entity() we also
+                 * have to consider cfs_rq->curr. If it is still a runnable
+                 * entity, update_curr() will update its vruntime, otherwise
+                 * forget we've ever seen it.
+                 */
+                if (curr && curr->on_rq)
+                        update_curr(cfs_rq);
+                else
+                        curr = NULL;
+
+                /*
+                 * This call to check_cfs_rq_runtime() will do the throttle and
+                 * dequeue its entity in the parent(s). Therefore the 'simple'
+                 * nr_running test will indeed be correct.
+                 */
+                if (unlikely(check_cfs_rq_runtime(cfs_rq)))
+                        goto simple;
+
+                se = pick_next_entity(cfs_rq, curr);
+                cfs_rq = group_cfs_rq(se);
+        } while (cfs_rq);
+
+        p = task_of(se);
+
+        /*
+         * Since we haven't yet done put_prev_entity and if the selected task
+         * is a different task than we started out with, try and touch the
+         * least amount of cfs_rqs.
+         */
+        if (prev != p) {
+                struct sched_entity *pse = &prev->se;
+
+                while (!(cfs_rq = is_same_group(se, pse))) {
+                        int se_depth = se->depth;
+                        int pse_depth = pse->depth;
+
+                        if (se_depth <= pse_depth) {
+                                put_prev_entity(cfs_rq_of(pse), pse);
+                                pse = parent_entity(pse);
+                        }
+                        if (se_depth >= pse_depth) {
+                                set_next_entity(cfs_rq_of(se), se);
+                                se = parent_entity(se);
+                        }
+                }
+
+                put_prev_entity(cfs_rq, pse);
+                set_next_entity(cfs_rq, se);
+        }
+
+        if (hrtick_enabled(rq))
+                hrtick_start_fair(rq, p);
+
+        return p;
+simple:
+        cfs_rq = &rq->cfs;
+#endif
 
         if (!cfs_rq->nr_running)
                 return NULL;
@@ -4669,12 +4766,13 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
                 prev->sched_class->put_prev_task(rq, prev);
 
         do {
-                se = pick_next_entity(cfs_rq);
+                se = pick_next_entity(cfs_rq, NULL);
                 set_next_entity(cfs_rq, se);
                 cfs_rq = group_cfs_rq(se);
         } while (cfs_rq);
 
         p = task_of(se);
+
         if (hrtick_enabled(rq))
                 hrtick_start_fair(rq, p);
 
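[Editor's note] The "touch only what changed" walk added to pick_next_task_fair() above can be distilled into a small user-space sketch: prev's and next's scheduling entities are walked up by depth until they meet in a common group, and only the levels that differ get a put/set. This is an illustrative model under simplifying assumptions; struct entity, put_prev(), set_next() and the parent-pointer test standing in for is_same_group() are invented stand-ins, not the kernel definitions.

  #include <stdio.h>
  #include <stddef.h>

  struct entity {
          int depth;              /* distance from the root group */
          struct entity *parent;  /* owning group entity, NULL at the root */
          const char *name;
  };

  static void put_prev(struct entity *e) { printf("put %s\n", e->name); }
  static void set_next(struct entity *e) { printf("set %s\n", e->name); }

  /*
   * Walk prev's entity (pse) and next's entity (se) up until they sit in the
   * same group (modelled here as sharing a parent), putting/setting only the
   * levels that actually differ -- the same shape as the loop in the patch.
   */
  static void switch_to(struct entity *pse, struct entity *se)
  {
          while (pse->parent != se->parent) {
                  int se_depth = se->depth;
                  int pse_depth = pse->depth;

                  if (se_depth <= pse_depth) {
                          put_prev(pse);
                          pse = pse->parent;
                  }
                  if (se_depth >= pse_depth) {
                          set_next(se);
                          se = se->parent;
                  }
          }
          /* finally switch at the common level */
          put_prev(pse);
          set_next(se);
  }

  int main(void)
  {
          /* root holds group A and task T2; A holds B; B holds tasks T1 and T3 */
          struct entity A  = { .depth = 0, .parent = NULL, .name = "A"  };
          struct entity B  = { .depth = 1, .parent = &A,   .name = "B"  };
          struct entity T1 = { .depth = 2, .parent = &B,   .name = "T1" };
          struct entity T2 = { .depth = 0, .parent = NULL, .name = "T2" };
          struct entity T3 = { .depth = 2, .parent = &B,   .name = "T3" };

          switch_to(&T1, &T2); /* put T1, put B, put A, set T2: every level differs */
          switch_to(&T1, &T3); /* put T1, set T3: same cgroup, only one level touched */
          return 0;
  }

In the common case targeted by the commit (prev and next in the same cgroup, as in the second call), the while loop never runs and only a single put/set pair is issued, which is where the measured ~3.5% comes from.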