author     Peter Zijlstra <peterz@infradead.org>    2012-02-11 00:05:00 -0500
committer  Ingo Molnar <mingo@kernel.org>           2014-02-10 10:17:19 -0500
commit     678d5718d8d099421b0dd54c01b0528f4aaf5919 (patch)
tree       e2e31b84b5baee6e5496d6304f93c4a22e575846 /kernel/sched/fair.c
parent     f10447998a59b97747c16258a9c6e6a1512f27f3 (diff)
sched/fair: Optimize cgroup pick_next_task_fair()
Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt
path") it is likely that we pick the next task from the same cgroup as
the current one. Doing a put and then a set on all intermediate entities
is therefore a waste of time, so try to avoid it.
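The shape of the optimization, as a rough user-space sketch rather than the
kernel code itself: walk the two entity chains up by depth until they meet,
and only the levels below the meeting point need a put/set. The types, names
and the levels_to_touch() helper below are hypothetical simplifications; the
real code stops one step earlier, when the two entities share a cfs_rq.

  #include <stdio.h>

  /* Hypothetical, simplified stand-in for a scheduling entity in a cgroup tree. */
  struct entity {
          const char *name;
          int depth;              /* root group has depth 0 */
          struct entity *parent;  /* NULL at the root */
  };

  /*
   * Walk both chains up, depth-matched, until they meet; every iteration is a
   * level that would need a put_prev_entity()/set_next_entity() pair.
   */
  static int levels_to_touch(struct entity *prev, struct entity *next)
  {
          int levels = 0;

          while (prev != next) {
                  int pd = prev->depth;
                  int nd = next->depth;

                  if (pd >= nd)           /* put_prev_entity(prev) would run here */
                          prev = prev->parent;
                  if (nd >= pd)           /* set_next_entity(next) would run here */
                          next = next->parent;
                  levels++;
          }
          return levels;
  }

  int main(void)
  {
          struct entity root = { "root",   0, NULL  };
          struct entity a    = { "a",      1, &root };
          struct entity b    = { "a/b",    2, &a    };
          struct entity c    = { "a/b/c",  3, &b    };
          struct entity prev_task = { "prev task", 4, &c };
          struct entity next_task = { "next task", 4, &c };

          /* Both tasks live in a/b/c: only the leaf level differs, so the
           * shared c, b, a and root levels need no put/set at all. */
          printf("levels touched: %d\n", levels_to_touch(&prev_task, &next_task));
          return 0;
  }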
Measured using:
mount nodev /cgroup -t cgroup -o cpu
cd /cgroup
mkdir a; cd a
mkdir b; cd b
mkdir c; cd c
echo $$ > tasks
perf stat --repeat 10 -- taskset 1 perf bench sched pipe
PRE : 4.542422684 seconds time elapsed ( +- 0.33% )
POST: 4.389409991 seconds time elapsed ( +- 0.32% )
This is an improvement of roughly 3.4% in elapsed time
((4.542422684 - 4.389409991) / 4.542422684).
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1328936700.2476.17.camel@laptop
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--  kernel/sched/fair.c  122
1 file changed, 110 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 846172107ba5..a81b241ff70f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2907,17 +2907,36 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
  * 3) pick the "last" process, for cache locality
  * 4) do not run the "skip" process, if something else is available
  */
-static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *
+pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-        struct sched_entity *se = __pick_first_entity(cfs_rq);
-        struct sched_entity *left = se;
+        struct sched_entity *left = __pick_first_entity(cfs_rq);
+        struct sched_entity *se;
+
+        /*
+         * If curr is set we have to see if its left of the leftmost entity
+         * still in the tree, provided there was anything in the tree at all.
+         */
+        if (!left || (curr && entity_before(curr, left)))
+                left = curr;
+
+        se = left; /* ideally we run the leftmost entity */
 
         /*
          * Avoid running the skip buddy, if running something else can
          * be done without getting too unfair.
          */
         if (cfs_rq->skip == se) {
-                struct sched_entity *second = __pick_next_entity(se);
+                struct sched_entity *second;
+
+                if (se == curr) {
+                        second = __pick_first_entity(cfs_rq);
+                } else {
+                        second = __pick_next_entity(se);
+                        if (!second || (curr && entity_before(curr, second)))
+                                second = curr;
+                }
+
                 if (second && wakeup_preempt_entity(second, left) < 1)
                         se = second;
         }
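What the new curr handling above amounts to: curr is not kept in the rbtree
while it runs, so it has to be compared against the tree's leftmost entity by
hand. A minimal sketch of just that comparison, with a hypothetical
vruntime-only before() standing in for the real entity_before():

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdio.h>

  /* Hypothetical, stripped-down entity: only the vruntime ordering matters. */
  struct ent {
          const char *name;
          unsigned long long vruntime;
  };

  static bool before(const struct ent *a, const struct ent *b)
  {
          return a->vruntime < b->vruntime;
  }

  /*
   * Choose between the leftmost entity still in the tree and curr, which is
   * running and therefore not in the tree; either one may be NULL.
   */
  static struct ent *pick(struct ent *leftmost, struct ent *curr)
  {
          struct ent *left = leftmost;

          if (!left || (curr && before(curr, left)))
                  left = curr;

          return left;    /* the skip/next/last buddy checks would follow */
  }

  int main(void)
  {
          struct ent tree_min = { "leftmost", 200 };
          struct ent running  = { "curr",     100 };

          printf("%s\n", pick(&tree_min, &running)->name);        /* curr */
          printf("%s\n", pick(&tree_min, NULL)->name);            /* leftmost */
          return 0;
  }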
@@ -2939,7 +2958,7 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
         return se;
 }
 
-static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 
 static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 {
@@ -3594,22 +3613,23 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 }
 
 /* conditionally throttle active cfs_rq's from put_prev_entity() */
-static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
         if (!cfs_bandwidth_used())
-                return;
+                return false;
 
         if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0))
-                return;
+                return false;
 
         /*
          * it's possible for a throttled entity to be forced into a running
          * state (e.g. set_curr_task), in this case we're finished.
          */
         if (cfs_rq_throttled(cfs_rq))
-                return;
+                return true;
 
         throttle_cfs_rq(cfs_rq);
+        return true;
 }
 
 static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
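The point of the bool return is simply to tell the caller whether this cfs_rq
ended up throttled, so the group fast path in pick_next_task_fair() can bail
out. A small sketch of the pattern, using a made-up rq_model rather than the
real cfs_rq:

  #include <stdbool.h>
  #include <stdio.h>

  /* Hypothetical model of the bandwidth state on one run-queue. */
  struct rq_model {
          bool runtime_enabled;
          long runtime_remaining;
          bool throttled;
  };

  /* Returns true when the caller has to fall back to the slow path. */
  static bool check_runtime(struct rq_model *rq)
  {
          if (!rq->runtime_enabled || rq->runtime_remaining > 0)
                  return false;           /* plenty of runtime left */
          if (rq->throttled)
                  return true;            /* already throttled elsewhere */
          rq->throttled = true;           /* throttle it now */
          return true;
  }

  int main(void)
  {
          struct rq_model rq = { .runtime_enabled = true, .runtime_remaining = 0 };

          if (check_runtime(&rq))
                  puts("throttled -> take the simple path");
          return 0;
  }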
@@ -3719,7 +3739,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
 }
 
 static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
-static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 
@@ -4658,9 +4678,86 @@ preempt:
 static struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev)
 {
-        struct task_struct *p;
         struct cfs_rq *cfs_rq = &rq->cfs;
         struct sched_entity *se;
+        struct task_struct *p;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+        if (!cfs_rq->nr_running)
+                return NULL;
+
+        if (!prev || prev->sched_class != &fair_sched_class)
+                goto simple;
+
+        /*
+         * Because of the set_next_buddy() in dequeue_task_fair() it is rather
+         * likely that a next task is from the same cgroup as the current.
+         *
+         * Therefore attempt to avoid putting and setting the entire cgroup
+         * hierarchy, only change the part that actually changes.
+         */
+
+        do {
+                struct sched_entity *curr = cfs_rq->curr;
+
+                /*
+                 * Since we got here without doing put_prev_entity() we also
+                 * have to consider cfs_rq->curr. If it is still a runnable
+                 * entity, update_curr() will update its vruntime, otherwise
+                 * forget we've ever seen it.
+                 */
+                if (curr && curr->on_rq)
+                        update_curr(cfs_rq);
+                else
+                        curr = NULL;
+
+                /*
+                 * This call to check_cfs_rq_runtime() will do the throttle and
+                 * dequeue its entity in the parent(s). Therefore the 'simple'
+                 * nr_running test will indeed be correct.
+                 */
+                if (unlikely(check_cfs_rq_runtime(cfs_rq)))
+                        goto simple;
+
+                se = pick_next_entity(cfs_rq, curr);
+                cfs_rq = group_cfs_rq(se);
+        } while (cfs_rq);
+
+        p = task_of(se);
+
+        /*
+         * Since we haven't yet done put_prev_entity and if the selected task
+         * is a different task than we started out with, try and touch the
+         * least amount of cfs_rqs.
+         */
+        if (prev != p) {
+                struct sched_entity *pse = &prev->se;
+
+                while (!(cfs_rq = is_same_group(se, pse))) {
+                        int se_depth = se->depth;
+                        int pse_depth = pse->depth;
+
+                        if (se_depth <= pse_depth) {
+                                put_prev_entity(cfs_rq_of(pse), pse);
+                                pse = parent_entity(pse);
+                        }
+                        if (se_depth >= pse_depth) {
+                                set_next_entity(cfs_rq_of(se), se);
+                                se = parent_entity(se);
+                        }
+                }
+
+                put_prev_entity(cfs_rq, pse);
+                set_next_entity(cfs_rq, se);
+        }
+
+        if (hrtick_enabled(rq))
+                hrtick_start_fair(rq, p);
+
+        return p;
+simple:
+        cfs_rq = &rq->cfs;
+#endif
 
         if (!cfs_rq->nr_running)
                 return NULL;
@@ -4669,12 +4766,13 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev)
         prev->sched_class->put_prev_task(rq, prev);
 
         do {
-                se = pick_next_entity(cfs_rq);
+                se = pick_next_entity(cfs_rq, NULL);
                 set_next_entity(cfs_rq, se);
                 cfs_rq = group_cfs_rq(se);
         } while (cfs_rq);
 
         p = task_of(se);
+
         if (hrtick_enabled(rq))
                 hrtick_start_fair(rq, p);
 
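For reference, both the new fast path and the 'simple' path share the same
descent loop: pick an entity at each level and drop into its group run-queue
until a task-level entity remains. A sketch of that descent with hypothetical
simplified types (each node here carries a pre-picked best child, which the
real code computes with pick_next_entity() per level):

  #include <stddef.h>
  #include <stdio.h>

  /* Hypothetical node: a group entity points at the best entity of its child
   * run-queue, a task entity has no child run-queue. */
  struct node {
          const char *name;
          struct node *child_pick;        /* NULL for a task-level entity */
  };

  /* Descend from the root pick to the task that will actually run. */
  static struct node *pick_task(struct node *root_pick)
  {
          struct node *se = root_pick;

          while (se->child_pick)          /* group entity: descend into its rq */
                  se = se->child_pick;

          return se;                      /* task-level entity */
  }

  int main(void)
  {
          struct node task  = { "task",      NULL   };
          struct node grp_b = { "group a/b", &task  };
          struct node grp_a = { "group a",   &grp_b };

          printf("running: %s\n", pick_task(&grp_a)->name);
          return 0;
  }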