author     Morten Rasmussen <morten.rasmussen@arm.com>   2018-07-04 06:17:40 -0400
committer  Ingo Molnar <mingo@kernel.org>                2018-09-10 05:05:49 -0400
commit     3b1baa6496e6b7ad016342a9d256bdfb072ce902 (patch)
tree       9e91be8d2548ed6113f0e69636c5e22766d0b8c5 /kernel/sched
parent     df054e8445a4011e3d693c2268129c0456108663 (diff)
sched/fair: Add 'group_misfit_task' load-balance type
To maximize throughput in systems with asymmetric CPU capacities (e.g.
ARM big.LITTLE), load balancing has to consider task and CPU utilization
as well as per-CPU compute capacity, in addition to the current
average-load-based balancing policy. Tasks with high utilization that
are scheduled on a lower-capacity CPU need to be identified and, where
possible, migrated to a higher-capacity CPU to maximize throughput.
To implement this policy, a new group_type (load-balance scenario) is
added: 'group_misfit_task'. It represents scenarios where a sched_group
has one or more tasks that are not suited to its per-CPU capacity.
'group_misfit_task' is only considered if the system is not already
imbalanced or overloaded ('group_imbalanced' or 'group_overloaded').
Identifying misfit tasks requires the rq lock to be held. To avoid
taking remote rq locks to examine source sched_groups for misfit tasks,
each CPU is made responsible for tracking its own misfit tasks and
updating the rq->misfit_task_load field. This means checking task
utilization both when tasks are scheduled and on each sched_tick.
Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dietmar.eggemann@arm.com
Cc: gaku.inami.xh@renesas.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Link: http://lkml.kernel.org/r/1530699470-29808-3-git-send-email-morten.rasmussen@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
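
One way to read the new enum ordering: the load balancer prefers the sched_group with the numerically higher group_type, so placing group_misfit_task below group_imbalanced and group_overloaded is what makes misfit handling yield to those scenarios (in the kernel, that comparison is done in update_sd_pick_busiest()). Below is a minimal standalone sketch of the precedence idea; prefer_candidate() is a hypothetical stand-in, not kernel code:

#include <stdio.h>

/* Same ordering as the enum added by this patch: a numerically higher
 * group_type takes precedence when picking the busiest group. */
enum group_type {
        group_other = 0,
        group_misfit_task,
        group_imbalanced,
        group_overloaded,
};

/* Hypothetical helper illustrating the precedence rule. */
static int prefer_candidate(enum group_type candidate, enum group_type busiest)
{
        return candidate > busiest;
}

int main(void)
{
        /* A misfit group loses to an overloaded group but beats group_other,
         * i.e. misfit balancing only runs when nothing worse is going on. */
        printf("misfit vs overloaded: %d\n",
               prefer_candidate(group_misfit_task, group_overloaded));
        printf("misfit vs other:      %d\n",
               prefer_candidate(group_misfit_task, group_other));
        return 0;
}

This is also why group_classify() in the diff below returns group_misfit_task only after the imbalance check and before falling back to group_other.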
Diffstat (limited to 'kernel/sched')
 kernel/sched/fair.c  | 54 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 kernel/sched/sched.h |  2 ++
 2 files changed, 48 insertions(+), 8 deletions(-)
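
The misfit check itself is a fixed-point comparison of a task's estimated utilization against the CPU's capacity with some headroom. Here is a minimal standalone sketch of that arithmetic, assuming capacity_margin keeps its default of 1280 (~20% headroom, i.e. a task fits if it uses less than roughly 80% of the CPU's capacity); the capacity and utilization numbers are made up for illustration:

#include <stdio.h>

/* Assumed default margin: 1280/1024 ~= 1.25, i.e. ~20% headroom. */
static const unsigned long capacity_margin = 1280;

/* Simplified mirror of the patch's task_fits_capacity(): both capacity
 * and utilization are on the kernel's 0..1024 scale. */
static int fits_capacity(unsigned long util, unsigned long capacity)
{
        return capacity * 1024 > util * capacity_margin;
}

int main(void)
{
        /* A LITTLE CPU of capacity 430: a task with util 400 is a misfit
         * (400 * 1280 = 512000 > 430 * 1024 = 440320); util 200 still fits. */
        printf("util 400 on capacity 430: %s\n",
               fits_capacity(400, 430) ? "fits" : "misfit");
        printf("util 200 on capacity 430: %s\n",
               fits_capacity(200, 430) ? "fits" : "misfit");
        return 0;
}

When a running task fails this check, update_misfit_status() records its task_h_load() in rq->misfit_task_load, which update_sg_lb_stats() later folds into the group's group_misfit_task_load.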
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3e5071aeb117..6e04bea5b11a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -693,6 +693,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
+static unsigned long capacity_of(int cpu);
 
 /* Give new sched_entity start runnable values to heavy its load in infant time */
 void init_entity_runnable_average(struct sched_entity *se)
@@ -1446,7 +1447,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 static unsigned long weighted_cpuload(struct rq *rq);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
-static unsigned long capacity_of(int cpu);
 
 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
@@ -3647,6 +3647,29 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
 	WRITE_ONCE(p->se.avg.util_est, ue);
 }
 
+static inline int task_fits_capacity(struct task_struct *p, long capacity)
+{
+	return capacity * 1024 > task_util_est(p) * capacity_margin;
+}
+
+static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
+{
+	if (!static_branch_unlikely(&sched_asym_cpucapacity))
+		return;
+
+	if (!p) {
+		rq->misfit_task_load = 0;
+		return;
+	}
+
+	if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
+		rq->misfit_task_load = 0;
+		return;
+	}
+
+	rq->misfit_task_load = task_h_load(p);
+}
+
 #else /* CONFIG_SMP */
 
 #define UPDATE_TG	0x0
@@ -3676,6 +3699,7 @@ util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
 static inline void
 util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p,
 		 bool task_sleep) {}
+static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 
 #endif /* CONFIG_SMP */
 
@@ -6201,7 +6225,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 	/* Bring task utilization in sync with prev_cpu */
 	sync_entity_load_avg(&p->se);
 
-	return min_cap * 1024 < task_util(p) * capacity_margin;
+	return !task_fits_capacity(p, min_cap);
 }
 
 /*
@@ -6618,9 +6642,12 @@ done: __maybe_unused;
 	if (hrtick_enabled(rq))
 		hrtick_start_fair(rq, p);
 
+	update_misfit_status(p, rq);
+
 	return p;
 
 idle:
+	update_misfit_status(NULL, rq);
 	new_tasks = idle_balance(rq, rf);
 
 	/*
@@ -6826,6 +6853,13 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
 enum fbq_type { regular, remote, all };
 
+enum group_type {
+	group_other = 0,
+	group_misfit_task,
+	group_imbalanced,
+	group_overloaded,
+};
+
 #define LBF_ALL_PINNED	0x01
 #define LBF_NEED_BREAK	0x02
 #define LBF_DST_PINNED	0x04
@@ -7399,12 +7433,6 @@ static unsigned long task_h_load(struct task_struct *p)
 
 /********** Helpers for find_busiest_group ************************/
 
-enum group_type {
-	group_other = 0,
-	group_imbalanced,
-	group_overloaded,
-};
-
 /*
  * sg_lb_stats - stats of a sched_group required for load_balancing
  */
@@ -7420,6 +7448,7 @@ struct sg_lb_stats {
 	unsigned int group_weight;
 	enum group_type group_type;
 	int group_no_capacity;
+	unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
 #ifdef CONFIG_NUMA_BALANCING
 	unsigned int nr_numa_running;
 	unsigned int nr_preferred_running;
@@ -7712,6 +7741,9 @@ group_type group_classify(struct sched_group *group,
 	if (sg_imbalanced(group))
 		return group_imbalanced;
 
+	if (sgs->group_misfit_task_load)
+		return group_misfit_task;
+
 	return group_other;
 }
 
@@ -7786,6 +7818,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 		 */
 		if (!nr_running && idle_cpu(i))
 			sgs->idle_cpus++;
+
+		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
+		    sgs->group_misfit_task_load < rq->misfit_task_load)
+			sgs->group_misfit_task_load = rq->misfit_task_load;
 	}
 
 	/* Adjust by relative CPU capacity of the group */
@@ -9567,6 +9603,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 
 	if (static_branch_unlikely(&sched_numa_balancing))
 		task_tick_numa(rq, curr);
+
+	update_misfit_status(curr, rq);
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0f36adc31ba5..7dbf67d147a2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -842,6 +842,8 @@ struct rq {
 
 	unsigned char idle_balance;
 
+	unsigned long misfit_task_load;
+
 	/* For active balancing */
 	int active_balance;
 	int push_cpu;