author     Morten Rasmussen <morten.rasmussen@arm.com>    2018-07-04 06:17:40 -0400
committer  Ingo Molnar <mingo@kernel.org>                 2018-09-10 05:05:49 -0400
commit     3b1baa6496e6b7ad016342a9d256bdfb072ce902 (patch)
tree       9e91be8d2548ed6113f0e69636c5e22766d0b8c5 /kernel/sched
parent     df054e8445a4011e3d693c2268129c0456108663 (diff)
sched/fair: Add 'group_misfit_task' load-balance type
To maximize throughput in systems with asymmetric CPU capacities (e.g. ARM
big.LITTLE), load-balancing has to consider task and CPU utilization as well
as per-CPU compute capacity, in addition to the current average-load-based
load-balancing policy. Tasks with high utilization that are scheduled on a
lower-capacity CPU need to be identified and, if possible, migrated to a
higher-capacity CPU to maximize throughput.

To implement this additional policy, an additional group_type (load-balance
scenario) is added: 'group_misfit_task'. It represents scenarios where a
sched_group has one or more tasks that are not suitable for its per-CPU
capacity. 'group_misfit_task' is only considered if the system is not
overloaded or imbalanced ('group_imbalanced' or 'group_overloaded').

Identifying misfit tasks requires the rq lock to be held. To avoid taking
remote rq locks when examining source sched_groups for misfit tasks, each CPU
is responsible for tracking its own misfit tasks and updating the
rq->misfit_task flag. This means checking task utilization when tasks are
scheduled and on each sched_tick.

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dietmar.eggemann@arm.com
Cc: gaku.inami.xh@renesas.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Link: http://lkml.kernel.org/r/1530699470-29808-3-git-send-email-morten.rasmussen@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
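[Editor's note] The fit test added by this patch compares a task's estimated
utilization against CPU capacity with a fixed margin: a task fits if
capacity * 1024 > util * capacity_margin. The standalone C sketch below
replays that arithmetic outside the kernel to show the threshold it implies.
The capacity_margin value of 1280 (which amounts to requiring utilization
below ~80% of the CPU's capacity) and the example capacities (430 for a
LITTLE CPU, 1024 for a big CPU) are assumptions for illustration, not values
taken from this diff.

#include <stdio.h>

/* Assumed default; in fair.c of this era capacity_margin is a tunable. */
static const unsigned long capacity_margin = 1280;

/* Mirror of the fit check: util must stay below capacity * 1024 / 1280. */
static int task_fits_capacity(unsigned long task_util, long capacity)
{
        return capacity * 1024 > task_util * capacity_margin;
}

int main(void)
{
        /* A task with estimated utilization ~400 (out of a 1024 scale). */
        printf("fits LITTLE (capacity 430):  %d\n", task_fits_capacity(400, 430));  /* 0: misfit */
        printf("fits big    (capacity 1024): %d\n", task_fits_capacity(400, 1024)); /* 1: fits   */
        return 0;
}

When the check fails for the task's current CPU, update_misfit_status()
records task_h_load(p) in rq->misfit_task_load, which update_sg_lb_stats()
later folds into group_misfit_task_load for the load balancer.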
Diffstat (limited to 'kernel/sched')
-rw-r--r--    kernel/sched/fair.c     54
-rw-r--r--    kernel/sched/sched.h     2
2 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3e5071aeb117..6e04bea5b11a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -693,6 +693,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
+static unsigned long capacity_of(int cpu);
 
 /* Give new sched_entity start runnable values to heavy its load in infant time */
 void init_entity_runnable_average(struct sched_entity *se)
@@ -1446,7 +1447,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 static unsigned long weighted_cpuload(struct rq *rq);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
-static unsigned long capacity_of(int cpu);
 
 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
@@ -3647,6 +3647,29 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
         WRITE_ONCE(p->se.avg.util_est, ue);
 }
 
+static inline int task_fits_capacity(struct task_struct *p, long capacity)
+{
+        return capacity * 1024 > task_util_est(p) * capacity_margin;
+}
+
+static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
+{
+        if (!static_branch_unlikely(&sched_asym_cpucapacity))
+                return;
+
+        if (!p) {
+                rq->misfit_task_load = 0;
+                return;
+        }
+
+        if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
+                rq->misfit_task_load = 0;
+                return;
+        }
+
+        rq->misfit_task_load = task_h_load(p);
+}
+
 #else /* CONFIG_SMP */
 
 #define UPDATE_TG       0x0
@@ -3676,6 +3699,7 @@ util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
 static inline void
 util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p,
                  bool task_sleep) {}
+static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 
 #endif /* CONFIG_SMP */
 
@@ -6201,7 +6225,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
         /* Bring task utilization in sync with prev_cpu */
         sync_entity_load_avg(&p->se);
 
-        return min_cap * 1024 < task_util(p) * capacity_margin;
+        return !task_fits_capacity(p, min_cap);
 }
 
 /*
@@ -6618,9 +6642,12 @@ done: __maybe_unused;
         if (hrtick_enabled(rq))
                 hrtick_start_fair(rq, p);
 
+        update_misfit_status(p, rq);
+
         return p;
 
 idle:
+        update_misfit_status(NULL, rq);
         new_tasks = idle_balance(rq, rf);
 
         /*
@@ -6826,6 +6853,13 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
 enum fbq_type { regular, remote, all };
 
+enum group_type {
+        group_other = 0,
+        group_misfit_task,
+        group_imbalanced,
+        group_overloaded,
+};
+
 #define LBF_ALL_PINNED  0x01
 #define LBF_NEED_BREAK  0x02
 #define LBF_DST_PINNED  0x04
@@ -7399,12 +7433,6 @@ static unsigned long task_h_load(struct task_struct *p)
 
 /********** Helpers for find_busiest_group ************************/
 
-enum group_type {
-        group_other = 0,
-        group_imbalanced,
-        group_overloaded,
-};
-
 /*
  * sg_lb_stats - stats of a sched_group required for load_balancing
  */
@@ -7420,6 +7448,7 @@ struct sg_lb_stats {
         unsigned int group_weight;
         enum group_type group_type;
         int group_no_capacity;
+        unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
 #ifdef CONFIG_NUMA_BALANCING
         unsigned int nr_numa_running;
         unsigned int nr_preferred_running;
@@ -7712,6 +7741,9 @@ group_type group_classify(struct sched_group *group,
         if (sg_imbalanced(group))
                 return group_imbalanced;
 
+        if (sgs->group_misfit_task_load)
+                return group_misfit_task;
+
         return group_other;
 }
 
@@ -7786,6 +7818,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                  */
                 if (!nr_running && idle_cpu(i))
                         sgs->idle_cpus++;
+
+                if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
+                    sgs->group_misfit_task_load < rq->misfit_task_load)
+                        sgs->group_misfit_task_load = rq->misfit_task_load;
         }
 
         /* Adjust by relative CPU capacity of the group */
@@ -9567,6 +9603,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 
         if (static_branch_unlikely(&sched_numa_balancing))
                 task_tick_numa(rq, curr);
+
+        update_misfit_status(curr, rq);
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0f36adc31ba5..7dbf67d147a2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -842,6 +842,8 @@ struct rq {
 
         unsigned char idle_balance;
 
+        unsigned long misfit_task_load;
+
         /* For active balancing */
         int active_balance;
         int push_cpu;