-rw-r--r--   kernel/sched/core.c                 2
-rw-r--r--   kernel/sched/cpufreq_schedutil.c   59
-rw-r--r--   kernel/sched/fair.c                84
3 files changed, 89 insertions, 56 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ead464a0f2e5..4778c48a7fda 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6998,7 +6998,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf,
 {
         char tok[21];   /* U64_MAX */
 
-        if (!sscanf(buf, "%s %llu", tok, periodp))
+        if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
                 return -EINVAL;
 
         *periodp *= NSEC_PER_USEC;
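The core.c hunk above tightens the sscanf() call: "%20s" bounds the token to the 21-byte buffer, and testing for a return value below 1 rejects input where not even the token matched. Below is a minimal user-space sketch of the same pattern; the helper name and the usec-to-nsec constant are illustrative assumptions, not the kernel code.

/* Hypothetical user-space analogue of the bounded parse, not the kernel function. */
#include <stdio.h>
#include <inttypes.h>

static int period_quota_parse(const char *buf, uint64_t *periodp)
{
        char tok[21];   /* fits "max" or a u64 in decimal, plus the NUL */

        /* "%20s" cannot overflow tok[21]; < 1 means not even the token matched */
        if (sscanf(buf, "%20s %" SCNu64, tok, periodp) < 1)
                return -1;

        *periodp *= 1000ULL;    /* usec -> nsec, mirroring NSEC_PER_USEC */
        return 0;
}

int main(void)
{
        uint64_t period = 0;

        if (period_quota_parse("max 250000", &period) == 0)
                printf("period = %" PRIu64 " ns\n", period);
        return 0;
}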
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2efe629425be..5c41ea367422 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -48,10 +48,10 @@ struct sugov_cpu {
 
         bool                    iowait_boost_pending;
         unsigned int            iowait_boost;
-        unsigned int            iowait_boost_max;
         u64                     last_update;
 
         unsigned long           bw_dl;
+        unsigned long           min;
         unsigned long           max;
 
         /* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
         if (delta_ns <= TICK_NSEC)
                 return false;
 
-        sg_cpu->iowait_boost = set_iowait_boost
-                               ? sg_cpu->sg_policy->policy->min : 0;
+        sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
         sg_cpu->iowait_boost_pending = set_iowait_boost;
 
         return true;
@@ -344,14 +343,13 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
 
         /* Double the boost at each request */
         if (sg_cpu->iowait_boost) {
-                sg_cpu->iowait_boost <<= 1;
-                if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
-                        sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+                sg_cpu->iowait_boost =
+                        min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
                 return;
         }
 
         /* First wakeup after IO: start with minimum boost */
-        sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+        sg_cpu->iowait_boost = sg_cpu->min;
 }
 
 /**
@@ -373,47 +371,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
  * This mechanism is designed to boost high frequently IO waiting tasks, while
  * being more conservative on tasks which does sporadic IO operations.
  */
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
-                               unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+                                        unsigned long util, unsigned long max)
 {
-        unsigned int boost_util, boost_max;
+        unsigned long boost;
 
         /* No boost currently required */
         if (!sg_cpu->iowait_boost)
-                return;
+                return util;
 
         /* Reset boost if the CPU appears to have been idle enough */
         if (sugov_iowait_reset(sg_cpu, time, false))
-                return;
+                return util;
 
-        /*
-         * An IO waiting task has just woken up:
-         * allow to further double the boost value
-         */
-        if (sg_cpu->iowait_boost_pending) {
-                sg_cpu->iowait_boost_pending = false;
-        } else {
+        if (!sg_cpu->iowait_boost_pending) {
                 /*
-                 * Otherwise: reduce the boost value and disable it when we
-                 * reach the minimum.
+                 * No boost pending; reduce the boost value.
                  */
                 sg_cpu->iowait_boost >>= 1;
-                if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+                if (sg_cpu->iowait_boost < sg_cpu->min) {
                         sg_cpu->iowait_boost = 0;
-                        return;
+                        return util;
                 }
         }
 
+        sg_cpu->iowait_boost_pending = false;
+
         /*
-         * Apply the current boost value: a CPU is boosted only if its current
-         * utilization is smaller then the current IO boost level.
+         * @util is already in capacity scale; convert iowait_boost
+         * into the same scale so we can compare.
          */
-        boost_util = sg_cpu->iowait_boost;
-        boost_max = sg_cpu->iowait_boost_max;
-        if (*util * boost_max < *max * boost_util) {
-                *util = boost_util;
-                *max = boost_max;
-        }
+        boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+        return max(boost, util);
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +449,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 
         util = sugov_get_util(sg_cpu);
         max = sg_cpu->max;
-        sugov_iowait_apply(sg_cpu, time, &util, &max);
+        util = sugov_iowait_apply(sg_cpu, time, util, max);
         next_f = get_next_freq(sg_policy, util, max);
         /*
          * Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +489,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 
                 j_util = sugov_get_util(j_sg_cpu);
                 j_max = j_sg_cpu->max;
-                sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+                j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
 
                 if (j_util * max > j_max * util) {
                         util = j_util;
@@ -837,7 +826,9 @@ static int sugov_start(struct cpufreq_policy *policy)
                 memset(sg_cpu, 0, sizeof(*sg_cpu));
                 sg_cpu->cpu = cpu;
                 sg_cpu->sg_policy = sg_policy;
-                sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+                sg_cpu->min =
+                        (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+                        policy->cpuinfo.max_freq;
         }
 
         for_each_cpu(cpu, policy->cpus) {
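The cpufreq_schedutil.c changes move iowait_boost from the frequency domain onto the SCHED_CAPACITY_SCALE (1024) fixed-point scale: sg_cpu->min caches min_freq as a fraction of max_freq, the boost doubles up to SCHED_CAPACITY_SCALE, and applying it is a multiply/shift plus a max() against the current utilization. Below is a minimal stand-alone sketch of that arithmetic with made-up CPU numbers; it is not the kernel implementation.

/* Stand-alone sketch of the capacity-scale boost math; all values are assumptions. */
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT    10
#define SCHED_CAPACITY_SCALE    (1UL << SCHED_CAPACITY_SHIFT)

/* What sg_cpu->min caches in the patch: min_freq as a fraction of max_freq */
static unsigned long boost_min(unsigned long min_freq, unsigned long max_freq)
{
        return (SCHED_CAPACITY_SCALE * min_freq) / max_freq;
}

/* The core of the reworked sugov_iowait_apply(): scale the boost, take the max */
static unsigned long apply_boost(unsigned long util, unsigned long max,
                                 unsigned long iowait_boost)
{
        unsigned long boost = (iowait_boost * max) >> SCHED_CAPACITY_SHIFT;

        return boost > util ? boost : util;
}

int main(void)
{
        /* Hypothetical little CPU: capacity 446/1024, min/max freq 500/2000 MHz */
        unsigned long max = 446;
        unsigned long min = boost_min(500, 2000);       /* 256 */
        unsigned long full = SCHED_CAPACITY_SCALE;      /* boost fully ramped up */

        /* A fully ramped boost can never push util past the CPU's capacity */
        printf("min = %lu, boosted util = %lu\n", min, apply_boost(100, max, full));
        return 0;
}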
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ea74d43924b2..fdab7eb6f351 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8060,6 +8060,18 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 }
 
 /*
+ * Check whether a rq has a misfit task and if it looks like we can actually
+ * help that task: we can migrate the task to a CPU of higher capacity, or
+ * the task's current CPU is heavily pressured.
+ */
+static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
+{
+        return rq->misfit_task_load &&
+                (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+                 check_cpu_capacity(rq, sd));
+}
+
+/*
  * Group imbalance indicates (and tries to solve) the problem where balancing
  * groups is inadequate due to ->cpus_allowed constraints.
  *
@@ -9586,35 +9598,21 @@ static void nohz_balancer_kick(struct rq *rq)
         if (time_before(now, nohz.next_balance))
                 goto out;
 
-        if (rq->nr_running >= 2 || rq->misfit_task_load) {
+        if (rq->nr_running >= 2) {
                 flags = NOHZ_KICK_MASK;
                 goto out;
         }
 
         rcu_read_lock();
-        sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-        if (sds) {
-                /*
-                 * If there is an imbalance between LLC domains (IOW we could
-                 * increase the overall cache use), we need some less-loaded LLC
-                 * domain to pull some load. Likewise, we may need to spread
-                 * load within the current LLC domain (e.g. packed SMT cores but
-                 * other CPUs are idle). We can't really know from here how busy
-                 * the others are - so just get a nohz balance going if it looks
-                 * like this LLC domain has tasks we could move.
-                 */
-                nr_busy = atomic_read(&sds->nr_busy_cpus);
-                if (nr_busy > 1) {
-                        flags = NOHZ_KICK_MASK;
-                        goto unlock;
-                }
-
-        }
 
         sd = rcu_dereference(rq->sd);
         if (sd) {
-                if ((rq->cfs.h_nr_running >= 1) &&
-                                check_cpu_capacity(rq, sd)) {
+                /*
+                 * If there's a CFS task and the current CPU has reduced
+                 * capacity; kick the ILB to see if there's a better CPU to run
+                 * on.
+                 */
+                if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
                         flags = NOHZ_KICK_MASK;
                         goto unlock;
                 }
@@ -9622,6 +9620,11 @@ static void nohz_balancer_kick(struct rq *rq)
 
         sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
         if (sd) {
+                /*
+                 * When ASYM_PACKING; see if there's a more preferred CPU
+                 * currently idle; in which case, kick the ILB to move tasks
+                 * around.
+                 */
                 for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
                         if (sched_asym_prefer(i, cpu)) {
                                 flags = NOHZ_KICK_MASK;
@@ -9629,6 +9632,45 @@ static void nohz_balancer_kick(struct rq *rq)
                         }
                 }
         }
+
+        sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu));
+        if (sd) {
+                /*
+                 * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
+                 * to run the misfit task on.
+                 */
+                if (check_misfit_status(rq, sd)) {
+                        flags = NOHZ_KICK_MASK;
+                        goto unlock;
+                }
+
+                /*
+                 * For asymmetric systems, we do not want to nicely balance
+                 * cache use, instead we want to embrace asymmetry and only
+                 * ensure tasks have enough CPU capacity.
+                 *
+                 * Skip the LLC logic because it's not relevant in that case.
+                 */
+                goto unlock;
+        }
+
+        sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+        if (sds) {
+                /*
+                 * If there is an imbalance between LLC domains (IOW we could
+                 * increase the overall cache use), we need some less-loaded LLC
+                 * domain to pull some load. Likewise, we may need to spread
+                 * load within the current LLC domain (e.g. packed SMT cores but
+                 * other CPUs are idle). We can't really know from here how busy
+                 * the others are - so just get a nohz balance going if it looks
+                 * like this LLC domain has tasks we could move.
+                 */
+                nr_busy = atomic_read(&sds->nr_busy_cpus);
+                if (nr_busy > 1) {
+                        flags = NOHZ_KICK_MASK;
+                        goto unlock;
+                }
+        }
 unlock:
         rcu_read_unlock();
 out:
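The fair.c changes factor the misfit test into check_misfit_status() and reorder nohz_balancer_kick() so that, on asymmetric-capacity systems, the ILB is kicked when a misfit task could run on a bigger CPU or its current CPU is heavily pressured, while the LLC-busy logic is skipped entirely. Below is a rough stand-alone sketch of that decision; the struct fields and the 80% threshold are illustrative assumptions, not the kernel's types or tunables.

/* Illustrative types and thresholds only; not the kernel's structures. */
#include <stdbool.h>
#include <stdio.h>

struct fake_rq {
        unsigned long misfit_task_load;         /* load of a task too big for this CPU */
        unsigned long cpu_capacity_orig;        /* this CPU's full capacity */
        unsigned long max_cpu_capacity;         /* largest capacity in the root domain */
        unsigned long cpu_capacity;             /* capacity left after RT/IRQ pressure */
};

/* Rough stand-in for check_cpu_capacity(): usable capacity dropped below ~80% */
static bool capacity_reduced(const struct fake_rq *rq)
{
        return rq->cpu_capacity * 100 < rq->cpu_capacity_orig * 80;
}

/* Mirrors the logic of the new check_misfit_status() helper */
static bool misfit_status(const struct fake_rq *rq)
{
        return rq->misfit_task_load &&
               (rq->cpu_capacity_orig < rq->max_cpu_capacity ||
                capacity_reduced(rq));
}

int main(void)
{
        /* Little CPU holding a misfit task while a bigger CPU exists: kick the ILB */
        struct fake_rq little = { .misfit_task_load = 800, .cpu_capacity_orig = 446,
                                  .max_cpu_capacity = 1024, .cpu_capacity = 446 };
        /* Biggest CPU, no pressure: a "misfit" there cannot be helped, no kick */
        struct fake_rq big = { .misfit_task_load = 800, .cpu_capacity_orig = 1024,
                               .max_cpu_capacity = 1024, .cpu_capacity = 1024 };

        printf("kick for little: %d, kick for big: %d\n",
               misfit_status(&little), misfit_status(&big));
        return 0;
}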
