-rw-r--r--  kernel/sched/core.c               2
-rw-r--r--  kernel/sched/cpufreq_schedutil.c  59
-rw-r--r--  kernel/sched/fair.c               84
3 files changed, 89 insertions, 56 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ead464a0f2e5..4778c48a7fda 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6998,7 +6998,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf,
 {
 	char tok[21];	/* U64_MAX */
 
-	if (!sscanf(buf, "%s %llu", tok, periodp))
+	if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
 		return -EINVAL;
 
 	*periodp *= NSEC_PER_USEC;
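
Note on the core.c hunk: "%20s" caps the token so the 21-byte buffer cannot overflow, and comparing the sscanf() return value with "< 1" rejects both a zero-conversion result and EOF (which is negative), whereas the old "!sscanf(...)" missed the EOF case. A minimal user-space sketch of the same parsing pattern (parse_period() and the sample inputs are illustrative, not kernel code):

#include <stdio.h>
#include <inttypes.h>

/*
 * Illustrative stand-in for cpu_period_quota_parse(): "%20s" limits the
 * token to 20 characters so the 21-byte buffer cannot overflow, and
 * "< 1" rejects both zero conversions and EOF (negative).
 */
static int parse_period(const char *buf, uint64_t *periodp)
{
	char tok[21];	/* U64_MAX fits in 20 digits + NUL */

	if (sscanf(buf, "%20s %" SCNu64, tok, periodp) < 1)
		return -1;

	return 0;
}

int main(void)
{
	uint64_t period = 0;

	printf("\"max 100000\" -> %d\n", parse_period("max 100000", &period));
	printf("\"\"           -> %d\n", parse_period("", &period));	/* sscanf() returns EOF */
	return 0;
}
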
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2efe629425be..5c41ea367422 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -48,10 +48,10 @@ struct sugov_cpu {
 
 	bool			iowait_boost_pending;
 	unsigned int		iowait_boost;
-	unsigned int		iowait_boost_max;
 	u64			last_update;
 
 	unsigned long		bw_dl;
+	unsigned long		min;
 	unsigned long		max;
 
 	/* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
 	if (delta_ns <= TICK_NSEC)
 		return false;
 
-	sg_cpu->iowait_boost = set_iowait_boost
-		? sg_cpu->sg_policy->policy->min : 0;
+	sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
 	sg_cpu->iowait_boost_pending = set_iowait_boost;
 
 	return true;
@@ -344,14 +343,13 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
 
 	/* Double the boost at each request */
 	if (sg_cpu->iowait_boost) {
-		sg_cpu->iowait_boost <<= 1;
-		if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
-			sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+		sg_cpu->iowait_boost =
+			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
 		return;
 	}
 
 	/* First wakeup after IO: start with minimum boost */
-	sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+	sg_cpu->iowait_boost = sg_cpu->min;
 }
 
 /**
@@ -373,47 +371,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
  * This mechanism is designed to boost high frequently IO waiting tasks, while
  * being more conservative on tasks which does sporadic IO operations.
  */
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
-			       unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+					unsigned long util, unsigned long max)
 {
-	unsigned int boost_util, boost_max;
+	unsigned long boost;
 
 	/* No boost currently required */
 	if (!sg_cpu->iowait_boost)
-		return;
+		return util;
 
 	/* Reset boost if the CPU appears to have been idle enough */
 	if (sugov_iowait_reset(sg_cpu, time, false))
-		return;
+		return util;
 
-	/*
-	 * An IO waiting task has just woken up:
-	 * allow to further double the boost value
-	 */
-	if (sg_cpu->iowait_boost_pending) {
-		sg_cpu->iowait_boost_pending = false;
-	} else {
+	if (!sg_cpu->iowait_boost_pending) {
 		/*
-		 * Otherwise: reduce the boost value and disable it when we
-		 * reach the minimum.
+		 * No boost pending; reduce the boost value.
 		 */
 		sg_cpu->iowait_boost >>= 1;
-		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+		if (sg_cpu->iowait_boost < sg_cpu->min) {
 			sg_cpu->iowait_boost = 0;
-			return;
+			return util;
 		}
 	}
 
+	sg_cpu->iowait_boost_pending = false;
+
 	/*
-	 * Apply the current boost value: a CPU is boosted only if its current
-	 * utilization is smaller then the current IO boost level.
+	 * @util is already in capacity scale; convert iowait_boost
+	 * into the same scale so we can compare.
 	 */
-	boost_util = sg_cpu->iowait_boost;
-	boost_max = sg_cpu->iowait_boost_max;
-	if (*util * boost_max < *max * boost_util) {
-		*util = boost_util;
-		*max = boost_max;
-	}
+	boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+	return max(boost, util);
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +449,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
460 449
461 util = sugov_get_util(sg_cpu); 450 util = sugov_get_util(sg_cpu);
462 max = sg_cpu->max; 451 max = sg_cpu->max;
463 sugov_iowait_apply(sg_cpu, time, &util, &max); 452 util = sugov_iowait_apply(sg_cpu, time, util, max);
464 next_f = get_next_freq(sg_policy, util, max); 453 next_f = get_next_freq(sg_policy, util, max);
465 /* 454 /*
466 * Do not reduce the frequency if the CPU has not been idle 455 * Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +489,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
500 489
501 j_util = sugov_get_util(j_sg_cpu); 490 j_util = sugov_get_util(j_sg_cpu);
502 j_max = j_sg_cpu->max; 491 j_max = j_sg_cpu->max;
503 sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max); 492 j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
504 493
505 if (j_util * max > j_max * util) { 494 if (j_util * max > j_max * util) {
506 util = j_util; 495 util = j_util;
@@ -837,7 +826,9 @@ static int sugov_start(struct cpufreq_policy *policy)
 		memset(sg_cpu, 0, sizeof(*sg_cpu));
 		sg_cpu->cpu			= cpu;
 		sg_cpu->sg_policy		= sg_policy;
-		sg_cpu->iowait_boost_max	= policy->cpuinfo.max_freq;
+		sg_cpu->min			=
+			(SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+			policy->cpuinfo.max_freq;
 	}
 
 	for_each_cpu(cpu, policy->cpus) {
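
Note on the cpufreq_schedutil.c hunks: iowait boost is now tracked in capacity units instead of frequency units. sg_cpu->min is the policy's minimum frequency expressed on the SCHED_CAPACITY_SCALE (1024) scale, the boost doubles per IO wakeup up to SCHED_CAPACITY_SCALE and halves otherwise, and sugov_iowait_apply() returns max(util, boost * max >> SCHED_CAPACITY_SHIFT) rather than mutating util/max through pointers. A toy user-space sketch of that arithmetic (the 500 MHz/2 GHz policy and the utilization value are made-up inputs, not taken from the patch):

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

int main(void)
{
	unsigned long min_freq = 500000, max_freq = 2000000;	/* kHz, made up */
	unsigned long max = 1024;	/* capacity of this CPU */
	unsigned long util = 300;	/* current utilization, capacity units */
	unsigned long min = (SCHED_CAPACITY_SCALE * min_freq) / max_freq;	/* 256 */
	unsigned long boost;

	/* Successive IO wakeups: double the boost, clamped at SCHED_CAPACITY_SCALE. */
	for (boost = min; ; boost = 2 * boost > SCHED_CAPACITY_SCALE ?
				       SCHED_CAPACITY_SCALE : 2 * boost) {
		/* Same comparison as sugov_iowait_apply(): boost scaled into capacity units. */
		unsigned long scaled = (boost * max) >> SCHED_CAPACITY_SHIFT;
		unsigned long boosted = scaled > util ? scaled : util;

		printf("boost=%4lu -> boosted util=%4lu\n", boost, boosted);
		if (boost == SCHED_CAPACITY_SCALE)
			break;
	}
	return 0;
}
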
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ea74d43924b2..fdab7eb6f351 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8060,6 +8060,18 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 }
 
 /*
+ * Check whether a rq has a misfit task and if it looks like we can actually
+ * help that task: we can migrate the task to a CPU of higher capacity, or
+ * the task's current CPU is heavily pressured.
+ */
+static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
+{
+	return rq->misfit_task_load &&
+		(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+		 check_cpu_capacity(rq, sd));
+}
+
+/*
  * Group imbalance indicates (and tries to solve) the problem where balancing
  * groups is inadequate due to ->cpus_allowed constraints.
  *
@@ -9586,35 +9598,21 @@ static void nohz_balancer_kick(struct rq *rq)
 	if (time_before(now, nohz.next_balance))
 		goto out;
 
-	if (rq->nr_running >= 2 || rq->misfit_task_load) {
+	if (rq->nr_running >= 2) {
 		flags = NOHZ_KICK_MASK;
 		goto out;
 	}
 
 	rcu_read_lock();
-	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds) {
-		/*
-		 * If there is an imbalance between LLC domains (IOW we could
-		 * increase the overall cache use), we need some less-loaded LLC
-		 * domain to pull some load. Likewise, we may need to spread
-		 * load within the current LLC domain (e.g. packed SMT cores but
-		 * other CPUs are idle). We can't really know from here how busy
-		 * the others are - so just get a nohz balance going if it looks
-		 * like this LLC domain has tasks we could move.
-		 */
-		nr_busy = atomic_read(&sds->nr_busy_cpus);
-		if (nr_busy > 1) {
-			flags = NOHZ_KICK_MASK;
-			goto unlock;
-		}
-
-	}
 
 	sd = rcu_dereference(rq->sd);
 	if (sd) {
-		if ((rq->cfs.h_nr_running >= 1) &&
-				check_cpu_capacity(rq, sd)) {
+		/*
+		 * If there's a CFS task and the current CPU has reduced
+		 * capacity; kick the ILB to see if there's a better CPU to run
+		 * on.
+		 */
+		if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
 			flags = NOHZ_KICK_MASK;
 			goto unlock;
 		}
@@ -9622,6 +9620,11 @@ static void nohz_balancer_kick(struct rq *rq)
 
 	sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
 	if (sd) {
+		/*
+		 * When ASYM_PACKING; see if there's a more preferred CPU
+		 * currently idle; in which case, kick the ILB to move tasks
+		 * around.
+		 */
 		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
 			if (sched_asym_prefer(i, cpu)) {
 				flags = NOHZ_KICK_MASK;
@@ -9629,6 +9632,45 @@ static void nohz_balancer_kick(struct rq *rq)
 			}
 		}
 	}
+
+	sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu));
+	if (sd) {
+		/*
+		 * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
+		 * to run the misfit task on.
+		 */
+		if (check_misfit_status(rq, sd)) {
+			flags = NOHZ_KICK_MASK;
+			goto unlock;
+		}
+
+		/*
+		 * For asymmetric systems, we do not want to nicely balance
+		 * cache use, instead we want to embrace asymmetry and only
+		 * ensure tasks have enough CPU capacity.
+		 *
+		 * Skip the LLC logic because it's not relevant in that case.
+		 */
+		goto unlock;
+	}
+
+	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (sds) {
+		/*
+		 * If there is an imbalance between LLC domains (IOW we could
+		 * increase the overall cache use), we need some less-loaded LLC
+		 * domain to pull some load. Likewise, we may need to spread
+		 * load within the current LLC domain (e.g. packed SMT cores but
+		 * other CPUs are idle). We can't really know from here how busy
+		 * the others are - so just get a nohz balance going if it looks
+		 * like this LLC domain has tasks we could move.
+		 */
+		nr_busy = atomic_read(&sds->nr_busy_cpus);
+		if (nr_busy > 1) {
+			flags = NOHZ_KICK_MASK;
+			goto unlock;
+		}
+	}
 unlock:
 	rcu_read_unlock();
 out:
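
Note on the fair.c hunks: nohz_balancer_kick() now checks, in order, rq->nr_running >= 2, a CFS task on a capacity-pressured CPU (check_cpu_capacity()), ASYM_PACKING with a more preferred idle CPU, and, new in this patch, ASYM_CPUCAPACITY with check_misfit_status(); on asymmetric-capacity systems the LLC nr_busy_cpus heuristic is skipped entirely, and otherwise it runs last. A toy sketch of that decision order (should_kick_ilb() and its boolean inputs are illustrative stand-ins for the real rq/sched_domain state, not the kernel API):

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model of the reordered nohz_balancer_kick() logic; every input is
 * a plain flag standing in for rq/sched_domain state, so this only
 * illustrates control flow, not the kernel interfaces.
 */
static bool should_kick_ilb(bool two_runnable, bool cfs_task_capacity_pressure,
			    bool asym_packing_better_cpu_idle,
			    bool asym_cpucapacity, bool misfit_can_be_helped,
			    bool llc_has_busy_siblings)
{
	if (two_runnable)
		return true;				/* obvious overload */
	if (cfs_task_capacity_pressure)
		return true;				/* check_cpu_capacity() case */
	if (asym_packing_better_cpu_idle)
		return true;				/* sched_asym_prefer() case */
	if (asym_cpucapacity)				/* asymmetric-capacity system */
		return misfit_can_be_helped;		/* check_misfit_status(); LLC logic skipped */
	return llc_has_busy_siblings;			/* nr_busy_cpus > 1 */
}

int main(void)
{
	/* Asymmetric system with a misfit task a bigger CPU could help: kick. */
	printf("%d\n", should_kick_ilb(false, false, false, true, true, false));
	/* Asymmetric system, no misfit: LLC heuristic is skipped, no kick. */
	printf("%d\n", should_kick_ilb(false, false, false, true, false, true));
	return 0;
}
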