-rw-r--r--   kernel/sched/core.c                 2
-rw-r--r--   kernel/sched/cpufreq_schedutil.c   59
-rw-r--r--   kernel/sched/fair.c                84
3 files changed, 89 insertions, 56 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ead464a0f2e5..4778c48a7fda 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6998,7 +6998,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf,
 {
         char tok[21];   /* U64_MAX */
 
-        if (!sscanf(buf, "%s %llu", tok, periodp))
+        if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
                 return -EINVAL;
 
         *periodp *= NSEC_PER_USEC;
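The core.c hunk above tightens the sscanf() call: "%20s" bounds the token to the 21-byte buffer, and testing for a return value below 1 rejects input where not even the token matched. Below is a minimal user-space sketch of the same pattern; the helper name and the usec-to-nsec constant are illustrative assumptions, not the kernel code.

/* Hypothetical user-space analogue of the bounded parse, not the kernel function. */
#include <stdio.h>
#include <inttypes.h>

static int period_quota_parse(const char *buf, uint64_t *periodp)
{
        char tok[21];   /* fits "max" or a u64 in decimal, plus the NUL */

        /* "%20s" cannot overflow tok[21]; < 1 means not even the token matched */
        if (sscanf(buf, "%20s %" SCNu64, tok, periodp) < 1)
                return -1;

        *periodp *= 1000ULL;    /* usec -> nsec, mirroring NSEC_PER_USEC */
        return 0;
}

int main(void)
{
        uint64_t period = 0;

        if (period_quota_parse("max 250000", &period) == 0)
                printf("period = %" PRIu64 " ns\n", period);
        return 0;
}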
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2efe629425be..5c41ea367422 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -48,10 +48,10 @@ struct sugov_cpu {
 
         bool                    iowait_boost_pending;
         unsigned int            iowait_boost;
-        unsigned int            iowait_boost_max;
         u64                     last_update;
 
         unsigned long           bw_dl;
+        unsigned long           min;
         unsigned long           max;
 
         /* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
         if (delta_ns <= TICK_NSEC)
                 return false;
 
-        sg_cpu->iowait_boost = set_iowait_boost
-                               ? sg_cpu->sg_policy->policy->min : 0;
+        sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
         sg_cpu->iowait_boost_pending = set_iowait_boost;
 
         return true;
@@ -344,14 +343,13 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
 
         /* Double the boost at each request */
         if (sg_cpu->iowait_boost) {
-                sg_cpu->iowait_boost <<= 1;
-                if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
-                        sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+                sg_cpu->iowait_boost =
+                        min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
                 return;
         }
 
         /* First wakeup after IO: start with minimum boost */
-        sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+        sg_cpu->iowait_boost = sg_cpu->min;
 }
 
 /**
@@ -373,47 +371,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
  * This mechanism is designed to boost high frequently IO waiting tasks, while
  * being more conservative on tasks which does sporadic IO operations.
  */
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
-                               unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+                                        unsigned long util, unsigned long max)
 {
-        unsigned int boost_util, boost_max;
+        unsigned long boost;
 
         /* No boost currently required */
         if (!sg_cpu->iowait_boost)
-                return;
+                return util;
 
         /* Reset boost if the CPU appears to have been idle enough */
         if (sugov_iowait_reset(sg_cpu, time, false))
-                return;
+                return util;
 
-        /*
-         * An IO waiting task has just woken up:
-         * allow to further double the boost value
-         */
-        if (sg_cpu->iowait_boost_pending) {
-                sg_cpu->iowait_boost_pending = false;
-        } else {
+        if (!sg_cpu->iowait_boost_pending) {
                 /*
-                 * Otherwise: reduce the boost value and disable it when we
-                 * reach the minimum.
+                 * No boost pending; reduce the boost value.
                  */
                 sg_cpu->iowait_boost >>= 1;
-                if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+                if (sg_cpu->iowait_boost < sg_cpu->min) {
                         sg_cpu->iowait_boost = 0;
-                        return;
+                        return util;
                 }
         }
 
+        sg_cpu->iowait_boost_pending = false;
+
         /*
-         * Apply the current boost value: a CPU is boosted only if its current
-         * utilization is smaller then the current IO boost level.
+         * @util is already in capacity scale; convert iowait_boost
+         * into the same scale so we can compare.
          */
-        boost_util = sg_cpu->iowait_boost;
-        boost_max = sg_cpu->iowait_boost_max;
-        if (*util * boost_max < *max * boost_util) {
-                *util = boost_util;
-                *max = boost_max;
-        }
+        boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+        return max(boost, util);
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +449,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 
         util = sugov_get_util(sg_cpu);
         max = sg_cpu->max;
-        sugov_iowait_apply(sg_cpu, time, &util, &max);
+        util = sugov_iowait_apply(sg_cpu, time, util, max);
         next_f = get_next_freq(sg_policy, util, max);
         /*
          * Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +489,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 
                 j_util = sugov_get_util(j_sg_cpu);
                 j_max = j_sg_cpu->max;
-                sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+                j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
 
                 if (j_util * max > j_max * util) {
                         util = j_util;
@@ -837,7 +826,9 @@ static int sugov_start(struct cpufreq_policy *policy)
                 memset(sg_cpu, 0, sizeof(*sg_cpu));
                 sg_cpu->cpu = cpu;
                 sg_cpu->sg_policy = sg_policy;
-                sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+                sg_cpu->min =
+                        (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+                        policy->cpuinfo.max_freq;
         }
 
         for_each_cpu(cpu, policy->cpus) {
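The cpufreq_schedutil.c changes move iowait_boost from the frequency domain onto the SCHED_CAPACITY_SCALE (1024) fixed-point scale: sg_cpu->min caches min_freq as a fraction of max_freq, the boost doubles up to SCHED_CAPACITY_SCALE, and applying it is a multiply/shift plus a max() against the current utilization. Below is a minimal stand-alone sketch of that arithmetic with made-up CPU numbers; it is not the kernel implementation.

/* Stand-alone sketch of the capacity-scale boost math; all values are assumptions. */
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT    10
#define SCHED_CAPACITY_SCALE    (1UL << SCHED_CAPACITY_SHIFT)

/* What sg_cpu->min caches in the patch: min_freq as a fraction of max_freq */
static unsigned long boost_min(unsigned long min_freq, unsigned long max_freq)
{
        return (SCHED_CAPACITY_SCALE * min_freq) / max_freq;
}

/* The core of the reworked sugov_iowait_apply(): scale the boost, take the max */
static unsigned long apply_boost(unsigned long util, unsigned long max,
                                 unsigned long iowait_boost)
{
        unsigned long boost = (iowait_boost * max) >> SCHED_CAPACITY_SHIFT;

        return boost > util ? boost : util;
}

int main(void)
{
        /* Hypothetical little CPU: capacity 446/1024, min/max freq 500/2000 MHz */
        unsigned long max = 446;
        unsigned long min = boost_min(500, 2000);       /* 256 */
        unsigned long full = SCHED_CAPACITY_SCALE;      /* boost fully ramped up */

        /* A fully ramped boost can never push util past the CPU's capacity */
        printf("min = %lu, boosted util = %lu\n", min, apply_boost(100, max, full));
        return 0;
}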
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ea74d43924b2..fdab7eb6f351 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8060,6 +8060,18 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 }
 
 /*
+ * Check whether a rq has a misfit task and if it looks like we can actually
+ * help that task: we can migrate the task to a CPU of higher capacity, or
+ * the task's current CPU is heavily pressured.
+ */
+static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
+{
+        return rq->misfit_task_load &&
+                (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+                 check_cpu_capacity(rq, sd));
+}
+
+/*
  * Group imbalance indicates (and tries to solve) the problem where balancing
  * groups is inadequate due to ->cpus_allowed constraints.
  *
@@ -9586,35 +9598,21 @@ static void nohz_balancer_kick(struct rq *rq)
         if (time_before(now, nohz.next_balance))
                 goto out;
 
-        if (rq->nr_running >= 2 || rq->misfit_task_load) {
+        if (rq->nr_running >= 2) {
                 flags = NOHZ_KICK_MASK;
                 goto out;
         }
 
         rcu_read_lock();
-        sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-        if (sds) {
-                /*
-                 * If there is an imbalance between LLC domains (IOW we could
-                 * increase the overall cache use), we need some less-loaded LLC
-                 * domain to pull some load. Likewise, we may need to spread
-                 * load within the current LLC domain (e.g. packed SMT cores but
-                 * other CPUs are idle). We can't really know from here how busy
-                 * the others are - so just get a nohz balance going if it looks
-                 * like this LLC domain has tasks we could move.
-                 */
-                nr_busy = atomic_read(&sds->nr_busy_cpus);
-                if (nr_busy > 1) {
-                        flags = NOHZ_KICK_MASK;
-                        goto unlock;
-                }
-
-        }
 
         sd = rcu_dereference(rq->sd);
         if (sd) {
-                if ((rq->cfs.h_nr_running >= 1) &&
-                                check_cpu_capacity(rq, sd)) {
+                /*
+                 * If there's a CFS task and the current CPU has reduced
+                 * capacity; kick the ILB to see if there's a better CPU to run
+                 * on.
+                 */
+                if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
                         flags = NOHZ_KICK_MASK;
                         goto unlock;
                 }
@@ -9622,6 +9620,11 @@ static void nohz_balancer_kick(struct rq *rq)
 
         sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
         if (sd) {
+                /*
+                 * When ASYM_PACKING; see if there's a more preferred CPU
+                 * currently idle; in which case, kick the ILB to move tasks
+                 * around.
+                 */
                 for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
                         if (sched_asym_prefer(i, cpu)) {
                                 flags = NOHZ_KICK_MASK;
@@ -9629,6 +9632,45 @@ static void nohz_balancer_kick(struct rq *rq)
                         }
                 }
         }
+
+        sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu));
+        if (sd) {
+                /*
+                 * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
+                 * to run the misfit task on.
+                 */
+                if (check_misfit_status(rq, sd)) {
+                        flags = NOHZ_KICK_MASK;
+                        goto unlock;
+                }
+
+                /*
+                 * For asymmetric systems, we do not want to nicely balance
+                 * cache use, instead we want to embrace asymmetry and only
+                 * ensure tasks have enough CPU capacity.
+                 *
+                 * Skip the LLC logic because it's not relevant in that case.
+                 */
+                goto unlock;
+        }
+
+        sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+        if (sds) {
+                /*
+                 * If there is an imbalance between LLC domains (IOW we could
+                 * increase the overall cache use), we need some less-loaded LLC
+                 * domain to pull some load. Likewise, we may need to spread
+                 * load within the current LLC domain (e.g. packed SMT cores but
+                 * other CPUs are idle). We can't really know from here how busy
+                 * the others are - so just get a nohz balance going if it looks
+                 * like this LLC domain has tasks we could move.
+                 */
+                nr_busy = atomic_read(&sds->nr_busy_cpus);
+                if (nr_busy > 1) {
+                        flags = NOHZ_KICK_MASK;
+                        goto unlock;
+                }
+        }
 unlock:
         rcu_read_unlock();
 out:
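The fair.c changes factor the misfit test into check_misfit_status() and reorder nohz_balancer_kick() so that, on asymmetric-capacity systems, the ILB is kicked when a misfit task could run on a bigger CPU or its current CPU is heavily pressured, while the LLC-busy logic is skipped entirely. Below is a rough stand-alone sketch of that decision; the struct fields and the 80% threshold are illustrative assumptions, not the kernel's types or tunables.

/* Illustrative types and thresholds only; not the kernel's structures. */
#include <stdbool.h>
#include <stdio.h>

struct fake_rq {
        unsigned long misfit_task_load;         /* load of a task too big for this CPU */
        unsigned long cpu_capacity_orig;        /* this CPU's full capacity */
        unsigned long max_cpu_capacity;         /* largest capacity in the root domain */
        unsigned long cpu_capacity;             /* capacity left after RT/IRQ pressure */
};

/* Rough stand-in for check_cpu_capacity(): usable capacity dropped below ~80% */
static bool capacity_reduced(const struct fake_rq *rq)
{
        return rq->cpu_capacity * 100 < rq->cpu_capacity_orig * 80;
}

/* Mirrors the logic of the new check_misfit_status() helper */
static bool misfit_status(const struct fake_rq *rq)
{
        return rq->misfit_task_load &&
               (rq->cpu_capacity_orig < rq->max_cpu_capacity ||
                capacity_reduced(rq));
}

int main(void)
{
        /* Little CPU holding a misfit task while a bigger CPU exists: kick the ILB */
        struct fake_rq little = { .misfit_task_load = 800, .cpu_capacity_orig = 446,
                                  .max_cpu_capacity = 1024, .cpu_capacity = 446 };
        /* Biggest CPU, no pressure: a "misfit" there cannot be helped, no kick */
        struct fake_rq big = { .misfit_task_load = 800, .cpu_capacity_orig = 1024,
                               .max_cpu_capacity = 1024, .cpu_capacity = 1024 };

        printf("kick for little: %d, kick for big: %d\n",
               misfit_status(&little), misfit_status(&big));
        return 0;
}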
