Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/core.c	5
-rw-r--r--	kernel/sched/fair.c	66
-rw-r--r--	kernel/sched/psi.c	43
3 files changed, 76 insertions, 38 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f12225f26b70..091e089063be 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5851,11 +5851,14 @@ void __init sched_init_smp(void)
 	/*
 	 * There's no userspace yet to cause hotplug operations; hence all the
 	 * CPU masks are stable and all blatant races in the below code cannot
-	 * happen.
+	 * happen. The hotplug lock is nevertheless taken to satisfy lockdep,
+	 * but there won't be any contention on it.
 	 */
+	cpus_read_lock();
 	mutex_lock(&sched_domains_mutex);
 	sched_init_domains(cpu_active_mask);
 	mutex_unlock(&sched_domains_mutex);
+	cpus_read_unlock();
 
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
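The hunk above establishes a lock-nesting rule rather than fixing a real race: sched_domains_mutex is now always taken with the CPU hotplug lock read-held, so lockdep sees one consistent ordering even during early init when no writer can exist. A minimal userspace sketch of the same idea, using a pthread rwlock as a stand-in for the hotplug lock and a plain mutex for sched_domains_mutex (hotplug_lock, domains_mutex and rebuild_domains() are invented for this illustration, not kernel objects):

#include <pthread.h>
#include <stdio.h>

/* Stand-ins for cpu_hotplug_lock and sched_domains_mutex. */
static pthread_rwlock_t hotplug_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t domains_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Always nest the mutex inside the read-held rwlock, even when no
 * writer can exist yet; a lock checker then sees a single ordering.
 */
static void rebuild_domains(const char *why)
{
	pthread_rwlock_rdlock(&hotplug_lock);
	pthread_mutex_lock(&domains_mutex);
	printf("rebuilding domains: %s\n", why);
	pthread_mutex_unlock(&domains_mutex);
	pthread_rwlock_unlock(&hotplug_lock);
}

int main(void)
{
	rebuild_domains("early init, no contention expected");
	return 0;
}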
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ee271bb661cc..ac855b2f4774 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2400,8 +2400,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 		local = 1;
 
 	/*
-	 * Retry task to preferred node migration periodically, in case it
-	 * case it previously failed, or the scheduler moved us.
+	 * Retry to migrate task to preferred node periodically, in case it
+	 * previously failed, or the scheduler moved us.
 	 */
 	if (time_after(jiffies, p->numa_migrate_retry)) {
 		task_numa_placement(p);
@@ -5674,11 +5674,11 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	return target;
 }
 
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
+static unsigned long cpu_util_without(int cpu, struct task_struct *p);
 
-static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
+static unsigned long capacity_spare_without(int cpu, struct task_struct *p)
 {
-	return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
+	return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0);
 }
 
 /*
@@ -5738,7 +5738,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
 			avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
 
-			spare_cap = capacity_spare_wake(i, p);
+			spare_cap = capacity_spare_without(i, p);
 
 			if (spare_cap > max_spare_cap)
 				max_spare_cap = spare_cap;
@@ -5889,8 +5889,8 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 		return prev_cpu;
 
 	/*
-	 * We need task's util for capacity_spare_wake, sync it up to prev_cpu's
-	 * last_update_time.
+	 * We need task's util for capacity_spare_without, sync it up to
+	 * prev_cpu's last_update_time.
 	 */
 	if (!(sd_flag & SD_BALANCE_FORK))
 		sync_entity_load_avg(&p->se);
@@ -6216,10 +6216,19 @@ static inline unsigned long cpu_util(int cpu)
 }
 
 /*
- * cpu_util_wake: Compute CPU utilization with any contributions from
- * the waking task p removed.
+ * cpu_util_without: compute cpu utilization without any contributions from *p
+ * @cpu: the CPU which utilization is requested
+ * @p: the task which utilization should be discounted
+ *
+ * The utilization of a CPU is defined by the utilization of tasks currently
+ * enqueued on that CPU as well as tasks which are currently sleeping after an
+ * execution on that CPU.
+ *
+ * This method returns the utilization of the specified CPU by discounting the
+ * utilization of the specified task, whenever the task is currently
+ * contributing to the CPU utilization.
  */
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
+static unsigned long cpu_util_without(int cpu, struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq;
 	unsigned int util;
@@ -6231,7 +6240,7 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	cfs_rq = &cpu_rq(cpu)->cfs;
 	util = READ_ONCE(cfs_rq->avg.util_avg);
 
-	/* Discount task's blocked util from CPU's util */
+	/* Discount task's util from CPU's util */
 	util -= min_t(unsigned int, util, task_util(p));
 
 	/*
@@ -6240,14 +6249,14 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	 * a) if *p is the only task sleeping on this CPU, then:
 	 *      cpu_util (== task_util) > util_est (== 0)
 	 *    and thus we return:
-	 *      cpu_util_wake = (cpu_util - task_util) = 0
+	 *      cpu_util_without = (cpu_util - task_util) = 0
 	 *
 	 * b) if other tasks are SLEEPING on this CPU, which is now exiting
 	 *    IDLE, then:
 	 *      cpu_util >= task_util
 	 *      cpu_util > util_est (== 0)
 	 *    and thus we discount *p's blocked utilization to return:
-	 *      cpu_util_wake = (cpu_util - task_util) >= 0
+	 *      cpu_util_without = (cpu_util - task_util) >= 0
 	 *
 	 * c) if other tasks are RUNNABLE on that CPU and
 	 *      util_est > cpu_util
@@ -6260,8 +6269,33 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	 * covered by the following code when estimated utilization is
 	 * enabled.
 	 */
-	if (sched_feat(UTIL_EST))
-		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
+	if (sched_feat(UTIL_EST)) {
+		unsigned int estimated =
+			READ_ONCE(cfs_rq->avg.util_est.enqueued);
+
+		/*
+		 * Despite the following checks we still have a small window
+		 * for a possible race, when an execl's select_task_rq_fair()
+		 * races with LB's detach_task():
+		 *
+		 *   detach_task()
+		 *     p->on_rq = TASK_ON_RQ_MIGRATING;
+		 *     ---------------------------------- A
+		 *     deactivate_task()                   \
+		 *       dequeue_task()                     + RaceTime
+		 *         util_est_dequeue()              /
+		 *     ---------------------------------- B
+		 *
+		 * The additional check on "current == p" it's required to
+		 * properly fix the execl regression and it helps in further
+		 * reducing the chances for the above race.
+		 */
+		if (unlikely(task_on_rq_queued(p) || current == p)) {
+			estimated -= min_t(unsigned int, estimated,
+					   (_task_util_est(p) | UTIL_AVG_UNCHANGED));
+		}
+		util = max(util, estimated);
+	}
 
 	/*
 	 * Utilization (estimated) can exceed the CPU capacity, thus let's
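Taken together, the fair.c hunks rename cpu_util_wake() to cpu_util_without() and make the estimated-utilization path discount the task as well, so that both signals exclude *p before the max is taken. A standalone sketch of the resulting arithmetic, with plain integers in place of the rq/cfs_rq structures (util_without_task(), sub_clamped() and the parameters are invented for this illustration, not kernel functions):

#include <stdio.h>

/* Clamp helpers mirroring the min_t()/max()/min() uses in the hunks. */
static unsigned int sub_clamped(unsigned int a, unsigned int b)
{
	return a - (b < a ? b : a);	/* never underflows below zero */
}

static unsigned int max_u(unsigned int a, unsigned int b) { return a > b ? a : b; }
static unsigned int min_u(unsigned int a, unsigned int b) { return a < b ? a : b; }

/*
 * CPU utilization with the contribution of one task removed: discount it
 * from the running average and, if the task is still counted in the
 * estimated utilization, from that too; then take the max of the two
 * signals and clamp to the CPU capacity.
 */
static unsigned int util_without_task(unsigned int cpu_util_avg,
				      unsigned int cpu_util_est,
				      unsigned int task_util,
				      unsigned int task_util_est,
				      int task_counted_in_est,
				      unsigned int capacity)
{
	unsigned int util = sub_clamped(cpu_util_avg, task_util);
	unsigned int est = cpu_util_est;

	if (task_counted_in_est)
		est = sub_clamped(est, task_util_est);

	return min_u(max_u(util, est), capacity);
}

int main(void)
{
	/* Case a) above: *p is the only (sleeping) task, result is 0. */
	printf("%u\n", util_without_task(300, 0, 300, 300, 0, 1024));
	/* Task still enqueued: both signals get discounted. */
	printf("%u\n", util_without_task(700, 650, 200, 180, 1, 1024));
	return 0;
}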
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 7cdecfc010af..3d7355d7c3e3 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -633,38 +633,39 @@ void psi_cgroup_free(struct cgroup *cgroup)
  */
 void cgroup_move_task(struct task_struct *task, struct css_set *to)
 {
-	bool move_psi = !psi_disabled;
 	unsigned int task_flags = 0;
 	struct rq_flags rf;
 	struct rq *rq;
 
-	if (move_psi) {
-		rq = task_rq_lock(task, &rf);
+	if (psi_disabled) {
+		/*
+		 * Lame to do this here, but the scheduler cannot be locked
+		 * from the outside, so we move cgroups from inside sched/.
+		 */
+		rcu_assign_pointer(task->cgroups, to);
+		return;
+	}
 
-		if (task_on_rq_queued(task))
-			task_flags = TSK_RUNNING;
-		else if (task->in_iowait)
-			task_flags = TSK_IOWAIT;
+	rq = task_rq_lock(task, &rf);
 
-		if (task->flags & PF_MEMSTALL)
-			task_flags |= TSK_MEMSTALL;
+	if (task_on_rq_queued(task))
+		task_flags = TSK_RUNNING;
+	else if (task->in_iowait)
+		task_flags = TSK_IOWAIT;
 
-		if (task_flags)
-			psi_task_change(task, task_flags, 0);
-	}
+	if (task->flags & PF_MEMSTALL)
+		task_flags |= TSK_MEMSTALL;
 
-	/*
-	 * Lame to do this here, but the scheduler cannot be locked
-	 * from the outside, so we move cgroups from inside sched/.
-	 */
-	rcu_assign_pointer(task->cgroups, to);
+	if (task_flags)
+		psi_task_change(task, task_flags, 0);
 
-	if (move_psi) {
-		if (task_flags)
-			psi_task_change(task, 0, task_flags);
+	/* See comment above */
+	rcu_assign_pointer(task->cgroups, to);
 
-		task_rq_unlock(rq, task, &rf);
-	}
+	if (task_flags)
+		psi_task_change(task, 0, task_flags);
+
+	task_rq_unlock(rq, task, &rf);
 }
 #endif /* CONFIG_CGROUPS */
 
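The restructured cgroup_move_task() still performs the same three steps when PSI is enabled: clear the task's PSI state flags in its current cgroup, switch the css_set pointer, then re-set the same flags so they are accounted to the new cgroup. A toy sketch of that clear/move/set pattern, with simple per-group counters in place of the real PSI accounting (struct group, move_task() and the counters are invented stand-ins, not the psi implementation):

#include <stdio.h>

#define TSK_RUNNING	(1 << 0)
#define TSK_IOWAIT	(1 << 1)
#define TSK_MEMSTALL	(1 << 2)

/* Toy stand-in for a PSI group: one counter per tracked state. */
struct group {
	const char *name;
	int nr_running, nr_iowait, nr_memstall;
};

struct task {
	unsigned int flags;
	struct group *grp;
};

/* Rough analogue of psi_task_change(task, clear, set). */
static void account(struct group *g, unsigned int clear, unsigned int set)
{
	g->nr_running  += !!(set & TSK_RUNNING)  - !!(clear & TSK_RUNNING);
	g->nr_iowait   += !!(set & TSK_IOWAIT)   - !!(clear & TSK_IOWAIT);
	g->nr_memstall += !!(set & TSK_MEMSTALL) - !!(clear & TSK_MEMSTALL);
}

/* Clear flags in the old group, move the task, re-set them in the new one. */
static void move_task(struct task *t, struct group *to)
{
	if (t->flags)
		account(t->grp, t->flags, 0);
	t->grp = to;
	if (t->flags)
		account(t->grp, 0, t->flags);
}

int main(void)
{
	struct group a = { "a", 1, 0, 0 }, b = { "b", 0, 0, 0 };
	struct task t = { TSK_RUNNING, &a };

	move_task(&t, &b);
	printf("%s: %d running, %s: %d running\n",
	       a.name, a.nr_running, b.name, b.nr_running);
	return 0;
}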