path: root/kernel/sched
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c |  5
-rw-r--r--  kernel/sched/fair.c | 66
-rw-r--r--  kernel/sched/psi.c  | 43
3 files changed, 76 insertions(+), 38 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f12225f26b70..091e089063be 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5851,11 +5851,14 @@ void __init sched_init_smp(void)
 	/*
 	 * There's no userspace yet to cause hotplug operations; hence all the
 	 * CPU masks are stable and all blatant races in the below code cannot
-	 * happen.
+	 * happen. The hotplug lock is nevertheless taken to satisfy lockdep,
+	 * but there won't be any contention on it.
 	 */
+	cpus_read_lock();
 	mutex_lock(&sched_domains_mutex);
 	sched_init_domains(cpu_active_mask);
 	mutex_unlock(&sched_domains_mutex);
+	cpus_read_unlock();
 
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
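With the hunk applied, the relevant part of sched_init_smp() reads as follows (a sketch assembled from the new-side lines above, surrounding code elided); note that cpus_read_lock() nests outside sched_domains_mutex:

	/* Hotplug lock taken only to satisfy lockdep; no contention this early in boot. */
	cpus_read_lock();
	mutex_lock(&sched_domains_mutex);
	sched_init_domains(cpu_active_mask);
	mutex_unlock(&sched_domains_mutex);
	cpus_read_unlock();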
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ee271bb661cc..ac855b2f4774 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2400,8 +2400,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 		local = 1;
 
 	/*
-	 * Retry task to preferred node migration periodically, in case it
-	 * case it previously failed, or the scheduler moved us.
+	 * Retry to migrate task to preferred node periodically, in case it
+	 * previously failed, or the scheduler moved us.
 	 */
 	if (time_after(jiffies, p->numa_migrate_retry)) {
 		task_numa_placement(p);
@@ -5674,11 +5674,11 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	return target;
 }
 
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
+static unsigned long cpu_util_without(int cpu, struct task_struct *p);
 
-static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
+static unsigned long capacity_spare_without(int cpu, struct task_struct *p)
 {
-	return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
+	return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0);
 }
 
 /*
@@ -5738,7 +5738,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
 		avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
 
-		spare_cap = capacity_spare_wake(i, p);
+		spare_cap = capacity_spare_without(i, p);
 
 		if (spare_cap > max_spare_cap)
 			max_spare_cap = spare_cap;
@@ -5889,8 +5889,8 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 		return prev_cpu;
 
 	/*
-	 * We need task's util for capacity_spare_wake, sync it up to prev_cpu's
-	 * last_update_time.
+	 * We need task's util for capacity_spare_without, sync it up to
+	 * prev_cpu's last_update_time.
 	 */
 	if (!(sd_flag & SD_BALANCE_FORK))
 		sync_entity_load_avg(&p->se);
@@ -6216,10 +6216,19 @@ static inline unsigned long cpu_util(int cpu)
 }
 
 /*
- * cpu_util_wake: Compute CPU utilization with any contributions from
- * the waking task p removed.
+ * cpu_util_without: compute cpu utilization without any contributions from *p
+ * @cpu: the CPU which utilization is requested
+ * @p: the task which utilization should be discounted
+ *
+ * The utilization of a CPU is defined by the utilization of tasks currently
+ * enqueued on that CPU as well as tasks which are currently sleeping after an
+ * execution on that CPU.
+ *
+ * This method returns the utilization of the specified CPU by discounting the
+ * utilization of the specified task, whenever the task is currently
+ * contributing to the CPU utilization.
  */
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
+static unsigned long cpu_util_without(int cpu, struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq;
 	unsigned int util;
@@ -6231,7 +6240,7 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	cfs_rq = &cpu_rq(cpu)->cfs;
 	util = READ_ONCE(cfs_rq->avg.util_avg);
 
-	/* Discount task's blocked util from CPU's util */
+	/* Discount task's util from CPU's util */
 	util -= min_t(unsigned int, util, task_util(p));
 
 	/*
@@ -6240,14 +6249,14 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	 * a) if *p is the only task sleeping on this CPU, then:
 	 *      cpu_util (== task_util) > util_est (== 0)
 	 *    and thus we return:
-	 *      cpu_util_wake = (cpu_util - task_util) = 0
+	 *      cpu_util_without = (cpu_util - task_util) = 0
 	 *
 	 * b) if other tasks are SLEEPING on this CPU, which is now exiting
 	 *    IDLE, then:
 	 *      cpu_util >= task_util
 	 *      cpu_util > util_est (== 0)
 	 *    and thus we discount *p's blocked utilization to return:
-	 *      cpu_util_wake = (cpu_util - task_util) >= 0
+	 *      cpu_util_without = (cpu_util - task_util) >= 0
 	 *
 	 * c) if other tasks are RUNNABLE on that CPU and
 	 *      util_est > cpu_util
@@ -6260,8 +6269,33 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	 * covered by the following code when estimated utilization is
 	 * enabled.
 	 */
-	if (sched_feat(UTIL_EST))
-		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
+	if (sched_feat(UTIL_EST)) {
+		unsigned int estimated =
+			READ_ONCE(cfs_rq->avg.util_est.enqueued);
+
+		/*
+		 * Despite the following checks we still have a small window
+		 * for a possible race, when an execl's select_task_rq_fair()
+		 * races with LB's detach_task():
+		 *
+		 *   detach_task()
+		 *     p->on_rq = TASK_ON_RQ_MIGRATING;
+		 *     ---------------------------------- A
+		 *     deactivate_task()                   \
+		 *       dequeue_task()                     + RaceTime
+		 *         util_est_dequeue()              /
+		 *     ---------------------------------- B
+		 *
+		 * The additional check on "current == p" is required to
+		 * properly fix the execl regression and it helps in further
+		 * reducing the chances for the above race.
+		 */
+		if (unlikely(task_on_rq_queued(p) || current == p)) {
+			estimated -= min_t(unsigned int, estimated,
+					   (_task_util_est(p) | UTIL_AVG_UNCHANGED));
+		}
+		util = max(util, estimated);
+	}
 
 	/*
 	 * Utilization (estimated) can exceed the CPU capacity, thus let's
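The arithmetic in cpu_util_without() boils down to an underflow-safe discount of the task's contribution followed by a max against the estimated utilization. A minimal, self-contained userspace sketch of just that arithmetic (illustrative values only; it deliberately ignores the task_on_rq_queued()/current checks and the UTIL_AVG_UNCHANGED flag handling shown in the hunk above):

#include <stdio.h>

/* Clamped subtraction, mirroring the min_t() pattern: never underflow below zero. */
static unsigned int discount(unsigned int total, unsigned int part)
{
	return total - (part < total ? part : total);
}

int main(void)
{
	unsigned int util_avg  = 300;	/* CPU's PELT utilization, task included */
	unsigned int util_est  = 450;	/* CPU's estimated (enqueued) utilization */
	unsigned int task_util = 350;	/* contribution of the task being removed */

	/* Discount the task from both signals, then take the larger of the two. */
	unsigned int util = discount(util_avg, task_util);	/* 0, not -50 */
	unsigned int est  = discount(util_est, task_util);	/* 100 */

	printf("utilization without the task: %u\n", util > est ? util : est);
	return 0;
}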
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 7cdecfc010af..3d7355d7c3e3 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -633,38 +633,39 @@ void psi_cgroup_free(struct cgroup *cgroup)
  */
 void cgroup_move_task(struct task_struct *task, struct css_set *to)
 {
-	bool move_psi = !psi_disabled;
 	unsigned int task_flags = 0;
 	struct rq_flags rf;
 	struct rq *rq;
 
-	if (move_psi) {
-		rq = task_rq_lock(task, &rf);
+	if (psi_disabled) {
+		/*
+		 * Lame to do this here, but the scheduler cannot be locked
+		 * from the outside, so we move cgroups from inside sched/.
+		 */
+		rcu_assign_pointer(task->cgroups, to);
+		return;
+	}
 
-		if (task_on_rq_queued(task))
-			task_flags = TSK_RUNNING;
-		else if (task->in_iowait)
-			task_flags = TSK_IOWAIT;
+	rq = task_rq_lock(task, &rf);
 
-		if (task->flags & PF_MEMSTALL)
-			task_flags |= TSK_MEMSTALL;
+	if (task_on_rq_queued(task))
+		task_flags = TSK_RUNNING;
+	else if (task->in_iowait)
+		task_flags = TSK_IOWAIT;
 
-		if (task_flags)
-			psi_task_change(task, task_flags, 0);
-	}
+	if (task->flags & PF_MEMSTALL)
+		task_flags |= TSK_MEMSTALL;
 
-	/*
-	 * Lame to do this here, but the scheduler cannot be locked
-	 * from the outside, so we move cgroups from inside sched/.
-	 */
-	rcu_assign_pointer(task->cgroups, to);
+	if (task_flags)
+		psi_task_change(task, task_flags, 0);
 
-	if (move_psi) {
-		if (task_flags)
-			psi_task_change(task, 0, task_flags);
+	/* See comment above */
+	rcu_assign_pointer(task->cgroups, to);
 
-		task_rq_unlock(rq, task, &rf);
-	}
+	if (task_flags)
+		psi_task_change(task, 0, task_flags);
+
+	task_rq_unlock(rq, task, &rf);
 }
 #endif /* CONFIG_CGROUPS */
 
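For readability, here is roughly how cgroup_move_task() ends up looking with the hunk above applied (assembled from the new-side lines, comments abbreviated; a sketch, not a standalone build):

void cgroup_move_task(struct task_struct *task, struct css_set *to)
{
	unsigned int task_flags = 0;
	struct rq_flags rf;
	struct rq *rq;

	if (psi_disabled) {
		/* PSI is off: just switch the cgroup pointer, no rq lock needed. */
		rcu_assign_pointer(task->cgroups, to);
		return;
	}

	rq = task_rq_lock(task, &rf);

	if (task_on_rq_queued(task))
		task_flags = TSK_RUNNING;
	else if (task->in_iowait)
		task_flags = TSK_IOWAIT;

	if (task->flags & PF_MEMSTALL)
		task_flags |= TSK_MEMSTALL;

	/* Clear the task's PSI state, move the cgroup pointer, then re-set the state. */
	if (task_flags)
		psi_task_change(task, task_flags, 0);

	rcu_assign_pointer(task->cgroups, to);

	if (task_flags)
		psi_task_change(task, 0, task_flags);

	task_rq_unlock(rq, task, &rf);
}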