Diffstat (limited to 'kernel/sched')
 -rw-r--r--  kernel/sched/core.c  | 53
 -rw-r--r--  kernel/sched/psi.c   | 71
 -rw-r--r--  kernel/sched/sched.h |  4
 -rw-r--r--  kernel/sched/stats.h |  8
 4 files changed, 71 insertions(+), 65 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5afb868f7339..8050f266751a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2881,6 +2881,18 @@ unsigned long long nr_context_switches(void)
 }
 
 /*
+ * Consumers of these two interfaces, like for example the cpuidle menu
+ * governor, are using nonsensical data. Preferring shallow idle state selection
+ * for a CPU that has IO-wait which might not even end up running the task when
+ * it does become runnable.
+ */
+
+unsigned long nr_iowait_cpu(int cpu)
+{
+        return atomic_read(&cpu_rq(cpu)->nr_iowait);
+}
+
+/*
  * IO-wait accounting, and how its mostly bollocks (on SMP).
  *
  * The idea behind IO-wait account is to account the idle time that we could
@@ -2915,31 +2927,11 @@ unsigned long nr_iowait(void)
         unsigned long i, sum = 0;
 
         for_each_possible_cpu(i)
-                sum += atomic_read(&cpu_rq(i)->nr_iowait);
+                sum += nr_iowait_cpu(i);
 
         return sum;
 }
 
-/*
- * Consumers of these two interfaces, like for example the cpuidle menu
- * governor, are using nonsensical data. Preferring shallow idle state selection
- * for a CPU that has IO-wait which might not even end up running the task when
- * it does become runnable.
- */
-
-unsigned long nr_iowait_cpu(int cpu)
-{
-        struct rq *this = cpu_rq(cpu);
-        return atomic_read(&this->nr_iowait);
-}
-
-void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
-{
-        struct rq *rq = this_rq();
-        *nr_waiters = atomic_read(&rq->nr_iowait);
-        *load = rq->load.weight;
-}
-
 #ifdef CONFIG_SMP
 
 /*
@@ -5746,15 +5738,10 @@ int sched_cpu_activate(unsigned int cpu)
 
 #ifdef CONFIG_SCHED_SMT
         /*
-         * The sched_smt_present static key needs to be evaluated on every
-         * hotplug event because at boot time SMT might be disabled when
-         * the number of booted CPUs is limited.
-         *
-         * If then later a sibling gets hotplugged, then the key would stay
-         * off and SMT scheduling would never be functional.
+         * When going up, increment the number of cores with SMT present.
          */
-        if (cpumask_weight(cpu_smt_mask(cpu)) > 1)
-                static_branch_enable_cpuslocked(&sched_smt_present);
+        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+                static_branch_inc_cpuslocked(&sched_smt_present);
 #endif
         set_cpu_active(cpu, true);
 
@@ -5798,6 +5785,14 @@ int sched_cpu_deactivate(unsigned int cpu)
          */
         synchronize_rcu_mult(call_rcu, call_rcu_sched);
 
+#ifdef CONFIG_SCHED_SMT
+        /*
+         * When going down, decrement the number of cores with SMT present.
+         */
+        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+                static_branch_dec_cpuslocked(&sched_smt_present);
+#endif
+
         if (!sched_smp_initialized)
                 return 0;
 
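
The two sched_cpu_activate()/sched_cpu_deactivate() hunks above turn sched_smt_present from a one-way switch into a reference count: a core contributes one reference the moment its second SMT sibling comes online (the sibling mask weight hits 2) and drops it again just before that sibling goes offline, so the key can go back off once no online core has SMT. The stand-alone C program below is only a user-space sketch of that counting idea, assuming a fixed two-sibling topology; names such as cpu_activate() and smt_present are made up for the illustration and are not kernel interfaces.

/*
 * Sketch: per-core SMT refcounting, modelled in plain C.
 * smt_present plays the role of the static key's reference count.
 */
#include <stdio.h>

#define NR_CORES    2
#define NR_SIBLINGS 2

static int online[NR_CORES][NR_SIBLINGS];   /* 1 = sibling online */
static int smt_present;                     /* static_branch_inc/dec analogue */

static int core_online_siblings(int core)
{
        int i, n = 0;

        for (i = 0; i < NR_SIBLINGS; i++)
                n += online[core][i];
        return n;
}

static void cpu_activate(int core, int sibling)
{
        online[core][sibling] = 1;
        if (core_online_siblings(core) == 2)    /* second sibling just came up */
                smt_present++;
}

static void cpu_deactivate(int core, int sibling)
{
        if (core_online_siblings(core) == 2)    /* core is about to lose SMT */
                smt_present--;
        online[core][sibling] = 0;
}

int main(void)
{
        cpu_activate(0, 0);
        cpu_activate(0, 1);
        cpu_activate(1, 0);
        printf("smt_present=%d\n", smt_present);    /* 1: core 0 runs with SMT */
        cpu_deactivate(0, 1);
        printf("smt_present=%d\n", smt_present);    /* 0: no core left with SMT */
        return 0;
}
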
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 7cdecfc010af..fe24de3fbc93 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -136,8 +136,18 @@
 
 static int psi_bug __read_mostly;
 
-bool psi_disabled __read_mostly;
-core_param(psi_disabled, psi_disabled, bool, 0644);
+DEFINE_STATIC_KEY_FALSE(psi_disabled);
+
+#ifdef CONFIG_PSI_DEFAULT_DISABLED
+bool psi_enable;
+#else
+bool psi_enable = true;
+#endif
+static int __init setup_psi(char *str)
+{
+        return kstrtobool(str, &psi_enable) == 0;
+}
+__setup("psi=", setup_psi);
 
 /* Running averages - we need to be higher-res than loadavg */
 #define PSI_FREQ        (2*HZ+1)        /* 2 sec intervals */
@@ -169,8 +179,10 @@ static void group_init(struct psi_group *group)
 
 void __init psi_init(void)
 {
-        if (psi_disabled)
+        if (!psi_enable) {
+                static_branch_enable(&psi_disabled);
                 return;
+        }
 
         psi_period = jiffies_to_nsecs(PSI_FREQ);
         group_init(&psi_system);
@@ -549,7 +561,7 @@ void psi_memstall_enter(unsigned long *flags)
         struct rq_flags rf;
         struct rq *rq;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         *flags = current->flags & PF_MEMSTALL;
@@ -579,7 +591,7 @@ void psi_memstall_leave(unsigned long *flags)
         struct rq_flags rf;
         struct rq *rq;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         if (*flags)
@@ -600,7 +612,7 @@ void psi_memstall_leave(unsigned long *flags)
 #ifdef CONFIG_CGROUPS
 int psi_cgroup_alloc(struct cgroup *cgroup)
 {
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return 0;
 
         cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
@@ -612,7 +624,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup)
 
 void psi_cgroup_free(struct cgroup *cgroup)
 {
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         cancel_delayed_work_sync(&cgroup->psi.clock_work);
@@ -633,38 +645,39 @@ void psi_cgroup_free(struct cgroup *cgroup)
  */
 void cgroup_move_task(struct task_struct *task, struct css_set *to)
 {
-        bool move_psi = !psi_disabled;
         unsigned int task_flags = 0;
         struct rq_flags rf;
         struct rq *rq;
 
-        if (move_psi) {
-                rq = task_rq_lock(task, &rf);
+        if (static_branch_likely(&psi_disabled)) {
+                /*
+                 * Lame to do this here, but the scheduler cannot be locked
+                 * from the outside, so we move cgroups from inside sched/.
+                 */
+                rcu_assign_pointer(task->cgroups, to);
+                return;
+        }
 
-                if (task_on_rq_queued(task))
-                        task_flags = TSK_RUNNING;
-                else if (task->in_iowait)
-                        task_flags = TSK_IOWAIT;
+        rq = task_rq_lock(task, &rf);
 
-                if (task->flags & PF_MEMSTALL)
-                        task_flags |= TSK_MEMSTALL;
+        if (task_on_rq_queued(task))
+                task_flags = TSK_RUNNING;
+        else if (task->in_iowait)
+                task_flags = TSK_IOWAIT;
 
-                if (task_flags)
-                        psi_task_change(task, task_flags, 0);
-        }
+        if (task->flags & PF_MEMSTALL)
+                task_flags |= TSK_MEMSTALL;
 
-        /*
-         * Lame to do this here, but the scheduler cannot be locked
-         * from the outside, so we move cgroups from inside sched/.
-         */
-        rcu_assign_pointer(task->cgroups, to);
+        if (task_flags)
+                psi_task_change(task, task_flags, 0);
 
-        if (move_psi) {
-                if (task_flags)
-                        psi_task_change(task, 0, task_flags);
+        /* See comment above */
+        rcu_assign_pointer(task->cgroups, to);
 
-                task_rq_unlock(rq, task, &rf);
-        }
+        if (task_flags)
+                psi_task_change(task, 0, task_flags);
+
+        task_rq_unlock(rq, task, &rf);
 }
 #endif /* CONFIG_CGROUPS */
 
@@ -672,7 +685,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
 {
         int full;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return -EOPNOTSUPP;
 
         update_stats(group);
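
The psi.c hunks above split the old psi_disabled module parameter into two pieces: a plain psi_enable flag filled from the psi= boot parameter (defaulting per CONFIG_PSI_DEFAULT_DISABLED), and a DEFINE_STATIC_KEY_FALSE(psi_disabled) jump label that psi_init() enables once when PSI is turned off, so the hot-path checks become a patchable branch rather than a read of a writable bool. The program below is a user-space sketch of that control flow only, under stated assumptions: parse_bool() stands in for kstrtobool(), an ordinary bool stands in for the static key, and none of the helper names are the kernel's own.

/*
 * Sketch: psi= boot parameter gating a "disabled" fast-path flag.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#ifdef CONFIG_PSI_DEFAULT_DISABLED
static bool psi_enable;
#else
static bool psi_enable = true;
#endif

static bool psi_disabled;                       /* models DEFINE_STATIC_KEY_FALSE() */

static int parse_bool(const char *s, bool *res) /* stand-in for kstrtobool() */
{
        if (!strcmp(s, "0") || !strcmp(s, "n")) { *res = false; return 0; }
        if (!strcmp(s, "1") || !strcmp(s, "y")) { *res = true;  return 0; }
        return -1;
}

static void setup_psi(const char *arg)          /* models __setup("psi=", setup_psi) */
{
        parse_bool(arg, &psi_enable);
}

static void psi_init(void)
{
        if (!psi_enable) {
                psi_disabled = true;            /* static_branch_enable() analogue */
                return;
        }
        /* ... set up averaging period, groups, ... */
}

static void psi_memstall_enter(void)
{
        if (psi_disabled)                       /* static_branch_likely() analogue */
                return;
        puts("accounting a memory stall");
}

int main(void)
{
        setup_psi("0");                         /* as if the kernel booted with psi=0 */
        psi_init();
        psi_memstall_enter();                   /* returns immediately, prints nothing */
        return 0;
}
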
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e0e052a50fcd..71cd8b710599 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -23,6 +23,7 @@
 #include <linux/sched/prio.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/smt.h>
 #include <linux/sched/stat.h>
 #include <linux/sched/sysctl.h>
 #include <linux/sched/task.h>
@@ -941,9 +942,6 @@ static inline int cpu_of(struct rq *rq)
 
 
 #ifdef CONFIG_SCHED_SMT
-
-extern struct static_key_false sched_smt_present;
-
 extern void __update_idle_core(struct rq *rq);
 
 static inline void update_idle_core(struct rq *rq)
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 4904c4677000..aa0de240fb41 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -66,7 +66,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
 {
         int clear = 0, set = TSK_RUNNING;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         if (!wakeup || p->sched_psi_wake_requeue) {
@@ -86,7 +86,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
 {
         int clear = TSK_RUNNING, set = 0;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         if (!sleep) {
@@ -102,7 +102,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
 
 static inline void psi_ttwu_dequeue(struct task_struct *p)
 {
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
         /*
          * Is the task being migrated during a wakeup? Make sure to
@@ -128,7 +128,7 @@ static inline void psi_ttwu_dequeue(struct task_struct *p)
 
 static inline void psi_task_tick(struct rq *rq)
 {
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         if (unlikely(rq->curr->flags & PF_MEMSTALL))
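
The stats.h hunks are the same conversion applied to the scheduler fast paths: each if (psi_disabled) test becomes if (static_branch_likely(&psi_disabled)). The nearest portable analogue of that hint is __builtin_expect(), sketched below with made-up names; the real jump-label machinery goes further and rewrites the branch in the instruction stream when the key is flipped, which a compile-time hint alone cannot do.

/*
 * Sketch: branch-expectation hint as a rough stand-in for a static key.
 */
#include <stdbool.h>
#include <stdio.h>

static bool psi_disabled;               /* stands in for the static key */

#define branch_likely(x)        __builtin_expect(!!(x), 1)

static void psi_task_tick(void)
{
        if (branch_likely(psi_disabled))
                return;                 /* hinted fast path when PSI is off */

        puts("sampling pressure state");
}

int main(void)
{
        psi_task_tick();                /* PSI on by default here: prints */
        psi_disabled = true;
        psi_task_tick();                /* now returns immediately */
        return 0;
}
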