Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c   | 53
-rw-r--r--  kernel/sched/psi.c    | 71
-rw-r--r--  kernel/sched/sched.h  |  4
-rw-r--r--  kernel/sched/stats.h  |  8
4 files changed, 71 insertions, 65 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5afb868f7339..8050f266751a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2881,6 +2881,18 @@ unsigned long long nr_context_switches(void)
 }
 
 /*
+ * Consumers of these two interfaces, like for example the cpuidle menu
+ * governor, are using nonsensical data. Preferring shallow idle state selection
+ * for a CPU that has IO-wait which might not even end up running the task when
+ * it does become runnable.
+ */
+
+unsigned long nr_iowait_cpu(int cpu)
+{
+        return atomic_read(&cpu_rq(cpu)->nr_iowait);
+}
+
+/*
  * IO-wait accounting, and how its mostly bollocks (on SMP).
  *
  * The idea behind IO-wait account is to account the idle time that we could
@@ -2915,31 +2927,11 @@ unsigned long nr_iowait(void)
         unsigned long i, sum = 0;
 
         for_each_possible_cpu(i)
-                sum += atomic_read(&cpu_rq(i)->nr_iowait);
+                sum += nr_iowait_cpu(i);
 
         return sum;
 }
 
-/*
- * Consumers of these two interfaces, like for example the cpuidle menu
- * governor, are using nonsensical data. Preferring shallow idle state selection
- * for a CPU that has IO-wait which might not even end up running the task when
- * it does become runnable.
- */
-
-unsigned long nr_iowait_cpu(int cpu)
-{
-        struct rq *this = cpu_rq(cpu);
-        return atomic_read(&this->nr_iowait);
-}
-
-void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
-{
-        struct rq *rq = this_rq();
-        *nr_waiters = atomic_read(&rq->nr_iowait);
-        *load = rq->load.weight;
-}
-
 #ifdef CONFIG_SMP
 
 /*
@@ -5746,15 +5738,10 @@ int sched_cpu_activate(unsigned int cpu)
 
 #ifdef CONFIG_SCHED_SMT
         /*
-         * The sched_smt_present static key needs to be evaluated on every
-         * hotplug event because at boot time SMT might be disabled when
-         * the number of booted CPUs is limited.
-         *
-         * If then later a sibling gets hotplugged, then the key would stay
-         * off and SMT scheduling would never be functional.
+         * When going up, increment the number of cores with SMT present.
          */
-        if (cpumask_weight(cpu_smt_mask(cpu)) > 1)
-                static_branch_enable_cpuslocked(&sched_smt_present);
+        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+                static_branch_inc_cpuslocked(&sched_smt_present);
 #endif
         set_cpu_active(cpu, true);
 
@@ -5798,6 +5785,14 @@ int sched_cpu_deactivate(unsigned int cpu)
          */
         synchronize_rcu_mult(call_rcu, call_rcu_sched);
 
+#ifdef CONFIG_SCHED_SMT
+        /*
+         * When going down, decrement the number of cores with SMT present.
+         */
+        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+                static_branch_dec_cpuslocked(&sched_smt_present);
+#endif
+
         if (!sched_smp_initialized)
                 return 0;
 
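Aside: the sched_cpu_activate()/sched_cpu_deactivate() hunks above turn sched_smt_present from a plain on/off switch into a count of cores that currently have both SMT siblings online. The following is a minimal userspace model (not kernel code; smt_present_count, cpu_up(), cpu_down() and the sibling-count arguments are invented for illustration) of why pairing an increment on "== 2" going up with a decrement on "== 2" going down keeps SMT scheduling active exactly while at least one core still has SMT:

/* Userspace sketch of the refcount semantics behind static_branch_inc/dec. */
#include <stdbool.h>
#include <stdio.h>

static int smt_present_count;   /* models the static key's reference count */

static bool sched_smt_active(void)
{
        return smt_present_count > 0;
}

/* A CPU comes up and its core now has two online siblings: count that core. */
static void cpu_up(int online_siblings_after)
{
        if (online_siblings_after == 2)
                smt_present_count++;
}

/* A CPU goes down and its core drops from two online siblings to one. */
static void cpu_down(int online_siblings_before)
{
        if (online_siblings_before == 2)
                smt_present_count--;
}

int main(void)
{
        /* Two cores, each gaining its second sibling. */
        cpu_up(2); cpu_up(2);
        printf("SMT active: %d\n", sched_smt_active());  /* 1 */

        /* Offline one sibling of core 0: core 1 still has SMT. */
        cpu_down(2);
        printf("SMT active: %d\n", sched_smt_active());  /* 1 */

        /* Offline a sibling of core 1 too: no core has SMT left. */
        cpu_down(2);
        printf("SMT active: %d\n", sched_smt_active());  /* 0 */
        return 0;
}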
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 7cdecfc010af..fe24de3fbc93 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -136,8 +136,18 @@
 
 static int psi_bug __read_mostly;
 
-bool psi_disabled __read_mostly;
-core_param(psi_disabled, psi_disabled, bool, 0644);
+DEFINE_STATIC_KEY_FALSE(psi_disabled);
+
+#ifdef CONFIG_PSI_DEFAULT_DISABLED
+bool psi_enable;
+#else
+bool psi_enable = true;
+#endif
+static int __init setup_psi(char *str)
+{
+        return kstrtobool(str, &psi_enable) == 0;
+}
+__setup("psi=", setup_psi);
 
 /* Running averages - we need to be higher-res than loadavg */
 #define PSI_FREQ        (2*HZ+1)        /* 2 sec intervals */
@@ -169,8 +179,10 @@ static void group_init(struct psi_group *group)
 
 void __init psi_init(void)
 {
-        if (psi_disabled)
+        if (!psi_enable) {
+                static_branch_enable(&psi_disabled);
                 return;
+        }
 
         psi_period = jiffies_to_nsecs(PSI_FREQ);
         group_init(&psi_system);
@@ -549,7 +561,7 @@ void psi_memstall_enter(unsigned long *flags)
         struct rq_flags rf;
         struct rq *rq;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         *flags = current->flags & PF_MEMSTALL;
@@ -579,7 +591,7 @@ void psi_memstall_leave(unsigned long *flags)
         struct rq_flags rf;
         struct rq *rq;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         if (*flags)
@@ -600,7 +612,7 @@ void psi_memstall_leave(unsigned long *flags)
 #ifdef CONFIG_CGROUPS
 int psi_cgroup_alloc(struct cgroup *cgroup)
 {
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return 0;
 
         cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
@@ -612,7 +624,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup)
 
 void psi_cgroup_free(struct cgroup *cgroup)
 {
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         cancel_delayed_work_sync(&cgroup->psi.clock_work);
@@ -633,38 +645,39 @@ void psi_cgroup_free(struct cgroup *cgroup)
  */
 void cgroup_move_task(struct task_struct *task, struct css_set *to)
 {
-        bool move_psi = !psi_disabled;
         unsigned int task_flags = 0;
         struct rq_flags rf;
         struct rq *rq;
 
-        if (move_psi) {
-                rq = task_rq_lock(task, &rf);
+        if (static_branch_likely(&psi_disabled)) {
+                /*
+                 * Lame to do this here, but the scheduler cannot be locked
+                 * from the outside, so we move cgroups from inside sched/.
+                 */
+                rcu_assign_pointer(task->cgroups, to);
+                return;
+        }
 
-                if (task_on_rq_queued(task))
-                        task_flags = TSK_RUNNING;
-                else if (task->in_iowait)
-                        task_flags = TSK_IOWAIT;
+        rq = task_rq_lock(task, &rf);
 
-                if (task->flags & PF_MEMSTALL)
-                        task_flags |= TSK_MEMSTALL;
+        if (task_on_rq_queued(task))
+                task_flags = TSK_RUNNING;
+        else if (task->in_iowait)
+                task_flags = TSK_IOWAIT;
 
-                if (task_flags)
-                        psi_task_change(task, task_flags, 0);
-        }
+        if (task->flags & PF_MEMSTALL)
+                task_flags |= TSK_MEMSTALL;
 
-        /*
-         * Lame to do this here, but the scheduler cannot be locked
-         * from the outside, so we move cgroups from inside sched/.
-         */
+        if (task_flags)
+                psi_task_change(task, task_flags, 0);
+
+        /* See comment above */
         rcu_assign_pointer(task->cgroups, to);
 
-        if (move_psi) {
-                if (task_flags)
-                        psi_task_change(task, 0, task_flags);
+        if (task_flags)
+                psi_task_change(task, 0, task_flags);
 
-                task_rq_unlock(rq, task, &rf);
-        }
+        task_rq_unlock(rq, task, &rf);
 }
 #endif /* CONFIG_CGROUPS */
 
@@ -672,7 +685,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
 {
         int full;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return -EOPNOTSUPP;
 
         update_stats(group);
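Aside: the psi.c hunks above replace the runtime psi_disabled bool with a static key, with the default taken from CONFIG_PSI_DEFAULT_DISABLED and an optional "psi=" boot parameter, latched once in psi_init(). Below is a minimal userspace model of that decision flow (not kernel code: parse_bool() stands in for kstrtobool(), and setup_psi()/psi_init() here only mimic the shape of the real __setup handler and init path):

/* Userspace sketch: how the psi_enable default and "psi=" override combine. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#ifdef CONFIG_PSI_DEFAULT_DISABLED
static bool psi_enable;                 /* default: PSI off */
#else
static bool psi_enable = true;          /* default: PSI on */
#endif

static bool psi_disabled;               /* models the static key (default false) */

/* Stand-in for kstrtobool(): accepts "0"/"n" and "1"/"y". */
static int parse_bool(const char *s, bool *res)
{
        if (!strcmp(s, "1") || !strcmp(s, "y")) { *res = true;  return 0; }
        if (!strcmp(s, "0") || !strcmp(s, "n")) { *res = false; return 0; }
        return -1;
}

/* Models the __setup("psi=", setup_psi) boot-parameter hook. */
static void setup_psi(const char *arg)
{
        parse_bool(arg, &psi_enable);
}

/* Models psi_init(): flip the "disabled" key once if PSI was not enabled. */
static void psi_init(void)
{
        if (!psi_enable)
                psi_disabled = true;    /* static_branch_enable(&psi_disabled) */
}

int main(void)
{
        setup_psi("0");                 /* boot with psi=0 */
        psi_init();
        printf("psi_disabled = %d\n", psi_disabled);   /* 1 */
        return 0;
}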
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e0e052a50fcd..71cd8b710599 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -23,6 +23,7 @@
 #include <linux/sched/prio.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/smt.h>
 #include <linux/sched/stat.h>
 #include <linux/sched/sysctl.h>
 #include <linux/sched/task.h>
@@ -941,9 +942,6 @@ static inline int cpu_of(struct rq *rq)
 
 
 #ifdef CONFIG_SCHED_SMT
-
-extern struct static_key_false sched_smt_present;
-
 extern void __update_idle_core(struct rq *rq);
 
 static inline void update_idle_core(struct rq *rq)
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 4904c4677000..aa0de240fb41 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -66,7 +66,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
 {
         int clear = 0, set = TSK_RUNNING;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         if (!wakeup || p->sched_psi_wake_requeue) {
@@ -86,7 +86,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
 {
         int clear = TSK_RUNNING, set = 0;
 
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         if (!sleep) {
@@ -102,7 +102,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
 
 static inline void psi_ttwu_dequeue(struct task_struct *p)
 {
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
         /*
          * Is the task being migrated during a wakeup? Make sure to
@@ -128,7 +128,7 @@ static inline void psi_ttwu_dequeue(struct task_struct *p)
 
 static inline void psi_task_tick(struct rq *rq)
 {
-        if (psi_disabled)
+        if (static_branch_likely(&psi_disabled))
                 return;
 
         if (unlikely(rq->curr->flags & PF_MEMSTALL))
