Diffstat (limited to 'kernel')
-rw-r--r--  kernel/latencytop.c  |  14
-rw-r--r--  kernel/profile.c     |   1
-rw-r--r--  kernel/sched/core.c  |  70
-rw-r--r--  kernel/sched/debug.c | 102
-rw-r--r--  kernel/sched/fair.c  | 113
-rw-r--r--  kernel/sched/sched.h |   1
-rw-r--r--  kernel/sched/stats.h |   8
-rw-r--r--  kernel/sysctl.c      |  13
8 files changed, 232 insertions(+), 90 deletions(-)
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index a02812743a7e..b5c30d9f46c5 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -47,12 +47,12 @@
  * of times)
  */

-#include <linux/latencytop.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
 #include <linux/proc_fs.h>
+#include <linux/latencytop.h>
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/list.h>
@@ -289,4 +289,16 @@ static int __init init_lstats_procfs(void)
 	proc_create("latency_stats", 0644, NULL, &lstats_fops);
 	return 0;
 }
+
+int sysctl_latencytop(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int err;
+
+	err = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (latencytop_enabled)
+		force_schedstat_enabled();
+
+	return err;
+}
 device_initcall(init_lstats_procfs);
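
The new handler gets wired up as the kernel.latencytop proc_handler in the kernel/sysctl.c hunk at the end of this diff, so kernel/sysctl.c needs a matching declaration. This diffstat is limited to kernel/, so the header change is not shown here; as a hedged sketch, the full patch presumably carries something like the following in include/linux/latencytop.h:

/* Illustrative sketch only: the declaration kernel/sysctl.c needs to
 * reference sysctl_latencytop(); the real location and guards are in
 * a header outside this kernel/-limited diffstat. */
extern int sysctl_latencytop(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos);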
diff --git a/kernel/profile.c b/kernel/profile.c
index 99513e1160e5..51369697466e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -59,6 +59,7 @@ int profile_setup(char *str)
 
 	if (!strncmp(str, sleepstr, strlen(sleepstr))) {
 #ifdef CONFIG_SCHEDSTATS
+		force_schedstat_enabled();
 		prof_on = SLEEP_PROFILING;
 		if (str[strlen(sleepstr)] == ',')
 			str += strlen(sleepstr) + 1;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 24fcdbf28b18..7e548bde67ee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2093,7 +2093,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 
 	ttwu_queue(p, cpu);
 stat:
-	ttwu_stat(p, cpu, wake_flags);
+	if (schedstat_enabled())
+		ttwu_stat(p, cpu, wake_flags);
 out:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
@@ -2141,7 +2142,8 @@ static void try_to_wake_up_local(struct task_struct *p)
 	ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
 	ttwu_do_wakeup(rq, p, 0);
-	ttwu_stat(p, smp_processor_id(), 0);
+	if (schedstat_enabled())
+		ttwu_stat(p, smp_processor_id(), 0);
 out:
 	raw_spin_unlock(&p->pi_lock);
 }
@@ -2210,6 +2212,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
+	/* Even if schedstat is disabled, there should not be garbage */
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
@@ -2281,6 +2284,69 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 #endif
 #endif
 
+DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+
+#ifdef CONFIG_SCHEDSTATS
+static void set_schedstats(bool enabled)
+{
+	if (enabled)
+		static_branch_enable(&sched_schedstats);
+	else
+		static_branch_disable(&sched_schedstats);
+}
+
+void force_schedstat_enabled(void)
+{
+	if (!schedstat_enabled()) {
+		pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
+		static_branch_enable(&sched_schedstats);
+	}
+}
+
+static int __init setup_schedstats(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+
+	if (!strcmp(str, "enable")) {
+		set_schedstats(true);
+		ret = 1;
+	} else if (!strcmp(str, "disable")) {
+		set_schedstats(false);
+		ret = 1;
+	}
+out:
+	if (!ret)
+		pr_warn("Unable to parse schedstats=\n");
+
+	return ret;
+}
+__setup("schedstats=", setup_schedstats);
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_schedstats(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table t;
+	int err;
+	int state = static_branch_likely(&sched_schedstats);
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	t = *table;
+	t.data = &state;
+	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+	if (write)
+		set_schedstats(state);
+	return err;
+}
+#endif
+#endif
+
 /*
  * fork()/clone()-time setup:
  */
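
The machinery above is the standard jump-label pattern: DEFINE_STATIC_KEY_FALSE() declares a key whose guarded branches compile to NOPs, and static_branch_enable()/static_branch_disable() patch the code at runtime, so a disabled schedstats costs essentially nothing on hot paths. A minimal sketch of the same pattern, with hypothetical names (example_key, example_hot_path) and illustrative scaffolding rather than anything from this patch:

#include <linux/jump_label.h>

/* Illustrative sketch only; mirrors how sched_schedstats is used.
 * The key defaults to false, so the guarded increment is patched out
 * of the instruction stream until the key is enabled. */
DEFINE_STATIC_KEY_FALSE(example_key);

static unsigned long example_counter;

static void example_hot_path(void)
{
	/* A NOP until static_branch_enable() flips the key */
	if (static_branch_unlikely(&example_key))
		example_counter++;
}

static void example_toggle(bool on)
{
	if (on)
		static_branch_enable(&example_key);
	else
		static_branch_disable(&example_key);
}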
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 641511771ae6..7cfa87bd8b89 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -75,16 +75,18 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	PN(se->vruntime);
 	PN(se->sum_exec_runtime);
 #ifdef CONFIG_SCHEDSTATS
-	PN(se->statistics.wait_start);
-	PN(se->statistics.sleep_start);
-	PN(se->statistics.block_start);
-	PN(se->statistics.sleep_max);
-	PN(se->statistics.block_max);
-	PN(se->statistics.exec_max);
-	PN(se->statistics.slice_max);
-	PN(se->statistics.wait_max);
-	PN(se->statistics.wait_sum);
-	P(se->statistics.wait_count);
+	if (schedstat_enabled()) {
+		PN(se->statistics.wait_start);
+		PN(se->statistics.sleep_start);
+		PN(se->statistics.block_start);
+		PN(se->statistics.sleep_max);
+		PN(se->statistics.block_max);
+		PN(se->statistics.exec_max);
+		PN(se->statistics.slice_max);
+		PN(se->statistics.wait_max);
+		PN(se->statistics.wait_sum);
+		P(se->statistics.wait_count);
+	}
 #endif
 	P(se->load.weight);
 #ifdef CONFIG_SMP
@@ -122,10 +124,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 #ifdef CONFIG_SCHEDSTATS
-	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(p->se.statistics.wait_sum),
-		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(p->se.statistics.sum_sleep_runtime));
+	if (schedstat_enabled()) {
+		SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
+			SPLIT_NS(p->se.statistics.wait_sum),
+			SPLIT_NS(p->se.sum_exec_runtime),
+			SPLIT_NS(p->se.statistics.sum_sleep_runtime));
+	}
 #else
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
 		0LL, 0L,
@@ -313,17 +317,18 @@ do { \
 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
 #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 
-	P(yld_count);
-
-	P(sched_count);
-	P(sched_goidle);
 #ifdef CONFIG_SMP
 	P64(avg_idle);
 	P64(max_idle_balance_cost);
 #endif
 
-	P(ttwu_count);
-	P(ttwu_local);
+	if (schedstat_enabled()) {
+		P(yld_count);
+		P(sched_count);
+		P(sched_goidle);
+		P(ttwu_count);
+		P(ttwu_local);
+	}
 
 #undef P
 #undef P64
@@ -569,38 +574,39 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	nr_switches = p->nvcsw + p->nivcsw;
 
 #ifdef CONFIG_SCHEDSTATS
-	PN(se.statistics.sum_sleep_runtime);
-	PN(se.statistics.wait_start);
-	PN(se.statistics.sleep_start);
-	PN(se.statistics.block_start);
-	PN(se.statistics.sleep_max);
-	PN(se.statistics.block_max);
-	PN(se.statistics.exec_max);
-	PN(se.statistics.slice_max);
-	PN(se.statistics.wait_max);
-	PN(se.statistics.wait_sum);
-	P(se.statistics.wait_count);
-	PN(se.statistics.iowait_sum);
-	P(se.statistics.iowait_count);
 	P(se.nr_migrations);
-	P(se.statistics.nr_migrations_cold);
-	P(se.statistics.nr_failed_migrations_affine);
-	P(se.statistics.nr_failed_migrations_running);
-	P(se.statistics.nr_failed_migrations_hot);
-	P(se.statistics.nr_forced_migrations);
-	P(se.statistics.nr_wakeups);
-	P(se.statistics.nr_wakeups_sync);
-	P(se.statistics.nr_wakeups_migrate);
-	P(se.statistics.nr_wakeups_local);
-	P(se.statistics.nr_wakeups_remote);
-	P(se.statistics.nr_wakeups_affine);
-	P(se.statistics.nr_wakeups_affine_attempts);
-	P(se.statistics.nr_wakeups_passive);
-	P(se.statistics.nr_wakeups_idle);
 
-	{
+	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;
 
+		PN(se.statistics.sum_sleep_runtime);
+		PN(se.statistics.wait_start);
+		PN(se.statistics.sleep_start);
+		PN(se.statistics.block_start);
+		PN(se.statistics.sleep_max);
+		PN(se.statistics.block_max);
+		PN(se.statistics.exec_max);
+		PN(se.statistics.slice_max);
+		PN(se.statistics.wait_max);
+		PN(se.statistics.wait_sum);
+		P(se.statistics.wait_count);
+		PN(se.statistics.iowait_sum);
+		P(se.statistics.iowait_count);
+		P(se.statistics.nr_migrations_cold);
+		P(se.statistics.nr_failed_migrations_affine);
+		P(se.statistics.nr_failed_migrations_running);
+		P(se.statistics.nr_failed_migrations_hot);
+		P(se.statistics.nr_forced_migrations);
+		P(se.statistics.nr_wakeups);
+		P(se.statistics.nr_wakeups_sync);
+		P(se.statistics.nr_wakeups_migrate);
+		P(se.statistics.nr_wakeups_local);
+		P(se.statistics.nr_wakeups_remote);
+		P(se.statistics.nr_wakeups_affine);
+		P(se.statistics.nr_wakeups_affine_attempts);
+		P(se.statistics.nr_wakeups_passive);
+		P(se.statistics.nr_wakeups_idle);
+
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
 			avg_atom = div64_ul(avg_atom, nr_switches);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 56b7d4b83947..51a45502d8a6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -20,8 +20,8 @@
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  */
 
-#include <linux/latencytop.h>
 #include <linux/sched.h>
+#include <linux/latencytop.h>
 #include <linux/cpumask.h>
 #include <linux/cpuidle.h>
 #include <linux/slab.h>
@@ -755,7 +755,9 @@ static void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *p;
-	u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+	u64 delta;
+
+	delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -776,22 +778,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->statistics.wait_sum += delta;
 	se->statistics.wait_start = 0;
 }
-#else
-static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-#endif
 
 /*
  * Task is being enqueued - update stats:
  */
-static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
@@ -802,7 +794,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	/*
 	 * Mark the end of the wait period if dequeueing a
@@ -810,7 +802,40 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 */
 	if (se != cfs_rq->curr)
 		update_stats_wait_end(cfs_rq, se);
+
+	if (flags & DEQUEUE_SLEEP) {
+		if (entity_is_task(se)) {
+			struct task_struct *tsk = task_of(se);
+
+			if (tsk->state & TASK_INTERRUPTIBLE)
+				se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
+			if (tsk->state & TASK_UNINTERRUPTIBLE)
+				se->statistics.block_start = rq_clock(rq_of(cfs_rq));
+		}
+	}
+
+}
+#else
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+
+static inline void
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+{
 }
+#endif
 
 /*
  * We are picking a new current task - update its stats:
@@ -3102,6 +3127,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
+static inline void check_schedstat_required(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+	if (schedstat_enabled())
+		return;
+
+	/* Force schedstat enabled if a dependent tracepoint is active */
+	if (trace_sched_stat_wait_enabled()  ||
+			trace_sched_stat_sleep_enabled()  ||
+			trace_sched_stat_iowait_enabled() ||
+			trace_sched_stat_blocked_enabled() ||
+			trace_sched_stat_runtime_enabled()) {
+		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+			     "stat_blocked and stat_runtime require the "
+			     "kernel parameter schedstats=enabled or "
+			     "kernel.sched_schedstats=1\n");
+	}
+#endif
+}
+
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
@@ -3122,11 +3167,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
-		enqueue_sleeper(cfs_rq, se);
+		if (schedstat_enabled())
+			enqueue_sleeper(cfs_rq, se);
 	}
 
-	update_stats_enqueue(cfs_rq, se);
-	check_spread(cfs_rq, se);
+	check_schedstat_required();
+	if (schedstat_enabled()) {
+		update_stats_enqueue(cfs_rq, se);
+		check_spread(cfs_rq, se);
+	}
 	if (se != cfs_rq->curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
@@ -3193,19 +3242,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	update_curr(cfs_rq);
 	dequeue_entity_load_avg(cfs_rq, se);
 
-	update_stats_dequeue(cfs_rq, se);
-	if (flags & DEQUEUE_SLEEP) {
-#ifdef CONFIG_SCHEDSTATS
-		if (entity_is_task(se)) {
-			struct task_struct *tsk = task_of(se);
-
-			if (tsk->state & TASK_INTERRUPTIBLE)
-				se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
-			if (tsk->state & TASK_UNINTERRUPTIBLE)
-				se->statistics.block_start = rq_clock(rq_of(cfs_rq));
-		}
-#endif
-	}
+	if (schedstat_enabled())
+		update_stats_dequeue(cfs_rq, se, flags);
 
 	clear_buddies(cfs_rq, se);
 
3211 3249
@@ -3279,7 +3317,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 * a CPU. So account for the time it spent waiting on the
 		 * runqueue.
 		 */
-		update_stats_wait_end(cfs_rq, se);
+		if (schedstat_enabled())
+			update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
 		update_load_avg(se, 1);
 	}
@@ -3292,7 +3331,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * least twice that of our own weight (i.e. dont track it
 	 * when there are only lesser-weight tasks around):
 	 */
-	if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
+	if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
 		se->statistics.slice_max = max(se->statistics.slice_max,
 			se->sum_exec_runtime - se->prev_sum_exec_runtime);
 	}
@@ -3375,9 +3414,13 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 	/* throttle cfs_rqs exceeding runtime */
 	check_cfs_rq_runtime(cfs_rq);
 
-	check_spread(cfs_rq, prev);
+	if (schedstat_enabled()) {
+		check_spread(cfs_rq, prev);
+		if (prev->on_rq)
+			update_stats_wait_start(cfs_rq, prev);
+	}
+
 	if (prev->on_rq) {
-		update_stats_wait_start(cfs_rq, prev);
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
 		/* in !on_rq case, update occurred at dequeue */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 10f16374df7f..1d583870e1a6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1022,6 +1022,7 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
 #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
 
 extern struct static_key_false sched_numa_balancing;
+extern struct static_key_false sched_schedstats;
 
 static inline u64 global_rt_period(void)
 {
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index b0fbc7632de5..70b3b6a20fb0 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -29,9 +29,10 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 	if (rq)
 		rq->rq_sched_info.run_delay += delta;
 }
-# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
-# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
-# define schedstat_set(var, val)	do { var = (val); } while (0)
+# define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
+# define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
+# define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
+# define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
@@ -42,6 +43,7 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
+# define schedstat_enabled()		0
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 # define schedstat_set(var, val)	do { } while (0)
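
With these macro changes, every existing schedstat_inc()/schedstat_add()/schedstat_set() call site becomes self-gating without being edited. As a worked example, a call such as schedstat_inc(rq, yld_count) now expands under CONFIG_SCHEDSTATS=y to roughly:

/* Approximate preprocessor expansion of schedstat_inc(rq, yld_count);
 * the static branch keeps the increment out of the hot path while
 * schedstats is disabled. */
do {
	if (static_branch_unlikely(&sched_schedstats)) {
		(rq)->yld_count++;
	}
} while (0);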
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 97715fd9e790..f5102fabef7f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -350,6 +350,17 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHEDSTATS
+	{
+		.procname	= "sched_schedstats",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_schedstats,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif /* CONFIG_SCHEDSTATS */
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_NUMA_BALANCING
 	{
@@ -505,7 +516,7 @@ static struct ctl_table kern_table[] = {
 		.data		= &latencytop_enabled,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= sysctl_latencytop,
 	},
 #endif
 #ifdef CONFIG_BLK_DEV_INITRD
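
Once the patch is applied, schedstats can be toggled at boot with schedstats=enable|disable, or at runtime through the new sysctl (writes require CAP_SYS_ADMIN, and proc_dointvec_minmax clamps the value to the zero/one bounds in the table entry). A userspace sketch of the runtime toggle, equivalent to sysctl -w kernel.sched_schedstats=1:

/* Userspace sketch: enable schedstats via the sysctl added above.
 * Must run with CAP_SYS_ADMIN. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_schedstats", "w");

	if (!f) {
		perror("sched_schedstats");
		return 1;
	}
	fputs("1\n", f);
	return fclose(f) ? 1 : 0;
}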