Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c        |  6
-rw-r--r--  kernel/futex.c              | 14
-rw-r--r--  kernel/kcov.c               |  7
-rw-r--r--  kernel/locking/mutex.c      |  9
-rw-r--r--  kernel/locking/qspinlock.c  | 60
-rw-r--r--  kernel/relay.c              |  1
-rw-r--r--  kernel/sched/core.c         | 29
-rw-r--r--  kernel/sched/debug.c        | 15
-rw-r--r--  kernel/sched/idle.c         |  2
-rw-r--r--  kernel/sched/stats.h        |  3
-rw-r--r--  kernel/trace/bpf_trace.c    |  4
11 files changed, 119 insertions, 31 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 274450efea90..9c51ec3f0f44 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3862,10 +3862,8 @@ static void _free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
-	if (event->pmu) {
-		exclusive_event_destroy(event);
-		module_put(event->pmu->module);
-	}
+	exclusive_event_destroy(event);
+	module_put(event->pmu->module);
 
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
diff --git a/kernel/futex.c b/kernel/futex.c
index ee25f5ba4aca..33664f70e2d2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -469,7 +469,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct page *page;
+	struct page *page, *tail;
 	struct address_space *mapping;
 	int err, ro = 0;
 
@@ -530,7 +530,15 @@ again:
 	 * considered here and page lock forces unnecessary serialization.
 	 * From this point on, mapping will be re-verified if necessary and
 	 * page lock will be acquired only if it is unavoidable.
-	 */
+	 *
+	 * Mapping checks require the head page for any compound page, so the
+	 * head page and mapping are looked up now. For anonymous pages, it
+	 * does not matter if the page splits in the future as the key is
+	 * based on the address. For filesystem-backed pages, the tail is
+	 * required as the index of the page determines the key. For
+	 * base pages, there is no tail page and tail == page.
+	 */
+	tail = page;
 	page = compound_head(page);
 	mapping = READ_ONCE(page->mapping);
 
@@ -654,7 +662,7 @@ again:
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.inode = inode;
-		key->shared.pgoff = basepage_index(page);
+		key->shared.pgoff = basepage_index(tail);
 		rcu_read_unlock();
 	}
 
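Why both pages matter in the futex.c hunks: the mapping and locking checks need the compound head, while a file-backed key needs the index of the exact base page the futex word lives in. A minimal sketch of that split, using the same helpers the patch uses; the surrounding scenario and variable names are illustrative only, not code from the patch:

/* Illustrative fragment: a shared futex word that happens to sit in a
 * tail page of a file-backed compound (huge) page. */
struct page *tail = page;			/* remember the exact subpage    */
struct page *head = compound_head(page);	/* what mapping/lock checks use  */

key->shared.pgoff = basepage_index(tail);	/* distinct index per base page  */
/* basepage_index(head) would hand every futex word inside the compound
 * page the head's index, losing the distinction between different
 * subpages and letting waiters and wakers disagree on the key. */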
diff --git a/kernel/kcov.c b/kernel/kcov.c
index a02f2dddd1d7..8d44b3fea9d0 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -264,7 +264,12 @@ static const struct file_operations kcov_fops = {
 
 static int __init kcov_init(void)
 {
-	if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) {
+	/*
+	 * The kcov debugfs file won't ever get removed and thus,
+	 * there is no need to protect it against removal races. The
+	 * use of debugfs_create_file_unsafe() is actually safe here.
+	 */
+	if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) {
 		pr_err("failed to create kcov in debugfs\n");
 		return -ENOMEM;
 	}
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index e364b424b019..79d2d765a75f 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 	if (!hold_ctx)
 		return 0;
 
-	if (unlikely(ctx == hold_ctx))
-		return -EALREADY;
-
 	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
 	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	unsigned long flags;
 	int ret;
 
+	if (use_ww_ctx) {
+		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
+			return -EALREADY;
+	}
+
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
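The two mutex.c hunks move the self-relock check from the ww_mutex slowpath to the top of __mutex_lock_common(), so it also fires when the lock is taken on the uncontended fast path. A minimal sketch of the contract being preserved, with illustrative names (demo_ww_class, demo_obj, demo_relock) and assuming obj->lock was set up elsewhere with ww_mutex_init():

#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(demo_ww_class);

struct demo_obj {
	struct ww_mutex lock;	/* ww_mutex_init(&obj->lock, &demo_ww_class) elsewhere */
};

static int demo_relock(struct demo_obj *obj)
{
	struct ww_acquire_ctx ctx;
	int err;

	ww_acquire_init(&ctx, &demo_ww_class);

	err = ww_mutex_lock(&obj->lock, &ctx);	/* first acquisition: returns 0 */
	if (err)
		goto out;

	err = ww_mutex_lock(&obj->lock, &ctx);	/* same ctx again: -EALREADY */

	ww_mutex_unlock(&obj->lock);
out:
	ww_acquire_fini(&ctx);
	return err;				/* -EALREADY on the relock attempt */
}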
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ce2f75e32ae1..5fc8c311b8fe 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -267,6 +267,66 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
+ *
+ *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourselves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more interesting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	/* any unlock is good */
+	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+done:
+	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**
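The new queued_spin_unlock_wait() backs spin_unlock_wait() on qspinlock architectures: it lets a caller wait until any critical section that is already in flight has released the lock. A minimal sketch of the classic pattern it serves, against the spin_unlock_wait() API of this era; every name below (demo_lock, demo_draining, demo_begin_drain, demo_reader) is illustrative, not from this patch:

#include <linux/compiler.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);
static bool demo_draining;

static void demo_begin_drain(void)
{
	WRITE_ONCE(demo_draining, true);
	smp_mb();			/* order the flag store before the lock-word load below */
	spin_unlock_wait(&demo_lock);	/* wait out a holder that may have missed the flag */
	/* from here on, any new lock holder is guaranteed to observe demo_draining */
}

static void demo_reader(void)
{
	spin_lock(&demo_lock);
	if (!READ_ONCE(demo_draining)) {
		/* safe to do the protected work */
	}
	spin_unlock(&demo_lock);
}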
diff --git a/kernel/relay.c b/kernel/relay.c
index 074994bcfa9b..04d7cf3ef8cf 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -614,6 +614,7 @@ free_bufs:
 
 	kref_put(&chan->kref, relay_destroy_channel);
 	mutex_unlock(&relay_channels_mutex);
+	kfree(chan);
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(relay_open);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f2cae4620c7..017d5394f5dc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2253,9 +2253,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 #endif
 #endif
 
+#ifdef CONFIG_SCHEDSTATS
+
 DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+static bool __initdata __sched_schedstats = false;
 
-#ifdef CONFIG_SCHEDSTATS
 static void set_schedstats(bool enabled)
 {
 	if (enabled)
@@ -2278,11 +2280,16 @@ static int __init setup_schedstats(char *str)
 	if (!str)
 		goto out;
 
+	/*
+	 * This code is called before jump labels have been set up, so we can't
+	 * change the static branch directly just yet. Instead set a temporary
+	 * variable so init_schedstats() can do it later.
+	 */
 	if (!strcmp(str, "enable")) {
-		set_schedstats(true);
+		__sched_schedstats = true;
 		ret = 1;
 	} else if (!strcmp(str, "disable")) {
-		set_schedstats(false);
+		__sched_schedstats = false;
 		ret = 1;
 	}
 out:
@@ -2293,6 +2300,11 @@ out:
 }
 __setup("schedstats=", setup_schedstats);
 
+static void __init init_schedstats(void)
+{
+	set_schedstats(__sched_schedstats);
+}
+
 #ifdef CONFIG_PROC_SYSCTL
 int sysctl_schedstats(struct ctl_table *table, int write,
 		      void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2313,8 +2325,10 @@ int sysctl_schedstats(struct ctl_table *table, int write,
 		set_schedstats(state);
 	return err;
 }
-#endif
-#endif
+#endif /* CONFIG_PROC_SYSCTL */
+#else  /* !CONFIG_SCHEDSTATS */
+static inline void init_schedstats(void) {}
+#endif /* CONFIG_SCHEDSTATS */
 
 /*
  * fork()/clone()-time setup:
@@ -3156,7 +3170,8 @@ static noinline void __schedule_bug(struct task_struct *prev)
 static inline void schedule_debug(struct task_struct *prev)
 {
 #ifdef CONFIG_SCHED_STACK_END_CHECK
-	BUG_ON(task_stack_end_corrupted(prev));
+	if (task_stack_end_corrupted(prev))
+		panic("corrupted stack end detected inside scheduler\n");
 #endif
 
 	if (unlikely(in_atomic_preempt_off())) {
@@ -7487,6 +7502,8 @@ void __init sched_init(void)
 #endif
 	init_sched_fair_class();
 
+	init_schedstats();
+
 	scheduler_running = 1;
 }
 
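Taken together, the sched/core.c hunks follow a common pattern for boot parameters that want to drive a static branch: the __setup() handler runs before jump labels are usable, so it only records the request, and a later __init hook applies it once the infrastructure is up (sched_init() plays that role here). A minimal sketch of the pattern with illustrative names (demo_key, demo_setup, demo_init), not the patch's own code:

#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/string.h>

DEFINE_STATIC_KEY_FALSE(demo_key);
static bool __initdata demo_requested;

static int __init demo_setup(char *str)
{
	/* too early for static_branch_enable(); just remember the request */
	demo_requested = str && !strcmp(str, "enable");
	return 1;
}
__setup("demo=", demo_setup);

static void __init demo_init(void)
{
	/* called later in boot, after jump labels are initialized */
	if (demo_requested)
		static_branch_enable(&demo_key);
}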
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index cf905f655ba1..0368c393a336 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -427,19 +427,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		SPLIT_NS(p->se.vruntime),
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
-#ifdef CONFIG_SCHEDSTATS
-	if (schedstat_enabled()) {
-		SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-			SPLIT_NS(p->se.statistics.wait_sum),
-			SPLIT_NS(p->se.sum_exec_runtime),
-			SPLIT_NS(p->se.statistics.sum_sleep_runtime));
-	}
-#else
+
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		0LL, 0L,
+		SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		0LL, 0L);
-#endif
+		SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
+
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index bd12c6c714ec..c5aeedf4e93a 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -127,7 +127,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
  */
 static void cpuidle_idle_call(void)
 {
-	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
+	struct cpuidle_device *dev = cpuidle_get_device();
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 70b3b6a20fb0..78955cbea31c 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -33,6 +33,8 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 # define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
 # define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
 # define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
+# define schedstat_val(rq, field)	((schedstat_enabled()) ? (rq)->field : 0)
+
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
@@ -47,6 +49,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 # define schedstat_set(var, val)	do { } while (0)
+# define schedstat_val(rq, field)	0
 #endif
 
 #ifdef CONFIG_SCHED_INFO
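schedstat_val() is what lets the print_task() hunk in kernel/sched/debug.c drop its #ifdef/#else block: the same source line reads the field when schedstats are compiled in and enabled, and evaluates to 0 otherwise. A one-line sketch of a call site, mirroring the debug.c usage:

/* Reads the stat when enabled, else 0. With CONFIG_SCHEDSTATS=n the macro
 * expands to a plain 0, so the arguments are never evaluated and the
 * statistics field need not even exist in that configuration. */
u64 waited = schedstat_val(p, se.statistics.wait_sum);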
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 780bcbe1d4de..720b7bb01d43 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -198,7 +198,7 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	file = (struct file *)array->ptrs[index];
+	file = READ_ONCE(array->ptrs[index]);
 	if (unlikely(!file))
 		return -ENOENT;
 
@@ -247,7 +247,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	file = (struct file *)array->ptrs[index];
+	file = READ_ONCE(array->ptrs[index]);
 	if (unlikely(!file))
 		return -ENOENT;
 
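Both bpf_trace.c hunks close the same hazard: the perf event array slot can be updated concurrently, so the pointer must be fetched with exactly one load before the NULL check. A hedged sketch of the racy shape being replaced; the reload shown is something a plain (non-READ_ONCE) load permits the compiler to do, not something it always does:

/* Illustrative only: the pre-patch shape. */
file = (struct file *)array->ptrs[index];	/* plain load: may be refetched or torn   */
if (unlikely(!file))
	return -ENOENT;
event = file->private_data;			/* another CPU can meanwhile update the    */
						/* slot; READ_ONCE() pins a single load and */
						/* all later uses go through local 'file'.  */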