Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c        |  6
-rw-r--r--  kernel/futex.c              | 14
-rw-r--r--  kernel/kcov.c               |  7
-rw-r--r--  kernel/locking/mutex.c      |  9
-rw-r--r--  kernel/locking/qspinlock.c  | 60
-rw-r--r--  kernel/relay.c              |  1
-rw-r--r--  kernel/sched/core.c         | 29
-rw-r--r--  kernel/sched/debug.c        | 15
-rw-r--r--  kernel/sched/idle.c         |  2
-rw-r--r--  kernel/sched/stats.h        |  3
-rw-r--r--  kernel/trace/bpf_trace.c    |  4
11 files changed, 119 insertions, 31 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 274450efea90..9c51ec3f0f44 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3862,10 +3862,8 @@ static void _free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
-	if (event->pmu) {
-		exclusive_event_destroy(event);
-		module_put(event->pmu->module);
-	}
+	exclusive_event_destroy(event);
+	module_put(event->pmu->module);
 
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
diff --git a/kernel/futex.c b/kernel/futex.c
index ee25f5ba4aca..33664f70e2d2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -469,7 +469,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct page *page;
+	struct page *page, *tail;
 	struct address_space *mapping;
 	int err, ro = 0;
 
@@ -530,7 +530,15 @@ again:
 	 * considered here and page lock forces unnecessary serialization.
 	 * From this point on, mapping will be re-verified if necessary and
 	 * page lock will be acquired only if it is unavoidable.
-	 */
+	 *
+	 * Mapping checks require the head page for any compound page, so the
+	 * head page and mapping are looked up now. For anonymous pages, it
+	 * does not matter if the page splits in the future as the key is
+	 * based on the address. For filesystem-backed pages, the tail is
+	 * required as the index of the page determines the key. For
+	 * base pages, there is no tail page and tail == page.
+	 */
+	tail = page;
 	page = compound_head(page);
 	mapping = READ_ONCE(page->mapping);
 
@@ -654,7 +662,7 @@ again:
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.inode = inode;
-		key->shared.pgoff = basepage_index(page);
+		key->shared.pgoff = basepage_index(tail);
 		rcu_read_unlock();
 	}
 
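Why both pages matter in the futex.c hunks: the mapping and locking checks need the compound head, while a file-backed key needs the index of the exact base page the futex word lives in. A minimal sketch of that split, using the same helpers the patch uses; the surrounding scenario and variable names are illustrative only, not code from the patch:

/* Illustrative fragment: a shared futex word that happens to sit in a
 * tail page of a file-backed compound (huge) page. */
struct page *tail = page;			/* remember the exact subpage    */
struct page *head = compound_head(page);	/* what mapping/lock checks use  */

key->shared.pgoff = basepage_index(tail);	/* distinct index per base page  */
/* basepage_index(head) would hand every futex word inside the compound
 * page the head's index, losing the distinction between different
 * subpages and letting waiters and wakers disagree on the key. */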
diff --git a/kernel/kcov.c b/kernel/kcov.c
index a02f2dddd1d7..8d44b3fea9d0 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -264,7 +264,12 @@ static const struct file_operations kcov_fops = {
 
 static int __init kcov_init(void)
 {
-	if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) {
+	/*
+	 * The kcov debugfs file won't ever get removed and thus,
+	 * there is no need to protect it against removal races. The
+	 * use of debugfs_create_file_unsafe() is actually safe here.
+	 */
+	if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) {
 		pr_err("failed to create kcov in debugfs\n");
 		return -ENOMEM;
 	}
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index e364b424b019..79d2d765a75f 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 	if (!hold_ctx)
 		return 0;
 
-	if (unlikely(ctx == hold_ctx))
-		return -EALREADY;
-
 	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
 	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	unsigned long flags;
 	int ret;
 
+	if (use_ww_ctx) {
+		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
+			return -EALREADY;
+	}
+
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
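The two mutex.c hunks move the self-relock check from the ww_mutex slowpath to the top of __mutex_lock_common(), so it also fires when the lock is taken on the uncontended fast path. A minimal sketch of the contract being preserved, with illustrative names (demo_ww_class, demo_obj, demo_relock) and assuming obj->lock was set up elsewhere with ww_mutex_init():

#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(demo_ww_class);

struct demo_obj {
	struct ww_mutex lock;	/* ww_mutex_init(&obj->lock, &demo_ww_class) elsewhere */
};

static int demo_relock(struct demo_obj *obj)
{
	struct ww_acquire_ctx ctx;
	int err;

	ww_acquire_init(&ctx, &demo_ww_class);

	err = ww_mutex_lock(&obj->lock, &ctx);	/* first acquisition: returns 0 */
	if (err)
		goto out;

	err = ww_mutex_lock(&obj->lock, &ctx);	/* same ctx again: -EALREADY */

	ww_mutex_unlock(&obj->lock);
out:
	ww_acquire_fini(&ctx);
	return err;				/* -EALREADY on the relock attempt */
}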
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ce2f75e32ae1..5fc8c311b8fe 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -267,6 +267,66 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
+ *
+ *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourselves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more interesting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	/* any unlock is good */
+	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+done:
+	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**
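The new queued_spin_unlock_wait() backs spin_unlock_wait() on qspinlock architectures: it lets a caller wait until any critical section that is already in flight has released the lock. A minimal sketch of the classic pattern it serves, against the spin_unlock_wait() API of this era; every name below (demo_lock, demo_draining, demo_begin_drain, demo_reader) is illustrative, not from this patch:

#include <linux/compiler.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);
static bool demo_draining;

static void demo_begin_drain(void)
{
	WRITE_ONCE(demo_draining, true);
	smp_mb();			/* order the flag store before the lock-word load below */
	spin_unlock_wait(&demo_lock);	/* wait out a holder that may have missed the flag */
	/* from here on, any new lock holder is guaranteed to observe demo_draining */
}

static void demo_reader(void)
{
	spin_lock(&demo_lock);
	if (!READ_ONCE(demo_draining)) {
		/* safe to do the protected work */
	}
	spin_unlock(&demo_lock);
}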
diff --git a/kernel/relay.c b/kernel/relay.c
index 074994bcfa9b..04d7cf3ef8cf 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -614,6 +614,7 @@ free_bufs:
 
 	kref_put(&chan->kref, relay_destroy_channel);
 	mutex_unlock(&relay_channels_mutex);
+	kfree(chan);
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(relay_open);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f2cae4620c7..017d5394f5dc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2253,9 +2253,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 #endif
 #endif
 
+#ifdef CONFIG_SCHEDSTATS
+
 DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+static bool __initdata __sched_schedstats = false;
 
-#ifdef CONFIG_SCHEDSTATS
 static void set_schedstats(bool enabled)
 {
 	if (enabled)
@@ -2278,11 +2280,16 @@ static int __init setup_schedstats(char *str)
 	if (!str)
 		goto out;
 
+	/*
+	 * This code is called before jump labels have been set up, so we can't
+	 * change the static branch directly just yet. Instead set a temporary
+	 * variable so init_schedstats() can do it later.
+	 */
 	if (!strcmp(str, "enable")) {
-		set_schedstats(true);
+		__sched_schedstats = true;
 		ret = 1;
 	} else if (!strcmp(str, "disable")) {
-		set_schedstats(false);
+		__sched_schedstats = false;
 		ret = 1;
 	}
 out:
@@ -2293,6 +2300,11 @@ out:
 }
 __setup("schedstats=", setup_schedstats);
 
+static void __init init_schedstats(void)
+{
+	set_schedstats(__sched_schedstats);
+}
+
 #ifdef CONFIG_PROC_SYSCTL
 int sysctl_schedstats(struct ctl_table *table, int write,
 		      void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2313,8 +2325,10 @@ int sysctl_schedstats(struct ctl_table *table, int write,
 		set_schedstats(state);
 	return err;
 }
-#endif
-#endif
+#endif /* CONFIG_PROC_SYSCTL */
+#else  /* !CONFIG_SCHEDSTATS */
+static inline void init_schedstats(void) {}
+#endif /* CONFIG_SCHEDSTATS */
 
 /*
  * fork()/clone()-time setup:
@@ -3156,7 +3170,8 @@ static noinline void __schedule_bug(struct task_struct *prev)
 static inline void schedule_debug(struct task_struct *prev)
 {
 #ifdef CONFIG_SCHED_STACK_END_CHECK
-	BUG_ON(task_stack_end_corrupted(prev));
+	if (task_stack_end_corrupted(prev))
+		panic("corrupted stack end detected inside scheduler\n");
 #endif
 
 	if (unlikely(in_atomic_preempt_off())) {
@@ -7487,6 +7502,8 @@ void __init sched_init(void)
 #endif
 	init_sched_fair_class();
 
+	init_schedstats();
+
 	scheduler_running = 1;
 }
 
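Taken together, the sched/core.c hunks follow a common pattern for boot parameters that want to drive a static branch: the __setup() handler runs before jump labels are usable, so it only records the request, and a later __init hook applies it once the infrastructure is up (sched_init() plays that role here). A minimal sketch of the pattern with illustrative names (demo_key, demo_setup, demo_init), not the patch's own code:

#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/string.h>

DEFINE_STATIC_KEY_FALSE(demo_key);
static bool __initdata demo_requested;

static int __init demo_setup(char *str)
{
	/* too early for static_branch_enable(); just remember the request */
	demo_requested = str && !strcmp(str, "enable");
	return 1;
}
__setup("demo=", demo_setup);

static void __init demo_init(void)
{
	/* called later in boot, after jump labels are initialized */
	if (demo_requested)
		static_branch_enable(&demo_key);
}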
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index cf905f655ba1..0368c393a336 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -427,19 +427,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		SPLIT_NS(p->se.vruntime),
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
-#ifdef CONFIG_SCHEDSTATS
-	if (schedstat_enabled()) {
-		SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-			SPLIT_NS(p->se.statistics.wait_sum),
-			SPLIT_NS(p->se.sum_exec_runtime),
-			SPLIT_NS(p->se.statistics.sum_sleep_runtime));
-	}
-#else
+
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		0LL, 0L,
+		SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		0LL, 0L);
-#endif
+		SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
+
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index bd12c6c714ec..c5aeedf4e93a 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -127,7 +127,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
  */
 static void cpuidle_idle_call(void)
 {
-	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
+	struct cpuidle_device *dev = cpuidle_get_device();
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 70b3b6a20fb0..78955cbea31c 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -33,6 +33,8 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 # define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
 # define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
 # define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
+# define schedstat_val(rq, field)	((schedstat_enabled()) ? (rq)->field : 0)
+
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
@@ -47,6 +49,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 # define schedstat_set(var, val)	do { } while (0)
+# define schedstat_val(rq, field)	0
 #endif
 
 #ifdef CONFIG_SCHED_INFO
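schedstat_val() is what lets the print_task() hunk in kernel/sched/debug.c drop its #ifdef/#else block: the same source line reads the field when schedstats are compiled in and enabled, and evaluates to 0 otherwise. A one-line sketch of a call site, mirroring the debug.c usage:

/* Reads the stat when enabled, else 0. With CONFIG_SCHEDSTATS=n the macro
 * expands to a plain 0, so the arguments are never evaluated and the
 * statistics field need not even exist in that configuration. */
u64 waited = schedstat_val(p, se.statistics.wait_sum);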
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 780bcbe1d4de..720b7bb01d43 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -198,7 +198,7 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	file = (struct file *)array->ptrs[index];
+	file = READ_ONCE(array->ptrs[index]);
 	if (unlikely(!file))
 		return -ENOENT;
 
@@ -247,7 +247,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	file = (struct file *)array->ptrs[index];
+	file = READ_ONCE(array->ptrs[index]);
 	if (unlikely(!file))
 		return -ENOENT;
 
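Both bpf_trace.c hunks close the same hazard: the perf event array slot can be updated concurrently, so the pointer must be fetched with exactly one load before the NULL check. A hedged sketch of the racy shape being replaced; the reload shown is something a plain (non-READ_ONCE) load permits the compiler to do, not something it always does:

/* Illustrative only: the pre-patch shape. */
file = (struct file *)array->ptrs[index];	/* plain load: may be refetched or torn   */
if (unlikely(!file))
	return -ENOENT;
event = file->private_data;			/* another CPU can meanwhile update the    */
						/* slot; READ_ONCE() pins a single load and */
						/* all later uses go through local 'file'.  */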