Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c        6
-rw-r--r--  kernel/futex.c             14
-rw-r--r--  kernel/kcov.c               7
-rw-r--r--  kernel/locking/mutex.c      9
-rw-r--r--  kernel/locking/qspinlock.c 60
-rw-r--r--  kernel/relay.c              1
-rw-r--r--  kernel/sched/core.c        29
-rw-r--r--  kernel/sched/debug.c       15
-rw-r--r--  kernel/sched/idle.c         2
-rw-r--r--  kernel/sched/stats.h        3
-rw-r--r--  kernel/trace/bpf_trace.c    4
11 files changed, 119 insertions, 31 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 274450efea90..9c51ec3f0f44 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3862,10 +3862,8 @@ static void _free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
-	if (event->pmu) {
-		exclusive_event_destroy(event);
-		module_put(event->pmu->module);
-	}
+	exclusive_event_destroy(event);
+	module_put(event->pmu->module);
 
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
diff --git a/kernel/futex.c b/kernel/futex.c
index ee25f5ba4aca..33664f70e2d2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -469,7 +469,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct page *page;
+	struct page *page, *tail;
 	struct address_space *mapping;
 	int err, ro = 0;
 
@@ -530,7 +530,15 @@ again:
 	 * considered here and page lock forces unnecessarily serialization
 	 * From this point on, mapping will be re-verified if necessary and
 	 * page lock will be acquired only if it is unavoidable
-	 */
+	 *
+	 * Mapping checks require the head page for any compound page so the
+	 * head page and mapping is looked up now. For anonymous pages, it
+	 * does not matter if the page splits in the future as the key is
+	 * based on the address. For filesystem-backed pages, the tail is
+	 * required as the index of the page determines the key. For
+	 * base pages, there is no tail page and tail == page.
+	 */
+	tail = page;
 	page = compound_head(page);
 	mapping = READ_ONCE(page->mapping);
 
@@ -654,7 +662,7 @@ again:
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.inode = inode;
-		key->shared.pgoff = basepage_index(page);
+		key->shared.pgoff = basepage_index(tail);
 		rcu_read_unlock();
 	}
 
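The comment added above explains why both the head page and the original (possibly tail) page are kept: anonymous futexes are keyed by address, while file-backed futexes are keyed by the page index, which must come from the tail. As a rough illustration only, the sketch below condenses that split into a single invented helper; the field names and FUT_OFF_* flags follow the surrounding futex code, but all locking, refcounting and error handling are omitted.

/*
 * Illustrative sketch, not part of the patch: how the two futex key
 * flavours differ once the head/tail pages are known.
 */
static void sketch_futex_key(union futex_key *key, struct page *tail,
                             struct inode *inode,
                             struct mm_struct *mm, unsigned long address)
{
        if (PageAnon(compound_head(tail))) {
                /*
                 * Private mapping: the key is the address, so a later
                 * split of a compound page cannot change it.
                 */
                key->both.offset |= FUT_OFF_MMSHARED;
                key->private.mm = mm;
                key->private.address = address;
        } else {
                /*
                 * Shared file mapping: the key is (inode, page index),
                 * and the index must be taken from the tail page.
                 */
                key->both.offset |= FUT_OFF_INODE;
                key->shared.inode = inode;
                key->shared.pgoff = basepage_index(tail);
        }
}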
diff --git a/kernel/kcov.c b/kernel/kcov.c
index a02f2dddd1d7..8d44b3fea9d0 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -264,7 +264,12 @@ static const struct file_operations kcov_fops = {
 
 static int __init kcov_init(void)
 {
-	if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) {
+	/*
+	 * The kcov debugfs file won't ever get removed and thus,
+	 * there is no need to protect it against removal races. The
+	 * use of debugfs_create_file_unsafe() is actually safe here.
+	 */
+	if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) {
 		pr_err("failed to create kcov in debugfs\n");
 		return -ENOMEM;
 	}
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index e364b424b019..79d2d765a75f 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 	if (!hold_ctx)
 		return 0;
 
-	if (unlikely(ctx == hold_ctx))
-		return -EALREADY;
-
 	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
 	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	unsigned long flags;
 	int ret;
 
+	if (use_ww_ctx) {
+		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
+			return -EALREADY;
+	}
+
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
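The two hunks above move the recursive-locking check out of __ww_mutex_lock_check_stamp(), which only runs on contention, and into __mutex_lock_common(), so relocking a ww_mutex within the same acquire context is reported as -EALREADY early and reliably. A minimal caller pattern that relies on that return value might look like the sketch below; the function and its error policy are invented here for illustration.

/*
 * Illustrative sketch, not part of the patch: a wait/wound caller that
 * depends on ww_mutex_lock() reporting a relock of a mutex it already
 * holds in the same acquire context as -EALREADY.
 */
static int sketch_lock_both(struct ww_mutex *a, struct ww_mutex *b,
                            struct ww_acquire_ctx *ctx)
{
        int ret;

        ret = ww_mutex_lock(a, ctx);
        if (ret)
                return ret;

        ret = ww_mutex_lock(b, ctx);    /* -EALREADY if b == a */
        if (ret && ret != -EALREADY) {
                ww_mutex_unlock(a);
                return ret;             /* e.g. -EDEADLK: back off and retry */
        }
        return 0;
}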
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index ce2f75e32ae1..5fc8c311b8fe 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -267,6 +267,66 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
 #define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
 #endif
 
+/*
+ * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
+ * issuing an _unordered_ store to set _Q_LOCKED_VAL.
+ *
+ * This means that the store can be delayed, but no later than the
+ * store-release from the unlock. This means that simply observing
+ * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
+ *
+ * There are two paths that can issue the unordered store:
+ *
+ *  (1) clear_pending_set_locked():	*,1,0 -> *,0,1
+ *
+ *  (2) set_locked():			t,0,0 -> t,0,1 ; t != 0
+ *      atomic_cmpxchg_relaxed():	t,0,0 -> 0,0,1
+ *
+ * However, in both cases we have other !0 state we've set before to queue
+ * ourselves:
+ *
+ * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
+ * load is constrained by that ACQUIRE to not pass before that, and thus must
+ * observe the store.
+ *
+ * For (2) we have a more interesting scenario. We enqueue ourselves using
+ * xchg_tail(), which ends up being a RELEASE. This in itself is not
+ * sufficient, however that is followed by an smp_cond_acquire() on the same
+ * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
+ * guarantees we must observe that store.
+ *
+ * Therefore both cases have other !0 state that is observable before the
+ * unordered locked byte store comes through. This means we can use that to
+ * wait for the lock store, and then wait for an unlock.
+ */
+#ifndef queued_spin_unlock_wait
+void queued_spin_unlock_wait(struct qspinlock *lock)
+{
+	u32 val;
+
+	for (;;) {
+		val = atomic_read(&lock->val);
+
+		if (!val) /* not locked, we're done */
+			goto done;
+
+		if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
+			break;
+
+		/* not locked, but pending, wait until we observe the lock */
+		cpu_relax();
+	}
+
+	/* any unlock is good */
+	while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+done:
+	smp_rmb(); /* CTRL + RMB -> ACQUIRE */
+}
+EXPORT_SYMBOL(queued_spin_unlock_wait);
+#endif
+
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
 /**
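queued_spin_unlock_wait() backs the generic spin_unlock_wait() primitive on queued-spinlock architectures: it first waits until a locker that has already claimed the lock becomes visible, then waits for the subsequent unlock. The kind of caller this is meant for looks roughly like the sketch below; the structure, function and flag names are hypothetical and not taken from this patch.

/*
 * Illustrative sketch, not part of the patch: one side publishes a
 * shutdown flag and then waits for any current lock holder to drain.
 */
struct sketch_obj {
        spinlock_t      lock;
        bool            dying;
};

static void sketch_shutdown(struct sketch_obj *obj)
{
        WRITE_ONCE(obj->dying, true);
        smp_mb();                       /* order the flag before the wait */
        spin_unlock_wait(&obj->lock);   /* wait out current critical sections */
        /* from here on, new lockers are expected to check obj->dying */
}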
diff --git a/kernel/relay.c b/kernel/relay.c
index 074994bcfa9b..04d7cf3ef8cf 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -614,6 +614,7 @@ free_bufs:
 
 	kref_put(&chan->kref, relay_destroy_channel);
 	mutex_unlock(&relay_channels_mutex);
+	kfree(chan);
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(relay_open);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f2cae4620c7..017d5394f5dc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2253,9 +2253,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 #endif
 #endif
 
+#ifdef CONFIG_SCHEDSTATS
+
 DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+static bool __initdata __sched_schedstats = false;
 
-#ifdef CONFIG_SCHEDSTATS
 static void set_schedstats(bool enabled)
 {
 	if (enabled)
@@ -2278,11 +2280,16 @@ static int __init setup_schedstats(char *str)
 	if (!str)
 		goto out;
 
+	/*
+	 * This code is called before jump labels have been set up, so we can't
+	 * change the static branch directly just yet. Instead set a temporary
+	 * variable so init_schedstats() can do it later.
+	 */
 	if (!strcmp(str, "enable")) {
-		set_schedstats(true);
+		__sched_schedstats = true;
 		ret = 1;
 	} else if (!strcmp(str, "disable")) {
-		set_schedstats(false);
+		__sched_schedstats = false;
 		ret = 1;
 	}
 out:
@@ -2293,6 +2300,11 @@ out:
 }
 __setup("schedstats=", setup_schedstats);
 
+static void __init init_schedstats(void)
+{
+	set_schedstats(__sched_schedstats);
+}
+
 #ifdef CONFIG_PROC_SYSCTL
 int sysctl_schedstats(struct ctl_table *table, int write,
 		      void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2313,8 +2325,10 @@ int sysctl_schedstats(struct ctl_table *table, int write,
 		set_schedstats(state);
 	return err;
 }
-#endif
-#endif
+#endif /* CONFIG_PROC_SYSCTL */
+#else  /* !CONFIG_SCHEDSTATS */
+static inline void init_schedstats(void) {}
+#endif /* CONFIG_SCHEDSTATS */
 
 /*
  * fork()/clone()-time setup:
@@ -3156,7 +3170,8 @@ static noinline void __schedule_bug(struct task_struct *prev)
 static inline void schedule_debug(struct task_struct *prev)
 {
 #ifdef CONFIG_SCHED_STACK_END_CHECK
-	BUG_ON(task_stack_end_corrupted(prev));
+	if (task_stack_end_corrupted(prev))
+		panic("corrupted stack end detected inside scheduler\n");
 #endif
 
 	if (unlikely(in_atomic_preempt_off())) {
@@ -7487,6 +7502,8 @@ void __init sched_init(void)
 #endif
 	init_sched_fair_class();
 
+	init_schedstats();
+
 	scheduler_running = 1;
 }
 
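The schedstats changes above revolve around a static key: the boot parameter can only record a request in __sched_schedstats because jump labels are not yet initialized during early __setup() parsing, and init_schedstats() then flips the key from sched_init(). The sketch below shows that general static-key pattern in isolation; the names are invented here and it is not part of the patch.

/*
 * Illustrative sketch, not part of the patch: a static key is cheap to
 * test on the hot path and is switched by patching code at runtime.
 */
DEFINE_STATIC_KEY_FALSE(sketch_key);

static void sketch_set(bool enabled)
{
        if (enabled)
                static_branch_enable(&sketch_key);      /* patches the branch in */
        else
                static_branch_disable(&sketch_key);
}

static void sketch_hot_path(void)
{
        if (static_branch_unlikely(&sketch_key))
                pr_info("stats enabled\n");             /* only when the key is on */
}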
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index cf905f655ba1..0368c393a336 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -427,19 +427,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		SPLIT_NS(p->se.vruntime),
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
-#ifdef CONFIG_SCHEDSTATS
-	if (schedstat_enabled()) {
-		SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-			SPLIT_NS(p->se.statistics.wait_sum),
-			SPLIT_NS(p->se.sum_exec_runtime),
-			SPLIT_NS(p->se.statistics.sum_sleep_runtime));
-	}
-#else
+
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		0LL, 0L,
+		SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		0LL, 0L);
-#endif
+		SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
+
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index bd12c6c714ec..c5aeedf4e93a 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -127,7 +127,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
  */
static void cpuidle_idle_call(void)
 {
-	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
+	struct cpuidle_device *dev = cpuidle_get_device();
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 70b3b6a20fb0..78955cbea31c 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -33,6 +33,8 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 # define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
 # define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
 # define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
+# define schedstat_val(rq, field)	((schedstat_enabled()) ? (rq)->field : 0)
+
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
 rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
@@ -47,6 +49,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 # define schedstat_set(var, val)	do { } while (0)
+# define schedstat_val(rq, field)	0
 #endif
 
 #ifdef CONFIG_SCHED_INFO
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 780bcbe1d4de..720b7bb01d43 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -198,7 +198,7 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	file = (struct file *)array->ptrs[index];
+	file = READ_ONCE(array->ptrs[index]);
 	if (unlikely(!file))
 		return -ENOENT;
 
@@ -247,7 +247,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	file = (struct file *)array->ptrs[index];
+	file = READ_ONCE(array->ptrs[index]);
 	if (unlikely(!file))
 		return -ENOENT;
 
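Both hunks replace a plain cast of array->ptrs[index] with READ_ONCE(), since the slot can be updated by another CPU while the BPF helper reads it without holding a lock. The sketch below isolates that reader pattern; the structure and function are hypothetical and only illustrate why the single marked load matters.

/*
 * Illustrative sketch, not part of the patch: without READ_ONCE() the
 * compiler may reload or tear the pointer between the NULL check and
 * its use; with it, the reader works on one stable local copy.
 */
struct sketch_slot {
        void *ptr;              /* updated concurrently by another CPU */
};

static int sketch_read(struct sketch_slot *slot)
{
        void *p = READ_ONCE(slot->ptr); /* exactly one load of the slot */

        if (!p)
                return -ENOENT;
        /* use the local copy 'p'; slot->ptr may change underneath us */
        return 0;
}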