Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile             |   1
-rw-r--r--  kernel/exit.c               |  10
-rw-r--r--  kernel/locking/qspinlock.c  | 117
-rw-r--r--  kernel/membarrier.c         |  70
-rw-r--r--  kernel/rcu/Kconfig          |   3
-rw-r--r--  kernel/rcu/rcu.h            | 128
-rw-r--r--  kernel/rcu/rcu_segcblist.c  | 108
-rw-r--r--  kernel/rcu/rcu_segcblist.h  |  28
-rw-r--r--  kernel/rcu/rcuperf.c        |  17
-rw-r--r--  kernel/rcu/rcutorture.c     |  83
-rw-r--r--  kernel/rcu/srcutiny.c       |   8
-rw-r--r--  kernel/rcu/srcutree.c       |  50
-rw-r--r--  kernel/rcu/tiny.c           |   2
-rw-r--r--  kernel/rcu/tiny_plugin.h    |  47
-rw-r--r--  kernel/rcu/tree.c           | 174
-rw-r--r--  kernel/rcu/tree.h           |  15
-rw-r--r--  kernel/rcu/tree_exp.h       |   2
-rw-r--r--  kernel/rcu/tree_plugin.h    | 238
-rw-r--r--  kernel/rcu/update.c         |  18
-rw-r--r--  kernel/sched/Makefile       |   1
-rw-r--r--  kernel/sched/completion.c   |  11
-rw-r--r--  kernel/sched/core.c         |  38
-rw-r--r--  kernel/sched/membarrier.c   | 152
-rw-r--r--  kernel/task_work.c          |   8
-rw-r--r--  kernel/torture.c            |   2
25 files changed, 544 insertions, 787 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb8e8b23c6e..9c323a6daa46 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -108,7 +108,6 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | |||
108 | obj-$(CONFIG_JUMP_LABEL) += jump_label.o | 108 | obj-$(CONFIG_JUMP_LABEL) += jump_label.o |
109 | obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o | 109 | obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o |
110 | obj-$(CONFIG_TORTURE_TEST) += torture.o | 110 | obj-$(CONFIG_TORTURE_TEST) += torture.o |
111 | obj-$(CONFIG_MEMBARRIER) += membarrier.o | ||
112 | 111 | ||
113 | obj-$(CONFIG_HAS_IOMEM) += memremap.o | 112 | obj-$(CONFIG_HAS_IOMEM) += memremap.o |
114 | 113 | ||
diff --git a/kernel/exit.c b/kernel/exit.c
index c5548faa9f37..f9ef3ecc78c1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -764,7 +764,6 @@ void __noreturn do_exit(long code) | |||
764 | { | 764 | { |
765 | struct task_struct *tsk = current; | 765 | struct task_struct *tsk = current; |
766 | int group_dead; | 766 | int group_dead; |
767 | TASKS_RCU(int tasks_rcu_i); | ||
768 | 767 | ||
769 | profile_task_exit(tsk); | 768 | profile_task_exit(tsk); |
770 | kcov_task_exit(tsk); | 769 | kcov_task_exit(tsk); |
@@ -819,7 +818,8 @@ void __noreturn do_exit(long code) | |||
819 | * Ensure that we must observe the pi_state in exit_mm() -> | 818 | * Ensure that we must observe the pi_state in exit_mm() -> |
820 | * mm_release() -> exit_pi_state_list(). | 819 | * mm_release() -> exit_pi_state_list(). |
821 | */ | 820 | */ |
822 | raw_spin_unlock_wait(&tsk->pi_lock); | 821 | raw_spin_lock_irq(&tsk->pi_lock); |
822 | raw_spin_unlock_irq(&tsk->pi_lock); | ||
823 | 823 | ||
824 | if (unlikely(in_atomic())) { | 824 | if (unlikely(in_atomic())) { |
825 | pr_info("note: %s[%d] exited with preempt_count %d\n", | 825 | pr_info("note: %s[%d] exited with preempt_count %d\n", |
@@ -881,9 +881,7 @@ void __noreturn do_exit(long code) | |||
881 | */ | 881 | */ |
882 | flush_ptrace_hw_breakpoint(tsk); | 882 | flush_ptrace_hw_breakpoint(tsk); |
883 | 883 | ||
884 | TASKS_RCU(preempt_disable()); | 884 | exit_tasks_rcu_start(); |
885 | TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu)); | ||
886 | TASKS_RCU(preempt_enable()); | ||
887 | exit_notify(tsk, group_dead); | 885 | exit_notify(tsk, group_dead); |
888 | proc_exit_connector(tsk); | 886 | proc_exit_connector(tsk); |
889 | mpol_put_task_policy(tsk); | 887 | mpol_put_task_policy(tsk); |
@@ -918,7 +916,7 @@ void __noreturn do_exit(long code) | |||
918 | if (tsk->nr_dirtied) | 916 | if (tsk->nr_dirtied) |
919 | __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); | 917 | __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); |
920 | exit_rcu(); | 918 | exit_rcu(); |
921 | TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); | 919 | exit_tasks_rcu_finish(); |
922 | 920 | ||
923 | do_task_dead(); | 921 | do_task_dead(); |
924 | } | 922 | } |
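For reference, the two hooks introduced above replace the open-coded TASKS_RCU() bookkeeping that this hunk removes. A minimal sketch of what such helpers could look like, assuming they keep using tasks_rcu_exit_srcu and stash the SRCU index in the task structure (neither detail is shown in this hunk), is:

    /* Hedged sketch only; the real helpers are not part of this hunk. */
    void exit_tasks_rcu_start(void)
    {
            preempt_disable();
            current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
            preempt_enable();
    }

    void exit_tasks_rcu_finish(void)
    {
            __srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
    }

This mirrors the removed lines: take the SRCU read lock before exit_notify() and drop it just before do_task_dead(), without the TASKS_RCU() macro wrappers.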
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index fd24153e8a48..294294c71ba4 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -268,123 +268,6 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, | |||
268 | #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath | 268 | #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath |
269 | #endif | 269 | #endif |
270 | 270 | ||
271 | /* | ||
272 | * Various notes on spin_is_locked() and spin_unlock_wait(), which are | ||
273 | * 'interesting' functions: | ||
274 | * | ||
275 | * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE | ||
276 | * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64, | ||
277 | * PPC). Also qspinlock has a similar issue per construction, the setting of | ||
278 | * the locked byte can be unordered acquiring the lock proper. | ||
279 | * | ||
280 | * This gets to be 'interesting' in the following cases, where the /should/s | ||
281 | * end up false because of this issue. | ||
282 | * | ||
283 | * | ||
284 | * CASE 1: | ||
285 | * | ||
286 | * So the spin_is_locked() correctness issue comes from something like: | ||
287 | * | ||
288 | * CPU0 CPU1 | ||
289 | * | ||
290 | * global_lock(); local_lock(i) | ||
291 | * spin_lock(&G) spin_lock(&L[i]) | ||
292 | * for (i) if (!spin_is_locked(&G)) { | ||
293 | * spin_unlock_wait(&L[i]); smp_acquire__after_ctrl_dep(); | ||
294 | * return; | ||
295 | * } | ||
296 | * // deal with fail | ||
297 | * | ||
298 | * Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such | ||
299 | * that there is exclusion between the two critical sections. | ||
300 | * | ||
301 | * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from | ||
302 | * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i]) | ||
303 | * /should/ be constrained by the ACQUIRE from spin_lock(&G). | ||
304 | * | ||
305 | * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB. | ||
306 | * | ||
307 | * | ||
308 | * CASE 2: | ||
309 | * | ||
310 | * For spin_unlock_wait() there is a second correctness issue, namely: | ||
311 | * | ||
312 | * CPU0 CPU1 | ||
313 | * | ||
314 | * flag = set; | ||
315 | * smp_mb(); spin_lock(&l) | ||
316 | * spin_unlock_wait(&l); if (!flag) | ||
317 | * // add to lockless list | ||
318 | * spin_unlock(&l); | ||
319 | * // iterate lockless list | ||
320 | * | ||
321 | * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0 | ||
322 | * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE | ||
323 | * semantics etc..) | ||
324 | * | ||
325 | * Where flag /should/ be ordered against the locked store of l. | ||
326 | */ | ||
327 | |||
328 | /* | ||
329 | * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before | ||
330 | * issuing an _unordered_ store to set _Q_LOCKED_VAL. | ||
331 | * | ||
332 | * This means that the store can be delayed, but no later than the | ||
333 | * store-release from the unlock. This means that simply observing | ||
334 | * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired. | ||
335 | * | ||
336 | * There are two paths that can issue the unordered store: | ||
337 | * | ||
338 | * (1) clear_pending_set_locked(): *,1,0 -> *,0,1 | ||
339 | * | ||
340 | * (2) set_locked(): t,0,0 -> t,0,1 ; t != 0 | ||
341 | * atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1 | ||
342 | * | ||
343 | * However, in both cases we have other !0 state we've set before to queue | ||
344 | * ourseves: | ||
345 | * | ||
346 | * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our | ||
347 | * load is constrained by that ACQUIRE to not pass before that, and thus must | ||
348 | * observe the store. | ||
349 | * | ||
350 | * For (2) we have a more intersting scenario. We enqueue ourselves using | ||
351 | * xchg_tail(), which ends up being a RELEASE. This in itself is not | ||
352 | * sufficient, however that is followed by an smp_cond_acquire() on the same | ||
353 | * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and | ||
354 | * guarantees we must observe that store. | ||
355 | * | ||
356 | * Therefore both cases have other !0 state that is observable before the | ||
357 | * unordered locked byte store comes through. This means we can use that to | ||
358 | * wait for the lock store, and then wait for an unlock. | ||
359 | */ | ||
360 | #ifndef queued_spin_unlock_wait | ||
361 | void queued_spin_unlock_wait(struct qspinlock *lock) | ||
362 | { | ||
363 | u32 val; | ||
364 | |||
365 | for (;;) { | ||
366 | val = atomic_read(&lock->val); | ||
367 | |||
368 | if (!val) /* not locked, we're done */ | ||
369 | goto done; | ||
370 | |||
371 | if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */ | ||
372 | break; | ||
373 | |||
374 | /* not locked, but pending, wait until we observe the lock */ | ||
375 | cpu_relax(); | ||
376 | } | ||
377 | |||
378 | /* any unlock is good */ | ||
379 | while (atomic_read(&lock->val) & _Q_LOCKED_MASK) | ||
380 | cpu_relax(); | ||
381 | |||
382 | done: | ||
383 | smp_acquire__after_ctrl_dep(); | ||
384 | } | ||
385 | EXPORT_SYMBOL(queued_spin_unlock_wait); | ||
386 | #endif | ||
387 | |||
388 | #endif /* _GEN_PV_LOCK_SLOWPATH */ | 271 | #endif /* _GEN_PV_LOCK_SLOWPATH */ |
389 | 272 | ||
390 | /** | 273 | /** |
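The kernel/exit.c hunk earlier in this diff shows the idiom that replaces the removed queued_spin_unlock_wait(): a briefly held, empty critical section. Acquiring and immediately releasing the lock waits for any current holder just as spin_unlock_wait() did, but with unambiguous ACQUIRE/RELEASE ordering, which is exactly what the deleted comment block above had to reason about so carefully. A generic illustration (not code from this commit):

    /* Old, removed API: wait for any current lock holder to go away. */
    /*      spin_unlock_wait(&lock);                                   */

    /* Replacement idiom: an empty critical section gives the same wait
     * plus full ordering against the previous critical section. */
    spin_lock(&lock);
    spin_unlock(&lock);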
diff --git a/kernel/membarrier.c b/kernel/membarrier.c
deleted file mode 100644
index 9f9284f37f8d..000000000000
--- a/kernel/membarrier.c
+++ /dev/null
@@ -1,70 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | ||
3 | * | ||
4 | * membarrier system call | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | */ | ||
16 | |||
17 | #include <linux/syscalls.h> | ||
18 | #include <linux/membarrier.h> | ||
19 | #include <linux/tick.h> | ||
20 | |||
21 | /* | ||
22 | * Bitmask made from a "or" of all commands within enum membarrier_cmd, | ||
23 | * except MEMBARRIER_CMD_QUERY. | ||
24 | */ | ||
25 | #define MEMBARRIER_CMD_BITMASK (MEMBARRIER_CMD_SHARED) | ||
26 | |||
27 | /** | ||
28 | * sys_membarrier - issue memory barriers on a set of threads | ||
29 | * @cmd: Takes command values defined in enum membarrier_cmd. | ||
30 | * @flags: Currently needs to be 0. For future extensions. | ||
31 | * | ||
32 | * If this system call is not implemented, -ENOSYS is returned. If the | ||
33 | * command specified does not exist, or if the command argument is invalid, | ||
34 | * this system call returns -EINVAL. For a given command, with flags argument | ||
35 | * set to 0, this system call is guaranteed to always return the same value | ||
36 | * until reboot. | ||
37 | * | ||
38 | * All memory accesses performed in program order from each targeted thread | ||
39 | * is guaranteed to be ordered with respect to sys_membarrier(). If we use | ||
40 | * the semantic "barrier()" to represent a compiler barrier forcing memory | ||
41 | * accesses to be performed in program order across the barrier, and | ||
42 | * smp_mb() to represent explicit memory barriers forcing full memory | ||
43 | * ordering across the barrier, we have the following ordering table for | ||
44 | * each pair of barrier(), sys_membarrier() and smp_mb(): | ||
45 | * | ||
46 | * The pair ordering is detailed as (O: ordered, X: not ordered): | ||
47 | * | ||
48 | * barrier() smp_mb() sys_membarrier() | ||
49 | * barrier() X X O | ||
50 | * smp_mb() X O O | ||
51 | * sys_membarrier() O O O | ||
52 | */ | ||
53 | SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) | ||
54 | { | ||
55 | /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */ | ||
56 | if (tick_nohz_full_enabled()) | ||
57 | return -ENOSYS; | ||
58 | if (unlikely(flags)) | ||
59 | return -EINVAL; | ||
60 | switch (cmd) { | ||
61 | case MEMBARRIER_CMD_QUERY: | ||
62 | return MEMBARRIER_CMD_BITMASK; | ||
63 | case MEMBARRIER_CMD_SHARED: | ||
64 | if (num_online_cpus() > 1) | ||
65 | synchronize_sched(); | ||
66 | return 0; | ||
67 | default: | ||
68 | return -EINVAL; | ||
69 | } | ||
70 | } | ||
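Note that the syscall itself is not being removed: per the diffstat at the top, the implementation moves to kernel/sched/membarrier.c (152 lines), and kernel/sched/Makefile picks up the object file that kernel/Makefile drops. As a hedged user-space sketch of the interface documented in the deleted header comment (command values come from the UAPI enum membarrier_cmd; the wrapper below is illustrative):

    #include <linux/membarrier.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Thin wrapper via syscall(2). */
    static int membarrier(int cmd, int flags)
    {
            return syscall(__NR_membarrier, cmd, flags);
    }

    int main(void)
    {
            int mask = membarrier(MEMBARRIER_CMD_QUERY, 0);

            if (mask < 0 || !(mask & MEMBARRIER_CMD_SHARED))
                    return 1;  /* e.g. -ENOSYS on nohz_full, per the code above */
            return membarrier(MEMBARRIER_CMD_SHARED, 0) ? 1 : 0;
    }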
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index be90c945063f..9210379c0353 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -69,8 +69,7 @@ config TREE_SRCU | |||
69 | This option selects the full-fledged version of SRCU. | 69 | This option selects the full-fledged version of SRCU. |
70 | 70 | ||
71 | config TASKS_RCU | 71 | config TASKS_RCU |
72 | bool | 72 | def_bool PREEMPT |
73 | default n | ||
74 | select SRCU | 73 | select SRCU |
75 | help | 74 | help |
76 | This option enables a task-based RCU implementation that uses | 75 | This option enables a task-based RCU implementation that uses |
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 808b8c85f626..e4b43fef89f5 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -356,22 +356,10 @@ do { \ | |||
356 | 356 | ||
357 | #ifdef CONFIG_TINY_RCU | 357 | #ifdef CONFIG_TINY_RCU |
358 | /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ | 358 | /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ |
359 | static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */ | 359 | static inline bool rcu_gp_is_normal(void) { return true; } |
360 | { | 360 | static inline bool rcu_gp_is_expedited(void) { return false; } |
361 | return true; | 361 | static inline void rcu_expedite_gp(void) { } |
362 | } | 362 | static inline void rcu_unexpedite_gp(void) { } |
363 | static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ | ||
364 | { | ||
365 | return false; | ||
366 | } | ||
367 | |||
368 | static inline void rcu_expedite_gp(void) | ||
369 | { | ||
370 | } | ||
371 | |||
372 | static inline void rcu_unexpedite_gp(void) | ||
373 | { | ||
374 | } | ||
375 | #else /* #ifdef CONFIG_TINY_RCU */ | 363 | #else /* #ifdef CONFIG_TINY_RCU */ |
376 | bool rcu_gp_is_normal(void); /* Internal RCU use. */ | 364 | bool rcu_gp_is_normal(void); /* Internal RCU use. */ |
377 | bool rcu_gp_is_expedited(void); /* Internal RCU use. */ | 365 | bool rcu_gp_is_expedited(void); /* Internal RCU use. */ |
@@ -419,12 +407,8 @@ static inline void rcutorture_get_gp_data(enum rcutorture_type test_type, | |||
419 | *gpnum = 0; | 407 | *gpnum = 0; |
420 | *completed = 0; | 408 | *completed = 0; |
421 | } | 409 | } |
422 | static inline void rcutorture_record_test_transition(void) | 410 | static inline void rcutorture_record_test_transition(void) { } |
423 | { | 411 | static inline void rcutorture_record_progress(unsigned long vernum) { } |
424 | } | ||
425 | static inline void rcutorture_record_progress(unsigned long vernum) | ||
426 | { | ||
427 | } | ||
428 | #ifdef CONFIG_RCU_TRACE | 412 | #ifdef CONFIG_RCU_TRACE |
429 | void do_trace_rcu_torture_read(const char *rcutorturename, | 413 | void do_trace_rcu_torture_read(const char *rcutorturename, |
430 | struct rcu_head *rhp, | 414 | struct rcu_head *rhp, |
@@ -460,92 +444,20 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type, | |||
460 | #endif | 444 | #endif |
461 | 445 | ||
462 | #ifdef CONFIG_TINY_RCU | 446 | #ifdef CONFIG_TINY_RCU |
463 | 447 | static inline unsigned long rcu_batches_started(void) { return 0; } | |
464 | /* | 448 | static inline unsigned long rcu_batches_started_bh(void) { return 0; } |
465 | * Return the number of grace periods started. | 449 | static inline unsigned long rcu_batches_started_sched(void) { return 0; } |
466 | */ | 450 | static inline unsigned long rcu_batches_completed(void) { return 0; } |
467 | static inline unsigned long rcu_batches_started(void) | 451 | static inline unsigned long rcu_batches_completed_bh(void) { return 0; } |
468 | { | 452 | static inline unsigned long rcu_batches_completed_sched(void) { return 0; } |
469 | return 0; | 453 | static inline unsigned long rcu_exp_batches_completed(void) { return 0; } |
470 | } | 454 | static inline unsigned long rcu_exp_batches_completed_sched(void) { return 0; } |
471 | 455 | static inline unsigned long | |
472 | /* | 456 | srcu_batches_completed(struct srcu_struct *sp) { return 0; } |
473 | * Return the number of bottom-half grace periods started. | 457 | static inline void rcu_force_quiescent_state(void) { } |
474 | */ | 458 | static inline void rcu_bh_force_quiescent_state(void) { } |
475 | static inline unsigned long rcu_batches_started_bh(void) | 459 | static inline void rcu_sched_force_quiescent_state(void) { } |
476 | { | 460 | static inline void show_rcu_gp_kthreads(void) { } |
477 | return 0; | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * Return the number of sched grace periods started. | ||
482 | */ | ||
483 | static inline unsigned long rcu_batches_started_sched(void) | ||
484 | { | ||
485 | return 0; | ||
486 | } | ||
487 | |||
488 | /* | ||
489 | * Return the number of grace periods completed. | ||
490 | */ | ||
491 | static inline unsigned long rcu_batches_completed(void) | ||
492 | { | ||
493 | return 0; | ||
494 | } | ||
495 | |||
496 | /* | ||
497 | * Return the number of bottom-half grace periods completed. | ||
498 | */ | ||
499 | static inline unsigned long rcu_batches_completed_bh(void) | ||
500 | { | ||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * Return the number of sched grace periods completed. | ||
506 | */ | ||
507 | static inline unsigned long rcu_batches_completed_sched(void) | ||
508 | { | ||
509 | return 0; | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * Return the number of expedited grace periods completed. | ||
514 | */ | ||
515 | static inline unsigned long rcu_exp_batches_completed(void) | ||
516 | { | ||
517 | return 0; | ||
518 | } | ||
519 | |||
520 | /* | ||
521 | * Return the number of expedited sched grace periods completed. | ||
522 | */ | ||
523 | static inline unsigned long rcu_exp_batches_completed_sched(void) | ||
524 | { | ||
525 | return 0; | ||
526 | } | ||
527 | |||
528 | static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) | ||
529 | { | ||
530 | return 0; | ||
531 | } | ||
532 | |||
533 | static inline void rcu_force_quiescent_state(void) | ||
534 | { | ||
535 | } | ||
536 | |||
537 | static inline void rcu_bh_force_quiescent_state(void) | ||
538 | { | ||
539 | } | ||
540 | |||
541 | static inline void rcu_sched_force_quiescent_state(void) | ||
542 | { | ||
543 | } | ||
544 | |||
545 | static inline void show_rcu_gp_kthreads(void) | ||
546 | { | ||
547 | } | ||
548 | |||
549 | #else /* #ifdef CONFIG_TINY_RCU */ | 461 | #else /* #ifdef CONFIG_TINY_RCU */ |
550 | extern unsigned long rcutorture_testseq; | 462 | extern unsigned long rcutorture_testseq; |
551 | extern unsigned long rcutorture_vernum; | 463 | extern unsigned long rcutorture_vernum; |
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 2b62a38b080f..7649fcd2c4c7 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -36,24 +36,6 @@ void rcu_cblist_init(struct rcu_cblist *rclp) | |||
36 | } | 36 | } |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * Debug function to actually count the number of callbacks. | ||
40 | * If the number exceeds the limit specified, return -1. | ||
41 | */ | ||
42 | long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim) | ||
43 | { | ||
44 | int cnt = 0; | ||
45 | struct rcu_head **rhpp = &rclp->head; | ||
46 | |||
47 | for (;;) { | ||
48 | if (!*rhpp) | ||
49 | return cnt; | ||
50 | if (++cnt > lim) | ||
51 | return -1; | ||
52 | rhpp = &(*rhpp)->next; | ||
53 | } | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | * Dequeue the oldest rcu_head structure from the specified callback | 39 | * Dequeue the oldest rcu_head structure from the specified callback |
58 | * list. This function assumes that the callback is non-lazy, but | 40 | * list. This function assumes that the callback is non-lazy, but |
59 | * the caller can later invoke rcu_cblist_dequeued_lazy() if it | 41 | * the caller can later invoke rcu_cblist_dequeued_lazy() if it |
@@ -103,17 +85,6 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp) | |||
103 | } | 85 | } |
104 | 86 | ||
105 | /* | 87 | /* |
106 | * Is the specified segment of the specified rcu_segcblist structure | ||
107 | * empty of callbacks? | ||
108 | */ | ||
109 | bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg) | ||
110 | { | ||
111 | if (seg == RCU_DONE_TAIL) | ||
112 | return &rsclp->head == rsclp->tails[RCU_DONE_TAIL]; | ||
113 | return rsclp->tails[seg - 1] == rsclp->tails[seg]; | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * Does the specified rcu_segcblist structure contain callbacks that | 88 | * Does the specified rcu_segcblist structure contain callbacks that |
118 | * are ready to be invoked? | 89 | * are ready to be invoked? |
119 | */ | 90 | */ |
@@ -134,50 +105,6 @@ bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp) | |||
134 | } | 105 | } |
135 | 106 | ||
136 | /* | 107 | /* |
137 | * Dequeue and return the first ready-to-invoke callback. If there | ||
138 | * are no ready-to-invoke callbacks, return NULL. Disables interrupts | ||
139 | * to avoid interference. Does not protect from interference from other | ||
140 | * CPUs or tasks. | ||
141 | */ | ||
142 | struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp) | ||
143 | { | ||
144 | unsigned long flags; | ||
145 | int i; | ||
146 | struct rcu_head *rhp; | ||
147 | |||
148 | local_irq_save(flags); | ||
149 | if (!rcu_segcblist_ready_cbs(rsclp)) { | ||
150 | local_irq_restore(flags); | ||
151 | return NULL; | ||
152 | } | ||
153 | rhp = rsclp->head; | ||
154 | BUG_ON(!rhp); | ||
155 | rsclp->head = rhp->next; | ||
156 | for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) { | ||
157 | if (rsclp->tails[i] != &rhp->next) | ||
158 | break; | ||
159 | rsclp->tails[i] = &rsclp->head; | ||
160 | } | ||
161 | smp_mb(); /* Dequeue before decrement for rcu_barrier(). */ | ||
162 | WRITE_ONCE(rsclp->len, rsclp->len - 1); | ||
163 | local_irq_restore(flags); | ||
164 | return rhp; | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Account for the fact that a previously dequeued callback turned out | ||
169 | * to be marked as lazy. | ||
170 | */ | ||
171 | void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp) | ||
172 | { | ||
173 | unsigned long flags; | ||
174 | |||
175 | local_irq_save(flags); | ||
176 | rsclp->len_lazy--; | ||
177 | local_irq_restore(flags); | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * Return a pointer to the first callback in the specified rcu_segcblist | 108 | * Return a pointer to the first callback in the specified rcu_segcblist |
182 | * structure. This is useful for diagnostics. | 109 | * structure. This is useful for diagnostics. |
183 | */ | 110 | */ |
@@ -203,17 +130,6 @@ struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) | |||
203 | } | 130 | } |
204 | 131 | ||
205 | /* | 132 | /* |
206 | * Does the specified rcu_segcblist structure contain callbacks that | ||
207 | * have not yet been processed beyond having been posted, that is, | ||
208 | * does it contain callbacks in its last segment? | ||
209 | */ | ||
210 | bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp) | ||
211 | { | ||
212 | return rcu_segcblist_is_enabled(rsclp) && | ||
213 | !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL); | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * Enqueue the specified callback onto the specified rcu_segcblist | 133 | * Enqueue the specified callback onto the specified rcu_segcblist |
218 | * structure, updating accounting as needed. Note that the ->len | 134 | * structure, updating accounting as needed. Note that the ->len |
219 | * field may be accessed locklessly, hence the WRITE_ONCE(). | 135 | * field may be accessed locklessly, hence the WRITE_ONCE(). |
@@ -503,3 +419,27 @@ bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, | |||
503 | return true; | 419 | return true; |
504 | return false; | 420 | return false; |
505 | } | 421 | } |
422 | |||
423 | /* | ||
424 | * Merge the source rcu_segcblist structure into the destination | ||
425 | * rcu_segcblist structure, then initialize the source. Any pending | ||
426 | * callbacks from the source get to start over. It is best to | ||
427 | * advance and accelerate both the destination and the source | ||
428 | * before merging. | ||
429 | */ | ||
430 | void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp, | ||
431 | struct rcu_segcblist *src_rsclp) | ||
432 | { | ||
433 | struct rcu_cblist donecbs; | ||
434 | struct rcu_cblist pendcbs; | ||
435 | |||
436 | rcu_cblist_init(&donecbs); | ||
437 | rcu_cblist_init(&pendcbs); | ||
438 | rcu_segcblist_extract_count(src_rsclp, &donecbs); | ||
439 | rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs); | ||
440 | rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs); | ||
441 | rcu_segcblist_insert_count(dst_rsclp, &donecbs); | ||
442 | rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs); | ||
443 | rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs); | ||
444 | rcu_segcblist_init(src_rsclp); | ||
445 | } | ||
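A hedged usage sketch of the new helper (the caller below is hypothetical and not part of this commit): following the advice in the comment above, a caller would advance and accelerate both lists before folding the source into the destination:

    /* Hypothetical caller, for illustration only. */
    static void fold_cblists(struct rcu_segcblist *dst,
                             struct rcu_segcblist *src,
                             unsigned long gp_seq)
    {
            rcu_segcblist_advance(dst, gp_seq);     /* credit already-elapsed GPs */
            rcu_segcblist_advance(src, gp_seq);
            rcu_segcblist_accelerate(dst, gp_seq);  /* assign GP numbers to pending CBs */
            rcu_segcblist_accelerate(src, gp_seq);
            rcu_segcblist_merge(dst, src);          /* src is reinitialized afterwards */
    }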
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 6e36e36478cd..581c12b63544 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -31,29 +31,7 @@ static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp) | |||
31 | rclp->len_lazy--; | 31 | rclp->len_lazy--; |
32 | } | 32 | } |
33 | 33 | ||
34 | /* | ||
35 | * Interim function to return rcu_cblist head pointer. Longer term, the | ||
36 | * rcu_cblist will be used more pervasively, removing the need for this | ||
37 | * function. | ||
38 | */ | ||
39 | static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp) | ||
40 | { | ||
41 | return rclp->head; | ||
42 | } | ||
43 | |||
44 | /* | ||
45 | * Interim function to return rcu_cblist head pointer. Longer term, the | ||
46 | * rcu_cblist will be used more pervasively, removing the need for this | ||
47 | * function. | ||
48 | */ | ||
49 | static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp) | ||
50 | { | ||
51 | WARN_ON_ONCE(!rclp->head); | ||
52 | return rclp->tail; | ||
53 | } | ||
54 | |||
55 | void rcu_cblist_init(struct rcu_cblist *rclp); | 34 | void rcu_cblist_init(struct rcu_cblist *rclp); |
56 | long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim); | ||
57 | struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp); | 35 | struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp); |
58 | 36 | ||
59 | /* | 37 | /* |
@@ -134,14 +112,10 @@ static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp) | |||
134 | 112 | ||
135 | void rcu_segcblist_init(struct rcu_segcblist *rsclp); | 113 | void rcu_segcblist_init(struct rcu_segcblist *rsclp); |
136 | void rcu_segcblist_disable(struct rcu_segcblist *rsclp); | 114 | void rcu_segcblist_disable(struct rcu_segcblist *rsclp); |
137 | bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg); | ||
138 | bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp); | 115 | bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp); |
139 | bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp); | 116 | bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp); |
140 | struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp); | ||
141 | void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp); | ||
142 | struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp); | 117 | struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp); |
143 | struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp); | 118 | struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp); |
144 | bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp); | ||
145 | void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, | 119 | void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, |
146 | struct rcu_head *rhp, bool lazy); | 120 | struct rcu_head *rhp, bool lazy); |
147 | bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, | 121 | bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, |
@@ -162,3 +136,5 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq); | |||
162 | bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq); | 136 | bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq); |
163 | bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, | 137 | bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, |
164 | unsigned long seq); | 138 | unsigned long seq); |
139 | void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp, | ||
140 | struct rcu_segcblist *src_rsclp); | ||
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 3cc18110b612..1f87a02c3399 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -317,8 +317,6 @@ static struct rcu_perf_ops sched_ops = { | |||
317 | .name = "sched" | 317 | .name = "sched" |
318 | }; | 318 | }; |
319 | 319 | ||
320 | #ifdef CONFIG_TASKS_RCU | ||
321 | |||
322 | /* | 320 | /* |
323 | * Definitions for RCU-tasks perf testing. | 321 | * Definitions for RCU-tasks perf testing. |
324 | */ | 322 | */ |
@@ -346,24 +344,11 @@ static struct rcu_perf_ops tasks_ops = { | |||
346 | .name = "tasks" | 344 | .name = "tasks" |
347 | }; | 345 | }; |
348 | 346 | ||
349 | #define RCUPERF_TASKS_OPS &tasks_ops, | ||
350 | |||
351 | static bool __maybe_unused torturing_tasks(void) | 347 | static bool __maybe_unused torturing_tasks(void) |
352 | { | 348 | { |
353 | return cur_ops == &tasks_ops; | 349 | return cur_ops == &tasks_ops; |
354 | } | 350 | } |
355 | 351 | ||
356 | #else /* #ifdef CONFIG_TASKS_RCU */ | ||
357 | |||
358 | #define RCUPERF_TASKS_OPS | ||
359 | |||
360 | static bool __maybe_unused torturing_tasks(void) | ||
361 | { | ||
362 | return false; | ||
363 | } | ||
364 | |||
365 | #endif /* #else #ifdef CONFIG_TASKS_RCU */ | ||
366 | |||
367 | /* | 352 | /* |
368 | * If performance tests complete, wait for shutdown to commence. | 353 | * If performance tests complete, wait for shutdown to commence. |
369 | */ | 354 | */ |
@@ -658,7 +643,7 @@ rcu_perf_init(void) | |||
658 | int firsterr = 0; | 643 | int firsterr = 0; |
659 | static struct rcu_perf_ops *perf_ops[] = { | 644 | static struct rcu_perf_ops *perf_ops[] = { |
660 | &rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops, | 645 | &rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops, |
661 | RCUPERF_TASKS_OPS | 646 | &tasks_ops, |
662 | }; | 647 | }; |
663 | 648 | ||
664 | if (!torture_init_begin(perf_type, verbose, &perf_runnable)) | 649 | if (!torture_init_begin(perf_type, verbose, &perf_runnable)) |
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index b8f7f8ce8575..45f2ffbc1e78 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -199,7 +199,8 @@ MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot"); | |||
199 | static u64 notrace rcu_trace_clock_local(void) | 199 | static u64 notrace rcu_trace_clock_local(void) |
200 | { | 200 | { |
201 | u64 ts = trace_clock_local(); | 201 | u64 ts = trace_clock_local(); |
202 | unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC); | 202 | |
203 | (void)do_div(ts, NSEC_PER_USEC); | ||
203 | return ts; | 204 | return ts; |
204 | } | 205 | } |
205 | #else /* #ifdef CONFIG_RCU_TRACE */ | 206 | #else /* #ifdef CONFIG_RCU_TRACE */ |
@@ -496,7 +497,7 @@ static struct rcu_torture_ops rcu_busted_ops = { | |||
496 | .fqs = NULL, | 497 | .fqs = NULL, |
497 | .stats = NULL, | 498 | .stats = NULL, |
498 | .irq_capable = 1, | 499 | .irq_capable = 1, |
499 | .name = "rcu_busted" | 500 | .name = "busted" |
500 | }; | 501 | }; |
501 | 502 | ||
502 | /* | 503 | /* |
@@ -522,7 +523,7 @@ static void srcu_read_delay(struct torture_random_state *rrsp) | |||
522 | 523 | ||
523 | delay = torture_random(rrsp) % | 524 | delay = torture_random(rrsp) % |
524 | (nrealreaders * 2 * longdelay * uspertick); | 525 | (nrealreaders * 2 * longdelay * uspertick); |
525 | if (!delay) | 526 | if (!delay && in_task()) |
526 | schedule_timeout_interruptible(longdelay); | 527 | schedule_timeout_interruptible(longdelay); |
527 | else | 528 | else |
528 | rcu_read_delay(rrsp); | 529 | rcu_read_delay(rrsp); |
@@ -561,44 +562,7 @@ static void srcu_torture_barrier(void) | |||
561 | 562 | ||
562 | static void srcu_torture_stats(void) | 563 | static void srcu_torture_stats(void) |
563 | { | 564 | { |
564 | int __maybe_unused cpu; | 565 | srcu_torture_stats_print(srcu_ctlp, torture_type, TORTURE_FLAG); |
565 | int idx; | ||
566 | |||
567 | #ifdef CONFIG_TREE_SRCU | ||
568 | idx = srcu_ctlp->srcu_idx & 0x1; | ||
569 | pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", | ||
570 | torture_type, TORTURE_FLAG, idx); | ||
571 | for_each_possible_cpu(cpu) { | ||
572 | unsigned long l0, l1; | ||
573 | unsigned long u0, u1; | ||
574 | long c0, c1; | ||
575 | struct srcu_data *counts; | ||
576 | |||
577 | counts = per_cpu_ptr(srcu_ctlp->sda, cpu); | ||
578 | u0 = counts->srcu_unlock_count[!idx]; | ||
579 | u1 = counts->srcu_unlock_count[idx]; | ||
580 | |||
581 | /* | ||
582 | * Make sure that a lock is always counted if the corresponding | ||
583 | * unlock is counted. | ||
584 | */ | ||
585 | smp_rmb(); | ||
586 | |||
587 | l0 = counts->srcu_lock_count[!idx]; | ||
588 | l1 = counts->srcu_lock_count[idx]; | ||
589 | |||
590 | c0 = l0 - u0; | ||
591 | c1 = l1 - u1; | ||
592 | pr_cont(" %d(%ld,%ld)", cpu, c0, c1); | ||
593 | } | ||
594 | pr_cont("\n"); | ||
595 | #elif defined(CONFIG_TINY_SRCU) | ||
596 | idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1; | ||
597 | pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", | ||
598 | torture_type, TORTURE_FLAG, idx, | ||
599 | READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]), | ||
600 | READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx])); | ||
601 | #endif | ||
602 | } | 566 | } |
603 | 567 | ||
604 | static void srcu_torture_synchronize_expedited(void) | 568 | static void srcu_torture_synchronize_expedited(void) |
@@ -620,6 +584,7 @@ static struct rcu_torture_ops srcu_ops = { | |||
620 | .call = srcu_torture_call, | 584 | .call = srcu_torture_call, |
621 | .cb_barrier = srcu_torture_barrier, | 585 | .cb_barrier = srcu_torture_barrier, |
622 | .stats = srcu_torture_stats, | 586 | .stats = srcu_torture_stats, |
587 | .irq_capable = 1, | ||
623 | .name = "srcu" | 588 | .name = "srcu" |
624 | }; | 589 | }; |
625 | 590 | ||
@@ -652,6 +617,7 @@ static struct rcu_torture_ops srcud_ops = { | |||
652 | .call = srcu_torture_call, | 617 | .call = srcu_torture_call, |
653 | .cb_barrier = srcu_torture_barrier, | 618 | .cb_barrier = srcu_torture_barrier, |
654 | .stats = srcu_torture_stats, | 619 | .stats = srcu_torture_stats, |
620 | .irq_capable = 1, | ||
655 | .name = "srcud" | 621 | .name = "srcud" |
656 | }; | 622 | }; |
657 | 623 | ||
@@ -696,8 +662,6 @@ static struct rcu_torture_ops sched_ops = { | |||
696 | .name = "sched" | 662 | .name = "sched" |
697 | }; | 663 | }; |
698 | 664 | ||
699 | #ifdef CONFIG_TASKS_RCU | ||
700 | |||
701 | /* | 665 | /* |
702 | * Definitions for RCU-tasks torture testing. | 666 | * Definitions for RCU-tasks torture testing. |
703 | */ | 667 | */ |
@@ -735,24 +699,11 @@ static struct rcu_torture_ops tasks_ops = { | |||
735 | .name = "tasks" | 699 | .name = "tasks" |
736 | }; | 700 | }; |
737 | 701 | ||
738 | #define RCUTORTURE_TASKS_OPS &tasks_ops, | ||
739 | |||
740 | static bool __maybe_unused torturing_tasks(void) | 702 | static bool __maybe_unused torturing_tasks(void) |
741 | { | 703 | { |
742 | return cur_ops == &tasks_ops; | 704 | return cur_ops == &tasks_ops; |
743 | } | 705 | } |
744 | 706 | ||
745 | #else /* #ifdef CONFIG_TASKS_RCU */ | ||
746 | |||
747 | #define RCUTORTURE_TASKS_OPS | ||
748 | |||
749 | static bool __maybe_unused torturing_tasks(void) | ||
750 | { | ||
751 | return false; | ||
752 | } | ||
753 | |||
754 | #endif /* #else #ifdef CONFIG_TASKS_RCU */ | ||
755 | |||
756 | /* | 707 | /* |
757 | * RCU torture priority-boost testing. Runs one real-time thread per | 708 | * RCU torture priority-boost testing. Runs one real-time thread per |
758 | * CPU for moderate bursts, repeatedly registering RCU callbacks and | 709 | * CPU for moderate bursts, repeatedly registering RCU callbacks and |
@@ -1114,6 +1065,11 @@ rcu_torture_fakewriter(void *arg) | |||
1114 | return 0; | 1065 | return 0; |
1115 | } | 1066 | } |
1116 | 1067 | ||
1068 | static void rcu_torture_timer_cb(struct rcu_head *rhp) | ||
1069 | { | ||
1070 | kfree(rhp); | ||
1071 | } | ||
1072 | |||
1117 | /* | 1073 | /* |
1118 | * RCU torture reader from timer handler. Dereferences rcu_torture_current, | 1074 | * RCU torture reader from timer handler. Dereferences rcu_torture_current, |
1119 | * incrementing the corresponding element of the pipeline array. The | 1075 | * incrementing the corresponding element of the pipeline array. The |
@@ -1176,6 +1132,14 @@ static void rcu_torture_timer(unsigned long unused) | |||
1176 | __this_cpu_inc(rcu_torture_batch[completed]); | 1132 | __this_cpu_inc(rcu_torture_batch[completed]); |
1177 | preempt_enable(); | 1133 | preempt_enable(); |
1178 | cur_ops->readunlock(idx); | 1134 | cur_ops->readunlock(idx); |
1135 | |||
1136 | /* Test call_rcu() invocation from interrupt handler. */ | ||
1137 | if (cur_ops->call) { | ||
1138 | struct rcu_head *rhp = kmalloc(sizeof(*rhp), GFP_NOWAIT); | ||
1139 | |||
1140 | if (rhp) | ||
1141 | cur_ops->call(rhp, rcu_torture_timer_cb); | ||
1142 | } | ||
1179 | } | 1143 | } |
1180 | 1144 | ||
1181 | /* | 1145 | /* |
@@ -1354,11 +1318,12 @@ rcu_torture_stats_print(void) | |||
1354 | srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, | 1318 | srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, |
1355 | &flags, &gpnum, &completed); | 1319 | &flags, &gpnum, &completed); |
1356 | wtp = READ_ONCE(writer_task); | 1320 | wtp = READ_ONCE(writer_task); |
1357 | pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n", | 1321 | pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx cpu %d\n", |
1358 | rcu_torture_writer_state_getname(), | 1322 | rcu_torture_writer_state_getname(), |
1359 | rcu_torture_writer_state, | 1323 | rcu_torture_writer_state, |
1360 | gpnum, completed, flags, | 1324 | gpnum, completed, flags, |
1361 | wtp == NULL ? ~0UL : wtp->state); | 1325 | wtp == NULL ? ~0UL : wtp->state, |
1326 | wtp == NULL ? -1 : (int)task_cpu(wtp)); | ||
1362 | show_rcu_gp_kthreads(); | 1327 | show_rcu_gp_kthreads(); |
1363 | rcu_ftrace_dump(DUMP_ALL); | 1328 | rcu_ftrace_dump(DUMP_ALL); |
1364 | } | 1329 | } |
@@ -1749,7 +1714,7 @@ rcu_torture_init(void) | |||
1749 | int firsterr = 0; | 1714 | int firsterr = 0; |
1750 | static struct rcu_torture_ops *torture_ops[] = { | 1715 | static struct rcu_torture_ops *torture_ops[] = { |
1751 | &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, | 1716 | &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, |
1752 | &sched_ops, RCUTORTURE_TASKS_OPS | 1717 | &sched_ops, &tasks_ops, |
1753 | }; | 1718 | }; |
1754 | 1719 | ||
1755 | if (!torture_init_begin(torture_type, verbose, &torture_runnable)) | 1720 | if (!torture_init_begin(torture_type, verbose, &torture_runnable)) |
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 1a1c1047d2ed..76ac5f50b2c7 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -33,6 +33,8 @@ | |||
33 | #include "rcu_segcblist.h" | 33 | #include "rcu_segcblist.h" |
34 | #include "rcu.h" | 34 | #include "rcu.h" |
35 | 35 | ||
36 | int rcu_scheduler_active __read_mostly; | ||
37 | |||
36 | static int init_srcu_struct_fields(struct srcu_struct *sp) | 38 | static int init_srcu_struct_fields(struct srcu_struct *sp) |
37 | { | 39 | { |
38 | sp->srcu_lock_nesting[0] = 0; | 40 | sp->srcu_lock_nesting[0] = 0; |
@@ -193,3 +195,9 @@ void synchronize_srcu(struct srcu_struct *sp) | |||
193 | destroy_rcu_head_on_stack(&rs.head); | 195 | destroy_rcu_head_on_stack(&rs.head); |
194 | } | 196 | } |
195 | EXPORT_SYMBOL_GPL(synchronize_srcu); | 197 | EXPORT_SYMBOL_GPL(synchronize_srcu); |
198 | |||
199 | /* Lockdep diagnostics. */ | ||
200 | void __init rcu_scheduler_starting(void) | ||
201 | { | ||
202 | rcu_scheduler_active = RCU_SCHEDULER_RUNNING; | ||
203 | } | ||
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index d0ca524bf042..729a8706751d 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -51,6 +51,7 @@ module_param(counter_wrap_check, ulong, 0444); | |||
51 | 51 | ||
52 | static void srcu_invoke_callbacks(struct work_struct *work); | 52 | static void srcu_invoke_callbacks(struct work_struct *work); |
53 | static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); | 53 | static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); |
54 | static void process_srcu(struct work_struct *work); | ||
54 | 55 | ||
55 | /* | 56 | /* |
56 | * Initialize SRCU combining tree. Note that statically allocated | 57 | * Initialize SRCU combining tree. Note that statically allocated |
@@ -896,6 +897,15 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) | |||
896 | __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm); | 897 | __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm); |
897 | wait_for_completion(&rcu.completion); | 898 | wait_for_completion(&rcu.completion); |
898 | destroy_rcu_head_on_stack(&rcu.head); | 899 | destroy_rcu_head_on_stack(&rcu.head); |
900 | |||
901 | /* | ||
902 | * Make sure that later code is ordered after the SRCU grace | ||
903 | * period. This pairs with the raw_spin_lock_irq_rcu_node() | ||
904 | * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed | ||
905 | * because the current CPU might have been totally uninvolved with | ||
906 | * (and thus unordered against) that grace period. | ||
907 | */ | ||
908 | smp_mb(); | ||
899 | } | 909 | } |
900 | 910 | ||
901 | /** | 911 | /** |
@@ -1194,7 +1204,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) | |||
1194 | /* | 1204 | /* |
1195 | * This is the work-queue function that handles SRCU grace periods. | 1205 | * This is the work-queue function that handles SRCU grace periods. |
1196 | */ | 1206 | */ |
1197 | void process_srcu(struct work_struct *work) | 1207 | static void process_srcu(struct work_struct *work) |
1198 | { | 1208 | { |
1199 | struct srcu_struct *sp; | 1209 | struct srcu_struct *sp; |
1200 | 1210 | ||
@@ -1203,7 +1213,6 @@ void process_srcu(struct work_struct *work) | |||
1203 | srcu_advance_state(sp); | 1213 | srcu_advance_state(sp); |
1204 | srcu_reschedule(sp, srcu_get_delay(sp)); | 1214 | srcu_reschedule(sp, srcu_get_delay(sp)); |
1205 | } | 1215 | } |
1206 | EXPORT_SYMBOL_GPL(process_srcu); | ||
1207 | 1216 | ||
1208 | void srcutorture_get_gp_data(enum rcutorture_type test_type, | 1217 | void srcutorture_get_gp_data(enum rcutorture_type test_type, |
1209 | struct srcu_struct *sp, int *flags, | 1218 | struct srcu_struct *sp, int *flags, |
@@ -1217,6 +1226,43 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type, | |||
1217 | } | 1226 | } |
1218 | EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); | 1227 | EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); |
1219 | 1228 | ||
1229 | void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf) | ||
1230 | { | ||
1231 | int cpu; | ||
1232 | int idx; | ||
1233 | unsigned long s0 = 0, s1 = 0; | ||
1234 | |||
1235 | idx = sp->srcu_idx & 0x1; | ||
1236 | pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", tt, tf, idx); | ||
1237 | for_each_possible_cpu(cpu) { | ||
1238 | unsigned long l0, l1; | ||
1239 | unsigned long u0, u1; | ||
1240 | long c0, c1; | ||
1241 | struct srcu_data *counts; | ||
1242 | |||
1243 | counts = per_cpu_ptr(sp->sda, cpu); | ||
1244 | u0 = counts->srcu_unlock_count[!idx]; | ||
1245 | u1 = counts->srcu_unlock_count[idx]; | ||
1246 | |||
1247 | /* | ||
1248 | * Make sure that a lock is always counted if the corresponding | ||
1249 | * unlock is counted. | ||
1250 | */ | ||
1251 | smp_rmb(); | ||
1252 | |||
1253 | l0 = counts->srcu_lock_count[!idx]; | ||
1254 | l1 = counts->srcu_lock_count[idx]; | ||
1255 | |||
1256 | c0 = l0 - u0; | ||
1257 | c1 = l1 - u1; | ||
1258 | pr_cont(" %d(%ld,%ld)", cpu, c0, c1); | ||
1259 | s0 += c0; | ||
1260 | s1 += c1; | ||
1261 | } | ||
1262 | pr_cont(" T(%ld,%ld)\n", s0, s1); | ||
1263 | } | ||
1264 | EXPORT_SYMBOL_GPL(srcu_torture_stats_print); | ||
1265 | |||
1220 | static int __init srcu_bootup_announce(void) | 1266 | static int __init srcu_bootup_announce(void) |
1221 | { | 1267 | { |
1222 | pr_info("Hierarchical SRCU implementation.\n"); | 1268 | pr_info("Hierarchical SRCU implementation.\n"); |
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index f8488965250f..a64eee0db39e 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -56,8 +56,6 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = { | |||
56 | .curtail = &rcu_bh_ctrlblk.rcucblist, | 56 | .curtail = &rcu_bh_ctrlblk.rcucblist, |
57 | }; | 57 | }; |
58 | 58 | ||
59 | #include "tiny_plugin.h" | ||
60 | |||
61 | void rcu_barrier_bh(void) | 59 | void rcu_barrier_bh(void) |
62 | { | 60 | { |
63 | wait_rcu_gp(call_rcu_bh); | 61 | wait_rcu_gp(call_rcu_bh); |
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
deleted file mode 100644
index f0a01b2a3062..000000000000
--- a/kernel/rcu/tiny_plugin.h
+++ /dev/null
@@ -1,47 +0,0 @@ | |||
1 | /* | ||
2 | * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition | ||
3 | * Internal non-public definitions that provide either classic | ||
4 | * or preemptible semantics. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, you can access it online at | ||
18 | * http://www.gnu.org/licenses/gpl-2.0.html. | ||
19 | * | ||
20 | * Copyright (c) 2010 Linaro | ||
21 | * | ||
22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||
23 | */ | ||
24 | |||
25 | #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) | ||
26 | #include <linux/kernel_stat.h> | ||
27 | |||
28 | int rcu_scheduler_active __read_mostly; | ||
29 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | ||
30 | |||
31 | /* | ||
32 | * During boot, we forgive RCU lockdep issues. After this function is | ||
33 | * invoked, we start taking RCU lockdep issues seriously. Note that unlike | ||
34 | * Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE | ||
35 | * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. | ||
36 | * The reason for this is that Tiny RCU does not need kthreads, so does | ||
37 | * not have to care about the fact that the scheduler is half-initialized | ||
38 | * at a certain phase of the boot process. Unless SRCU is in the mix. | ||
39 | */ | ||
40 | void __init rcu_scheduler_starting(void) | ||
41 | { | ||
42 | WARN_ON(nr_context_switches() > 0); | ||
43 | rcu_scheduler_active = IS_ENABLED(CONFIG_SRCU) | ||
44 | ? RCU_SCHEDULER_INIT : RCU_SCHEDULER_RUNNING; | ||
45 | } | ||
46 | |||
47 | #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ | ||
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9bb5dff50815..84fe96641b2e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -97,9 +97,6 @@ struct rcu_state sname##_state = { \ | |||
97 | .gp_state = RCU_GP_IDLE, \ | 97 | .gp_state = RCU_GP_IDLE, \ |
98 | .gpnum = 0UL - 300UL, \ | 98 | .gpnum = 0UL - 300UL, \ |
99 | .completed = 0UL - 300UL, \ | 99 | .completed = 0UL - 300UL, \ |
100 | .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \ | ||
101 | .orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \ | ||
102 | .orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \ | ||
103 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ | 100 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
104 | .name = RCU_STATE_NAME(sname), \ | 101 | .name = RCU_STATE_NAME(sname), \ |
105 | .abbr = sabbr, \ | 102 | .abbr = sabbr, \ |
@@ -843,13 +840,9 @@ static void rcu_eqs_enter(bool user) | |||
843 | */ | 840 | */ |
844 | void rcu_idle_enter(void) | 841 | void rcu_idle_enter(void) |
845 | { | 842 | { |
846 | unsigned long flags; | 843 | RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_idle_enter() invoked with irqs enabled!!!"); |
847 | |||
848 | local_irq_save(flags); | ||
849 | rcu_eqs_enter(false); | 844 | rcu_eqs_enter(false); |
850 | local_irq_restore(flags); | ||
851 | } | 845 | } |
852 | EXPORT_SYMBOL_GPL(rcu_idle_enter); | ||
853 | 846 | ||
854 | #ifdef CONFIG_NO_HZ_FULL | 847 | #ifdef CONFIG_NO_HZ_FULL |
855 | /** | 848 | /** |
@@ -862,7 +855,8 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); | |||
862 | */ | 855 | */ |
863 | void rcu_user_enter(void) | 856 | void rcu_user_enter(void) |
864 | { | 857 | { |
865 | rcu_eqs_enter(1); | 858 | RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_user_enter() invoked with irqs enabled!!!"); |
859 | rcu_eqs_enter(true); | ||
866 | } | 860 | } |
867 | #endif /* CONFIG_NO_HZ_FULL */ | 861 | #endif /* CONFIG_NO_HZ_FULL */ |
868 | 862 | ||
@@ -955,8 +949,10 @@ static void rcu_eqs_exit(bool user) | |||
955 | if (oldval & DYNTICK_TASK_NEST_MASK) { | 949 | if (oldval & DYNTICK_TASK_NEST_MASK) { |
956 | rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; | 950 | rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; |
957 | } else { | 951 | } else { |
952 | __this_cpu_inc(disable_rcu_irq_enter); | ||
958 | rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | 953 | rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; |
959 | rcu_eqs_exit_common(oldval, user); | 954 | rcu_eqs_exit_common(oldval, user); |
955 | __this_cpu_dec(disable_rcu_irq_enter); | ||
960 | } | 956 | } |
961 | } | 957 | } |
962 | 958 | ||
@@ -979,7 +975,6 @@ void rcu_idle_exit(void) | |||
979 | rcu_eqs_exit(false); | 975 | rcu_eqs_exit(false); |
980 | local_irq_restore(flags); | 976 | local_irq_restore(flags); |
981 | } | 977 | } |
982 | EXPORT_SYMBOL_GPL(rcu_idle_exit); | ||
983 | 978 | ||
984 | #ifdef CONFIG_NO_HZ_FULL | 979 | #ifdef CONFIG_NO_HZ_FULL |
985 | /** | 980 | /** |
@@ -1358,12 +1353,13 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) | |||
1358 | j = jiffies; | 1353 | j = jiffies; |
1359 | gpa = READ_ONCE(rsp->gp_activity); | 1354 | gpa = READ_ONCE(rsp->gp_activity); |
1360 | if (j - gpa > 2 * HZ) { | 1355 | if (j - gpa > 2 * HZ) { |
1361 | pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n", | 1356 | pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx ->cpu=%d\n", |
1362 | rsp->name, j - gpa, | 1357 | rsp->name, j - gpa, |
1363 | rsp->gpnum, rsp->completed, | 1358 | rsp->gpnum, rsp->completed, |
1364 | rsp->gp_flags, | 1359 | rsp->gp_flags, |
1365 | gp_state_getname(rsp->gp_state), rsp->gp_state, | 1360 | gp_state_getname(rsp->gp_state), rsp->gp_state, |
1366 | rsp->gp_kthread ? rsp->gp_kthread->state : ~0); | 1361 | rsp->gp_kthread ? rsp->gp_kthread->state : ~0, |
1362 | rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1); | ||
1367 | if (rsp->gp_kthread) { | 1363 | if (rsp->gp_kthread) { |
1368 | sched_show_task(rsp->gp_kthread); | 1364 | sched_show_task(rsp->gp_kthread); |
1369 | wake_up_process(rsp->gp_kthread); | 1365 | wake_up_process(rsp->gp_kthread); |
@@ -2067,8 +2063,8 @@ static bool rcu_gp_init(struct rcu_state *rsp) | |||
2067 | } | 2063 | } |
2068 | 2064 | ||
2069 | /* | 2065 | /* |
2070 | * Helper function for wait_event_interruptible_timeout() wakeup | 2066 | * Helper function for swait_event_idle() wakeup at force-quiescent-state |
2071 | * at force-quiescent-state time. | 2067 | * time. |
2072 | */ | 2068 | */ |
2073 | static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp) | 2069 | static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp) |
2074 | { | 2070 | { |
@@ -2206,9 +2202,8 @@ static int __noreturn rcu_gp_kthread(void *arg) | |||
2206 | READ_ONCE(rsp->gpnum), | 2202 | READ_ONCE(rsp->gpnum), |
2207 | TPS("reqwait")); | 2203 | TPS("reqwait")); |
2208 | rsp->gp_state = RCU_GP_WAIT_GPS; | 2204 | rsp->gp_state = RCU_GP_WAIT_GPS; |
2209 | swait_event_interruptible(rsp->gp_wq, | 2205 | swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) & |
2210 | READ_ONCE(rsp->gp_flags) & | 2206 | RCU_GP_FLAG_INIT); |
2211 | RCU_GP_FLAG_INIT); | ||
2212 | rsp->gp_state = RCU_GP_DONE_GPS; | 2207 | rsp->gp_state = RCU_GP_DONE_GPS; |
2213 | /* Locking provides needed memory barrier. */ | 2208 | /* Locking provides needed memory barrier. */ |
2214 | if (rcu_gp_init(rsp)) | 2209 | if (rcu_gp_init(rsp)) |
@@ -2239,7 +2234,7 @@ static int __noreturn rcu_gp_kthread(void *arg) | |||
2239 | READ_ONCE(rsp->gpnum), | 2234 | READ_ONCE(rsp->gpnum), |
2240 | TPS("fqswait")); | 2235 | TPS("fqswait")); |
2241 | rsp->gp_state = RCU_GP_WAIT_FQS; | 2236 | rsp->gp_state = RCU_GP_WAIT_FQS; |
2242 | ret = swait_event_interruptible_timeout(rsp->gp_wq, | 2237 | ret = swait_event_idle_timeout(rsp->gp_wq, |
2243 | rcu_gp_fqs_check_wake(rsp, &gf), j); | 2238 | rcu_gp_fqs_check_wake(rsp, &gf), j); |
2244 | rsp->gp_state = RCU_GP_DOING_FQS; | 2239 | rsp->gp_state = RCU_GP_DOING_FQS; |
2245 | /* Locking provides needed memory barriers. */ | 2240 | /* Locking provides needed memory barriers. */ |
@@ -2409,6 +2404,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | |||
2409 | return; | 2404 | return; |
2410 | } | 2405 | } |
2411 | WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ | 2406 | WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ |
2407 | WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 && | ||
2408 | rcu_preempt_blocked_readers_cgp(rnp)); | ||
2412 | rnp->qsmask &= ~mask; | 2409 | rnp->qsmask &= ~mask; |
2413 | trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, | 2410 | trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, |
2414 | mask, rnp->qsmask, rnp->level, | 2411 | mask, rnp->qsmask, rnp->level, |
@@ -3476,10 +3473,11 @@ static void rcu_barrier_callback(struct rcu_head *rhp) | |||
3476 | struct rcu_state *rsp = rdp->rsp; | 3473 | struct rcu_state *rsp = rdp->rsp; |
3477 | 3474 | ||
3478 | if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { | 3475 | if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { |
3479 | _rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence); | 3476 | _rcu_barrier_trace(rsp, TPS("LastCB"), -1, |
3477 | rsp->barrier_sequence); | ||
3480 | complete(&rsp->barrier_completion); | 3478 | complete(&rsp->barrier_completion); |
3481 | } else { | 3479 | } else { |
3482 | _rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence); | 3480 | _rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence); |
3483 | } | 3481 | } |
3484 | } | 3482 | } |
3485 | 3483 | ||
@@ -3491,14 +3489,15 @@ static void rcu_barrier_func(void *type) | |||
3491 | struct rcu_state *rsp = type; | 3489 | struct rcu_state *rsp = type; |
3492 | struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); | 3490 | struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); |
3493 | 3491 | ||
3494 | _rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence); | 3492 | _rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence); |
3495 | rdp->barrier_head.func = rcu_barrier_callback; | 3493 | rdp->barrier_head.func = rcu_barrier_callback; |
3496 | debug_rcu_head_queue(&rdp->barrier_head); | 3494 | debug_rcu_head_queue(&rdp->barrier_head); |
3497 | if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) { | 3495 | if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) { |
3498 | atomic_inc(&rsp->barrier_cpu_count); | 3496 | atomic_inc(&rsp->barrier_cpu_count); |
3499 | } else { | 3497 | } else { |
3500 | debug_rcu_head_unqueue(&rdp->barrier_head); | 3498 | debug_rcu_head_unqueue(&rdp->barrier_head); |
3501 | _rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence); | 3499 | _rcu_barrier_trace(rsp, TPS("IRQNQ"), -1, |
3500 | rsp->barrier_sequence); | ||
3502 | } | 3501 | } |
3503 | } | 3502 | } |
3504 | 3503 | ||
@@ -3512,14 +3511,15 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
3512 | struct rcu_data *rdp; | 3511 | struct rcu_data *rdp; |
3513 | unsigned long s = rcu_seq_snap(&rsp->barrier_sequence); | 3512 | unsigned long s = rcu_seq_snap(&rsp->barrier_sequence); |
3514 | 3513 | ||
3515 | _rcu_barrier_trace(rsp, "Begin", -1, s); | 3514 | _rcu_barrier_trace(rsp, TPS("Begin"), -1, s); |
3516 | 3515 | ||
3517 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ | 3516 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ |
3518 | mutex_lock(&rsp->barrier_mutex); | 3517 | mutex_lock(&rsp->barrier_mutex); |
3519 | 3518 | ||
3520 | /* Did someone else do our work for us? */ | 3519 | /* Did someone else do our work for us? */ |
3521 | if (rcu_seq_done(&rsp->barrier_sequence, s)) { | 3520 | if (rcu_seq_done(&rsp->barrier_sequence, s)) { |
3522 | _rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence); | 3521 | _rcu_barrier_trace(rsp, TPS("EarlyExit"), -1, |
3522 | rsp->barrier_sequence); | ||
3523 | smp_mb(); /* caller's subsequent code after above check. */ | 3523 | smp_mb(); /* caller's subsequent code after above check. */ |
3524 | mutex_unlock(&rsp->barrier_mutex); | 3524 | mutex_unlock(&rsp->barrier_mutex); |
3525 | return; | 3525 | return; |
@@ -3527,7 +3527,7 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
3527 | 3527 | ||
3528 | /* Mark the start of the barrier operation. */ | 3528 | /* Mark the start of the barrier operation. */ |
3529 | rcu_seq_start(&rsp->barrier_sequence); | 3529 | rcu_seq_start(&rsp->barrier_sequence); |
3530 | _rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence); | 3530 | _rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence); |
3531 | 3531 | ||
3532 | /* | 3532 | /* |
3533 | * Initialize the count to one rather than to zero in order to | 3533 | * Initialize the count to one rather than to zero in order to |
@@ -3550,10 +3550,10 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
3550 | rdp = per_cpu_ptr(rsp->rda, cpu); | 3550 | rdp = per_cpu_ptr(rsp->rda, cpu); |
3551 | if (rcu_is_nocb_cpu(cpu)) { | 3551 | if (rcu_is_nocb_cpu(cpu)) { |
3552 | if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) { | 3552 | if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) { |
3553 | _rcu_barrier_trace(rsp, "OfflineNoCB", cpu, | 3553 | _rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu, |
3554 | rsp->barrier_sequence); | 3554 | rsp->barrier_sequence); |
3555 | } else { | 3555 | } else { |
3556 | _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, | 3556 | _rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu, |
3557 | rsp->barrier_sequence); | 3557 | rsp->barrier_sequence); |
3558 | smp_mb__before_atomic(); | 3558 | smp_mb__before_atomic(); |
3559 | atomic_inc(&rsp->barrier_cpu_count); | 3559 | atomic_inc(&rsp->barrier_cpu_count); |
@@ -3561,11 +3561,11 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
3561 | rcu_barrier_callback, rsp, cpu, 0); | 3561 | rcu_barrier_callback, rsp, cpu, 0); |
3562 | } | 3562 | } |
3563 | } else if (rcu_segcblist_n_cbs(&rdp->cblist)) { | 3563 | } else if (rcu_segcblist_n_cbs(&rdp->cblist)) { |
3564 | _rcu_barrier_trace(rsp, "OnlineQ", cpu, | 3564 | _rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu, |
3565 | rsp->barrier_sequence); | 3565 | rsp->barrier_sequence); |
3566 | smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); | 3566 | smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); |
3567 | } else { | 3567 | } else { |
3568 | _rcu_barrier_trace(rsp, "OnlineNQ", cpu, | 3568 | _rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu, |
3569 | rsp->barrier_sequence); | 3569 | rsp->barrier_sequence); |
3570 | } | 3570 | } |
3571 | } | 3571 | } |
@@ -3582,7 +3582,7 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
3582 | wait_for_completion(&rsp->barrier_completion); | 3582 | wait_for_completion(&rsp->barrier_completion); |
3583 | 3583 | ||
3584 | /* Mark the end of the barrier operation. */ | 3584 | /* Mark the end of the barrier operation. */ |
3585 | _rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence); | 3585 | _rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence); |
3586 | rcu_seq_end(&rsp->barrier_sequence); | 3586 | rcu_seq_end(&rsp->barrier_sequence); |
3587 | 3587 | ||
3588 | /* Other rcu_barrier() invocations can now safely proceed. */ | 3588 | /* Other rcu_barrier() invocations can now safely proceed. */ |
@@ -3684,8 +3684,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
3684 | */ | 3684 | */ |
3685 | rnp = rdp->mynode; | 3685 | rnp = rdp->mynode; |
3686 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ | 3686 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ |
3687 | if (!rdp->beenonline) | ||
3688 | WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1); | ||
3689 | rdp->beenonline = true; /* We have now been online. */ | 3687 | rdp->beenonline = true; /* We have now been online. */ |
3690 | rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ | 3688 | rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ |
3691 | rdp->completed = rnp->completed; | 3689 | rdp->completed = rnp->completed; |
@@ -3789,6 +3787,8 @@ void rcu_cpu_starting(unsigned int cpu) | |||
3789 | { | 3787 | { |
3790 | unsigned long flags; | 3788 | unsigned long flags; |
3791 | unsigned long mask; | 3789 | unsigned long mask; |
3790 | int nbits; | ||
3791 | unsigned long oldmask; | ||
3792 | struct rcu_data *rdp; | 3792 | struct rcu_data *rdp; |
3793 | struct rcu_node *rnp; | 3793 | struct rcu_node *rnp; |
3794 | struct rcu_state *rsp; | 3794 | struct rcu_state *rsp; |
@@ -3799,9 +3799,15 @@ void rcu_cpu_starting(unsigned int cpu) | |||
3799 | mask = rdp->grpmask; | 3799 | mask = rdp->grpmask; |
3800 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 3800 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
3801 | rnp->qsmaskinitnext |= mask; | 3801 | rnp->qsmaskinitnext |= mask; |
3802 | oldmask = rnp->expmaskinitnext; | ||
3802 | rnp->expmaskinitnext |= mask; | 3803 | rnp->expmaskinitnext |= mask; |
3804 | oldmask ^= rnp->expmaskinitnext; | ||
3805 | nbits = bitmap_weight(&oldmask, BITS_PER_LONG); | ||
3806 | /* Allow lockless access for expedited grace periods. */ | ||
3807 | smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */ | ||
3803 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | 3808 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); |
3804 | } | 3809 | } |
3810 | smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ | ||
3805 | } | 3811 | } |
3806 | 3812 | ||
3807 | #ifdef CONFIG_HOTPLUG_CPU | 3813 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -3845,96 +3851,30 @@ void rcu_report_dead(unsigned int cpu) | |||
3845 | rcu_cleanup_dying_idle_cpu(cpu, rsp); | 3851 | rcu_cleanup_dying_idle_cpu(cpu, rsp); |
3846 | } | 3852 | } |
3847 | 3853 | ||
3848 | /* | 3854 | /* Migrate the dead CPU's callbacks to the current CPU. */ |
3849 | * Send the specified CPU's RCU callbacks to the orphanage. The | ||
3850 | * specified CPU must be offline, and the caller must hold the | ||
3851 | * ->orphan_lock. | ||
3852 | */ | ||
3853 | static void | ||
3854 | rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, | ||
3855 | struct rcu_node *rnp, struct rcu_data *rdp) | ||
3856 | { | ||
3857 | lockdep_assert_held(&rsp->orphan_lock); | ||
3858 | |||
3859 | /* No-CBs CPUs do not have orphanable callbacks. */ | ||
3860 | if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu)) | ||
3861 | return; | ||
3862 | |||
3863 | /* | ||
3864 | * Orphan the callbacks. First adjust the counts. This is safe | ||
3865 | * because _rcu_barrier() excludes CPU-hotplug operations, so it | ||
3866 | * cannot be running now. Thus no memory barrier is required. | ||
3867 | */ | ||
3868 | rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist); | ||
3869 | rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done); | ||
3870 | |||
3871 | /* | ||
3872 | * Next, move those callbacks still needing a grace period to | ||
3873 | * the orphanage, where some other CPU will pick them up. | ||
3874 | * Some of the callbacks might have gone partway through a grace | ||
3875 | * period, but that is too bad. They get to start over because we | ||
3876 | * cannot assume that grace periods are synchronized across CPUs. | ||
3877 | */ | ||
3878 | rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend); | ||
3879 | |||
3880 | /* | ||
3881 | * Then move the ready-to-invoke callbacks to the orphanage, | ||
3882 | * where some other CPU will pick them up. These will not be | ||
3883 | * required to pass though another grace period: They are done. | ||
3884 | */ | ||
3885 | rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done); | ||
3886 | |||
3887 | /* Finally, disallow further callbacks on this CPU. */ | ||
3888 | rcu_segcblist_disable(&rdp->cblist); | ||
3889 | } | ||
3890 | |||
3891 | /* | ||
3892 | * Adopt the RCU callbacks from the specified rcu_state structure's | ||
3893 | * orphanage. The caller must hold the ->orphan_lock. | ||
3894 | */ | ||
3895 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) | ||
3896 | { | ||
3897 | struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); | ||
3898 | |||
3899 | lockdep_assert_held(&rsp->orphan_lock); | ||
3900 | |||
3901 | /* No-CBs CPUs are handled specially. */ | ||
3902 | if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || | ||
3903 | rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) | ||
3904 | return; | ||
3905 | |||
3906 | /* Do the accounting first. */ | ||
3907 | rdp->n_cbs_adopted += rsp->orphan_done.len; | ||
3908 | if (rsp->orphan_done.len_lazy != rsp->orphan_done.len) | ||
3909 | rcu_idle_count_callbacks_posted(); | ||
3910 | rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done); | ||
3911 | |||
3912 | /* | ||
3913 | * We do not need a memory barrier here because the only way we | ||
3914 | * can get here if there is an rcu_barrier() in flight is if | ||
3915 | * we are the task doing the rcu_barrier(). | ||
3916 | */ | ||
3917 | |||
3918 | /* First adopt the ready-to-invoke callbacks, then the done ones. */ | ||
3919 | rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done); | ||
3920 | WARN_ON_ONCE(rsp->orphan_done.head); | ||
3921 | rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend); | ||
3922 | WARN_ON_ONCE(rsp->orphan_pend.head); | ||
3923 | WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != | ||
3924 | !rcu_segcblist_n_cbs(&rdp->cblist)); | ||
3925 | } | ||
3926 | |||
3927 | /* Orphan the dead CPU's callbacks, and then adopt them. */ | ||
3928 | static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp) | 3855 | static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp) |
3929 | { | 3856 | { |
3930 | unsigned long flags; | 3857 | unsigned long flags; |
3858 | struct rcu_data *my_rdp; | ||
3931 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 3859 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
3932 | struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ | 3860 | struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); |
3933 | 3861 | ||
3934 | raw_spin_lock_irqsave(&rsp->orphan_lock, flags); | 3862 | if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist)) |
3935 | rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); | 3863 | return; /* No callbacks to migrate. */ |
3936 | rcu_adopt_orphan_cbs(rsp, flags); | 3864 | |
3937 | raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); | 3865 | local_irq_save(flags); |
3866 | my_rdp = this_cpu_ptr(rsp->rda); | ||
3867 | if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) { | ||
3868 | local_irq_restore(flags); | ||
3869 | return; | ||
3870 | } | ||
3871 | raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ | ||
3872 | rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */ | ||
3873 | rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */ | ||
3874 | rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); | ||
3875 | WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != | ||
3876 | !rcu_segcblist_n_cbs(&my_rdp->cblist)); | ||
3877 | raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags); | ||
3938 | WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || | 3878 | WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || |
3939 | !rcu_segcblist_empty(&rdp->cblist), | 3879 | !rcu_segcblist_empty(&rdp->cblist), |
3940 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", | 3880 | "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", |
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 9af0f31d6847..8e1f285f0a70 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h | |||
@@ -219,8 +219,6 @@ struct rcu_data { | |||
219 | /* qlen at last check for QS forcing */ | 219 | /* qlen at last check for QS forcing */ |
220 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ | 220 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ |
221 | unsigned long n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */ | 221 | unsigned long n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */ |
222 | unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ | ||
223 | unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ | ||
224 | unsigned long n_force_qs_snap; | 222 | unsigned long n_force_qs_snap; |
225 | /* did other CPU force QS recently? */ | 223 | /* did other CPU force QS recently? */ |
226 | long blimit; /* Upper limit on a processed batch */ | 224 | long blimit; /* Upper limit on a processed batch */ |
@@ -268,7 +266,9 @@ struct rcu_data { | |||
268 | struct rcu_head **nocb_follower_tail; | 266 | struct rcu_head **nocb_follower_tail; |
269 | struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */ | 267 | struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */ |
270 | struct task_struct *nocb_kthread; | 268 | struct task_struct *nocb_kthread; |
269 | raw_spinlock_t nocb_lock; /* Guard following pair of fields. */ | ||
271 | int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ | 270 | int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ |
271 | struct timer_list nocb_timer; /* Enforce finite deferral. */ | ||
272 | 272 | ||
273 | /* The following fields are used by the leader, hence own cacheline. */ | 273 | /* The following fields are used by the leader, hence own cacheline. */ |
274 | struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; | 274 | struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; |
@@ -350,15 +350,6 @@ struct rcu_state { | |||
350 | 350 | ||
351 | /* End of fields guarded by root rcu_node's lock. */ | 351 | /* End of fields guarded by root rcu_node's lock. */ |
352 | 352 | ||
353 | raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp; | ||
354 | /* Protect following fields. */ | ||
355 | struct rcu_cblist orphan_pend; /* Orphaned callbacks that */ | ||
356 | /* need a grace period. */ | ||
357 | struct rcu_cblist orphan_done; /* Orphaned callbacks that */ | ||
358 | /* are ready to invoke. */ | ||
359 | /* (Contains counts.) */ | ||
360 | /* End of fields guarded by orphan_lock. */ | ||
361 | |||
362 | struct mutex barrier_mutex; /* Guards barrier fields. */ | 353 | struct mutex barrier_mutex; /* Guards barrier fields. */ |
363 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ | 354 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ |
364 | struct completion barrier_completion; /* Wake at barrier end. */ | 355 | struct completion barrier_completion; /* Wake at barrier end. */ |
@@ -495,7 +486,7 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); | |||
495 | static void rcu_init_one_nocb(struct rcu_node *rnp); | 486 | static void rcu_init_one_nocb(struct rcu_node *rnp); |
496 | static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, | 487 | static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, |
497 | bool lazy, unsigned long flags); | 488 | bool lazy, unsigned long flags); |
498 | static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, | 489 | static bool rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp, |
499 | struct rcu_data *rdp, | 490 | struct rcu_data *rdp, |
500 | unsigned long flags); | 491 | unsigned long flags); |
501 | static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); | 492 | static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); |
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index dd21ca47e4b4..46d61b597731 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h | |||
@@ -73,7 +73,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp) | |||
73 | unsigned long flags; | 73 | unsigned long flags; |
74 | unsigned long mask; | 74 | unsigned long mask; |
75 | unsigned long oldmask; | 75 | unsigned long oldmask; |
76 | int ncpus = READ_ONCE(rsp->ncpus); | 76 | int ncpus = smp_load_acquire(&rsp->ncpus); /* Order against locking. */ |
77 | struct rcu_node *rnp; | 77 | struct rcu_node *rnp; |
78 | struct rcu_node *rnp_up; | 78 | struct rcu_node *rnp_up; |
79 | 79 | ||
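The smp_load_acquire() here pairs with the smp_store_release() of ->ncpus added in rcu_cpu_starting(): once the expedited code sees the new CPU count, it is also guaranteed to see the ->expmaskinitnext update published before the release store. A rough userspace analogue using C11 atomics (illustrative only, not kernel code):

#include <stdatomic.h>

static unsigned long expmaskinitnext;   /* ordinary data being published */
static atomic_int ncpus;                /* count published with release semantics */

/* Publisher, cf. rcu_cpu_starting(): update the mask, then release-store the count. */
static void publish_cpu(unsigned long mask, int new_ncpus)
{
        expmaskinitnext |= mask;
        atomic_store_explicit(&ncpus, new_ncpus, memory_order_release);
}

/* Consumer, cf. sync_exp_reset_tree_hotplug(): acquire-load the count first. */
static unsigned long snapshot_mask(void)
{
        if (atomic_load_explicit(&ncpus, memory_order_acquire) > 0)
                return expmaskinitnext; /* the |= above is guaranteed visible */
        return 0;
}

The kernel serializes the publisher side under the rcu_node lock, so the plain read-modify-write of the mask is safe there; this sketch likewise assumes a single publisher.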
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 908b309d60d7..55bde94b9572 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h | |||
@@ -180,6 +180,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) | |||
180 | struct task_struct *t = current; | 180 | struct task_struct *t = current; |
181 | 181 | ||
182 | lockdep_assert_held(&rnp->lock); | 182 | lockdep_assert_held(&rnp->lock); |
183 | WARN_ON_ONCE(rdp->mynode != rnp); | ||
184 | WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); | ||
183 | 185 | ||
184 | /* | 186 | /* |
185 | * Decide where to queue the newly blocked task. In theory, | 187 | * Decide where to queue the newly blocked task. In theory, |
@@ -261,6 +263,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) | |||
261 | rnp->gp_tasks = &t->rcu_node_entry; | 263 | rnp->gp_tasks = &t->rcu_node_entry; |
262 | if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) | 264 | if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) |
263 | rnp->exp_tasks = &t->rcu_node_entry; | 265 | rnp->exp_tasks = &t->rcu_node_entry; |
266 | WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) != | ||
267 | !(rnp->qsmask & rdp->grpmask)); | ||
268 | WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) != | ||
269 | !(rnp->expmask & rdp->grpmask)); | ||
264 | raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */ | 270 | raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */ |
265 | 271 | ||
266 | /* | 272 | /* |
@@ -482,6 +488,7 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
482 | rnp = t->rcu_blocked_node; | 488 | rnp = t->rcu_blocked_node; |
483 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ | 489 | raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ |
484 | WARN_ON_ONCE(rnp != t->rcu_blocked_node); | 490 | WARN_ON_ONCE(rnp != t->rcu_blocked_node); |
491 | WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); | ||
485 | empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); | 492 | empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); |
486 | empty_exp = sync_rcu_preempt_exp_done(rnp); | 493 | empty_exp = sync_rcu_preempt_exp_done(rnp); |
487 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 494 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
@@ -495,10 +502,10 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
495 | if (&t->rcu_node_entry == rnp->exp_tasks) | 502 | if (&t->rcu_node_entry == rnp->exp_tasks) |
496 | rnp->exp_tasks = np; | 503 | rnp->exp_tasks = np; |
497 | if (IS_ENABLED(CONFIG_RCU_BOOST)) { | 504 | if (IS_ENABLED(CONFIG_RCU_BOOST)) { |
498 | if (&t->rcu_node_entry == rnp->boost_tasks) | ||
499 | rnp->boost_tasks = np; | ||
500 | /* Snapshot ->boost_mtx ownership w/rnp->lock held. */ | 505 | /* Snapshot ->boost_mtx ownership w/rnp->lock held. */ |
501 | drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; | 506 | drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; |
507 | if (&t->rcu_node_entry == rnp->boost_tasks) | ||
508 | rnp->boost_tasks = np; | ||
502 | } | 509 | } |
503 | 510 | ||
504 | /* | 511 | /* |
@@ -636,10 +643,17 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp) | |||
636 | */ | 643 | */ |
637 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | 644 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) |
638 | { | 645 | { |
646 | struct task_struct *t; | ||
647 | |||
639 | RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); | 648 | RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); |
640 | WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); | 649 | WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); |
641 | if (rcu_preempt_has_tasks(rnp)) | 650 | if (rcu_preempt_has_tasks(rnp)) { |
642 | rnp->gp_tasks = rnp->blkd_tasks.next; | 651 | rnp->gp_tasks = rnp->blkd_tasks.next; |
652 | t = container_of(rnp->gp_tasks, struct task_struct, | ||
653 | rcu_node_entry); | ||
654 | trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"), | ||
655 | rnp->gpnum, t->pid); | ||
656 | } | ||
643 | WARN_ON_ONCE(rnp->qsmask); | 657 | WARN_ON_ONCE(rnp->qsmask); |
644 | } | 658 | } |
645 | 659 | ||
@@ -1788,23 +1802,62 @@ bool rcu_is_nocb_cpu(int cpu) | |||
1788 | } | 1802 | } |
1789 | 1803 | ||
1790 | /* | 1804 | /* |
1791 | * Kick the leader kthread for this NOCB group. | 1805 | * Kick the leader kthread for this NOCB group. Caller holds ->nocb_lock |
1806 | * and this function releases it. | ||
1792 | */ | 1807 | */ |
1793 | static void wake_nocb_leader(struct rcu_data *rdp, bool force) | 1808 | static void __wake_nocb_leader(struct rcu_data *rdp, bool force, |
1809 | unsigned long flags) | ||
1810 | __releases(rdp->nocb_lock) | ||
1794 | { | 1811 | { |
1795 | struct rcu_data *rdp_leader = rdp->nocb_leader; | 1812 | struct rcu_data *rdp_leader = rdp->nocb_leader; |
1796 | 1813 | ||
1797 | if (!READ_ONCE(rdp_leader->nocb_kthread)) | 1814 | lockdep_assert_held(&rdp->nocb_lock); |
1815 | if (!READ_ONCE(rdp_leader->nocb_kthread)) { | ||
1816 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); | ||
1798 | return; | 1817 | return; |
1799 | if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) { | 1818 | } |
1819 | if (rdp_leader->nocb_leader_sleep || force) { | ||
1800 | /* Prior smp_mb__after_atomic() orders against prior enqueue. */ | 1820 | /* Prior smp_mb__after_atomic() orders against prior enqueue. */ |
1801 | WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); | 1821 | WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); |
1822 | del_timer(&rdp->nocb_timer); | ||
1823 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); | ||
1802 | smp_mb(); /* ->nocb_leader_sleep before swake_up(). */ | 1824 | smp_mb(); /* ->nocb_leader_sleep before swake_up(). */ |
1803 | swake_up(&rdp_leader->nocb_wq); | 1825 | swake_up(&rdp_leader->nocb_wq); |
1826 | } else { | ||
1827 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); | ||
1804 | } | 1828 | } |
1805 | } | 1829 | } |
1806 | 1830 | ||
1807 | /* | 1831 | /* |
1832 | * Kick the leader kthread for this NOCB group, but caller has not | ||
1833 | * acquired locks. | ||
1834 | */ | ||
1835 | static void wake_nocb_leader(struct rcu_data *rdp, bool force) | ||
1836 | { | ||
1837 | unsigned long flags; | ||
1838 | |||
1839 | raw_spin_lock_irqsave(&rdp->nocb_lock, flags); | ||
1840 | __wake_nocb_leader(rdp, force, flags); | ||
1841 | } | ||
1842 | |||
1843 | /* | ||
1844 | * Arrange to wake the leader kthread for this NOCB group at some | ||
1845 | * future time when it is safe to do so. | ||
1846 | */ | ||
1847 | static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype, | ||
1848 | const char *reason) | ||
1849 | { | ||
1850 | unsigned long flags; | ||
1851 | |||
1852 | raw_spin_lock_irqsave(&rdp->nocb_lock, flags); | ||
1853 | if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) | ||
1854 | mod_timer(&rdp->nocb_timer, jiffies + 1); | ||
1855 | WRITE_ONCE(rdp->nocb_defer_wakeup, waketype); | ||
1856 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, reason); | ||
1857 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); | ||
1858 | } | ||
1859 | |||
1860 | /* | ||
1808 | * Does the specified CPU need an RCU callback for the specified flavor | 1861 | * Does the specified CPU need an RCU callback for the specified flavor |
1809 | * of rcu_barrier()? | 1862 | * of rcu_barrier()? |
1810 | */ | 1863 | */ |
@@ -1891,11 +1944,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, | |||
1891 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | 1944 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, |
1892 | TPS("WakeEmpty")); | 1945 | TPS("WakeEmpty")); |
1893 | } else { | 1946 | } else { |
1894 | WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE); | 1947 | wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE, |
1895 | /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ | 1948 | TPS("WakeEmptyIsDeferred")); |
1896 | smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); | ||
1897 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | ||
1898 | TPS("WakeEmptyIsDeferred")); | ||
1899 | } | 1949 | } |
1900 | rdp->qlen_last_fqs_check = 0; | 1950 | rdp->qlen_last_fqs_check = 0; |
1901 | } else if (len > rdp->qlen_last_fqs_check + qhimark) { | 1951 | } else if (len > rdp->qlen_last_fqs_check + qhimark) { |
@@ -1905,11 +1955,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, | |||
1905 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | 1955 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, |
1906 | TPS("WakeOvf")); | 1956 | TPS("WakeOvf")); |
1907 | } else { | 1957 | } else { |
1908 | WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_FORCE); | 1958 | wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE, |
1909 | /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ | 1959 | TPS("WakeOvfIsDeferred")); |
1910 | smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); | ||
1911 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | ||
1912 | TPS("WakeOvfIsDeferred")); | ||
1913 | } | 1960 | } |
1914 | rdp->qlen_last_fqs_check = LONG_MAX / 2; | 1961 | rdp->qlen_last_fqs_check = LONG_MAX / 2; |
1915 | } else { | 1962 | } else { |
@@ -1961,30 +2008,19 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, | |||
1961 | * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is | 2008 | * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is |
1962 | * not a no-CBs CPU. | 2009 | * not a no-CBs CPU. |
1963 | */ | 2010 | */ |
1964 | static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, | 2011 | static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp, |
1965 | struct rcu_data *rdp, | 2012 | struct rcu_data *rdp, |
1966 | unsigned long flags) | 2013 | unsigned long flags) |
1967 | { | 2014 | { |
1968 | long ql = rsp->orphan_done.len; | 2015 | RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_nocb_adopt_orphan_cbs() invoked with irqs enabled!!!"); |
1969 | long qll = rsp->orphan_done.len_lazy; | ||
1970 | |||
1971 | /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ | ||
1972 | if (!rcu_is_nocb_cpu(smp_processor_id())) | 2016 | if (!rcu_is_nocb_cpu(smp_processor_id())) |
1973 | return false; | 2017 | return false; /* Not NOCBs CPU, caller must migrate CBs. */ |
1974 | 2018 | __call_rcu_nocb_enqueue(my_rdp, rcu_segcblist_head(&rdp->cblist), | |
1975 | /* First, enqueue the donelist, if any. This preserves CB ordering. */ | 2019 | rcu_segcblist_tail(&rdp->cblist), |
1976 | if (rsp->orphan_done.head) { | 2020 | rcu_segcblist_n_cbs(&rdp->cblist), |
1977 | __call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done), | 2021 | rcu_segcblist_n_lazy_cbs(&rdp->cblist), flags); |
1978 | rcu_cblist_tail(&rsp->orphan_done), | 2022 | rcu_segcblist_init(&rdp->cblist); |
1979 | ql, qll, flags); | 2023 | rcu_segcblist_disable(&rdp->cblist); |
1980 | } | ||
1981 | if (rsp->orphan_pend.head) { | ||
1982 | __call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend), | ||
1983 | rcu_cblist_tail(&rsp->orphan_pend), | ||
1984 | ql, qll, flags); | ||
1985 | } | ||
1986 | rcu_cblist_init(&rsp->orphan_done); | ||
1987 | rcu_cblist_init(&rsp->orphan_pend); | ||
1988 | return true; | 2024 | return true; |
1989 | } | 2025 | } |
1990 | 2026 | ||
@@ -2031,6 +2067,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) | |||
2031 | static void nocb_leader_wait(struct rcu_data *my_rdp) | 2067 | static void nocb_leader_wait(struct rcu_data *my_rdp) |
2032 | { | 2068 | { |
2033 | bool firsttime = true; | 2069 | bool firsttime = true; |
2070 | unsigned long flags; | ||
2034 | bool gotcbs; | 2071 | bool gotcbs; |
2035 | struct rcu_data *rdp; | 2072 | struct rcu_data *rdp; |
2036 | struct rcu_head **tail; | 2073 | struct rcu_head **tail; |
@@ -2039,13 +2076,17 @@ wait_again: | |||
2039 | 2076 | ||
2040 | /* Wait for callbacks to appear. */ | 2077 | /* Wait for callbacks to appear. */ |
2041 | if (!rcu_nocb_poll) { | 2078 | if (!rcu_nocb_poll) { |
2042 | trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep"); | 2079 | trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep")); |
2043 | swait_event_interruptible(my_rdp->nocb_wq, | 2080 | swait_event_interruptible(my_rdp->nocb_wq, |
2044 | !READ_ONCE(my_rdp->nocb_leader_sleep)); | 2081 | !READ_ONCE(my_rdp->nocb_leader_sleep)); |
2045 | /* Memory barrier handled by smp_mb() calls below and repoll. */ | 2082 | raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags); |
2083 | my_rdp->nocb_leader_sleep = true; | ||
2084 | WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); | ||
2085 | del_timer(&my_rdp->nocb_timer); | ||
2086 | raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags); | ||
2046 | } else if (firsttime) { | 2087 | } else if (firsttime) { |
2047 | firsttime = false; /* Don't drown trace log with "Poll"! */ | 2088 | firsttime = false; /* Don't drown trace log with "Poll"! */ |
2048 | trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Poll"); | 2089 | trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Poll")); |
2049 | } | 2090 | } |
2050 | 2091 | ||
2051 | /* | 2092 | /* |
@@ -2054,7 +2095,7 @@ wait_again: | |||
2054 | * nocb_gp_head, where they await a grace period. | 2095 | * nocb_gp_head, where they await a grace period. |
2055 | */ | 2096 | */ |
2056 | gotcbs = false; | 2097 | gotcbs = false; |
2057 | smp_mb(); /* wakeup before ->nocb_head reads. */ | 2098 | smp_mb(); /* wakeup and _sleep before ->nocb_head reads. */ |
2058 | for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { | 2099 | for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { |
2059 | rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head); | 2100 | rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head); |
2060 | if (!rdp->nocb_gp_head) | 2101 | if (!rdp->nocb_gp_head) |
@@ -2066,56 +2107,41 @@ wait_again: | |||
2066 | gotcbs = true; | 2107 | gotcbs = true; |
2067 | } | 2108 | } |
2068 | 2109 | ||
2069 | /* | 2110 | /* No callbacks? Sleep a bit if polling, and go retry. */ |
2070 | * If there were no callbacks, sleep a bit, rescan after a | ||
2071 | * memory barrier, and go retry. | ||
2072 | */ | ||
2073 | if (unlikely(!gotcbs)) { | 2111 | if (unlikely(!gotcbs)) { |
2074 | if (!rcu_nocb_poll) | ||
2075 | trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, | ||
2076 | "WokeEmpty"); | ||
2077 | WARN_ON(signal_pending(current)); | 2112 | WARN_ON(signal_pending(current)); |
2078 | schedule_timeout_interruptible(1); | 2113 | if (rcu_nocb_poll) { |
2079 | 2114 | schedule_timeout_interruptible(1); | |
2080 | /* Rescan in case we were a victim of memory ordering. */ | 2115 | } else { |
2081 | my_rdp->nocb_leader_sleep = true; | 2116 | trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, |
2082 | smp_mb(); /* Ensure _sleep true before scan. */ | 2117 | TPS("WokeEmpty")); |
2083 | for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) | 2118 | } |
2084 | if (READ_ONCE(rdp->nocb_head)) { | ||
2085 | /* Found CB, so short-circuit next wait. */ | ||
2086 | my_rdp->nocb_leader_sleep = false; | ||
2087 | break; | ||
2088 | } | ||
2089 | goto wait_again; | 2119 | goto wait_again; |
2090 | } | 2120 | } |
2091 | 2121 | ||
2092 | /* Wait for one grace period. */ | 2122 | /* Wait for one grace period. */ |
2093 | rcu_nocb_wait_gp(my_rdp); | 2123 | rcu_nocb_wait_gp(my_rdp); |
2094 | 2124 | ||
2095 | /* | ||
2096 | * We left ->nocb_leader_sleep unset to reduce cache thrashing. | ||
2097 | * We set it now, but recheck for new callbacks while | ||
2098 | * traversing our follower list. | ||
2099 | */ | ||
2100 | my_rdp->nocb_leader_sleep = true; | ||
2101 | smp_mb(); /* Ensure _sleep true before scan of ->nocb_head. */ | ||
2102 | |||
2103 | /* Each pass through the following loop wakes a follower, if needed. */ | 2125 | /* Each pass through the following loop wakes a follower, if needed. */ |
2104 | for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { | 2126 | for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { |
2105 | if (READ_ONCE(rdp->nocb_head)) | 2127 | if (!rcu_nocb_poll && |
2128 | READ_ONCE(rdp->nocb_head) && | ||
2129 | READ_ONCE(my_rdp->nocb_leader_sleep)) { | ||
2130 | raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags); | ||
2106 | my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/ | 2131 | my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/ |
2132 | raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags); | ||
2133 | } | ||
2107 | if (!rdp->nocb_gp_head) | 2134 | if (!rdp->nocb_gp_head) |
2108 | continue; /* No CBs, so no need to wake follower. */ | 2135 | continue; /* No CBs, so no need to wake follower. */ |
2109 | 2136 | ||
2110 | /* Append callbacks to follower's "done" list. */ | 2137 | /* Append callbacks to follower's "done" list. */ |
2111 | tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail); | 2138 | raw_spin_lock_irqsave(&rdp->nocb_lock, flags); |
2139 | tail = rdp->nocb_follower_tail; | ||
2140 | rdp->nocb_follower_tail = rdp->nocb_gp_tail; | ||
2112 | *tail = rdp->nocb_gp_head; | 2141 | *tail = rdp->nocb_gp_head; |
2113 | smp_mb__after_atomic(); /* Store *tail before wakeup. */ | 2142 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); |
2114 | if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { | 2143 | if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { |
2115 | /* | 2144 | /* List was empty, so wake up the follower. */ |
2116 | * List was empty, wake up the follower. | ||
2117 | * Memory barriers supplied by atomic_long_add(). | ||
2118 | */ | ||
2119 | swake_up(&rdp->nocb_wq); | 2145 | swake_up(&rdp->nocb_wq); |
2120 | } | 2146 | } |
2121 | } | 2147 | } |
@@ -2131,28 +2157,16 @@ wait_again: | |||
2131 | */ | 2157 | */ |
2132 | static void nocb_follower_wait(struct rcu_data *rdp) | 2158 | static void nocb_follower_wait(struct rcu_data *rdp) |
2133 | { | 2159 | { |
2134 | bool firsttime = true; | ||
2135 | |||
2136 | for (;;) { | 2160 | for (;;) { |
2137 | if (!rcu_nocb_poll) { | 2161 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep")); |
2138 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | 2162 | swait_event_interruptible(rdp->nocb_wq, |
2139 | "FollowerSleep"); | 2163 | READ_ONCE(rdp->nocb_follower_head)); |
2140 | swait_event_interruptible(rdp->nocb_wq, | ||
2141 | READ_ONCE(rdp->nocb_follower_head)); | ||
2142 | } else if (firsttime) { | ||
2143 | /* Don't drown trace log with "Poll"! */ | ||
2144 | firsttime = false; | ||
2145 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "Poll"); | ||
2146 | } | ||
2147 | if (smp_load_acquire(&rdp->nocb_follower_head)) { | 2164 | if (smp_load_acquire(&rdp->nocb_follower_head)) { |
2148 | /* ^^^ Ensure CB invocation follows _head test. */ | 2165 | /* ^^^ Ensure CB invocation follows _head test. */ |
2149 | return; | 2166 | return; |
2150 | } | 2167 | } |
2151 | if (!rcu_nocb_poll) | ||
2152 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | ||
2153 | "WokeEmpty"); | ||
2154 | WARN_ON(signal_pending(current)); | 2168 | WARN_ON(signal_pending(current)); |
2155 | schedule_timeout_interruptible(1); | 2169 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeEmpty")); |
2156 | } | 2170 | } |
2157 | } | 2171 | } |
2158 | 2172 | ||
@@ -2165,6 +2179,7 @@ static void nocb_follower_wait(struct rcu_data *rdp) | |||
2165 | static int rcu_nocb_kthread(void *arg) | 2179 | static int rcu_nocb_kthread(void *arg) |
2166 | { | 2180 | { |
2167 | int c, cl; | 2181 | int c, cl; |
2182 | unsigned long flags; | ||
2168 | struct rcu_head *list; | 2183 | struct rcu_head *list; |
2169 | struct rcu_head *next; | 2184 | struct rcu_head *next; |
2170 | struct rcu_head **tail; | 2185 | struct rcu_head **tail; |
@@ -2179,11 +2194,14 @@ static int rcu_nocb_kthread(void *arg) | |||
2179 | nocb_follower_wait(rdp); | 2194 | nocb_follower_wait(rdp); |
2180 | 2195 | ||
2181 | /* Pull the ready-to-invoke callbacks onto local list. */ | 2196 | /* Pull the ready-to-invoke callbacks onto local list. */ |
2182 | list = READ_ONCE(rdp->nocb_follower_head); | 2197 | raw_spin_lock_irqsave(&rdp->nocb_lock, flags); |
2198 | list = rdp->nocb_follower_head; | ||
2199 | rdp->nocb_follower_head = NULL; | ||
2200 | tail = rdp->nocb_follower_tail; | ||
2201 | rdp->nocb_follower_tail = &rdp->nocb_follower_head; | ||
2202 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); | ||
2183 | BUG_ON(!list); | 2203 | BUG_ON(!list); |
2184 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); | 2204 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeNonEmpty")); |
2185 | WRITE_ONCE(rdp->nocb_follower_head, NULL); | ||
2186 | tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); | ||
2187 | 2205 | ||
2188 | /* Each pass through the following loop invokes a callback. */ | 2206 | /* Each pass through the following loop invokes a callback. */ |
2189 | trace_rcu_batch_start(rdp->rsp->name, | 2207 | trace_rcu_batch_start(rdp->rsp->name, |
@@ -2226,18 +2244,39 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) | |||
2226 | } | 2244 | } |
2227 | 2245 | ||
2228 | /* Do a deferred wakeup of rcu_nocb_kthread(). */ | 2246 | /* Do a deferred wakeup of rcu_nocb_kthread(). */ |
2229 | static void do_nocb_deferred_wakeup(struct rcu_data *rdp) | 2247 | static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp) |
2230 | { | 2248 | { |
2249 | unsigned long flags; | ||
2231 | int ndw; | 2250 | int ndw; |
2232 | 2251 | ||
2233 | if (!rcu_nocb_need_deferred_wakeup(rdp)) | 2252 | raw_spin_lock_irqsave(&rdp->nocb_lock, flags); |
2253 | if (!rcu_nocb_need_deferred_wakeup(rdp)) { | ||
2254 | raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); | ||
2234 | return; | 2255 | return; |
2256 | } | ||
2235 | ndw = READ_ONCE(rdp->nocb_defer_wakeup); | 2257 | ndw = READ_ONCE(rdp->nocb_defer_wakeup); |
2236 | WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); | 2258 | WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); |
2237 | wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE); | 2259 | __wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); |
2238 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); | 2260 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); |
2239 | } | 2261 | } |
2240 | 2262 | ||
2263 | /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */ | ||
2264 | static void do_nocb_deferred_wakeup_timer(unsigned long x) | ||
2265 | { | ||
2266 | do_nocb_deferred_wakeup_common((struct rcu_data *)x); | ||
2267 | } | ||
2268 | |||
2269 | /* | ||
2270 | * Do a deferred wakeup of rcu_nocb_kthread() from fastpath. | ||
2271 | * This means we do an inexact common-case check. Note that if | ||
2272 | * we miss, ->nocb_timer will eventually clean things up. | ||
2273 | */ | ||
2274 | static void do_nocb_deferred_wakeup(struct rcu_data *rdp) | ||
2275 | { | ||
2276 | if (rcu_nocb_need_deferred_wakeup(rdp)) | ||
2277 | do_nocb_deferred_wakeup_common(rdp); | ||
2278 | } | ||
2279 | |||
2241 | void __init rcu_init_nohz(void) | 2280 | void __init rcu_init_nohz(void) |
2242 | { | 2281 | { |
2243 | int cpu; | 2282 | int cpu; |
@@ -2287,6 +2326,9 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) | |||
2287 | rdp->nocb_tail = &rdp->nocb_head; | 2326 | rdp->nocb_tail = &rdp->nocb_head; |
2288 | init_swait_queue_head(&rdp->nocb_wq); | 2327 | init_swait_queue_head(&rdp->nocb_wq); |
2289 | rdp->nocb_follower_tail = &rdp->nocb_follower_head; | 2328 | rdp->nocb_follower_tail = &rdp->nocb_follower_head; |
2329 | raw_spin_lock_init(&rdp->nocb_lock); | ||
2330 | setup_timer(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, | ||
2331 | (unsigned long)rdp); | ||
2290 | } | 2332 | } |
2291 | 2333 | ||
2292 | /* | 2334 | /* |
@@ -2459,7 +2501,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, | |||
2459 | return false; | 2501 | return false; |
2460 | } | 2502 | } |
2461 | 2503 | ||
2462 | static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, | 2504 | static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp, |
2463 | struct rcu_data *rdp, | 2505 | struct rcu_data *rdp, |
2464 | unsigned long flags) | 2506 | unsigned long flags) |
2465 | { | 2507 | { |
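Several tree_plugin.h hunks above trade the xchg()-based handoff of the ->nocb_follower list for plain pointer updates under the new ->nocb_lock. The underlying idiom is detaching a singly linked list maintained with a head pointer and a tail pointer-to-pointer while holding a lock. A small userspace sketch of that idiom (a pthread mutex stands in for the raw spinlock; the struct and function names are illustrative, not the kernel's):

#include <pthread.h>
#include <stddef.h>

struct cb {
        struct cb *next;
};

static pthread_mutex_t nocb_lock = PTHREAD_MUTEX_INITIALIZER;
static struct cb *follower_head;
static struct cb **follower_tail = &follower_head;

/* Leader side: splice a NULL-terminated chain ('head'..'*tail') onto the list. */
static void append_followers(struct cb *head, struct cb **tail)
{
        pthread_mutex_lock(&nocb_lock);
        *follower_tail = head;          /* splice onto the current tail */
        follower_tail = tail;           /* new tail is the chain's tail */
        pthread_mutex_unlock(&nocb_lock);
}

/* Follower side: detach the whole pending list for local invocation. */
static struct cb *detach_followers(void)
{
        struct cb *list;

        pthread_mutex_lock(&nocb_lock);
        list = follower_head;
        follower_head = NULL;
        follower_tail = &follower_head; /* back to the empty state */
        pthread_mutex_unlock(&nocb_lock);
        return list;
}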
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 00e77c470017..5033b66d2753 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c | |||
@@ -568,7 +568,7 @@ static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq); | |||
568 | static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); | 568 | static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); |
569 | 569 | ||
570 | /* Track exiting tasks in order to allow them to be waited for. */ | 570 | /* Track exiting tasks in order to allow them to be waited for. */ |
571 | DEFINE_SRCU(tasks_rcu_exit_srcu); | 571 | DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu); |
572 | 572 | ||
573 | /* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ | 573 | /* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ |
574 | #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) | 574 | #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) |
@@ -875,6 +875,22 @@ static void rcu_spawn_tasks_kthread(void) | |||
875 | mutex_unlock(&rcu_tasks_kthread_mutex); | 875 | mutex_unlock(&rcu_tasks_kthread_mutex); |
876 | } | 876 | } |
877 | 877 | ||
878 | /* Do the srcu_read_lock() for the above synchronize_srcu(). */ | ||
879 | void exit_tasks_rcu_start(void) | ||
880 | { | ||
881 | preempt_disable(); | ||
882 | current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu); | ||
883 | preempt_enable(); | ||
884 | } | ||
885 | |||
886 | /* Do the srcu_read_unlock() for the above synchronize_srcu(). */ | ||
887 | void exit_tasks_rcu_finish(void) | ||
888 | { | ||
889 | preempt_disable(); | ||
890 | __srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx); | ||
891 | preempt_enable(); | ||
892 | } | ||
893 | |||
878 | #endif /* #ifdef CONFIG_TASKS_RCU */ | 894 | #endif /* #ifdef CONFIG_TASKS_RCU */ |
879 | 895 | ||
880 | #ifndef CONFIG_TINY_RCU | 896 | #ifndef CONFIG_TINY_RCU |
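The new exit_tasks_rcu_start()/exit_tasks_rcu_finish() helpers are thin wrappers around an SRCU read-side critical section, which the Tasks-RCU machinery can wait out with synchronize_srcu(&tasks_rcu_exit_srcu). As a reminder of the general shape of that pairing, a hedged kernel-style sketch (my_srcu and the surrounding functions are illustrative, not the Tasks-RCU code):

#include <linux/srcu.h>

DEFINE_STATIC_SRCU(my_srcu);

/* Reader side: may sleep, brackets the region the updater must wait out. */
static void my_reader(void)
{
        int idx;

        idx = srcu_read_lock(&my_srcu);
        /* ... access state that the updater might retire ... */
        srcu_read_unlock(&my_srcu, idx);
}

/* Updater side: waits for all readers that began before this call. */
static void my_updater(void)
{
        /* ... unlink the state from readers' view ... */
        synchronize_srcu(&my_srcu);
        /* ... no reader that could still see it remains ... */
}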
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 53f0164ed362..78f54932ea1d 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile | |||
@@ -25,3 +25,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o | |||
25 | obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o | 25 | obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o |
26 | obj-$(CONFIG_CPU_FREQ) += cpufreq.o | 26 | obj-$(CONFIG_CPU_FREQ) += cpufreq.o |
27 | obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o | 27 | obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o |
28 | obj-$(CONFIG_MEMBARRIER) += membarrier.o | ||
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 13fc5ae9bf2f..c9524d2d9316 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c | |||
@@ -300,6 +300,8 @@ EXPORT_SYMBOL(try_wait_for_completion); | |||
300 | */ | 300 | */ |
301 | bool completion_done(struct completion *x) | 301 | bool completion_done(struct completion *x) |
302 | { | 302 | { |
303 | unsigned long flags; | ||
304 | |||
303 | if (!READ_ONCE(x->done)) | 305 | if (!READ_ONCE(x->done)) |
304 | return false; | 306 | return false; |
305 | 307 | ||
@@ -307,14 +309,9 @@ bool completion_done(struct completion *x) | |||
307 | * If ->done, we need to wait for complete() to release ->wait.lock | 309 | * If ->done, we need to wait for complete() to release ->wait.lock |
308 | * otherwise we can end up freeing the completion before complete() | 310 | * otherwise we can end up freeing the completion before complete() |
309 | * is done referencing it. | 311 | * is done referencing it. |
310 | * | ||
311 | * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders | ||
312 | * the loads of ->done and ->wait.lock such that we cannot observe | ||
313 | * the lock before complete() acquires it while observing the ->done | ||
314 | * after it's acquired the lock. | ||
315 | */ | 312 | */ |
316 | smp_rmb(); | 313 | spin_lock_irqsave(&x->wait.lock, flags); |
317 | spin_unlock_wait(&x->wait.lock); | 314 | spin_unlock_irqrestore(&x->wait.lock, flags); |
318 | return true; | 315 | return true; |
319 | } | 316 | } |
320 | EXPORT_SYMBOL(completion_done); | 317 | EXPORT_SYMBOL(completion_done); |
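This completion.c hunk, like the scheduler and task_work hunks later in the diff, replaces a spin_unlock_wait()-style wait with a full lock/unlock pair: acquiring and immediately releasing the lock both waits out the current critical section and supplies the ordering the deleted comment used to argue for explicitly. A minimal sketch of the idiom (an illustrative helper, not an existing kernel function):

#include <linux/spinlock.h>

/*
 * Illustrative helper: wait for any current holder of 'lock' to release it,
 * with the full ordering of a real acquisition (the empty critical section
 * does nothing by design).
 */
static inline void wait_for_lock_holder(spinlock_t *lock)
{
        unsigned long flags;

        spin_lock_irqsave(lock, flags);
        spin_unlock_irqrestore(lock, flags);
}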
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0869b20fba81..e053c31d96da 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -951,8 +951,13 @@ struct migration_arg { | |||
951 | static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, | 951 | static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, |
952 | struct task_struct *p, int dest_cpu) | 952 | struct task_struct *p, int dest_cpu) |
953 | { | 953 | { |
954 | if (unlikely(!cpu_active(dest_cpu))) | 954 | if (p->flags & PF_KTHREAD) { |
955 | return rq; | 955 | if (unlikely(!cpu_online(dest_cpu))) |
956 | return rq; | ||
957 | } else { | ||
958 | if (unlikely(!cpu_active(dest_cpu))) | ||
959 | return rq; | ||
960 | } | ||
956 | 961 | ||
957 | /* Affinity changed (again). */ | 962 | /* Affinity changed (again). */ |
958 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 963 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
@@ -2635,6 +2640,16 @@ static struct rq *finish_task_switch(struct task_struct *prev) | |||
2635 | prev_state = prev->state; | 2640 | prev_state = prev->state; |
2636 | vtime_task_switch(prev); | 2641 | vtime_task_switch(prev); |
2637 | perf_event_task_sched_in(prev, current); | 2642 | perf_event_task_sched_in(prev, current); |
2643 | /* | ||
2644 | * The membarrier system call requires a full memory barrier | ||
2645 | * after storing to rq->curr, before going back to user-space. | ||
2646 | * | ||
2647 | * TODO: This smp_mb__after_unlock_lock can go away if PPC end | ||
2648 | * up adding a full barrier to switch_mm(), or we should figure | ||
2649 | * out if a smp_mb__after_unlock_lock is really the proper API | ||
2650 | * to use. | ||
2651 | */ | ||
2652 | smp_mb__after_unlock_lock(); | ||
2638 | finish_lock_switch(rq, prev); | 2653 | finish_lock_switch(rq, prev); |
2639 | finish_arch_post_lock_switch(); | 2654 | finish_arch_post_lock_switch(); |
2640 | 2655 | ||
@@ -3324,6 +3339,21 @@ static void __sched notrace __schedule(bool preempt) | |||
3324 | if (likely(prev != next)) { | 3339 | if (likely(prev != next)) { |
3325 | rq->nr_switches++; | 3340 | rq->nr_switches++; |
3326 | rq->curr = next; | 3341 | rq->curr = next; |
3342 | /* | ||
3343 | * The membarrier system call requires each architecture | ||
3344 | * to have a full memory barrier after updating | ||
3345 | * rq->curr, before returning to user-space. For TSO | ||
3346 | * (e.g. x86), the architecture must provide its own | ||
3347 | * barrier in switch_mm(). For weakly ordered machines | ||
3348 | * for which spin_unlock() acts as a full memory | ||
3349 | * barrier, finish_lock_switch() in common code takes | ||
3350 | * care of this barrier. For weakly ordered machines for | ||
3351 | * which spin_unlock() acts as a RELEASE barrier (only | ||
3352 | * arm64 and PowerPC), arm64 has a full barrier in | ||
3353 | * switch_to(), and PowerPC has | ||
3354 | * smp_mb__after_unlock_lock() before | ||
3355 | * finish_lock_switch(). | ||
3356 | */ | ||
3327 | ++*switch_count; | 3357 | ++*switch_count; |
3328 | 3358 | ||
3329 | trace_sched_switch(preempt, prev, next); | 3359 | trace_sched_switch(preempt, prev, next); |
@@ -3352,8 +3382,8 @@ void __noreturn do_task_dead(void) | |||
3352 | * To avoid it, we have to wait for releasing tsk->pi_lock which | 3382 | * To avoid it, we have to wait for releasing tsk->pi_lock which |
3353 | * is held by try_to_wake_up() | 3383 | * is held by try_to_wake_up() |
3354 | */ | 3384 | */ |
3355 | smp_mb(); | 3385 | raw_spin_lock_irq(¤t->pi_lock); |
3356 | raw_spin_unlock_wait(¤t->pi_lock); | 3386 | raw_spin_unlock_irq(¤t->pi_lock); |
3357 | 3387 | ||
3358 | /* Causes final put_task_struct in finish_task_switch(): */ | 3388 | /* Causes final put_task_struct in finish_task_switch(): */ |
3359 | __set_current_state(TASK_DEAD); | 3389 | __set_current_state(TASK_DEAD); |
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c new file mode 100644 index 000000000000..a92fddc22747 --- /dev/null +++ b/kernel/sched/membarrier.c | |||
@@ -0,0 +1,152 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | ||
3 | * | ||
4 | * membarrier system call | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | */ | ||
16 | |||
17 | #include <linux/syscalls.h> | ||
18 | #include <linux/membarrier.h> | ||
19 | #include <linux/tick.h> | ||
20 | #include <linux/cpumask.h> | ||
21 | |||
22 | #include "sched.h" /* for cpu_rq(). */ | ||
23 | |||
24 | /* | ||
25 | * Bitmask made from an "or" of all commands within enum membarrier_cmd, | ||
26 | * except MEMBARRIER_CMD_QUERY. | ||
27 | */ | ||
28 | #define MEMBARRIER_CMD_BITMASK \ | ||
29 | (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED) | ||
30 | |||
31 | static void ipi_mb(void *info) | ||
32 | { | ||
33 | smp_mb(); /* IPIs should be serializing but paranoid. */ | ||
34 | } | ||
35 | |||
36 | static void membarrier_private_expedited(void) | ||
37 | { | ||
38 | int cpu; | ||
39 | bool fallback = false; | ||
40 | cpumask_var_t tmpmask; | ||
41 | |||
42 | if (num_online_cpus() == 1) | ||
43 | return; | ||
44 | |||
45 | /* | ||
46 | * Matches memory barriers around rq->curr modification in | ||
47 | * scheduler. | ||
48 | */ | ||
49 | smp_mb(); /* system call entry is not a mb. */ | ||
50 | |||
51 | /* | ||
52 | * Expedited membarrier commands guarantee that they won't | ||
53 | * block, hence the GFP_NOWAIT allocation flag and fallback | ||
54 | * implementation. | ||
55 | */ | ||
56 | if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) { | ||
57 | /* Fallback for OOM. */ | ||
58 | fallback = true; | ||
59 | } | ||
60 | |||
61 | cpus_read_lock(); | ||
62 | for_each_online_cpu(cpu) { | ||
63 | struct task_struct *p; | ||
64 | |||
65 | /* | ||
66 | * Skipping the current CPU is OK even though we can be | ||
67 | * migrated at any point. The current CPU, at the point | ||
68 | * where we read raw_smp_processor_id(), is ensured to | ||
69 | * be in program order with respect to the caller | ||
70 | * thread. Therefore, we can skip this CPU from the | ||
71 | * iteration. | ||
72 | */ | ||
73 | if (cpu == raw_smp_processor_id()) | ||
74 | continue; | ||
75 | rcu_read_lock(); | ||
76 | p = task_rcu_dereference(&cpu_rq(cpu)->curr); | ||
77 | if (p && p->mm == current->mm) { | ||
78 | if (!fallback) | ||
79 | __cpumask_set_cpu(cpu, tmpmask); | ||
80 | else | ||
81 | smp_call_function_single(cpu, ipi_mb, NULL, 1); | ||
82 | } | ||
83 | rcu_read_unlock(); | ||
84 | } | ||
85 | if (!fallback) { | ||
86 | smp_call_function_many(tmpmask, ipi_mb, NULL, 1); | ||
87 | free_cpumask_var(tmpmask); | ||
88 | } | ||
89 | cpus_read_unlock(); | ||
90 | |||
91 | /* | ||
92 | * Memory barrier on the caller thread _after_ we finished | ||
93 | * waiting for the last IPI. Matches memory barriers around | ||
94 | * rq->curr modification in scheduler. | ||
95 | */ | ||
96 | smp_mb(); /* exit from system call is not a mb */ | ||
97 | } | ||
98 | |||
99 | /** | ||
100 | * sys_membarrier - issue memory barriers on a set of threads | ||
101 | * @cmd: Takes command values defined in enum membarrier_cmd. | ||
102 | * @flags: Currently needs to be 0. For future extensions. | ||
103 | * | ||
104 | * If this system call is not implemented, -ENOSYS is returned. If the | ||
105 | * command specified does not exist, not available on the running | ||
106 | * kernel, or if the command argument is invalid, this system call | ||
107 | * returns -EINVAL. For a given command, with flags argument set to 0, | ||
108 | * this system call is guaranteed to always return the same value until | ||
109 | * reboot. | ||
110 | * | ||
111 | * All memory accesses performed in program order from each targeted thread | ||
112 | * is guaranteed to be ordered with respect to sys_membarrier(). If we use | ||
113 | * the semantic "barrier()" to represent a compiler barrier forcing memory | ||
114 | * accesses to be performed in program order across the barrier, and | ||
115 | * smp_mb() to represent explicit memory barriers forcing full memory | ||
116 | * ordering across the barrier, we have the following ordering table for | ||
117 | * each pair of barrier(), sys_membarrier() and smp_mb(): | ||
118 | * | ||
119 | * The pair ordering is detailed as (O: ordered, X: not ordered): | ||
120 | * | ||
121 | * barrier() smp_mb() sys_membarrier() | ||
122 | * barrier() X X O | ||
123 | * smp_mb() X O O | ||
124 | * sys_membarrier() O O O | ||
125 | */ | ||
126 | SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) | ||
127 | { | ||
128 | if (unlikely(flags)) | ||
129 | return -EINVAL; | ||
130 | switch (cmd) { | ||
131 | case MEMBARRIER_CMD_QUERY: | ||
132 | { | ||
133 | int cmd_mask = MEMBARRIER_CMD_BITMASK; | ||
134 | |||
135 | if (tick_nohz_full_enabled()) | ||
136 | cmd_mask &= ~MEMBARRIER_CMD_SHARED; | ||
137 | return cmd_mask; | ||
138 | } | ||
139 | case MEMBARRIER_CMD_SHARED: | ||
140 | /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */ | ||
141 | if (tick_nohz_full_enabled()) | ||
142 | return -EINVAL; | ||
143 | if (num_online_cpus() > 1) | ||
144 | synchronize_sched(); | ||
145 | return 0; | ||
146 | case MEMBARRIER_CMD_PRIVATE_EXPEDITED: | ||
147 | membarrier_private_expedited(); | ||
148 | return 0; | ||
149 | default: | ||
150 | return -EINVAL; | ||
151 | } | ||
152 | } | ||
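For completeness, a minimal user-space sketch of exercising the new command; it assumes a uapi <linux/membarrier.h> that already defines MEMBARRIER_CMD_PRIVATE_EXPEDITED, and queries support before use:

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, int flags)
{
        return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
        int supported = membarrier(MEMBARRIER_CMD_QUERY, 0);

        if (supported < 0 || !(supported & MEMBARRIER_CMD_PRIVATE_EXPEDITED)) {
                fprintf(stderr, "private expedited membarrier not supported\n");
                return 1;
        }
        /* Full barrier on every CPU currently running a thread of this process. */
        if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) {
                perror("membarrier");
                return 1;
        }
        return 0;
}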
diff --git a/kernel/task_work.c b/kernel/task_work.c index d513051fcca2..836a72a66fba 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c | |||
@@ -96,20 +96,16 @@ void task_work_run(void) | |||
96 | * work->func() can do task_work_add(), do not set | 96 | * work->func() can do task_work_add(), do not set |
97 | * work_exited unless the list is empty. | 97 | * work_exited unless the list is empty. |
98 | */ | 98 | */ |
99 | raw_spin_lock_irq(&task->pi_lock); | ||
99 | do { | 100 | do { |
100 | work = READ_ONCE(task->task_works); | 101 | work = READ_ONCE(task->task_works); |
101 | head = !work && (task->flags & PF_EXITING) ? | 102 | head = !work && (task->flags & PF_EXITING) ? |
102 | &work_exited : NULL; | 103 | &work_exited : NULL; |
103 | } while (cmpxchg(&task->task_works, work, head) != work); | 104 | } while (cmpxchg(&task->task_works, work, head) != work); |
105 | raw_spin_unlock_irq(&task->pi_lock); | ||
104 | 106 | ||
105 | if (!work) | 107 | if (!work) |
106 | break; | 108 | break; |
107 | /* | ||
108 | * Synchronize with task_work_cancel(). It can't remove | ||
109 | * the first entry == work, cmpxchg(task_works) should | ||
110 | * fail, but it can play with *work and other entries. | ||
111 | */ | ||
112 | raw_spin_unlock_wait(&task->pi_lock); | ||
113 | 109 | ||
114 | do { | 110 | do { |
115 | next = work->next; | 111 | next = work->next; |
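The compare-and-swap loop kept in task_work_run() is the usual "atomically take the whole list" idiom; only the pi_lock bracketing around it moves in this hunk. A user-space C11 rendering of that idiom (illustrative types, not the kernel's):

#include <stdatomic.h>
#include <stddef.h>

struct work {
        struct work *next;
};

static _Atomic(struct work *) task_works;

/* Atomically detach the entire pending list (NULL if it was empty). */
static struct work *take_all_work(void)
{
        struct work *head = atomic_load(&task_works);

        /* Retry if more work was queued between the load and the swap. */
        while (!atomic_compare_exchange_weak(&task_works, &head,
                                             (struct work *)NULL))
                ;
        return head;
}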
diff --git a/kernel/torture.c b/kernel/torture.c index 55de96529287..637e172835d8 100644 --- a/kernel/torture.c +++ b/kernel/torture.c | |||
@@ -117,7 +117,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes, | |||
117 | torture_type, cpu); | 117 | torture_type, cpu); |
118 | (*n_offl_successes)++; | 118 | (*n_offl_successes)++; |
119 | delta = jiffies - starttime; | 119 | delta = jiffies - starttime; |
120 | sum_offl += delta; | 120 | *sum_offl += delta; |
121 | if (*min_offl < 0) { | 121 | if (*min_offl < 0) { |
122 | *min_offl = delta; | 122 | *min_offl = delta; |
123 | *max_offl = delta; | 123 | *max_offl = delta; |