-rw-r--r--  Documentation/RCU/Design/Requirements/Requirements.html | 130
-rw-r--r--  Documentation/RCU/checklist.txt | 121
-rw-r--r--  Documentation/RCU/rcu.txt | 9
-rw-r--r--  Documentation/RCU/rcu_dereference.txt | 61
-rw-r--r--  Documentation/RCU/rcubarrier.txt | 5
-rw-r--r--  Documentation/RCU/torture.txt | 20
-rw-r--r--  Documentation/RCU/whatisRCU.txt | 5
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 7
-rw-r--r--  Documentation/core-api/kernel-api.rst | 49
-rw-r--r--  Documentation/memory-barriers.txt | 41
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  arch/alpha/include/asm/spinlock.h | 5
-rw-r--r--  arch/arc/include/asm/spinlock.h | 5
-rw-r--r--  arch/arm/include/asm/spinlock.h | 16
-rw-r--r--  arch/arm64/include/asm/spinlock.h | 58
-rw-r--r--  arch/arm64/kernel/process.c | 2
-rw-r--r--  arch/blackfin/include/asm/spinlock.h | 5
-rw-r--r--  arch/blackfin/kernel/module.c | 39
-rw-r--r--  arch/hexagon/include/asm/spinlock.h | 5
-rw-r--r--  arch/ia64/include/asm/spinlock.h | 21
-rw-r--r--  arch/m32r/include/asm/spinlock.h | 5
-rw-r--r--  arch/metag/include/asm/spinlock.h | 5
-rw-r--r--  arch/mn10300/include/asm/spinlock.h | 5
-rw-r--r--  arch/parisc/include/asm/spinlock.h | 7
-rw-r--r--  arch/powerpc/include/asm/spinlock.h | 33
-rw-r--r--  arch/s390/include/asm/spinlock.h | 7
-rw-r--r--  arch/sh/include/asm/spinlock-cas.h | 5
-rw-r--r--  arch/sh/include/asm/spinlock-llsc.h | 5
-rw-r--r--  arch/sparc/include/asm/spinlock_32.h | 5
-rw-r--r--  arch/tile/include/asm/spinlock_32.h | 2
-rw-r--r--  arch/tile/include/asm/spinlock_64.h | 2
-rw-r--r--  arch/tile/lib/spinlock_32.c | 23
-rw-r--r--  arch/tile/lib/spinlock_64.c | 22
-rw-r--r--  arch/xtensa/include/asm/spinlock.h | 5
-rw-r--r--  drivers/ata/libata-eh.c | 8
-rw-r--r--  include/asm-generic/qspinlock.h | 14
-rw-r--r--  include/linux/init_task.h | 8
-rw-r--r--  include/linux/rcupdate.h | 14
-rw-r--r--  include/linux/rcutiny.h | 8
-rw-r--r--  include/linux/sched.h | 5
-rw-r--r--  include/linux/spinlock.h | 31
-rw-r--r--  include/linux/spinlock_up.h | 6
-rw-r--r--  include/linux/srcutiny.h | 13
-rw-r--r--  include/linux/srcutree.h | 3
-rw-r--r--  include/linux/swait.h | 55
-rw-r--r--  include/trace/events/rcu.h | 7
-rw-r--r--  include/uapi/linux/membarrier.h | 23
-rw-r--r--  ipc/sem.c | 3
-rw-r--r--  kernel/Makefile | 1
-rw-r--r--  kernel/exit.c | 10
-rw-r--r--  kernel/locking/qspinlock.c | 117
-rw-r--r--  kernel/membarrier.c | 70
-rw-r--r--  kernel/rcu/Kconfig | 3
-rw-r--r--  kernel/rcu/rcu.h | 128
-rw-r--r--  kernel/rcu/rcu_segcblist.c | 108
-rw-r--r--  kernel/rcu/rcu_segcblist.h | 28
-rw-r--r--  kernel/rcu/rcuperf.c | 17
-rw-r--r--  kernel/rcu/rcutorture.c | 83
-rw-r--r--  kernel/rcu/srcutiny.c | 8
-rw-r--r--  kernel/rcu/srcutree.c | 50
-rw-r--r--  kernel/rcu/tiny.c | 2
-rw-r--r--  kernel/rcu/tiny_plugin.h | 47
-rw-r--r--  kernel/rcu/tree.c | 174
-rw-r--r--  kernel/rcu/tree.h | 15
-rw-r--r--  kernel/rcu/tree_exp.h | 2
-rw-r--r--  kernel/rcu/tree_plugin.h | 238
-rw-r--r--  kernel/rcu/update.c | 18
-rw-r--r--  kernel/sched/Makefile | 1
-rw-r--r--  kernel/sched/completion.c | 11
-rw-r--r--  kernel/sched/core.c | 38
-rw-r--r--  kernel/sched/membarrier.c | 152
-rw-r--r--  kernel/task_work.c | 8
-rw-r--r--  kernel/torture.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 52
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/config_override.sh | 61
-rw-r--r--  tools/testing/selftests/rcutorture/bin/functions.sh | 27
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-build.sh | 11
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 58
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm.sh | 34
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-u | 3
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot | 2
-rw-r--r--  tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 2
84 files changed, 1207 insertions, 1312 deletions
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 95b30fa25d56..62e847bcdcdd 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2080,6 +2080,8 @@ Some of the relevant points of interest are as follows:
2080<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>. 2080<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>.
2081<li> <a href="#Tracing and RCU">Tracing and RCU</a>. 2081<li> <a href="#Tracing and RCU">Tracing and RCU</a>.
2082<li> <a href="#Energy Efficiency">Energy Efficiency</a>. 2082<li> <a href="#Energy Efficiency">Energy Efficiency</a>.
2083<li> <a href="#Scheduling-Clock Interrupts and RCU">
2084 Scheduling-Clock Interrupts and RCU</a>.
2083<li> <a href="#Memory Efficiency">Memory Efficiency</a>. 2085<li> <a href="#Memory Efficiency">Memory Efficiency</a>.
2084<li> <a href="#Performance, Scalability, Response Time, and Reliability"> 2086<li> <a href="#Performance, Scalability, Response Time, and Reliability">
2085 Performance, Scalability, Response Time, and Reliability</a>. 2087 Performance, Scalability, Response Time, and Reliability</a>.
@@ -2532,6 +2534,134 @@ I learned of many of these requirements via angry phone calls:
2532Flaming me on the Linux-kernel mailing list was apparently not 2534Flaming me on the Linux-kernel mailing list was apparently not
2533sufficient to fully vent their ire at RCU's energy-efficiency bugs! 2535sufficient to fully vent their ire at RCU's energy-efficiency bugs!
2534 2536
2537<h3><a name="Scheduling-Clock Interrupts and RCU">
2538Scheduling-Clock Interrupts and RCU</a></h3>
2539
2540<p>
2541The kernel transitions between in-kernel non-idle execution, userspace
2542execution, and the idle loop.
2543Depending on kernel configuration, RCU handles these states differently:
2544
2545<table border=3>
2546<tr><th><tt>HZ</tt> Kconfig</th>
2547 <th>In-Kernel</th>
2548 <th>Usermode</th>
2549 <th>Idle</th></tr>
2550<tr><th align="left"><tt>HZ_PERIODIC</tt></th>
2551 <td>Can rely on scheduling-clock interrupt.</td>
2552 <td>Can rely on scheduling-clock interrupt and its
2553 detection of interrupt from usermode.</td>
2554 <td>Can rely on RCU's dyntick-idle detection.</td></tr>
2555<tr><th align="left"><tt>NO_HZ_IDLE</tt></th>
2556 <td>Can rely on scheduling-clock interrupt.</td>
2557 <td>Can rely on scheduling-clock interrupt and its
2558 detection of interrupt from usermode.</td>
2559 <td>Can rely on RCU's dyntick-idle detection.</td></tr>
2560<tr><th align="left"><tt>NO_HZ_FULL</tt></th>
2561 <td>Can only sometimes rely on scheduling-clock interrupt.
2562 In other cases, it is necessary to bound kernel execution
2563 times and/or use IPIs.</td>
2564 <td>Can rely on RCU's dyntick-idle detection.</td>
2565 <td>Can rely on RCU's dyntick-idle detection.</td></tr>
2566</table>
2567
2568<table>
2569<tr><th>&nbsp;</th></tr>
2570<tr><th align="left">Quick Quiz:</th></tr>
2571<tr><td>
2572 Why can't <tt>NO_HZ_FULL</tt> in-kernel execution rely on the
2573 scheduling-clock interrupt, just like <tt>HZ_PERIODIC</tt>
2574 and <tt>NO_HZ_IDLE</tt> do?
2575</td></tr>
2576<tr><th align="left">Answer:</th></tr>
2577<tr><td bgcolor="#ffffff"><font color="ffffff">
2578 Because, as a performance optimization, <tt>NO_HZ_FULL</tt>
2579 does not necessarily re-enable the scheduling-clock interrupt
2580 on entry to each and every system call.
2581</font></td></tr>
2582<tr><td>&nbsp;</td></tr>
2583</table>
2584
2585<p>
2586However, RCU must be reliably informed as to whether any given
2587CPU is currently in the idle loop, and, for <tt>NO_HZ_FULL</tt>,
2588also whether that CPU is executing in usermode, as discussed
2589<a href="#Energy Efficiency">earlier</a>.
2590It also requires that the scheduling-clock interrupt be enabled when
2591RCU needs it to be:
2592
2593<ol>
2594<li> If a CPU is either idle or executing in usermode, and RCU believes
2595 it is non-idle, the scheduling-clock tick had better be running.
2596 Otherwise, you will get RCU CPU stall warnings. Or at best,
2597 very long (11-second) grace periods, with a pointless IPI waking
2598 the CPU from time to time.
2599<li> If a CPU is in a portion of the kernel that executes RCU read-side
2600 critical sections, and RCU believes this CPU to be idle, you will get
2601 random memory corruption. <b>DON'T DO THIS!!!</b>
2602
2603 <br>This is one reason to test with lockdep, which will complain
2604 about this sort of thing.
2605<li> If a CPU is in a portion of the kernel that is absolutely
2606 positively no-joking guaranteed to never execute any RCU read-side
 2607 critical sections, and RCU believes this CPU to be idle,
2608 no problem. This sort of thing is used by some architectures
2609 for light-weight exception handlers, which can then avoid the
2610 overhead of <tt>rcu_irq_enter()</tt> and <tt>rcu_irq_exit()</tt>
2611 at exception entry and exit, respectively.
2612 Some go further and avoid the entireties of <tt>irq_enter()</tt>
2613 and <tt>irq_exit()</tt>.
2614
2615 <br>Just make very sure you are running some of your tests with
2616 <tt>CONFIG_PROVE_RCU=y</tt>, just in case one of your code paths
2617 was in fact joking about not doing RCU read-side critical sections.
2618<li> If a CPU is executing in the kernel with the scheduling-clock
2619 interrupt disabled and RCU believes this CPU to be non-idle,
2620 and if the CPU goes idle (from an RCU perspective) every few
2621 jiffies, no problem. It is usually OK for there to be the
2622 occasional gap between idle periods of up to a second or so.
2623
2624 <br>If the gap grows too long, you get RCU CPU stall warnings.
2625<li> If a CPU is either idle or executing in usermode, and RCU believes
2626 it to be idle, of course no problem.
2627<li> If a CPU is executing in the kernel, the kernel code
2628 path is passing through quiescent states at a reasonable
2629 frequency (preferably about once per few jiffies, but the
2630 occasional excursion to a second or so is usually OK) and the
2631 scheduling-clock interrupt is enabled, of course no problem.
2632
2633 <br>If the gap between a successive pair of quiescent states grows
2634 too long, you get RCU CPU stall warnings.
2635</ol>
2636
2637<table>
2638<tr><th>&nbsp;</th></tr>
2639<tr><th align="left">Quick Quiz:</th></tr>
2640<tr><td>
2641 But what if my driver has a hardware interrupt handler
2642 that can run for many seconds?
 2643 I cannot invoke <tt>schedule()</tt> from a hardware
2644 interrupt handler, after all!
2645</td></tr>
2646<tr><th align="left">Answer:</th></tr>
2647<tr><td bgcolor="#ffffff"><font color="ffffff">
2648 One approach is to do <tt>rcu_irq_exit();rcu_irq_enter();</tt>
2649 every so often.
2650 But given that long-running interrupt handlers can cause
2651 other problems, not least for response time, shouldn't you
2652 work to keep your interrupt handler's runtime within reasonable
2653 bounds?
2654</font></td></tr>
2655<tr><td>&nbsp;</td></tr>
2656</table>
2657
2658<p>
2659But as long as RCU is properly informed of kernel state transitions between
2660in-kernel execution, usermode execution, and idle, and as long as the
2661scheduling-clock interrupt is enabled when RCU needs it to be, you
2662can rest assured that the bugs you encounter will be in some other
2663part of RCU or some other part of the kernel!
2664
2535<h3><a name="Memory Efficiency">Memory Efficiency</a></h3> 2665<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
2536 2666
2537<p> 2667<p>
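
The Quick Quiz answer above suggests executing rcu_irq_exit() followed by
rcu_irq_enter() every so often from a long-running interrupt handler.  A
minimal sketch of that approach follows; the driver helpers
my_dev_work_pending() and my_dev_process_one() are made-up names, not real
kernel APIs.

        #include <linux/interrupt.h>
        #include <linux/rcupdate.h>
        #include <linux/types.h>

        /* Hypothetical driver helpers, defined elsewhere in this sketch. */
        extern bool my_dev_work_pending(void *dev_id);
        extern void my_dev_process_one(void *dev_id);

        static irqreturn_t my_dev_interrupt(int irq, void *dev_id)
        {
                unsigned long count = 0;

                while (my_dev_work_pending(dev_id)) {
                        my_dev_process_one(dev_id);
                        if (++count % 1000 == 0) {
                                rcu_irq_exit();  /* let RCU note a momentary exit ... */
                                rcu_irq_enter(); /* ... before re-entering irq state */
                        }
                }
                return IRQ_HANDLED;
        }

As the answer also notes, bounding the handler's runtime in the first place
is usually the better fix.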
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 6beda556faf3..49747717d905 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -23,6 +23,14 @@ over a rather long period of time, but improvements are always welcome!
23 Yet another exception is where the low real-time latency of RCU's 23 Yet another exception is where the low real-time latency of RCU's
24 read-side primitives is critically important. 24 read-side primitives is critically important.
25 25
26 One final exception is where RCU readers are used to prevent
27 the ABA problem (https://en.wikipedia.org/wiki/ABA_problem)
28 for lockless updates. This does result in the mildly
29 counter-intuitive situation where rcu_read_lock() and
30 rcu_read_unlock() are used to protect updates, however, this
31 approach provides the same potential simplifications that garbage
32 collectors do.
33
261. Does the update code have proper mutual exclusion? 341. Does the update code have proper mutual exclusion?
27 35
28 RCU does allow -readers- to run (almost) naked, but -writers- must 36 RCU does allow -readers- to run (almost) naked, but -writers- must
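
The ABA-problem exception added above can be illustrated with a minimal
sketch of a lockless stack pop, assuming nodes are freed only via
call_rcu() or kfree_rcu().  Because a node's memory cannot be recycled
while this rcu_read_lock() section still holds its address, a successful
cmpxchg() cannot have been fooled by an A-B-A sequence.  The data
structure and names are illustrative, not taken from the patch.

        #include <linux/atomic.h>
        #include <linux/compiler.h>
        #include <linux/rcupdate.h>

        struct node {
                struct node *next;
                int data;
        };

        static struct node *top;        /* published with rcu_assign_pointer()/cmpxchg() */

        static struct node *pop(void)
        {
                struct node *first, *next;

                rcu_read_lock();        /* nodes cannot be freed and reused while held */
                do {
                        first = rcu_dereference(top);
                        if (!first) {
                                rcu_read_unlock();
                                return NULL;
                        }
                        next = READ_ONCE(first->next);
                } while (cmpxchg(&top, first, next) != first);
                rcu_read_unlock();
                return first;           /* caller later frees via kfree_rcu() or call_rcu() */
        }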
@@ -40,7 +48,9 @@ over a rather long period of time, but improvements are always welcome!
40 explain how this single task does not become a major bottleneck on 48 explain how this single task does not become a major bottleneck on
41 big multiprocessor machines (for example, if the task is updating 49 big multiprocessor machines (for example, if the task is updating
42 information relating to itself that other tasks can read, there 50 information relating to itself that other tasks can read, there
43 by definition can be no bottleneck). 51 by definition can be no bottleneck). Note that the definition
52 of "large" has changed significantly: Eight CPUs was "large"
53 in the year 2000, but a hundred CPUs was unremarkable in 2017.
44 54
452. Do the RCU read-side critical sections make proper use of 552. Do the RCU read-side critical sections make proper use of
46 rcu_read_lock() and friends? These primitives are needed 56 rcu_read_lock() and friends? These primitives are needed
@@ -55,6 +65,12 @@ over a rather long period of time, but improvements are always welcome!
55 Disabling of preemption can serve as rcu_read_lock_sched(), but 65 Disabling of preemption can serve as rcu_read_lock_sched(), but
56 is less readable. 66 is less readable.
57 67
68 Letting RCU-protected pointers "leak" out of an RCU read-side
 69 critical section is every bit as bad as letting them leak out
70 from under a lock. Unless, of course, you have arranged some
71 other means of protection, such as a lock or a reference count
72 -before- letting them out of the RCU read-side critical section.
73
583. Does the update code tolerate concurrent accesses? 743. Does the update code tolerate concurrent accesses?
59 75
60 The whole point of RCU is to permit readers to run without 76 The whole point of RCU is to permit readers to run without
@@ -78,10 +94,10 @@ over a rather long period of time, but improvements are always welcome!
78 94
79 This works quite well, also. 95 This works quite well, also.
80 96
81 c. Make updates appear atomic to readers. For example, 97 c. Make updates appear atomic to readers. For example,
82 pointer updates to properly aligned fields will 98 pointer updates to properly aligned fields will
83 appear atomic, as will individual atomic primitives. 99 appear atomic, as will individual atomic primitives.
84 Sequences of perations performed under a lock will -not- 100 Sequences of operations performed under a lock will -not-
85 appear to be atomic to RCU readers, nor will sequences 101 appear to be atomic to RCU readers, nor will sequences
86 of multiple atomic primitives. 102 of multiple atomic primitives.
87 103
@@ -168,8 +184,8 @@ over a rather long period of time, but improvements are always welcome!
168 184
1695. If call_rcu(), or a related primitive such as call_rcu_bh(), 1855. If call_rcu(), or a related primitive such as call_rcu_bh(),
170 call_rcu_sched(), or call_srcu() is used, the callback function 186 call_rcu_sched(), or call_srcu() is used, the callback function
171 must be written to be called from softirq context. In particular, 187 will be called from softirq context. In particular, it cannot
172 it cannot block. 188 block.
173 189
1746. Since synchronize_rcu() can block, it cannot be called from 1906. Since synchronize_rcu() can block, it cannot be called from
175 any sort of irq context. The same rule applies for 191 any sort of irq context. The same rule applies for
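
Because the callback runs in softirq context, it is typically limited to
non-blocking work such as kfree().  A minimal sketch of this common
pattern (structure and function names are illustrative):

        #include <linux/kernel.h>
        #include <linux/rcupdate.h>
        #include <linux/slab.h>

        struct foo {
                struct rcu_head rcu;
                int data;
        };

        /* Runs in softirq context after a grace period: must not block. */
        static void foo_reclaim(struct rcu_head *head)
        {
                struct foo *fp = container_of(head, struct foo, rcu);

                kfree(fp);      /* kfree() never sleeps, so this is safe here */
        }

        /* Updater: after unlinking fp from the RCU-protected structure ... */
        static void foo_defer_free(struct foo *fp)
        {
                call_rcu(&fp->rcu, foo_reclaim);
        }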
@@ -178,11 +194,14 @@ over a rather long period of time, but improvements are always welcome!
178 synchronize_sched_expedite(), and synchronize_srcu_expedited(). 194 synchronize_sched_expedite(), and synchronize_srcu_expedited().
179 195
180 The expedited forms of these primitives have the same semantics 196 The expedited forms of these primitives have the same semantics
181 as the non-expedited forms, but expediting is both expensive 197 as the non-expedited forms, but expediting is both expensive and
182 and unfriendly to real-time workloads. Use of the expedited 198 (with the exception of synchronize_srcu_expedited()) unfriendly
183 primitives should be restricted to rare configuration-change 199 to real-time workloads. Use of the expedited primitives should
184 operations that would not normally be undertaken while a real-time 200 be restricted to rare configuration-change operations that would
185 workload is running. 201 not normally be undertaken while a real-time workload is running.
202 However, real-time workloads can use rcupdate.rcu_normal kernel
203 boot parameter to completely disable expedited grace periods,
204 though this might have performance implications.
186 205
187 In particular, if you find yourself invoking one of the expedited 206 In particular, if you find yourself invoking one of the expedited
188 primitives repeatedly in a loop, please do everyone a favor: 207 primitives repeatedly in a loop, please do everyone a favor:
@@ -193,11 +212,6 @@ over a rather long period of time, but improvements are always welcome!
193 of the system, especially to real-time workloads running on 212 of the system, especially to real-time workloads running on
194 the rest of the system. 213 the rest of the system.
195 214
196 In addition, it is illegal to call the expedited forms from
197 a CPU-hotplug notifier, or while holding a lock that is acquired
198 by a CPU-hotplug notifier. Failing to observe this restriction
199 will result in deadlock.
200
2017. If the updater uses call_rcu() or synchronize_rcu(), then the 2157. If the updater uses call_rcu() or synchronize_rcu(), then the
202 corresponding readers must use rcu_read_lock() and 216 corresponding readers must use rcu_read_lock() and
203 rcu_read_unlock(). If the updater uses call_rcu_bh() or 217 rcu_read_unlock(). If the updater uses call_rcu_bh() or
@@ -321,7 +335,7 @@ over a rather long period of time, but improvements are always welcome!
321 Similarly, disabling preemption is not an acceptable substitute 335 Similarly, disabling preemption is not an acceptable substitute
322 for rcu_read_lock(). Code that attempts to use preemption 336 for rcu_read_lock(). Code that attempts to use preemption
323 disabling where it should be using rcu_read_lock() will break 337 disabling where it should be using rcu_read_lock() will break
324 in real-time kernel builds. 338 in CONFIG_PREEMPT=y kernel builds.
325 339
326 If you want to wait for interrupt handlers, NMI handlers, and 340 If you want to wait for interrupt handlers, NMI handlers, and
327 code under the influence of preempt_disable(), you instead 341 code under the influence of preempt_disable(), you instead
@@ -356,23 +370,22 @@ over a rather long period of time, but improvements are always welcome!
356 not the case, a self-spawning RCU callback would prevent the 370 not the case, a self-spawning RCU callback would prevent the
357 victim CPU from ever going offline.) 371 victim CPU from ever going offline.)
358 372
35914. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(), 37314. Unlike other forms of RCU, it -is- permissible to block in an
360 synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu()) 374 SRCU read-side critical section (demarked by srcu_read_lock()
361 may only be invoked from process context. Unlike other forms of 375 and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
362 RCU, it -is- permissible to block in an SRCU read-side critical 376 Please note that if you don't need to sleep in read-side critical
363 section (demarked by srcu_read_lock() and srcu_read_unlock()), 377 sections, you should be using RCU rather than SRCU, because RCU
364 hence the "SRCU": "sleepable RCU". Please note that if you 378 is almost always faster and easier to use than is SRCU.
365 don't need to sleep in read-side critical sections, you should be 379
366 using RCU rather than SRCU, because RCU is almost always faster 380 Also unlike other forms of RCU, explicit initialization and
367 and easier to use than is SRCU. 381 cleanup is required either at build time via DEFINE_SRCU()
368 382 or DEFINE_STATIC_SRCU() or at runtime via init_srcu_struct()
369 Also unlike other forms of RCU, explicit initialization 383 and cleanup_srcu_struct(). These last two are passed a
370 and cleanup is required via init_srcu_struct() and 384 "struct srcu_struct" that defines the scope of a given
371 cleanup_srcu_struct(). These are passed a "struct srcu_struct" 385 SRCU domain. Once initialized, the srcu_struct is passed
372 that defines the scope of a given 386 to srcu_read_lock(), srcu_read_unlock(), synchronize_srcu(),
373 the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock() 387 synchronize_srcu_expedited(), and call_srcu(). A given
374 synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu(). 388 synchronize_srcu() waits only for SRCU read-side critical
375 A given synchronize_srcu() waits only for SRCU read-side critical
376 sections governed by srcu_read_lock() and srcu_read_unlock() 389 sections governed by srcu_read_lock() and srcu_read_unlock()
377 calls that have been passed the same srcu_struct. This property 390 calls that have been passed the same srcu_struct. This property
378 is what makes sleeping read-side critical sections tolerable -- 391 is what makes sleeping read-side critical sections tolerable --
@@ -390,10 +403,16 @@ over a rather long period of time, but improvements are always welcome!
390 Therefore, SRCU should be used in preference to rw_semaphore 403 Therefore, SRCU should be used in preference to rw_semaphore
391 only in extremely read-intensive situations, or in situations 404 only in extremely read-intensive situations, or in situations
392 requiring SRCU's read-side deadlock immunity or low read-side 405 requiring SRCU's read-side deadlock immunity or low read-side
393 realtime latency. 406 realtime latency. You should also consider percpu_rw_semaphore
407 when you need lightweight readers.
394 408
395 Note that, rcu_assign_pointer() relates to SRCU just as it does 409 SRCU's expedited primitive (synchronize_srcu_expedited())
396 to other forms of RCU. 410 never sends IPIs to other CPUs, so it is easier on
411 real-time workloads than is synchronize_rcu_expedited(),
412 synchronize_rcu_bh_expedited() or synchronize_sched_expedited().
413
414 Note that rcu_dereference() and rcu_assign_pointer() relate to
415 SRCU just as they do to other forms of RCU.
397 416
39815. The whole point of call_rcu(), synchronize_rcu(), and friends 41715. The whole point of call_rcu(), synchronize_rcu(), and friends
399 is to wait until all pre-existing readers have finished before 418 is to wait until all pre-existing readers have finished before
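
A minimal sketch of the build-time initialization mentioned above, using
DEFINE_STATIC_SRCU() together with the read-side and update-side
primitives; the protected data structure itself is omitted and the names
are illustrative.

        #include <linux/srcu.h>

        DEFINE_STATIC_SRCU(my_srcu);    /* build-time initialization, file scope */

        static void my_reader(void)
        {
                int idx;

                idx = srcu_read_lock(&my_srcu);
                /* ... read the protected data; sleeping is permitted here ... */
                srcu_read_unlock(&my_srcu, idx);
        }

        static void my_updater(void)
        {
                /* ... publish the new version of the data ... */
                synchronize_srcu(&my_srcu);     /* waits only for my_srcu readers */
                /* ... now safe to free the old version ... */
        }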
@@ -435,3 +454,33 @@ over a rather long period of time, but improvements are always welcome!
435 454
436 These debugging aids can help you find problems that are 455 These debugging aids can help you find problems that are
437 otherwise extremely difficult to spot. 456 otherwise extremely difficult to spot.
457
45818. If you register a callback using call_rcu(), call_rcu_bh(),
459 call_rcu_sched(), or call_srcu(), and pass in a function defined
 460 within a loadable module, then it is necessary to wait for
461 all pending callbacks to be invoked after the last invocation
462 and before unloading that module. Note that it is absolutely
463 -not- sufficient to wait for a grace period! The current (say)
464 synchronize_rcu() implementation waits only for all previous
465 callbacks registered on the CPU that synchronize_rcu() is running
466 on, but it is -not- guaranteed to wait for callbacks registered
467 on other CPUs.
468
469 You instead need to use one of the barrier functions:
470
471 o call_rcu() -> rcu_barrier()
472 o call_rcu_bh() -> rcu_barrier_bh()
473 o call_rcu_sched() -> rcu_barrier_sched()
474 o call_srcu() -> srcu_barrier()
475
476 However, these barrier functions are absolutely -not- guaranteed
477 to wait for a grace period. In fact, if there are no call_rcu()
478 callbacks waiting anywhere in the system, rcu_barrier() is within
479 its rights to return immediately.
480
481 So if you need to wait for both an RCU grace period and for
482 all pre-existing call_rcu() callbacks, you will need to execute
483 both rcu_barrier() and synchronize_rcu(), if necessary, using
 484 something like workqueues to execute them concurrently.
485
486 See rcubarrier.txt for more information.
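
A minimal sketch of item 18's rule as applied to a module exit handler,
assuming a hypothetical my_table_teardown() that stops posting new
callbacks and queues the final call_rcu() invocations:

        #include <linux/module.h>
        #include <linux/rcupdate.h>

        /* Hypothetical helper that performs the last call_rcu() invocations. */
        extern void my_table_teardown(void);

        static void __exit my_module_exit(void)
        {
                my_table_teardown();
                rcu_barrier();  /* wait for every outstanding call_rcu() callback */
                /* Only now may the module's callback functions disappear. */
        }
        module_exit(my_module_exit);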
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
index 745f429fda79..7d4ae110c2c9 100644
--- a/Documentation/RCU/rcu.txt
+++ b/Documentation/RCU/rcu.txt
@@ -76,15 +76,12 @@ o I hear that RCU is patented? What is with that?
76 Of these, one was allowed to lapse by the assignee, and the 76 Of these, one was allowed to lapse by the assignee, and the
77 others have been contributed to the Linux kernel under GPL. 77 others have been contributed to the Linux kernel under GPL.
78 There are now also LGPL implementations of user-level RCU 78 There are now also LGPL implementations of user-level RCU
79 available (http://lttng.org/?q=node/18). 79 available (http://liburcu.org/).
80 80
81o I hear that RCU needs work in order to support realtime kernels? 81o I hear that RCU needs work in order to support realtime kernels?
82 82
83 This work is largely completed. Realtime-friendly RCU can be 83 Realtime-friendly RCU can be enabled via the CONFIG_PREEMPT_RCU
84 enabled via the CONFIG_PREEMPT_RCU kernel configuration 84 kernel configuration parameter.
85 parameter. However, work is in progress for enabling priority
86 boosting of preempted RCU read-side critical sections. This is
87 needed if you have CPU-bound realtime threads.
88 85
89o Where can I find more information on RCU? 86o Where can I find more information on RCU?
90 87
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
index b2a613f16d74..1acb26b09b48 100644
--- a/Documentation/RCU/rcu_dereference.txt
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -25,35 +25,35 @@ o You must use one of the rcu_dereference() family of primitives
25 for an example where the compiler can in fact deduce the exact 25 for an example where the compiler can in fact deduce the exact
26 value of the pointer, and thus cause misordering. 26 value of the pointer, and thus cause misordering.
27 27
28o You are only permitted to use rcu_dereference on pointer values.
29 The compiler simply knows too much about integral values to
30 trust it to carry dependencies through integer operations.
31 There are a very few exceptions, namely that you can temporarily
32 cast the pointer to uintptr_t in order to:
33
34 o Set bits and clear bits down in the must-be-zero low-order
35 bits of that pointer. This clearly means that the pointer
36 must have alignment constraints, for example, this does
37 -not- work in general for char* pointers.
38
39 o XOR bits to translate pointers, as is done in some
40 classic buddy-allocator algorithms.
41
42 It is important to cast the value back to pointer before
43 doing much of anything else with it.
44
28o Avoid cancellation when using the "+" and "-" infix arithmetic 45o Avoid cancellation when using the "+" and "-" infix arithmetic
29 operators. For example, for a given variable "x", avoid 46 operators. For example, for a given variable "x", avoid
30 "(x-x)". There are similar arithmetic pitfalls from other 47 "(x-(uintptr_t)x)" for char* pointers. The compiler is within its
31 arithmetic operators, such as "(x*0)", "(x/(x+1))" or "(x%1)". 48 rights to substitute zero for this sort of expression, so that
32 The compiler is within its rights to substitute zero for all of 49 subsequent accesses no longer depend on the rcu_dereference(),
33 these expressions, so that subsequent accesses no longer depend 50 again possibly resulting in bugs due to misordering.
34 on the rcu_dereference(), again possibly resulting in bugs due
35 to misordering.
36 51
37 Of course, if "p" is a pointer from rcu_dereference(), and "a" 52 Of course, if "p" is a pointer from rcu_dereference(), and "a"
38 and "b" are integers that happen to be equal, the expression 53 and "b" are integers that happen to be equal, the expression
39 "p+a-b" is safe because its value still necessarily depends on 54 "p+a-b" is safe because its value still necessarily depends on
40 the rcu_dereference(), thus maintaining proper ordering. 55 the rcu_dereference(), thus maintaining proper ordering.
41 56
42o Avoid all-zero operands to the bitwise "&" operator, and
43 similarly avoid all-ones operands to the bitwise "|" operator.
44 If the compiler is able to deduce the value of such operands,
45 it is within its rights to substitute the corresponding constant
46 for the bitwise operation. Once again, this causes subsequent
47 accesses to no longer depend on the rcu_dereference(), causing
48 bugs due to misordering.
49
50 Please note that single-bit operands to bitwise "&" can also
51 be dangerous. At this point, the compiler knows that the
52 resulting value can only take on one of two possible values.
53 Therefore, a very small amount of additional information will
54 allow the compiler to deduce the exact value, which again can
55 result in misordering.
56
57o If you are using RCU to protect JITed functions, so that the 57o If you are using RCU to protect JITed functions, so that the
58 "()" function-invocation operator is applied to a value obtained 58 "()" function-invocation operator is applied to a value obtained
59 (directly or indirectly) from rcu_dereference(), you may need to 59 (directly or indirectly) from rcu_dereference(), you may need to
@@ -61,25 +61,6 @@ o If you are using RCU to protect JITed functions, so that the
61 This issue arises on some systems when a newly JITed function is 61 This issue arises on some systems when a newly JITed function is
62 using the same memory that was used by an earlier JITed function. 62 using the same memory that was used by an earlier JITed function.
63 63
64o Do not use the results from the boolean "&&" and "||" when
65 dereferencing. For example, the following (rather improbable)
66 code is buggy:
67
68 int *p;
69 int *q;
70
71 ...
72
73 p = rcu_dereference(gp)
74 q = &global_q;
75 q += p != &oom_p1 && p != &oom_p2;
76 r1 = *q; /* BUGGY!!! */
77
78 The reason this is buggy is that "&&" and "||" are often compiled
79 using branches. While weak-memory machines such as ARM or PowerPC
80 do order stores after such branches, they can speculate loads,
81 which can result in misordering bugs.
82
83o Do not use the results from relational operators ("==", "!=", 64o Do not use the results from relational operators ("==", "!=",
84 ">", ">=", "<", or "<=") when dereferencing. For example, 65 ">", ">=", "<", or "<=") when dereferencing. For example,
85 the following (quite strange) code is buggy: 66 the following (quite strange) code is buggy:
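
The uintptr_t exception added above might look as follows in practice;
the NODE_MARKED low-order flag bit and the node layout are hypothetical,
and the pointer is cast back before any further use.

        #include <linux/rcupdate.h>
        #include <linux/types.h>

        #define NODE_MARKED     0x1UL   /* stashed in a must-be-zero low-order bit */

        struct node {
                struct node *next;
                int data;
        };

        static struct node *gp;         /* published with rcu_assign_pointer() */

        static int read_data_ignoring_mark(void)
        {
                struct node *p;
                uintptr_t pval;
                int ret = -1;

                rcu_read_lock();
                p = rcu_dereference(gp);
                pval = (uintptr_t)p;            /* temporary cast to an integer type */
                pval &= ~NODE_MARKED;           /* clear the flag bit ... */
                p = (struct node *)pval;        /* ... and cast back promptly */
                if (p)
                        ret = p->data;
                rcu_read_unlock();
                return ret;
        }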
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
index b10cfe711e68..5d7759071a3e 100644
--- a/Documentation/RCU/rcubarrier.txt
+++ b/Documentation/RCU/rcubarrier.txt
@@ -263,6 +263,11 @@ Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
263 are delayed for a full grace period? Couldn't this result in 263 are delayed for a full grace period? Couldn't this result in
264 rcu_barrier() returning prematurely? 264 rcu_barrier() returning prematurely?
265 265
266The current rcu_barrier() implementation is more complex, due to the need
267to avoid disturbing idle CPUs (especially on battery-powered systems)
268and the need to minimally disturb non-idle CPUs in real-time systems.
269However, the code above illustrates the concepts.
270
266 271
267rcu_barrier() Summary 272rcu_barrier() Summary
268 273
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 278f6a9383b6..55918b54808b 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -276,15 +276,17 @@ o "Free-Block Circulation": Shows the number of torture structures
276 somehow gets incremented farther than it should. 276 somehow gets incremented farther than it should.
277 277
278Different implementations of RCU can provide implementation-specific 278Different implementations of RCU can provide implementation-specific
279additional information. For example, SRCU provides the following 279additional information. For example, Tree SRCU provides the following
280additional line: 280additional line:
281 281
282 srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1) 282 srcud-torture: Tree SRCU per-CPU(idx=0): 0(35,-21) 1(-4,24) 2(1,1) 3(-26,20) 4(28,-47) 5(-9,4) 6(-10,14) 7(-14,11) T(1,6)
283 283
284This line shows the per-CPU counter state. The numbers in parentheses are 284This line shows the per-CPU counter state, in this case for Tree SRCU
285the values of the "old" and "current" counters for the corresponding CPU. 285using a dynamically allocated srcu_struct (hence "srcud-" rather than
286The "idx" value maps the "old" and "current" values to the underlying 286"srcu-"). The numbers in parentheses are the values of the "old" and
287array, and is useful for debugging. 287"current" counters for the corresponding CPU. The "idx" value maps the
288"old" and "current" values to the underlying array, and is useful for
289debugging. The final "T" entry contains the totals of the counters.
288 290
289 291
290USAGE 292USAGE
@@ -304,3 +306,9 @@ checked for such errors. The "rmmod" command forces a "SUCCESS",
304"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed. The first 306"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed. The first
305two are self-explanatory, while the last indicates that while there 307two are self-explanatory, while the last indicates that while there
306were no RCU failures, CPU-hotplug problems were detected. 308were no RCU failures, CPU-hotplug problems were detected.
309
310However, the tools/testing/selftests/rcutorture/bin/kvm.sh script
311provides better automation, including automatic failure analysis.
312It assumes a qemu/kvm-enabled platform, and runs guest OSes out of initrd.
313See tools/testing/selftests/rcutorture/doc/initrd.txt for instructions
314on setting up such an initrd.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 8ed6c9f6133c..df62466da4e0 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -890,6 +890,8 @@ SRCU: Critical sections Grace period Barrier
890 srcu_read_lock_held 890 srcu_read_lock_held
891 891
892SRCU: Initialization/cleanup 892SRCU: Initialization/cleanup
893 DEFINE_SRCU
894 DEFINE_STATIC_SRCU
893 init_srcu_struct 895 init_srcu_struct
894 cleanup_srcu_struct 896 cleanup_srcu_struct
895 897
@@ -913,7 +915,8 @@ a. Will readers need to block? If so, you need SRCU.
913b. What about the -rt patchset? If readers would need to block 915b. What about the -rt patchset? If readers would need to block
914 in an non-rt kernel, you need SRCU. If readers would block 916 in an non-rt kernel, you need SRCU. If readers would block
915 in a -rt kernel, but not in a non-rt kernel, SRCU is not 917 in a -rt kernel, but not in a non-rt kernel, SRCU is not
916 necessary. 918 necessary. (The -rt patchset turns spinlocks into sleeplocks,
919 hence this distinction.)
917 920
918c. Do you need to treat NMI handlers, hardirq handlers, 921c. Do you need to treat NMI handlers, hardirq handlers,
919 and code segments with preemption disabled (whether 922 and code segments with preemption disabled (whether
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d9c171ce4190..3a99cc96b6b1 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2633,9 +2633,10 @@
2633 In kernels built with CONFIG_NO_HZ_FULL=y, set 2633 In kernels built with CONFIG_NO_HZ_FULL=y, set
2634 the specified list of CPUs whose tick will be stopped 2634 the specified list of CPUs whose tick will be stopped
2635 whenever possible. The boot CPU will be forced outside 2635 whenever possible. The boot CPU will be forced outside
2636 the range to maintain the timekeeping. 2636 the range to maintain the timekeeping. Any CPUs
2637 The CPUs in this range must also be included in the 2637 in this list will have their RCU callbacks offloaded,
2638 rcu_nocbs= set. 2638 just as if they had also been called out in the
2639 rcu_nocbs= boot parameter.
2639 2640
2640 noiotrap [SH] Disables trapped I/O port accesses. 2641 noiotrap [SH] Disables trapped I/O port accesses.
2641 2642
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 17b00914c6ab..8282099e0cbf 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -344,3 +344,52 @@ codecs, and devices with strict requirements for interface clocking.
344 344
345.. kernel-doc:: include/linux/clk.h 345.. kernel-doc:: include/linux/clk.h
346 :internal: 346 :internal:
347
348Synchronization Primitives
349==========================
350
351Read-Copy Update (RCU)
352----------------------
353
354.. kernel-doc:: include/linux/rcupdate.h
355 :external:
356
357.. kernel-doc:: include/linux/rcupdate_wait.h
358 :external:
359
360.. kernel-doc:: include/linux/rcutree.h
361 :external:
362
363.. kernel-doc:: kernel/rcu/tree.c
364 :external:
365
366.. kernel-doc:: kernel/rcu/tree_plugin.h
367 :external:
368
369.. kernel-doc:: kernel/rcu/tree_exp.h
370 :external:
371
372.. kernel-doc:: kernel/rcu/update.c
373 :external:
374
375.. kernel-doc:: include/linux/srcu.h
376 :external:
377
378.. kernel-doc:: kernel/rcu/srcutree.c
379 :external:
380
381.. kernel-doc:: include/linux/rculist_bl.h
382 :external:
383
384.. kernel-doc:: include/linux/rculist.h
385 :external:
386
387.. kernel-doc:: include/linux/rculist_nulls.h
388 :external:
389
390.. kernel-doc:: include/linux/rcu_sync.h
391 :external:
392
393.. kernel-doc:: kernel/rcu/sync.c
394 :external:
395
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index c4ddfcd5ee32..e2ee0a1c299a 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -594,7 +594,24 @@ between the address load and the data load:
594This enforces the occurrence of one of the two implications, and prevents the 594This enforces the occurrence of one of the two implications, and prevents the
595third possibility from arising. 595third possibility from arising.
596 596
597A data-dependency barrier must also order against dependent writes: 597
598[!] Note that this extremely counterintuitive situation arises most easily on
599machines with split caches, so that, for example, one cache bank processes
600even-numbered cache lines and the other bank processes odd-numbered cache
601lines. The pointer P might be stored in an odd-numbered cache line, and the
602variable B might be stored in an even-numbered cache line. Then, if the
603even-numbered bank of the reading CPU's cache is extremely busy while the
604odd-numbered bank is idle, one can see the new value of the pointer P (&B),
605but the old value of the variable B (2).
606
607
608A data-dependency barrier is not required to order dependent writes
609because the CPUs that the Linux kernel supports don't do writes
610until they are certain (1) that the write will actually happen, (2)
611of the location of the write, and (3) of the value to be written.
612But please carefully read the "CONTROL DEPENDENCIES" section and the
613Documentation/RCU/rcu_dereference.txt file: The compiler can and does
614break dependencies in a great many highly creative ways.
598 615
599 CPU 1 CPU 2 616 CPU 1 CPU 2
600 =============== =============== 617 =============== ===============
@@ -603,29 +620,19 @@ A data-dependency barrier must also order against dependent writes:
603 <write barrier> 620 <write barrier>
604 WRITE_ONCE(P, &B); 621 WRITE_ONCE(P, &B);
605 Q = READ_ONCE(P); 622 Q = READ_ONCE(P);
606 <data dependency barrier> 623 WRITE_ONCE(*Q, 5);
607 *Q = 5;
608 624
609The data-dependency barrier must order the read into Q with the store 625Therefore, no data-dependency barrier is required to order the read into
610into *Q. This prohibits this outcome: 626Q with the store into *Q. In other words, this outcome is prohibited,
627even without a data-dependency barrier:
611 628
612 (Q == &B) && (B == 4) 629 (Q == &B) && (B == 4)
613 630
614Please note that this pattern should be rare. After all, the whole point 631Please note that this pattern should be rare. After all, the whole point
615of dependency ordering is to -prevent- writes to the data structure, along 632of dependency ordering is to -prevent- writes to the data structure, along
616with the expensive cache misses associated with those writes. This pattern 633with the expensive cache misses associated with those writes. This pattern
617can be used to record rare error conditions and the like, and the ordering 634can be used to record rare error conditions and the like, and the CPUs'
618prevents such records from being lost. 635naturally occurring ordering prevents such records from being lost.
619
620
621[!] Note that this extremely counterintuitive situation arises most easily on
622machines with split caches, so that, for example, one cache bank processes
623even-numbered cache lines and the other bank processes odd-numbered cache
624lines. The pointer P might be stored in an odd-numbered cache line, and the
625variable B might be stored in an even-numbered cache line. Then, if the
626even-numbered bank of the reading CPU's cache is extremely busy while the
627odd-numbered bank is idle, one can see the new value of the pointer P (&B),
628but the old value of the variable B (2).
629 636
630 637
631The data dependency barrier is very important to the RCU system, 638The data dependency barrier is very important to the RCU system,
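
The rare-error-recording pattern described above could look like the
following sketch, with rcu_dereference() supplying the address dependency
on the reader side; the structure and variable names are illustrative
only.

        #include <linux/compiler.h>
        #include <linux/rcupdate.h>

        struct err_rec {
                int code;
        };

        static struct err_rec *err_ptr; /* published elsewhere with rcu_assign_pointer() */

        static void note_error(int code)
        {
                struct err_rec *r;

                rcu_read_lock();
                r = rcu_dereference(err_ptr);   /* the Q = READ_ONCE(P) above, with dependency ordering */
                if (r)
                        WRITE_ONCE(r->code, code);      /* dependent write: no extra barrier required */
                rcu_read_unlock();
        }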
diff --git a/MAINTAINERS b/MAINTAINERS
index 6f7721d1634c..ceedacf33535 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8629,7 +8629,7 @@ M: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
8629M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> 8629M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
8630L: linux-kernel@vger.kernel.org 8630L: linux-kernel@vger.kernel.org
8631S: Supported 8631S: Supported
8632F: kernel/membarrier.c 8632F: kernel/sched/membarrier.c
8633F: include/uapi/linux/membarrier.h 8633F: include/uapi/linux/membarrier.h
8634 8634
8635MEMORY MANAGEMENT 8635MEMORY MANAGEMENT
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index a40b9fc0c6c3..718ac0b64adf 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -16,11 +16,6 @@
16#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 16#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
17#define arch_spin_is_locked(x) ((x)->lock != 0) 17#define arch_spin_is_locked(x) ((x)->lock != 0)
18 18
19static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
20{
21 smp_cond_load_acquire(&lock->lock, !VAL);
22}
23
24static inline int arch_spin_value_unlocked(arch_spinlock_t lock) 19static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
25{ 20{
26 return lock.lock == 0; 21 return lock.lock == 0;
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 233d5ffe6ec7..a325e6a36523 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -16,11 +16,6 @@
16#define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) 16#define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
17#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 17#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
18 18
19static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
20{
21 smp_cond_load_acquire(&lock->slock, !VAL);
22}
23
24#ifdef CONFIG_ARC_HAS_LLSC 19#ifdef CONFIG_ARC_HAS_LLSC
25 20
26static inline void arch_spin_lock(arch_spinlock_t *lock) 21static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 4bec45442072..c030143c18c6 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -52,22 +52,6 @@ static inline void dsb_sev(void)
52 * memory. 52 * memory.
53 */ 53 */
54 54
55static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
56{
57 u16 owner = READ_ONCE(lock->tickets.owner);
58
59 for (;;) {
60 arch_spinlock_t tmp = READ_ONCE(*lock);
61
62 if (tmp.tickets.owner == tmp.tickets.next ||
63 tmp.tickets.owner != owner)
64 break;
65
66 wfe();
67 }
68 smp_acquire__after_ctrl_dep();
69}
70
71#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 55#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
72 56
73static inline void arch_spin_lock(arch_spinlock_t *lock) 57static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cae331d553f8..f445bd7f2b9f 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -26,58 +26,6 @@
26 * The memory barriers are implicit with the load-acquire and store-release 26 * The memory barriers are implicit with the load-acquire and store-release
27 * instructions. 27 * instructions.
28 */ 28 */
29static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
30{
31 unsigned int tmp;
32 arch_spinlock_t lockval;
33 u32 owner;
34
35 /*
36 * Ensure prior spin_lock operations to other locks have completed
37 * on this CPU before we test whether "lock" is locked.
38 */
39 smp_mb();
40 owner = READ_ONCE(lock->owner) << 16;
41
42 asm volatile(
43" sevl\n"
44"1: wfe\n"
45"2: ldaxr %w0, %2\n"
46 /* Is the lock free? */
47" eor %w1, %w0, %w0, ror #16\n"
48" cbz %w1, 3f\n"
49 /* Lock taken -- has there been a subsequent unlock->lock transition? */
50" eor %w1, %w3, %w0, lsl #16\n"
51" cbz %w1, 1b\n"
52 /*
53 * The owner has been updated, so there was an unlock->lock
54 * transition that we missed. That means we can rely on the
55 * store-release of the unlock operation paired with the
56 * load-acquire of the lock operation to publish any of our
57 * previous stores to the new lock owner and therefore don't
58 * need to bother with the writeback below.
59 */
60" b 4f\n"
61"3:\n"
62 /*
63 * Serialise against any concurrent lockers by writing back the
64 * unlocked lock value
65 */
66 ARM64_LSE_ATOMIC_INSN(
67 /* LL/SC */
68" stxr %w1, %w0, %2\n"
69 __nops(2),
70 /* LSE atomics */
71" mov %w1, %w0\n"
72" cas %w0, %w0, %2\n"
73" eor %w1, %w1, %w0\n")
74 /* Somebody else wrote to the lock, GOTO 10 and reload the value */
75" cbnz %w1, 2b\n"
76"4:"
77 : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
78 : "r" (owner)
79 : "memory");
80}
81 29
82#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 30#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
83 31
@@ -176,7 +124,11 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
176 124
177static inline int arch_spin_is_locked(arch_spinlock_t *lock) 125static inline int arch_spin_is_locked(arch_spinlock_t *lock)
178{ 126{
179 smp_mb(); /* See arch_spin_unlock_wait */ 127 /*
128 * Ensure prior spin_lock operations to other locks have completed
129 * on this CPU before we test whether "lock" is locked.
130 */
131 smp_mb(); /* ^^^ */
180 return !arch_spin_value_unlocked(READ_ONCE(*lock)); 132 return !arch_spin_value_unlocked(READ_ONCE(*lock));
181} 133}
182 134
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 659ae8094ed5..c8f7d98d8cb9 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -360,6 +360,8 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
360 /* 360 /*
361 * Complete any pending TLB or cache maintenance on this CPU in case 361 * Complete any pending TLB or cache maintenance on this CPU in case
362 * the thread migrates to a different CPU. 362 * the thread migrates to a different CPU.
363 * This full barrier is also required by the membarrier system
364 * call.
363 */ 365 */
364 dsb(ish); 366 dsb(ish);
365 367
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index c58f4a83ed6f..f6431439d15d 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -48,11 +48,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
48 __raw_spin_unlock_asm(&lock->lock); 48 __raw_spin_unlock_asm(&lock->lock);
49} 49}
50 50
51static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
52{
53 smp_cond_load_acquire(&lock->lock, !VAL);
54}
55
56static inline int arch_read_can_lock(arch_rwlock_t *rw) 51static inline int arch_read_can_lock(arch_rwlock_t *rw)
57{ 52{
58 return __raw_uncached_fetch_asm(&rw->lock) > 0; 53 return __raw_uncached_fetch_asm(&rw->lock) > 0;
diff --git a/arch/blackfin/kernel/module.c b/arch/blackfin/kernel/module.c
index 0188c933b155..15af5768c403 100644
--- a/arch/blackfin/kernel/module.c
+++ b/arch/blackfin/kernel/module.c
@@ -4,8 +4,6 @@
4 * Licensed under the GPL-2 or later 4 * Licensed under the GPL-2 or later
5 */ 5 */
6 6
7#define pr_fmt(fmt) "module %s: " fmt, mod->name
8
9#include <linux/moduleloader.h> 7#include <linux/moduleloader.h>
10#include <linux/elf.h> 8#include <linux/elf.h>
11#include <linux/vmalloc.h> 9#include <linux/vmalloc.h>
@@ -16,6 +14,11 @@
16#include <asm/cacheflush.h> 14#include <asm/cacheflush.h>
17#include <linux/uaccess.h> 15#include <linux/uaccess.h>
18 16
17#define mod_err(mod, fmt, ...) \
18 pr_err("module %s: " fmt, (mod)->name, ##__VA_ARGS__)
19#define mod_debug(mod, fmt, ...) \
20 pr_debug("module %s: " fmt, (mod)->name, ##__VA_ARGS__)
21
19/* Transfer the section to the L1 memory */ 22/* Transfer the section to the L1 memory */
20int 23int
21module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, 24module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
@@ -44,7 +47,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
44 dest = l1_inst_sram_alloc(s->sh_size); 47 dest = l1_inst_sram_alloc(s->sh_size);
45 mod->arch.text_l1 = dest; 48 mod->arch.text_l1 = dest;
46 if (dest == NULL) { 49 if (dest == NULL) {
47 pr_err("L1 inst memory allocation failed\n"); 50 mod_err(mod, "L1 inst memory allocation failed\n");
48 return -1; 51 return -1;
49 } 52 }
50 dma_memcpy(dest, (void *)s->sh_addr, s->sh_size); 53 dma_memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -56,7 +59,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
56 dest = l1_data_sram_alloc(s->sh_size); 59 dest = l1_data_sram_alloc(s->sh_size);
57 mod->arch.data_a_l1 = dest; 60 mod->arch.data_a_l1 = dest;
58 if (dest == NULL) { 61 if (dest == NULL) {
59 pr_err("L1 data memory allocation failed\n"); 62 mod_err(mod, "L1 data memory allocation failed\n");
60 return -1; 63 return -1;
61 } 64 }
62 memcpy(dest, (void *)s->sh_addr, s->sh_size); 65 memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -68,7 +71,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
68 dest = l1_data_sram_zalloc(s->sh_size); 71 dest = l1_data_sram_zalloc(s->sh_size);
69 mod->arch.bss_a_l1 = dest; 72 mod->arch.bss_a_l1 = dest;
70 if (dest == NULL) { 73 if (dest == NULL) {
71 pr_err("L1 data memory allocation failed\n"); 74 mod_err(mod, "L1 data memory allocation failed\n");
72 return -1; 75 return -1;
73 } 76 }
74 77
@@ -77,7 +80,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
77 dest = l1_data_B_sram_alloc(s->sh_size); 80 dest = l1_data_B_sram_alloc(s->sh_size);
78 mod->arch.data_b_l1 = dest; 81 mod->arch.data_b_l1 = dest;
79 if (dest == NULL) { 82 if (dest == NULL) {
80 pr_err("L1 data memory allocation failed\n"); 83 mod_err(mod, "L1 data memory allocation failed\n");
81 return -1; 84 return -1;
82 } 85 }
83 memcpy(dest, (void *)s->sh_addr, s->sh_size); 86 memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -87,7 +90,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
87 dest = l1_data_B_sram_alloc(s->sh_size); 90 dest = l1_data_B_sram_alloc(s->sh_size);
88 mod->arch.bss_b_l1 = dest; 91 mod->arch.bss_b_l1 = dest;
89 if (dest == NULL) { 92 if (dest == NULL) {
90 pr_err("L1 data memory allocation failed\n"); 93 mod_err(mod, "L1 data memory allocation failed\n");
91 return -1; 94 return -1;
92 } 95 }
93 memset(dest, 0, s->sh_size); 96 memset(dest, 0, s->sh_size);
@@ -99,7 +102,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
99 dest = l2_sram_alloc(s->sh_size); 102 dest = l2_sram_alloc(s->sh_size);
100 mod->arch.text_l2 = dest; 103 mod->arch.text_l2 = dest;
101 if (dest == NULL) { 104 if (dest == NULL) {
102 pr_err("L2 SRAM allocation failed\n"); 105 mod_err(mod, "L2 SRAM allocation failed\n");
103 return -1; 106 return -1;
104 } 107 }
105 memcpy(dest, (void *)s->sh_addr, s->sh_size); 108 memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -111,7 +114,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
111 dest = l2_sram_alloc(s->sh_size); 114 dest = l2_sram_alloc(s->sh_size);
112 mod->arch.data_l2 = dest; 115 mod->arch.data_l2 = dest;
113 if (dest == NULL) { 116 if (dest == NULL) {
114 pr_err("L2 SRAM allocation failed\n"); 117 mod_err(mod, "L2 SRAM allocation failed\n");
115 return -1; 118 return -1;
116 } 119 }
117 memcpy(dest, (void *)s->sh_addr, s->sh_size); 120 memcpy(dest, (void *)s->sh_addr, s->sh_size);
@@ -123,7 +126,7 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
123 dest = l2_sram_zalloc(s->sh_size); 126 dest = l2_sram_zalloc(s->sh_size);
124 mod->arch.bss_l2 = dest; 127 mod->arch.bss_l2 = dest;
125 if (dest == NULL) { 128 if (dest == NULL) {
126 pr_err("L2 SRAM allocation failed\n"); 129 mod_err(mod, "L2 SRAM allocation failed\n");
127 return -1; 130 return -1;
128 } 131 }
129 132
@@ -157,8 +160,8 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
157 Elf32_Sym *sym; 160 Elf32_Sym *sym;
158 unsigned long location, value, size; 161 unsigned long location, value, size;
159 162
160 pr_debug("applying relocate section %u to %u\n", 163 mod_debug(mod, "applying relocate section %u to %u\n",
161 relsec, sechdrs[relsec].sh_info); 164 relsec, sechdrs[relsec].sh_info);
162 165
163 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { 166 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
164 /* This is where to make the change */ 167 /* This is where to make the change */
@@ -174,14 +177,14 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
174 177
175#ifdef CONFIG_SMP 178#ifdef CONFIG_SMP
176 if (location >= COREB_L1_DATA_A_START) { 179 if (location >= COREB_L1_DATA_A_START) {
177 pr_err("cannot relocate in L1: %u (SMP kernel)\n", 180 mod_err(mod, "cannot relocate in L1: %u (SMP kernel)\n",
178 ELF32_R_TYPE(rel[i].r_info)); 181 ELF32_R_TYPE(rel[i].r_info));
179 return -ENOEXEC; 182 return -ENOEXEC;
180 } 183 }
181#endif 184#endif
182 185
183 pr_debug("location is %lx, value is %lx type is %d\n", 186 mod_debug(mod, "location is %lx, value is %lx type is %d\n",
184 location, value, ELF32_R_TYPE(rel[i].r_info)); 187 location, value, ELF32_R_TYPE(rel[i].r_info));
185 188
186 switch (ELF32_R_TYPE(rel[i].r_info)) { 189 switch (ELF32_R_TYPE(rel[i].r_info)) {
187 190
@@ -200,12 +203,12 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
200 case R_BFIN_PCREL12_JUMP: 203 case R_BFIN_PCREL12_JUMP:
201 case R_BFIN_PCREL12_JUMP_S: 204 case R_BFIN_PCREL12_JUMP_S:
202 case R_BFIN_PCREL10: 205 case R_BFIN_PCREL10:
203 pr_err("unsupported relocation: %u (no -mlong-calls?)\n", 206 mod_err(mod, "unsupported relocation: %u (no -mlong-calls?)\n",
204 ELF32_R_TYPE(rel[i].r_info)); 207 ELF32_R_TYPE(rel[i].r_info));
205 return -ENOEXEC; 208 return -ENOEXEC;
206 209
207 default: 210 default:
208 pr_err("unknown relocation: %u\n", 211 mod_err(mod, "unknown relocation: %u\n",
209 ELF32_R_TYPE(rel[i].r_info)); 212 ELF32_R_TYPE(rel[i].r_info));
210 return -ENOEXEC; 213 return -ENOEXEC;
211 } 214 }
@@ -222,7 +225,7 @@ apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
222 isram_memcpy((void *)location, &value, size); 225 isram_memcpy((void *)location, &value, size);
223 break; 226 break;
224 default: 227 default:
225 pr_err("invalid relocation for %#lx\n", location); 228 mod_err(mod, "invalid relocation for %#lx\n", location);
226 return -ENOEXEC; 229 return -ENOEXEC;
227 } 230 }
228 } 231 }
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
index a1c55788c5d6..53a8d5885887 100644
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -179,11 +179,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
179 */ 179 */
180#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 180#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
181 181
182static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
183{
184 smp_cond_load_acquire(&lock->lock, !VAL);
185}
186
187#define arch_spin_is_locked(x) ((x)->lock != 0) 182#define arch_spin_is_locked(x) ((x)->lock != 0)
188 183
189#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) 184#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index ca9e76149a4a..df2c121164b8 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -76,22 +76,6 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
76 ACCESS_ONCE(*p) = (tmp + 2) & ~1; 76 ACCESS_ONCE(*p) = (tmp + 2) & ~1;
77} 77}
78 78
79static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
80{
81 int *p = (int *)&lock->lock, ticket;
82
83 ia64_invala();
84
85 for (;;) {
86 asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory");
87 if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
88 return;
89 cpu_relax();
90 }
91
92 smp_acquire__after_ctrl_dep();
93}
94
95static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) 79static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
96{ 80{
97 long tmp = ACCESS_ONCE(lock->lock); 81 long tmp = ACCESS_ONCE(lock->lock);
@@ -143,11 +127,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
143 arch_spin_lock(lock); 127 arch_spin_lock(lock);
144} 128}
145 129
146static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
147{
148 __ticket_spin_unlock_wait(lock);
149}
150
151#define arch_read_can_lock(rw) (*(volatile int *)(rw) >= 0) 130#define arch_read_can_lock(rw) (*(volatile int *)(rw) >= 0)
152#define arch_write_can_lock(rw) (*(volatile int *)(rw) == 0) 131#define arch_write_can_lock(rw) (*(volatile int *)(rw) == 0)
153 132
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index 323c7fc953cd..a56825592b90 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -30,11 +30,6 @@
30#define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) 30#define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0)
31#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 31#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
32 32
33static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
34{
35 smp_cond_load_acquire(&lock->slock, VAL > 0);
36}
37
38/** 33/**
39 * arch_spin_trylock - Try spin lock and return a result 34 * arch_spin_trylock - Try spin lock and return a result
40 * @lock: Pointer to the lock variable 35 * @lock: Pointer to the lock variable
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
index c0c7a22be1ae..ddf7fe5708a6 100644
--- a/arch/metag/include/asm/spinlock.h
+++ b/arch/metag/include/asm/spinlock.h
@@ -15,11 +15,6 @@
15 * locked. 15 * locked.
16 */ 16 */
17 17
18static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
19{
20 smp_cond_load_acquire(&lock->lock, !VAL);
21}
22
23#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 18#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
24 19
25#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) 20#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h
index 9c7b8f7942d8..fe413b41df6c 100644
--- a/arch/mn10300/include/asm/spinlock.h
+++ b/arch/mn10300/include/asm/spinlock.h
@@ -26,11 +26,6 @@
26 26
27#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0) 27#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0)
28 28
29static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
30{
31 smp_cond_load_acquire(&lock->slock, !VAL);
32}
33
34static inline void arch_spin_unlock(arch_spinlock_t *lock) 29static inline void arch_spin_unlock(arch_spinlock_t *lock)
35{ 30{
36 asm volatile( 31 asm volatile(
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index e32936cd7f10..55bfe4affca3 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -14,13 +14,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x)
14 14
15#define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0) 15#define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
16 16
17static inline void arch_spin_unlock_wait(arch_spinlock_t *x)
18{
19 volatile unsigned int *a = __ldcw_align(x);
20
21 smp_cond_load_acquire(a, VAL);
22}
23
24static inline void arch_spin_lock_flags(arch_spinlock_t *x, 17static inline void arch_spin_lock_flags(arch_spinlock_t *x,
25 unsigned long flags) 18 unsigned long flags)
26{ 19{
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 8c1b913de6d7..d256e448ea49 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -170,39 +170,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
170 lock->slock = 0; 170 lock->slock = 0;
171} 171}
172 172
173static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
174{
175 arch_spinlock_t lock_val;
176
177 smp_mb();
178
179 /*
180 * Atomically load and store back the lock value (unchanged). This
181 * ensures that our observation of the lock value is ordered with
182 * respect to other lock operations.
183 */
184 __asm__ __volatile__(
185"1: " PPC_LWARX(%0, 0, %2, 0) "\n"
186" stwcx. %0, 0, %2\n"
187" bne- 1b\n"
188 : "=&r" (lock_val), "+m" (*lock)
189 : "r" (lock)
190 : "cr0", "xer");
191
192 if (arch_spin_value_unlocked(lock_val))
193 goto out;
194
195 while (lock->slock) {
196 HMT_low();
197 if (SHARED_PROCESSOR)
198 __spin_yield(lock);
199 }
200 HMT_medium();
201
202out:
203 smp_mb();
204}
205
206/* 173/*
207 * Read-write spinlocks, allowing multiple readers 174 * Read-write spinlocks, allowing multiple readers
208 * but only one writer. 175 * but only one writer.
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index f7838ecd83c6..217ee5210c32 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -98,13 +98,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
98 : "cc", "memory"); 98 : "cc", "memory");
99} 99}
100 100
101static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
102{
103 while (arch_spin_is_locked(lock))
104 arch_spin_relax(lock);
105 smp_acquire__after_ctrl_dep();
106}
107
108/* 101/*
109 * Read-write spinlocks, allowing multiple readers 102 * Read-write spinlocks, allowing multiple readers
110 * but only one writer. 103 * but only one writer.
diff --git a/arch/sh/include/asm/spinlock-cas.h b/arch/sh/include/asm/spinlock-cas.h
index c46e8cc7b515..5ed7dbbd94ff 100644
--- a/arch/sh/include/asm/spinlock-cas.h
+++ b/arch/sh/include/asm/spinlock-cas.h
@@ -29,11 +29,6 @@ static inline unsigned __sl_cas(volatile unsigned *p, unsigned old, unsigned new
29#define arch_spin_is_locked(x) ((x)->lock <= 0) 29#define arch_spin_is_locked(x) ((x)->lock <= 0)
30#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 30#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
31 31
32static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
33{
34 smp_cond_load_acquire(&lock->lock, VAL > 0);
35}
36
37static inline void arch_spin_lock(arch_spinlock_t *lock) 32static inline void arch_spin_lock(arch_spinlock_t *lock)
38{ 33{
39 while (!__sl_cas(&lock->lock, 1, 0)); 34 while (!__sl_cas(&lock->lock, 1, 0));
diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h
index cec78143fa83..f77263aae760 100644
--- a/arch/sh/include/asm/spinlock-llsc.h
+++ b/arch/sh/include/asm/spinlock-llsc.h
@@ -21,11 +21,6 @@
21#define arch_spin_is_locked(x) ((x)->lock <= 0) 21#define arch_spin_is_locked(x) ((x)->lock <= 0)
22#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 22#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
23 23
24static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
25{
26 smp_cond_load_acquire(&lock->lock, VAL > 0);
27}
28
29/* 24/*
30 * Simple spin lock operations. There are two variants, one clears IRQ's 25 * Simple spin lock operations. There are two variants, one clears IRQ's
31 * on the local processor, one does not. 26 * on the local processor, one does not.
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 8011e79f59c9..67345b2dc408 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -14,11 +14,6 @@
14 14
15#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) 15#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
16 16
17static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
18{
19 smp_cond_load_acquire(&lock->lock, !VAL);
20}
21
22static inline void arch_spin_lock(arch_spinlock_t *lock) 17static inline void arch_spin_lock(arch_spinlock_t *lock)
23{ 18{
24 __asm__ __volatile__( 19 __asm__ __volatile__(
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index b14b1ba5bf9c..cba8ba9b8da6 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -64,8 +64,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
64 lock->current_ticket = old_ticket + TICKET_QUANTUM; 64 lock->current_ticket = old_ticket + TICKET_QUANTUM;
65} 65}
66 66
67void arch_spin_unlock_wait(arch_spinlock_t *lock);
68
69/* 67/*
70 * Read-write spinlocks, allowing multiple readers 68 * Read-write spinlocks, allowing multiple readers
71 * but only one writer. 69 * but only one writer.
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index b9718fb4e74a..9a2c2d605752 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -58,8 +58,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
58 __insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT); 58 __insn_fetchadd4(&lock->lock, 1U << __ARCH_SPIN_CURRENT_SHIFT);
59} 59}
60 60
61void arch_spin_unlock_wait(arch_spinlock_t *lock);
62
63void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val); 61void arch_spin_lock_slow(arch_spinlock_t *lock, u32 val);
64 62
65/* Grab the "next" ticket number and bump it atomically. 63/* Grab the "next" ticket number and bump it atomically.
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 076c6cc43113..db9333f2447c 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -62,29 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock)
62} 62}
63EXPORT_SYMBOL(arch_spin_trylock); 63EXPORT_SYMBOL(arch_spin_trylock);
64 64
65void arch_spin_unlock_wait(arch_spinlock_t *lock)
66{
67 u32 iterations = 0;
68 int curr = READ_ONCE(lock->current_ticket);
69 int next = READ_ONCE(lock->next_ticket);
70
71 /* Return immediately if unlocked. */
72 if (next == curr)
73 return;
74
75 /* Wait until the current locker has released the lock. */
76 do {
77 delay_backoff(iterations++);
78 } while (READ_ONCE(lock->current_ticket) == curr);
79
80 /*
81 * The TILE architecture doesn't do read speculation; therefore
82 * a control dependency guarantees a LOAD->{LOAD,STORE} order.
83 */
84 barrier();
85}
86EXPORT_SYMBOL(arch_spin_unlock_wait);
87
88/* 65/*
89 * The low byte is always reserved to be the marker for a "tns" operation 66 * The low byte is always reserved to be the marker for a "tns" operation
90 * since the low bit is set to "1" by a tns. The next seven bits are 67 * since the low bit is set to "1" by a tns. The next seven bits are
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
index a4b5b2cbce93..de414c22892f 100644
--- a/arch/tile/lib/spinlock_64.c
+++ b/arch/tile/lib/spinlock_64.c
@@ -62,28 +62,6 @@ int arch_spin_trylock(arch_spinlock_t *lock)
62} 62}
63EXPORT_SYMBOL(arch_spin_trylock); 63EXPORT_SYMBOL(arch_spin_trylock);
64 64
65void arch_spin_unlock_wait(arch_spinlock_t *lock)
66{
67 u32 iterations = 0;
68 u32 val = READ_ONCE(lock->lock);
69 u32 curr = arch_spin_current(val);
70
71 /* Return immediately if unlocked. */
72 if (arch_spin_next(val) == curr)
73 return;
74
75 /* Wait until the current locker has released the lock. */
76 do {
77 delay_backoff(iterations++);
78 } while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
79
80 /*
81 * The TILE architecture doesn't do read speculation; therefore
82 * a control dependency guarantees a LOAD->{LOAD,STORE} order.
83 */
84 barrier();
85}
86EXPORT_SYMBOL(arch_spin_unlock_wait);
87 65
88/* 66/*
89 * If the read lock fails due to a writer, we retry periodically 67 * If the read lock fails due to a writer, we retry periodically
diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h
index a36221cf6363..3bb49681ee24 100644
--- a/arch/xtensa/include/asm/spinlock.h
+++ b/arch/xtensa/include/asm/spinlock.h
@@ -33,11 +33,6 @@
33 33
34#define arch_spin_is_locked(x) ((x)->slock != 0) 34#define arch_spin_is_locked(x) ((x)->slock != 0)
35 35
36static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
37{
38 smp_cond_load_acquire(&lock->slock, !VAL);
39}
40
41#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 36#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
42 37
43static inline void arch_spin_lock(arch_spinlock_t *lock) 38static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 3dbd05532c09..e4effef0c83f 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -645,12 +645,11 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
645 * completions are honored. A scmd is determined to have 645 * completions are honored. A scmd is determined to have
646 * timed out iff its associated qc is active and not failed. 646 * timed out iff its associated qc is active and not failed.
647 */ 647 */
648 spin_lock_irqsave(ap->lock, flags);
648 if (ap->ops->error_handler) { 649 if (ap->ops->error_handler) {
649 struct scsi_cmnd *scmd, *tmp; 650 struct scsi_cmnd *scmd, *tmp;
650 int nr_timedout = 0; 651 int nr_timedout = 0;
651 652
652 spin_lock_irqsave(ap->lock, flags);
653
654 /* This must occur under the ap->lock as we don't want 653 /* This must occur under the ap->lock as we don't want
655 a polled recovery to race the real interrupt handler 654 a polled recovery to race the real interrupt handler
656 655
@@ -700,12 +699,11 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
700 if (nr_timedout) 699 if (nr_timedout)
701 __ata_port_freeze(ap); 700 __ata_port_freeze(ap);
702 701
703 spin_unlock_irqrestore(ap->lock, flags);
704 702
705 /* initialize eh_tries */ 703 /* initialize eh_tries */
706 ap->eh_tries = ATA_EH_MAX_TRIES; 704 ap->eh_tries = ATA_EH_MAX_TRIES;
707 } else 705 }
708 spin_unlock_wait(ap->lock); 706 spin_unlock_irqrestore(ap->lock, flags);
709 707
710} 708}
711EXPORT_SYMBOL(ata_scsi_cmd_error_handler); 709EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 9f0681bf1e87..66260777d644 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -22,17 +22,6 @@
22#include <asm-generic/qspinlock_types.h> 22#include <asm-generic/qspinlock_types.h>
23 23
24/** 24/**
25 * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock
26 * @lock : Pointer to queued spinlock structure
27 *
28 * There is a very slight possibility of live-lock if the lockers keep coming
29 * and the waiter is just unfortunate enough to not see any unlock state.
30 */
31#ifndef queued_spin_unlock_wait
32extern void queued_spin_unlock_wait(struct qspinlock *lock);
33#endif
34
35/**
36 * queued_spin_is_locked - is the spinlock locked? 25 * queued_spin_is_locked - is the spinlock locked?
37 * @lock: Pointer to queued spinlock structure 26 * @lock: Pointer to queued spinlock structure
38 * Return: 1 if it is locked, 0 otherwise 27 * Return: 1 if it is locked, 0 otherwise
@@ -41,8 +30,6 @@ extern void queued_spin_unlock_wait(struct qspinlock *lock);
41static __always_inline int queued_spin_is_locked(struct qspinlock *lock) 30static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
42{ 31{
43 /* 32 /*
44 * See queued_spin_unlock_wait().
45 *
46 * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL 33 * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL
47 * isn't immediately observable. 34 * isn't immediately observable.
48 */ 35 */
@@ -135,6 +122,5 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
135#define arch_spin_trylock(l) queued_spin_trylock(l) 122#define arch_spin_trylock(l) queued_spin_trylock(l)
136#define arch_spin_unlock(l) queued_spin_unlock(l) 123#define arch_spin_unlock(l) queued_spin_unlock(l)
137#define arch_spin_lock_flags(l, f) queued_spin_lock(l) 124#define arch_spin_lock_flags(l, f) queued_spin_lock(l)
138#define arch_spin_unlock_wait(l) queued_spin_unlock_wait(l)
139 125
140#endif /* __ASM_GENERIC_QSPINLOCK_H */ 126#endif /* __ASM_GENERIC_QSPINLOCK_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a2f6707e9fc0..0e849715e5be 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -126,17 +126,11 @@ extern struct group_info init_groups;
126#endif 126#endif
127 127
128#ifdef CONFIG_PREEMPT_RCU 128#ifdef CONFIG_PREEMPT_RCU
129#define INIT_TASK_RCU_TREE_PREEMPT() \
130 .rcu_blocked_node = NULL,
131#else
132#define INIT_TASK_RCU_TREE_PREEMPT(tsk)
133#endif
134#ifdef CONFIG_PREEMPT_RCU
135#define INIT_TASK_RCU_PREEMPT(tsk) \ 129#define INIT_TASK_RCU_PREEMPT(tsk) \
136 .rcu_read_lock_nesting = 0, \ 130 .rcu_read_lock_nesting = 0, \
137 .rcu_read_unlock_special.s = 0, \ 131 .rcu_read_unlock_special.s = 0, \
138 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ 132 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
139 INIT_TASK_RCU_TREE_PREEMPT() 133 .rcu_blocked_node = NULL,
140#else 134#else
141#define INIT_TASK_RCU_PREEMPT(tsk) 135#define INIT_TASK_RCU_PREEMPT(tsk)
142#endif 136#endif
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index cf307ebf345d..96f1baf62ab8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -58,8 +58,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
58void call_rcu_bh(struct rcu_head *head, rcu_callback_t func); 58void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
59void call_rcu_sched(struct rcu_head *head, rcu_callback_t func); 59void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
60void synchronize_sched(void); 60void synchronize_sched(void);
61void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
62void synchronize_rcu_tasks(void);
63void rcu_barrier_tasks(void); 61void rcu_barrier_tasks(void);
64 62
65#ifdef CONFIG_PREEMPT_RCU 63#ifdef CONFIG_PREEMPT_RCU
@@ -105,6 +103,7 @@ static inline int rcu_preempt_depth(void)
105 103
106/* Internal to kernel */ 104/* Internal to kernel */
107void rcu_init(void); 105void rcu_init(void);
106extern int rcu_scheduler_active __read_mostly;
108void rcu_sched_qs(void); 107void rcu_sched_qs(void);
109void rcu_bh_qs(void); 108void rcu_bh_qs(void);
110void rcu_check_callbacks(int user); 109void rcu_check_callbacks(int user);
@@ -165,8 +164,6 @@ static inline void rcu_init_nohz(void) { }
165 * macro rather than an inline function to avoid #include hell. 164 * macro rather than an inline function to avoid #include hell.
166 */ 165 */
167#ifdef CONFIG_TASKS_RCU 166#ifdef CONFIG_TASKS_RCU
168#define TASKS_RCU(x) x
169extern struct srcu_struct tasks_rcu_exit_srcu;
170#define rcu_note_voluntary_context_switch_lite(t) \ 167#define rcu_note_voluntary_context_switch_lite(t) \
171 do { \ 168 do { \
172 if (READ_ONCE((t)->rcu_tasks_holdout)) \ 169 if (READ_ONCE((t)->rcu_tasks_holdout)) \
@@ -177,10 +174,17 @@ extern struct srcu_struct tasks_rcu_exit_srcu;
177 rcu_all_qs(); \ 174 rcu_all_qs(); \
178 rcu_note_voluntary_context_switch_lite(t); \ 175 rcu_note_voluntary_context_switch_lite(t); \
179 } while (0) 176 } while (0)
177void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
178void synchronize_rcu_tasks(void);
179void exit_tasks_rcu_start(void);
180void exit_tasks_rcu_finish(void);
180#else /* #ifdef CONFIG_TASKS_RCU */ 181#else /* #ifdef CONFIG_TASKS_RCU */
181#define TASKS_RCU(x) do { } while (0)
182#define rcu_note_voluntary_context_switch_lite(t) do { } while (0) 182#define rcu_note_voluntary_context_switch_lite(t) do { } while (0)
183#define rcu_note_voluntary_context_switch(t) rcu_all_qs() 183#define rcu_note_voluntary_context_switch(t) rcu_all_qs()
184#define call_rcu_tasks call_rcu_sched
185#define synchronize_rcu_tasks synchronize_sched
186static inline void exit_tasks_rcu_start(void) { }
187static inline void exit_tasks_rcu_finish(void) { }
184#endif /* #else #ifdef CONFIG_TASKS_RCU */ 188#endif /* #else #ifdef CONFIG_TASKS_RCU */
185 189
186/** 190/**
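Because the !CONFIG_TASKS_RCU stubs above map call_rcu_tasks() and synchronize_rcu_tasks() onto the sched flavor, users of the Tasks-RCU API no longer need #ifdefs of their own. A hedged sketch of the call pattern (struct my_tramp and its helpers are hypothetical, not part of this series):

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct my_tramp {
		struct rcu_head rh;
		/* trampoline text, usage counts, etc. */
	};

	static void my_tramp_free(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct my_tramp, rh));
	}

	static void my_tramp_retire(struct my_tramp *tp)
	{
		/* Freed only after all tasks have voluntarily switched. */
		call_rcu_tasks(&tp->rh, my_tramp_free);
	}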
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 5becbbccb998..b3dbf9502fd0 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -116,13 +116,11 @@ static inline void rcu_irq_exit_irqson(void) { }
116static inline void rcu_irq_enter_irqson(void) { } 116static inline void rcu_irq_enter_irqson(void) { }
117static inline void rcu_irq_exit(void) { } 117static inline void rcu_irq_exit(void) { }
118static inline void exit_rcu(void) { } 118static inline void exit_rcu(void) { }
119 119#ifdef CONFIG_SRCU
120#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
121extern int rcu_scheduler_active __read_mostly;
122void rcu_scheduler_starting(void); 120void rcu_scheduler_starting(void);
123#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ 121#else /* #ifndef CONFIG_SRCU */
124static inline void rcu_scheduler_starting(void) { } 122static inline void rcu_scheduler_starting(void) { }
125#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ 123#endif /* #else #ifndef CONFIG_SRCU */
126static inline void rcu_end_inkernel_boot(void) { } 124static inline void rcu_end_inkernel_boot(void) { }
127static inline bool rcu_is_watching(void) { return true; } 125static inline bool rcu_is_watching(void) { return true; }
128 126
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8337e2db0bb2..e4c38809a09e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -589,9 +589,10 @@ struct task_struct {
589 589
590#ifdef CONFIG_TASKS_RCU 590#ifdef CONFIG_TASKS_RCU
591 unsigned long rcu_tasks_nvcsw; 591 unsigned long rcu_tasks_nvcsw;
592 bool rcu_tasks_holdout; 592 u8 rcu_tasks_holdout;
593 struct list_head rcu_tasks_holdout_list; 593 u8 rcu_tasks_idx;
594 int rcu_tasks_idle_cpu; 594 int rcu_tasks_idle_cpu;
595 struct list_head rcu_tasks_holdout_list;
595#endif /* #ifdef CONFIG_TASKS_RCU */ 596#endif /* #ifdef CONFIG_TASKS_RCU */
596 597
597 struct sched_info sched_info; 598 struct sched_info sched_info;
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d9510e8522d4..ef018a6e4985 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -130,12 +130,6 @@ do { \
130#define smp_mb__before_spinlock() smp_wmb() 130#define smp_mb__before_spinlock() smp_wmb()
131#endif 131#endif
132 132
133/**
134 * raw_spin_unlock_wait - wait until the spinlock gets unlocked
135 * @lock: the spinlock in question.
136 */
137#define raw_spin_unlock_wait(lock) arch_spin_unlock_wait(&(lock)->raw_lock)
138
139#ifdef CONFIG_DEBUG_SPINLOCK 133#ifdef CONFIG_DEBUG_SPINLOCK
140 extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); 134 extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
141#define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock) 135#define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
@@ -369,31 +363,6 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock)
369 raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ 363 raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
370}) 364})
371 365
372/**
373 * spin_unlock_wait - Interpose between successive critical sections
374 * @lock: the spinlock whose critical sections are to be interposed.
375 *
376 * Semantically this is equivalent to a spin_lock() immediately
377 * followed by a spin_unlock(). However, most architectures have
378 * more efficient implementations in which the spin_unlock_wait()
379 * cannot block concurrent lock acquisition, and in some cases
380 * where spin_unlock_wait() does not write to the lock variable.
381 * Nevertheless, spin_unlock_wait() can have high overhead, so if
382 * you feel the need to use it, please check to see if there is
383 * a better way to get your job done.
384 *
385 * The ordering guarantees provided by spin_unlock_wait() are:
386 *
387 * 1. All accesses preceding the spin_unlock_wait() happen before
388 * any accesses in later critical sections for this same lock.
389 * 2. All accesses following the spin_unlock_wait() happen after
390 * any accesses in earlier critical sections for this same lock.
391 */
392static __always_inline void spin_unlock_wait(spinlock_t *lock)
393{
394 raw_spin_unlock_wait(&lock->rlock);
395}
396
397static __always_inline int spin_is_locked(spinlock_t *lock) 366static __always_inline int spin_is_locked(spinlock_t *lock)
398{ 367{
399 return raw_spin_is_locked(&lock->rlock); 368 return raw_spin_is_locked(&lock->rlock);
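With spin_unlock_wait() removed from the generic header, callers that only wanted to wait out the current critical section now take and immediately drop the lock, as the ipc/sem.c and kernel/exit.c hunks below do. A minimal sketch of the conversion (illustrative only; wait_for_current_owner() is not part of this series):

	#include <linux/spinlock.h>

	/*
	 * Rough equivalent of the old spin_unlock_wait(lock): wait until
	 * any critical section that was in flight has completed.  Unlike
	 * the old primitive this briefly excludes new acquirers, but the
	 * acquire/release pair gives at least the documented ordering.
	 */
	static inline void wait_for_current_owner(spinlock_t *lock)
	{
		spin_lock(lock);	/* waits for the current holder */
		spin_unlock(lock);	/* then lets everyone else proceed */
	}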
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 0d9848de677d..612fb530af41 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -26,11 +26,6 @@
26#ifdef CONFIG_DEBUG_SPINLOCK 26#ifdef CONFIG_DEBUG_SPINLOCK
27#define arch_spin_is_locked(x) ((x)->slock == 0) 27#define arch_spin_is_locked(x) ((x)->slock == 0)
28 28
29static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
30{
31 smp_cond_load_acquire(&lock->slock, VAL);
32}
33
34static inline void arch_spin_lock(arch_spinlock_t *lock) 29static inline void arch_spin_lock(arch_spinlock_t *lock)
35{ 30{
36 lock->slock = 0; 31 lock->slock = 0;
@@ -73,7 +68,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
73 68
74#else /* DEBUG_SPINLOCK */ 69#else /* DEBUG_SPINLOCK */
75#define arch_spin_is_locked(lock) ((void)(lock), 0) 70#define arch_spin_is_locked(lock) ((void)(lock), 0)
76#define arch_spin_unlock_wait(lock) do { barrier(); (void)(lock); } while (0)
77/* for sched/core.c and kernel_lock.c: */ 71/* for sched/core.c and kernel_lock.c: */
78# define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) 72# define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0)
79# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) 73# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0)
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index cfbfc540cafc..261471f407a5 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -87,4 +87,17 @@ static inline void srcu_barrier(struct srcu_struct *sp)
87 synchronize_srcu(sp); 87 synchronize_srcu(sp);
88} 88}
89 89
90/* Defined here to avoid size increase for non-torture kernels. */
91static inline void srcu_torture_stats_print(struct srcu_struct *sp,
92 char *tt, char *tf)
93{
94 int idx;
95
96 idx = READ_ONCE(sp->srcu_idx) & 0x1;
97 pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
98 tt, tf, idx,
99 READ_ONCE(sp->srcu_lock_nesting[!idx]),
100 READ_ONCE(sp->srcu_lock_nesting[idx]));
101}
102
90#endif 103#endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 42973f787e7e..a949f4f9e4d7 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -104,8 +104,6 @@ struct srcu_struct {
104#define SRCU_STATE_SCAN1 1 104#define SRCU_STATE_SCAN1 1
105#define SRCU_STATE_SCAN2 2 105#define SRCU_STATE_SCAN2 2
106 106
107void process_srcu(struct work_struct *work);
108
109#define __SRCU_STRUCT_INIT(name) \ 107#define __SRCU_STRUCT_INIT(name) \
110 { \ 108 { \
111 .sda = &name##_srcu_data, \ 109 .sda = &name##_srcu_data, \
@@ -141,5 +139,6 @@ void process_srcu(struct work_struct *work);
141 139
142void synchronize_srcu_expedited(struct srcu_struct *sp); 140void synchronize_srcu_expedited(struct srcu_struct *sp);
143void srcu_barrier(struct srcu_struct *sp); 141void srcu_barrier(struct srcu_struct *sp);
142void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf);
144 143
145#endif 144#endif
diff --git a/include/linux/swait.h b/include/linux/swait.h
index c1f9c62a8a50..4a4e180d0a35 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -169,4 +169,59 @@ do { \
169 __ret; \ 169 __ret; \
170}) 170})
171 171
172#define __swait_event_idle(wq, condition) \
173 (void)___swait_event(wq, condition, TASK_IDLE, 0, schedule())
174
175/**
176 * swait_event_idle - wait without system load contribution
177 * @wq: the waitqueue to wait on
178 * @condition: a C expression for the event to wait for
179 *
180 * The process is put to sleep (TASK_IDLE) until the @condition evaluates to
181 * true. The @condition is checked each time the waitqueue @wq is woken up.
182 *
183 * This function is mostly used when a kthread or workqueue waits for some
184 * condition and doesn't want to contribute to system load. Signals are
185 * ignored.
186 */
187#define swait_event_idle(wq, condition) \
188do { \
189 if (condition) \
190 break; \
191 __swait_event_idle(wq, condition); \
192} while (0)
193
194#define __swait_event_idle_timeout(wq, condition, timeout) \
195 ___swait_event(wq, ___wait_cond_timeout(condition), \
196 TASK_IDLE, timeout, \
197 __ret = schedule_timeout(__ret))
198
199/**
200 * swait_event_idle_timeout - wait up to timeout without load contribution
201 * @wq: the waitqueue to wait on
202 * @condition: a C expression for the event to wait for
203 * @timeout: timeout at which we'll give up in jiffies
204 *
205 * The process is put to sleep (TASK_IDLE) until the @condition evaluates to
206 * true. The @condition is checked each time the waitqueue @wq is woken up.
207 *
208 * This function is mostly used when a kthread or workqueue waits for some
209 * condition and doesn't want to contribute to system load. Signals are
210 * ignored.
211 *
212 * Returns:
213 * 0 if the @condition evaluated to %false after the @timeout elapsed,
214 * 1 if the @condition evaluated to %true after the @timeout elapsed,
215 * or the remaining jiffies (at least 1) if the @condition evaluated
216 * to %true before the @timeout elapsed.
217 */
218#define swait_event_idle_timeout(wq, condition, timeout) \
219({ \
220 long __ret = timeout; \
221 if (!___wait_cond_timeout(condition)) \
222 __ret = __swait_event_idle_timeout(wq, \
223 condition, timeout); \
224 __ret; \
225})
226
172#endif /* _LINUX_SWAIT_H */ 227#endif /* _LINUX_SWAIT_H */
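The new swait_event_idle() helpers are aimed at kthreads that should sleep without inflating the load average. A hedged usage sketch, assuming the swake_up() wakeup API in this tree (my_swq, my_cond and my_kthread() are hypothetical names, not part of this patch):

	#include <linux/swait.h>
	#include <linux/kthread.h>
	#include <linux/compiler.h>

	static DECLARE_SWAIT_QUEUE_HEAD(my_swq);
	static bool my_cond;

	static int my_kthread(void *unused)
	{
		while (!kthread_should_stop()) {
			/* Sleeps in TASK_IDLE, so no loadavg contribution. */
			swait_event_idle(my_swq, READ_ONCE(my_cond) ||
						 kthread_should_stop());
			WRITE_ONCE(my_cond, false);
			/* ... handle the event ... */
		}
		return 0;
	}

	/* Producer side: */
	/*	WRITE_ONCE(my_cond, true);	*/
	/*	swake_up(&my_swq);		*/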
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 91dc089d65b7..e91ae1f2290d 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -703,6 +703,7 @@ TRACE_EVENT(rcu_batch_end,
703 * at the beginning and end of the read, respectively. Note that the 703 * at the beginning and end of the read, respectively. Note that the
704 * callback address can be NULL. 704 * callback address can be NULL.
705 */ 705 */
706#define RCUTORTURENAME_LEN 8
706TRACE_EVENT(rcu_torture_read, 707TRACE_EVENT(rcu_torture_read,
707 708
708 TP_PROTO(const char *rcutorturename, struct rcu_head *rhp, 709 TP_PROTO(const char *rcutorturename, struct rcu_head *rhp,
@@ -711,7 +712,7 @@ TRACE_EVENT(rcu_torture_read,
711 TP_ARGS(rcutorturename, rhp, secs, c_old, c), 712 TP_ARGS(rcutorturename, rhp, secs, c_old, c),
712 713
713 TP_STRUCT__entry( 714 TP_STRUCT__entry(
714 __field(const char *, rcutorturename) 715 __field(char, rcutorturename[RCUTORTURENAME_LEN])
715 __field(struct rcu_head *, rhp) 716 __field(struct rcu_head *, rhp)
716 __field(unsigned long, secs) 717 __field(unsigned long, secs)
717 __field(unsigned long, c_old) 718 __field(unsigned long, c_old)
@@ -719,7 +720,9 @@ TRACE_EVENT(rcu_torture_read,
719 ), 720 ),
720 721
721 TP_fast_assign( 722 TP_fast_assign(
722 __entry->rcutorturename = rcutorturename; 723 strncpy(__entry->rcutorturename, rcutorturename,
724 RCUTORTURENAME_LEN);
725 __entry->rcutorturename[RCUTORTURENAME_LEN - 1] = 0;
723 __entry->rhp = rhp; 726 __entry->rhp = rhp;
724 __entry->secs = secs; 727 __entry->secs = secs;
725 __entry->c_old = c_old; 728 __entry->c_old = c_old;
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
index e0b108bd2624..6d47b3249d8a 100644
--- a/include/uapi/linux/membarrier.h
+++ b/include/uapi/linux/membarrier.h
@@ -40,14 +40,33 @@
40 * (non-running threads are de facto in such a 40 * (non-running threads are de facto in such a
41 * state). This covers threads from all processes 41 * state). This covers threads from all processes
42 * running on the system. This command returns 0. 42 * running on the system. This command returns 0.
43 * @MEMBARRIER_CMD_PRIVATE_EXPEDITED:
44 * Execute a memory barrier on each running
45 * thread belonging to the same process as the current
46 * thread. Upon return from system call, the
47 * caller thread is ensured that all its running
48 * threads siblings have passed through a state
49 * where all memory accesses to user-space
50 * addresses match program order between entry
51 * to and return from the system call
52 * (non-running threads are de facto in such a
53 * state). This only covers threads from the
 54 * same process as the caller thread. This
55 * command returns 0. The "expedited" commands
56 * complete faster than the non-expedited ones,
57 * they never block, but have the downside of
58 * causing extra overhead.
43 * 59 *
44 * Command to be passed to the membarrier system call. The commands need to 60 * Command to be passed to the membarrier system call. The commands need to
45 * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to 61 * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
46 * the value 0. 62 * the value 0.
47 */ 63 */
48enum membarrier_cmd { 64enum membarrier_cmd {
49 MEMBARRIER_CMD_QUERY = 0, 65 MEMBARRIER_CMD_QUERY = 0,
50 MEMBARRIER_CMD_SHARED = (1 << 0), 66 MEMBARRIER_CMD_SHARED = (1 << 0),
67 /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
68 /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
69 MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3),
51}; 70};
52 71
53#endif /* _UAPI_LINUX_MEMBARRIER_H */ 72#endif /* _UAPI_LINUX_MEMBARRIER_H */
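For reference, the new private expedited command is issued from userspace through the raw syscall (there is no libc wrapper). A minimal sketch with error handling trimmed:

	#include <linux/membarrier.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int membarrier(int cmd, int flags)
	{
		return syscall(__NR_membarrier, cmd, flags);
	}

	int main(void)
	{
		int mask = membarrier(MEMBARRIER_CMD_QUERY, 0);

		if (mask < 0 || !(mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED))
			return 1;	/* command not available */
		/* Barrier against all running threads of this process. */
		return membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0) ? 1 : 0;
	}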
diff --git a/ipc/sem.c b/ipc/sem.c
index 38371e93bfa5..c6c50370504c 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -2091,7 +2091,8 @@ void exit_sem(struct task_struct *tsk)
2091 * possibility where we exit while freeary() didn't 2091 * possibility where we exit while freeary() didn't
2092 * finish unlocking sem_undo_list. 2092 * finish unlocking sem_undo_list.
2093 */ 2093 */
2094 spin_unlock_wait(&ulp->lock); 2094 spin_lock(&ulp->lock);
2095 spin_unlock(&ulp->lock);
2095 rcu_read_unlock(); 2096 rcu_read_unlock();
2096 break; 2097 break;
2097 } 2098 }
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb8e8b23c6e..9c323a6daa46 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -108,7 +108,6 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
108obj-$(CONFIG_JUMP_LABEL) += jump_label.o 108obj-$(CONFIG_JUMP_LABEL) += jump_label.o
109obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o 109obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
110obj-$(CONFIG_TORTURE_TEST) += torture.o 110obj-$(CONFIG_TORTURE_TEST) += torture.o
111obj-$(CONFIG_MEMBARRIER) += membarrier.o
112 111
113obj-$(CONFIG_HAS_IOMEM) += memremap.o 112obj-$(CONFIG_HAS_IOMEM) += memremap.o
114 113
diff --git a/kernel/exit.c b/kernel/exit.c
index c5548faa9f37..f9ef3ecc78c1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -764,7 +764,6 @@ void __noreturn do_exit(long code)
764{ 764{
765 struct task_struct *tsk = current; 765 struct task_struct *tsk = current;
766 int group_dead; 766 int group_dead;
767 TASKS_RCU(int tasks_rcu_i);
768 767
769 profile_task_exit(tsk); 768 profile_task_exit(tsk);
770 kcov_task_exit(tsk); 769 kcov_task_exit(tsk);
@@ -819,7 +818,8 @@ void __noreturn do_exit(long code)
819 * Ensure that we must observe the pi_state in exit_mm() -> 818 * Ensure that we must observe the pi_state in exit_mm() ->
820 * mm_release() -> exit_pi_state_list(). 819 * mm_release() -> exit_pi_state_list().
821 */ 820 */
822 raw_spin_unlock_wait(&tsk->pi_lock); 821 raw_spin_lock_irq(&tsk->pi_lock);
822 raw_spin_unlock_irq(&tsk->pi_lock);
823 823
824 if (unlikely(in_atomic())) { 824 if (unlikely(in_atomic())) {
825 pr_info("note: %s[%d] exited with preempt_count %d\n", 825 pr_info("note: %s[%d] exited with preempt_count %d\n",
@@ -881,9 +881,7 @@ void __noreturn do_exit(long code)
881 */ 881 */
882 flush_ptrace_hw_breakpoint(tsk); 882 flush_ptrace_hw_breakpoint(tsk);
883 883
884 TASKS_RCU(preempt_disable()); 884 exit_tasks_rcu_start();
885 TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
886 TASKS_RCU(preempt_enable());
887 exit_notify(tsk, group_dead); 885 exit_notify(tsk, group_dead);
888 proc_exit_connector(tsk); 886 proc_exit_connector(tsk);
889 mpol_put_task_policy(tsk); 887 mpol_put_task_policy(tsk);
@@ -918,7 +916,7 @@ void __noreturn do_exit(long code)
918 if (tsk->nr_dirtied) 916 if (tsk->nr_dirtied)
919 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); 917 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
920 exit_rcu(); 918 exit_rcu();
921 TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); 919 exit_tasks_rcu_finish();
922 920
923 do_task_dead(); 921 do_task_dead();
924} 922}
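The open-coded TASKS_RCU() SRCU read-side markers are replaced by exit_tasks_rcu_start()/exit_tasks_rcu_finish(). Judging from the removed lines and the new rcu_tasks_idx field added to task_struct above, the helpers plausibly reduce to something like the sketch below; the real definitions live in the RCU update code, not in this hunk.

	/* Plausible shape only, inferred from the removed TASKS_RCU() lines. */
	void exit_tasks_rcu_start(void)
	{
		preempt_disable();
		current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
		preempt_enable();
	}

	void exit_tasks_rcu_finish(void)
	{
		preempt_disable();
		__srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
		preempt_enable();
	}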
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index fd24153e8a48..294294c71ba4 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -268,123 +268,6 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
268#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath 268#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
269#endif 269#endif
270 270
271/*
272 * Various notes on spin_is_locked() and spin_unlock_wait(), which are
273 * 'interesting' functions:
274 *
275 * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE
276 * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64,
277 * PPC). Also qspinlock has a similar issue per construction, the setting of
278 * the locked byte can be unordered acquiring the lock proper.
279 *
280 * This gets to be 'interesting' in the following cases, where the /should/s
281 * end up false because of this issue.
282 *
283 *
284 * CASE 1:
285 *
286 * So the spin_is_locked() correctness issue comes from something like:
287 *
288 * CPU0 CPU1
289 *
290 * global_lock(); local_lock(i)
291 * spin_lock(&G) spin_lock(&L[i])
292 * for (i) if (!spin_is_locked(&G)) {
293 * spin_unlock_wait(&L[i]); smp_acquire__after_ctrl_dep();
294 * return;
295 * }
296 * // deal with fail
297 *
298 * Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such
299 * that there is exclusion between the two critical sections.
300 *
301 * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from
302 * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i])
303 * /should/ be constrained by the ACQUIRE from spin_lock(&G).
304 *
305 * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB.
306 *
307 *
308 * CASE 2:
309 *
310 * For spin_unlock_wait() there is a second correctness issue, namely:
311 *
312 * CPU0 CPU1
313 *
314 * flag = set;
315 * smp_mb(); spin_lock(&l)
316 * spin_unlock_wait(&l); if (!flag)
317 * // add to lockless list
318 * spin_unlock(&l);
319 * // iterate lockless list
320 *
321 * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0
322 * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE
323 * semantics etc..)
324 *
325 * Where flag /should/ be ordered against the locked store of l.
326 */
327
328/*
329 * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
330 * issuing an _unordered_ store to set _Q_LOCKED_VAL.
331 *
332 * This means that the store can be delayed, but no later than the
333 * store-release from the unlock. This means that simply observing
334 * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired.
335 *
336 * There are two paths that can issue the unordered store:
337 *
338 * (1) clear_pending_set_locked(): *,1,0 -> *,0,1
339 *
340 * (2) set_locked(): t,0,0 -> t,0,1 ; t != 0
341 * atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1
342 *
343 * However, in both cases we have other !0 state we've set before to queue
 344 * ourselves:
345 *
346 * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our
347 * load is constrained by that ACQUIRE to not pass before that, and thus must
348 * observe the store.
349 *
 350 * For (2) we have a more interesting scenario. We enqueue ourselves using
351 * xchg_tail(), which ends up being a RELEASE. This in itself is not
352 * sufficient, however that is followed by an smp_cond_acquire() on the same
353 * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and
354 * guarantees we must observe that store.
355 *
356 * Therefore both cases have other !0 state that is observable before the
357 * unordered locked byte store comes through. This means we can use that to
358 * wait for the lock store, and then wait for an unlock.
359 */
360#ifndef queued_spin_unlock_wait
361void queued_spin_unlock_wait(struct qspinlock *lock)
362{
363 u32 val;
364
365 for (;;) {
366 val = atomic_read(&lock->val);
367
368 if (!val) /* not locked, we're done */
369 goto done;
370
371 if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */
372 break;
373
374 /* not locked, but pending, wait until we observe the lock */
375 cpu_relax();
376 }
377
378 /* any unlock is good */
379 while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
380 cpu_relax();
381
382done:
383 smp_acquire__after_ctrl_dep();
384}
385EXPORT_SYMBOL(queued_spin_unlock_wait);
386#endif
387
388#endif /* _GEN_PV_LOCK_SLOWPATH */ 271#endif /* _GEN_PV_LOCK_SLOWPATH */
389 272
390/** 273/**
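With queued_spin_unlock_wait() removed, the CASE 2 pattern from the deleted comment is now written as a full lock/unlock pair on CPU0; the release/acquire chain on the lock then orders the flag store against CPU1's critical section without the unordered-locked-byte subtleties described above. A sketch of the converted CPU0 side, reusing the comment's flag and l (illustrative only):

	/* CPU0, after this series: */
	WRITE_ONCE(flag, 1);
	spin_lock(&l);		/* waits out any current holder of l */
	spin_unlock(&l);
	/* iterate lockless list */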
diff --git a/kernel/membarrier.c b/kernel/membarrier.c
deleted file mode 100644
index 9f9284f37f8d..000000000000
--- a/kernel/membarrier.c
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * Copyright (C) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 *
4 * membarrier system call
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 */
16
17#include <linux/syscalls.h>
18#include <linux/membarrier.h>
19#include <linux/tick.h>
20
21/*
22 * Bitmask made from a "or" of all commands within enum membarrier_cmd,
23 * except MEMBARRIER_CMD_QUERY.
24 */
25#define MEMBARRIER_CMD_BITMASK (MEMBARRIER_CMD_SHARED)
26
27/**
28 * sys_membarrier - issue memory barriers on a set of threads
29 * @cmd: Takes command values defined in enum membarrier_cmd.
30 * @flags: Currently needs to be 0. For future extensions.
31 *
32 * If this system call is not implemented, -ENOSYS is returned. If the
33 * command specified does not exist, or if the command argument is invalid,
34 * this system call returns -EINVAL. For a given command, with flags argument
35 * set to 0, this system call is guaranteed to always return the same value
36 * until reboot.
37 *
38 * All memory accesses performed in program order from each targeted thread
 39 * are guaranteed to be ordered with respect to sys_membarrier(). If we use
40 * the semantic "barrier()" to represent a compiler barrier forcing memory
41 * accesses to be performed in program order across the barrier, and
42 * smp_mb() to represent explicit memory barriers forcing full memory
43 * ordering across the barrier, we have the following ordering table for
44 * each pair of barrier(), sys_membarrier() and smp_mb():
45 *
46 * The pair ordering is detailed as (O: ordered, X: not ordered):
47 *
48 * barrier() smp_mb() sys_membarrier()
49 * barrier() X X O
50 * smp_mb() X O O
51 * sys_membarrier() O O O
52 */
53SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
54{
55 /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
56 if (tick_nohz_full_enabled())
57 return -ENOSYS;
58 if (unlikely(flags))
59 return -EINVAL;
60 switch (cmd) {
61 case MEMBARRIER_CMD_QUERY:
62 return MEMBARRIER_CMD_BITMASK;
63 case MEMBARRIER_CMD_SHARED:
64 if (num_online_cpus() > 1)
65 synchronize_sched();
66 return 0;
67 default:
68 return -EINVAL;
69 }
70}
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index be90c945063f..9210379c0353 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -69,8 +69,7 @@ config TREE_SRCU
69 This option selects the full-fledged version of SRCU. 69 This option selects the full-fledged version of SRCU.
70 70
71config TASKS_RCU 71config TASKS_RCU
72 bool 72 def_bool PREEMPT
73 default n
74 select SRCU 73 select SRCU
75 help 74 help
76 This option enables a task-based RCU implementation that uses 75 This option enables a task-based RCU implementation that uses
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 808b8c85f626..e4b43fef89f5 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -356,22 +356,10 @@ do { \
356 356
357#ifdef CONFIG_TINY_RCU 357#ifdef CONFIG_TINY_RCU
358/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ 358/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
359static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */ 359static inline bool rcu_gp_is_normal(void) { return true; }
360{ 360static inline bool rcu_gp_is_expedited(void) { return false; }
361 return true; 361static inline void rcu_expedite_gp(void) { }
362} 362static inline void rcu_unexpedite_gp(void) { }
363static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */
364{
365 return false;
366}
367
368static inline void rcu_expedite_gp(void)
369{
370}
371
372static inline void rcu_unexpedite_gp(void)
373{
374}
375#else /* #ifdef CONFIG_TINY_RCU */ 363#else /* #ifdef CONFIG_TINY_RCU */
376bool rcu_gp_is_normal(void); /* Internal RCU use. */ 364bool rcu_gp_is_normal(void); /* Internal RCU use. */
377bool rcu_gp_is_expedited(void); /* Internal RCU use. */ 365bool rcu_gp_is_expedited(void); /* Internal RCU use. */
@@ -419,12 +407,8 @@ static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
419 *gpnum = 0; 407 *gpnum = 0;
420 *completed = 0; 408 *completed = 0;
421} 409}
422static inline void rcutorture_record_test_transition(void) 410static inline void rcutorture_record_test_transition(void) { }
423{ 411static inline void rcutorture_record_progress(unsigned long vernum) { }
424}
425static inline void rcutorture_record_progress(unsigned long vernum)
426{
427}
428#ifdef CONFIG_RCU_TRACE 412#ifdef CONFIG_RCU_TRACE
429void do_trace_rcu_torture_read(const char *rcutorturename, 413void do_trace_rcu_torture_read(const char *rcutorturename,
430 struct rcu_head *rhp, 414 struct rcu_head *rhp,
@@ -460,92 +444,20 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
460#endif 444#endif
461 445
462#ifdef CONFIG_TINY_RCU 446#ifdef CONFIG_TINY_RCU
463 447static inline unsigned long rcu_batches_started(void) { return 0; }
464/* 448static inline unsigned long rcu_batches_started_bh(void) { return 0; }
465 * Return the number of grace periods started. 449static inline unsigned long rcu_batches_started_sched(void) { return 0; }
466 */ 450static inline unsigned long rcu_batches_completed(void) { return 0; }
467static inline unsigned long rcu_batches_started(void) 451static inline unsigned long rcu_batches_completed_bh(void) { return 0; }
468{ 452static inline unsigned long rcu_batches_completed_sched(void) { return 0; }
469 return 0; 453static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
470} 454static inline unsigned long rcu_exp_batches_completed_sched(void) { return 0; }
471 455static inline unsigned long
472/* 456srcu_batches_completed(struct srcu_struct *sp) { return 0; }
473 * Return the number of bottom-half grace periods started. 457static inline void rcu_force_quiescent_state(void) { }
474 */ 458static inline void rcu_bh_force_quiescent_state(void) { }
475static inline unsigned long rcu_batches_started_bh(void) 459static inline void rcu_sched_force_quiescent_state(void) { }
476{ 460static inline void show_rcu_gp_kthreads(void) { }
477 return 0;
478}
479
480/*
481 * Return the number of sched grace periods started.
482 */
483static inline unsigned long rcu_batches_started_sched(void)
484{
485 return 0;
486}
487
488/*
489 * Return the number of grace periods completed.
490 */
491static inline unsigned long rcu_batches_completed(void)
492{
493 return 0;
494}
495
496/*
497 * Return the number of bottom-half grace periods completed.
498 */
499static inline unsigned long rcu_batches_completed_bh(void)
500{
501 return 0;
502}
503
504/*
505 * Return the number of sched grace periods completed.
506 */
507static inline unsigned long rcu_batches_completed_sched(void)
508{
509 return 0;
510}
511
512/*
513 * Return the number of expedited grace periods completed.
514 */
515static inline unsigned long rcu_exp_batches_completed(void)
516{
517 return 0;
518}
519
520/*
521 * Return the number of expedited sched grace periods completed.
522 */
523static inline unsigned long rcu_exp_batches_completed_sched(void)
524{
525 return 0;
526}
527
528static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
529{
530 return 0;
531}
532
533static inline void rcu_force_quiescent_state(void)
534{
535}
536
537static inline void rcu_bh_force_quiescent_state(void)
538{
539}
540
541static inline void rcu_sched_force_quiescent_state(void)
542{
543}
544
545static inline void show_rcu_gp_kthreads(void)
546{
547}
548
549#else /* #ifdef CONFIG_TINY_RCU */ 461#else /* #ifdef CONFIG_TINY_RCU */
550extern unsigned long rcutorture_testseq; 462extern unsigned long rcutorture_testseq;
551extern unsigned long rcutorture_vernum; 463extern unsigned long rcutorture_vernum;
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 2b62a38b080f..7649fcd2c4c7 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -36,24 +36,6 @@ void rcu_cblist_init(struct rcu_cblist *rclp)
36} 36}
37 37
38/* 38/*
39 * Debug function to actually count the number of callbacks.
40 * If the number exceeds the limit specified, return -1.
41 */
42long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
43{
44 int cnt = 0;
45 struct rcu_head **rhpp = &rclp->head;
46
47 for (;;) {
48 if (!*rhpp)
49 return cnt;
50 if (++cnt > lim)
51 return -1;
52 rhpp = &(*rhpp)->next;
53 }
54}
55
56/*
57 * Dequeue the oldest rcu_head structure from the specified callback 39 * Dequeue the oldest rcu_head structure from the specified callback
58 * list. This function assumes that the callback is non-lazy, but 40 * list. This function assumes that the callback is non-lazy, but
59 * the caller can later invoke rcu_cblist_dequeued_lazy() if it 41 * the caller can later invoke rcu_cblist_dequeued_lazy() if it
@@ -103,17 +85,6 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
103} 85}
104 86
105/* 87/*
106 * Is the specified segment of the specified rcu_segcblist structure
107 * empty of callbacks?
108 */
109bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
110{
111 if (seg == RCU_DONE_TAIL)
112 return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
113 return rsclp->tails[seg - 1] == rsclp->tails[seg];
114}
115
116/*
117 * Does the specified rcu_segcblist structure contain callbacks that 88 * Does the specified rcu_segcblist structure contain callbacks that
118 * are ready to be invoked? 89 * are ready to be invoked?
119 */ 90 */
@@ -134,50 +105,6 @@ bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
134} 105}
135 106
136/* 107/*
137 * Dequeue and return the first ready-to-invoke callback. If there
138 * are no ready-to-invoke callbacks, return NULL. Disables interrupts
139 * to avoid interference. Does not protect from interference from other
140 * CPUs or tasks.
141 */
142struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp)
143{
144 unsigned long flags;
145 int i;
146 struct rcu_head *rhp;
147
148 local_irq_save(flags);
149 if (!rcu_segcblist_ready_cbs(rsclp)) {
150 local_irq_restore(flags);
151 return NULL;
152 }
153 rhp = rsclp->head;
154 BUG_ON(!rhp);
155 rsclp->head = rhp->next;
156 for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) {
157 if (rsclp->tails[i] != &rhp->next)
158 break;
159 rsclp->tails[i] = &rsclp->head;
160 }
161 smp_mb(); /* Dequeue before decrement for rcu_barrier(). */
162 WRITE_ONCE(rsclp->len, rsclp->len - 1);
163 local_irq_restore(flags);
164 return rhp;
165}
166
167/*
168 * Account for the fact that a previously dequeued callback turned out
169 * to be marked as lazy.
170 */
171void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp)
172{
173 unsigned long flags;
174
175 local_irq_save(flags);
176 rsclp->len_lazy--;
177 local_irq_restore(flags);
178}
179
180/*
181 * Return a pointer to the first callback in the specified rcu_segcblist 108 * Return a pointer to the first callback in the specified rcu_segcblist
182 * structure. This is useful for diagnostics. 109 * structure. This is useful for diagnostics.
183 */ 110 */
@@ -203,17 +130,6 @@ struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
203} 130}
204 131
205/* 132/*
206 * Does the specified rcu_segcblist structure contain callbacks that
207 * have not yet been processed beyond having been posted, that is,
208 * does it contain callbacks in its last segment?
209 */
210bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
211{
212 return rcu_segcblist_is_enabled(rsclp) &&
213 !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
214}
215
216/*
217 * Enqueue the specified callback onto the specified rcu_segcblist 133 * Enqueue the specified callback onto the specified rcu_segcblist
218 * structure, updating accounting as needed. Note that the ->len 134 * structure, updating accounting as needed. Note that the ->len
219 * field may be accessed locklessly, hence the WRITE_ONCE(). 135 * field may be accessed locklessly, hence the WRITE_ONCE().
@@ -503,3 +419,27 @@ bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
503 return true; 419 return true;
504 return false; 420 return false;
505} 421}
422
423/*
424 * Merge the source rcu_segcblist structure into the destination
425 * rcu_segcblist structure, then initialize the source. Any pending
426 * callbacks from the source get to start over. It is best to
427 * advance and accelerate both the destination and the source
428 * before merging.
429 */
430void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
431 struct rcu_segcblist *src_rsclp)
432{
433 struct rcu_cblist donecbs;
434 struct rcu_cblist pendcbs;
435
436 rcu_cblist_init(&donecbs);
437 rcu_cblist_init(&pendcbs);
438 rcu_segcblist_extract_count(src_rsclp, &donecbs);
439 rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
440 rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
441 rcu_segcblist_insert_count(dst_rsclp, &donecbs);
442 rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
443 rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
444 rcu_segcblist_init(src_rsclp);
445}
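As the comment notes, callers are expected to advance and accelerate both lists before merging. A hypothetical caller might look like the following (dst, src and seq are assumptions, not taken from this series):

	#include "rcu_segcblist.h"

	static void merge_cblists(struct rcu_segcblist *dst,
				  struct rcu_segcblist *src,
				  unsigned long seq)
	{
		rcu_segcblist_advance(dst, seq);
		rcu_segcblist_advance(src, seq);
		rcu_segcblist_accelerate(dst, seq);
		rcu_segcblist_accelerate(src, seq);
		rcu_segcblist_merge(dst, src);	/* src is reinitialized */
	}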
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 6e36e36478cd..581c12b63544 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -31,29 +31,7 @@ static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
31 rclp->len_lazy--; 31 rclp->len_lazy--;
32} 32}
33 33
34/*
35 * Interim function to return rcu_cblist head pointer. Longer term, the
36 * rcu_cblist will be used more pervasively, removing the need for this
37 * function.
38 */
39static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
40{
41 return rclp->head;
42}
43
44/*
45 * Interim function to return rcu_cblist tail pointer. Longer term, the
46 * rcu_cblist will be used more pervasively, removing the need for this
47 * function.
48 */
49static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
50{
51 WARN_ON_ONCE(!rclp->head);
52 return rclp->tail;
53}
54
55void rcu_cblist_init(struct rcu_cblist *rclp); 34void rcu_cblist_init(struct rcu_cblist *rclp);
56long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim);
57struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp); 35struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp);
58 36
59/* 37/*
@@ -134,14 +112,10 @@ static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
134 112
135void rcu_segcblist_init(struct rcu_segcblist *rsclp); 113void rcu_segcblist_init(struct rcu_segcblist *rsclp);
136void rcu_segcblist_disable(struct rcu_segcblist *rsclp); 114void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
137bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg);
138bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp); 115bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
139bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp); 116bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
140struct rcu_head *rcu_segcblist_dequeue(struct rcu_segcblist *rsclp);
141void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp);
142struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp); 117struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
143struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp); 118struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp);
144bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp);
145void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, 119void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
146 struct rcu_head *rhp, bool lazy); 120 struct rcu_head *rhp, bool lazy);
147bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, 121bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
@@ -162,3 +136,5 @@ void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
162bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq); 136bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
163bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, 137bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
164 unsigned long seq); 138 unsigned long seq);
139void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
140 struct rcu_segcblist *src_rsclp);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 3cc18110b612..1f87a02c3399 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -317,8 +317,6 @@ static struct rcu_perf_ops sched_ops = {
317 .name = "sched" 317 .name = "sched"
318}; 318};
319 319
320#ifdef CONFIG_TASKS_RCU
321
322/* 320/*
323 * Definitions for RCU-tasks perf testing. 321 * Definitions for RCU-tasks perf testing.
324 */ 322 */
@@ -346,24 +344,11 @@ static struct rcu_perf_ops tasks_ops = {
346 .name = "tasks" 344 .name = "tasks"
347}; 345};
348 346
349#define RCUPERF_TASKS_OPS &tasks_ops,
350
351static bool __maybe_unused torturing_tasks(void) 347static bool __maybe_unused torturing_tasks(void)
352{ 348{
353 return cur_ops == &tasks_ops; 349 return cur_ops == &tasks_ops;
354} 350}
355 351
356#else /* #ifdef CONFIG_TASKS_RCU */
357
358#define RCUPERF_TASKS_OPS
359
360static bool __maybe_unused torturing_tasks(void)
361{
362 return false;
363}
364
365#endif /* #else #ifdef CONFIG_TASKS_RCU */
366
367/* 352/*
368 * If performance tests complete, wait for shutdown to commence. 353 * If performance tests complete, wait for shutdown to commence.
369 */ 354 */
@@ -658,7 +643,7 @@ rcu_perf_init(void)
658 int firsterr = 0; 643 int firsterr = 0;
659 static struct rcu_perf_ops *perf_ops[] = { 644 static struct rcu_perf_ops *perf_ops[] = {
660 &rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops, 645 &rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops,
661 RCUPERF_TASKS_OPS 646 &tasks_ops,
662 }; 647 };
663 648
664 if (!torture_init_begin(perf_type, verbose, &perf_runnable)) 649 if (!torture_init_begin(perf_type, verbose, &perf_runnable))
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index b8f7f8ce8575..45f2ffbc1e78 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -199,7 +199,8 @@ MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot");
199static u64 notrace rcu_trace_clock_local(void) 199static u64 notrace rcu_trace_clock_local(void)
200{ 200{
201 u64 ts = trace_clock_local(); 201 u64 ts = trace_clock_local();
202 unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC); 202
203 (void)do_div(ts, NSEC_PER_USEC);
203 return ts; 204 return ts;
204} 205}
205#else /* #ifdef CONFIG_RCU_TRACE */ 206#else /* #ifdef CONFIG_RCU_TRACE */
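For reference, the rewritten rcu_trace_clock_local() still converts a nanosecond timestamp to microseconds and simply discards the remainder; in the kernel, do_div() divides its 64-bit argument in place and returns that remainder. A plain user-space equivalent, assuming ordinary 64-bit division rather than do_div():

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

int main(void)
{
	uint64_t ts = 1234567891ULL;		/* a nanosecond timestamp */
	uint64_t rem = ts % NSEC_PER_USEC;	/* do_div() would return this */

	ts /= NSEC_PER_USEC;			/* do_div() divides in place */
	printf("%llu us (remainder %llu ns discarded)\n",
	       (unsigned long long)ts, (unsigned long long)rem);
	return 0;
}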
@@ -496,7 +497,7 @@ static struct rcu_torture_ops rcu_busted_ops = {
496 .fqs = NULL, 497 .fqs = NULL,
497 .stats = NULL, 498 .stats = NULL,
498 .irq_capable = 1, 499 .irq_capable = 1,
499 .name = "rcu_busted" 500 .name = "busted"
500}; 501};
501 502
502/* 503/*
@@ -522,7 +523,7 @@ static void srcu_read_delay(struct torture_random_state *rrsp)
522 523
523 delay = torture_random(rrsp) % 524 delay = torture_random(rrsp) %
524 (nrealreaders * 2 * longdelay * uspertick); 525 (nrealreaders * 2 * longdelay * uspertick);
525 if (!delay) 526 if (!delay && in_task())
526 schedule_timeout_interruptible(longdelay); 527 schedule_timeout_interruptible(longdelay);
527 else 528 else
528 rcu_read_delay(rrsp); 529 rcu_read_delay(rrsp);
@@ -561,44 +562,7 @@ static void srcu_torture_barrier(void)
561 562
562static void srcu_torture_stats(void) 563static void srcu_torture_stats(void)
563{ 564{
564 int __maybe_unused cpu; 565 srcu_torture_stats_print(srcu_ctlp, torture_type, TORTURE_FLAG);
565 int idx;
566
567#ifdef CONFIG_TREE_SRCU
568 idx = srcu_ctlp->srcu_idx & 0x1;
569 pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
570 torture_type, TORTURE_FLAG, idx);
571 for_each_possible_cpu(cpu) {
572 unsigned long l0, l1;
573 unsigned long u0, u1;
574 long c0, c1;
575 struct srcu_data *counts;
576
577 counts = per_cpu_ptr(srcu_ctlp->sda, cpu);
578 u0 = counts->srcu_unlock_count[!idx];
579 u1 = counts->srcu_unlock_count[idx];
580
581 /*
582 * Make sure that a lock is always counted if the corresponding
583 * unlock is counted.
584 */
585 smp_rmb();
586
587 l0 = counts->srcu_lock_count[!idx];
588 l1 = counts->srcu_lock_count[idx];
589
590 c0 = l0 - u0;
591 c1 = l1 - u1;
592 pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
593 }
594 pr_cont("\n");
595#elif defined(CONFIG_TINY_SRCU)
596 idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1;
597 pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
598 torture_type, TORTURE_FLAG, idx,
599 READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]),
600 READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx]));
601#endif
602} 566}
603 567
604static void srcu_torture_synchronize_expedited(void) 568static void srcu_torture_synchronize_expedited(void)
@@ -620,6 +584,7 @@ static struct rcu_torture_ops srcu_ops = {
620 .call = srcu_torture_call, 584 .call = srcu_torture_call,
621 .cb_barrier = srcu_torture_barrier, 585 .cb_barrier = srcu_torture_barrier,
622 .stats = srcu_torture_stats, 586 .stats = srcu_torture_stats,
587 .irq_capable = 1,
623 .name = "srcu" 588 .name = "srcu"
624}; 589};
625 590
@@ -652,6 +617,7 @@ static struct rcu_torture_ops srcud_ops = {
652 .call = srcu_torture_call, 617 .call = srcu_torture_call,
653 .cb_barrier = srcu_torture_barrier, 618 .cb_barrier = srcu_torture_barrier,
654 .stats = srcu_torture_stats, 619 .stats = srcu_torture_stats,
620 .irq_capable = 1,
655 .name = "srcud" 621 .name = "srcud"
656}; 622};
657 623
@@ -696,8 +662,6 @@ static struct rcu_torture_ops sched_ops = {
696 .name = "sched" 662 .name = "sched"
697}; 663};
698 664
699#ifdef CONFIG_TASKS_RCU
700
701/* 665/*
702 * Definitions for RCU-tasks torture testing. 666 * Definitions for RCU-tasks torture testing.
703 */ 667 */
@@ -735,24 +699,11 @@ static struct rcu_torture_ops tasks_ops = {
735 .name = "tasks" 699 .name = "tasks"
736}; 700};
737 701
738#define RCUTORTURE_TASKS_OPS &tasks_ops,
739
740static bool __maybe_unused torturing_tasks(void) 702static bool __maybe_unused torturing_tasks(void)
741{ 703{
742 return cur_ops == &tasks_ops; 704 return cur_ops == &tasks_ops;
743} 705}
744 706
745#else /* #ifdef CONFIG_TASKS_RCU */
746
747#define RCUTORTURE_TASKS_OPS
748
749static bool __maybe_unused torturing_tasks(void)
750{
751 return false;
752}
753
754#endif /* #else #ifdef CONFIG_TASKS_RCU */
755
756/* 707/*
757 * RCU torture priority-boost testing. Runs one real-time thread per 708 * RCU torture priority-boost testing. Runs one real-time thread per
758 * CPU for moderate bursts, repeatedly registering RCU callbacks and 709 * CPU for moderate bursts, repeatedly registering RCU callbacks and
@@ -1114,6 +1065,11 @@ rcu_torture_fakewriter(void *arg)
1114 return 0; 1065 return 0;
1115} 1066}
1116 1067
1068static void rcu_torture_timer_cb(struct rcu_head *rhp)
1069{
1070 kfree(rhp);
1071}
1072
1117/* 1073/*
1118 * RCU torture reader from timer handler. Dereferences rcu_torture_current, 1074 * RCU torture reader from timer handler. Dereferences rcu_torture_current,
1119 * incrementing the corresponding element of the pipeline array. The 1075 * incrementing the corresponding element of the pipeline array. The
@@ -1176,6 +1132,14 @@ static void rcu_torture_timer(unsigned long unused)
1176 __this_cpu_inc(rcu_torture_batch[completed]); 1132 __this_cpu_inc(rcu_torture_batch[completed]);
1177 preempt_enable(); 1133 preempt_enable();
1178 cur_ops->readunlock(idx); 1134 cur_ops->readunlock(idx);
1135
1136 /* Test call_rcu() invocation from interrupt handler. */
1137 if (cur_ops->call) {
1138 struct rcu_head *rhp = kmalloc(sizeof(*rhp), GFP_NOWAIT);
1139
1140 if (rhp)
1141 cur_ops->call(rhp, rcu_torture_timer_cb);
1142 }
1179} 1143}
1180 1144
1181/* 1145/*
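The hunk above adds a test of call_rcu() invocation from a timer (interrupt) handler: a callback head is allocated with GFP_NOWAIT, posted via cur_ops->call(), and eventually freed by rcu_torture_timer_cb(). The user-space sketch below mimics that allocate/post/free-in-callback pattern; the grace-period deferral is replaced by a direct invocation, and all names are illustrative.

#include <stdio.h>
#include <stdlib.h>

struct fake_rcu_head {
	struct fake_rcu_head *next;
	void (*func)(struct fake_rcu_head *rhp);
};

/* Mirrors rcu_torture_timer_cb(): the callback just frees its head. */
static void timer_cb(struct fake_rcu_head *rhp)
{
	free(rhp);
}

/* A real call_rcu() would defer invocation until after a grace period. */
static void fake_call_rcu(struct fake_rcu_head *rhp,
			  void (*func)(struct fake_rcu_head *rhp))
{
	rhp->func = func;
	rhp->func(rhp);
}

int main(void)
{
	struct fake_rcu_head *rhp = malloc(sizeof(*rhp));

	if (!rhp) {	/* allocation may fail, as with GFP_NOWAIT in the hunk */
		printf("allocation failed, test skipped\n");
		return 0;
	}
	fake_call_rcu(rhp, timer_cb);	/* callback frees rhp */
	printf("callback posted and invoked\n");
	return 0;
}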
@@ -1354,11 +1318,12 @@ rcu_torture_stats_print(void)
1354 srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp, 1318 srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
1355 &flags, &gpnum, &completed); 1319 &flags, &gpnum, &completed);
1356 wtp = READ_ONCE(writer_task); 1320 wtp = READ_ONCE(writer_task);
1357 pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n", 1321 pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx cpu %d\n",
1358 rcu_torture_writer_state_getname(), 1322 rcu_torture_writer_state_getname(),
1359 rcu_torture_writer_state, 1323 rcu_torture_writer_state,
1360 gpnum, completed, flags, 1324 gpnum, completed, flags,
1361 wtp == NULL ? ~0UL : wtp->state); 1325 wtp == NULL ? ~0UL : wtp->state,
1326 wtp == NULL ? -1 : (int)task_cpu(wtp));
1362 show_rcu_gp_kthreads(); 1327 show_rcu_gp_kthreads();
1363 rcu_ftrace_dump(DUMP_ALL); 1328 rcu_ftrace_dump(DUMP_ALL);
1364 } 1329 }
@@ -1749,7 +1714,7 @@ rcu_torture_init(void)
1749 int firsterr = 0; 1714 int firsterr = 0;
1750 static struct rcu_torture_ops *torture_ops[] = { 1715 static struct rcu_torture_ops *torture_ops[] = {
1751 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, 1716 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
1752 &sched_ops, RCUTORTURE_TASKS_OPS 1717 &sched_ops, &tasks_ops,
1753 }; 1718 };
1754 1719
1755 if (!torture_init_begin(torture_type, verbose, &torture_runnable)) 1720 if (!torture_init_begin(torture_type, verbose, &torture_runnable))
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 1a1c1047d2ed..76ac5f50b2c7 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -33,6 +33,8 @@
33#include "rcu_segcblist.h" 33#include "rcu_segcblist.h"
34#include "rcu.h" 34#include "rcu.h"
35 35
36int rcu_scheduler_active __read_mostly;
37
36static int init_srcu_struct_fields(struct srcu_struct *sp) 38static int init_srcu_struct_fields(struct srcu_struct *sp)
37{ 39{
38 sp->srcu_lock_nesting[0] = 0; 40 sp->srcu_lock_nesting[0] = 0;
@@ -193,3 +195,9 @@ void synchronize_srcu(struct srcu_struct *sp)
193 destroy_rcu_head_on_stack(&rs.head); 195 destroy_rcu_head_on_stack(&rs.head);
194} 196}
195EXPORT_SYMBOL_GPL(synchronize_srcu); 197EXPORT_SYMBOL_GPL(synchronize_srcu);
198
199/* Lockdep diagnostics. */
200void __init rcu_scheduler_starting(void)
201{
202 rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
203}
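With tiny_plugin.h deleted (see further down), Tiny SRCU now carries its own rcu_scheduler_active flag and sets it in rcu_scheduler_starting() once early boot is over, so that lockdep-style diagnostics can key off it. A stand-alone sketch of that boot-phase gating pattern follows; the constant values and the check function are illustrative, not the kernel's definitions.

#include <stdio.h>

#define RCU_SCHEDULER_INACTIVE	0
#define RCU_SCHEDULER_RUNNING	2

static int rcu_scheduler_active = RCU_SCHEDULER_INACTIVE;

/* Called once at the end of early boot. */
static void rcu_scheduler_starting(void)
{
	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
}

/* Diagnostics stay quiet until the scheduler is declared running. */
static void blocking_check(const char *caller)
{
	if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
		printf("%s: early boot, check suppressed\n", caller);
		return;
	}
	printf("%s: scheduler running, full checking enabled\n", caller);
}

int main(void)
{
	blocking_check("synchronize_srcu");	/* suppressed */
	rcu_scheduler_starting();		/* end of early boot */
	blocking_check("synchronize_srcu");	/* now enforced */
	return 0;
}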
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index d0ca524bf042..729a8706751d 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -51,6 +51,7 @@ module_param(counter_wrap_check, ulong, 0444);
51 51
52static void srcu_invoke_callbacks(struct work_struct *work); 52static void srcu_invoke_callbacks(struct work_struct *work);
53static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); 53static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
54static void process_srcu(struct work_struct *work);
54 55
55/* 56/*
56 * Initialize SRCU combining tree. Note that statically allocated 57 * Initialize SRCU combining tree. Note that statically allocated
@@ -896,6 +897,15 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
896 __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm); 897 __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
897 wait_for_completion(&rcu.completion); 898 wait_for_completion(&rcu.completion);
898 destroy_rcu_head_on_stack(&rcu.head); 899 destroy_rcu_head_on_stack(&rcu.head);
900
901 /*
902 * Make sure that later code is ordered after the SRCU grace
903 * period. This pairs with the raw_spin_lock_irq_rcu_node()
904 * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed
905 * because the current CPU might have been totally uninvolved with
906 * (and thus unordered against) that grace period.
907 */
908 smp_mb();
899} 909}
900 910
901/** 911/**
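The new smp_mb() in __synchronize_srcu() guarantees that code running after the wait is ordered after the SRCU grace period even on a CPU that was entirely uninvolved with that grace period. As a loose user-space analogy only (the kernel uses a full barrier paired with the callback path's lock acquisition, not release/acquire), the sketch below shows a waiter whose post-wait code is ordered after an updater's earlier stores.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int payload;			/* updated before the "grace period" ends */
static atomic_int gp_done;

static void *updater(void *arg)
{
	(void)arg;
	payload = 42;
	atomic_store_explicit(&gp_done, 1, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, updater, NULL);
	while (!atomic_load_explicit(&gp_done, memory_order_acquire))
		;			/* stands in for wait_for_completion() */
	/* Everything below is ordered after the updater's stores. */
	printf("payload = %d\n", payload);	/* always prints 42 */
	pthread_join(t, NULL);
	return 0;
}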
@@ -1194,7 +1204,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
1194/* 1204/*
1195 * This is the work-queue function that handles SRCU grace periods. 1205 * This is the work-queue function that handles SRCU grace periods.
1196 */ 1206 */
1197void process_srcu(struct work_struct *work) 1207static void process_srcu(struct work_struct *work)
1198{ 1208{
1199 struct srcu_struct *sp; 1209 struct srcu_struct *sp;
1200 1210
@@ -1203,7 +1213,6 @@ void process_srcu(struct work_struct *work)
1203 srcu_advance_state(sp); 1213 srcu_advance_state(sp);
1204 srcu_reschedule(sp, srcu_get_delay(sp)); 1214 srcu_reschedule(sp, srcu_get_delay(sp));
1205} 1215}
1206EXPORT_SYMBOL_GPL(process_srcu);
1207 1216
1208void srcutorture_get_gp_data(enum rcutorture_type test_type, 1217void srcutorture_get_gp_data(enum rcutorture_type test_type,
1209 struct srcu_struct *sp, int *flags, 1218 struct srcu_struct *sp, int *flags,
@@ -1217,6 +1226,43 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
1217} 1226}
1218EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); 1227EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
1219 1228
1229void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf)
1230{
1231 int cpu;
1232 int idx;
1233 unsigned long s0 = 0, s1 = 0;
1234
1235 idx = sp->srcu_idx & 0x1;
1236 pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", tt, tf, idx);
1237 for_each_possible_cpu(cpu) {
1238 unsigned long l0, l1;
1239 unsigned long u0, u1;
1240 long c0, c1;
1241 struct srcu_data *counts;
1242
1243 counts = per_cpu_ptr(sp->sda, cpu);
1244 u0 = counts->srcu_unlock_count[!idx];
1245 u1 = counts->srcu_unlock_count[idx];
1246
1247 /*
1248 * Make sure that a lock is always counted if the corresponding
1249 * unlock is counted.
1250 */
1251 smp_rmb();
1252
1253 l0 = counts->srcu_lock_count[!idx];
1254 l1 = counts->srcu_lock_count[idx];
1255
1256 c0 = l0 - u0;
1257 c1 = l1 - u1;
1258 pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
1259 s0 += c0;
1260 s1 += c1;
1261 }
1262 pr_cont(" T(%ld,%ld)\n", s0, s1);
1263}
1264EXPORT_SYMBOL_GPL(srcu_torture_stats_print);
1265
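srcu_torture_stats_print() above sums, for each possible CPU, the difference between lock and unlock counts for both index slots; the smp_rmb() keeps an unlock from being counted without its matching lock. The following single-threaded user-space model reproduces just that arithmetic on made-up per-CPU counts (no memory ordering is needed here), with all names illustrative.

#include <stdio.h>

#define NCPU 4

struct cpu_counts {
	unsigned long srcu_lock_count[2];
	unsigned long srcu_unlock_count[2];
};

int main(void)
{
	struct cpu_counts counts[NCPU] = {
		{ {5, 2}, {5, 1} },	/* one reader still in idx 1 */
		{ {3, 0}, {3, 0} },
		{ {7, 4}, {6, 4} },	/* one reader still in idx 0 */
		{ {0, 0}, {0, 0} },
	};
	int idx = 1;			/* currently active index */
	long s0 = 0, s1 = 0;
	int cpu;

	for (cpu = 0; cpu < NCPU; cpu++) {
		long c0 = (long)(counts[cpu].srcu_lock_count[!idx] -
				 counts[cpu].srcu_unlock_count[!idx]);
		long c1 = (long)(counts[cpu].srcu_lock_count[idx] -
				 counts[cpu].srcu_unlock_count[idx]);

		printf(" %d(%ld,%ld)", cpu, c0, c1);
		s0 += c0;
		s1 += c1;
	}
	printf(" T(%ld,%ld)\n", s0, s1);	/* totals, as in the new pr_cont() */
	return 0;
}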
1220static int __init srcu_bootup_announce(void) 1266static int __init srcu_bootup_announce(void)
1221{ 1267{
1222 pr_info("Hierarchical SRCU implementation.\n"); 1268 pr_info("Hierarchical SRCU implementation.\n");
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index f8488965250f..a64eee0db39e 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -56,8 +56,6 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = {
56 .curtail = &rcu_bh_ctrlblk.rcucblist, 56 .curtail = &rcu_bh_ctrlblk.rcucblist,
57}; 57};
58 58
59#include "tiny_plugin.h"
60
61void rcu_barrier_bh(void) 59void rcu_barrier_bh(void)
62{ 60{
63 wait_rcu_gp(call_rcu_bh); 61 wait_rcu_gp(call_rcu_bh);
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
deleted file mode 100644
index f0a01b2a3062..000000000000
--- a/kernel/rcu/tiny_plugin.h
+++ /dev/null
@@ -1,47 +0,0 @@
1/*
2 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
3 * Internal non-public definitions that provide either classic
4 * or preemptible semantics.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you can access it online at
18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 *
20 * Copyright (c) 2010 Linaro
21 *
22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 */
24
25#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
26#include <linux/kernel_stat.h>
27
28int rcu_scheduler_active __read_mostly;
29EXPORT_SYMBOL_GPL(rcu_scheduler_active);
30
31/*
32 * During boot, we forgive RCU lockdep issues. After this function is
33 * invoked, we start taking RCU lockdep issues seriously. Note that unlike
34 * Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE
35 * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage.
36 * The reason for this is that Tiny RCU does not need kthreads, so does
37 * not have to care about the fact that the scheduler is half-initialized
38 * at a certain phase of the boot process. Unless SRCU is in the mix.
39 */
40void __init rcu_scheduler_starting(void)
41{
42 WARN_ON(nr_context_switches() > 0);
43 rcu_scheduler_active = IS_ENABLED(CONFIG_SRCU)
44 ? RCU_SCHEDULER_INIT : RCU_SCHEDULER_RUNNING;
45}
46
47#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9bb5dff50815..84fe96641b2e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -97,9 +97,6 @@ struct rcu_state sname##_state = { \
97 .gp_state = RCU_GP_IDLE, \ 97 .gp_state = RCU_GP_IDLE, \
98 .gpnum = 0UL - 300UL, \ 98 .gpnum = 0UL - 300UL, \
99 .completed = 0UL - 300UL, \ 99 .completed = 0UL - 300UL, \
100 .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
101 .orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
102 .orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
103 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ 100 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
104 .name = RCU_STATE_NAME(sname), \ 101 .name = RCU_STATE_NAME(sname), \
105 .abbr = sabbr, \ 102 .abbr = sabbr, \
@@ -843,13 +840,9 @@ static void rcu_eqs_enter(bool user)
843 */ 840 */
844void rcu_idle_enter(void) 841void rcu_idle_enter(void)
845{ 842{
846 unsigned long flags; 843 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_idle_enter() invoked with irqs enabled!!!");
847
848 local_irq_save(flags);
849 rcu_eqs_enter(false); 844 rcu_eqs_enter(false);
850 local_irq_restore(flags);
851} 845}
852EXPORT_SYMBOL_GPL(rcu_idle_enter);
853 846
854#ifdef CONFIG_NO_HZ_FULL 847#ifdef CONFIG_NO_HZ_FULL
855/** 848/**
@@ -862,7 +855,8 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
862 */ 855 */
863void rcu_user_enter(void) 856void rcu_user_enter(void)
864{ 857{
865 rcu_eqs_enter(1); 858 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_user_enter() invoked with irqs enabled!!!");
859 rcu_eqs_enter(true);
866} 860}
867#endif /* CONFIG_NO_HZ_FULL */ 861#endif /* CONFIG_NO_HZ_FULL */
868 862
@@ -955,8 +949,10 @@ static void rcu_eqs_exit(bool user)
955 if (oldval & DYNTICK_TASK_NEST_MASK) { 949 if (oldval & DYNTICK_TASK_NEST_MASK) {
956 rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; 950 rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
957 } else { 951 } else {
952 __this_cpu_inc(disable_rcu_irq_enter);
958 rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 953 rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
959 rcu_eqs_exit_common(oldval, user); 954 rcu_eqs_exit_common(oldval, user);
955 __this_cpu_dec(disable_rcu_irq_enter);
960 } 956 }
961} 957}
962 958
@@ -979,7 +975,6 @@ void rcu_idle_exit(void)
979 rcu_eqs_exit(false); 975 rcu_eqs_exit(false);
980 local_irq_restore(flags); 976 local_irq_restore(flags);
981} 977}
982EXPORT_SYMBOL_GPL(rcu_idle_exit);
983 978
984#ifdef CONFIG_NO_HZ_FULL 979#ifdef CONFIG_NO_HZ_FULL
985/** 980/**
@@ -1358,12 +1353,13 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
1358 j = jiffies; 1353 j = jiffies;
1359 gpa = READ_ONCE(rsp->gp_activity); 1354 gpa = READ_ONCE(rsp->gp_activity);
1360 if (j - gpa > 2 * HZ) { 1355 if (j - gpa > 2 * HZ) {
1361 pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n", 1356 pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
1362 rsp->name, j - gpa, 1357 rsp->name, j - gpa,
1363 rsp->gpnum, rsp->completed, 1358 rsp->gpnum, rsp->completed,
1364 rsp->gp_flags, 1359 rsp->gp_flags,
1365 gp_state_getname(rsp->gp_state), rsp->gp_state, 1360 gp_state_getname(rsp->gp_state), rsp->gp_state,
1366 rsp->gp_kthread ? rsp->gp_kthread->state : ~0); 1361 rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
1362 rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
1367 if (rsp->gp_kthread) { 1363 if (rsp->gp_kthread) {
1368 sched_show_task(rsp->gp_kthread); 1364 sched_show_task(rsp->gp_kthread);
1369 wake_up_process(rsp->gp_kthread); 1365 wake_up_process(rsp->gp_kthread);
@@ -2067,8 +2063,8 @@ static bool rcu_gp_init(struct rcu_state *rsp)
2067} 2063}
2068 2064
2069/* 2065/*
2070 * Helper function for wait_event_interruptible_timeout() wakeup 2066 * Helper function for swait_event_idle() wakeup at force-quiescent-state
2071 * at force-quiescent-state time. 2067 * time.
2072 */ 2068 */
2073static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp) 2069static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
2074{ 2070{
@@ -2206,9 +2202,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
2206 READ_ONCE(rsp->gpnum), 2202 READ_ONCE(rsp->gpnum),
2207 TPS("reqwait")); 2203 TPS("reqwait"));
2208 rsp->gp_state = RCU_GP_WAIT_GPS; 2204 rsp->gp_state = RCU_GP_WAIT_GPS;
2209 swait_event_interruptible(rsp->gp_wq, 2205 swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
2210 READ_ONCE(rsp->gp_flags) & 2206 RCU_GP_FLAG_INIT);
2211 RCU_GP_FLAG_INIT);
2212 rsp->gp_state = RCU_GP_DONE_GPS; 2207 rsp->gp_state = RCU_GP_DONE_GPS;
2213 /* Locking provides needed memory barrier. */ 2208 /* Locking provides needed memory barrier. */
2214 if (rcu_gp_init(rsp)) 2209 if (rcu_gp_init(rsp))
@@ -2239,7 +2234,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
2239 READ_ONCE(rsp->gpnum), 2234 READ_ONCE(rsp->gpnum),
2240 TPS("fqswait")); 2235 TPS("fqswait"));
2241 rsp->gp_state = RCU_GP_WAIT_FQS; 2236 rsp->gp_state = RCU_GP_WAIT_FQS;
2242 ret = swait_event_interruptible_timeout(rsp->gp_wq, 2237 ret = swait_event_idle_timeout(rsp->gp_wq,
2243 rcu_gp_fqs_check_wake(rsp, &gf), j); 2238 rcu_gp_fqs_check_wake(rsp, &gf), j);
2244 rsp->gp_state = RCU_GP_DOING_FQS; 2239 rsp->gp_state = RCU_GP_DOING_FQS;
2245 /* Locking provides needed memory barriers. */ 2240 /* Locking provides needed memory barriers. */
@@ -2409,6 +2404,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2409 return; 2404 return;
2410 } 2405 }
2411 WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ 2406 WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
2407 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
2408 rcu_preempt_blocked_readers_cgp(rnp));
2412 rnp->qsmask &= ~mask; 2409 rnp->qsmask &= ~mask;
2413 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, 2410 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
2414 mask, rnp->qsmask, rnp->level, 2411 mask, rnp->qsmask, rnp->level,
@@ -3476,10 +3473,11 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
3476 struct rcu_state *rsp = rdp->rsp; 3473 struct rcu_state *rsp = rdp->rsp;
3477 3474
3478 if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { 3475 if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
3479 _rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence); 3476 _rcu_barrier_trace(rsp, TPS("LastCB"), -1,
3477 rsp->barrier_sequence);
3480 complete(&rsp->barrier_completion); 3478 complete(&rsp->barrier_completion);
3481 } else { 3479 } else {
3482 _rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence); 3480 _rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence);
3483 } 3481 }
3484} 3482}
3485 3483
@@ -3491,14 +3489,15 @@ static void rcu_barrier_func(void *type)
3491 struct rcu_state *rsp = type; 3489 struct rcu_state *rsp = type;
3492 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 3490 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
3493 3491
3494 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence); 3492 _rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence);
3495 rdp->barrier_head.func = rcu_barrier_callback; 3493 rdp->barrier_head.func = rcu_barrier_callback;
3496 debug_rcu_head_queue(&rdp->barrier_head); 3494 debug_rcu_head_queue(&rdp->barrier_head);
3497 if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) { 3495 if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
3498 atomic_inc(&rsp->barrier_cpu_count); 3496 atomic_inc(&rsp->barrier_cpu_count);
3499 } else { 3497 } else {
3500 debug_rcu_head_unqueue(&rdp->barrier_head); 3498 debug_rcu_head_unqueue(&rdp->barrier_head);
3501 _rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence); 3499 _rcu_barrier_trace(rsp, TPS("IRQNQ"), -1,
3500 rsp->barrier_sequence);
3502 } 3501 }
3503} 3502}
3504 3503
@@ -3512,14 +3511,15 @@ static void _rcu_barrier(struct rcu_state *rsp)
3512 struct rcu_data *rdp; 3511 struct rcu_data *rdp;
3513 unsigned long s = rcu_seq_snap(&rsp->barrier_sequence); 3512 unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);
3514 3513
3515 _rcu_barrier_trace(rsp, "Begin", -1, s); 3514 _rcu_barrier_trace(rsp, TPS("Begin"), -1, s);
3516 3515
3517 /* Take mutex to serialize concurrent rcu_barrier() requests. */ 3516 /* Take mutex to serialize concurrent rcu_barrier() requests. */
3518 mutex_lock(&rsp->barrier_mutex); 3517 mutex_lock(&rsp->barrier_mutex);
3519 3518
3520 /* Did someone else do our work for us? */ 3519 /* Did someone else do our work for us? */
3521 if (rcu_seq_done(&rsp->barrier_sequence, s)) { 3520 if (rcu_seq_done(&rsp->barrier_sequence, s)) {
3522 _rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence); 3521 _rcu_barrier_trace(rsp, TPS("EarlyExit"), -1,
3522 rsp->barrier_sequence);
3523 smp_mb(); /* caller's subsequent code after above check. */ 3523 smp_mb(); /* caller's subsequent code after above check. */
3524 mutex_unlock(&rsp->barrier_mutex); 3524 mutex_unlock(&rsp->barrier_mutex);
3525 return; 3525 return;
@@ -3527,7 +3527,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
3527 3527
3528 /* Mark the start of the barrier operation. */ 3528 /* Mark the start of the barrier operation. */
3529 rcu_seq_start(&rsp->barrier_sequence); 3529 rcu_seq_start(&rsp->barrier_sequence);
3530 _rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence); 3530 _rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence);
3531 3531
3532 /* 3532 /*
3533 * Initialize the count to one rather than to zero in order to 3533 * Initialize the count to one rather than to zero in order to
@@ -3550,10 +3550,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
3550 rdp = per_cpu_ptr(rsp->rda, cpu); 3550 rdp = per_cpu_ptr(rsp->rda, cpu);
3551 if (rcu_is_nocb_cpu(cpu)) { 3551 if (rcu_is_nocb_cpu(cpu)) {
3552 if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) { 3552 if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
3553 _rcu_barrier_trace(rsp, "OfflineNoCB", cpu, 3553 _rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu,
3554 rsp->barrier_sequence); 3554 rsp->barrier_sequence);
3555 } else { 3555 } else {
3556 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, 3556 _rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu,
3557 rsp->barrier_sequence); 3557 rsp->barrier_sequence);
3558 smp_mb__before_atomic(); 3558 smp_mb__before_atomic();
3559 atomic_inc(&rsp->barrier_cpu_count); 3559 atomic_inc(&rsp->barrier_cpu_count);
@@ -3561,11 +3561,11 @@ static void _rcu_barrier(struct rcu_state *rsp)
3561 rcu_barrier_callback, rsp, cpu, 0); 3561 rcu_barrier_callback, rsp, cpu, 0);
3562 } 3562 }
3563 } else if (rcu_segcblist_n_cbs(&rdp->cblist)) { 3563 } else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
3564 _rcu_barrier_trace(rsp, "OnlineQ", cpu, 3564 _rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu,
3565 rsp->barrier_sequence); 3565 rsp->barrier_sequence);
3566 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); 3566 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
3567 } else { 3567 } else {
3568 _rcu_barrier_trace(rsp, "OnlineNQ", cpu, 3568 _rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu,
3569 rsp->barrier_sequence); 3569 rsp->barrier_sequence);
3570 } 3570 }
3571 } 3571 }
@@ -3582,7 +3582,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
3582 wait_for_completion(&rsp->barrier_completion); 3582 wait_for_completion(&rsp->barrier_completion);
3583 3583
3584 /* Mark the end of the barrier operation. */ 3584 /* Mark the end of the barrier operation. */
3585 _rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence); 3585 _rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence);
3586 rcu_seq_end(&rsp->barrier_sequence); 3586 rcu_seq_end(&rsp->barrier_sequence);
3587 3587
3588 /* Other rcu_barrier() invocations can now safely proceed. */ 3588 /* Other rcu_barrier() invocations can now safely proceed. */
@@ -3684,8 +3684,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3684 */ 3684 */
3685 rnp = rdp->mynode; 3685 rnp = rdp->mynode;
3686 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ 3686 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
3687 if (!rdp->beenonline)
3688 WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
3689 rdp->beenonline = true; /* We have now been online. */ 3687 rdp->beenonline = true; /* We have now been online. */
3690 rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ 3688 rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
3691 rdp->completed = rnp->completed; 3689 rdp->completed = rnp->completed;
@@ -3789,6 +3787,8 @@ void rcu_cpu_starting(unsigned int cpu)
3789{ 3787{
3790 unsigned long flags; 3788 unsigned long flags;
3791 unsigned long mask; 3789 unsigned long mask;
3790 int nbits;
3791 unsigned long oldmask;
3792 struct rcu_data *rdp; 3792 struct rcu_data *rdp;
3793 struct rcu_node *rnp; 3793 struct rcu_node *rnp;
3794 struct rcu_state *rsp; 3794 struct rcu_state *rsp;
@@ -3799,9 +3799,15 @@ void rcu_cpu_starting(unsigned int cpu)
3799 mask = rdp->grpmask; 3799 mask = rdp->grpmask;
3800 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3800 raw_spin_lock_irqsave_rcu_node(rnp, flags);
3801 rnp->qsmaskinitnext |= mask; 3801 rnp->qsmaskinitnext |= mask;
3802 oldmask = rnp->expmaskinitnext;
3802 rnp->expmaskinitnext |= mask; 3803 rnp->expmaskinitnext |= mask;
3804 oldmask ^= rnp->expmaskinitnext;
3805 nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
3806 /* Allow lockless access for expedited grace periods. */
3807 smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
3803 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3808 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3804 } 3809 }
3810 smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
3805} 3811}
3806 3812
3807#ifdef CONFIG_HOTPLUG_CPU 3813#ifdef CONFIG_HOTPLUG_CPU
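rcu_cpu_starting() now derives the number of newly represented CPUs from the bits that appeared in ->expmaskinitnext (XOR of old and new values, then a population count) and publishes the updated ->ncpus with smp_store_release(); the expedited path reads it with smp_load_acquire() in the tree_exp.h hunk further down. A small sketch of the bit arithmetic, with __builtin_popcountl() standing in for bitmap_weight() and the release/acquire ordering omitted:

#include <stdio.h>

int main(void)
{
	unsigned long expmaskinitnext = 0x5;	/* CPUs 0 and 2 already present */
	unsigned long mask = 0x2;		/* CPU 1 coming online */
	unsigned long oldmask = expmaskinitnext;
	unsigned long ncpus = 2;		/* CPUs counted so far */
	int nbits;

	expmaskinitnext |= mask;
	oldmask ^= expmaskinitnext;		/* bits that just appeared */
	nbits = __builtin_popcountl(oldmask);	/* bitmap_weight() in the kernel */
	ncpus += nbits;				/* published with smp_store_release() */
	printf("new bits=%d, ncpus=%lu\n", nbits, ncpus);	/* 1, 3 */
	return 0;
}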
@@ -3845,96 +3851,30 @@ void rcu_report_dead(unsigned int cpu)
3845 rcu_cleanup_dying_idle_cpu(cpu, rsp); 3851 rcu_cleanup_dying_idle_cpu(cpu, rsp);
3846} 3852}
3847 3853
3848/* 3854/* Migrate the dead CPU's callbacks to the current CPU. */
3849 * Send the specified CPU's RCU callbacks to the orphanage. The
3850 * specified CPU must be offline, and the caller must hold the
3851 * ->orphan_lock.
3852 */
3853static void
3854rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
3855 struct rcu_node *rnp, struct rcu_data *rdp)
3856{
3857 lockdep_assert_held(&rsp->orphan_lock);
3858
3859 /* No-CBs CPUs do not have orphanable callbacks. */
3860 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
3861 return;
3862
3863 /*
3864 * Orphan the callbacks. First adjust the counts. This is safe
3865 * because _rcu_barrier() excludes CPU-hotplug operations, so it
3866 * cannot be running now. Thus no memory barrier is required.
3867 */
3868 rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
3869 rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
3870
3871 /*
3872 * Next, move those callbacks still needing a grace period to
3873 * the orphanage, where some other CPU will pick them up.
3874 * Some of the callbacks might have gone partway through a grace
3875 * period, but that is too bad. They get to start over because we
3876 * cannot assume that grace periods are synchronized across CPUs.
3877 */
3878 rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
3879
3880 /*
3881 * Then move the ready-to-invoke callbacks to the orphanage,
3882 * where some other CPU will pick them up. These will not be
3883 * required to pass through another grace period: They are done.
3884 */
3885 rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
3886
3887 /* Finally, disallow further callbacks on this CPU. */
3888 rcu_segcblist_disable(&rdp->cblist);
3889}
3890
3891/*
3892 * Adopt the RCU callbacks from the specified rcu_state structure's
3893 * orphanage. The caller must hold the ->orphan_lock.
3894 */
3895static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
3896{
3897 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
3898
3899 lockdep_assert_held(&rsp->orphan_lock);
3900
3901 /* No-CBs CPUs are handled specially. */
3902 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
3903 rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
3904 return;
3905
3906 /* Do the accounting first. */
3907 rdp->n_cbs_adopted += rsp->orphan_done.len;
3908 if (rsp->orphan_done.len_lazy != rsp->orphan_done.len)
3909 rcu_idle_count_callbacks_posted();
3910 rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
3911
3912 /*
3913 * We do not need a memory barrier here because the only way we
3914 * can get here if there is an rcu_barrier() in flight is if
3915 * we are the task doing the rcu_barrier().
3916 */
3917
3918 /* First adopt the ready-to-invoke callbacks, then the done ones. */
3919 rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
3920 WARN_ON_ONCE(rsp->orphan_done.head);
3921 rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
3922 WARN_ON_ONCE(rsp->orphan_pend.head);
3923 WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
3924 !rcu_segcblist_n_cbs(&rdp->cblist));
3925}
3926
3927/* Orphan the dead CPU's callbacks, and then adopt them. */
3928static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp) 3855static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
3929{ 3856{
3930 unsigned long flags; 3857 unsigned long flags;
3858 struct rcu_data *my_rdp;
3931 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 3859 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
3932 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 3860 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
3933 3861
3934 raw_spin_lock_irqsave(&rsp->orphan_lock, flags); 3862 if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
3935 rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); 3863 return; /* No callbacks to migrate. */
3936 rcu_adopt_orphan_cbs(rsp, flags); 3864
3937 raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); 3865 local_irq_save(flags);
3866 my_rdp = this_cpu_ptr(rsp->rda);
3867 if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {
3868 local_irq_restore(flags);
3869 return;
3870 }
3871 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
3872 rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
3873 rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
3874 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
3875 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
3876 !rcu_segcblist_n_cbs(&my_rdp->cblist));
3877 raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
3938 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || 3878 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
3939 !rcu_segcblist_empty(&rdp->cblist), 3879 !rcu_segcblist_empty(&rdp->cblist),
3940 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", 3880 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 9af0f31d6847..8e1f285f0a70 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -219,8 +219,6 @@ struct rcu_data {
219 /* qlen at last check for QS forcing */ 219 /* qlen at last check for QS forcing */
220 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ 220 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
221 unsigned long n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */ 221 unsigned long n_nocbs_invoked; /* count of no-CBs RCU cbs invoked. */
222 unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
223 unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */
224 unsigned long n_force_qs_snap; 222 unsigned long n_force_qs_snap;
225 /* did other CPU force QS recently? */ 223 /* did other CPU force QS recently? */
226 long blimit; /* Upper limit on a processed batch */ 224 long blimit; /* Upper limit on a processed batch */
@@ -268,7 +266,9 @@ struct rcu_data {
268 struct rcu_head **nocb_follower_tail; 266 struct rcu_head **nocb_follower_tail;
269 struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */ 267 struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
270 struct task_struct *nocb_kthread; 268 struct task_struct *nocb_kthread;
269 raw_spinlock_t nocb_lock; /* Guard following pair of fields. */
271 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ 270 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
271 struct timer_list nocb_timer; /* Enforce finite deferral. */
272 272
273 /* The following fields are used by the leader, hence own cacheline. */ 273 /* The following fields are used by the leader, hence own cacheline. */
274 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; 274 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
@@ -350,15 +350,6 @@ struct rcu_state {
350 350
351 /* End of fields guarded by root rcu_node's lock. */ 351 /* End of fields guarded by root rcu_node's lock. */
352 352
353 raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
354 /* Protect following fields. */
355 struct rcu_cblist orphan_pend; /* Orphaned callbacks that */
356 /* need a grace period. */
357 struct rcu_cblist orphan_done; /* Orphaned callbacks that */
358 /* are ready to invoke. */
359 /* (Contains counts.) */
360 /* End of fields guarded by orphan_lock. */
361
362 struct mutex barrier_mutex; /* Guards barrier fields. */ 353 struct mutex barrier_mutex; /* Guards barrier fields. */
363 atomic_t barrier_cpu_count; /* # CPUs waiting on. */ 354 atomic_t barrier_cpu_count; /* # CPUs waiting on. */
364 struct completion barrier_completion; /* Wake at barrier end. */ 355 struct completion barrier_completion; /* Wake at barrier end. */
@@ -495,7 +486,7 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
495static void rcu_init_one_nocb(struct rcu_node *rnp); 486static void rcu_init_one_nocb(struct rcu_node *rnp);
496static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, 487static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
497 bool lazy, unsigned long flags); 488 bool lazy, unsigned long flags);
498static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 489static bool rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
499 struct rcu_data *rdp, 490 struct rcu_data *rdp,
500 unsigned long flags); 491 unsigned long flags);
501static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); 492static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index dd21ca47e4b4..46d61b597731 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -73,7 +73,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
73 unsigned long flags; 73 unsigned long flags;
74 unsigned long mask; 74 unsigned long mask;
75 unsigned long oldmask; 75 unsigned long oldmask;
76 int ncpus = READ_ONCE(rsp->ncpus); 76 int ncpus = smp_load_acquire(&rsp->ncpus); /* Order against locking. */
77 struct rcu_node *rnp; 77 struct rcu_node *rnp;
78 struct rcu_node *rnp_up; 78 struct rcu_node *rnp_up;
79 79
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 908b309d60d7..55bde94b9572 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -180,6 +180,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
180 struct task_struct *t = current; 180 struct task_struct *t = current;
181 181
182 lockdep_assert_held(&rnp->lock); 182 lockdep_assert_held(&rnp->lock);
183 WARN_ON_ONCE(rdp->mynode != rnp);
184 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
183 185
184 /* 186 /*
185 * Decide where to queue the newly blocked task. In theory, 187 * Decide where to queue the newly blocked task. In theory,
@@ -261,6 +263,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
261 rnp->gp_tasks = &t->rcu_node_entry; 263 rnp->gp_tasks = &t->rcu_node_entry;
262 if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) 264 if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
263 rnp->exp_tasks = &t->rcu_node_entry; 265 rnp->exp_tasks = &t->rcu_node_entry;
266 WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
267 !(rnp->qsmask & rdp->grpmask));
268 WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
269 !(rnp->expmask & rdp->grpmask));
264 raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */ 270 raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */
265 271
266 /* 272 /*
@@ -482,6 +488,7 @@ void rcu_read_unlock_special(struct task_struct *t)
482 rnp = t->rcu_blocked_node; 488 rnp = t->rcu_blocked_node;
483 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ 489 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
484 WARN_ON_ONCE(rnp != t->rcu_blocked_node); 490 WARN_ON_ONCE(rnp != t->rcu_blocked_node);
491 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
485 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); 492 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
486 empty_exp = sync_rcu_preempt_exp_done(rnp); 493 empty_exp = sync_rcu_preempt_exp_done(rnp);
487 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 494 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -495,10 +502,10 @@ void rcu_read_unlock_special(struct task_struct *t)
495 if (&t->rcu_node_entry == rnp->exp_tasks) 502 if (&t->rcu_node_entry == rnp->exp_tasks)
496 rnp->exp_tasks = np; 503 rnp->exp_tasks = np;
497 if (IS_ENABLED(CONFIG_RCU_BOOST)) { 504 if (IS_ENABLED(CONFIG_RCU_BOOST)) {
498 if (&t->rcu_node_entry == rnp->boost_tasks)
499 rnp->boost_tasks = np;
500 /* Snapshot ->boost_mtx ownership w/rnp->lock held. */ 505 /* Snapshot ->boost_mtx ownership w/rnp->lock held. */
501 drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; 506 drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
507 if (&t->rcu_node_entry == rnp->boost_tasks)
508 rnp->boost_tasks = np;
502 } 509 }
503 510
504 /* 511 /*
@@ -636,10 +643,17 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
636 */ 643 */
637static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 644static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
638{ 645{
646 struct task_struct *t;
647
639 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); 648 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
640 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 649 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
641 if (rcu_preempt_has_tasks(rnp)) 650 if (rcu_preempt_has_tasks(rnp)) {
642 rnp->gp_tasks = rnp->blkd_tasks.next; 651 rnp->gp_tasks = rnp->blkd_tasks.next;
652 t = container_of(rnp->gp_tasks, struct task_struct,
653 rcu_node_entry);
654 trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
655 rnp->gpnum, t->pid);
656 }
643 WARN_ON_ONCE(rnp->qsmask); 657 WARN_ON_ONCE(rnp->qsmask);
644} 658}
645 659
@@ -1788,23 +1802,62 @@ bool rcu_is_nocb_cpu(int cpu)
1788} 1802}
1789 1803
1790/* 1804/*
1791 * Kick the leader kthread for this NOCB group. 1805 * Kick the leader kthread for this NOCB group. Caller holds ->nocb_lock
1806 * and this function releases it.
1792 */ 1807 */
1793static void wake_nocb_leader(struct rcu_data *rdp, bool force) 1808static void __wake_nocb_leader(struct rcu_data *rdp, bool force,
1809 unsigned long flags)
1810 __releases(rdp->nocb_lock)
1794{ 1811{
1795 struct rcu_data *rdp_leader = rdp->nocb_leader; 1812 struct rcu_data *rdp_leader = rdp->nocb_leader;
1796 1813
1797 if (!READ_ONCE(rdp_leader->nocb_kthread)) 1814 lockdep_assert_held(&rdp->nocb_lock);
1815 if (!READ_ONCE(rdp_leader->nocb_kthread)) {
1816 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1798 return; 1817 return;
1799 if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) { 1818 }
1819 if (rdp_leader->nocb_leader_sleep || force) {
1800 /* Prior smp_mb__after_atomic() orders against prior enqueue. */ 1820 /* Prior smp_mb__after_atomic() orders against prior enqueue. */
1801 WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); 1821 WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
1822 del_timer(&rdp->nocb_timer);
1823 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1802 smp_mb(); /* ->nocb_leader_sleep before swake_up(). */ 1824 smp_mb(); /* ->nocb_leader_sleep before swake_up(). */
1803 swake_up(&rdp_leader->nocb_wq); 1825 swake_up(&rdp_leader->nocb_wq);
1826 } else {
1827 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1804 } 1828 }
1805} 1829}
1806 1830
1807/* 1831/*
1832 * Kick the leader kthread for this NOCB group, but caller has not
1833 * acquired locks.
1834 */
1835static void wake_nocb_leader(struct rcu_data *rdp, bool force)
1836{
1837 unsigned long flags;
1838
1839 raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
1840 __wake_nocb_leader(rdp, force, flags);
1841}
1842
1843/*
1844 * Arrange to wake the leader kthread for this NOCB group at some
1845 * future time when it is safe to do so.
1846 */
1847static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype,
1848 const char *reason)
1849{
1850 unsigned long flags;
1851
1852 raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
1853 if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
1854 mod_timer(&rdp->nocb_timer, jiffies + 1);
1855 WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
1856 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, reason);
1857 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1858}
1859
1860/*
1808 * Does the specified CPU need an RCU callback for the specified flavor 1861 * Does the specified CPU need an RCU callback for the specified flavor
1809 * of rcu_barrier()? 1862 * of rcu_barrier()?
1810 */ 1863 */
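wake_nocb_leader_defer() above arms the new ->nocb_timer only when the deferred-wakeup state transitions away from RCU_NOCB_WAKE_NOT, then records the requested wakeup type; the timer serves as a backstop in case the deferred wakeup would otherwise never happen. A user-space sketch of that bookkeeping, with the timer reduced to a flag and all names illustrative:

#include <stdio.h>

enum { WAKE_NOT, WAKE, WAKE_FORCE };	/* stand-ins for RCU_NOCB_WAKE_* */

struct nocb_state {
	int defer_wakeup;	/* models ->nocb_defer_wakeup */
	int timer_armed;	/* models ->nocb_timer */
};

static void wake_leader_defer(struct nocb_state *s, int waketype, const char *why)
{
	if (s->defer_wakeup == WAKE_NOT) {
		s->timer_armed = 1;	/* mod_timer(&rdp->nocb_timer, jiffies + 1) */
		printf("timer armed for %s\n", why);
	}
	s->defer_wakeup = waketype;	/* WRITE_ONCE() in the kernel */
}

int main(void)
{
	struct nocb_state s = { WAKE_NOT, 0 };

	wake_leader_defer(&s, WAKE, "WakeEmptyIsDeferred");	/* arms the timer */
	wake_leader_defer(&s, WAKE, "WakeOvfIsDeferred");	/* timer already armed */
	printf("deferred state=%d, timer armed=%d\n", s.defer_wakeup, s.timer_armed);
	return 0;
}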
@@ -1891,11 +1944,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
1891 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 1944 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
1892 TPS("WakeEmpty")); 1945 TPS("WakeEmpty"));
1893 } else { 1946 } else {
1894 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE); 1947 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
1895 /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ 1948 TPS("WakeEmptyIsDeferred"));
1896 smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
1897 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
1898 TPS("WakeEmptyIsDeferred"));
1899 } 1949 }
1900 rdp->qlen_last_fqs_check = 0; 1950 rdp->qlen_last_fqs_check = 0;
1901 } else if (len > rdp->qlen_last_fqs_check + qhimark) { 1951 } else if (len > rdp->qlen_last_fqs_check + qhimark) {
@@ -1905,11 +1955,8 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
1905 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 1955 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
1906 TPS("WakeOvf")); 1956 TPS("WakeOvf"));
1907 } else { 1957 } else {
1908 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_FORCE); 1958 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
1909 /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ 1959 TPS("WakeOvfIsDeferred"));
1910 smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
1911 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
1912 TPS("WakeOvfIsDeferred"));
1913 } 1960 }
1914 rdp->qlen_last_fqs_check = LONG_MAX / 2; 1961 rdp->qlen_last_fqs_check = LONG_MAX / 2;
1915 } else { 1962 } else {
@@ -1961,30 +2008,19 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
1961 * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is 2008 * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
1962 * not a no-CBs CPU. 2009 * not a no-CBs CPU.
1963 */ 2010 */
1964static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 2011static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
1965 struct rcu_data *rdp, 2012 struct rcu_data *rdp,
1966 unsigned long flags) 2013 unsigned long flags)
1967{ 2014{
1968 long ql = rsp->orphan_done.len; 2015 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_nocb_adopt_orphan_cbs() invoked with irqs enabled!!!");
1969 long qll = rsp->orphan_done.len_lazy;
1970
1971 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
1972 if (!rcu_is_nocb_cpu(smp_processor_id())) 2016 if (!rcu_is_nocb_cpu(smp_processor_id()))
1973 return false; 2017 return false; /* Not NOCBs CPU, caller must migrate CBs. */
1974 2018 __call_rcu_nocb_enqueue(my_rdp, rcu_segcblist_head(&rdp->cblist),
1975 /* First, enqueue the donelist, if any. This preserves CB ordering. */ 2019 rcu_segcblist_tail(&rdp->cblist),
1976 if (rsp->orphan_done.head) { 2020 rcu_segcblist_n_cbs(&rdp->cblist),
1977 __call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done), 2021 rcu_segcblist_n_lazy_cbs(&rdp->cblist), flags);
1978 rcu_cblist_tail(&rsp->orphan_done), 2022 rcu_segcblist_init(&rdp->cblist);
1979 ql, qll, flags); 2023 rcu_segcblist_disable(&rdp->cblist);
1980 }
1981 if (rsp->orphan_pend.head) {
1982 __call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend),
1983 rcu_cblist_tail(&rsp->orphan_pend),
1984 ql, qll, flags);
1985 }
1986 rcu_cblist_init(&rsp->orphan_done);
1987 rcu_cblist_init(&rsp->orphan_pend);
1988 return true; 2024 return true;
1989} 2025}
1990 2026
@@ -2031,6 +2067,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2031static void nocb_leader_wait(struct rcu_data *my_rdp) 2067static void nocb_leader_wait(struct rcu_data *my_rdp)
2032{ 2068{
2033 bool firsttime = true; 2069 bool firsttime = true;
2070 unsigned long flags;
2034 bool gotcbs; 2071 bool gotcbs;
2035 struct rcu_data *rdp; 2072 struct rcu_data *rdp;
2036 struct rcu_head **tail; 2073 struct rcu_head **tail;
@@ -2039,13 +2076,17 @@ wait_again:
2039 2076
2040 /* Wait for callbacks to appear. */ 2077 /* Wait for callbacks to appear. */
2041 if (!rcu_nocb_poll) { 2078 if (!rcu_nocb_poll) {
2042 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep"); 2079 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep"));
2043 swait_event_interruptible(my_rdp->nocb_wq, 2080 swait_event_interruptible(my_rdp->nocb_wq,
2044 !READ_ONCE(my_rdp->nocb_leader_sleep)); 2081 !READ_ONCE(my_rdp->nocb_leader_sleep));
2045 /* Memory barrier handled by smp_mb() calls below and repoll. */ 2082 raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
2083 my_rdp->nocb_leader_sleep = true;
2084 WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
2085 del_timer(&my_rdp->nocb_timer);
2086 raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
2046 } else if (firsttime) { 2087 } else if (firsttime) {
2047 firsttime = false; /* Don't drown trace log with "Poll"! */ 2088 firsttime = false; /* Don't drown trace log with "Poll"! */
2048 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Poll"); 2089 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Poll"));
2049 } 2090 }
2050 2091
2051 /* 2092 /*
@@ -2054,7 +2095,7 @@ wait_again:
2054 * nocb_gp_head, where they await a grace period. 2095 * nocb_gp_head, where they await a grace period.
2055 */ 2096 */
2056 gotcbs = false; 2097 gotcbs = false;
2057 smp_mb(); /* wakeup before ->nocb_head reads. */ 2098 smp_mb(); /* wakeup and _sleep before ->nocb_head reads. */
2058 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { 2099 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
2059 rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head); 2100 rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
2060 if (!rdp->nocb_gp_head) 2101 if (!rdp->nocb_gp_head)
@@ -2066,56 +2107,41 @@ wait_again:
2066 gotcbs = true; 2107 gotcbs = true;
2067 } 2108 }
2068 2109
2069 /* 2110 /* No callbacks? Sleep a bit if polling, and go retry. */
2070 * If there were no callbacks, sleep a bit, rescan after a
2071 * memory barrier, and go retry.
2072 */
2073 if (unlikely(!gotcbs)) { 2111 if (unlikely(!gotcbs)) {
2074 if (!rcu_nocb_poll)
2075 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
2076 "WokeEmpty");
2077 WARN_ON(signal_pending(current)); 2112 WARN_ON(signal_pending(current));
2078 schedule_timeout_interruptible(1); 2113 if (rcu_nocb_poll) {
2079 2114 schedule_timeout_interruptible(1);
2080 /* Rescan in case we were a victim of memory ordering. */ 2115 } else {
2081 my_rdp->nocb_leader_sleep = true; 2116 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
2082 smp_mb(); /* Ensure _sleep true before scan. */ 2117 TPS("WokeEmpty"));
2083 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) 2118 }
2084 if (READ_ONCE(rdp->nocb_head)) {
2085 /* Found CB, so short-circuit next wait. */
2086 my_rdp->nocb_leader_sleep = false;
2087 break;
2088 }
2089 goto wait_again; 2119 goto wait_again;
2090 } 2120 }
2091 2121
2092 /* Wait for one grace period. */ 2122 /* Wait for one grace period. */
2093 rcu_nocb_wait_gp(my_rdp); 2123 rcu_nocb_wait_gp(my_rdp);
2094 2124
2095 /*
2096 * We left ->nocb_leader_sleep unset to reduce cache thrashing.
2097 * We set it now, but recheck for new callbacks while
2098 * traversing our follower list.
2099 */
2100 my_rdp->nocb_leader_sleep = true;
2101 smp_mb(); /* Ensure _sleep true before scan of ->nocb_head. */
2102
2103 /* Each pass through the following loop wakes a follower, if needed. */ 2125 /* Each pass through the following loop wakes a follower, if needed. */
2104 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { 2126 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
2105 if (READ_ONCE(rdp->nocb_head)) 2127 if (!rcu_nocb_poll &&
2128 READ_ONCE(rdp->nocb_head) &&
2129 READ_ONCE(my_rdp->nocb_leader_sleep)) {
2130 raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
2106 my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/ 2131 my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
2132 raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
2133 }
2107 if (!rdp->nocb_gp_head) 2134 if (!rdp->nocb_gp_head)
2108 continue; /* No CBs, so no need to wake follower. */ 2135 continue; /* No CBs, so no need to wake follower. */
2109 2136
2110 /* Append callbacks to follower's "done" list. */ 2137 /* Append callbacks to follower's "done" list. */
2111 tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail); 2138 raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
2139 tail = rdp->nocb_follower_tail;
2140 rdp->nocb_follower_tail = rdp->nocb_gp_tail;
2112 *tail = rdp->nocb_gp_head; 2141 *tail = rdp->nocb_gp_head;
2113 smp_mb__after_atomic(); /* Store *tail before wakeup. */ 2142 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
2114 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { 2143 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
2115 /* 2144 /* List was empty, so wake up the follower. */
2116 * List was empty, wake up the follower.
2117 * Memory barriers supplied by atomic_long_add().
2118 */
2119 swake_up(&rdp->nocb_wq); 2145 swake_up(&rdp->nocb_wq);
2120 } 2146 }
2121 } 2147 }
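
The splice in the hunk above replaces the old lockless xchg() enqueue: the leader now appends its ready batch to the follower's "done" list under ->nocb_lock and wakes the follower only when that list went from empty to non-empty. The following user-space sketch models just that hand-off, with a pthread mutex and condition variable standing in for the raw spinlock and swait queue; every name in it is illustrative, not a kernel API.

#include <pthread.h>
#include <stddef.h>

struct cb {				/* stand-in for struct rcu_head */
	struct cb *next;
	void (*func)(struct cb *);
};

struct follower {
	pthread_mutex_t lock;		/* models rdp->nocb_lock */
	pthread_cond_t wq;		/* models rdp->nocb_wq */
	struct cb *head;		/* models rdp->nocb_follower_head */
	struct cb **tail;		/* models rdp->nocb_follower_tail */
};

/* Leader: hand a ready batch [batch_head, *batch_tail] to one follower. */
static void leader_hand_off(struct follower *f, struct cb *batch_head,
			    struct cb **batch_tail)
{
	int was_empty;

	pthread_mutex_lock(&f->lock);
	was_empty = (f->head == NULL);	/* like tail == &nocb_follower_head */
	*f->tail = batch_head;		/* splice batch onto the "done" list */
	f->tail = batch_tail;
	pthread_mutex_unlock(&f->lock);

	if (was_empty)			/* only an empty->non-empty transition */
		pthread_cond_signal(&f->wq);	/* needs a wakeup */
}

A follower object would be set up with head = NULL and tail = &head, mirroring what rcu_boot_init_nocb_percpu_data() does for the real fields further down in this patch.
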
@@ -2131,28 +2157,16 @@ wait_again:
2131 */ 2157 */
2132static void nocb_follower_wait(struct rcu_data *rdp) 2158static void nocb_follower_wait(struct rcu_data *rdp)
2133{ 2159{
2134 bool firsttime = true;
2135
2136 for (;;) { 2160 for (;;) {
2137 if (!rcu_nocb_poll) { 2161 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep"));
2138 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2162 swait_event_interruptible(rdp->nocb_wq,
2139 "FollowerSleep"); 2163 READ_ONCE(rdp->nocb_follower_head));
2140 swait_event_interruptible(rdp->nocb_wq,
2141 READ_ONCE(rdp->nocb_follower_head));
2142 } else if (firsttime) {
2143 /* Don't drown trace log with "Poll"! */
2144 firsttime = false;
2145 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "Poll");
2146 }
2147 if (smp_load_acquire(&rdp->nocb_follower_head)) { 2164 if (smp_load_acquire(&rdp->nocb_follower_head)) {
2148 /* ^^^ Ensure CB invocation follows _head test. */ 2165 /* ^^^ Ensure CB invocation follows _head test. */
2149 return; 2166 return;
2150 } 2167 }
2151 if (!rcu_nocb_poll)
2152 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2153 "WokeEmpty");
2154 WARN_ON(signal_pending(current)); 2168 WARN_ON(signal_pending(current));
2155 schedule_timeout_interruptible(1); 2169 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeEmpty"));
2156 } 2170 }
2157} 2171}
2158 2172
@@ -2165,6 +2179,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
2165static int rcu_nocb_kthread(void *arg) 2179static int rcu_nocb_kthread(void *arg)
2166{ 2180{
2167 int c, cl; 2181 int c, cl;
2182 unsigned long flags;
2168 struct rcu_head *list; 2183 struct rcu_head *list;
2169 struct rcu_head *next; 2184 struct rcu_head *next;
2170 struct rcu_head **tail; 2185 struct rcu_head **tail;
@@ -2179,11 +2194,14 @@ static int rcu_nocb_kthread(void *arg)
2179 nocb_follower_wait(rdp); 2194 nocb_follower_wait(rdp);
2180 2195
2181 /* Pull the ready-to-invoke callbacks onto local list. */ 2196 /* Pull the ready-to-invoke callbacks onto local list. */
2182 list = READ_ONCE(rdp->nocb_follower_head); 2197 raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
2198 list = rdp->nocb_follower_head;
2199 rdp->nocb_follower_head = NULL;
2200 tail = rdp->nocb_follower_tail;
2201 rdp->nocb_follower_tail = &rdp->nocb_follower_head;
2202 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
2183 BUG_ON(!list); 2203 BUG_ON(!list);
2184 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); 2204 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeNonEmpty"));
2185 WRITE_ONCE(rdp->nocb_follower_head, NULL);
2186 tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
2187 2205
2188 /* Each pass through the following loop invokes a callback. */ 2206 /* Each pass through the following loop invokes a callback. */
2189 trace_rcu_batch_start(rdp->rsp->name, 2207 trace_rcu_batch_start(rdp->rsp->name,
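
The consumer side of the same conversion is shown in the hunk just above: rcu_nocb_kthread() now detaches the whole follower list while holding ->nocb_lock and only afterwards invokes the callbacks, instead of the earlier READ_ONCE()/WRITE_ONCE()/xchg() sequence. Continuing the toy model from the previous sketch (same hypothetical struct follower and struct cb), the detach-then-process step looks roughly like this:

/* Follower/kthread side: wait for work, detach it all, run it unlocked. */
static void follower_run_once(struct follower *f)
{
	struct cb *list, *next;

	pthread_mutex_lock(&f->lock);
	while (f->head == NULL)		/* models swait_event_interruptible() */
		pthread_cond_wait(&f->wq, &f->lock);
	list = f->head;			/* detach the entire list ... */
	f->head = NULL;
	f->tail = &f->head;		/* ... and reset it to empty */
	pthread_mutex_unlock(&f->lock);

	while (list) {			/* invoke callbacks with no lock held */
		next = list->next;
		list->func(list);
		list = next;
	}
}
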
@@ -2226,18 +2244,39 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2226} 2244}
2227 2245
2228/* Do a deferred wakeup of rcu_nocb_kthread(). */ 2246/* Do a deferred wakeup of rcu_nocb_kthread(). */
2229static void do_nocb_deferred_wakeup(struct rcu_data *rdp) 2247static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
2230{ 2248{
2249 unsigned long flags;
2231 int ndw; 2250 int ndw;
2232 2251
2233 if (!rcu_nocb_need_deferred_wakeup(rdp)) 2252 raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
2253 if (!rcu_nocb_need_deferred_wakeup(rdp)) {
2254 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
2234 return; 2255 return;
2256 }
2235 ndw = READ_ONCE(rdp->nocb_defer_wakeup); 2257 ndw = READ_ONCE(rdp->nocb_defer_wakeup);
2236 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); 2258 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
2237 wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE); 2259 __wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
2238 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); 2260 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
2239} 2261}
2240 2262
2263/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
2264static void do_nocb_deferred_wakeup_timer(unsigned long x)
2265{
2266 do_nocb_deferred_wakeup_common((struct rcu_data *)x);
2267}
2268
2269/*
2270 * Do a deferred wakeup of rcu_nocb_kthread() from the fastpath.
2271 * This means we do an inexact common-case check. Note that if
2272 * we miss, ->nocb_timer will eventually clean things up.
2273 */
2274static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2275{
2276 if (rcu_nocb_need_deferred_wakeup(rdp))
2277 do_nocb_deferred_wakeup_common(rdp);
2278}
2279
2241void __init rcu_init_nohz(void) 2280void __init rcu_init_nohz(void)
2242{ 2281{
2243 int cpu; 2282 int cpu;
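
The deferred-wakeup rework in the hunk above is a double-checked pattern: do_nocb_deferred_wakeup() peeks at the state without the lock, do_nocb_deferred_wakeup_common() re-checks and acts under ->nocb_lock, and the new ->nocb_timer simply re-runs the common path so a racy miss on the fast path is eventually cleaned up. A compact sketch of that shape, again in plain C with POSIX threads and purely illustrative names:

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t nocb_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int defer_wakeup;		/* 0 plays the role of RCU_NOCB_WAKE_NOT */

static void wake_leader(int force)
{
	(void)force;			/* ... issue the actual wakeup here ... */
}

/* Slow path: decide and act with the lock held. */
static void deferred_wakeup_common(void)
{
	int ndw;

	pthread_mutex_lock(&nocb_lock);
	ndw = atomic_load(&defer_wakeup);
	if (ndw == 0) {			/* raced: nothing to do after all */
		pthread_mutex_unlock(&nocb_lock);
		return;
	}
	atomic_store(&defer_wakeup, 0);
	wake_leader(ndw > 1);		/* stronger request => forced wakeup */
	pthread_mutex_unlock(&nocb_lock);
}

/*
 * Fast path: inexact unlocked check.  A periodic timer calling
 * deferred_wakeup_common() would clean up anything this check misses,
 * which is exactly the job ->nocb_timer takes on in the patch.
 */
static void deferred_wakeup_fastpath(void)
{
	if (atomic_load(&defer_wakeup))
		deferred_wakeup_common();
}
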
@@ -2287,6 +2326,9 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2287 rdp->nocb_tail = &rdp->nocb_head; 2326 rdp->nocb_tail = &rdp->nocb_head;
2288 init_swait_queue_head(&rdp->nocb_wq); 2327 init_swait_queue_head(&rdp->nocb_wq);
2289 rdp->nocb_follower_tail = &rdp->nocb_follower_head; 2328 rdp->nocb_follower_tail = &rdp->nocb_follower_head;
2329 raw_spin_lock_init(&rdp->nocb_lock);
2330 setup_timer(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer,
2331 (unsigned long)rdp);
2290} 2332}
2291 2333
2292/* 2334/*
@@ -2459,7 +2501,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2459 return false; 2501 return false;
2460} 2502}
2461 2503
2462static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 2504static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
2463 struct rcu_data *rdp, 2505 struct rcu_data *rdp,
2464 unsigned long flags) 2506 unsigned long flags)
2465{ 2507{
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 00e77c470017..5033b66d2753 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -568,7 +568,7 @@ static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
568static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); 568static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
569 569
570/* Track exiting tasks in order to allow them to be waited for. */ 570/* Track exiting tasks in order to allow them to be waited for. */
571DEFINE_SRCU(tasks_rcu_exit_srcu); 571DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
572 572
573/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ 573/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
574#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) 574#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
@@ -875,6 +875,22 @@ static void rcu_spawn_tasks_kthread(void)
875 mutex_unlock(&rcu_tasks_kthread_mutex); 875 mutex_unlock(&rcu_tasks_kthread_mutex);
876} 876}
877 877
878/* Do the srcu_read_lock() for the above synchronize_srcu(). */
879void exit_tasks_rcu_start(void)
880{
881 preempt_disable();
882 current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
883 preempt_enable();
884}
885
886/* Do the srcu_read_unlock() for the above synchronize_srcu(). */
887void exit_tasks_rcu_finish(void)
888{
889 preempt_disable();
890 __srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
891 preempt_enable();
892}
893
878#endif /* #ifdef CONFIG_TASKS_RCU */ 894#endif /* #ifdef CONFIG_TASKS_RCU */
879 895
880#ifndef CONFIG_TINY_RCU 896#ifndef CONFIG_TINY_RCU
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 53f0164ed362..78f54932ea1d 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -25,3 +25,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o
25obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o 25obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
26obj-$(CONFIG_CPU_FREQ) += cpufreq.o 26obj-$(CONFIG_CPU_FREQ) += cpufreq.o
27obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o 27obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
28obj-$(CONFIG_MEMBARRIER) += membarrier.o
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 13fc5ae9bf2f..c9524d2d9316 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -300,6 +300,8 @@ EXPORT_SYMBOL(try_wait_for_completion);
300 */ 300 */
301bool completion_done(struct completion *x) 301bool completion_done(struct completion *x)
302{ 302{
303 unsigned long flags;
304
303 if (!READ_ONCE(x->done)) 305 if (!READ_ONCE(x->done))
304 return false; 306 return false;
305 307
@@ -307,14 +309,9 @@ bool completion_done(struct completion *x)
307 * If ->done, we need to wait for complete() to release ->wait.lock 309 * If ->done, we need to wait for complete() to release ->wait.lock
308 * otherwise we can end up freeing the completion before complete() 310 * otherwise we can end up freeing the completion before complete()
309 * is done referencing it. 311 * is done referencing it.
310 *
311 * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
312 * the loads of ->done and ->wait.lock such that we cannot observe
313 * the lock before complete() acquires it while observing the ->done
314 * after it's acquired the lock.
315 */ 312 */
316 smp_rmb(); 313 spin_lock_irqsave(&x->wait.lock, flags);
317 spin_unlock_wait(&x->wait.lock); 314 spin_unlock_irqrestore(&x->wait.lock, flags);
318 return true; 315 return true;
319} 316}
320EXPORT_SYMBOL(completion_done); 317EXPORT_SYMBOL(completion_done);
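
The completion_done() hunk above trades the subtle smp_rmb() + spin_unlock_wait() combination for a plain acquire/release of ->wait.lock: once we can take and drop the lock, any complete() that had observed the completion is guaranteed to have left its critical section, so the caller may safely free the object. The same empty-critical-section idiom replaces raw_spin_unlock_wait() in do_task_dead() and task_work_run() later in this series. A small user-space illustration (toy code, not the kernel's completion implementation):

#include <pthread.h>
#include <stdatomic.h>

struct toy_completion {
	pthread_mutex_t lock;		/* models x->wait.lock */
	atomic_int done;
};

/* complete(): set ->done and wake waiters inside the critical section. */
static void toy_complete(struct toy_completion *x)
{
	pthread_mutex_lock(&x->lock);
	atomic_store(&x->done, 1);
	/* ... wake any waiters, still dereferencing *x ... */
	pthread_mutex_unlock(&x->lock);
}

/* completion_done(): true only once complete() can no longer touch *x. */
static int toy_completion_done(struct toy_completion *x)
{
	if (!atomic_load(&x->done))
		return 0;
	/*
	 * Empty critical section: we cannot acquire the lock until a
	 * concurrent toy_complete() has dropped it, so after this pair
	 * the completer holds no reference to *x and freeing is safe.
	 */
	pthread_mutex_lock(&x->lock);
	pthread_mutex_unlock(&x->lock);
	return 1;
}
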
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0869b20fba81..e053c31d96da 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -951,8 +951,13 @@ struct migration_arg {
951static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, 951static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
952 struct task_struct *p, int dest_cpu) 952 struct task_struct *p, int dest_cpu)
953{ 953{
954 if (unlikely(!cpu_active(dest_cpu))) 954 if (p->flags & PF_KTHREAD) {
955 return rq; 955 if (unlikely(!cpu_online(dest_cpu)))
956 return rq;
957 } else {
958 if (unlikely(!cpu_active(dest_cpu)))
959 return rq;
960 }
956 961
957 /* Affinity changed (again). */ 962 /* Affinity changed (again). */
958 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) 963 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
@@ -2635,6 +2640,16 @@ static struct rq *finish_task_switch(struct task_struct *prev)
2635 prev_state = prev->state; 2640 prev_state = prev->state;
2636 vtime_task_switch(prev); 2641 vtime_task_switch(prev);
2637 perf_event_task_sched_in(prev, current); 2642 perf_event_task_sched_in(prev, current);
2643 /*
2644 * The membarrier system call requires a full memory barrier
2645 * after storing to rq->curr, before going back to user-space.
2646 *
2647 * TODO: This smp_mb__after_unlock_lock can go away if PPC ends
2648 * up adding a full barrier to switch_mm(), or we should figure
2649 * out if a smp_mb__after_unlock_lock is really the proper API
2650 * to use.
2651 */
2652 smp_mb__after_unlock_lock();
2638 finish_lock_switch(rq, prev); 2653 finish_lock_switch(rq, prev);
2639 finish_arch_post_lock_switch(); 2654 finish_arch_post_lock_switch();
2640 2655
@@ -3324,6 +3339,21 @@ static void __sched notrace __schedule(bool preempt)
3324 if (likely(prev != next)) { 3339 if (likely(prev != next)) {
3325 rq->nr_switches++; 3340 rq->nr_switches++;
3326 rq->curr = next; 3341 rq->curr = next;
3342 /*
3343 * The membarrier system call requires each architecture
3344 * to have a full memory barrier after updating
3345 * rq->curr, before returning to user-space. For TSO
3346 * (e.g. x86), the architecture must provide its own
3347 * barrier in switch_mm(). For weakly ordered machines
3348 * for which spin_unlock() acts as a full memory
3349 * barrier, finish_lock_switch() in common code takes
3350 * care of this barrier. For weakly ordered machines for
3351 * which spin_unlock() acts as a RELEASE barrier (only
3352 * arm64 and PowerPC), arm64 has a full barrier in
3353 * switch_to(), and PowerPC has
3354 * smp_mb__after_unlock_lock() before
3355 * finish_lock_switch().
3356 */
3327 ++*switch_count; 3357 ++*switch_count;
3328 3358
3329 trace_sched_switch(preempt, prev, next); 3359 trace_sched_switch(preempt, prev, next);
@@ -3352,8 +3382,8 @@ void __noreturn do_task_dead(void)
3352 * To avoid it, we have to wait for releasing tsk->pi_lock which 3382 * To avoid it, we have to wait for releasing tsk->pi_lock which
3353 * is held by try_to_wake_up() 3383 * is held by try_to_wake_up()
3354 */ 3384 */
3355 smp_mb(); 3385 raw_spin_lock_irq(&current->pi_lock);
3356 raw_spin_unlock_wait(&current->pi_lock); 3386 raw_spin_unlock_irq(&current->pi_lock);
3357 3387
3358 /* Causes final put_task_struct in finish_task_switch(): */ 3388 /* Causes final put_task_struct in finish_task_switch(): */
3359 __set_current_state(TASK_DEAD); 3389 __set_current_state(TASK_DEAD);
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
new file mode 100644
index 000000000000..a92fddc22747
--- /dev/null
+++ b/kernel/sched/membarrier.c
@@ -0,0 +1,152 @@
1/*
2 * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 *
4 * membarrier system call
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 */
16
17#include <linux/syscalls.h>
18#include <linux/membarrier.h>
19#include <linux/tick.h>
20#include <linux/cpumask.h>
21
22#include "sched.h" /* for cpu_rq(). */
23
24/*
25 * Bitmask made from an "or" of all commands within enum membarrier_cmd,
26 * except MEMBARRIER_CMD_QUERY.
27 */
28#define MEMBARRIER_CMD_BITMASK \
29 (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
30
31static void ipi_mb(void *info)
32{
33 smp_mb(); /* IPIs should be serializing but paranoid. */
34}
35
36static void membarrier_private_expedited(void)
37{
38 int cpu;
39 bool fallback = false;
40 cpumask_var_t tmpmask;
41
42 if (num_online_cpus() == 1)
43 return;
44
45 /*
46 * Matches memory barriers around rq->curr modification in
47 * scheduler.
48 */
49 smp_mb(); /* system call entry is not a mb. */
50
51 /*
52 * Expedited membarrier commands guarantee that they won't
53 * block, hence the GFP_NOWAIT allocation flag and fallback
54 * implementation.
55 */
56 if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
57 /* Fallback for OOM. */
58 fallback = true;
59 }
60
61 cpus_read_lock();
62 for_each_online_cpu(cpu) {
63 struct task_struct *p;
64
65 /*
66 * Skipping the current CPU is OK even though we can be
67 * migrated at any point. The current CPU, at the point
68 * where we read raw_smp_processor_id(), is ensured to
69 * be in program order with respect to the caller
70 * thread. Therefore, we can skip this CPU from the
71 * iteration.
72 */
73 if (cpu == raw_smp_processor_id())
74 continue;
75 rcu_read_lock();
76 p = task_rcu_dereference(&cpu_rq(cpu)->curr);
77 if (p && p->mm == current->mm) {
78 if (!fallback)
79 __cpumask_set_cpu(cpu, tmpmask);
80 else
81 smp_call_function_single(cpu, ipi_mb, NULL, 1);
82 }
83 rcu_read_unlock();
84 }
85 if (!fallback) {
86 smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
87 free_cpumask_var(tmpmask);
88 }
89 cpus_read_unlock();
90
91 /*
92 * Memory barrier on the caller thread _after_ we finished
93 * waiting for the last IPI. Matches memory barriers around
94 * rq->curr modification in scheduler.
95 */
96 smp_mb(); /* exit from system call is not a mb */
97}
98
99/**
100 * sys_membarrier - issue memory barriers on a set of threads
101 * @cmd: Takes command values defined in enum membarrier_cmd.
102 * @flags: Currently needs to be 0. For future extensions.
103 *
104 * If this system call is not implemented, -ENOSYS is returned. If the
105 * command specified does not exist, not available on the running
106 * kernel, or if the command argument is invalid, this system call
107 * returns -EINVAL. For a given command, with flags argument set to 0,
108 * this system call is guaranteed to always return the same value until
109 * reboot.
110 *
111 * All memory accesses performed in program order from each targeted thread
112 * are guaranteed to be ordered with respect to sys_membarrier(). If we use
113 * the semantic "barrier()" to represent a compiler barrier forcing memory
114 * accesses to be performed in program order across the barrier, and
115 * smp_mb() to represent explicit memory barriers forcing full memory
116 * ordering across the barrier, we have the following ordering table for
117 * each pair of barrier(), sys_membarrier() and smp_mb():
118 *
119 * The pair ordering is detailed as (O: ordered, X: not ordered):
120 *
121 * barrier() smp_mb() sys_membarrier()
122 * barrier() X X O
123 * smp_mb() X O O
124 * sys_membarrier() O O O
125 */
126SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
127{
128 if (unlikely(flags))
129 return -EINVAL;
130 switch (cmd) {
131 case MEMBARRIER_CMD_QUERY:
132 {
133 int cmd_mask = MEMBARRIER_CMD_BITMASK;
134
135 if (tick_nohz_full_enabled())
136 cmd_mask &= ~MEMBARRIER_CMD_SHARED;
137 return cmd_mask;
138 }
139 case MEMBARRIER_CMD_SHARED:
140 /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
141 if (tick_nohz_full_enabled())
142 return -EINVAL;
143 if (num_online_cpus() > 1)
144 synchronize_sched();
145 return 0;
146 case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
147 membarrier_private_expedited();
148 return 0;
149 default:
150 return -EINVAL;
151 }
152}
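
Since kernel/sched/membarrier.c is a new file, a short user-space caller may help show how the command set above is meant to be used: query the supported commands first, then issue the cheapest one available. This assumes a libc without a membarrier() wrapper (hence syscall(2)) and uapi headers at least as new as this series; MEMBARRIER_CMD_PRIVATE_EXPEDITED is only reported by kernels that carry this patch.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	int mask = membarrier(MEMBARRIER_CMD_QUERY, 0);

	if (mask < 0) {
		perror("membarrier");	/* e.g. ENOSYS on a pre-4.3 kernel */
		return 1;
	}
	if (mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED) {
		/* IPIs only the CPUs currently running this process's threads. */
		membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
		puts("private expedited barrier issued");
	} else if (mask & MEMBARRIER_CMD_SHARED) {
		/* Falls back to synchronize_sched() on the kernel side. */
		membarrier(MEMBARRIER_CMD_SHARED, 0);
		puts("shared barrier issued");
	}
	return 0;
}
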
diff --git a/kernel/task_work.c b/kernel/task_work.c
index d513051fcca2..836a72a66fba 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -96,20 +96,16 @@ void task_work_run(void)
96 * work->func() can do task_work_add(), do not set 96 * work->func() can do task_work_add(), do not set
97 * work_exited unless the list is empty. 97 * work_exited unless the list is empty.
98 */ 98 */
99 raw_spin_lock_irq(&task->pi_lock);
99 do { 100 do {
100 work = READ_ONCE(task->task_works); 101 work = READ_ONCE(task->task_works);
101 head = !work && (task->flags & PF_EXITING) ? 102 head = !work && (task->flags & PF_EXITING) ?
102 &work_exited : NULL; 103 &work_exited : NULL;
103 } while (cmpxchg(&task->task_works, work, head) != work); 104 } while (cmpxchg(&task->task_works, work, head) != work);
105 raw_spin_unlock_irq(&task->pi_lock);
104 106
105 if (!work) 107 if (!work)
106 break; 108 break;
107 /*
108 * Synchronize with task_work_cancel(). It can't remove
109 * the first entry == work, cmpxchg(task_works) should
110 * fail, but it can play with *work and other entries.
111 */
112 raw_spin_unlock_wait(&task->pi_lock);
113 109
114 do { 110 do {
115 next = work->next; 111 next = work->next;
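
task_work_run() keeps its cmpxchg() loop but now runs it with ->pi_lock held, so task_work_cancel() (which also takes ->pi_lock) can no longer race with the detach, and the old raw_spin_unlock_wait() afterwards becomes unnecessary. The detach itself is just an atomic head swap; a rough C11 rendering of that step, with hypothetical types rather than the kernel's callback_head machinery:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct work {
	struct work *next;
	void (*func)(struct work *);
};

static struct work work_exited;		/* sentinel: no further work accepted */

/* Detach the whole pending list, or install the sentinel when exiting. */
static struct work *detach_all(_Atomic(struct work *) *head, bool exiting)
{
	struct work *old, *repl;

	do {
		old = atomic_load(head);
		repl = (!old && exiting) ? &work_exited : NULL;
	} while (!atomic_compare_exchange_weak(head, &old, repl));
	return old;
}
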
diff --git a/kernel/torture.c b/kernel/torture.c
index 55de96529287..637e172835d8 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -117,7 +117,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
117 torture_type, cpu); 117 torture_type, cpu);
118 (*n_offl_successes)++; 118 (*n_offl_successes)++;
119 delta = jiffies - starttime; 119 delta = jiffies - starttime;
120 sum_offl += delta; 120 *sum_offl += delta;
121 if (*min_offl < 0) { 121 if (*min_offl < 0) {
122 *min_offl = delta; 122 *min_offl = delta;
123 *max_offl = delta; 123 *max_offl = delta;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9979f46c81dc..51390febd5e3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -96,19 +96,26 @@ static struct conntrack_gc_work conntrack_gc_work;
96 96
97void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) 97void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
98{ 98{
99 /* 1) Acquire the lock */
99 spin_lock(lock); 100 spin_lock(lock);
100 while (unlikely(nf_conntrack_locks_all)) {
101 spin_unlock(lock);
102 101
103 /* 102 /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
104 * Order the 'nf_conntrack_locks_all' load vs. the 103 * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
105 * spin_unlock_wait() loads below, to ensure 104 */
106 * that 'nf_conntrack_locks_all_lock' is indeed held: 105 if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
107 */ 106 return;
108 smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ 107
109 spin_unlock_wait(&nf_conntrack_locks_all_lock); 108 /* fast path failed, unlock */
110 spin_lock(lock); 109 spin_unlock(lock);
111 } 110
111 /* Slow path 1) get global lock */
112 spin_lock(&nf_conntrack_locks_all_lock);
113
114 /* Slow path 2) get the lock we want */
115 spin_lock(lock);
116
117 /* Slow path 3) release the global lock */
118 spin_unlock(&nf_conntrack_locks_all_lock);
112} 119}
113EXPORT_SYMBOL_GPL(nf_conntrack_lock); 120EXPORT_SYMBOL_GPL(nf_conntrack_lock);
114 121
@@ -149,28 +156,27 @@ static void nf_conntrack_all_lock(void)
149 int i; 156 int i;
150 157
151 spin_lock(&nf_conntrack_locks_all_lock); 158 spin_lock(&nf_conntrack_locks_all_lock);
152 nf_conntrack_locks_all = true;
153 159
154 /* 160 nf_conntrack_locks_all = true;
155 * Order the above store of 'nf_conntrack_locks_all' against
156 * the spin_unlock_wait() loads below, such that if
157 * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
158 * we must observe nf_conntrack_locks[] held:
159 */
160 smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
161 161
162 for (i = 0; i < CONNTRACK_LOCKS; i++) { 162 for (i = 0; i < CONNTRACK_LOCKS; i++) {
163 spin_unlock_wait(&nf_conntrack_locks[i]); 163 spin_lock(&nf_conntrack_locks[i]);
164
165 /* This spin_unlock provides the "release" to ensure that
166 * nf_conntrack_locks_all==true is visible to everyone that
167 * acquired spin_lock(&nf_conntrack_locks[]).
168 */
169 spin_unlock(&nf_conntrack_locks[i]);
164 } 170 }
165} 171}
166 172
167static void nf_conntrack_all_unlock(void) 173static void nf_conntrack_all_unlock(void)
168{ 174{
169 /* 175 /* All prior stores must be complete before we clear
170 * All prior stores must be complete before we clear
171 * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock() 176 * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
172 * might observe the false value but not the entire 177 * might observe the false value but not the entire
173 * critical section: 178 * critical section.
179 * It pairs with the smp_load_acquire() in nf_conntrack_lock()
174 */ 180 */
175 smp_store_release(&nf_conntrack_locks_all, false); 181 smp_store_release(&nf_conntrack_locks_all, false);
176 spin_unlock(&nf_conntrack_locks_all_lock); 182 spin_unlock(&nf_conntrack_locks_all_lock);
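
The reworked nf_conntrack_lock() fast path relies on nothing more than the acquire load of nf_conntrack_locks_all pairing with the release store in nf_conntrack_all_unlock(), while the slow path serializes on the global lock instead of spinning in spin_unlock_wait(). The protocol can be modelled in standalone C11, with pthread mutexes standing in for the spinlocks (names are illustrative; locks[] must be pthread_mutex_init()ed before use):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

#define NLOCKS 16

static pthread_mutex_t locks[NLOCKS];	/* models nf_conntrack_locks[] */
static pthread_mutex_t all_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool locks_all;		/* models nf_conntrack_locks_all */

static void conntrack_lock(int i)
{
	pthread_mutex_lock(&locks[i]);
	/* Acquire pairs with the release store in conntrack_all_unlock(). */
	if (!atomic_load_explicit(&locks_all, memory_order_acquire))
		return;				/* fast path */
	pthread_mutex_unlock(&locks[i]);	/* fast path failed */
	pthread_mutex_lock(&all_lock);		/* wait out the global holder */
	pthread_mutex_lock(&locks[i]);
	pthread_mutex_unlock(&all_lock);
}

static void conntrack_all_lock(void)
{
	pthread_mutex_lock(&all_lock);
	atomic_store(&locks_all, true);
	for (int i = 0; i < NLOCKS; i++) {
		pthread_mutex_lock(&locks[i]);	/* unlock publishes locks_all */
		pthread_mutex_unlock(&locks[i]);/* to later lock holders */
	}
}

static void conntrack_all_unlock(void)
{
	/* Pairs with the acquire load in conntrack_lock(). */
	atomic_store_explicit(&locks_all, false, memory_order_release);
	pthread_mutex_unlock(&all_lock);
}
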
diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh
new file mode 100755
index 000000000000..49fa51726ce3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/config_override.sh
@@ -0,0 +1,61 @@
1#!/bin/bash
2#
3# config_override.sh base override
4#
5# Combines base and override, removing any Kconfig options from base
6# that conflict with any in override, concatenating what remains and
7# sending the result to standard output.
8#
9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation; either version 2 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, you can access it online at
21# http://www.gnu.org/licenses/gpl-2.0.html.
22#
23# Copyright (C) IBM Corporation, 2017
24#
25# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
26
27base=$1
28if test -r $base
29then
30 :
31else
32 echo Base file $base unreadable!!!
33 exit 1
34fi
35
36override=$2
37if test -r $override
38then
39 :
40else
41 echo Override file $override unreadable!!!
42 exit 1
43fi
44
45T=/tmp/config_override.sh.$$
46trap 'rm -rf $T' 0
47mkdir $T
48
49sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' |
50 awk '
51 {
52 if (last)
53 print last " |";
54 last = $0;
55 }
56 END {
57 if (last)
58 print last;
59 }' > $T/script
60sh $T/script < $base
61cat $override
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 1426a9b97494..07a13779eece 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -66,9 +66,34 @@ configfrag_boot_params () {
66 66
67# configfrag_boot_cpus bootparam-string config-fragment-file config-cpus 67# configfrag_boot_cpus bootparam-string config-fragment-file config-cpus
68# 68#
69# Decreases number of CPUs based on any maxcpus= boot parameters specified. 69# Decreases number of CPUs based on any nr_cpus= boot parameters specified.
70configfrag_boot_cpus () { 70configfrag_boot_cpus () {
71 local bootargs="`configfrag_boot_params "$1" "$2"`" 71 local bootargs="`configfrag_boot_params "$1" "$2"`"
72 local nr_cpus
73 if echo "${bootargs}" | grep -q 'nr_cpus=[0-9]'
74 then
75 nr_cpus="`echo "${bootargs}" | sed -e 's/^.*nr_cpus=\([0-9]*\).*$/\1/'`"
76 if test "$3" -gt "$nr_cpus"
77 then
78 echo $nr_cpus
79 else
80 echo $3
81 fi
82 else
83 echo $3
84 fi
85}
86
87# configfrag_boot_maxcpus bootparam-string config-fragment-file config-cpus
88#
89# Decreases number of CPUs based on any maxcpus= boot parameters specified.
90# This allows tests where additional CPUs come online later during the
91# test run. However, the torture parameters will be set based on the
92# number of CPUs initially present, so the scripting should schedule
93# test runs based on the maxcpus= boot parameter controlling the initial
94# number of CPUs instead of on the ultimate number of CPUs.
95configfrag_boot_maxcpus () {
96 local bootargs="`configfrag_boot_params "$1" "$2"`"
72 local maxcpus 97 local maxcpus
73 if echo "${bootargs}" | grep -q 'maxcpus=[0-9]' 98 if echo "${bootargs}" | grep -q 'maxcpus=[0-9]'
74 then 99 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index c29f2ec0bf9f..46752c164676 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -2,7 +2,7 @@
2# 2#
3# Build a kvm-ready Linux kernel from the tree in the current directory. 3# Build a kvm-ready Linux kernel from the tree in the current directory.
4# 4#
5# Usage: kvm-build.sh config-template build-dir more-configs 5# Usage: kvm-build.sh config-template build-dir
6# 6#
7# This program is free software; you can redistribute it and/or modify 7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by 8# it under the terms of the GNU General Public License as published by
@@ -34,24 +34,17 @@ then
34 echo "kvm-build.sh :$builddir: Not a writable directory, cannot build into it" 34 echo "kvm-build.sh :$builddir: Not a writable directory, cannot build into it"
35 exit 1 35 exit 1
36fi 36fi
37moreconfigs=${3}
38if test -z "$moreconfigs" -o ! -r "$moreconfigs"
39then
40 echo "kvm-build.sh :$moreconfigs: Not a readable file"
41 exit 1
42fi
43 37
44T=/tmp/test-linux.sh.$$ 38T=/tmp/test-linux.sh.$$
45trap 'rm -rf $T' 0 39trap 'rm -rf $T' 0
46mkdir $T 40mkdir $T
47 41
48grep -v 'CONFIG_[A-Z]*_TORTURE_TEST=' < ${config_template} > $T/config 42cp ${config_template} $T/config
49cat << ___EOF___ >> $T/config 43cat << ___EOF___ >> $T/config
50CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD" 44CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD"
51CONFIG_VIRTIO_PCI=y 45CONFIG_VIRTIO_PCI=y
52CONFIG_VIRTIO_CONSOLE=y 46CONFIG_VIRTIO_CONSOLE=y
53___EOF___ 47___EOF___
54cat $moreconfigs >> $T/config
55 48
56configinit.sh $T/config O=$builddir 49configinit.sh $T/config O=$builddir
57retval=$? 50retval=$?
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 93eede4e8fbe..0af36a721b9c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -40,7 +40,7 @@
40 40
41T=/tmp/kvm-test-1-run.sh.$$ 41T=/tmp/kvm-test-1-run.sh.$$
42trap 'rm -rf $T' 0 42trap 'rm -rf $T' 0
43touch $T 43mkdir $T
44 44
45. $KVM/bin/functions.sh 45. $KVM/bin/functions.sh
46. $CONFIGFRAG/ver_functions.sh 46. $CONFIGFRAG/ver_functions.sh
@@ -60,37 +60,33 @@ then
60 echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it" 60 echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it"
61 exit 1 61 exit 1
62fi 62fi
63cp $config_template $resdir/ConfigFragment
64echo ' ---' `date`: Starting build 63echo ' ---' `date`: Starting build
65echo ' ---' Kconfig fragment at: $config_template >> $resdir/log 64echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
65touch $resdir/ConfigFragment.input $resdir/ConfigFragment
66if test -r "$config_dir/CFcommon" 66if test -r "$config_dir/CFcommon"
67then 67then
68 cat < $config_dir/CFcommon >> $T 68 echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input
69 cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input
70 config_override.sh $config_dir/CFcommon $config_template > $T/Kc1
71 grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment
72else
73 cp $config_template $T/Kc1
69fi 74fi
70# Optimizations below this point 75echo " --- $config_template" >> $resdir/ConfigFragment.input
71# CONFIG_USB=n 76cat $config_template >> $resdir/ConfigFragment.input
72# CONFIG_SECURITY=n 77grep '#CHECK#' $config_template >> $resdir/ConfigFragment
73# CONFIG_NFS_FS=n 78if test -n "$TORTURE_KCONFIG_ARG"
74# CONFIG_SOUND=n 79then
75# CONFIG_INPUT_JOYSTICK=n 80 echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline
76# CONFIG_INPUT_TABLET=n 81 echo " --- --kconfig argument" >> $resdir/ConfigFragment.input
77# CONFIG_INPUT_TOUCHSCREEN=n 82 cat $T/cmdline >> $resdir/ConfigFragment.input
78# CONFIG_INPUT_MISC=n 83 config_override.sh $T/Kc1 $T/cmdline > $T/Kc2
79# CONFIG_INPUT_MOUSE=n 84 # Note that "#CHECK#" is not permitted on commandline.
80# # CONFIG_NET=n # disables console access, so accept the slower build. 85else
81# CONFIG_SCSI=n 86 cp $T/Kc1 $T/Kc2
82# CONFIG_ATA=n 87fi
83# CONFIG_FAT_FS=n 88cat $T/Kc2 >> $resdir/ConfigFragment
84# CONFIG_MSDOS_FS=n 89
85# CONFIG_VFAT_FS=n
86# CONFIG_ISO9660_FS=n
87# CONFIG_QUOTA=n
88# CONFIG_HID=n
89# CONFIG_CRYPTO=n
90# CONFIG_PCCARD=n
91# CONFIG_PCMCIA=n
92# CONFIG_CARDBUS=n
93# CONFIG_YENTA=n
94base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` 90base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
95if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux 91if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
96then 92then
@@ -100,7 +96,9 @@ then
100 KERNEL=$base_resdir/${BOOT_IMAGE##*/} # use the last component of ${BOOT_IMAGE} 96 KERNEL=$base_resdir/${BOOT_IMAGE##*/} # use the last component of ${BOOT_IMAGE}
101 ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh 97 ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh
102 ln -s $base_resdir/.config $resdir # for kvm-recheck.sh 98 ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
103elif kvm-build.sh $config_template $builddir $T 99 # Arch-independent indicator
100 touch $resdir/builtkernel
101elif kvm-build.sh $T/Kc2 $builddir
104then 102then
105 # Had to build a kernel for this test. 103 # Had to build a kernel for this test.
106 QEMU="`identify_qemu $builddir/vmlinux`" 104 QEMU="`identify_qemu $builddir/vmlinux`"
@@ -112,6 +110,8 @@ then
112 then 110 then
113 cp $builddir/$BOOT_IMAGE $resdir 111 cp $builddir/$BOOT_IMAGE $resdir
114 KERNEL=$resdir/${BOOT_IMAGE##*/} 112 KERNEL=$resdir/${BOOT_IMAGE##*/}
113 # Arch-independent indicator
114 touch $resdir/builtkernel
115 else 115 else
116 echo No identifiable boot image, not running KVM, see $resdir. 116 echo No identifiable boot image, not running KVM, see $resdir.
117 echo Do the torture scripts know about your architecture? 117 echo Do the torture scripts know about your architecture?
@@ -149,7 +149,7 @@ fi
149 149
150# Generate -smp qemu argument. 150# Generate -smp qemu argument.
151qemu_args="-enable-kvm -nographic $qemu_args" 151qemu_args="-enable-kvm -nographic $qemu_args"
152cpu_count=`configNR_CPUS.sh $config_template` 152cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
153cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"` 153cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
154vcpus=`identify_qemu_vcpus` 154vcpus=`identify_qemu_vcpus`
155if test $cpu_count -gt $vcpus 155if test $cpu_count -gt $vcpus
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 50091de3a911..b55895fb10ed 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -41,6 +41,7 @@ PATH=${KVM}/bin:$PATH; export PATH
41TORTURE_DEFCONFIG=defconfig 41TORTURE_DEFCONFIG=defconfig
42TORTURE_BOOT_IMAGE="" 42TORTURE_BOOT_IMAGE=""
43TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD 43TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
44TORTURE_KCONFIG_ARG=""
44TORTURE_KMAKE_ARG="" 45TORTURE_KMAKE_ARG=""
45TORTURE_SHUTDOWN_GRACE=180 46TORTURE_SHUTDOWN_GRACE=180
46TORTURE_SUITE=rcu 47TORTURE_SUITE=rcu
@@ -65,6 +66,7 @@ usage () {
65 echo " --duration minutes" 66 echo " --duration minutes"
66 echo " --interactive" 67 echo " --interactive"
67 echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]" 68 echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
69 echo " --kconfig Kconfig-options"
68 echo " --kmake-arg kernel-make-arguments" 70 echo " --kmake-arg kernel-make-arguments"
69 echo " --mac nn:nn:nn:nn:nn:nn" 71 echo " --mac nn:nn:nn:nn:nn:nn"
70 echo " --no-initrd" 72 echo " --no-initrd"
@@ -129,6 +131,11 @@ do
129 jitter="$2" 131 jitter="$2"
130 shift 132 shift
131 ;; 133 ;;
134 --kconfig)
135 checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
136 TORTURE_KCONFIG_ARG="$2"
137 shift
138 ;;
132 --kmake-arg) 139 --kmake-arg)
133 checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' 140 checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
134 TORTURE_KMAKE_ARG="$2" 141 TORTURE_KMAKE_ARG="$2"
@@ -205,6 +212,7 @@ do
205 then 212 then
206 cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1` 213 cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
207 cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"` 214 cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
215 cpu_count=`configfrag_boot_maxcpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
208 for ((cur_rep=0;cur_rep<$config_reps;cur_rep++)) 216 for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
209 do 217 do
210 echo $CF1 $cpu_count >> $T/cfgcpu 218 echo $CF1 $cpu_count >> $T/cfgcpu
@@ -275,6 +283,7 @@ TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
275TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY 283TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
276TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG 284TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
277TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD 285TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
286TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
278TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG 287TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
279TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD 288TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
280TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE 289TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
@@ -324,6 +333,7 @@ function dump(first, pastlast, batchnum)
324{ 333{
325 print "echo ----Start batch " batchnum ": `date`"; 334 print "echo ----Start batch " batchnum ": `date`";
326 print "echo ----Start batch " batchnum ": `date` >> " rd "/log"; 335 print "echo ----Start batch " batchnum ": `date` >> " rd "/log";
336 print "needqemurun="
327 jn=1 337 jn=1
328 for (j = first; j < pastlast; j++) { 338 for (j = first; j < pastlast; j++) {
329 builddir=KVM "/b" jn 339 builddir=KVM "/b" jn
@@ -359,10 +369,11 @@ function dump(first, pastlast, batchnum)
359 for (j = 1; j < jn; j++) { 369 for (j = 1; j < jn; j++) {
360 builddir=KVM "/b" j 370 builddir=KVM "/b" j
361 print "rm -f " builddir ".ready" 371 print "rm -f " builddir ".ready"
362 print "if test -z \"$TORTURE_BUILDONLY\"" 372 print "if test -f \"" rd cfr[j] "/builtkernel\""
363 print "then" 373 print "then"
364 print "\techo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date`"; 374 print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date`";
365 print "\techo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date` >> " rd "/log"; 375 print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` >> " rd "/log";
376 print "\tneedqemurun=1"
366 print "fi" 377 print "fi"
367 } 378 }
368 njitter = 0; 379 njitter = 0;
@@ -377,13 +388,22 @@ function dump(first, pastlast, batchnum)
377 njitter = 0; 388 njitter = 0;
378 print "echo Build-only run, so suppressing jitter >> " rd "/log" 389 print "echo Build-only run, so suppressing jitter >> " rd "/log"
379 } 390 }
380 for (j = 0; j < njitter; j++) 391 if (TORTURE_BUILDONLY) {
381 print "jitter.sh " j " " dur " " ja[2] " " ja[3] "&" 392 print "needqemurun="
382 print "wait" 393 }
383 print "if test -z \"$TORTURE_BUILDONLY\"" 394 print "if test -n \"$needqemurun\""
384 print "then" 395 print "then"
396 print "\techo ---- Starting kernels. `date`";
397 print "\techo ---- Starting kernels. `date` >> " rd "/log";
398 for (j = 0; j < njitter; j++)
399 print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
400 print "\twait"
385 print "\techo ---- All kernel runs complete. `date`"; 401 print "\techo ---- All kernel runs complete. `date`";
386 print "\techo ---- All kernel runs complete. `date` >> " rd "/log"; 402 print "\techo ---- All kernel runs complete. `date` >> " rd "/log";
403 print "else"
404 print "\twait"
405 print "\techo ---- No kernel runs. `date`";
406 print "\techo ---- No kernel runs. `date` >> " rd "/log";
387 print "fi" 407 print "fi"
388 for (j = 1; j < jn; j++) { 408 for (j = 1; j < jn; j++) {
389 builddir=KVM "/b" j 409 builddir=KVM "/b" j
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot
index 6804f9dcfc1b..be7728db42fd 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot
@@ -1 +1 @@
rcutorture.torture_type=rcu_busted rcutorture.torture_type=busted
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot
deleted file mode 100644
index 84a7d51b7481..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot
+++ /dev/null
@@ -1 +0,0 @@
1rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
index 6bc24e99862f..c15ada821e45 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
@@ -4,6 +4,7 @@ CONFIG_PREEMPT_VOLUNTARY=n
4CONFIG_PREEMPT=n 4CONFIG_PREEMPT=n
5#CHECK#CONFIG_TINY_SRCU=y 5#CHECK#CONFIG_TINY_SRCU=y
6CONFIG_RCU_TRACE=n 6CONFIG_RCU_TRACE=n
7CONFIG_DEBUG_LOCK_ALLOC=n 7CONFIG_DEBUG_LOCK_ALLOC=y
8CONFIG_PROVE_LOCKING=y
8CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 9CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
9CONFIG_PREEMPT_COUNT=n 10CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index 1d14e1383016..9f3a4d28e508 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -1,4 +1,4 @@
1rcutorture.torture_type=rcu_bh maxcpus=8 1rcutorture.torture_type=rcu_bh maxcpus=8 nr_cpus=43
2rcutree.gp_preinit_delay=3 2rcutree.gp_preinit_delay=3
3rcutree.gp_init_delay=3 3rcutree.gp_init_delay=3
4rcutree.gp_cleanup_delay=3 4rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 9ad3f89c8dc7..af6fca03602f 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -69,11 +69,11 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE
69CONFIG_PREEMPT_RCU 69CONFIG_PREEMPT_RCU
70CONFIG_TREE_RCU 70CONFIG_TREE_RCU
71CONFIG_TINY_RCU 71CONFIG_TINY_RCU
72CONFIG_TASKS_RCU
72 73
73 These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP. 74 These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
74 75
75CONFIG_SRCU 76CONFIG_SRCU
76CONFIG_TASKS_RCU
77 77
78 Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable. 78 Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
79 79