-rw-r--r--Documentation/atomic_bitops.txt66
-rw-r--r--Documentation/atomic_t.txt242
-rw-r--r--Documentation/locking/crossrelease.txt874
-rw-r--r--Documentation/memory-barriers.txt101
-rw-r--r--Documentation/static-keys.txt20
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt5
-rw-r--r--arch/Kconfig12
-rw-r--r--arch/alpha/include/asm/futex.h26
-rw-r--r--arch/arc/include/asm/atomic.h2
-rw-r--r--arch/arc/include/asm/futex.h40
-rw-r--r--arch/arm/include/asm/futex.h26
-rw-r--r--arch/arm64/include/asm/futex.h26
-rw-r--r--arch/arm64/include/asm/spinlock.h11
-rw-r--r--arch/frv/include/asm/futex.h3
-rw-r--r--arch/frv/kernel/futex.c27
-rw-r--r--arch/hexagon/include/asm/atomic.h2
-rw-r--r--arch/hexagon/include/asm/futex.h38
-rw-r--r--arch/ia64/include/asm/futex.h25
-rw-r--r--arch/metag/include/asm/atomic_lock1.h2
-rw-r--r--arch/microblaze/include/asm/futex.h38
-rw-r--r--arch/mips/include/asm/futex.h25
-rw-r--r--arch/mips/kernel/smp.c6
-rw-r--r--arch/openrisc/include/asm/futex.h39
-rw-r--r--arch/parisc/include/asm/atomic.h2
-rw-r--r--arch/parisc/include/asm/futex.h26
-rw-r--r--arch/powerpc/include/asm/barrier.h7
-rw-r--r--arch/powerpc/include/asm/futex.h26
-rw-r--r--arch/powerpc/include/asm/spinlock.h3
-rw-r--r--arch/s390/include/asm/futex.h23
-rw-r--r--arch/sh/include/asm/futex.h26
-rw-r--r--arch/sparc/include/asm/atomic_32.h2
-rw-r--r--arch/sparc/include/asm/futex_64.h26
-rw-r--r--arch/tile/include/asm/atomic_32.h2
-rw-r--r--arch/tile/include/asm/futex.h40
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/asm.h6
-rw-r--r--arch/x86/include/asm/atomic.h69
-rw-r--r--arch/x86/include/asm/atomic64_32.h81
-rw-r--r--arch/x86/include/asm/atomic64_64.h73
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/futex.h40
-rw-r--r--arch/x86/include/asm/refcount.h109
-rw-r--r--arch/x86/mm/extable.c42
-rw-r--r--arch/xtensa/include/asm/futex.h27
-rw-r--r--block/blk-softirq.c2
-rw-r--r--drivers/acpi/nfit/core.c2
-rw-r--r--drivers/block/null_blk.c2
-rw-r--r--drivers/clocksource/arm_arch_timer.c6
-rw-r--r--drivers/cpuidle/coupled.c10
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c5
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c2
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.h2
-rw-r--r--fs/overlayfs/readdir.c4
-rw-r--r--fs/userfaultfd.c25
-rw-r--r--include/asm-generic/atomic64.h2
-rw-r--r--include/asm-generic/futex.h50
-rw-r--r--include/linux/atomic.h3
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/completion.h47
-rw-r--r--include/linux/cpuset.h6
-rw-r--r--include/linux/futex.h7
-rw-r--r--include/linux/irqflags.h24
-rw-r--r--include/linux/jump_label.h33
-rw-r--r--include/linux/kasan-checks.h10
-rw-r--r--include/linux/kernel.h7
-rw-r--r--include/linux/lockdep.h165
-rw-r--r--include/linux/mm_types.h95
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/refcount.h4
-rw-r--r--include/linux/rwsem-spinlock.h1
-rw-r--r--include/linux/rwsem.h1
-rw-r--r--include/linux/sched.h12
-rw-r--r--include/linux/sched/mm.h8
-rw-r--r--include/linux/smp.h8
-rw-r--r--include/linux/spinlock.h41
-rw-r--r--init/Kconfig7
-rw-r--r--kernel/cgroup/cpuset.c7
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/futex.c61
-rw-r--r--kernel/jump_label.c104
-rw-r--r--kernel/locking/lockdep.c1008
-rw-r--r--kernel/locking/lockdep_internals.h2
-rw-r--r--kernel/locking/lockdep_proc.c4
-rw-r--r--kernel/locking/lockdep_states.h1
-rw-r--r--kernel/locking/osq_lock.c13
-rw-r--r--kernel/locking/qspinlock_paravirt.h24
-rw-r--r--kernel/locking/rtmutex_common.h29
-rw-r--r--kernel/locking/rwsem-spinlock.c37
-rw-r--r--kernel/locking/rwsem-xadd.c33
-rw-r--r--kernel/panic.c12
-rw-r--r--kernel/sched/completion.c11
-rw-r--r--kernel/sched/core.c4
-rw-r--r--kernel/sched/sched.h2
-rw-r--r--kernel/sched/swait.c6
-rw-r--r--kernel/smp.c32
-rw-r--r--kernel/up.c2
-rw-r--r--kernel/workqueue.c53
-rw-r--r--lib/Kconfig.debug18
-rw-r--r--lib/locking-selftest.c123
-rw-r--r--mm/huge_memory.c12
-rw-r--r--mm/kasan/kasan.c4
-rw-r--r--mm/page_alloc.c49
-rw-r--r--mm/slab.h6
-rw-r--r--mm/slob.c6
-rw-r--r--mm/vmscan.c13
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv6/udp.c3
108 files changed, 3460 insertions, 1112 deletions
diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
new file mode 100644
index 000000000000..5550bfdcce5f
--- /dev/null
+++ b/Documentation/atomic_bitops.txt
@@ -0,0 +1,66 @@
1
2On atomic bitops.
3
4
5While our bitmap_{}() functions are non-atomic, we have a number of operations
6operating on single bits in a bitmap that are atomic.
7
8
9API
10---
11
12The single bit operations are:
13
14Non-RMW ops:
15
16 test_bit()
17
18RMW atomic operations without return value:
19
20 {set,clear,change}_bit()
21 clear_bit_unlock()
22
23RMW atomic operations with return value:
24
25 test_and_{set,clear,change}_bit()
26 test_and_set_bit_lock()
27
28Barriers:
29
30 smp_mb__{before,after}_atomic()
31
32
33All RMW atomic operations have a '__' prefixed variant which is non-atomic.
34
35
36SEMANTICS
37---------
38
39Non-atomic ops:
40
41In particular __clear_bit_unlock() suffers the same issue as atomic_set(),
42which is why the generic version maps to clear_bit_unlock(), see atomic_t.txt.
43
44
45RMW ops:
46
47The test_and_{}_bit() operations return the original value of the bit.
48
49
50ORDERING
51--------
52
53Like with atomic_t, the rule of thumb is:
54
55 - non-RMW operations are unordered;
56
57 - RMW operations that have no return value are unordered;
58
59 - RMW operations that have a return value are fully ordered.
60
61Except for test_and_set_bit_lock() which has ACQUIRE semantics and
62clear_bit_unlock() which has RELEASE semantics.
63
 64Since a platform only has a single means of achieving atomic operations,
 65the same barriers as for atomic_t are used; see atomic_t.txt.
66
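As a quick illustration of the ACQUIRE/RELEASE pair above, the two operations
can be used to build a simple lock out of bit 0 of a word. This is only a
sketch (real code would use bit_spin_lock()/bit_spin_unlock() or a proper
lock type); the names are made up for the example:

  static unsigned long my_flags;	/* bit 0 acts as the lock */

  static void my_lock(void)
  {
	/* ACQUIRE: accesses after this cannot leak before the set */
	while (test_and_set_bit_lock(0, &my_flags))
		cpu_relax();
  }

  static void my_unlock(void)
  {
	/* RELEASE: accesses before this cannot leak past the clear */
	clear_bit_unlock(0, &my_flags);
  }
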
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
new file mode 100644
index 000000000000..913396ac5824
--- /dev/null
+++ b/Documentation/atomic_t.txt
@@ -0,0 +1,242 @@
1
 2On atomic types (atomic_t, atomic64_t and atomic_long_t).
3
4The atomic type provides an interface to the architecture's means of atomic
5RMW operations between CPUs (atomic operations on MMIO are not supported and
6can lead to fatal traps on some platforms).
7
8API
9---
10
11The 'full' API consists of (atomic64_ and atomic_long_ prefixes omitted for
12brevity):
13
14Non-RMW ops:
15
16 atomic_read(), atomic_set()
17 atomic_read_acquire(), atomic_set_release()
18
19
20RMW atomic operations:
21
22Arithmetic:
23
24 atomic_{add,sub,inc,dec}()
25 atomic_{add,sub,inc,dec}_return{,_relaxed,_acquire,_release}()
26 atomic_fetch_{add,sub,inc,dec}{,_relaxed,_acquire,_release}()
27
28
29Bitwise:
30
31 atomic_{and,or,xor,andnot}()
32 atomic_fetch_{and,or,xor,andnot}{,_relaxed,_acquire,_release}()
33
34
35Swap:
36
37 atomic_xchg{,_relaxed,_acquire,_release}()
38 atomic_cmpxchg{,_relaxed,_acquire,_release}()
39 atomic_try_cmpxchg{,_relaxed,_acquire,_release}()
40
41
42Reference count (but please see refcount_t):
43
44 atomic_add_unless(), atomic_inc_not_zero()
45 atomic_sub_and_test(), atomic_dec_and_test()
46
47
48Misc:
49
50 atomic_inc_and_test(), atomic_add_negative()
51 atomic_dec_unless_positive(), atomic_inc_unless_negative()
52
53
54Barriers:
55
56 smp_mb__{before,after}_atomic()
57
58
59
60SEMANTICS
61---------
62
63Non-RMW ops:
64
65The non-RMW ops are (typically) regular LOADs and STOREs and are canonically
66implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and
67smp_store_release() respectively.
68
69The one detail to this is that atomic_set{}() should be observable to the RMW
70ops. That is:
71
72 C atomic-set
73
74 {
75 atomic_set(v, 1);
76 }
77
78 P1(atomic_t *v)
79 {
80 atomic_add_unless(v, 1, 0);
81 }
82
83 P2(atomic_t *v)
84 {
85 atomic_set(v, 0);
86 }
87
88 exists
89 (v=2)
90
91In this case we would expect the atomic_set() from CPU1 to either happen
92before the atomic_add_unless(), in which case that latter one would no-op, or
93_after_ in which case we'd overwrite its result. In no case is "2" a valid
94outcome.
95
96This is typically true on 'normal' platforms, where a regular competing STORE
97will invalidate a LL/SC or fail a CMPXCHG.
98
99The obvious case where this is not so is when we need to implement atomic ops
100with a lock:
101
102 CPU0 CPU1
103
104 atomic_add_unless(v, 1, 0);
105 lock();
106 ret = READ_ONCE(v->counter); // == 1
107 atomic_set(v, 0);
108 if (ret != u) WRITE_ONCE(v->counter, 0);
109 WRITE_ONCE(v->counter, ret + 1);
110 unlock();
111
    112The typical solution is to then implement atomic_set{}() with atomic_xchg().
113
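One way to do that, sketched here for illustration only, is to route the
plain store through the same serialization as the RMW ops:

  static inline void atomic_set(atomic_t *v, int i)
  {
	/*
	 * Goes through the same lock as the RMW ops above, so a concurrent
	 * atomic_add_unless() can no longer have its result overwritten
	 * half-way and the (v=2) outcome of the litmus test cannot happen.
	 */
	(void)atomic_xchg(v, i);
  }
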
114
115RMW ops:
116
117These come in various forms:
118
119 - plain operations without return value: atomic_{}()
120
121 - operations which return the modified value: atomic_{}_return()
122
123 these are limited to the arithmetic operations because those are
124 reversible. Bitops are irreversible and therefore the modified value
125 is of dubious utility.
126
127 - operations which return the original value: atomic_fetch_{}()
128
129 - swap operations: xchg(), cmpxchg() and try_cmpxchg()
130
131 - misc; the special purpose operations that are commonly used and would,
132 given the interface, normally be implemented using (try_)cmpxchg loops but
133 are time critical and can, (typically) on LL/SC architectures, be more
134 efficiently implemented.
135
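To illustrate that last point, this is roughly what one of the 'misc'
operations would look like if written as a (try_)cmpxchg loop; the function
name is made up and architectures are free to do better:

  static inline bool my_inc_not_zero(atomic_t *v)
  {
	int c = atomic_read(v);

	do {
		if (!c)
			return false;
		/* on failure, atomic_try_cmpxchg() reloads 'c' from *v */
	} while (!atomic_try_cmpxchg(v, &c, c + 1));

	return true;
  }
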
136All these operations are SMP atomic; that is, the operations (for a single
137atomic variable) can be fully ordered and no intermediate state is lost or
138visible.
139
140
141ORDERING (go read memory-barriers.txt first)
142--------
143
144The rule of thumb:
145
146 - non-RMW operations are unordered;
147
148 - RMW operations that have no return value are unordered;
149
150 - RMW operations that have a return value are fully ordered;
151
152 - RMW operations that are conditional are unordered on FAILURE,
153 otherwise the above rules apply.
154
155Except of course when an operation has an explicit ordering like:
156
157 {}_relaxed: unordered
158 {}_acquire: the R of the RMW (or atomic_read) is an ACQUIRE
159 {}_release: the W of the RMW (or atomic_set) is a RELEASE
160
161Where 'unordered' is against other memory locations. Address dependencies are
162not defeated.
163
164Fully ordered primitives are ordered against everything prior and everything
165subsequent. Therefore a fully ordered primitive is like having an smp_mb()
166before and an smp_mb() after the primitive.
167
168
169The barriers:
170
171 smp_mb__{before,after}_atomic()
172
173only apply to the RMW ops and can be used to augment/upgrade the ordering
174inherent to the used atomic op. These barriers provide a full smp_mb().
175
176These helper barriers exist because architectures have varying implicit
    177ordering on their SMP atomic primitives. For example, our TSO architectures
    178provide fully ordered atomics and these barriers are no-ops.
179
180Thus:
181
182 atomic_fetch_add();
183
184is equivalent to:
185
186 smp_mb__before_atomic();
187 atomic_fetch_add_relaxed();
188 smp_mb__after_atomic();
189
190However the atomic_fetch_add() might be implemented more efficiently.
191
192Further, while something like:
193
194 smp_mb__before_atomic();
195 atomic_dec(&X);
196
197is a 'typical' RELEASE pattern, the barrier is strictly stronger than
198a RELEASE. Similarly for something like:
199
200 atomic_inc(&X);
201 smp_mb__after_atomic();
202
203is an ACQUIRE pattern (though very much not typical), but again the barrier is
204strictly stronger than ACQUIRE. As illustrated:
205
206 C strong-acquire
207
208 {
209 }
210
211 P1(int *x, atomic_t *y)
212 {
213 r0 = READ_ONCE(*x);
214 smp_rmb();
215 r1 = atomic_read(y);
216 }
217
218 P2(int *x, atomic_t *y)
219 {
220 atomic_inc(y);
221 smp_mb__after_atomic();
222 WRITE_ONCE(*x, 1);
223 }
224
225 exists
226 (r0=1 /\ r1=0)
227
228This should not happen; but a hypothetical atomic_inc_acquire() --
229(void)atomic_fetch_inc_acquire() for instance -- would allow the outcome,
230since then:
231
232 P1 P2
233
234 t = LL.acq *y (0)
235 t++;
236 *x = 1;
237 r0 = *x (1)
238 RMB
239 r1 = *y (0)
240 SC *y, t;
241
242is allowed.
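
Finally, as a small worked instance of the 'typical RELEASE pattern'
mentioned earlier, consider marking an object dead before dropping a
reference to it (illustrative only; the 'obj' fields are made up, and the
idiom mirrors the death-mark example in memory-barriers.txt):

  obj->dead = 1;
  smp_mb__before_atomic();	/* the death mark is ordered before ... */
  atomic_dec(&obj->ref_count);	/* ... the reference is dropped */

atomic_dec() returns no value and is therefore unordered on its own; the
barrier upgrades it so the store to obj->dead cannot be reordered past the
decrement.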
diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt
new file mode 100644
index 000000000000..bdf1423d5f99
--- /dev/null
+++ b/Documentation/locking/crossrelease.txt
@@ -0,0 +1,874 @@
1Crossrelease
2============
3
4Started by Byungchul Park <byungchul.park@lge.com>
5
6Contents:
7
8 (*) Background
9
10 - What causes deadlock
11 - How lockdep works
12
13 (*) Limitation
14
15 - Limit lockdep
16 - Pros from the limitation
17 - Cons from the limitation
18 - Relax the limitation
19
20 (*) Crossrelease
21
22 - Introduce crossrelease
23 - Introduce commit
24
25 (*) Implementation
26
27 - Data structures
28 - How crossrelease works
29
30 (*) Optimizations
31
32 - Avoid duplication
33 - Lockless for hot paths
34
 35 (*) APPENDIX A: What lockdep does to work aggressively
36
37 (*) APPENDIX B: How to avoid adding false dependencies
38
39
40==========
41Background
42==========
43
44What causes deadlock
45--------------------
46
 47A deadlock occurs when a context is waiting for an event to happen,
 48which is impossible because another (or the same) context that can trigger
 49the event is also waiting for another (or the same) event to happen, which
 50is also impossible for the same reason.
51
52For example:
53
54 A context going to trigger event C is waiting for event A to happen.
55 A context going to trigger event A is waiting for event B to happen.
56 A context going to trigger event B is waiting for event C to happen.
57
58A deadlock occurs when these three wait operations run at the same time,
59because event C cannot be triggered if event A does not happen, which in
60turn cannot be triggered if event B does not happen, which in turn
61cannot be triggered if event C does not happen. After all, no event can
62be triggered since any of them never meets its condition to wake up.
63
 64A dependency might exist between two waiters and a deadlock might happen
 65due to an incorrect relationship between dependencies. Thus, we must first
 66define what a dependency is. A dependency exists between two waiters if:
67
68 1. There are two waiters waiting for each event at a given time.
69 2. The only way to wake up each waiter is to trigger its event.
70 3. Whether one can be woken up depends on whether the other can.
71
72Each wait in the example creates its dependency like:
73
74 Event C depends on event A.
75 Event A depends on event B.
76 Event B depends on event C.
77
78 NOTE: Precisely speaking, a dependency is one between whether a
79 waiter for an event can be woken up and whether another waiter for
80 another event can be woken up. However from now on, we will describe
81 a dependency as if it's one between an event and another event for
82 simplicity.
83
84And they form circular dependencies like:
85
86 -> C -> A -> B -
87 / \
88 \ /
89 ----------------
90
91 where 'A -> B' means that event A depends on event B.
92
93Such circular dependencies lead to a deadlock since no waiter can meet
94its condition to wake up as described.
95
96CONCLUSION
97
98Circular dependencies cause a deadlock.
99
100
101How lockdep works
102-----------------
103
104Lockdep tries to detect a deadlock by checking dependencies created by
105lock operations, acquire and release. Waiting for a lock corresponds to
106waiting for an event, and releasing a lock corresponds to triggering an
107event in the previous section.
108
109In short, lockdep does:
110
111 1. Detect a new dependency.
112 2. Add the dependency into a global graph.
113 3. Check if that makes dependencies circular.
114 4. Report a deadlock or its possibility if so.
115
116For example, consider a graph built by lockdep that looks like:
117
118 A -> B -
119 \
120 -> E
121 /
122 C -> D -
123
124 where A, B,..., E are different lock classes.
125
126Lockdep will add a dependency into the graph on detection of a new
127dependency. For example, it will add a dependency 'E -> C' when a new
128dependency between lock E and lock C is detected. Then the graph will be:
129
130 A -> B -
131 \
132 -> E -
133 / \
134 -> C -> D - \
135 / /
136 \ /
137 ------------------
138
139 where A, B,..., E are different lock classes.
140
141This graph contains a subgraph which demonstrates circular dependencies:
142
143 -> E -
144 / \
145 -> C -> D - \
146 / /
147 \ /
148 ------------------
149
150 where C, D and E are different lock classes.
151
    152This is the condition under which a deadlock might occur. Lockdep
    153reports it on detection after adding a new dependency. This is how
    154lockdep works.
155
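For instance, the following pair of (made up) code paths is enough for
lockdep to build and then close such a cycle with just two spinlocks:

  static DEFINE_SPINLOCK(a);
  static DEFINE_SPINLOCK(b);

  void path_one(void)
  {
	spin_lock(&a);
	spin_lock(&b);		/* lockdep records 'A -> B' */
	spin_unlock(&b);
	spin_unlock(&a);
  }

  void path_two(void)
  {
	spin_lock(&b);
	spin_lock(&a);		/* records 'B -> A': circular, so it's reported */
	spin_unlock(&a);
	spin_unlock(&b);
  }

Note that the report is produced as soon as both orders have been seen once,
whether or not the two paths ever actually raced.
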
156CONCLUSION
157
158Lockdep detects a deadlock or its possibility by checking if circular
159dependencies were created after adding each new dependency.
160
161
162==========
163Limitation
164==========
165
166Limit lockdep
167-------------
168
    169By limiting lockdep to only typical locks, e.g. spin locks and mutexes,
    170which are released within the acquire context, the implementation becomes
    171simple but its capacity for detection becomes limited. Let's check the
    172pros and cons in the next sections.
173
174
175Pros from the limitation
176------------------------
177
    178Given the limitation, when a context has to wait to acquire a lock, none of
    179the locks in its held_locks can be released, which means all waiters for
    180the locks in held_locks are stuck. This is exactly the case that creates
    181dependencies between each lock in held_locks and the lock being
    182acquired.
183
184For example:
185
186 CONTEXT X
187 ---------
188 acquire A
189 acquire B /* Add a dependency 'A -> B' */
190 release B
191 release A
192
193 where A and B are different lock classes.
194
195When acquiring lock A, the held_locks of CONTEXT X is empty thus no
196dependency is added. But when acquiring lock B, lockdep detects and adds
197a new dependency 'A -> B' between lock A in the held_locks and lock B.
198They can be simply added whenever acquiring each lock.
199
    200And the data required by lockdep exists in a local structure, held_locks,
    201embedded in task_struct. By forcing accesses to the data to happen only
    202within the owning context, lockdep can avoid races without explicit locks
    203while handling this local data.
204
    205Lastly, lockdep only needs to keep the locks currently being held to build
    206a dependency graph. Were the limitation relaxed, it would also need to keep
    207locks already released, because the decision whether they created
    208dependencies might be deferred for a long time.
209
210To sum up, we can expect several advantages from the limitation:
211
212 1. Lockdep can easily identify a dependency when acquiring a lock.
    213 2. Races are avoidable while accessing the local locks in held_locks.
214 3. Lockdep only needs to keep locks currently being held.
215
216CONCLUSION
217
218Given the limitation, the implementation becomes simple and efficient.
219
220
221Cons from the limitation
222------------------------
223
224Given the limitation, lockdep is applicable only to typical locks. For
225example, page locks for page access or completions for synchronization
226cannot work with lockdep.
227
228Can we detect deadlocks below, under the limitation?
229
230Example 1:
231
232 CONTEXT X CONTEXT Y CONTEXT Z
233 --------- --------- ----------
234 mutex_lock A
235 lock_page B
236 lock_page B
237 mutex_lock A /* DEADLOCK */
238 unlock_page B held by X
239 unlock_page B
240 mutex_unlock A
241 mutex_unlock A
242
243 where A and B are different lock classes.
244
245No, we cannot.
246
247Example 2:
248
249 CONTEXT X CONTEXT Y
250 --------- ---------
251 mutex_lock A
252 mutex_lock A
253 wait_for_complete B /* DEADLOCK */
254 complete B
255 mutex_unlock A
256 mutex_unlock A
257
258 where A is a lock class and B is a completion variable.
259
260No, we cannot.
261
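Written out as (made up) C functions, Example 2 is:

  static DEFINE_MUTEX(a);
  static DECLARE_COMPLETION(b);

  void context_x(void)
  {
	mutex_lock(&a);
	wait_for_completion(&b);	/* never completes: Y is stuck on 'a' */
	mutex_unlock(&a);
  }

  void context_y(void)
  {
	mutex_lock(&a);			/* DEADLOCK: blocks behind X forever */
	complete(&b);
	mutex_unlock(&a);
  }

Plain lockdep cannot see this because the completion is not released
(completed) in the context that waits for it, which is exactly the
limitation discussed here.
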
262CONCLUSION
263
264Given the limitation, lockdep cannot detect a deadlock or its
265possibility caused by page locks or completions.
266
267
268Relax the limitation
269--------------------
270
271Under the limitation, things to create dependencies are limited to
272typical locks. However, synchronization primitives like page locks and
273completions, which are allowed to be released in any context, also
274create dependencies and can cause a deadlock. So lockdep should track
275these locks to do a better job. We have to relax the limitation for
276these locks to work with lockdep.
277
    278Detecting dependencies is very important for lockdep to work because
    279adding a dependency means adding an opportunity to check whether it
    280causes a deadlock. The more dependencies lockdep adds, the more
    281thoroughly it works. Thus lockdep has to do its best to detect and add as
    282many true dependencies into the graph as possible.
283
284For example, considering only typical locks, lockdep builds a graph like:
285
286 A -> B -
287 \
288 -> E
289 /
290 C -> D -
291
292 where A, B,..., E are different lock classes.
293
294On the other hand, under the relaxation, additional dependencies might
295be created and added. Assuming additional 'FX -> C' and 'E -> GX' are
296added thanks to the relaxation, the graph will be:
297
298 A -> B -
299 \
300 -> E -> GX
301 /
302 FX -> C -> D -
303
304 where A, B,..., E, FX and GX are different lock classes, and a suffix
305 'X' is added on non-typical locks.
306
    307The latter graph gives us more chances to check circular dependencies
    308than the former. However, it might suffer performance degradation, since
    309relaxing the limitation that keeps lockdep's design and implementation
    310efficient inevitably introduces some inefficiency. So lockdep should
    311provide two options, strong detection and efficient detection.
312
313Choosing efficient detection:
314
315 Lockdep works with only locks restricted to be released within the
316 acquire context. However, lockdep works efficiently.
317
318Choosing strong detection:
319
320 Lockdep works with all synchronization primitives. However, lockdep
321 suffers performance degradation.
322
323CONCLUSION
324
325Relaxing the limitation, lockdep can add additional dependencies giving
326additional opportunities to check circular dependencies.
327
328
329============
330Crossrelease
331============
332
333Introduce crossrelease
334----------------------
335
    336In order to allow lockdep to handle additional dependencies created by
    337locks that might be released in any context, namely 'crosslocks', we have
    338to be able to identify the dependencies they create. The proposed
    339'crossrelease' feature provides a way to do that.
340
341Crossrelease feature has to do:
342
343 1. Identify dependencies created by crosslocks.
344 2. Add the dependencies into a dependency graph.
345
    346That's all. Once a meaningful dependency is added into the graph, lockdep
    347works with the graph as it did before. The most important thing the
    348crossrelease feature has to do is to correctly identify and add true
    349dependencies into the global graph.
350
    351A dependency, e.g. 'A -> B', can be identified only in A's release
    352context, because the decision required to identify the dependency can be
    353made only in the release context: namely, whether A can be released so
    354that a waiter for A can be woken up. That decision cannot be made anywhere
    355other than A's release context.
356
    357This does not matter for typical locks because each acquire context is the
    358same as its release context, so lockdep can decide whether a lock can be
    359released already in the acquire context. However, for crosslocks, lockdep
    360cannot make the decision in the acquire context and has to wait until the
    361release context is identified.
362
    363Therefore, a deadlock caused by crosslocks cannot be detected just when it
    364happens, because its dependencies cannot be identified until the crosslocks
    365are released. However, deadlock possibilities can still be detected, and
    366that is very worthwhile. See the 'APPENDIX A' section for why.
367
368CONCLUSION
369
370Using crossrelease feature, lockdep can work with what might be released
371in any context, namely crosslock.
372
373
374Introduce commit
375----------------
376
377Since crossrelease defers the work adding true dependencies of
378crosslocks until they are actually released, crossrelease has to queue
379all acquisitions which might create dependencies with the crosslocks.
380Then it identifies dependencies using the queued data in batches at a
381proper time. We call it 'commit'.
382
383There are four types of dependencies:
384
3851. TT type: 'typical lock A -> typical lock B'
386
387 Just when acquiring B, lockdep can see it's in the A's release
388 context. So the dependency between A and B can be identified
389 immediately. Commit is unnecessary.
390
3912. TC type: 'typical lock A -> crosslock BX'
392
393 Just when acquiring BX, lockdep can see it's in the A's release
394 context. So the dependency between A and BX can be identified
395 immediately. Commit is unnecessary, too.
396
3973. CT type: 'crosslock AX -> typical lock B'
398
399 When acquiring B, lockdep cannot identify the dependency because
400 there's no way to know if it's in the AX's release context. It has
401 to wait until the decision can be made. Commit is necessary.
402
4034. CC type: 'crosslock AX -> crosslock BX'
404
405 When acquiring BX, lockdep cannot identify the dependency because
406 there's no way to know if it's in the AX's release context. It has
407 to wait until the decision can be made. Commit is necessary.
408 But, handling CC type is not implemented yet. It's a future work.
409
    410Lockdep can work without commit for typical locks, but the commit step is
    411necessary once crosslocks are involved. With commit introduced, lockdep
    412performs three steps. What lockdep does in each step is:
413
4141. Acquisition: For typical locks, lockdep does what it originally did
415 and queues the lock so that CT type dependencies can be checked using
416 it at the commit step. For crosslocks, it saves data which will be
417 used at the commit step and increases a reference count for it.
418
    4192. Commit: No action is required for typical locks. For crosslocks,
420 lockdep adds CT type dependencies using the data saved at the
421 acquisition step.
422
4233. Release: No changes are required for typical locks. When a crosslock
424 is released, it decreases a reference count for it.
425
426CONCLUSION
427
428Crossrelease introduces commit step to handle dependencies of crosslocks
429in batches at a proper time.
430
431
432==============
433Implementation
434==============
435
436Data structures
437---------------
438
439Crossrelease introduces two main data structures.
440
4411. hist_lock
442
443 This is an array embedded in task_struct, for keeping lock history so
444 that dependencies can be added using them at the commit step. Since
445 it's local data, it can be accessed locklessly in the owner context.
446 The array is filled at the acquisition step and consumed at the
    447 commit step. And it's managed in a circular manner.
448
4492. cross_lock
450
451 One per lockdep_map exists. This is for keeping data of crosslocks
452 and used at the commit step.
453
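Schematically, and with illustrative fields only (these are not the actual
declarations added by the patch), the two structures can be pictured as:

  struct hist_lock {
	unsigned int		hist_id;	/* position/generation in the ring */
	struct lockdep_map	*instance;	/* which lock was queued */
	unsigned long		acquire_ip;	/* where it was acquired */
  };

  /* per-task ring buffer, filled at acquisition, consumed at commit */
  struct hist_lock xhlocks[...];

  struct cross_lock {
	int			nr_acquire;	/* reference count of the crosslock */
	struct hist_lock	snapshot;	/* state saved at acquisition */
  };

One cross_lock hangs off each crosslock's lockdep_map, while the hist_lock
ring lives in task_struct.
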
454
455How crossrelease works
456----------------------
457
    458The key to how crossrelease works is to defer the necessary work to an
    459appropriate point in time and perform it at once at the commit step.
460Let's take a look with examples step by step, starting from how lockdep
461works without crossrelease for typical locks.
462
463 acquire A /* Push A onto held_locks */
464 acquire B /* Push B onto held_locks and add 'A -> B' */
465 acquire C /* Push C onto held_locks and add 'B -> C' */
466 release C /* Pop C from held_locks */
467 release B /* Pop B from held_locks */
468 release A /* Pop A from held_locks */
469
470 where A, B and C are different lock classes.
471
    472 NOTE: This document assumes that readers already understand how
    473 lockdep works without crossrelease and thus omits details. But there's
    474 one thing to note. Lockdep pretends to pop a lock from held_locks
    475 when releasing it. But it's subtly different from the original pop
    476 operation because lockdep allows entries other than the top to be popped.
477
    478In this case, lockdep adds a 'the top of held_locks -> the lock to acquire'
    479dependency every time it acquires a lock.
480
481After adding 'A -> B', a dependency graph will be:
482
483 A -> B
484
485 where A and B are different lock classes.
486
487And after adding 'B -> C', the graph will be:
488
489 A -> B -> C
490
491 where A, B and C are different lock classes.
492
    493Let's perform the commit step even for typical locks to add dependencies.
    494Of course, the commit step is not necessary for them; however, it works
    495well because this is a more general way.
496
497 acquire A
498 /*
499 * Queue A into hist_locks
500 *
501 * In hist_locks: A
502 * In graph: Empty
503 */
504
505 acquire B
506 /*
507 * Queue B into hist_locks
508 *
509 * In hist_locks: A, B
510 * In graph: Empty
511 */
512
513 acquire C
514 /*
515 * Queue C into hist_locks
516 *
517 * In hist_locks: A, B, C
518 * In graph: Empty
519 */
520
521 commit C
522 /*
523 * Add 'C -> ?'
524 * Answer the following to decide '?'
525 * What has been queued since acquire C: Nothing
526 *
527 * In hist_locks: A, B, C
528 * In graph: Empty
529 */
530
531 release C
532
533 commit B
534 /*
535 * Add 'B -> ?'
536 * Answer the following to decide '?'
537 * What has been queued since acquire B: C
538 *
539 * In hist_locks: A, B, C
540 * In graph: 'B -> C'
541 */
542
543 release B
544
545 commit A
546 /*
547 * Add 'A -> ?'
548 * Answer the following to decide '?'
549 * What has been queued since acquire A: B, C
550 *
551 * In hist_locks: A, B, C
552 * In graph: 'B -> C', 'A -> B', 'A -> C'
553 */
554
555 release A
556
557 where A, B and C are different lock classes.
558
559In this case, dependencies are added at the commit step as described.
560
561After commits for A, B and C, the graph will be:
562
563 A -> B -> C
564
565 where A, B and C are different lock classes.
566
567 NOTE: A dependency 'A -> C' is optimized out.
568
    569We can see that the former graph, built without the commit step, is the
    570same as the latter graph built using commit steps. Of course the former way
    571finishes building the graph earlier, which means we can detect a
    572deadlock or its possibility sooner. So the former way would be preferred
    573when possible. But we cannot avoid using the latter way for crosslocks.
574
    575Let's look at how commit steps work for crosslocks. In this case, the
    576commit step is actually performed only on the crosslock BX. And it assumes
    577that the BX release context is different from the BX acquire context.
578
579 BX RELEASE CONTEXT BX ACQUIRE CONTEXT
580 ------------------ ------------------
581 acquire A
582 /*
583 * Push A onto held_locks
584 * Queue A into hist_locks
585 *
586 * In held_locks: A
587 * In hist_locks: A
588 * In graph: Empty
589 */
590
591 acquire BX
592 /*
593 * Add 'the top of held_locks -> BX'
594 *
595 * In held_locks: A
596 * In hist_locks: A
597 * In graph: 'A -> BX'
598 */
599
600 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
601 It must be guaranteed that the following operations are seen after
602 acquiring BX globally. It can be done by things like barrier.
603 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
604
605 acquire C
606 /*
607 * Push C onto held_locks
608 * Queue C into hist_locks
609 *
610 * In held_locks: C
611 * In hist_locks: C
612 * In graph: 'A -> BX'
613 */
614
615 release C
616 /*
617 * Pop C from held_locks
618 *
619 * In held_locks: Empty
620 * In hist_locks: C
621 * In graph: 'A -> BX'
622 */
623 acquire D
624 /*
625 * Push D onto held_locks
626 * Queue D into hist_locks
627 * Add 'the top of held_locks -> D'
628 *
629 * In held_locks: A, D
630 * In hist_locks: A, D
631 * In graph: 'A -> BX', 'A -> D'
632 */
633 acquire E
634 /*
635 * Push E onto held_locks
636 * Queue E into hist_locks
637 *
638 * In held_locks: E
639 * In hist_locks: C, E
640 * In graph: 'A -> BX', 'A -> D'
641 */
642
643 release E
644 /*
645 * Pop E from held_locks
646 *
647 * In held_locks: Empty
    648 * In hist_locks: C, E
649 * In graph: 'A -> BX', 'A -> D'
650 */
651 release D
652 /*
653 * Pop D from held_locks
654 *
655 * In held_locks: A
656 * In hist_locks: A, D
657 * In graph: 'A -> BX', 'A -> D'
658 */
659 commit BX
660 /*
661 * Add 'BX -> ?'
662 * What has been queued since acquire BX: C, E
663 *
664 * In held_locks: Empty
    665 * In hist_locks: C, E
666 * In graph: 'A -> BX', 'A -> D',
667 * 'BX -> C', 'BX -> E'
668 */
669
670 release BX
671 /*
672 * In held_locks: Empty
    673 * In hist_locks: C, E
674 * In graph: 'A -> BX', 'A -> D',
675 * 'BX -> C', 'BX -> E'
676 */
677 release A
678 /*
679 * Pop A from held_locks
680 *
681 * In held_locks: Empty
682 * In hist_locks: A, D
683 * In graph: 'A -> BX', 'A -> D',
684 * 'BX -> C', 'BX -> E'
685 */
686
687 where A, BX, C,..., E are different lock classes, and a suffix 'X' is
688 added on crosslocks.
689
    690Crossrelease considers all acquisitions after acquiring BX to be
691candidates which might create dependencies with BX. True dependencies
692will be determined when identifying the release context of BX. Meanwhile,
693all typical locks are queued so that they can be used at the commit step.
694And then two dependencies 'BX -> C' and 'BX -> E' are added at the
695commit step when identifying the release context.
696
697The final graph will be, with crossrelease:
698
699 -> C
700 /
701 -> BX -
702 / \
703 A - -> E
704 \
705 -> D
706
707 where A, BX, C,..., E are different lock classes, and a suffix 'X' is
708 added on crosslocks.
709
710However, the final graph will be, without crossrelease:
711
712 A -> D
713
714 where A and D are different lock classes.
715
716The former graph has three more dependencies, 'A -> BX', 'BX -> C' and
717'BX -> E' giving additional opportunities to check if they cause
718deadlocks. This way lockdep can detect a deadlock or its possibility
719caused by crosslocks.
720
721CONCLUSION
722
723We checked how crossrelease works with several examples.
724
725
726=============
727Optimizations
728=============
729
730Avoid duplication
731-----------------
732
    733The crossrelease feature uses a cache, like the one lockdep already uses
    734for dependency chains, but this time for caching CT type dependencies.
    735Once a dependency is cached, the same one will never be added again.
736
737
738Lockless for hot paths
739----------------------
740
741To keep all locks for later use at the commit step, crossrelease adopts
742a local array embedded in task_struct, which makes access to the data
743lockless by forcing it to happen only within the owner context. It's
    744like how lockdep handles held_locks. A lockless implementation is important
745since typical locks are very frequently acquired and released.
746
747
    748==================================================
    749APPENDIX A: What lockdep does to work aggressively
    750==================================================
751
    752A deadlock actually occurs when all wait operations creating circular
    753dependencies run at the same time. Even when they don't, a potential
    754deadlock exists if the problematic dependencies exist. Thus it's
    755meaningful to detect not only an actual deadlock but also its
    756possibility. The latter is rather valuable: when a deadlock actually
    757occurs, we can identify what happens in the system by some means or other
    758even without lockdep, but there's no way to detect the possibility without
    759lockdep unless the whole code is parsed in one's head, which is terrible.
    760Lockdep does both, and crossrelease focuses only on the latter.
761
762Whether or not a deadlock actually occurs depends on several factors.
763For example, what order contexts are switched in is a factor. Assuming
764circular dependencies exist, a deadlock would occur when contexts are
765switched so that all wait operations creating the dependencies run
766simultaneously. Thus to detect a deadlock possibility even in the case
    767that it has not occurred yet, lockdep should consider all possible
768combinations of dependencies, trying to:
769
7701. Use a global dependency graph.
771
772 Lockdep combines all dependencies into one global graph and uses them,
773 regardless of which context generates them or what order contexts are
    774 switched in. Only the aggregated dependencies are considered, so they
    775 are prone to become circular if a problem exists.
776
7772. Check dependencies between classes instead of instances.
778
    779 What actually causes a deadlock are lock instances. However,
    780 lockdep checks dependencies between classes instead of instances.
    781 This way lockdep can detect a deadlock which has not happened yet but
    782 might happen in the future with other instances of the same class.
783
7843. Assume all acquisitions lead to waiting.
785
    786 Although locks might be acquired without waiting, and waiting is what
    787 is essential to create dependencies, lockdep assumes all acquisitions
    788 lead to waiting since that might be true at some time or another.
789
790CONCLUSION
791
792Lockdep detects not only an actual deadlock but also its possibility,
793and the latter is more valuable.
794
795
796==================================================
797APPENDIX B: How to avoid adding false dependencies
798==================================================
799
800Remind what a dependency is. A dependency exists if:
801
802 1. There are two waiters waiting for each event at a given time.
803 2. The only way to wake up each waiter is to trigger its event.
804 3. Whether one can be woken up depends on whether the other can.
805
806For example:
807
808 acquire A
809 acquire B /* A dependency 'A -> B' exists */
810 release B
811 release A
812
813 where A and B are different lock classes.
814
    815A dependency 'A -> B' exists since:
816
817 1. A waiter for A and a waiter for B might exist when acquiring B.
818 2. Only way to wake up each is to release what it waits for.
819 3. Whether the waiter for A can be woken up depends on whether the
820 other can. IOW, TASK X cannot release A if it fails to acquire B.
821
822For another example:
823
824 TASK X TASK Y
825 ------ ------
826 acquire AX
827 acquire B /* A dependency 'AX -> B' exists */
828 release B
829 release AX held by Y
830
831 where AX and B are different lock classes, and a suffix 'X' is added
832 on crosslocks.
833
834Even in this case involving crosslocks, the same rule can be applied. A
    835dependency 'AX -> B' exists since:
836
837 1. A waiter for AX and a waiter for B might exist when acquiring B.
838 2. Only way to wake up each is to release what it waits for.
839 3. Whether the waiter for AX can be woken up depends on whether the
840 other can. IOW, TASK X cannot release AX if it fails to acquire B.
841
    842Let's take a look at a more complicated example:
843
844 TASK X TASK Y
845 ------ ------
846 acquire B
847 release B
848 fork Y
849 acquire AX
850 acquire C /* A dependency 'AX -> C' exists */
851 release C
852 release AX held by Y
853
854 where AX, B and C are different lock classes, and a suffix 'X' is
855 added on crosslocks.
856
857Does a dependency 'AX -> B' exist? Nope.
858
859Two waiters are essential to create a dependency. However, waiters for
860AX and B to create 'AX -> B' cannot exist at the same time in this
861example. Thus the dependency 'AX -> B' cannot be created.
862
    863It would be ideal if the full set of true dependencies could be
    864considered. But we can be sure of nothing but what actually happened.
    865Relying on what actually happens at runtime, we can at least add only true
    866dependencies, though they might be a subset of all the true ones. It's
    867similar to how lockdep works for typical locks: there might be more true
    868dependencies than what lockdep has detected at runtime, but lockdep has no
    869choice but to rely on what actually happens. Crossrelease also relies on it.
870
871CONCLUSION
872
873Relying on what actually happens, lockdep can avoid adding false
874dependencies.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index e2ee0a1c299a..b759a60624fd 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -498,11 +498,11 @@ And a couple of implicit varieties:
498 This means that ACQUIRE acts as a minimal "acquire" operation and 498 This means that ACQUIRE acts as a minimal "acquire" operation and
499 RELEASE acts as a minimal "release" operation. 499 RELEASE acts as a minimal "release" operation.
500 500
501A subset of the atomic operations described in core-api/atomic_ops.rst have 501A subset of the atomic operations described in atomic_t.txt have ACQUIRE and
502ACQUIRE and RELEASE variants in addition to fully-ordered and relaxed (no 502RELEASE variants in addition to fully-ordered and relaxed (no barrier
503barrier semantics) definitions. For compound atomics performing both a load 503semantics) definitions. For compound atomics performing both a load and a
504and a store, ACQUIRE semantics apply only to the load and RELEASE semantics 504store, ACQUIRE semantics apply only to the load and RELEASE semantics apply
505apply only to the store portion of the operation. 505only to the store portion of the operation.
506 506
507Memory barriers are only required where there's a possibility of interaction 507Memory barriers are only required where there's a possibility of interaction
508between two CPUs or between a CPU and a device. If it can be guaranteed that 508between two CPUs or between a CPU and a device. If it can be guaranteed that
@@ -1883,8 +1883,7 @@ There are some more advanced barrier functions:
1883 This makes sure that the death mark on the object is perceived to be set 1883 This makes sure that the death mark on the object is perceived to be set
1884 *before* the reference counter is decremented. 1884 *before* the reference counter is decremented.
1885 1885
1886 See Documentation/core-api/atomic_ops.rst for more information. See the 1886 See Documentation/atomic_{t,bitops}.txt for more information.
1887 "Atomic operations" subsection for information on where to use these.
1888 1887
1889 1888
1890 (*) lockless_dereference(); 1889 (*) lockless_dereference();
@@ -1989,10 +1988,7 @@ for each construct. These operations all imply certain barriers:
1989 ACQUIRE operation has completed. 1988 ACQUIRE operation has completed.
1990 1989
1991 Memory operations issued before the ACQUIRE may be completed after 1990 Memory operations issued before the ACQUIRE may be completed after
1992 the ACQUIRE operation has completed. An smp_mb__before_spinlock(), 1991 the ACQUIRE operation has completed.
1993 combined with a following ACQUIRE, orders prior stores against
1994 subsequent loads and stores. Note that this is weaker than smp_mb()!
1995 The smp_mb__before_spinlock() primitive is free on many architectures.
1996 1992
1997 (2) RELEASE operation implication: 1993 (2) RELEASE operation implication:
1998 1994
@@ -2510,88 +2506,7 @@ operations are noted specially as some of them imply full memory barriers and
2510some don't, but they're very heavily relied on as a group throughout the 2506some don't, but they're very heavily relied on as a group throughout the
2511kernel. 2507kernel.
2512 2508
2513Any atomic operation that modifies some state in memory and returns information 2509See Documentation/atomic_t.txt for more information.
2514about the state (old or new) implies an SMP-conditional general memory barrier
2515(smp_mb()) on each side of the actual operation (with the exception of
2516explicit lock operations, described later). These include:
2517
2518 xchg();
2519 atomic_xchg(); atomic_long_xchg();
2520 atomic_inc_return(); atomic_long_inc_return();
2521 atomic_dec_return(); atomic_long_dec_return();
2522 atomic_add_return(); atomic_long_add_return();
2523 atomic_sub_return(); atomic_long_sub_return();
2524 atomic_inc_and_test(); atomic_long_inc_and_test();
2525 atomic_dec_and_test(); atomic_long_dec_and_test();
2526 atomic_sub_and_test(); atomic_long_sub_and_test();
2527 atomic_add_negative(); atomic_long_add_negative();
2528 test_and_set_bit();
2529 test_and_clear_bit();
2530 test_and_change_bit();
2531
2532 /* when succeeds */
2533 cmpxchg();
2534 atomic_cmpxchg(); atomic_long_cmpxchg();
2535 atomic_add_unless(); atomic_long_add_unless();
2536
2537These are used for such things as implementing ACQUIRE-class and RELEASE-class
2538operations and adjusting reference counters towards object destruction, and as
2539such the implicit memory barrier effects are necessary.
2540
2541
2542The following operations are potential problems as they do _not_ imply memory
2543barriers, but might be used for implementing such things as RELEASE-class
2544operations:
2545
2546 atomic_set();
2547 set_bit();
2548 clear_bit();
2549 change_bit();
2550
2551With these the appropriate explicit memory barrier should be used if necessary
2552(smp_mb__before_atomic() for instance).
2553
2554
2555The following also do _not_ imply memory barriers, and so may require explicit
2556memory barriers under some circumstances (smp_mb__before_atomic() for
2557instance):
2558
2559 atomic_add();
2560 atomic_sub();
2561 atomic_inc();
2562 atomic_dec();
2563
2564If they're used for statistics generation, then they probably don't need memory
2565barriers, unless there's a coupling between statistical data.
2566
2567If they're used for reference counting on an object to control its lifetime,
2568they probably don't need memory barriers because either the reference count
2569will be adjusted inside a locked section, or the caller will already hold
2570sufficient references to make the lock, and thus a memory barrier unnecessary.
2571
2572If they're used for constructing a lock of some description, then they probably
2573do need memory barriers as a lock primitive generally has to do things in a
2574specific order.
2575
2576Basically, each usage case has to be carefully considered as to whether memory
2577barriers are needed or not.
2578
2579The following operations are special locking primitives:
2580
2581 test_and_set_bit_lock();
2582 clear_bit_unlock();
2583 __clear_bit_unlock();
2584
2585These implement ACQUIRE-class and RELEASE-class operations. These should be
2586used in preference to other operations when implementing locking primitives,
2587because their implementations can be optimised on many architectures.
2588
2589[!] Note that special memory barrier primitives are available for these
2590situations because on some CPUs the atomic instructions used imply full memory
2591barriers, and so barrier instructions are superfluous in conjunction with them,
2592and in such cases the special barrier primitives will be no-ops.
2593
2594See Documentation/core-api/atomic_ops.rst for more information.
2595 2510
2596 2511
2597ACCESSING DEVICES 2512ACCESSING DEVICES
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index b83dfa1c0602..ab16efe0c79d 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -149,6 +149,26 @@ static_branch_inc(), will change the branch back to true. Likewise, if the
149key is initialized false, a 'static_branch_inc()', will change the branch to 149key is initialized false, a 'static_branch_inc()', will change the branch to
150true. And then a 'static_branch_dec()', will again make the branch false. 150true. And then a 'static_branch_dec()', will again make the branch false.
151 151
152The state and the reference count can be retrieved with 'static_key_enabled()'
153and 'static_key_count()'. In general, if you use these functions, they
154should be protected with the same mutex used around the enable/disable
155or increment/decrement function.
156
157Note that switching branches results in some locks being taken,
158particularly the CPU hotplug lock (in order to avoid races against
    159CPUs being brought into the kernel whilst the kernel is getting
160patched). Calling the static key API from within a hotplug notifier is
161thus a sure deadlock recipe. In order to still allow use of the
    162functionality, the following functions are provided:
163
164 static_key_enable_cpuslocked()
165 static_key_disable_cpuslocked()
166 static_branch_enable_cpuslocked()
167 static_branch_disable_cpuslocked()
168
169These functions are *not* general purpose, and must only be used when
170you really know that you're in the above context, and no other.
171
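For example, a CPU hotplug (cpuhp) online callback that wants to flip a key
could look roughly like this; the key, the callback and cpu_has_my_feature()
are made up for the illustration:

  static DEFINE_STATIC_KEY_FALSE(my_feature_key);

  static int my_feature_cpu_online(unsigned int cpu)
  {
	/*
	 * cpuhp callbacks already run with the CPU hotplug lock held, so
	 * plain static_branch_enable() would deadlock here; the
	 * _cpuslocked variant must be used instead.
	 */
	if (cpu_has_my_feature(cpu))
		static_branch_enable_cpuslocked(&my_feature_key);
	return 0;
  }
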
152Where an array of keys is required, it can be defined as:: 172Where an array of keys is required, it can be defined as::
153 173
154 DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); 174 DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 38310dcd6620..bc80fc0e210f 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -1956,10 +1956,7 @@ MMIO 쓰기 배리어
1956 뒤에 완료됩니다. 1956 뒤에 완료됩니다.
1957 1957
1958 ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에 1958 ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에
1959 완료될 수 있습니다. smp_mb__before_spinlock() 뒤에 ACQUIRE 가 실행되는 1959 완료될 수 있습니다.
1960 코드 블록은 블록 앞의 스토어를 블록 뒤의 로드와 스토어에 대해 순서
1961 맞춥니다. 이건 smp_mb() 보다 완화된 것임을 기억하세요! 많은 아키텍쳐에서
1962 smp_mb__before_spinlock() 은 사실 아무일도 하지 않습니다.
1963 1960
1964 (2) RELEASE 오퍼레이션의 영향: 1961 (2) RELEASE 오퍼레이션의 영향:
1965 1962
diff --git a/arch/Kconfig b/arch/Kconfig
index 21d0089117fe..2520ca5b42eb 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -931,6 +931,18 @@ config STRICT_MODULE_RWX
931config ARCH_WANT_RELAX_ORDER 931config ARCH_WANT_RELAX_ORDER
932 bool 932 bool
933 933
934config ARCH_HAS_REFCOUNT
935 bool
936 help
937 An architecture selects this when it has implemented refcount_t
938 using open coded assembly primitives that provide an optimized
939 refcount_t implementation, possibly at the expense of some full
940 refcount state checks of CONFIG_REFCOUNT_FULL=y.
941
942 The refcount overflow check behavior, however, must be retained.
943 Catching overflows is the primary security concern for protecting
944 against bugs in reference counts.
945
934config REFCOUNT_FULL 946config REFCOUNT_FULL
935 bool "Perform full reference count validation at the expense of speed" 947 bool "Perform full reference count validation at the expense of speed"
936 help 948 help
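For reference, the code this affects is ordinary refcount_t usage such as the
(made up) example below; with ARCH_HAS_REFCOUNT the same helpers are backed by
the architecture's open coded fast path while keeping overflow protection:

  struct foo {
	refcount_t	ref;
	/* ... payload ... */
  };

  static struct foo *foo_get(struct foo *f)
  {
	refcount_inc(&f->ref);		/* saturates and WARNs rather than overflowing */
	return f;
  }

  static void foo_put(struct foo *f)
  {
	if (refcount_dec_and_test(&f->ref))
		kfree(f);
  }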
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index fb01dfb760c2..05a70edd57b6 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -25,18 +25,10 @@
25 : "r" (uaddr), "r"(oparg) \ 25 : "r" (uaddr), "r"(oparg) \
26 : "memory") 26 : "memory")
27 27
28static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) 28static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
29 u32 __user *uaddr)
29{ 30{
30 int op = (encoded_op >> 28) & 7;
31 int cmp = (encoded_op >> 24) & 15;
32 int oparg = (encoded_op << 8) >> 20;
33 int cmparg = (encoded_op << 20) >> 20;
34 int oldval = 0, ret; 31 int oldval = 0, ret;
35 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
36 oparg = 1 << oparg;
37
38 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
39 return -EFAULT;
40 32
41 pagefault_disable(); 33 pagefault_disable();
42 34
@@ -62,17 +54,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
62 54
63 pagefault_enable(); 55 pagefault_enable();
64 56
65 if (!ret) { 57 if (!ret)
66 switch (cmp) { 58 *oval = oldval;
67 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 59
68 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
69 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
70 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
71 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
72 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
73 default: ret = -ENOSYS;
74 }
75 }
76 return ret; 60 return ret;
77} 61}
78 62
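The comparison logic stripped from each architecture above is hoisted into a
single generic caller (kernel/futex.c in this series, not shown here). A rough
sketch of that wrapper, not the literal patch text:

  static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
  {
	unsigned int op  = (encoded_op & 0x70000000) >> 28;
	unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
	int oparg  = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
	int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
	int oldval, ret;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
		oparg = 1 << oparg;

	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
		return -EFAULT;

	/* the architecture only performs the atomic op and reports oldval */
	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	if (ret)
		return ret;

	switch (cmp) {
	case FUTEX_OP_CMP_EQ:	return oldval == cmparg;
	case FUTEX_OP_CMP_NE:	return oldval != cmparg;
	case FUTEX_OP_CMP_LT:	return oldval <  cmparg;
	case FUTEX_OP_CMP_GE:	return oldval >= cmparg;
	case FUTEX_OP_CMP_LE:	return oldval <= cmparg;
	case FUTEX_OP_CMP_GT:	return oldval >  cmparg;
	default:		return -ENOSYS;
	}
  }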
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 54b54da6384c..11859287c52a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -123,6 +123,8 @@ static inline void atomic_set(atomic_t *v, int i)
123 atomic_ops_unlock(flags); 123 atomic_ops_unlock(flags);
124} 124}
125 125
126#define atomic_set_release(v, i) atomic_set((v), (i))
127
126#endif 128#endif
127 129
128/* 130/*
diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h
index 11e1b1f3acda..eb887dd13e74 100644
--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -73,20 +73,11 @@
73 73
74#endif 74#endif
75 75
76static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 76static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
77 u32 __user *uaddr)
77{ 78{
78 int op = (encoded_op >> 28) & 7;
79 int cmp = (encoded_op >> 24) & 15;
80 int oparg = (encoded_op << 8) >> 20;
81 int cmparg = (encoded_op << 20) >> 20;
82 int oldval = 0, ret; 79 int oldval = 0, ret;
83 80
84 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
85 oparg = 1 << oparg;
86
87 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
88 return -EFAULT;
89
90#ifndef CONFIG_ARC_HAS_LLSC 81#ifndef CONFIG_ARC_HAS_LLSC
91 preempt_disable(); /* to guarantee atomic r-m-w of futex op */ 82 preempt_disable(); /* to guarantee atomic r-m-w of futex op */
92#endif 83#endif
@@ -118,30 +109,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
118 preempt_enable(); 109 preempt_enable();
119#endif 110#endif
120 111
121 if (!ret) { 112 if (!ret)
122 switch (cmp) { 113 *oval = oldval;
123 case FUTEX_OP_CMP_EQ: 114
124 ret = (oldval == cmparg);
125 break;
126 case FUTEX_OP_CMP_NE:
127 ret = (oldval != cmparg);
128 break;
129 case FUTEX_OP_CMP_LT:
130 ret = (oldval < cmparg);
131 break;
132 case FUTEX_OP_CMP_GE:
133 ret = (oldval >= cmparg);
134 break;
135 case FUTEX_OP_CMP_LE:
136 ret = (oldval <= cmparg);
137 break;
138 case FUTEX_OP_CMP_GT:
139 ret = (oldval > cmparg);
140 break;
141 default:
142 ret = -ENOSYS;
143 }
144 }
145 return ret; 115 return ret;
146} 116}
147 117
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 6795368ad023..cc414382dab4 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -128,20 +128,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
128#endif /* !SMP */ 128#endif /* !SMP */
129 129
130static inline int 130static inline int
131futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) 131arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
132{ 132{
133 int op = (encoded_op >> 28) & 7;
134 int cmp = (encoded_op >> 24) & 15;
135 int oparg = (encoded_op << 8) >> 20;
136 int cmparg = (encoded_op << 20) >> 20;
137 int oldval = 0, ret, tmp; 133 int oldval = 0, ret, tmp;
138 134
139 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
140 oparg = 1 << oparg;
141
142 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
143 return -EFAULT;
144
145#ifndef CONFIG_SMP 135#ifndef CONFIG_SMP
146 preempt_disable(); 136 preempt_disable();
147#endif 137#endif
@@ -172,17 +162,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
172 preempt_enable(); 162 preempt_enable();
173#endif 163#endif
174 164
175 if (!ret) { 165 if (!ret)
176 switch (cmp) { 166 *oval = oldval;
177 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 167
178 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
179 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
180 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
181 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
182 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
183 default: ret = -ENOSYS;
184 }
185 }
186 return ret; 168 return ret;
187} 169}
188 170
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index f32b42e8725d..5bb2fd4674e7 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -48,20 +48,10 @@ do { \
48} while (0) 48} while (0)
49 49
50static inline int 50static inline int
51futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) 51arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
52{ 52{
53 int op = (encoded_op >> 28) & 7;
54 int cmp = (encoded_op >> 24) & 15;
55 int oparg = (int)(encoded_op << 8) >> 20;
56 int cmparg = (int)(encoded_op << 20) >> 20;
57 int oldval = 0, ret, tmp; 53 int oldval = 0, ret, tmp;
58 54
59 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
60 oparg = 1U << (oparg & 0x1f);
61
62 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
63 return -EFAULT;
64
65 pagefault_disable(); 55 pagefault_disable();
66 56
67 switch (op) { 57 switch (op) {
@@ -91,17 +81,9 @@ futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
91 81
92 pagefault_enable(); 82 pagefault_enable();
93 83
94 if (!ret) { 84 if (!ret)
95 switch (cmp) { 85 *oval = oldval;
96 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 86
97 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
98 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
99 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
100 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
101 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
102 default: ret = -ENOSYS;
103 }
104 }
105 return ret; 87 return ret;
106} 88}
107 89
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index f445bd7f2b9f..95ad7102b63c 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -310,14 +310,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
310#define arch_read_relax(lock) cpu_relax() 310#define arch_read_relax(lock) cpu_relax()
311#define arch_write_relax(lock) cpu_relax() 311#define arch_write_relax(lock) cpu_relax()
312 312
313/* 313/* See include/linux/spinlock.h */
314 * Accesses appearing in program order before a spin_lock() operation 314#define smp_mb__after_spinlock() smp_mb()
315 * can be reordered with accesses inside the critical section, by virtue
316 * of arch_spin_lock being constructed using acquire semantics.
317 *
318 * In cases where this is problematic (e.g. try_to_wake_up), an
319 * smp_mb__before_spinlock() can restore the required ordering.
320 */
321#define smp_mb__before_spinlock() smp_mb()
322 315
323#endif /* __ASM_SPINLOCK_H */ 316#endif /* __ASM_SPINLOCK_H */
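For context, smp_mb__after_spinlock() is issued right after taking a lock when the acquire must be upgraded to a full barrier, i.e. when accesses before the lock must not be reordered with accesses inside the critical section (the problem the deleted comment described for smp_mb__before_spinlock()). A minimal caller-side sketch, with made-up lock and data (not part of this patch):

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(demo_lock);		/* illustrative */
	static int demo_flag, demo_cond;

	static void demo_publish_then_test(void)
	{
		WRITE_ONCE(demo_flag, 1);		/* store before the lock */

		raw_spin_lock(&demo_lock);
		smp_mb__after_spinlock();		/* full barrier: the store above
							 * is ordered before the load below */
		if (READ_ONCE(demo_cond))
			demo_flag = 2;			/* arbitrary work */
		raw_spin_unlock(&demo_lock);
	}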
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h
index 2e1da71e27a4..ab346f5f8820 100644
--- a/arch/frv/include/asm/futex.h
+++ b/arch/frv/include/asm/futex.h
@@ -7,7 +7,8 @@
7#include <asm/errno.h> 7#include <asm/errno.h>
8#include <linux/uaccess.h> 8#include <linux/uaccess.h>
9 9
10extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr); 10extern int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
11 u32 __user *uaddr);
11 12
12static inline int 13static inline int
13futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, 14futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c
index d155ca9e5098..37f7b2bf7f73 100644
--- a/arch/frv/kernel/futex.c
+++ b/arch/frv/kernel/futex.c
@@ -186,20 +186,10 @@ static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_o
186/* 186/*
187 * do the futex operations 187 * do the futex operations
188 */ 188 */
189int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 189int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
190{ 190{
191 int op = (encoded_op >> 28) & 7;
192 int cmp = (encoded_op >> 24) & 15;
193 int oparg = (encoded_op << 8) >> 20;
194 int cmparg = (encoded_op << 20) >> 20;
195 int oldval = 0, ret; 191 int oldval = 0, ret;
196 192
197 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
198 oparg = 1 << oparg;
199
200 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
201 return -EFAULT;
202
203 pagefault_disable(); 193 pagefault_disable();
204 194
205 switch (op) { 195 switch (op) {
@@ -225,18 +215,9 @@ int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
225 215
226 pagefault_enable(); 216 pagefault_enable();
227 217
228 if (!ret) { 218 if (!ret)
229 switch (cmp) { 219 *oval = oldval;
230 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
231 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
232 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
233 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
234 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
235 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
236 default: ret = -ENOSYS; break;
237 }
238 }
239 220
240 return ret; 221 return ret;
241 222
242} /* end futex_atomic_op_inuser() */ 223} /* end arch_futex_atomic_op_inuser() */
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index a62ba368b27d..fb3dfb2a667e 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -42,6 +42,8 @@ static inline void atomic_set(atomic_t *v, int new)
42 ); 42 );
43} 43}
44 44
45#define atomic_set_release(v, i) atomic_set((v), (i))
46
45/** 47/**
46 * atomic_read - reads a word, atomically 48 * atomic_read - reads a word, atomically
47 * @v: pointer to atomic value 49 * @v: pointer to atomic value
diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h
index 7e597f8434da..c607b77c8215 100644
--- a/arch/hexagon/include/asm/futex.h
+++ b/arch/hexagon/include/asm/futex.h
@@ -31,18 +31,9 @@
31 31
32 32
33static inline int 33static inline int
34futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 34arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
35{ 35{
36 int op = (encoded_op >> 28) & 7;
37 int cmp = (encoded_op >> 24) & 15;
38 int oparg = (encoded_op << 8) >> 20;
39 int cmparg = (encoded_op << 20) >> 20;
40 int oldval = 0, ret; 36 int oldval = 0, ret;
41 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
42 oparg = 1 << oparg;
43
44 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
45 return -EFAULT;
46 37
47 pagefault_disable(); 38 pagefault_disable();
48 39
@@ -72,30 +63,9 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
72 63
73 pagefault_enable(); 64 pagefault_enable();
74 65
75 if (!ret) { 66 if (!ret)
76 switch (cmp) { 67 *oval = oldval;
77 case FUTEX_OP_CMP_EQ: 68
78 ret = (oldval == cmparg);
79 break;
80 case FUTEX_OP_CMP_NE:
81 ret = (oldval != cmparg);
82 break;
83 case FUTEX_OP_CMP_LT:
84 ret = (oldval < cmparg);
85 break;
86 case FUTEX_OP_CMP_GE:
87 ret = (oldval >= cmparg);
88 break;
89 case FUTEX_OP_CMP_LE:
90 ret = (oldval <= cmparg);
91 break;
92 case FUTEX_OP_CMP_GT:
93 ret = (oldval > cmparg);
94 break;
95 default:
96 ret = -ENOSYS;
97 }
98 }
99 return ret; 69 return ret;
100} 70}
101 71
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
index 76acbcd5c060..6d67dc1eaf2b 100644
--- a/arch/ia64/include/asm/futex.h
+++ b/arch/ia64/include/asm/futex.h
@@ -45,18 +45,9 @@ do { \
45} while (0) 45} while (0)
46 46
47static inline int 47static inline int
48futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) 48arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
49{ 49{
50 int op = (encoded_op >> 28) & 7;
51 int cmp = (encoded_op >> 24) & 15;
52 int oparg = (encoded_op << 8) >> 20;
53 int cmparg = (encoded_op << 20) >> 20;
54 int oldval = 0, ret; 50 int oldval = 0, ret;
55 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
56 oparg = 1 << oparg;
57
58 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
59 return -EFAULT;
60 51
61 pagefault_disable(); 52 pagefault_disable();
62 53
@@ -84,17 +75,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
84 75
85 pagefault_enable(); 76 pagefault_enable();
86 77
87 if (!ret) { 78 if (!ret)
88 switch (cmp) { 79 *oval = oldval;
89 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 80
90 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
91 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
92 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
93 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
94 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
95 default: ret = -ENOSYS;
96 }
97 }
98 return ret; 81 return ret;
99} 82}
100 83
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index 6c1380a8a0d4..eee779f26cc4 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -37,6 +37,8 @@ static inline int atomic_set(atomic_t *v, int i)
37 return i; 37 return i;
38} 38}
39 39
40#define atomic_set_release(v, i) atomic_set((v), (i))
41
40#define ATOMIC_OP(op, c_op) \ 42#define ATOMIC_OP(op, c_op) \
41static inline void atomic_##op(int i, atomic_t *v) \ 43static inline void atomic_##op(int i, atomic_t *v) \
42{ \ 44{ \
diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h
index 01848f056f43..a9dad9e5e132 100644
--- a/arch/microblaze/include/asm/futex.h
+++ b/arch/microblaze/include/asm/futex.h
@@ -29,18 +29,9 @@
29}) 29})
30 30
31static inline int 31static inline int
32futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 32arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
33{ 33{
34 int op = (encoded_op >> 28) & 7;
35 int cmp = (encoded_op >> 24) & 15;
36 int oparg = (encoded_op << 8) >> 20;
37 int cmparg = (encoded_op << 20) >> 20;
38 int oldval = 0, ret; 34 int oldval = 0, ret;
39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
40 oparg = 1 << oparg;
41
42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
43 return -EFAULT;
44 35
45 pagefault_disable(); 36 pagefault_disable();
46 37
@@ -66,30 +57,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
66 57
67 pagefault_enable(); 58 pagefault_enable();
68 59
69 if (!ret) { 60 if (!ret)
70 switch (cmp) { 61 *oval = oldval;
71 case FUTEX_OP_CMP_EQ: 62
72 ret = (oldval == cmparg);
73 break;
74 case FUTEX_OP_CMP_NE:
75 ret = (oldval != cmparg);
76 break;
77 case FUTEX_OP_CMP_LT:
78 ret = (oldval < cmparg);
79 break;
80 case FUTEX_OP_CMP_GE:
81 ret = (oldval >= cmparg);
82 break;
83 case FUTEX_OP_CMP_LE:
84 ret = (oldval <= cmparg);
85 break;
86 case FUTEX_OP_CMP_GT:
87 ret = (oldval > cmparg);
88 break;
89 default:
90 ret = -ENOSYS;
91 }
92 }
93 return ret; 63 return ret;
94} 64}
95 65
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index 1de190bdfb9c..a9e61ea54ca9 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -83,18 +83,9 @@
83} 83}
84 84
85static inline int 85static inline int
86futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 86arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
87{ 87{
88 int op = (encoded_op >> 28) & 7;
89 int cmp = (encoded_op >> 24) & 15;
90 int oparg = (encoded_op << 8) >> 20;
91 int cmparg = (encoded_op << 20) >> 20;
92 int oldval = 0, ret; 88 int oldval = 0, ret;
93 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
94 oparg = 1 << oparg;
95
96 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
97 return -EFAULT;
98 89
99 pagefault_disable(); 90 pagefault_disable();
100 91
@@ -125,17 +116,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
125 116
126 pagefault_enable(); 117 pagefault_enable();
127 118
128 if (!ret) { 119 if (!ret)
129 switch (cmp) { 120 *oval = oldval;
130 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 121
131 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
132 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
133 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
134 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
135 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
136 default: ret = -ENOSYS;
137 }
138 }
139 return ret; 122 return ret;
140} 123}
141 124
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 6bace7695788..c7cbddfcdc3b 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -648,12 +648,12 @@ EXPORT_SYMBOL(flush_tlb_one);
648#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST 648#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
649 649
650static DEFINE_PER_CPU(atomic_t, tick_broadcast_count); 650static DEFINE_PER_CPU(atomic_t, tick_broadcast_count);
651static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd); 651static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
652 652
653void tick_broadcast(const struct cpumask *mask) 653void tick_broadcast(const struct cpumask *mask)
654{ 654{
655 atomic_t *count; 655 atomic_t *count;
656 struct call_single_data *csd; 656 call_single_data_t *csd;
657 int cpu; 657 int cpu;
658 658
659 for_each_cpu(cpu, mask) { 659 for_each_cpu(cpu, mask) {
@@ -674,7 +674,7 @@ static void tick_broadcast_callee(void *info)
674 674
675static int __init tick_broadcast_init(void) 675static int __init tick_broadcast_init(void)
676{ 676{
677 struct call_single_data *csd; 677 call_single_data_t *csd;
678 int cpu; 678 int cpu;
679 679
680 for (cpu = 0; cpu < NR_CPUS; cpu++) { 680 for (cpu = 0; cpu < NR_CPUS; cpu++) {
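The hunks touching struct call_single_data in this patch are mechanical: they only switch users to the call_single_data_t alias, which elsewhere in the series is given an alignment that keeps each instance within one cache line. A hedged sketch of the usage pattern these call sites share (the per-CPU variable and callback are illustrative):

	#include <linux/smp.h>
	#include <linux/percpu.h>

	static DEFINE_PER_CPU(call_single_data_t, demo_csd);	/* illustrative */

	static void demo_ipi_handler(void *info)
	{
		/* runs on the target CPU */
	}

	static void demo_kick_cpu(int cpu)
	{
		call_single_data_t *csd = &per_cpu(demo_csd, cpu);

		csd->func = demo_ipi_handler;
		csd->info = NULL;
		smp_call_function_single_async(cpu, csd);
	}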
diff --git a/arch/openrisc/include/asm/futex.h b/arch/openrisc/include/asm/futex.h
index 778087341977..8fed278a24b8 100644
--- a/arch/openrisc/include/asm/futex.h
+++ b/arch/openrisc/include/asm/futex.h
@@ -30,20 +30,10 @@
30}) 30})
31 31
32static inline int 32static inline int
33futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 33arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
34{ 34{
35 int op = (encoded_op >> 28) & 7;
36 int cmp = (encoded_op >> 24) & 15;
37 int oparg = (encoded_op << 8) >> 20;
38 int cmparg = (encoded_op << 20) >> 20;
39 int oldval = 0, ret; 35 int oldval = 0, ret;
40 36
41 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
42 oparg = 1 << oparg;
43
44 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
45 return -EFAULT;
46
47 pagefault_disable(); 37 pagefault_disable();
48 38
49 switch (op) { 39 switch (op) {
@@ -68,30 +58,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
68 58
69 pagefault_enable(); 59 pagefault_enable();
70 60
71 if (!ret) { 61 if (!ret)
72 switch (cmp) { 62 *oval = oldval;
73 case FUTEX_OP_CMP_EQ: 63
74 ret = (oldval == cmparg);
75 break;
76 case FUTEX_OP_CMP_NE:
77 ret = (oldval != cmparg);
78 break;
79 case FUTEX_OP_CMP_LT:
80 ret = (oldval < cmparg);
81 break;
82 case FUTEX_OP_CMP_GE:
83 ret = (oldval >= cmparg);
84 break;
85 case FUTEX_OP_CMP_LE:
86 ret = (oldval <= cmparg);
87 break;
88 case FUTEX_OP_CMP_GT:
89 ret = (oldval > cmparg);
90 break;
91 default:
92 ret = -ENOSYS;
93 }
94 }
95 return ret; 64 return ret;
96} 65}
97 66
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 5394b9c5f914..17b98a87e5e2 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -65,6 +65,8 @@ static __inline__ void atomic_set(atomic_t *v, int i)
65 _atomic_spin_unlock_irqrestore(v, flags); 65 _atomic_spin_unlock_irqrestore(v, flags);
66} 66}
67 67
68#define atomic_set_release(v, i) atomic_set((v), (i))
69
68static __inline__ int atomic_read(const atomic_t *v) 70static __inline__ int atomic_read(const atomic_t *v)
69{ 71{
70 return READ_ONCE((v)->counter); 72 return READ_ONCE((v)->counter);
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 0ba14300cd8e..c601aab2fb36 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -32,22 +32,12 @@ _futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags)
32} 32}
33 33
34static inline int 34static inline int
35futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) 35arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
36{ 36{
37 unsigned long int flags; 37 unsigned long int flags;
38 int op = (encoded_op >> 28) & 7;
39 int cmp = (encoded_op >> 24) & 15;
40 int oparg = (encoded_op << 8) >> 20;
41 int cmparg = (encoded_op << 20) >> 20;
42 int oldval, ret; 38 int oldval, ret;
43 u32 tmp; 39 u32 tmp;
44 40
45 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
46 oparg = 1 << oparg;
47
48 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr)))
49 return -EFAULT;
50
51 _futex_spin_lock_irqsave(uaddr, &flags); 41 _futex_spin_lock_irqsave(uaddr, &flags);
52 pagefault_disable(); 42 pagefault_disable();
53 43
@@ -85,17 +75,9 @@ out_pagefault_enable:
85 pagefault_enable(); 75 pagefault_enable();
86 _futex_spin_unlock_irqrestore(uaddr, &flags); 76 _futex_spin_unlock_irqrestore(uaddr, &flags);
87 77
88 if (ret == 0) { 78 if (!ret)
89 switch (cmp) { 79 *oval = oldval;
90 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 80
91 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
92 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
93 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
94 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
95 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
96 default: ret = -ENOSYS;
97 }
98 }
99 return ret; 81 return ret;
100} 82}
101 83
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 25d42bd3f114..9c601adfc500 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -74,13 +74,6 @@ do { \
74 ___p1; \ 74 ___p1; \
75}) 75})
76 76
77/*
78 * This must resolve to hwsync on SMP for the context switch path.
79 * See _switch, and core scheduler context switch memory ordering
80 * comments.
81 */
82#define smp_mb__before_spinlock() smp_mb()
83
84#include <asm-generic/barrier.h> 77#include <asm-generic/barrier.h>
85 78
86#endif /* _ASM_POWERPC_BARRIER_H */ 79#endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index eaada6c92344..719ed9b61ea7 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -29,18 +29,10 @@
29 : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ 29 : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
30 : "cr0", "memory") 30 : "cr0", "memory")
31 31
32static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) 32static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
33 u32 __user *uaddr)
33{ 34{
34 int op = (encoded_op >> 28) & 7;
35 int cmp = (encoded_op >> 24) & 15;
36 int oparg = (encoded_op << 8) >> 20;
37 int cmparg = (encoded_op << 20) >> 20;
38 int oldval = 0, ret; 35 int oldval = 0, ret;
39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
40 oparg = 1 << oparg;
41
42 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
43 return -EFAULT;
44 36
45 pagefault_disable(); 37 pagefault_disable();
46 38
@@ -66,17 +58,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
66 58
67 pagefault_enable(); 59 pagefault_enable();
68 60
69 if (!ret) { 61 if (!ret)
70 switch (cmp) { 62 *oval = oldval;
71 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 63
72 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
73 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
74 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
75 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
76 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
77 default: ret = -ENOSYS;
78 }
79 }
80 return ret; 64 return ret;
81} 65}
82 66
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index d256e448ea49..edbe571bcc54 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -309,5 +309,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
309#define arch_read_relax(lock) __rw_yield(lock) 309#define arch_read_relax(lock) __rw_yield(lock)
310#define arch_write_relax(lock) __rw_yield(lock) 310#define arch_write_relax(lock) __rw_yield(lock)
311 311
312/* See include/linux/spinlock.h */
313#define smp_mb__after_spinlock() smp_mb()
314
312#endif /* __KERNEL__ */ 315#endif /* __KERNEL__ */
313#endif /* __ASM_SPINLOCK_H */ 316#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index a4811aa0304d..8f8eec9e1198 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -21,17 +21,12 @@
21 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ 21 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
22 "m" (*uaddr) : "cc"); 22 "m" (*uaddr) : "cc");
23 23
24static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 24static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
25 u32 __user *uaddr)
25{ 26{
26 int op = (encoded_op >> 28) & 7;
27 int cmp = (encoded_op >> 24) & 15;
28 int oparg = (encoded_op << 8) >> 20;
29 int cmparg = (encoded_op << 20) >> 20;
30 int oldval = 0, newval, ret; 27 int oldval = 0, newval, ret;
31 28
32 load_kernel_asce(); 29 load_kernel_asce();
33 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
34 oparg = 1 << oparg;
35 30
36 pagefault_disable(); 31 pagefault_disable();
37 switch (op) { 32 switch (op) {
@@ -60,17 +55,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
60 } 55 }
61 pagefault_enable(); 56 pagefault_enable();
62 57
63 if (!ret) { 58 if (!ret)
64 switch (cmp) { 59 *oval = oldval;
65 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 60
66 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
67 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
68 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
69 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
70 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
71 default: ret = -ENOSYS;
72 }
73 }
74 return ret; 61 return ret;
75} 62}
76 63
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index d0078747d308..8f8cf941a8cd 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -27,21 +27,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
27 return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval); 27 return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
28} 28}
29 29
30static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 30static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
31 u32 __user *uaddr)
31{ 32{
32 int op = (encoded_op >> 28) & 7;
33 int cmp = (encoded_op >> 24) & 15;
34 u32 oparg = (encoded_op << 8) >> 20;
35 u32 cmparg = (encoded_op << 20) >> 20;
36 u32 oldval, newval, prev; 33 u32 oldval, newval, prev;
37 int ret; 34 int ret;
38 35
39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
40 oparg = 1 << oparg;
41
42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
43 return -EFAULT;
44
45 pagefault_disable(); 36 pagefault_disable();
46 37
47 do { 38 do {
@@ -80,17 +71,8 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
80 71
81 pagefault_enable(); 72 pagefault_enable();
82 73
83 if (!ret) { 74 if (!ret)
84 switch (cmp) { 75 *oval = oldval;
85 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
86 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
87 case FUTEX_OP_CMP_LT: ret = ((int)oldval < (int)cmparg); break;
88 case FUTEX_OP_CMP_GE: ret = ((int)oldval >= (int)cmparg); break;
89 case FUTEX_OP_CMP_LE: ret = ((int)oldval <= (int)cmparg); break;
90 case FUTEX_OP_CMP_GT: ret = ((int)oldval > (int)cmparg); break;
91 default: ret = -ENOSYS;
92 }
93 }
94 76
95 return ret; 77 return ret;
96} 78}
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index ee3f11c43cda..7643e979e333 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -29,6 +29,8 @@ int atomic_xchg(atomic_t *, int);
29int __atomic_add_unless(atomic_t *, int, int); 29int __atomic_add_unless(atomic_t *, int, int);
30void atomic_set(atomic_t *, int); 30void atomic_set(atomic_t *, int);
31 31
32#define atomic_set_release(v, i) atomic_set((v), (i))
33
32#define atomic_read(v) ACCESS_ONCE((v)->counter) 34#define atomic_read(v) ACCESS_ONCE((v)->counter)
33 35
34#define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) 36#define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v)))
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h
index 4e899b0dabf7..1cfd89d92208 100644
--- a/arch/sparc/include/asm/futex_64.h
+++ b/arch/sparc/include/asm/futex_64.h
@@ -29,22 +29,14 @@
29 : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \ 29 : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \
30 : "memory") 30 : "memory")
31 31
32static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 32static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
33 u32 __user *uaddr)
33{ 34{
34 int op = (encoded_op >> 28) & 7;
35 int cmp = (encoded_op >> 24) & 15;
36 int oparg = (encoded_op << 8) >> 20;
37 int cmparg = (encoded_op << 20) >> 20;
38 int oldval = 0, ret, tem; 35 int oldval = 0, ret, tem;
39 36
40 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
41 return -EFAULT;
42 if (unlikely((((unsigned long) uaddr) & 0x3UL))) 37 if (unlikely((((unsigned long) uaddr) & 0x3UL)))
43 return -EINVAL; 38 return -EINVAL;
44 39
45 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
46 oparg = 1 << oparg;
47
48 pagefault_disable(); 40 pagefault_disable();
49 41
50 switch (op) { 42 switch (op) {
@@ -69,17 +61,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
69 61
70 pagefault_enable(); 62 pagefault_enable();
71 63
72 if (!ret) { 64 if (!ret)
73 switch (cmp) { 65 *oval = oldval;
74 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 66
75 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
76 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
77 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
78 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
79 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
80 default: ret = -ENOSYS;
81 }
82 }
83 return ret; 67 return ret;
84} 68}
85 69
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index a93774255136..53a423e7cb92 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -101,6 +101,8 @@ static inline void atomic_set(atomic_t *v, int n)
101 _atomic_xchg(&v->counter, n); 101 _atomic_xchg(&v->counter, n);
102} 102}
103 103
104#define atomic_set_release(v, i) atomic_set((v), (i))
105
104/* A 64bit atomic type */ 106/* A 64bit atomic type */
105 107
106typedef struct { 108typedef struct {
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index e64a1b75fc38..83c1e639b411 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -106,12 +106,9 @@
106 lock = __atomic_hashed_lock((int __force *)uaddr) 106 lock = __atomic_hashed_lock((int __force *)uaddr)
107#endif 107#endif
108 108
109static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 109static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
110 u32 __user *uaddr)
110{ 111{
111 int op = (encoded_op >> 28) & 7;
112 int cmp = (encoded_op >> 24) & 15;
113 int oparg = (encoded_op << 8) >> 20;
114 int cmparg = (encoded_op << 20) >> 20;
115 int uninitialized_var(val), ret; 112 int uninitialized_var(val), ret;
116 113
117 __futex_prolog(); 114 __futex_prolog();
@@ -119,12 +116,6 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
119 /* The 32-bit futex code makes this assumption, so validate it here. */ 116 /* The 32-bit futex code makes this assumption, so validate it here. */
120 BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int)); 117 BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
121 118
122 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
123 oparg = 1 << oparg;
124
125 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
126 return -EFAULT;
127
128 pagefault_disable(); 119 pagefault_disable();
129 switch (op) { 120 switch (op) {
130 case FUTEX_OP_SET: 121 case FUTEX_OP_SET:
@@ -148,30 +139,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
148 } 139 }
149 pagefault_enable(); 140 pagefault_enable();
150 141
151 if (!ret) { 142 if (!ret)
152 switch (cmp) { 143 *oval = val;
153 case FUTEX_OP_CMP_EQ: 144
154 ret = (val == cmparg);
155 break;
156 case FUTEX_OP_CMP_NE:
157 ret = (val != cmparg);
158 break;
159 case FUTEX_OP_CMP_LT:
160 ret = (val < cmparg);
161 break;
162 case FUTEX_OP_CMP_GE:
163 ret = (val >= cmparg);
164 break;
165 case FUTEX_OP_CMP_LE:
166 ret = (val <= cmparg);
167 break;
168 case FUTEX_OP_CMP_GT:
169 ret = (val > cmparg);
170 break;
171 default:
172 ret = -ENOSYS;
173 }
174 }
175 return ret; 145 return ret;
176} 146}
177 147
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9c95aa417e9b..cce15191e9e9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,6 +55,8 @@ config X86
55 select ARCH_HAS_KCOV if X86_64 55 select ARCH_HAS_KCOV if X86_64
56 select ARCH_HAS_MMIO_FLUSH 56 select ARCH_HAS_MMIO_FLUSH
57 select ARCH_HAS_PMEM_API if X86_64 57 select ARCH_HAS_PMEM_API if X86_64
58 # Causing hangs/crashes, see the commit that added this change for details.
59 select ARCH_HAS_REFCOUNT if BROKEN
58 select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 60 select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
59 select ARCH_HAS_SET_MEMORY 61 select ARCH_HAS_SET_MEMORY
60 select ARCH_HAS_SG_CHAIN 62 select ARCH_HAS_SG_CHAIN
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7a9df3beb89b..676ee5807d86 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -74,6 +74,9 @@
74# define _ASM_EXTABLE_EX(from, to) \ 74# define _ASM_EXTABLE_EX(from, to) \
75 _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) 75 _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
76 76
77# define _ASM_EXTABLE_REFCOUNT(from, to) \
78 _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
79
77# define _ASM_NOKPROBE(entry) \ 80# define _ASM_NOKPROBE(entry) \
78 .pushsection "_kprobe_blacklist","aw" ; \ 81 .pushsection "_kprobe_blacklist","aw" ; \
79 _ASM_ALIGN ; \ 82 _ASM_ALIGN ; \
@@ -123,6 +126,9 @@
123# define _ASM_EXTABLE_EX(from, to) \ 126# define _ASM_EXTABLE_EX(from, to) \
124 _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) 127 _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
125 128
129# define _ASM_EXTABLE_REFCOUNT(from, to) \
130 _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
131
126/* For C file, we already have NOKPROBE_SYMBOL macro */ 132/* For C file, we already have NOKPROBE_SYMBOL macro */
127#endif 133#endif
128 134
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 33380b871463..0874ebda3069 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -197,35 +197,56 @@ static inline int atomic_xchg(atomic_t *v, int new)
197 return xchg(&v->counter, new); 197 return xchg(&v->counter, new);
198} 198}
199 199
200#define ATOMIC_OP(op) \ 200static inline void atomic_and(int i, atomic_t *v)
201static inline void atomic_##op(int i, atomic_t *v) \ 201{
202{ \ 202 asm volatile(LOCK_PREFIX "andl %1,%0"
203 asm volatile(LOCK_PREFIX #op"l %1,%0" \ 203 : "+m" (v->counter)
204 : "+m" (v->counter) \ 204 : "ir" (i)
205 : "ir" (i) \ 205 : "memory");
206 : "memory"); \ 206}
207
208static inline int atomic_fetch_and(int i, atomic_t *v)
209{
210 int val = atomic_read(v);
211
212 do { } while (!atomic_try_cmpxchg(v, &val, val & i));
213
214 return val;
207} 215}
208 216
209#define ATOMIC_FETCH_OP(op, c_op) \ 217static inline void atomic_or(int i, atomic_t *v)
210static inline int atomic_fetch_##op(int i, atomic_t *v) \ 218{
211{ \ 219 asm volatile(LOCK_PREFIX "orl %1,%0"
212 int val = atomic_read(v); \ 220 : "+m" (v->counter)
213 do { \ 221 : "ir" (i)
214 } while (!atomic_try_cmpxchg(v, &val, val c_op i)); \ 222 : "memory");
215 return val; \
216} 223}
217 224
218#define ATOMIC_OPS(op, c_op) \ 225static inline int atomic_fetch_or(int i, atomic_t *v)
219 ATOMIC_OP(op) \ 226{
220 ATOMIC_FETCH_OP(op, c_op) 227 int val = atomic_read(v);
221 228
222ATOMIC_OPS(and, &) 229 do { } while (!atomic_try_cmpxchg(v, &val, val | i));
223ATOMIC_OPS(or , |)
224ATOMIC_OPS(xor, ^)
225 230
226#undef ATOMIC_OPS 231 return val;
227#undef ATOMIC_FETCH_OP 232}
228#undef ATOMIC_OP 233
234static inline void atomic_xor(int i, atomic_t *v)
235{
236 asm volatile(LOCK_PREFIX "xorl %1,%0"
237 : "+m" (v->counter)
238 : "ir" (i)
239 : "memory");
240}
241
242static inline int atomic_fetch_xor(int i, atomic_t *v)
243{
244 int val = atomic_read(v);
245
246 do { } while (!atomic_try_cmpxchg(v, &val, val ^ i));
247
248 return val;
249}
229 250
230/** 251/**
231 * __atomic_add_unless - add unless the number is already a given value 252 * __atomic_add_unless - add unless the number is already a given value
@@ -239,10 +260,12 @@ ATOMIC_OPS(xor, ^)
239static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) 260static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
240{ 261{
241 int c = atomic_read(v); 262 int c = atomic_read(v);
263
242 do { 264 do {
243 if (unlikely(c == u)) 265 if (unlikely(c == u))
244 break; 266 break;
245 } while (!atomic_try_cmpxchg(v, &c, c + a)); 267 } while (!atomic_try_cmpxchg(v, &c, c + a));
268
246 return c; 269 return c;
247} 270}
248 271
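The open-coded atomic_fetch_*() helpers above all rely on atomic_try_cmpxchg() updating the expected value in place when the compare fails, which is why the loop bodies can stay empty. A small illustration of the same idiom with a made-up helper (not part of the patch):

	#include <linux/atomic.h>

	/* Illustrative only: set the low bit and return the previous value. */
	static inline int demo_fetch_set_low_bit(atomic_t *v)
	{
		int old = atomic_read(v);

		do {
			/* on failure, atomic_try_cmpxchg() refreshes 'old' */
		} while (!atomic_try_cmpxchg(v, &old, old | 1));

		return old;
	}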
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 71d7705fb303..9e206f31ce2a 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -312,37 +312,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
312#undef alternative_atomic64 312#undef alternative_atomic64
313#undef __alternative_atomic64 313#undef __alternative_atomic64
314 314
315#define ATOMIC64_OP(op, c_op) \ 315static inline void atomic64_and(long long i, atomic64_t *v)
316static inline void atomic64_##op(long long i, atomic64_t *v) \ 316{
317{ \ 317 long long old, c = 0;
318 long long old, c = 0; \ 318
319 while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ 319 while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
320 c = old; \ 320 c = old;
321} 321}
322 322
323#define ATOMIC64_FETCH_OP(op, c_op) \ 323static inline long long atomic64_fetch_and(long long i, atomic64_t *v)
324static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ 324{
325{ \ 325 long long old, c = 0;
326 long long old, c = 0; \ 326
327 while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ 327 while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
328 c = old; \ 328 c = old;
329 return old; \ 329
330 return old;
330} 331}
331 332
332ATOMIC64_FETCH_OP(add, +) 333static inline void atomic64_or(long long i, atomic64_t *v)
334{
335 long long old, c = 0;
333 336
334#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) 337 while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
338 c = old;
339}
340
341static inline long long atomic64_fetch_or(long long i, atomic64_t *v)
342{
343 long long old, c = 0;
344
345 while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
346 c = old;
347
348 return old;
349}
335 350
336#define ATOMIC64_OPS(op, c_op) \ 351static inline void atomic64_xor(long long i, atomic64_t *v)
337 ATOMIC64_OP(op, c_op) \ 352{
338 ATOMIC64_FETCH_OP(op, c_op) 353 long long old, c = 0;
354
355 while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
356 c = old;
357}
339 358
340ATOMIC64_OPS(and, &) 359static inline long long atomic64_fetch_xor(long long i, atomic64_t *v)
341ATOMIC64_OPS(or, |) 360{
342ATOMIC64_OPS(xor, ^) 361 long long old, c = 0;
362
363 while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
364 c = old;
365
366 return old;
367}
343 368
344#undef ATOMIC64_OPS 369static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
345#undef ATOMIC64_FETCH_OP 370{
346#undef ATOMIC64_OP 371 long long old, c = 0;
372
373 while ((old = atomic64_cmpxchg(v, c, c + i)) != c)
374 c = old;
375
376 return old;
377}
378
379#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
347 380
348#endif /* _ASM_X86_ATOMIC64_32_H */ 381#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6189a433c9a9..5d9de36a2f04 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -177,7 +177,7 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
177} 177}
178 178
179#define atomic64_try_cmpxchg atomic64_try_cmpxchg 179#define atomic64_try_cmpxchg atomic64_try_cmpxchg
180static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new) 180static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
181{ 181{
182 return try_cmpxchg(&v->counter, old, new); 182 return try_cmpxchg(&v->counter, old, new);
183} 183}
@@ -198,7 +198,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
198 */ 198 */
199static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) 199static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
200{ 200{
201 long c = atomic64_read(v); 201 s64 c = atomic64_read(v);
202 do { 202 do {
203 if (unlikely(c == u)) 203 if (unlikely(c == u))
204 return false; 204 return false;
@@ -217,7 +217,7 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
217 */ 217 */
218static inline long atomic64_dec_if_positive(atomic64_t *v) 218static inline long atomic64_dec_if_positive(atomic64_t *v)
219{ 219{
220 long dec, c = atomic64_read(v); 220 s64 dec, c = atomic64_read(v);
221 do { 221 do {
222 dec = c - 1; 222 dec = c - 1;
223 if (unlikely(dec < 0)) 223 if (unlikely(dec < 0))
@@ -226,34 +226,55 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
226 return dec; 226 return dec;
227} 227}
228 228
229#define ATOMIC64_OP(op) \ 229static inline void atomic64_and(long i, atomic64_t *v)
230static inline void atomic64_##op(long i, atomic64_t *v) \ 230{
231{ \ 231 asm volatile(LOCK_PREFIX "andq %1,%0"
232 asm volatile(LOCK_PREFIX #op"q %1,%0" \ 232 : "+m" (v->counter)
233 : "+m" (v->counter) \ 233 : "er" (i)
234 : "er" (i) \ 234 : "memory");
235 : "memory"); \
236} 235}
237 236
238#define ATOMIC64_FETCH_OP(op, c_op) \ 237static inline long atomic64_fetch_and(long i, atomic64_t *v)
239static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ 238{
240{ \ 239 s64 val = atomic64_read(v);
241 long val = atomic64_read(v); \ 240
242 do { \ 241 do {
243 } while (!atomic64_try_cmpxchg(v, &val, val c_op i)); \ 242 } while (!atomic64_try_cmpxchg(v, &val, val & i));
244 return val; \ 243 return val;
245} 244}
246 245
247#define ATOMIC64_OPS(op, c_op) \ 246static inline void atomic64_or(long i, atomic64_t *v)
248 ATOMIC64_OP(op) \ 247{
249 ATOMIC64_FETCH_OP(op, c_op) 248 asm volatile(LOCK_PREFIX "orq %1,%0"
249 : "+m" (v->counter)
250 : "er" (i)
251 : "memory");
252}
250 253
251ATOMIC64_OPS(and, &) 254static inline long atomic64_fetch_or(long i, atomic64_t *v)
252ATOMIC64_OPS(or, |) 255{
253ATOMIC64_OPS(xor, ^) 256 s64 val = atomic64_read(v);
254 257
255#undef ATOMIC64_OPS 258 do {
256#undef ATOMIC64_FETCH_OP 259 } while (!atomic64_try_cmpxchg(v, &val, val | i));
257#undef ATOMIC64_OP 260 return val;
261}
262
263static inline void atomic64_xor(long i, atomic64_t *v)
264{
265 asm volatile(LOCK_PREFIX "xorq %1,%0"
266 : "+m" (v->counter)
267 : "er" (i)
268 : "memory");
269}
270
271static inline long atomic64_fetch_xor(long i, atomic64_t *v)
272{
273 s64 val = atomic64_read(v);
274
275 do {
276 } while (!atomic64_try_cmpxchg(v, &val, val ^ i));
277 return val;
278}
258 279
259#endif /* _ASM_X86_ATOMIC64_64_H */ 280#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index d90296d061e8..b5069e802d5c 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -157,7 +157,7 @@ extern void __add_wrong_size(void)
157#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \ 157#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
158({ \ 158({ \
159 bool success; \ 159 bool success; \
160 __typeof__(_ptr) _old = (_pold); \ 160 __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
161 __typeof__(*(_ptr)) __old = *_old; \ 161 __typeof__(*(_ptr)) __old = *_old; \
162 __typeof__(*(_ptr)) __new = (_new); \ 162 __typeof__(*(_ptr)) __new = (_new); \
163 switch (size) { \ 163 switch (size) { \
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index b4c1f5453436..f4dc9b63bdda 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -41,20 +41,11 @@
41 "+m" (*uaddr), "=&r" (tem) \ 41 "+m" (*uaddr), "=&r" (tem) \
42 : "r" (oparg), "i" (-EFAULT), "1" (0)) 42 : "r" (oparg), "i" (-EFAULT), "1" (0))
43 43
44static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 44static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
45 u32 __user *uaddr)
45{ 46{
46 int op = (encoded_op >> 28) & 7;
47 int cmp = (encoded_op >> 24) & 15;
48 int oparg = (encoded_op << 8) >> 20;
49 int cmparg = (encoded_op << 20) >> 20;
50 int oldval = 0, ret, tem; 47 int oldval = 0, ret, tem;
51 48
52 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
53 oparg = 1 << oparg;
54
55 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
56 return -EFAULT;
57
58 pagefault_disable(); 49 pagefault_disable();
59 50
60 switch (op) { 51 switch (op) {
@@ -80,30 +71,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
80 71
81 pagefault_enable(); 72 pagefault_enable();
82 73
83 if (!ret) { 74 if (!ret)
84 switch (cmp) { 75 *oval = oldval;
85 case FUTEX_OP_CMP_EQ: 76
86 ret = (oldval == cmparg);
87 break;
88 case FUTEX_OP_CMP_NE:
89 ret = (oldval != cmparg);
90 break;
91 case FUTEX_OP_CMP_LT:
92 ret = (oldval < cmparg);
93 break;
94 case FUTEX_OP_CMP_GE:
95 ret = (oldval >= cmparg);
96 break;
97 case FUTEX_OP_CMP_LE:
98 ret = (oldval <= cmparg);
99 break;
100 case FUTEX_OP_CMP_GT:
101 ret = (oldval > cmparg);
102 break;
103 default:
104 ret = -ENOSYS;
105 }
106 }
107 return ret; 77 return ret;
108} 78}
109 79
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
new file mode 100644
index 000000000000..ff871210b9f2
--- /dev/null
+++ b/arch/x86/include/asm/refcount.h
@@ -0,0 +1,109 @@
1#ifndef __ASM_X86_REFCOUNT_H
2#define __ASM_X86_REFCOUNT_H
3/*
4 * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
5 * PaX/grsecurity.
6 */
7#include <linux/refcount.h>
8
9/*
10 * This is the first portion of the refcount error handling, which lives in
11 * .text.unlikely, and is jumped to from the CPU flag check (in the
12 * following macros). This saves the refcount value location into CX for
13 * the exception handler to use (in mm/extable.c), and then triggers the
14 * central refcount exception. The fixup address for the exception points
15 * back to the regular execution flow in .text.
16 */
17#define _REFCOUNT_EXCEPTION \
18 ".pushsection .text.unlikely\n" \
19 "111:\tlea %[counter], %%" _ASM_CX "\n" \
20 "112:\t" ASM_UD0 "\n" \
21 ASM_UNREACHABLE \
22 ".popsection\n" \
23 "113:\n" \
24 _ASM_EXTABLE_REFCOUNT(112b, 113b)
25
26/* Trigger refcount exception if refcount result is negative. */
27#define REFCOUNT_CHECK_LT_ZERO \
28 "js 111f\n\t" \
29 _REFCOUNT_EXCEPTION
30
31/* Trigger refcount exception if refcount result is zero or negative. */
32#define REFCOUNT_CHECK_LE_ZERO \
33 "jz 111f\n\t" \
34 REFCOUNT_CHECK_LT_ZERO
35
36/* Trigger refcount exception unconditionally. */
37#define REFCOUNT_ERROR \
38 "jmp 111f\n\t" \
39 _REFCOUNT_EXCEPTION
40
41static __always_inline void refcount_add(unsigned int i, refcount_t *r)
42{
43 asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
44 REFCOUNT_CHECK_LT_ZERO
45 : [counter] "+m" (r->refs.counter)
46 : "ir" (i)
47 : "cc", "cx");
48}
49
50static __always_inline void refcount_inc(refcount_t *r)
51{
52 asm volatile(LOCK_PREFIX "incl %0\n\t"
53 REFCOUNT_CHECK_LT_ZERO
54 : [counter] "+m" (r->refs.counter)
55 : : "cc", "cx");
56}
57
58static __always_inline void refcount_dec(refcount_t *r)
59{
60 asm volatile(LOCK_PREFIX "decl %0\n\t"
61 REFCOUNT_CHECK_LE_ZERO
62 : [counter] "+m" (r->refs.counter)
63 : : "cc", "cx");
64}
65
66static __always_inline __must_check
67bool refcount_sub_and_test(unsigned int i, refcount_t *r)
68{
69 GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
70 r->refs.counter, "er", i, "%0", e);
71}
72
73static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
74{
75 GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
76 r->refs.counter, "%0", e);
77}
78
79static __always_inline __must_check
80bool refcount_add_not_zero(unsigned int i, refcount_t *r)
81{
82 int c, result;
83
84 c = atomic_read(&(r->refs));
85 do {
86 if (unlikely(c == 0))
87 return false;
88
89 result = c + i;
90
91 /* Did we try to increment from/to an undesirable state? */
92 if (unlikely(c < 0 || c == INT_MAX || result < c)) {
93 asm volatile(REFCOUNT_ERROR
94 : : [counter] "m" (r->refs.counter)
95 : "cc", "cx");
96 break;
97 }
98
99 } while (!atomic_try_cmpxchg(&(r->refs), &c, result));
100
101 return c != 0;
102}
103
104static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
105{
106 return refcount_add_not_zero(1, r);
107}
108
109#endif
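The header above gives x86 inline, trapping implementations of the generic refcount_t API. A hedged example of the kind of caller this fast path is meant for (the structure and helpers are made up):

	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct demo_obj {				/* illustrative */
		refcount_t ref;
		/* ... payload ... */
	};

	static struct demo_obj *demo_get(struct demo_obj *obj)
	{
		refcount_inc(&obj->ref);	/* traps via UD0 on overflow here */
		return obj;
	}

	static void demo_put(struct demo_obj *obj)
	{
		if (refcount_dec_and_test(&obj->ref))
			kfree(obj);
	}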
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index fb2ddcdf7c73..c076f710de4c 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -36,6 +36,48 @@ bool ex_handler_fault(const struct exception_table_entry *fixup,
36} 36}
37EXPORT_SYMBOL_GPL(ex_handler_fault); 37EXPORT_SYMBOL_GPL(ex_handler_fault);
38 38
39/*
40 * Handler for UD0 exception following a failed test against the
41 * result of a refcount inc/dec/add/sub.
42 */
43bool ex_handler_refcount(const struct exception_table_entry *fixup,
44 struct pt_regs *regs, int trapnr)
45{
46 /* First unconditionally saturate the refcount. */
47 *(int *)regs->cx = INT_MIN / 2;
48
49 /*
50 * Strictly speaking, this reports the fixup destination, not
51 * the fault location, and not the actually overflowing
52 * instruction, which is the instruction before the "js", but
53 * since that instruction could be a variety of lengths, just
54 * report the location after the overflow, which should be close
55 * enough for finding the overflow, as it's at least back in
56 * the function, having returned from .text.unlikely.
57 */
58 regs->ip = ex_fixup_addr(fixup);
59
60 /*
61 * This function has been called because either a negative refcount
62 * value was seen by any of the refcount functions, or a zero
63 * refcount value was seen by refcount_dec().
64 *
65 * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result
66 * wrapped around) will be set. Additionally, seeing the refcount
67 * reach 0 will set ZF (Zero Flag: result was zero). In each of
68 * these cases we want a report, since it's a boundary condition.
69 *
70 */
71 if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) {
72 bool zero = regs->flags & X86_EFLAGS_ZF;
73
74 refcount_error_report(regs, zero ? "hit zero" : "overflow");
75 }
76
77 return true;
78}
79EXPORT_SYMBOL_GPL(ex_handler_refcount);
80
39bool ex_handler_ext(const struct exception_table_entry *fixup, 81bool ex_handler_ext(const struct exception_table_entry *fixup,
40 struct pt_regs *regs, int trapnr) 82 struct pt_regs *regs, int trapnr)
41{ 83{
diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h
index b39531babec0..eaaf1ebcc7a4 100644
--- a/arch/xtensa/include/asm/futex.h
+++ b/arch/xtensa/include/asm/futex.h
@@ -44,18 +44,10 @@
44 : "r" (uaddr), "I" (-EFAULT), "r" (oparg) \ 44 : "r" (uaddr), "I" (-EFAULT), "r" (oparg) \
45 : "memory") 45 : "memory")
46 46
47static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 47static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
48 u32 __user *uaddr)
48{ 49{
49 int op = (encoded_op >> 28) & 7;
50 int cmp = (encoded_op >> 24) & 15;
51 int oparg = (encoded_op << 8) >> 20;
52 int cmparg = (encoded_op << 20) >> 20;
53 int oldval = 0, ret; 50 int oldval = 0, ret;
54 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
55 oparg = 1 << oparg;
56
57 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
58 return -EFAULT;
59 51
60#if !XCHAL_HAVE_S32C1I 52#if !XCHAL_HAVE_S32C1I
61 return -ENOSYS; 53 return -ENOSYS;
@@ -89,19 +81,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
89 81
90 pagefault_enable(); 82 pagefault_enable();
91 83
92 if (ret) 84 if (!ret)
93 return ret; 85 *oval = oldval;
94 86
95 switch (cmp) { 87 return ret;
96 case FUTEX_OP_CMP_EQ: return (oldval == cmparg);
97 case FUTEX_OP_CMP_NE: return (oldval != cmparg);
98 case FUTEX_OP_CMP_LT: return (oldval < cmparg);
99 case FUTEX_OP_CMP_GE: return (oldval >= cmparg);
100 case FUTEX_OP_CMP_LE: return (oldval <= cmparg);
101 case FUTEX_OP_CMP_GT: return (oldval > cmparg);
102 }
103
104 return -ENOSYS;
105} 88}
106 89
107static inline int 90static inline int
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 87b7df4851bf..07125e7941f4 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -60,7 +60,7 @@ static void trigger_softirq(void *data)
60static int raise_blk_irq(int cpu, struct request *rq) 60static int raise_blk_irq(int cpu, struct request *rq)
61{ 61{
62 if (cpu_online(cpu)) { 62 if (cpu_online(cpu)) {
63 struct call_single_data *data = &rq->csd; 63 call_single_data_t *data = &rq->csd;
64 64
65 data->func = trigger_softirq; 65 data->func = trigger_softirq;
66 data->info = rq; 66 data->info = rq;
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 19182d091587..1893e416e7c0 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2884,7 +2884,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
2884 * need to be interruptible while waiting. 2884 * need to be interruptible while waiting.
2885 */ 2885 */
2886 INIT_WORK_ONSTACK(&flush.work, flush_probe); 2886 INIT_WORK_ONSTACK(&flush.work, flush_probe);
2887 COMPLETION_INITIALIZER_ONSTACK(flush.cmp); 2887 init_completion(&flush.cmp);
2888 queue_work(nfit_wq, &flush.work); 2888 queue_work(nfit_wq, &flush.work);
2889 mutex_unlock(&acpi_desc->init_mutex); 2889 mutex_unlock(&acpi_desc->init_mutex);
2890 2890
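The nfit hunk replaces a use of COMPLETION_INITIALIZER_ONSTACK() as a bare statement with init_completion(), the explicit-initialization call for completions. A hedged sketch of that idiom (the work that eventually calls complete() is elided):

	#include <linux/completion.h>

	static void demo_wait_for_flush(void)
	{
		struct completion cmp;

		init_completion(&cmp);		/* also fine for on-stack completions */
		/* ... queue work that eventually calls complete(&cmp) ... */
		wait_for_completion(&cmp);
	}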
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 85c24cace973..81142ce781da 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -13,7 +13,7 @@
13struct nullb_cmd { 13struct nullb_cmd {
14 struct list_head list; 14 struct list_head list;
15 struct llist_node ll_list; 15 struct llist_node ll_list;
16 struct call_single_data csd; 16 call_single_data_t csd;
17 struct request *rq; 17 struct request *rq;
18 struct bio *bio; 18 struct bio *bio;
19 unsigned int tag; 19 unsigned int tag;
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 72bbfccef113..fd4b7f684bd0 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -455,7 +455,11 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa
455 per_cpu(timer_unstable_counter_workaround, i) = wa; 455 per_cpu(timer_unstable_counter_workaround, i) = wa;
456 } 456 }
457 457
458 static_branch_enable(&arch_timer_read_ool_enabled); 458 /*
459 * Use the locked version, as we're called from the CPU
460 * hotplug framework. Otherwise, we end-up in deadlock-land.
461 */
462 static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled);
459 463
460 /* 464 /*
461 * Don't use the vdso fastpath if errata require using the 465 * Don't use the vdso fastpath if errata require using the
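static_branch_enable_cpuslocked(), added elsewhere in this series, is for callers that already hold the CPU hotplug lock, such as the hotplug-framework path above. A hedged sketch of the two call patterns (the key is illustrative):

	#include <linux/jump_label.h>
	#include <linux/cpu.h>

	static DEFINE_STATIC_KEY_FALSE(demo_key);	/* illustrative */

	static void demo_enable_from_hotplug_callback(void)
	{
		/* hotplug lock is already held here */
		static_branch_enable_cpuslocked(&demo_key);
	}

	static void demo_enable_from_process_context(void)
	{
		cpus_read_lock();
		static_branch_enable_cpuslocked(&demo_key);
		cpus_read_unlock();
		/* or simply static_branch_enable(&demo_key), which takes the lock itself */
	}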
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 71e586d7df71..147f38ea0fcd 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -119,13 +119,13 @@ struct cpuidle_coupled {
119 119
120#define CPUIDLE_COUPLED_NOT_IDLE (-1) 120#define CPUIDLE_COUPLED_NOT_IDLE (-1)
121 121
122static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); 122static DEFINE_PER_CPU(call_single_data_t, cpuidle_coupled_poke_cb);
123 123
124/* 124/*
125 * The cpuidle_coupled_poke_pending mask is used to avoid calling 125 * The cpuidle_coupled_poke_pending mask is used to avoid calling
126 * __smp_call_function_single with the per cpu call_single_data struct already 126 * __smp_call_function_single with the per cpu call_single_data_t struct already
127 * in use. This prevents a deadlock where two cpus are waiting for each others 127 * in use. This prevents a deadlock where two cpus are waiting for each others
128 * call_single_data struct to be available 128 * call_single_data_t struct to be available
129 */ 129 */
130static cpumask_t cpuidle_coupled_poke_pending; 130static cpumask_t cpuidle_coupled_poke_pending;
131 131
@@ -339,7 +339,7 @@ static void cpuidle_coupled_handle_poke(void *info)
339 */ 339 */
340static void cpuidle_coupled_poke(int cpu) 340static void cpuidle_coupled_poke(int cpu)
341{ 341{
342 struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); 342 call_single_data_t *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
343 343
344 if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) 344 if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
345 smp_call_function_single_async(cpu, csd); 345 smp_call_function_single_async(cpu, csd);
@@ -651,7 +651,7 @@ int cpuidle_coupled_register_device(struct cpuidle_device *dev)
651{ 651{
652 int cpu; 652 int cpu;
653 struct cpuidle_device *other_dev; 653 struct cpuidle_device *other_dev;
654 struct call_single_data *csd; 654 call_single_data_t *csd;
655 struct cpuidle_coupled *coupled; 655 struct cpuidle_coupled *coupled;
656 656
657 if (cpumask_empty(&dev->coupled_cpus)) 657 if (cpumask_empty(&dev->coupled_cpus))
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 48572b157222..a36216bd2a84 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -28,6 +28,7 @@
28 28
29#include <linux/debugfs.h> 29#include <linux/debugfs.h>
30#include <linux/sort.h> 30#include <linux/sort.h>
31#include <linux/sched/mm.h>
31#include "intel_drv.h" 32#include "intel_drv.h"
32 33
33static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) 34static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
@@ -4305,7 +4306,7 @@ i915_drop_caches_set(void *data, u64 val)
4305 mutex_unlock(&dev->struct_mutex); 4306 mutex_unlock(&dev->struct_mutex);
4306 } 4307 }
4307 4308
4308 lockdep_set_current_reclaim_state(GFP_KERNEL); 4309 fs_reclaim_acquire(GFP_KERNEL);
4309 if (val & DROP_BOUND) 4310 if (val & DROP_BOUND)
4310 i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND); 4311 i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND);
4311 4312
@@ -4314,7 +4315,7 @@ i915_drop_caches_set(void *data, u64 val)
4314 4315
4315 if (val & DROP_SHRINK_ALL) 4316 if (val & DROP_SHRINK_ALL)
4316 i915_gem_shrink_all(dev_priv); 4317 i915_gem_shrink_all(dev_priv);
4317 lockdep_clear_current_reclaim_state(); 4318 fs_reclaim_release(GFP_KERNEL);
4318 4319
4319 if (val & DROP_FREED) { 4320 if (val & DROP_FREED) {
4320 synchronize_rcu(); 4321 synchronize_rcu();
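fs_reclaim_acquire()/fs_reclaim_release() replace the per-task reclaim-state markers used here before; they tell lockdep that the bracketed code may behave like direct reclaim for the given GFP mask. A hedged sketch of the annotation pattern (the shrinking work itself is elided):

	#include <linux/sched/mm.h>
	#include <linux/gfp.h>

	static void demo_forced_shrink(void)
	{
		fs_reclaim_acquire(GFP_KERNEL);	/* pretend to be in reclaim */
		/* ... invoke shrinkers / drop caches ... */
		fs_reclaim_release(GFP_KERNEL);
	}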
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 51583ae4b1eb..120b6e537b28 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -2468,7 +2468,7 @@ static void liquidio_napi_drv_callback(void *arg)
2468 if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) { 2468 if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) {
2469 napi_schedule_irqoff(&droq->napi); 2469 napi_schedule_irqoff(&droq->napi);
2470 } else { 2470 } else {
2471 struct call_single_data *csd = &droq->csd; 2471 call_single_data_t *csd = &droq->csd;
2472 2472
2473 csd->func = napi_schedule_wrapper; 2473 csd->func = napi_schedule_wrapper;
2474 csd->info = &droq->napi; 2474 csd->info = &droq->napi;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index 6efd139b894d..f91bc84d1719 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -328,7 +328,7 @@ struct octeon_droq {
328 328
329 u32 cpu_id; 329 u32 cpu_id;
330 330
331 struct call_single_data csd; 331 call_single_data_t csd;
332}; 332};
333 333
334#define OCT_DROQ_SIZE (sizeof(struct octeon_droq)) 334#define OCT_DROQ_SIZE (sizeof(struct octeon_droq))
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 3d424a51cabb..f0fd3adb1693 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -446,14 +446,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
446 446
447 ovl_path_upper(dentry, &upperpath); 447 ovl_path_upper(dentry, &upperpath);
448 realfile = ovl_path_open(&upperpath, O_RDONLY); 448 realfile = ovl_path_open(&upperpath, O_RDONLY);
449 smp_mb__before_spinlock(); 449
450 inode_lock(inode); 450 inode_lock(inode);
451 if (!od->upperfile) { 451 if (!od->upperfile) {
452 if (IS_ERR(realfile)) { 452 if (IS_ERR(realfile)) {
453 inode_unlock(inode); 453 inode_unlock(inode);
454 return PTR_ERR(realfile); 454 return PTR_ERR(realfile);
455 } 455 }
456 od->upperfile = realfile; 456 smp_store_release(&od->upperfile, realfile);
457 } else { 457 } else {
458 /* somebody has beaten us to it */ 458 /* somebody has beaten us to it */
459 if (!IS_ERR(realfile)) 459 if (!IS_ERR(realfile))
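With the barrier before inode_lock() gone, the store of od->upperfile above now uses smp_store_release() so that anyone who sees the pointer without holding the lock also sees the fully constructed file. A hedged sketch of that publish/consume pairing with illustrative types:

	#include <linux/fs.h>
	#include <asm/barrier.h>

	struct demo_dir_state {				/* illustrative stand-in */
		struct file *upperfile;
	};

	/* Writer: publish only after 'f' is fully set up. */
	static void demo_publish(struct demo_dir_state *s, struct file *f)
	{
		smp_store_release(&s->upperfile, f);
	}

	/* Lockless reader: pairs with the release store above. */
	static struct file *demo_peek(struct demo_dir_state *s)
	{
		return smp_load_acquire(&s->upperfile);
	}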
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index b0d5897bc4e6..886085b47c75 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -109,27 +109,24 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
109 goto out; 109 goto out;
110 WRITE_ONCE(uwq->waken, true); 110 WRITE_ONCE(uwq->waken, true);
111 /* 111 /*
112 * The implicit smp_mb__before_spinlock in try_to_wake_up() 112 * The Program-Order guarantees provided by the scheduler
113 * renders uwq->waken visible to other CPUs before the task is 113 * ensure uwq->waken is visible before the task is woken.
114 * waken.
115 */ 114 */
116 ret = wake_up_state(wq->private, mode); 115 ret = wake_up_state(wq->private, mode);
117 if (ret) 116 if (ret) {
118 /* 117 /*
119 * Wake only once, autoremove behavior. 118 * Wake only once, autoremove behavior.
120 * 119 *
121 * After the effect of list_del_init is visible to the 120 * After the effect of list_del_init is visible to the other
122 * other CPUs, the waitqueue may disappear from under 121 * CPUs, the waitqueue may disappear from under us, see the
123 * us, see the !list_empty_careful() in 122 * !list_empty_careful() in handle_userfault().
124 * handle_userfault(). try_to_wake_up() has an 123 *
125 * implicit smp_mb__before_spinlock, and the 124 * try_to_wake_up() has an implicit smp_mb(), and the
126 * wq->private is read before calling the extern 125 * wq->private is read before calling the extern function
127 	 * function "wake_up_state" (which in turn calls		126 	 * "wake_up_state" (which in turn calls try_to_wake_up).
128 * try_to_wake_up). While the spin_lock;spin_unlock;
129 * wouldn't be enough, the smp_mb__before_spinlock is
130 * enough to avoid an explicit smp_mb() here.
131 */ 127 */
132 list_del_init(&wq->entry); 128 list_del_init(&wq->entry);
129 }
133out: 130out:
134 return ret; 131 return ret;
135} 132}
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index dad68bf46c77..8d28eb010d0d 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -21,6 +21,8 @@ typedef struct {
21extern long long atomic64_read(const atomic64_t *v); 21extern long long atomic64_read(const atomic64_t *v);
22extern void atomic64_set(atomic64_t *v, long long i); 22extern void atomic64_set(atomic64_t *v, long long i);
23 23
24#define atomic64_set_release(v, i) atomic64_set((v), (i))
25
24#define ATOMIC64_OP(op) \ 26#define ATOMIC64_OP(op) \
25extern void atomic64_##op(long long a, atomic64_t *v); 27extern void atomic64_##op(long long a, atomic64_t *v);
26 28
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index bf2d34c9d804..f0d8b1c51343 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -13,7 +13,7 @@
13 */ 13 */
14 14
15/** 15/**
16 * futex_atomic_op_inuser() - Atomic arithmetic operation with constant 16 * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
17 * argument and comparison of the previous 17 * argument and comparison of the previous
18 * futex value with another constant. 18 * futex value with another constant.
19 * 19 *
@@ -25,18 +25,11 @@
25 * <0 - On error 25 * <0 - On error
26 */ 26 */
27static inline int 27static inline int
28futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) 28arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
29{ 29{
30 int op = (encoded_op >> 28) & 7;
31 int cmp = (encoded_op >> 24) & 15;
32 int oparg = (encoded_op << 8) >> 20;
33 int cmparg = (encoded_op << 20) >> 20;
34 int oldval, ret; 30 int oldval, ret;
35 u32 tmp; 31 u32 tmp;
36 32
37 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
38 oparg = 1 << oparg;
39
40 preempt_disable(); 33 preempt_disable();
41 pagefault_disable(); 34 pagefault_disable();
42 35
@@ -74,17 +67,9 @@ out_pagefault_enable:
74 pagefault_enable(); 67 pagefault_enable();
75 preempt_enable(); 68 preempt_enable();
76 69
77 if (ret == 0) { 70 if (ret == 0)
78 switch (cmp) { 71 *oval = oldval;
79 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 72
80 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
81 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
82 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
83 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
84 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
85 default: ret = -ENOSYS;
86 }
87 }
88 return ret; 73 return ret;
89} 74}
90 75
@@ -126,18 +111,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
126 111
127#else 112#else
128static inline int 113static inline int
129futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) 114arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
130{ 115{
131 int op = (encoded_op >> 28) & 7;
132 int cmp = (encoded_op >> 24) & 15;
133 int oparg = (encoded_op << 8) >> 20;
134 int cmparg = (encoded_op << 20) >> 20;
135 int oldval = 0, ret; 116 int oldval = 0, ret;
136 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
137 oparg = 1 << oparg;
138
139 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
140 return -EFAULT;
141 117
142 pagefault_disable(); 118 pagefault_disable();
143 119
@@ -153,17 +129,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
153 129
154 pagefault_enable(); 130 pagefault_enable();
155 131
156 if (!ret) { 132 if (!ret)
157 switch (cmp) { 133 *oval = oldval;
158 case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; 134
159 case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
160 case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
161 case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
162 case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
163 case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
164 default: ret = -ENOSYS;
165 }
166 }
167 return ret; 135 return ret;
168} 136}
169 137
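
With the cmp/cmparg handling gone from the arch helper, the generic code is expected to drive it roughly as follows (a condensed sketch; the full version is the futex_atomic_op_inuser() added to kernel/futex.c further down):

	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	if (ret)
		return ret;		/* e.g. -EFAULT from the arch code */
	return oldval == cmparg;	/* comparison done once, in common code
					 * (shown here only for FUTEX_OP_CMP_EQ) */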
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index c56be7410130..40d6bfec0e0d 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -38,6 +38,9 @@
38 * Besides, if an arch has a special barrier for acquire/release, it could 38 * Besides, if an arch has a special barrier for acquire/release, it could
39 * implement its own __atomic_op_* and use the same framework for building 39 * implement its own __atomic_op_* and use the same framework for building
40 * variants 40 * variants
41 *
42 * If an architecture overrides __atomic_op_acquire() it will probably want
43 * to define smp_mb__after_spinlock().
41 */ 44 */
42#ifndef __atomic_op_acquire 45#ifndef __atomic_op_acquire
43#define __atomic_op_acquire(op, args...) \ 46#define __atomic_op_acquire(op, args...) \
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2a5d52fa90f5..4b99b13c7e68 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -134,7 +134,7 @@ typedef __u32 __bitwise req_flags_t;
134struct request { 134struct request {
135 struct list_head queuelist; 135 struct list_head queuelist;
136 union { 136 union {
137 struct call_single_data csd; 137 call_single_data_t csd;
138 u64 fifo_time; 138 u64 fifo_time;
139 }; 139 };
140 140
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 5d5aaae3af43..cae5400022a3 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -9,6 +9,9 @@
9 */ 9 */
10 10
11#include <linux/wait.h> 11#include <linux/wait.h>
12#ifdef CONFIG_LOCKDEP_COMPLETIONS
13#include <linux/lockdep.h>
14#endif
12 15
13/* 16/*
14 * struct completion - structure used to maintain state for a "completion" 17 * struct completion - structure used to maintain state for a "completion"
@@ -25,13 +28,53 @@
25struct completion { 28struct completion {
26 unsigned int done; 29 unsigned int done;
27 wait_queue_head_t wait; 30 wait_queue_head_t wait;
31#ifdef CONFIG_LOCKDEP_COMPLETIONS
32 struct lockdep_map_cross map;
33#endif
28}; 34};
29 35
36#ifdef CONFIG_LOCKDEP_COMPLETIONS
37static inline void complete_acquire(struct completion *x)
38{
39 lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_);
40}
41
42static inline void complete_release(struct completion *x)
43{
44 lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_);
45}
46
47static inline void complete_release_commit(struct completion *x)
48{
49 lock_commit_crosslock((struct lockdep_map *)&x->map);
50}
51
52#define init_completion(x) \
53do { \
54 static struct lock_class_key __key; \
55 lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \
56 "(complete)" #x, \
57 &__key, 0); \
58 __init_completion(x); \
59} while (0)
60#else
61#define init_completion(x) __init_completion(x)
62static inline void complete_acquire(struct completion *x) {}
63static inline void complete_release(struct completion *x) {}
64static inline void complete_release_commit(struct completion *x) {}
65#endif
66
67#ifdef CONFIG_LOCKDEP_COMPLETIONS
68#define COMPLETION_INITIALIZER(work) \
69 { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \
70 STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) }
71#else
30#define COMPLETION_INITIALIZER(work) \ 72#define COMPLETION_INITIALIZER(work) \
31 { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } 73 { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
74#endif
32 75
33#define COMPLETION_INITIALIZER_ONSTACK(work) \ 76#define COMPLETION_INITIALIZER_ONSTACK(work) \
34 ({ init_completion(&work); work; }) 77 (*({ init_completion(&work); &work; }))
35 78
36/** 79/**
37 * DECLARE_COMPLETION - declare and initialize a completion structure 80 * DECLARE_COMPLETION - declare and initialize a completion structure
@@ -70,7 +113,7 @@ struct completion {
70 * This inline function will initialize a dynamically created completion 113 * This inline function will initialize a dynamically created completion
71 * structure. 114 * structure.
72 */ 115 */
73static inline void init_completion(struct completion *x) 116static inline void __init_completion(struct completion *x)
74{ 117{
75 x->done = 0; 118 x->done = 0;
76 init_waitqueue_head(&x->wait); 119 init_waitqueue_head(&x->wait);
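
Callers keep using init_completion() as before; the split only adds the cross-release map behind CONFIG_LOCKDEP_COMPLETIONS. A brief usage sketch (my_ctx and its work item are placeholders):

static int my_start_and_wait(struct my_ctx *ctx)
{
	init_completion(&ctx->done);		/* with LOCKDEP_COMPLETIONS this also
						 * runs lockdep_init_map_crosslock() */
	queue_work(system_wq, &ctx->work);	/* the worker calls complete(&ctx->done) */
	wait_for_completion(&ctx->done);	/* wrapped by complete_acquire() /
						 * complete_release() for the validator */
	return 0;
}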
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 898cfe2eeb42..e74655d941b7 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -37,12 +37,6 @@ static inline bool cpusets_enabled(void)
37 return static_branch_unlikely(&cpusets_enabled_key); 37 return static_branch_unlikely(&cpusets_enabled_key);
38} 38}
39 39
40static inline int nr_cpusets(void)
41{
42 /* jump label reference count + the top-level cpuset */
43 return static_key_count(&cpusets_enabled_key.key) + 1;
44}
45
46static inline void cpuset_inc(void) 40static inline void cpuset_inc(void)
47{ 41{
48 static_branch_inc(&cpusets_pre_enable_key); 42 static_branch_inc(&cpusets_pre_enable_key);
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 7c5b694864cd..f36bfd26f998 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -54,7 +54,6 @@ union futex_key {
54 54
55#ifdef CONFIG_FUTEX 55#ifdef CONFIG_FUTEX
56extern void exit_robust_list(struct task_struct *curr); 56extern void exit_robust_list(struct task_struct *curr);
57extern void exit_pi_state_list(struct task_struct *curr);
58#ifdef CONFIG_HAVE_FUTEX_CMPXCHG 57#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
59#define futex_cmpxchg_enabled 1 58#define futex_cmpxchg_enabled 1
60#else 59#else
@@ -64,8 +63,14 @@ extern int futex_cmpxchg_enabled;
64static inline void exit_robust_list(struct task_struct *curr) 63static inline void exit_robust_list(struct task_struct *curr)
65{ 64{
66} 65}
66#endif
67
68#ifdef CONFIG_FUTEX_PI
69extern void exit_pi_state_list(struct task_struct *curr);
70#else
67static inline void exit_pi_state_list(struct task_struct *curr) 71static inline void exit_pi_state_list(struct task_struct *curr)
68{ 72{
69} 73}
70#endif 74#endif
75
71#endif 76#endif
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 5dd1272d1ab2..5fdd93bb9300 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -23,10 +23,26 @@
23# define trace_softirq_context(p) ((p)->softirq_context) 23# define trace_softirq_context(p) ((p)->softirq_context)
24# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) 24# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
25# define trace_softirqs_enabled(p) ((p)->softirqs_enabled) 25# define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
26# define trace_hardirq_enter() do { current->hardirq_context++; } while (0) 26# define trace_hardirq_enter() \
27# define trace_hardirq_exit() do { current->hardirq_context--; } while (0) 27do { \
28# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) 28 current->hardirq_context++; \
29# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) 29 crossrelease_hist_start(XHLOCK_HARD); \
30} while (0)
31# define trace_hardirq_exit() \
32do { \
33 current->hardirq_context--; \
34 crossrelease_hist_end(XHLOCK_HARD); \
35} while (0)
36# define lockdep_softirq_enter() \
37do { \
38 current->softirq_context++; \
39 crossrelease_hist_start(XHLOCK_SOFT); \
40} while (0)
41# define lockdep_softirq_exit() \
42do { \
43 current->softirq_context--; \
44 crossrelease_hist_end(XHLOCK_SOFT); \
45} while (0)
30# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, 46# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
31#else 47#else
32# define trace_hardirqs_on() do { } while (0) 48# define trace_hardirqs_on() do { } while (0)
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 2afd74b9d844..cd5861651b17 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -163,6 +163,8 @@ extern void jump_label_apply_nops(struct module *mod);
163extern int static_key_count(struct static_key *key); 163extern int static_key_count(struct static_key *key);
164extern void static_key_enable(struct static_key *key); 164extern void static_key_enable(struct static_key *key);
165extern void static_key_disable(struct static_key *key); 165extern void static_key_disable(struct static_key *key);
166extern void static_key_enable_cpuslocked(struct static_key *key);
167extern void static_key_disable_cpuslocked(struct static_key *key);
166 168
167/* 169/*
168 * We should be using ATOMIC_INIT() for initializing .enabled, but 170 * We should be using ATOMIC_INIT() for initializing .enabled, but
@@ -234,24 +236,29 @@ static inline int jump_label_apply_nops(struct module *mod)
234 236
235static inline void static_key_enable(struct static_key *key) 237static inline void static_key_enable(struct static_key *key)
236{ 238{
237 int count = static_key_count(key); 239 STATIC_KEY_CHECK_USE();
238
239 WARN_ON_ONCE(count < 0 || count > 1);
240 240
241 if (!count) 241 if (atomic_read(&key->enabled) != 0) {
242 static_key_slow_inc(key); 242 WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
243 return;
244 }
245 atomic_set(&key->enabled, 1);
243} 246}
244 247
245static inline void static_key_disable(struct static_key *key) 248static inline void static_key_disable(struct static_key *key)
246{ 249{
247 int count = static_key_count(key); 250 STATIC_KEY_CHECK_USE();
248
249 WARN_ON_ONCE(count < 0 || count > 1);
250 251
251 if (count) 252 if (atomic_read(&key->enabled) != 1) {
252 static_key_slow_dec(key); 253 WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
254 return;
255 }
256 atomic_set(&key->enabled, 0);
253} 257}
254 258
259#define static_key_enable_cpuslocked(k) static_key_enable((k))
260#define static_key_disable_cpuslocked(k) static_key_disable((k))
261
255#define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } 262#define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) }
256#define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } 263#define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) }
257 264
@@ -413,8 +420,10 @@ extern bool ____wrong_branch_error(void);
413 * Normal usage; boolean enable/disable. 420 * Normal usage; boolean enable/disable.
414 */ 421 */
415 422
416#define static_branch_enable(x) static_key_enable(&(x)->key) 423#define static_branch_enable(x) static_key_enable(&(x)->key)
417#define static_branch_disable(x) static_key_disable(&(x)->key) 424#define static_branch_disable(x) static_key_disable(&(x)->key)
425#define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key)
426#define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key)
418 427
419#endif /* __ASSEMBLY__ */ 428#endif /* __ASSEMBLY__ */
420 429
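
The new *_cpuslocked variants are for callers that already hold the CPU hotplug lock and therefore must not let static_key_enable() take cpus_read_lock() again. A sketch with a hypothetical key flipped from a hotplug callback:

static DEFINE_STATIC_KEY_FALSE(my_feature_key);		/* hypothetical key */

static int my_feature_cpu_online(unsigned int cpu)
{
	/* cpuhp callbacks run with the hotplug lock held, so use the
	 * _cpuslocked form instead of static_branch_enable(). */
	static_branch_enable_cpuslocked(&my_feature_key);
	return 0;
}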
diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h
index b7f8aced7870..41960fecf783 100644
--- a/include/linux/kasan-checks.h
+++ b/include/linux/kasan-checks.h
@@ -2,11 +2,13 @@
2#define _LINUX_KASAN_CHECKS_H 2#define _LINUX_KASAN_CHECKS_H
3 3
4#ifdef CONFIG_KASAN 4#ifdef CONFIG_KASAN
5void kasan_check_read(const void *p, unsigned int size); 5void kasan_check_read(const volatile void *p, unsigned int size);
6void kasan_check_write(const void *p, unsigned int size); 6void kasan_check_write(const volatile void *p, unsigned int size);
7#else 7#else
8static inline void kasan_check_read(const void *p, unsigned int size) { } 8static inline void kasan_check_read(const volatile void *p, unsigned int size)
9static inline void kasan_check_write(const void *p, unsigned int size) { } 9{ }
10static inline void kasan_check_write(const volatile void *p, unsigned int size)
11{ }
10#endif 12#endif
11 13
12#endif 14#endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index bd6d96cf80b1..6607225d0ea4 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -277,6 +277,13 @@ extern int oops_may_print(void);
277void do_exit(long error_code) __noreturn; 277void do_exit(long error_code) __noreturn;
278void complete_and_exit(struct completion *, long) __noreturn; 278void complete_and_exit(struct completion *, long) __noreturn;
279 279
280#ifdef CONFIG_ARCH_HAS_REFCOUNT
281void refcount_error_report(struct pt_regs *regs, const char *err);
282#else
283static inline void refcount_error_report(struct pt_regs *regs, const char *err)
284{ }
285#endif
286
280/* Internal, do not use. */ 287/* Internal, do not use. */
281int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); 288int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
282int __must_check _kstrtol(const char *s, unsigned int base, long *res); 289int __must_check _kstrtol(const char *s, unsigned int base, long *res);
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index fffe49f188e6..bfa8e0b0d6f1 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -18,6 +18,8 @@ extern int lock_stat;
18 18
19#define MAX_LOCKDEP_SUBCLASSES 8UL 19#define MAX_LOCKDEP_SUBCLASSES 8UL
20 20
21#include <linux/types.h>
22
21#ifdef CONFIG_LOCKDEP 23#ifdef CONFIG_LOCKDEP
22 24
23#include <linux/linkage.h> 25#include <linux/linkage.h>
@@ -29,7 +31,7 @@ extern int lock_stat;
29 * We'd rather not expose kernel/lockdep_states.h this wide, but we do need 31 * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
30 * the total number of states... :-( 32 * the total number of states... :-(
31 */ 33 */
32#define XXX_LOCK_USAGE_STATES (1+3*4) 34#define XXX_LOCK_USAGE_STATES (1+2*4)
33 35
34/* 36/*
35 * NR_LOCKDEP_CACHING_CLASSES ... Number of classes 37 * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
@@ -155,6 +157,12 @@ struct lockdep_map {
155 int cpu; 157 int cpu;
156 unsigned long ip; 158 unsigned long ip;
157#endif 159#endif
160#ifdef CONFIG_LOCKDEP_CROSSRELEASE
161 /*
162 * Whether it's a crosslock.
163 */
164 int cross;
165#endif
158}; 166};
159 167
160static inline void lockdep_copy_map(struct lockdep_map *to, 168static inline void lockdep_copy_map(struct lockdep_map *to,
@@ -258,8 +266,95 @@ struct held_lock {
258 unsigned int hardirqs_off:1; 266 unsigned int hardirqs_off:1;
259 unsigned int references:12; /* 32 bits */ 267 unsigned int references:12; /* 32 bits */
260 unsigned int pin_count; 268 unsigned int pin_count;
269#ifdef CONFIG_LOCKDEP_CROSSRELEASE
270 /*
271 * Generation id.
272 *
273 * A value of cross_gen_id will be stored when holding this,
274 * which is globally increased whenever each crosslock is held.
275 */
276 unsigned int gen_id;
277#endif
278};
279
280#ifdef CONFIG_LOCKDEP_CROSSRELEASE
281#define MAX_XHLOCK_TRACE_ENTRIES 5
282
283/*
284 * This is for keeping locks waiting for commit so that true dependencies
285 * can be added at the commit step.
286 */
287struct hist_lock {
288 /*
289 * Id for each entry in the ring buffer. This is used to
290 * decide whether the ring buffer was overwritten or not.
291 *
292 * For example,
293 *
294 * |<----------- hist_lock ring buffer size ------->|
295 * pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii
296 * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii.......................
297 *
298 * where 'p' represents an acquisition in process
299 * context, 'i' represents an acquisition in irq
300 * context.
301 *
302 * In this example, the ring buffer was overwritten by
303	 * acquisitions in irq context, which should be detected on
304 * rollback or commit.
305 */
306 unsigned int hist_id;
307
308 /*
309	 * Separate stack_trace data. This will be used at the commit step.
310 */
311 struct stack_trace trace;
312 unsigned long trace_entries[MAX_XHLOCK_TRACE_ENTRIES];
313
314 /*
315	 * Separate hlock instance. This will be used at the commit step.
316 *
317 * TODO: Use a smaller data structure containing only necessary
318 * data. However, we should make lockdep code able to handle the
319 * smaller one first.
320 */
321 struct held_lock hlock;
322};
323
324/*
325 * To initialize a lock as crosslock, lockdep_init_map_crosslock() should
326 * be called instead of lockdep_init_map().
327 */
328struct cross_lock {
329 /*
330 * When more than one acquisition of crosslocks are overlapped,
331 * we have to perform commit for them based on cross_gen_id of
332 * the first acquisition, which allows us to add more true
333 * dependencies.
334 *
335 * Moreover, when no acquisition of a crosslock is in progress,
336 * we should not perform commit because the lock might not exist
337 * any more, which might cause incorrect memory access. So we
338 * have to track the number of acquisitions of a crosslock.
339 */
340 int nr_acquire;
341
342 /*
343	 * Separate hlock instance. This will be used at the commit step.
344 *
345 * TODO: Use a smaller data structure containing only necessary
346 * data. However, we should make lockdep code able to handle the
347 * smaller one first.
348 */
349 struct held_lock hlock;
261}; 350};
262 351
352struct lockdep_map_cross {
353 struct lockdep_map map;
354 struct cross_lock xlock;
355};
356#endif
357
263/* 358/*
264 * Initialization, self-test and debugging-output methods: 359 * Initialization, self-test and debugging-output methods:
265 */ 360 */
@@ -282,13 +377,6 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
282 struct lock_class_key *key, int subclass); 377 struct lock_class_key *key, int subclass);
283 378
284/* 379/*
285 * To initialize a lockdep_map statically use this macro.
286 * Note that _name must not be NULL.
287 */
288#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
289 { .name = (_name), .key = (void *)(_key), }
290
291/*
292 * Reinitialize a lock key - for cases where there is special locking or 380 * Reinitialize a lock key - for cases where there is special locking or
293 * special initialization of locks so that the validator gets the scope 381 * special initialization of locks so that the validator gets the scope
294 * of dependencies wrong: they are either too broad (they need a class-split) 382 * of dependencies wrong: they are either too broad (they need a class-split)
@@ -363,10 +451,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
363 451
364extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); 452extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
365 453
366extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
367extern void lockdep_clear_current_reclaim_state(void);
368extern void lockdep_trace_alloc(gfp_t mask);
369
370struct pin_cookie { unsigned int val; }; 454struct pin_cookie { unsigned int val; };
371 455
372#define NIL_COOKIE (struct pin_cookie){ .val = 0U, } 456#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
@@ -375,7 +459,7 @@ extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
375extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); 459extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
376extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); 460extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
377 461
378# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, 462# define INIT_LOCKDEP .lockdep_recursion = 0,
379 463
380#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) 464#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
381 465
@@ -416,9 +500,6 @@ static inline void lockdep_on(void)
416# define lock_downgrade(l, i) do { } while (0) 500# define lock_downgrade(l, i) do { } while (0)
417# define lock_set_class(l, n, k, s, i) do { } while (0) 501# define lock_set_class(l, n, k, s, i) do { } while (0)
418# define lock_set_subclass(l, s, i) do { } while (0) 502# define lock_set_subclass(l, s, i) do { } while (0)
419# define lockdep_set_current_reclaim_state(g) do { } while (0)
420# define lockdep_clear_current_reclaim_state() do { } while (0)
421# define lockdep_trace_alloc(g) do { } while (0)
422# define lockdep_info() do { } while (0) 503# define lockdep_info() do { } while (0)
423# define lockdep_init_map(lock, name, key, sub) \ 504# define lockdep_init_map(lock, name, key, sub) \
424 do { (void)(name); (void)(key); } while (0) 505 do { (void)(name); (void)(key); } while (0)
@@ -467,6 +548,58 @@ struct pin_cookie { };
467 548
468#endif /* !LOCKDEP */ 549#endif /* !LOCKDEP */
469 550
551enum xhlock_context_t {
552 XHLOCK_HARD,
553 XHLOCK_SOFT,
554 XHLOCK_CTX_NR,
555};
556
557#ifdef CONFIG_LOCKDEP_CROSSRELEASE
558extern void lockdep_init_map_crosslock(struct lockdep_map *lock,
559 const char *name,
560 struct lock_class_key *key,
561 int subclass);
562extern void lock_commit_crosslock(struct lockdep_map *lock);
563
564/*
565 * What we essentially have to initialize is 'nr_acquire'. Other members
566 * will be initialized in add_xlock().
567 */
568#define STATIC_CROSS_LOCK_INIT() \
569 { .nr_acquire = 0,}
570
571#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \
572 { .map.name = (_name), .map.key = (void *)(_key), \
573 .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), }
574
575/*
576 * To initialize a lockdep_map statically use this macro.
577 * Note that _name must not be NULL.
578 */
579#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
580 { .name = (_name), .key = (void *)(_key), .cross = 0, }
581
582extern void crossrelease_hist_start(enum xhlock_context_t c);
583extern void crossrelease_hist_end(enum xhlock_context_t c);
584extern void lockdep_invariant_state(bool force);
585extern void lockdep_init_task(struct task_struct *task);
586extern void lockdep_free_task(struct task_struct *task);
587#else /* !CROSSRELEASE */
588#define lockdep_init_map_crosslock(m, n, k, s) do {} while (0)
589/*
590 * To initialize a lockdep_map statically use this macro.
591 * Note that _name must not be NULL.
592 */
593#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
594 { .name = (_name), .key = (void *)(_key), }
595
596static inline void crossrelease_hist_start(enum xhlock_context_t c) {}
597static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
598static inline void lockdep_invariant_state(bool force) {}
599static inline void lockdep_init_task(struct task_struct *task) {}
600static inline void lockdep_free_task(struct task_struct *task) {}
601#endif /* CROSSRELEASE */
602
470#ifdef CONFIG_LOCK_STAT 603#ifdef CONFIG_LOCK_STAT
471 604
472extern void lock_contended(struct lockdep_map *lock, unsigned long ip); 605extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3cadee0a3508..57378c7cb5f8 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -526,26 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
526extern void tlb_finish_mmu(struct mmu_gather *tlb, 526extern void tlb_finish_mmu(struct mmu_gather *tlb,
527 unsigned long start, unsigned long end); 527 unsigned long start, unsigned long end);
528 528
529/*
530 * Memory barriers to keep this state in sync are graciously provided by
531 * the page table locks, outside of which no page table modifications happen.
532 * The barriers are used to ensure the order between tlb_flush_pending updates,
533 * which happen while the lock is not taken, and the PTE updates, which happen
534 * while the lock is taken, are serialized.
535 */
536static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
537{
538 return atomic_read(&mm->tlb_flush_pending) > 0;
539}
540
541/*
542 * Returns true if there are two above TLB batching threads in parallel.
543 */
544static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
545{
546 return atomic_read(&mm->tlb_flush_pending) > 1;
547}
548
549static inline void init_tlb_flush_pending(struct mm_struct *mm) 529static inline void init_tlb_flush_pending(struct mm_struct *mm)
550{ 530{
551 atomic_set(&mm->tlb_flush_pending, 0); 531 atomic_set(&mm->tlb_flush_pending, 0);
@@ -554,27 +534,82 @@ static inline void init_tlb_flush_pending(struct mm_struct *mm)
554static inline void inc_tlb_flush_pending(struct mm_struct *mm) 534static inline void inc_tlb_flush_pending(struct mm_struct *mm)
555{ 535{
556 atomic_inc(&mm->tlb_flush_pending); 536 atomic_inc(&mm->tlb_flush_pending);
557
558 /* 537 /*
559 * Guarantee that the tlb_flush_pending increase does not leak into the 538 * The only time this value is relevant is when there are indeed pages
560 * critical section updating the page tables 539 * to flush. And we'll only flush pages after changing them, which
540 * requires the PTL.
541 *
542 * So the ordering here is:
543 *
544 * atomic_inc(&mm->tlb_flush_pending);
545 * spin_lock(&ptl);
546 * ...
547 * set_pte_at();
548 * spin_unlock(&ptl);
549 *
550 * spin_lock(&ptl)
551 * mm_tlb_flush_pending();
552 * ....
553 * spin_unlock(&ptl);
554 *
555 * flush_tlb_range();
556 * atomic_dec(&mm->tlb_flush_pending);
557 *
558	 * Where the increment is constrained by the PTL unlock, it thus
559 * ensures that the increment is visible if the PTE modification is
560 * visible. After all, if there is no PTE modification, nobody cares
561 * about TLB flushes either.
562 *
563 * This very much relies on users (mm_tlb_flush_pending() and
564 * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
565 * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
566 * locks (PPC) the unlock of one doesn't order against the lock of
567 * another PTL.
568 *
569 * The decrement is ordered by the flush_tlb_range(), such that
570 * mm_tlb_flush_pending() will not return false unless all flushes have
571 * completed.
561 */ 572 */
562 smp_mb__before_spinlock();
563} 573}
564 574
565/* Clearing is done after a TLB flush, which also provides a barrier. */
566static inline void dec_tlb_flush_pending(struct mm_struct *mm) 575static inline void dec_tlb_flush_pending(struct mm_struct *mm)
567{ 576{
568 /* 577 /*
569 * Guarantee that the tlb_flush_pending does not not leak into the 578 * See inc_tlb_flush_pending().
570 * critical section, since we must order the PTE change and changes to 579 *
571 * the pending TLB flush indication. We could have relied on TLB flush 580 * This cannot be smp_mb__before_atomic() because smp_mb() simply does
572 * as a memory barrier, but this behavior is not clearly documented. 581 * not order against TLB invalidate completion, which is what we need.
582 *
583 * Therefore we must rely on tlb_flush_*() to guarantee order.
573 */ 584 */
574 smp_mb__before_atomic();
575 atomic_dec(&mm->tlb_flush_pending); 585 atomic_dec(&mm->tlb_flush_pending);
576} 586}
577 587
588static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
589{
590 /*
591 * Must be called after having acquired the PTL; orders against that
592 * PTLs release and therefore ensures that if we observe the modified
593 * PTE we must also observe the increment from inc_tlb_flush_pending().
594 *
595 * That is, it only guarantees to return true if there is a flush
596 * pending for _this_ PTL.
597 */
598 return atomic_read(&mm->tlb_flush_pending);
599}
600
601static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
602{
603 /*
604 * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
605 * for which there is a TLB flush pending in order to guarantee
606 * we've seen both that PTE modification and the increment.
607 *
608 * (no requirement on actually still holding the PTL, that is irrelevant)
609 */
610 return atomic_read(&mm->tlb_flush_pending) > 1;
611}
612
578struct vm_fault; 613struct vm_fault;
579 614
580struct vm_special_mapping { 615struct vm_special_mapping {
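
Putting the comment in inc_tlb_flush_pending() into code form, the two sides pair up roughly like this (a simplified sketch; mm, ptl, vma and the address range come from the calling context):

	/* unmap/flush side */
	inc_tlb_flush_pending(mm);

	spin_lock(ptl);
	ptep_get_and_clear(mm, addr, ptep);	/* PTE change under the PTL */
	spin_unlock(ptl);			/* publishes the increment together
						 * with the PTE modification */

	flush_tlb_range(vma, start, end);
	dec_tlb_flush_pending(mm);		/* ordered by the flush above */

	/* concurrent reader of the same PTL */
	spin_lock(ptl);
	if (mm_tlb_flush_pending(mm))
		flush_tlb_range(vma, start, end);	/* or wait for the flush */
	spin_unlock(ptl);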
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c99ba7914c0a..461bd5757af6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2774,7 +2774,7 @@ struct softnet_data {
2774 unsigned int input_queue_head ____cacheline_aligned_in_smp; 2774 unsigned int input_queue_head ____cacheline_aligned_in_smp;
2775 2775
2776 /* Elements below can be accessed between CPUs for RPS/RFS */ 2776 /* Elements below can be accessed between CPUs for RPS/RFS */
2777 struct call_single_data csd ____cacheline_aligned_in_smp; 2777 call_single_data_t csd ____cacheline_aligned_in_smp;
2778 struct softnet_data *rps_ipi_next; 2778 struct softnet_data *rps_ipi_next;
2779 unsigned int cpu; 2779 unsigned int cpu;
2780 unsigned int input_queue_tail; 2780 unsigned int input_queue_tail;
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 591792c8e5b0..48b7c9c68c4d 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -53,6 +53,9 @@ extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
53extern __must_check bool refcount_dec_and_test(refcount_t *r); 53extern __must_check bool refcount_dec_and_test(refcount_t *r);
54extern void refcount_dec(refcount_t *r); 54extern void refcount_dec(refcount_t *r);
55#else 55#else
56# ifdef CONFIG_ARCH_HAS_REFCOUNT
57# include <asm/refcount.h>
58# else
56static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) 59static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r)
57{ 60{
58 return atomic_add_unless(&r->refs, i, 0); 61 return atomic_add_unless(&r->refs, i, 0);
@@ -87,6 +90,7 @@ static inline void refcount_dec(refcount_t *r)
87{ 90{
88 atomic_dec(&r->refs); 91 atomic_dec(&r->refs);
89} 92}
93# endif /* !CONFIG_ARCH_HAS_REFCOUNT */
90#endif /* CONFIG_REFCOUNT_FULL */ 94#endif /* CONFIG_REFCOUNT_FULL */
91 95
92extern __must_check bool refcount_dec_if_one(refcount_t *r); 96extern __must_check bool refcount_dec_if_one(refcount_t *r);
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index ae0528b834cd..e784761a4443 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -32,6 +32,7 @@ struct rw_semaphore {
32#define RWSEM_UNLOCKED_VALUE 0x00000000 32#define RWSEM_UNLOCKED_VALUE 0x00000000
33 33
34extern void __down_read(struct rw_semaphore *sem); 34extern void __down_read(struct rw_semaphore *sem);
35extern int __must_check __down_read_killable(struct rw_semaphore *sem);
35extern int __down_read_trylock(struct rw_semaphore *sem); 36extern int __down_read_trylock(struct rw_semaphore *sem);
36extern void __down_write(struct rw_semaphore *sem); 37extern void __down_write(struct rw_semaphore *sem);
37extern int __must_check __down_write_killable(struct rw_semaphore *sem); 38extern int __must_check __down_write_killable(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index dd1d14250340..0ad7318ff299 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -44,6 +44,7 @@ struct rw_semaphore {
44}; 44};
45 45
46extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); 46extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
47extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
47extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); 48extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
48extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); 49extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
49extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); 50extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e5fbce866073..9ba42c663fba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -847,7 +847,17 @@ struct task_struct {
847 int lockdep_depth; 847 int lockdep_depth;
848 unsigned int lockdep_recursion; 848 unsigned int lockdep_recursion;
849 struct held_lock held_locks[MAX_LOCK_DEPTH]; 849 struct held_lock held_locks[MAX_LOCK_DEPTH];
850 gfp_t lockdep_reclaim_gfp; 850#endif
851
852#ifdef CONFIG_LOCKDEP_CROSSRELEASE
853#define MAX_XHLOCKS_NR 64UL
854 struct hist_lock *xhlocks; /* Crossrelease history locks */
855 unsigned int xhlock_idx;
856 /* For restoring at history boundaries */
857 unsigned int xhlock_idx_hist[XHLOCK_CTX_NR];
858 unsigned int hist_id;
859 /* For overwrite check at each context exit */
860 unsigned int hist_id_save[XHLOCK_CTX_NR];
851#endif 861#endif
852 862
853#ifdef CONFIG_UBSAN 863#ifdef CONFIG_UBSAN
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 2b24a6974847..2b0a281f9d26 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -167,6 +167,14 @@ static inline gfp_t current_gfp_context(gfp_t flags)
167 return flags; 167 return flags;
168} 168}
169 169
170#ifdef CONFIG_LOCKDEP
171extern void fs_reclaim_acquire(gfp_t gfp_mask);
172extern void fs_reclaim_release(gfp_t gfp_mask);
173#else
174static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
175static inline void fs_reclaim_release(gfp_t gfp_mask) { }
176#endif
177
170static inline unsigned int memalloc_noio_save(void) 178static inline unsigned int memalloc_noio_save(void)
171{ 179{
172 unsigned int flags = current->flags & PF_MEMALLOC_NOIO; 180 unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 68123c1fe549..98b1fe027fc9 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -14,13 +14,17 @@
14#include <linux/llist.h> 14#include <linux/llist.h>
15 15
16typedef void (*smp_call_func_t)(void *info); 16typedef void (*smp_call_func_t)(void *info);
17struct call_single_data { 17struct __call_single_data {
18 struct llist_node llist; 18 struct llist_node llist;
19 smp_call_func_t func; 19 smp_call_func_t func;
20 void *info; 20 void *info;
21 unsigned int flags; 21 unsigned int flags;
22}; 22};
23 23
24/* Use __aligned() to avoid using 2 cache lines for 1 csd */
25typedef struct __call_single_data call_single_data_t
26 __aligned(sizeof(struct __call_single_data));
27
24/* total number of cpus in this system (may exceed NR_CPUS) */ 28/* total number of cpus in this system (may exceed NR_CPUS) */
25extern unsigned int total_cpus; 29extern unsigned int total_cpus;
26 30
@@ -48,7 +52,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
48 smp_call_func_t func, void *info, bool wait, 52 smp_call_func_t func, void *info, bool wait,
49 gfp_t gfp_flags); 53 gfp_t gfp_flags);
50 54
51int smp_call_function_single_async(int cpu, struct call_single_data *csd); 55int smp_call_function_single_async(int cpu, call_single_data_t *csd);
52 56
53#ifdef CONFIG_SMP 57#ifdef CONFIG_SMP
54 58
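
The typedef changes only the alignment guarantee, not the API. A sketch of a typical per-CPU user of the async variant (names made up):

static DEFINE_PER_CPU(call_single_data_t, my_csd);

static void my_remote_func(void *info)
{
	/* runs on the target CPU in IPI context */
}

static void my_kick_cpu(int cpu)
{
	call_single_data_t *csd = &per_cpu(my_csd, cpu);

	csd->func = my_remote_func;
	csd->info = NULL;
	smp_call_function_single_async(cpu, csd);
}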
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index ef018a6e4985..69e079c5ff98 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -118,16 +118,39 @@ do { \
118#endif 118#endif
119 119
120/* 120/*
121 * Despite its name it doesn't necessarily has to be a full barrier. 121 * This barrier must provide two things:
122 * It should only guarantee that a STORE before the critical section 122 *
123 * can not be reordered with LOADs and STOREs inside this section. 123 * - it must guarantee a STORE before the spin_lock() is ordered against a
124 * spin_lock() is the one-way barrier, this LOAD can not escape out 124 * LOAD after it, see the comments at its two usage sites.
125 * of the region. So the default implementation simply ensures that 125 *
126 * a STORE can not move into the critical section, smp_wmb() should 126 * - it must ensure the critical section is RCsc.
127 * serialize it with another STORE done by spin_lock(). 127 *
128 * The latter is important for cases where we observe values written by other
129 * CPUs in spin-loops, without barriers, while being subject to scheduling.
130 *
131 * CPU0 CPU1 CPU2
132 *
133 * for (;;) {
134 * if (READ_ONCE(X))
135 * break;
136 * }
137 * X=1
138 * <sched-out>
139 * <sched-in>
140 * r = X;
141 *
142 * Without transitivity it could be that CPU1 observes X!=0 and breaks the loop,
143 * then we get migrated and CPU2 sees X==0.
144 *
145 * Since most load-store architectures implement ACQUIRE with an smp_mb() after
146 * the LL/SC loop, they need no further barriers. Similarly all our TSO
147 * architectures imply an smp_mb() for each atomic instruction and equally don't
148 * need more.
149 *
150 * Architectures that can implement ACQUIRE better need to take care.
128 */ 151 */
129#ifndef smp_mb__before_spinlock 152#ifndef smp_mb__after_spinlock
130#define smp_mb__before_spinlock() smp_wmb() 153#define smp_mb__after_spinlock() do { } while (0)
131#endif 154#endif
132 155
133#ifdef CONFIG_DEBUG_SPINLOCK 156#ifdef CONFIG_DEBUG_SPINLOCK
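
The canonical user of smp_mb__after_spinlock() is the scheduler's wakeup path; condensed from try_to_wake_up(), not a verbatim copy:

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	smp_mb__after_spinlock();	/* orders a prior store of the wakeup
					 * condition against the ->state load
					 * below; a no-op where spin_lock()
					 * already provides full ordering */
	if (!(p->state & state))
		goto out;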
diff --git a/init/Kconfig b/init/Kconfig
index 8514b25db21c..5f0ef850e808 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1275,12 +1275,17 @@ config BASE_FULL
1275config FUTEX 1275config FUTEX
1276 bool "Enable futex support" if EXPERT 1276 bool "Enable futex support" if EXPERT
1277 default y 1277 default y
1278 select RT_MUTEXES 1278 imply RT_MUTEXES
1279 help 1279 help
1280 Disabling this option will cause the kernel to be built without 1280 Disabling this option will cause the kernel to be built without
1281 support for "fast userspace mutexes". The resulting kernel may not 1281 support for "fast userspace mutexes". The resulting kernel may not
1282 run glibc-based applications correctly. 1282 run glibc-based applications correctly.
1283 1283
1284config FUTEX_PI
1285 bool
1286 depends on FUTEX && RT_MUTEXES
1287 default y
1288
1284config HAVE_FUTEX_CMPXCHG 1289config HAVE_FUTEX_CMPXCHG
1285 bool 1290 bool
1286 depends on FUTEX 1291 depends on FUTEX
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index df403e97b073..2f4039bafebb 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -577,6 +577,13 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
577 rcu_read_unlock(); 577 rcu_read_unlock();
578} 578}
579 579
580/* Must be called with cpuset_mutex held. */
581static inline int nr_cpusets(void)
582{
583 /* jump label reference count + the top-level cpuset */
584 return static_key_count(&cpusets_enabled_key.key) + 1;
585}
586
580/* 587/*
581 * generate_sched_domains() 588 * generate_sched_domains()
582 * 589 *
diff --git a/kernel/exit.c b/kernel/exit.c
index f9ef3ecc78c1..a35d8a17e01f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -918,6 +918,7 @@ void __noreturn do_exit(long code)
918 exit_rcu(); 918 exit_rcu();
919 exit_tasks_rcu_finish(); 919 exit_tasks_rcu_finish();
920 920
921 lockdep_free_task(tsk);
921 do_task_dead(); 922 do_task_dead();
922} 923}
923EXPORT_SYMBOL_GPL(do_exit); 924EXPORT_SYMBOL_GPL(do_exit);
diff --git a/kernel/fork.c b/kernel/fork.c
index b7e9e57b71ea..dab73d18bc4d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -484,6 +484,8 @@ void __init fork_init(void)
484 cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", 484 cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
485 NULL, free_vm_stack_cache); 485 NULL, free_vm_stack_cache);
486#endif 486#endif
487
488 lockdep_init_task(&init_task);
487} 489}
488 490
489int __weak arch_dup_task_struct(struct task_struct *dst, 491int __weak arch_dup_task_struct(struct task_struct *dst,
@@ -1700,6 +1702,7 @@ static __latent_entropy struct task_struct *copy_process(
1700 p->lockdep_depth = 0; /* no locks held yet */ 1702 p->lockdep_depth = 0; /* no locks held yet */
1701 p->curr_chain_key = 0; 1703 p->curr_chain_key = 0;
1702 p->lockdep_recursion = 0; 1704 p->lockdep_recursion = 0;
1705 lockdep_init_task(p);
1703#endif 1706#endif
1704 1707
1705#ifdef CONFIG_DEBUG_MUTEXES 1708#ifdef CONFIG_DEBUG_MUTEXES
@@ -1958,6 +1961,7 @@ bad_fork_cleanup_audit:
1958bad_fork_cleanup_perf: 1961bad_fork_cleanup_perf:
1959 perf_event_free_task(p); 1962 perf_event_free_task(p);
1960bad_fork_cleanup_policy: 1963bad_fork_cleanup_policy:
1964 lockdep_free_task(p);
1961#ifdef CONFIG_NUMA 1965#ifdef CONFIG_NUMA
1962 mpol_put(p->mempolicy); 1966 mpol_put(p->mempolicy);
1963bad_fork_cleanup_threadgroup_lock: 1967bad_fork_cleanup_threadgroup_lock:
diff --git a/kernel/futex.c b/kernel/futex.c
index f50b434756c1..3d38eaf05492 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -876,6 +876,8 @@ static struct task_struct *futex_find_get_task(pid_t pid)
876 return p; 876 return p;
877} 877}
878 878
879#ifdef CONFIG_FUTEX_PI
880
879/* 881/*
880 * This task is holding PI mutexes at exit time => bad. 882 * This task is holding PI mutexes at exit time => bad.
881 * Kernel cleans up PI-state, but userspace is likely hosed. 883 * Kernel cleans up PI-state, but userspace is likely hosed.
@@ -933,6 +935,8 @@ void exit_pi_state_list(struct task_struct *curr)
933 raw_spin_unlock_irq(&curr->pi_lock); 935 raw_spin_unlock_irq(&curr->pi_lock);
934} 936}
935 937
938#endif
939
936/* 940/*
937 * We need to check the following states: 941 * We need to check the following states:
938 * 942 *
@@ -1547,6 +1551,45 @@ out:
1547 return ret; 1551 return ret;
1548} 1552}
1549 1553
1554static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
1555{
1556 unsigned int op = (encoded_op & 0x70000000) >> 28;
1557 unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
1558 int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 12);
1559 int cmparg = sign_extend32(encoded_op & 0x00000fff, 12);
1560 int oldval, ret;
1561
1562 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
1563 if (oparg < 0 || oparg > 31)
1564 return -EINVAL;
1565 oparg = 1 << oparg;
1566 }
1567
1568 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
1569 return -EFAULT;
1570
1571 ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
1572 if (ret)
1573 return ret;
1574
1575 switch (cmp) {
1576 case FUTEX_OP_CMP_EQ:
1577 return oldval == cmparg;
1578 case FUTEX_OP_CMP_NE:
1579 return oldval != cmparg;
1580 case FUTEX_OP_CMP_LT:
1581 return oldval < cmparg;
1582 case FUTEX_OP_CMP_GE:
1583 return oldval >= cmparg;
1584 case FUTEX_OP_CMP_LE:
1585 return oldval <= cmparg;
1586 case FUTEX_OP_CMP_GT:
1587 return oldval > cmparg;
1588 default:
1589 return -ENOSYS;
1590 }
1591}
1592
1550/* 1593/*
1551 * Wake up all waiters hashed on the physical page that is mapped 1594 * Wake up all waiters hashed on the physical page that is mapped
1552 * to this virtual address: 1595 * to this virtual address:
@@ -1800,6 +1843,15 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1800 struct futex_q *this, *next; 1843 struct futex_q *this, *next;
1801 DEFINE_WAKE_Q(wake_q); 1844 DEFINE_WAKE_Q(wake_q);
1802 1845
1846 /*
1847	 * When PI is not supported: return -ENOSYS if requeue_pi is true;
1848	 * consequently the compiler knows requeue_pi is always false past
1849	 * this point, which will optimize away all the conditional code
1850 * further down.
1851 */
1852 if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
1853 return -ENOSYS;
1854
1803 if (requeue_pi) { 1855 if (requeue_pi) {
1804 /* 1856 /*
1805 * Requeue PI only works on two distinct uaddrs. This 1857 * Requeue PI only works on two distinct uaddrs. This
@@ -2595,6 +2647,9 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
2595 struct futex_q q = futex_q_init; 2647 struct futex_q q = futex_q_init;
2596 int res, ret; 2648 int res, ret;
2597 2649
2650 if (!IS_ENABLED(CONFIG_FUTEX_PI))
2651 return -ENOSYS;
2652
2598 if (refill_pi_state_cache()) 2653 if (refill_pi_state_cache())
2599 return -ENOMEM; 2654 return -ENOMEM;
2600 2655
@@ -2774,6 +2829,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2774 struct futex_q *top_waiter; 2829 struct futex_q *top_waiter;
2775 int ret; 2830 int ret;
2776 2831
2832 if (!IS_ENABLED(CONFIG_FUTEX_PI))
2833 return -ENOSYS;
2834
2777retry: 2835retry:
2778 if (get_user(uval, uaddr)) 2836 if (get_user(uval, uaddr))
2779 return -EFAULT; 2837 return -EFAULT;
@@ -2984,6 +3042,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2984 struct futex_q q = futex_q_init; 3042 struct futex_q q = futex_q_init;
2985 int res, ret; 3043 int res, ret;
2986 3044
3045 if (!IS_ENABLED(CONFIG_FUTEX_PI))
3046 return -ENOSYS;
3047
2987 if (uaddr == uaddr2) 3048 if (uaddr == uaddr2)
2988 return -EINVAL; 3049 return -EINVAL;
2989 3050
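
For reference, the fields futex_atomic_op_inuser() decodes are the ones userspace packs with FUTEX_OP(). A small userspace sketch of a FUTEX_WAKE_OP call, assuming the raw syscall(2) interface:

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Atomically do "*uaddr2 += 1", wake nr_wake waiters on uaddr and, if the
 * old value of *uaddr2 was > 0, also wake nr_wake2 waiters on uaddr2. */
static long my_futex_wake_op(int *uaddr, int *uaddr2, int nr_wake, int nr_wake2)
{
	unsigned int encoded_op = FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_GT, 0);

	return syscall(SYS_futex, uaddr, FUTEX_WAKE_OP, nr_wake,
		       (unsigned long)nr_wake2, uaddr2, encoded_op);
}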
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index d11c506a6ac3..0bf2e8f5244a 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -79,29 +79,7 @@ int static_key_count(struct static_key *key)
79} 79}
80EXPORT_SYMBOL_GPL(static_key_count); 80EXPORT_SYMBOL_GPL(static_key_count);
81 81
82void static_key_enable(struct static_key *key) 82static void static_key_slow_inc_cpuslocked(struct static_key *key)
83{
84 int count = static_key_count(key);
85
86 WARN_ON_ONCE(count < 0 || count > 1);
87
88 if (!count)
89 static_key_slow_inc(key);
90}
91EXPORT_SYMBOL_GPL(static_key_enable);
92
93void static_key_disable(struct static_key *key)
94{
95 int count = static_key_count(key);
96
97 WARN_ON_ONCE(count < 0 || count > 1);
98
99 if (count)
100 static_key_slow_dec(key);
101}
102EXPORT_SYMBOL_GPL(static_key_disable);
103
104void static_key_slow_inc(struct static_key *key)
105{ 83{
106 int v, v1; 84 int v, v1;
107 85
@@ -125,24 +103,87 @@ void static_key_slow_inc(struct static_key *key)
125 return; 103 return;
126 } 104 }
127 105
128 cpus_read_lock();
129 jump_label_lock(); 106 jump_label_lock();
130 if (atomic_read(&key->enabled) == 0) { 107 if (atomic_read(&key->enabled) == 0) {
131 atomic_set(&key->enabled, -1); 108 atomic_set(&key->enabled, -1);
132 jump_label_update(key); 109 jump_label_update(key);
133 atomic_set(&key->enabled, 1); 110 /*
111 * Ensure that if the above cmpxchg loop observes our positive
112 * value, it must also observe all the text changes.
113 */
114 atomic_set_release(&key->enabled, 1);
134 } else { 115 } else {
135 atomic_inc(&key->enabled); 116 atomic_inc(&key->enabled);
136 } 117 }
137 jump_label_unlock(); 118 jump_label_unlock();
119}
120
121void static_key_slow_inc(struct static_key *key)
122{
123 cpus_read_lock();
124 static_key_slow_inc_cpuslocked(key);
138 cpus_read_unlock(); 125 cpus_read_unlock();
139} 126}
140EXPORT_SYMBOL_GPL(static_key_slow_inc); 127EXPORT_SYMBOL_GPL(static_key_slow_inc);
141 128
142static void __static_key_slow_dec(struct static_key *key, 129void static_key_enable_cpuslocked(struct static_key *key)
143 unsigned long rate_limit, struct delayed_work *work) 130{
131 STATIC_KEY_CHECK_USE();
132
133 if (atomic_read(&key->enabled) > 0) {
134 WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
135 return;
136 }
137
138 jump_label_lock();
139 if (atomic_read(&key->enabled) == 0) {
140 atomic_set(&key->enabled, -1);
141 jump_label_update(key);
142 /*
143 * See static_key_slow_inc().
144 */
145 atomic_set_release(&key->enabled, 1);
146 }
147 jump_label_unlock();
148}
149EXPORT_SYMBOL_GPL(static_key_enable_cpuslocked);
150
151void static_key_enable(struct static_key *key)
152{
153 cpus_read_lock();
154 static_key_enable_cpuslocked(key);
155 cpus_read_unlock();
156}
157EXPORT_SYMBOL_GPL(static_key_enable);
158
159void static_key_disable_cpuslocked(struct static_key *key)
160{
161 STATIC_KEY_CHECK_USE();
162
163 if (atomic_read(&key->enabled) != 1) {
164 WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
165 return;
166 }
167
168 jump_label_lock();
169 if (atomic_cmpxchg(&key->enabled, 1, 0))
170 jump_label_update(key);
171 jump_label_unlock();
172}
173EXPORT_SYMBOL_GPL(static_key_disable_cpuslocked);
174
175void static_key_disable(struct static_key *key)
144{ 176{
145 cpus_read_lock(); 177 cpus_read_lock();
178 static_key_disable_cpuslocked(key);
179 cpus_read_unlock();
180}
181EXPORT_SYMBOL_GPL(static_key_disable);
182
183static void static_key_slow_dec_cpuslocked(struct static_key *key,
184 unsigned long rate_limit,
185 struct delayed_work *work)
186{
146 /* 187 /*
147 * The negative count check is valid even when a negative 188 * The negative count check is valid even when a negative
148 * key->enabled is in use by static_key_slow_inc(); a 189 * key->enabled is in use by static_key_slow_inc(); a
@@ -153,7 +194,6 @@ static void __static_key_slow_dec(struct static_key *key,
153 if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { 194 if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
154 WARN(atomic_read(&key->enabled) < 0, 195 WARN(atomic_read(&key->enabled) < 0,
155 "jump label: negative count!\n"); 196 "jump label: negative count!\n");
156 cpus_read_unlock();
157 return; 197 return;
158 } 198 }
159 199
@@ -164,6 +204,14 @@ static void __static_key_slow_dec(struct static_key *key,
164 jump_label_update(key); 204 jump_label_update(key);
165 } 205 }
166 jump_label_unlock(); 206 jump_label_unlock();
207}
208
209static void __static_key_slow_dec(struct static_key *key,
210 unsigned long rate_limit,
211 struct delayed_work *work)
212{
213 cpus_read_lock();
214 static_key_slow_dec_cpuslocked(key, rate_limit, work);
167 cpus_read_unlock(); 215 cpus_read_unlock();
168} 216}
169 217
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 7d2499bec5fe..44c8d0d17170 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -58,6 +58,10 @@
58#define CREATE_TRACE_POINTS 58#define CREATE_TRACE_POINTS
59#include <trace/events/lock.h> 59#include <trace/events/lock.h>
60 60
61#ifdef CONFIG_LOCKDEP_CROSSRELEASE
62#include <linux/slab.h>
63#endif
64
61#ifdef CONFIG_PROVE_LOCKING 65#ifdef CONFIG_PROVE_LOCKING
62int prove_locking = 1; 66int prove_locking = 1;
63module_param(prove_locking, int, 0644); 67module_param(prove_locking, int, 0644);
@@ -344,14 +348,12 @@ EXPORT_SYMBOL(lockdep_on);
344#if VERBOSE 348#if VERBOSE
345# define HARDIRQ_VERBOSE 1 349# define HARDIRQ_VERBOSE 1
346# define SOFTIRQ_VERBOSE 1 350# define SOFTIRQ_VERBOSE 1
347# define RECLAIM_VERBOSE 1
348#else 351#else
349# define HARDIRQ_VERBOSE 0 352# define HARDIRQ_VERBOSE 0
350# define SOFTIRQ_VERBOSE 0 353# define SOFTIRQ_VERBOSE 0
351# define RECLAIM_VERBOSE 0
352#endif 354#endif
353 355
354#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE 356#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
355/* 357/*
356 * Quick filtering for interesting events: 358 * Quick filtering for interesting events:
357 */ 359 */
@@ -726,6 +728,18 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
726 return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); 728 return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
727} 729}
728 730
731#ifdef CONFIG_LOCKDEP_CROSSRELEASE
732static void cross_init(struct lockdep_map *lock, int cross);
733static int cross_lock(struct lockdep_map *lock);
734static int lock_acquire_crosslock(struct held_lock *hlock);
735static int lock_release_crosslock(struct lockdep_map *lock);
736#else
737static inline void cross_init(struct lockdep_map *lock, int cross) {}
738static inline int cross_lock(struct lockdep_map *lock) { return 0; }
739static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; }
740static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; }
741#endif
742
729/* 743/*
730 * Register a lock's class in the hash-table, if the class is not present 744 * Register a lock's class in the hash-table, if the class is not present
731 * yet. Otherwise we look it up. We cache the result in the lock object 745 * yet. Otherwise we look it up. We cache the result in the lock object
@@ -1125,22 +1139,41 @@ print_circular_lock_scenario(struct held_lock *src,
1125 printk(KERN_CONT "\n\n"); 1139 printk(KERN_CONT "\n\n");
1126 } 1140 }
1127 1141
1128 printk(" Possible unsafe locking scenario:\n\n"); 1142 if (cross_lock(tgt->instance)) {
1129 printk(" CPU0 CPU1\n"); 1143 printk(" Possible unsafe locking scenario by crosslock:\n\n");
1130 printk(" ---- ----\n"); 1144 printk(" CPU0 CPU1\n");
1131 printk(" lock("); 1145 printk(" ---- ----\n");
1132 __print_lock_name(target); 1146 printk(" lock(");
1133 printk(KERN_CONT ");\n"); 1147 __print_lock_name(parent);
1134 printk(" lock("); 1148 printk(KERN_CONT ");\n");
1135 __print_lock_name(parent); 1149 printk(" lock(");
1136 printk(KERN_CONT ");\n"); 1150 __print_lock_name(target);
1137 printk(" lock("); 1151 printk(KERN_CONT ");\n");
1138 __print_lock_name(target); 1152 printk(" lock(");
1139 printk(KERN_CONT ");\n"); 1153 __print_lock_name(source);
1140 printk(" lock("); 1154 printk(KERN_CONT ");\n");
1141 __print_lock_name(source); 1155 printk(" unlock(");
1142 printk(KERN_CONT ");\n"); 1156 __print_lock_name(target);
1143 printk("\n *** DEADLOCK ***\n\n"); 1157 printk(KERN_CONT ");\n");
1158 printk("\n *** DEADLOCK ***\n\n");
1159 } else {
1160 printk(" Possible unsafe locking scenario:\n\n");
1161 printk(" CPU0 CPU1\n");
1162 printk(" ---- ----\n");
1163 printk(" lock(");
1164 __print_lock_name(target);
1165 printk(KERN_CONT ");\n");
1166 printk(" lock(");
1167 __print_lock_name(parent);
1168 printk(KERN_CONT ");\n");
1169 printk(" lock(");
1170 __print_lock_name(target);
1171 printk(KERN_CONT ");\n");
1172 printk(" lock(");
1173 __print_lock_name(source);
1174 printk(KERN_CONT ");\n");
1175 printk("\n *** DEADLOCK ***\n\n");
1176 }
1144} 1177}
1145 1178
1146/* 1179/*
@@ -1165,7 +1198,12 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1165 pr_warn("%s/%d is trying to acquire lock:\n", 1198 pr_warn("%s/%d is trying to acquire lock:\n",
1166 curr->comm, task_pid_nr(curr)); 1199 curr->comm, task_pid_nr(curr));
1167 print_lock(check_src); 1200 print_lock(check_src);
1168 pr_warn("\nbut task is already holding lock:\n"); 1201
1202 if (cross_lock(check_tgt->instance))
1203 pr_warn("\nbut now in release context of a crosslock acquired at the following:\n");
1204 else
1205 pr_warn("\nbut task is already holding lock:\n");
1206
1169 print_lock(check_tgt); 1207 print_lock(check_tgt);
1170 pr_warn("\nwhich lock already depends on the new lock.\n\n"); 1208 pr_warn("\nwhich lock already depends on the new lock.\n\n");
1171 pr_warn("\nthe existing dependency chain (in reverse order) is:\n"); 1209 pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
@@ -1183,7 +1221,8 @@ static inline int class_equal(struct lock_list *entry, void *data)
1183static noinline int print_circular_bug(struct lock_list *this, 1221static noinline int print_circular_bug(struct lock_list *this,
1184 struct lock_list *target, 1222 struct lock_list *target,
1185 struct held_lock *check_src, 1223 struct held_lock *check_src,
1186 struct held_lock *check_tgt) 1224 struct held_lock *check_tgt,
1225 struct stack_trace *trace)
1187{ 1226{
1188 struct task_struct *curr = current; 1227 struct task_struct *curr = current;
1189 struct lock_list *parent; 1228 struct lock_list *parent;
@@ -1193,7 +1232,9 @@ static noinline int print_circular_bug(struct lock_list *this,
1193 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1232 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1194 return 0; 1233 return 0;
1195 1234
1196 if (!save_trace(&this->trace)) 1235 if (cross_lock(check_tgt->instance))
1236 this->trace = *trace;
1237 else if (!save_trace(&this->trace))
1197 return 0; 1238 return 0;
1198 1239
1199 depth = get_lock_depth(target); 1240 depth = get_lock_depth(target);
@@ -1309,6 +1350,19 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
1309 return result; 1350 return result;
1310} 1351}
1311 1352
1353static noinline int
1354check_redundant(struct lock_list *root, struct lock_class *target,
1355 struct lock_list **target_entry)
1356{
1357 int result;
1358
1359 debug_atomic_inc(nr_redundant_checks);
1360
1361 result = __bfs_forwards(root, target, class_equal, target_entry);
1362
1363 return result;
1364}
1365
1312#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) 1366#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
1313/* 1367/*
1314 * Forwards and backwards subgraph searching, for the purposes of 1368 * Forwards and backwards subgraph searching, for the purposes of
@@ -1784,6 +1838,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
1784 if (nest) 1838 if (nest)
1785 return 2; 1839 return 2;
1786 1840
1841 if (cross_lock(prev->instance))
1842 continue;
1843
1787 return print_deadlock_bug(curr, prev, next); 1844 return print_deadlock_bug(curr, prev, next);
1788 } 1845 }
1789 return 1; 1846 return 1;
@@ -1813,20 +1870,13 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
1813 */ 1870 */
1814static int 1871static int
1815check_prev_add(struct task_struct *curr, struct held_lock *prev, 1872check_prev_add(struct task_struct *curr, struct held_lock *prev,
1816 struct held_lock *next, int distance, int *stack_saved) 1873 struct held_lock *next, int distance, struct stack_trace *trace,
1874 int (*save)(struct stack_trace *trace))
1817{ 1875{
1818 struct lock_list *entry; 1876 struct lock_list *entry;
1819 int ret; 1877 int ret;
1820 struct lock_list this; 1878 struct lock_list this;
1821 struct lock_list *uninitialized_var(target_entry); 1879 struct lock_list *uninitialized_var(target_entry);
1822 /*
1823 * Static variable, serialized by the graph_lock().
1824 *
1825 * We use this static variable to save the stack trace in case
1826 * we call into this function multiple times due to encountering
1827 * trylocks in the held lock stack.
1828 */
1829 static struct stack_trace trace;
1830 1880
1831 /* 1881 /*
1832 * Prove that the new <prev> -> <next> dependency would not 1882 * Prove that the new <prev> -> <next> dependency would not
@@ -1841,7 +1891,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1841 this.parent = NULL; 1891 this.parent = NULL;
1842 ret = check_noncircular(&this, hlock_class(prev), &target_entry); 1892 ret = check_noncircular(&this, hlock_class(prev), &target_entry);
1843 if (unlikely(!ret)) 1893 if (unlikely(!ret))
1844 return print_circular_bug(&this, target_entry, next, prev); 1894 return print_circular_bug(&this, target_entry, next, prev, trace);
1845 else if (unlikely(ret < 0)) 1895 else if (unlikely(ret < 0))
1846 return print_bfs_bug(ret); 1896 return print_bfs_bug(ret);
1847 1897
@@ -1870,15 +1920,26 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1870 if (entry->class == hlock_class(next)) { 1920 if (entry->class == hlock_class(next)) {
1871 if (distance == 1) 1921 if (distance == 1)
1872 entry->distance = 1; 1922 entry->distance = 1;
1873 return 2; 1923 return 1;
1874 } 1924 }
1875 } 1925 }
1876 1926
1877 if (!*stack_saved) { 1927 /*
1878 if (!save_trace(&trace)) 1928 * Is the <prev> -> <next> link redundant?
1879 return 0; 1929 */
1880 *stack_saved = 1; 1930 this.class = hlock_class(prev);
1931 this.parent = NULL;
1932 ret = check_redundant(&this, hlock_class(next), &target_entry);
1933 if (!ret) {
1934 debug_atomic_inc(nr_redundant);
1935 return 2;
1881 } 1936 }
1937 if (ret < 0)
1938 return print_bfs_bug(ret);
1939
1940
1941 if (save && !save(trace))
1942 return 0;
1882 1943
1883 /* 1944 /*
1884 * Ok, all validations passed, add the new lock 1945 * Ok, all validations passed, add the new lock
@@ -1886,14 +1947,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1886 */ 1947 */
1887 ret = add_lock_to_list(hlock_class(next), 1948 ret = add_lock_to_list(hlock_class(next),
1888 &hlock_class(prev)->locks_after, 1949 &hlock_class(prev)->locks_after,
1889 next->acquire_ip, distance, &trace); 1950 next->acquire_ip, distance, trace);
1890 1951
1891 if (!ret) 1952 if (!ret)
1892 return 0; 1953 return 0;
1893 1954
1894 ret = add_lock_to_list(hlock_class(prev), 1955 ret = add_lock_to_list(hlock_class(prev),
1895 &hlock_class(next)->locks_before, 1956 &hlock_class(next)->locks_before,
1896 next->acquire_ip, distance, &trace); 1957 next->acquire_ip, distance, trace);
1897 if (!ret) 1958 if (!ret)
1898 return 0; 1959 return 0;
1899 1960
@@ -1901,8 +1962,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1901 * Debugging printouts: 1962 * Debugging printouts:
1902 */ 1963 */
1903 if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { 1964 if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
1904 /* We drop graph lock, so another thread can overwrite trace. */
1905 *stack_saved = 0;
1906 graph_unlock(); 1965 graph_unlock();
1907 printk("\n new dependency: "); 1966 printk("\n new dependency: ");
1908 print_lock_name(hlock_class(prev)); 1967 print_lock_name(hlock_class(prev));
@@ -1910,9 +1969,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
1910 print_lock_name(hlock_class(next)); 1969 print_lock_name(hlock_class(next));
1911 printk(KERN_CONT "\n"); 1970 printk(KERN_CONT "\n");
1912 dump_stack(); 1971 dump_stack();
1913 return graph_lock(); 1972 if (!graph_lock())
1973 return 0;
1914 } 1974 }
1915 return 1; 1975 return 2;
1916} 1976}
1917 1977
1918/* 1978/*
@@ -1925,8 +1985,9 @@ static int
1925check_prevs_add(struct task_struct *curr, struct held_lock *next) 1985check_prevs_add(struct task_struct *curr, struct held_lock *next)
1926{ 1986{
1927 int depth = curr->lockdep_depth; 1987 int depth = curr->lockdep_depth;
1928 int stack_saved = 0;
1929 struct held_lock *hlock; 1988 struct held_lock *hlock;
1989 struct stack_trace trace;
1990 int (*save)(struct stack_trace *trace) = save_trace;
1930 1991
1931 /* 1992 /*
1932 * Debugging checks. 1993 * Debugging checks.
@@ -1947,21 +2008,36 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
1947 int distance = curr->lockdep_depth - depth + 1; 2008 int distance = curr->lockdep_depth - depth + 1;
1948 hlock = curr->held_locks + depth - 1; 2009 hlock = curr->held_locks + depth - 1;
1949 /* 2010 /*
1950 * Only non-recursive-read entries get new dependencies 2011 * Only non-crosslock entries get new dependencies added.
1951 * added: 2012 * Crosslock entries will be added by commit later:
1952 */ 2013 */
1953 if (hlock->read != 2 && hlock->check) { 2014 if (!cross_lock(hlock->instance)) {
1954 if (!check_prev_add(curr, hlock, next,
1955 distance, &stack_saved))
1956 return 0;
1957 /* 2015 /*
1958 * Stop after the first non-trylock entry, 2016 * Only non-recursive-read entries get new dependencies
1959 * as non-trylock entries have added their 2017 * added:
1960 * own direct dependencies already, so this
1961 * lock is connected to them indirectly:
1962 */ 2018 */
1963 if (!hlock->trylock) 2019 if (hlock->read != 2 && hlock->check) {
1964 break; 2020 int ret = check_prev_add(curr, hlock, next,
2021 distance, &trace, save);
2022 if (!ret)
2023 return 0;
2024
2025 /*
2026 * Stop saving stack_trace if save_trace() was
2027 * called at least once:
2028 */
2029 if (save && ret == 2)
2030 save = NULL;
2031
2032 /*
2033 * Stop after the first non-trylock entry,
2034 * as non-trylock entries have added their
2035 * own direct dependencies already, so this
2036 * lock is connected to them indirectly:
2037 */
2038 if (!hlock->trylock)
2039 break;
2040 }
1965 } 2041 }
1966 depth--; 2042 depth--;
1967 /* 2043 /*
@@ -2126,19 +2202,26 @@ static int check_no_collision(struct task_struct *curr,
2126} 2202}
2127 2203
2128/* 2204/*
2129 * Look up a dependency chain. If the key is not present yet then 2205 * This is for building a chain between just two different classes,
2130 * add it and return 1 - in this case the new dependency chain is 2206 * instead of adding a new hlock upon current, which is done by
2131 * validated. If the key is already hashed, return 0. 2207 * add_chain_cache().
2132 * (On return with 1 graph_lock is held.) 2208 *
2209 * This can be called in any context with two classes, while
                                 2210 * add_chain_cache() must be done within the lock owner's context
2211 * since it uses hlock which might be racy in another context.
2133 */ 2212 */
2134static inline int lookup_chain_cache(struct task_struct *curr, 2213static inline int add_chain_cache_classes(unsigned int prev,
2135 struct held_lock *hlock, 2214 unsigned int next,
2136 u64 chain_key) 2215 unsigned int irq_context,
2216 u64 chain_key)
2137{ 2217{
2138 struct lock_class *class = hlock_class(hlock);
2139 struct hlist_head *hash_head = chainhashentry(chain_key); 2218 struct hlist_head *hash_head = chainhashentry(chain_key);
2140 struct lock_chain *chain; 2219 struct lock_chain *chain;
2141 int i, j; 2220
2221 /*
2222 * Allocate a new chain entry from the static array, and add
2223 * it to the hash:
2224 */
2142 2225
2143 /* 2226 /*
2144 * We might need to take the graph lock, ensure we've got IRQs 2227 * We might need to take the graph lock, ensure we've got IRQs
@@ -2147,43 +2230,76 @@ static inline int lookup_chain_cache(struct task_struct *curr,
2147 */ 2230 */
2148 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) 2231 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2149 return 0; 2232 return 0;
2233
2234 if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
2235 if (!debug_locks_off_graph_unlock())
2236 return 0;
2237
2238 print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
2239 dump_stack();
2240 return 0;
2241 }
2242
2243 chain = lock_chains + nr_lock_chains++;
2244 chain->chain_key = chain_key;
2245 chain->irq_context = irq_context;
2246 chain->depth = 2;
2247 if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
2248 chain->base = nr_chain_hlocks;
2249 nr_chain_hlocks += chain->depth;
2250 chain_hlocks[chain->base] = prev - 1;
                                 2251		chain_hlocks[chain->base + 1] = next - 1;
2252 }
2253#ifdef CONFIG_DEBUG_LOCKDEP
2150 /* 2254 /*
2151 * We can walk it lock-free, because entries only get added 2255 * Important for check_no_collision().
2152 * to the hash:
2153 */ 2256 */
2154 hlist_for_each_entry_rcu(chain, hash_head, entry) { 2257 else {
2155 if (chain->chain_key == chain_key) { 2258 if (!debug_locks_off_graph_unlock())
2156cache_hit:
2157 debug_atomic_inc(chain_lookup_hits);
2158 if (!check_no_collision(curr, hlock, chain))
2159 return 0;
2160
2161 if (very_verbose(class))
2162 printk("\nhash chain already cached, key: "
2163 "%016Lx tail class: [%p] %s\n",
2164 (unsigned long long)chain_key,
2165 class->key, class->name);
2166 return 0; 2259 return 0;
2167 } 2260
2261 print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
2262 dump_stack();
2263 return 0;
2168 } 2264 }
2169 if (very_verbose(class)) 2265#endif
2170 printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", 2266
2171 (unsigned long long)chain_key, class->key, class->name); 2267 hlist_add_head_rcu(&chain->entry, hash_head);
2268 debug_atomic_inc(chain_lookup_misses);
2269 inc_chains();
2270
2271 return 1;
2272}
2273
2274/*
2275 * Adds a dependency chain into chain hashtable. And must be called with
2276 * graph_lock held.
2277 *
2278 * Return 0 if fail, and graph_lock is released.
2279 * Return 1 if succeed, with graph_lock held.
2280 */
2281static inline int add_chain_cache(struct task_struct *curr,
2282 struct held_lock *hlock,
2283 u64 chain_key)
2284{
2285 struct lock_class *class = hlock_class(hlock);
2286 struct hlist_head *hash_head = chainhashentry(chain_key);
2287 struct lock_chain *chain;
2288 int i, j;
2289
2172 /* 2290 /*
2173 * Allocate a new chain entry from the static array, and add 2291 * Allocate a new chain entry from the static array, and add
2174 * it to the hash: 2292 * it to the hash:
2175 */ 2293 */
2176 if (!graph_lock()) 2294
2177 return 0;
2178 /* 2295 /*
2179 * We have to walk the chain again locked - to avoid duplicates: 2296 * We might need to take the graph lock, ensure we've got IRQs
2297 * disabled to make this an IRQ-safe lock.. for recursion reasons
2298 * lockdep won't complain about its own locking errors.
2180 */ 2299 */
2181 hlist_for_each_entry(chain, hash_head, entry) { 2300 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2182 if (chain->chain_key == chain_key) { 2301 return 0;
2183 graph_unlock(); 2302
2184 goto cache_hit;
2185 }
2186 }
2187 if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { 2303 if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
2188 if (!debug_locks_off_graph_unlock()) 2304 if (!debug_locks_off_graph_unlock())
2189 return 0; 2305 return 0;
@@ -2235,6 +2351,78 @@ cache_hit:
2235 return 1; 2351 return 1;
2236} 2352}
2237 2353
2354/*
2355 * Look up a dependency chain.
2356 */
2357static inline struct lock_chain *lookup_chain_cache(u64 chain_key)
2358{
2359 struct hlist_head *hash_head = chainhashentry(chain_key);
2360 struct lock_chain *chain;
2361
2362 /*
2363 * We can walk it lock-free, because entries only get added
2364 * to the hash:
2365 */
2366 hlist_for_each_entry_rcu(chain, hash_head, entry) {
2367 if (chain->chain_key == chain_key) {
2368 debug_atomic_inc(chain_lookup_hits);
2369 return chain;
2370 }
2371 }
2372 return NULL;
2373}
2374
2375/*
2376 * If the key is not present yet in dependency chain cache then
2377 * add it and return 1 - in this case the new dependency chain is
2378 * validated. If the key is already hashed, return 0.
2379 * (On return with 1 graph_lock is held.)
2380 */
2381static inline int lookup_chain_cache_add(struct task_struct *curr,
2382 struct held_lock *hlock,
2383 u64 chain_key)
2384{
2385 struct lock_class *class = hlock_class(hlock);
2386 struct lock_chain *chain = lookup_chain_cache(chain_key);
2387
2388 if (chain) {
2389cache_hit:
2390 if (!check_no_collision(curr, hlock, chain))
2391 return 0;
2392
2393 if (very_verbose(class)) {
2394 printk("\nhash chain already cached, key: "
2395 "%016Lx tail class: [%p] %s\n",
2396 (unsigned long long)chain_key,
2397 class->key, class->name);
2398 }
2399
2400 return 0;
2401 }
2402
2403 if (very_verbose(class)) {
2404 printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n",
2405 (unsigned long long)chain_key, class->key, class->name);
2406 }
2407
2408 if (!graph_lock())
2409 return 0;
2410
2411 /*
2412 * We have to walk the chain again locked - to avoid duplicates:
2413 */
2414 chain = lookup_chain_cache(chain_key);
2415 if (chain) {
2416 graph_unlock();
2417 goto cache_hit;
2418 }
2419
2420 if (!add_chain_cache(curr, hlock, chain_key))
2421 return 0;
2422
2423 return 1;
2424}
2425
2238static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, 2426static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
2239 struct held_lock *hlock, int chain_head, u64 chain_key) 2427 struct held_lock *hlock, int chain_head, u64 chain_key)
2240{ 2428{
@@ -2245,11 +2433,11 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
2245 * 2433 *
2246 * We look up the chain_key and do the O(N^2) check and update of 2434 * We look up the chain_key and do the O(N^2) check and update of
2247 * the dependencies only if this is a new dependency chain. 2435 * the dependencies only if this is a new dependency chain.
2248 * (If lookup_chain_cache() returns with 1 it acquires 2436 * (If lookup_chain_cache_add() returns with 1 it acquires
2249 * graph_lock for us) 2437 * graph_lock for us)
2250 */ 2438 */
2251 if (!hlock->trylock && hlock->check && 2439 if (!hlock->trylock && hlock->check &&
2252 lookup_chain_cache(curr, hlock, chain_key)) { 2440 lookup_chain_cache_add(curr, hlock, chain_key)) {
2253 /* 2441 /*
2254 * Check whether last held lock: 2442 * Check whether last held lock:
2255 * 2443 *
@@ -2277,14 +2465,17 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
2277 * Add dependency only if this lock is not the head 2465 * Add dependency only if this lock is not the head
2278 * of the chain, and if it's not a secondary read-lock: 2466 * of the chain, and if it's not a secondary read-lock:
2279 */ 2467 */
2280 if (!chain_head && ret != 2) 2468 if (!chain_head && ret != 2) {
2281 if (!check_prevs_add(curr, hlock)) 2469 if (!check_prevs_add(curr, hlock))
2282 return 0; 2470 return 0;
2471 }
2472
2283 graph_unlock(); 2473 graph_unlock();
2284 } else 2474 } else {
2285 /* after lookup_chain_cache(): */ 2475 /* after lookup_chain_cache_add(): */
2286 if (unlikely(!debug_locks)) 2476 if (unlikely(!debug_locks))
2287 return 0; 2477 return 0;
2478 }
2288 2479
2289 return 1; 2480 return 1;
2290} 2481}
@@ -2567,14 +2758,6 @@ static int SOFTIRQ_verbose(struct lock_class *class)
2567 return 0; 2758 return 0;
2568} 2759}
2569 2760
2570static int RECLAIM_FS_verbose(struct lock_class *class)
2571{
2572#if RECLAIM_VERBOSE
2573 return class_filter(class);
2574#endif
2575 return 0;
2576}
2577
2578#define STRICT_READ_CHECKS 1 2761#define STRICT_READ_CHECKS 1
2579 2762
2580static int (*state_verbose_f[])(struct lock_class *class) = { 2763static int (*state_verbose_f[])(struct lock_class *class) = {
@@ -2870,57 +3053,6 @@ void trace_softirqs_off(unsigned long ip)
2870 debug_atomic_inc(redundant_softirqs_off); 3053 debug_atomic_inc(redundant_softirqs_off);
2871} 3054}
2872 3055
2873static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
2874{
2875 struct task_struct *curr = current;
2876
2877 if (unlikely(!debug_locks))
2878 return;
2879
2880 gfp_mask = current_gfp_context(gfp_mask);
2881
2882 /* no reclaim without waiting on it */
2883 if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
2884 return;
2885
2886 /* this guy won't enter reclaim */
2887 if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
2888 return;
2889
2890 /* We're only interested __GFP_FS allocations for now */
2891 if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS))
2892 return;
2893
2894 /*
2895 * Oi! Can't be having __GFP_FS allocations with IRQs disabled.
2896 */
2897 if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
2898 return;
2899
2900 /* Disable lockdep if explicitly requested */
2901 if (gfp_mask & __GFP_NOLOCKDEP)
2902 return;
2903
2904 mark_held_locks(curr, RECLAIM_FS);
2905}
2906
2907static void check_flags(unsigned long flags);
2908
2909void lockdep_trace_alloc(gfp_t gfp_mask)
2910{
2911 unsigned long flags;
2912
2913 if (unlikely(current->lockdep_recursion))
2914 return;
2915
2916 raw_local_irq_save(flags);
2917 check_flags(flags);
2918 current->lockdep_recursion = 1;
2919 __lockdep_trace_alloc(gfp_mask, flags);
2920 current->lockdep_recursion = 0;
2921 raw_local_irq_restore(flags);
2922}
2923
2924static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) 3056static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2925{ 3057{
2926 /* 3058 /*
@@ -2966,22 +3098,6 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
2966 } 3098 }
2967 } 3099 }
2968 3100
2969 /*
2970 * We reuse the irq context infrastructure more broadly as a general
2971 * context checking code. This tests GFP_FS recursion (a lock taken
2972 * during reclaim for a GFP_FS allocation is held over a GFP_FS
2973 * allocation).
2974 */
2975 if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) {
2976 if (hlock->read) {
2977 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
2978 return 0;
2979 } else {
2980 if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
2981 return 0;
2982 }
2983 }
2984
2985 return 1; 3101 return 1;
2986} 3102}
2987 3103
@@ -3040,10 +3156,6 @@ static inline int separate_irq_context(struct task_struct *curr,
3040 return 0; 3156 return 0;
3041} 3157}
3042 3158
3043void lockdep_trace_alloc(gfp_t gfp_mask)
3044{
3045}
3046
3047#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ 3159#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
3048 3160
3049/* 3161/*
@@ -3116,7 +3228,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
3116/* 3228/*
3117 * Initialize a lock instance's lock-class mapping info: 3229 * Initialize a lock instance's lock-class mapping info:
3118 */ 3230 */
3119void lockdep_init_map(struct lockdep_map *lock, const char *name, 3231static void __lockdep_init_map(struct lockdep_map *lock, const char *name,
3120 struct lock_class_key *key, int subclass) 3232 struct lock_class_key *key, int subclass)
3121{ 3233{
3122 int i; 3234 int i;
@@ -3174,8 +3286,25 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
3174 raw_local_irq_restore(flags); 3286 raw_local_irq_restore(flags);
3175 } 3287 }
3176} 3288}
3289
3290void lockdep_init_map(struct lockdep_map *lock, const char *name,
3291 struct lock_class_key *key, int subclass)
3292{
3293 cross_init(lock, 0);
3294 __lockdep_init_map(lock, name, key, subclass);
3295}
3177EXPORT_SYMBOL_GPL(lockdep_init_map); 3296EXPORT_SYMBOL_GPL(lockdep_init_map);
3178 3297
3298#ifdef CONFIG_LOCKDEP_CROSSRELEASE
3299void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name,
3300 struct lock_class_key *key, int subclass)
3301{
3302 cross_init(lock, 1);
3303 __lockdep_init_map(lock, name, key, subclass);
3304}
3305EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock);
3306#endif
3307
3179struct lock_class_key __lockdep_no_validate__; 3308struct lock_class_key __lockdep_no_validate__;
3180EXPORT_SYMBOL_GPL(__lockdep_no_validate__); 3309EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
3181 3310
@@ -3231,6 +3360,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3231 int chain_head = 0; 3360 int chain_head = 0;
3232 int class_idx; 3361 int class_idx;
3233 u64 chain_key; 3362 u64 chain_key;
3363 int ret;
3234 3364
3235 if (unlikely(!debug_locks)) 3365 if (unlikely(!debug_locks))
3236 return 0; 3366 return 0;
@@ -3279,7 +3409,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3279 3409
3280 class_idx = class - lock_classes + 1; 3410 class_idx = class - lock_classes + 1;
3281 3411
3282 if (depth) { 3412 /* TODO: nest_lock is not implemented for crosslock yet. */
3413 if (depth && !cross_lock(lock)) {
3283 hlock = curr->held_locks + depth - 1; 3414 hlock = curr->held_locks + depth - 1;
3284 if (hlock->class_idx == class_idx && nest_lock) { 3415 if (hlock->class_idx == class_idx && nest_lock) {
3285 if (hlock->references) { 3416 if (hlock->references) {
@@ -3367,6 +3498,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3367 if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) 3498 if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
3368 return 0; 3499 return 0;
3369 3500
3501 ret = lock_acquire_crosslock(hlock);
3502 /*
3503 * 2 means normal acquire operations are needed. Otherwise, it's
3504 * ok just to return with '0:fail, 1:success'.
3505 */
3506 if (ret != 2)
3507 return ret;
3508
3370 curr->curr_chain_key = chain_key; 3509 curr->curr_chain_key = chain_key;
3371 curr->lockdep_depth++; 3510 curr->lockdep_depth++;
3372 check_chain_key(curr); 3511 check_chain_key(curr);
@@ -3604,11 +3743,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
3604 struct task_struct *curr = current; 3743 struct task_struct *curr = current;
3605 struct held_lock *hlock; 3744 struct held_lock *hlock;
3606 unsigned int depth; 3745 unsigned int depth;
3607 int i; 3746 int ret, i;
3608 3747
3609 if (unlikely(!debug_locks)) 3748 if (unlikely(!debug_locks))
3610 return 0; 3749 return 0;
3611 3750
3751 ret = lock_release_crosslock(lock);
3752 /*
3753 * 2 means normal release operations are needed. Otherwise, it's
3754 * ok just to return with '0:fail, 1:success'.
3755 */
3756 if (ret != 2)
3757 return ret;
3758
3612 depth = curr->lockdep_depth; 3759 depth = curr->lockdep_depth;
3613 /* 3760 /*
3614 * So we're all set to release this lock.. wait what lock? We don't 3761 * So we're all set to release this lock.. wait what lock? We don't
@@ -3952,18 +4099,6 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
3952} 4099}
3953EXPORT_SYMBOL_GPL(lock_unpin_lock); 4100EXPORT_SYMBOL_GPL(lock_unpin_lock);
3954 4101
3955void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
3956{
3957 current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask);
3958}
3959EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state);
3960
3961void lockdep_clear_current_reclaim_state(void)
3962{
3963 current->lockdep_reclaim_gfp = 0;
3964}
3965EXPORT_SYMBOL_GPL(lockdep_clear_current_reclaim_state);
3966
3967#ifdef CONFIG_LOCK_STAT 4102#ifdef CONFIG_LOCK_STAT
3968static int 4103static int
3969print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, 4104print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
@@ -4484,6 +4619,12 @@ asmlinkage __visible void lockdep_sys_exit(void)
4484 curr->comm, curr->pid); 4619 curr->comm, curr->pid);
4485 lockdep_print_held_locks(curr); 4620 lockdep_print_held_locks(curr);
4486 } 4621 }
4622
4623 /*
4624 * The lock history for each syscall should be independent. So wipe the
4625 * slate clean on return to userspace.
4626 */
4627 lockdep_invariant_state(false);
4487} 4628}
4488 4629
4489void lockdep_rcu_suspicious(const char *file, const int line, const char *s) 4630void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
@@ -4532,3 +4673,488 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4532 dump_stack(); 4673 dump_stack();
4533} 4674}
4534EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); 4675EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
4676
4677#ifdef CONFIG_LOCKDEP_CROSSRELEASE
4678
4679/*
4680 * Crossrelease works by recording a lock history for each thread and
4681 * connecting those historic locks that were taken after the
4682 * wait_for_completion() in the complete() context.
4683 *
4684 * Task-A Task-B
4685 *
4686 * mutex_lock(&A);
4687 * mutex_unlock(&A);
4688 *
4689 * wait_for_completion(&C);
4690 * lock_acquire_crosslock();
4691 * atomic_inc_return(&cross_gen_id);
4692 * |
4693 * | mutex_lock(&B);
4694 * | mutex_unlock(&B);
4695 * |
4696 * | complete(&C);
4697 * `-- lock_commit_crosslock();
4698 *
4699 * Which will then add a dependency between B and C.
4700 */
4701
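
To make the diagram above concrete, here is a minimal, purely illustrative sketch of the kind of code crossrelease is meant to analyse; the lock, completion and function names are invented for this example and are not part of the patch:

	#include <linux/mutex.h>
	#include <linux/completion.h>

	static DEFINE_MUTEX(A);
	static DEFINE_MUTEX(B);
	static DECLARE_COMPLETION(C);

	/* Task-A: the waiter. wait_for_completion() acquires the crosslock C. */
	static void task_a(void)
	{
		mutex_lock(&A);
		mutex_unlock(&A);
		wait_for_completion(&C);
	}

	/* Task-B: the completer. complete() commits and records the C -> B edge. */
	static void task_b(void)
	{
		mutex_lock(&B);
		mutex_unlock(&B);
		complete(&C);
	}

If any other context were to wait for C while holding B, that B -> C ordering combined with the recorded C -> B edge would let lockdep report the circle, even though C is released from a different task than the one waiting on it.
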
4702#define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR])
4703
4704/*
4705 * Whenever a crosslock is held, cross_gen_id will be increased.
4706 */
4707static atomic_t cross_gen_id; /* Can be wrapped */
4708
4709/*
4710 * Make an entry of the ring buffer invalid.
4711 */
4712static inline void invalidate_xhlock(struct hist_lock *xhlock)
4713{
4714 /*
4715 * Normally, xhlock->hlock.instance must be !NULL.
4716 */
4717 xhlock->hlock.instance = NULL;
4718}
4719
4720/*
4721 * Lock history stacks; we have 2 nested lock history stacks:
4722 *
4723 * HARD(IRQ)
4724 * SOFT(IRQ)
4725 *
4726 * The thing is that once we complete a HARD/SOFT IRQ the future task locks
4727 * should not depend on any of the locks observed while running the IRQ. So
4728 * what we do is rewind the history buffer and erase all our knowledge of that
4729 * temporal event.
4730 */
4731
4732void crossrelease_hist_start(enum xhlock_context_t c)
4733{
4734 struct task_struct *cur = current;
4735
4736 if (!cur->xhlocks)
4737 return;
4738
4739 cur->xhlock_idx_hist[c] = cur->xhlock_idx;
4740 cur->hist_id_save[c] = cur->hist_id;
4741}
4742
4743void crossrelease_hist_end(enum xhlock_context_t c)
4744{
4745 struct task_struct *cur = current;
4746
4747 if (cur->xhlocks) {
4748 unsigned int idx = cur->xhlock_idx_hist[c];
4749 struct hist_lock *h = &xhlock(idx);
4750
4751 cur->xhlock_idx = idx;
4752
4753 /* Check if the ring was overwritten. */
4754 if (h->hist_id != cur->hist_id_save[c])
4755 invalidate_xhlock(h);
4756 }
4757}
4758
4759/*
4760 * lockdep_invariant_state() is used to annotate independence inside a task, to
4761 * make one task look like multiple independent 'tasks'.
4762 *
4763 * Take for instance workqueues; each work is independent of the last. The
4764 * completion of a future work does not depend on the completion of a past work
4765 * (in general). Therefore we must not carry that (lock) dependency across
4766 * works.
4767 *
4768 * This is true for many things; pretty much all kthreads fall into this
4769 * pattern, where they have an invariant state and future completions do not
                                 4770 * depend on past completions. It's just that since they all have the 'same'
4771 * form -- the kthread does the same over and over -- it doesn't typically
4772 * matter.
4773 *
4774 * The same is true for system-calls, once a system call is completed (we've
4775 * returned to userspace) the next system call does not depend on the lock
4776 * history of the previous system call.
4777 *
                                 4778 * The key property for independence, this invariant state, is that it must be
4779 * a point where we hold no locks and have no history. Because if we were to
                                 4780 * hold locks, the restore at _end() would not necessarily recover its history
4781 * entry. Similarly, independence per-definition means it does not depend on
4782 * prior state.
4783 */
4784void lockdep_invariant_state(bool force)
4785{
4786 /*
4787 * We call this at an invariant point, no current state, no history.
4788 * Verify the former, enforce the latter.
4789 */
4790 WARN_ON_ONCE(!force && current->lockdep_depth);
4791 invalidate_xhlock(&xhlock(current->xhlock_idx));
4792}
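
As a hedged illustration of how such an invariant point might be annotated, consider a work-processing loop; the loop and pick_next_work() below are invented for this sketch, and the real call sites are the workqueue and syscall-exit hooks touched elsewhere in this series:

	#include <linux/workqueue.h>
	#include <linux/lockdep.h>

	extern struct work_struct *pick_next_work(void);	/* hypothetical */

	static void worker_loop(void)
	{
		struct work_struct *work;

		while ((work = pick_next_work()) != NULL) {
			/* Each item is independent: drop the prior lock history. */
			lockdep_invariant_state(true);
			work->func(work);
		}
	}
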
4793
4794static int cross_lock(struct lockdep_map *lock)
4795{
4796 return lock ? lock->cross : 0;
4797}
4798
4799/*
4800 * This is needed to decide the relationship between wrapable variables.
4801 */
4802static inline int before(unsigned int a, unsigned int b)
4803{
4804 return (int)(a - b) < 0;
4805}
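
A short worked example of the wrap-safe comparison above, with illustrative values:

	before(5, 10)        -> (int)(5 - 10)       == -5 -> true
	before(10, 5)        -> (int)(10 - 5)       ==  5 -> false
	before(UINT_MAX, 0)  -> (int)(UINT_MAX - 0) == -1 -> true

so a gen_id recorded just before the counter wraps still compares as earlier than values generated just after the wrap.
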
4806
4807static inline struct lock_class *xhlock_class(struct hist_lock *xhlock)
4808{
4809 return hlock_class(&xhlock->hlock);
4810}
4811
4812static inline struct lock_class *xlock_class(struct cross_lock *xlock)
4813{
4814 return hlock_class(&xlock->hlock);
4815}
4816
4817/*
4818 * Should we check a dependency with previous one?
4819 */
4820static inline int depend_before(struct held_lock *hlock)
4821{
4822 return hlock->read != 2 && hlock->check && !hlock->trylock;
4823}
4824
4825/*
4826 * Should we check a dependency with next one?
4827 */
4828static inline int depend_after(struct held_lock *hlock)
4829{
4830 return hlock->read != 2 && hlock->check;
4831}
4832
4833/*
4834 * Check if the xhlock is valid, which would be false if,
4835 *
                                 4836 * 1. Has not been used after initialization yet.
4837 * 2. Got invalidated.
4838 *
                                 4839 * Remember that hist_lock is implemented as a ring buffer.
4840 */
4841static inline int xhlock_valid(struct hist_lock *xhlock)
4842{
4843 /*
4844 * xhlock->hlock.instance must be !NULL.
4845 */
4846 return !!xhlock->hlock.instance;
4847}
4848
4849/*
4850 * Record a hist_lock entry.
4851 *
                                 4852 * The only requirement is that IRQs are disabled.
4853 */
4854static void add_xhlock(struct held_lock *hlock)
4855{
4856 unsigned int idx = ++current->xhlock_idx;
4857 struct hist_lock *xhlock = &xhlock(idx);
4858
4859#ifdef CONFIG_DEBUG_LOCKDEP
4860 /*
4861 * This can be done locklessly because they are all task-local
4862 * state, we must however ensure IRQs are disabled.
4863 */
4864 WARN_ON_ONCE(!irqs_disabled());
4865#endif
4866
4867 /* Initialize hist_lock's members */
4868 xhlock->hlock = *hlock;
4869 xhlock->hist_id = ++current->hist_id;
4870
4871 xhlock->trace.nr_entries = 0;
4872 xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES;
4873 xhlock->trace.entries = xhlock->trace_entries;
4874 xhlock->trace.skip = 3;
4875 save_stack_trace(&xhlock->trace);
4876}
4877
4878static inline int same_context_xhlock(struct hist_lock *xhlock)
4879{
4880 return xhlock->hlock.irq_context == task_irq_context(current);
4881}
4882
4883/*
4884 * This should be lockless as far as possible because this would be
4885 * called very frequently.
4886 */
4887static void check_add_xhlock(struct held_lock *hlock)
4888{
4889 /*
4890 * Record a hist_lock, only in case that acquisitions ahead
4891 * could depend on the held_lock. For example, if the held_lock
                                 4892 * is trylock then acquisitions ahead never depend on that.
4893 * In that case, we don't need to record it. Just return.
4894 */
4895 if (!current->xhlocks || !depend_before(hlock))
4896 return;
4897
4898 add_xhlock(hlock);
4899}
4900
4901/*
4902 * For crosslock.
4903 */
4904static int add_xlock(struct held_lock *hlock)
4905{
4906 struct cross_lock *xlock;
4907 unsigned int gen_id;
4908
4909 if (!graph_lock())
4910 return 0;
4911
4912 xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock;
4913
4914 /*
4915 * When acquisitions for a crosslock are overlapped, we use
4916 * nr_acquire to perform commit for them, based on cross_gen_id
                                 4917 * of the first acquisition, which allows us to add additional
4918 * dependencies.
4919 *
4920 * Moreover, when no acquisition of a crosslock is in progress,
4921 * we should not perform commit because the lock might not exist
4922 * any more, which might cause incorrect memory access. So we
4923 * have to track the number of acquisitions of a crosslock.
4924 *
4925 * depend_after() is necessary to initialize only the first
4926 * valid xlock so that the xlock can be used on its commit.
4927 */
4928 if (xlock->nr_acquire++ && depend_after(&xlock->hlock))
4929 goto unlock;
4930
4931 gen_id = (unsigned int)atomic_inc_return(&cross_gen_id);
4932 xlock->hlock = *hlock;
4933 xlock->hlock.gen_id = gen_id;
4934unlock:
4935 graph_unlock();
4936 return 1;
4937}
4938
4939/*
4940 * Called for both normal and crosslock acquires. Normal locks will be
4941 * pushed on the hist_lock queue. Cross locks will record state and
4942 * stop regular lock_acquire() to avoid being placed on the held_lock
4943 * stack.
4944 *
4945 * Return: 0 - failure;
4946 * 1 - crosslock, done;
4947 * 2 - normal lock, continue to held_lock[] ops.
4948 */
4949static int lock_acquire_crosslock(struct held_lock *hlock)
4950{
4951 /*
4952 * CONTEXT 1 CONTEXT 2
4953 * --------- ---------
4954 * lock A (cross)
4955 * X = atomic_inc_return(&cross_gen_id)
4956 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4957 * Y = atomic_read_acquire(&cross_gen_id)
4958 * lock B
4959 *
4960 * atomic_read_acquire() is for ordering between A and B,
                                 4961 * IOW, A happens before B, when CONTEXT 2 sees Y >= X.
4962 *
4963 * Pairs with atomic_inc_return() in add_xlock().
4964 */
4965 hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id);
4966
4967 if (cross_lock(hlock->instance))
4968 return add_xlock(hlock);
4969
4970 check_add_xhlock(hlock);
4971 return 2;
4972}
4973
4974static int copy_trace(struct stack_trace *trace)
4975{
4976 unsigned long *buf = stack_trace + nr_stack_trace_entries;
4977 unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
4978 unsigned int nr = min(max_nr, trace->nr_entries);
4979
4980 trace->nr_entries = nr;
4981 memcpy(buf, trace->entries, nr * sizeof(trace->entries[0]));
4982 trace->entries = buf;
4983 nr_stack_trace_entries += nr;
4984
4985 if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
4986 if (!debug_locks_off_graph_unlock())
4987 return 0;
4988
4989 print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
4990 dump_stack();
4991
4992 return 0;
4993 }
4994
4995 return 1;
4996}
4997
4998static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock)
4999{
5000 unsigned int xid, pid;
5001 u64 chain_key;
5002
5003 xid = xlock_class(xlock) - lock_classes;
5004 chain_key = iterate_chain_key((u64)0, xid);
5005 pid = xhlock_class(xhlock) - lock_classes;
5006 chain_key = iterate_chain_key(chain_key, pid);
5007
5008 if (lookup_chain_cache(chain_key))
5009 return 1;
5010
5011 if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context,
5012 chain_key))
5013 return 0;
5014
5015 if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1,
5016 &xhlock->trace, copy_trace))
5017 return 0;
5018
5019 return 1;
5020}
5021
5022static void commit_xhlocks(struct cross_lock *xlock)
5023{
5024 unsigned int cur = current->xhlock_idx;
5025 unsigned int prev_hist_id = xhlock(cur).hist_id;
5026 unsigned int i;
5027
5028 if (!graph_lock())
5029 return;
5030
5031 if (xlock->nr_acquire) {
5032 for (i = 0; i < MAX_XHLOCKS_NR; i++) {
5033 struct hist_lock *xhlock = &xhlock(cur - i);
5034
5035 if (!xhlock_valid(xhlock))
5036 break;
5037
5038 if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id))
5039 break;
5040
5041 if (!same_context_xhlock(xhlock))
5042 break;
5043
5044 /*
5045 * Filter out the cases where the ring buffer was
5046 * overwritten and the current entry has a bigger
5047 * hist_id than the previous one, which is impossible
5048 * otherwise:
5049 */
5050 if (unlikely(before(prev_hist_id, xhlock->hist_id)))
5051 break;
5052
5053 prev_hist_id = xhlock->hist_id;
5054
5055 /*
5056 * commit_xhlock() returns 0 with graph_lock already
5057 * released if fail.
5058 */
5059 if (!commit_xhlock(xlock, xhlock))
5060 return;
5061 }
5062 }
5063
5064 graph_unlock();
5065}
5066
5067void lock_commit_crosslock(struct lockdep_map *lock)
5068{
5069 struct cross_lock *xlock;
5070 unsigned long flags;
5071
5072 if (unlikely(!debug_locks || current->lockdep_recursion))
5073 return;
5074
5075 if (!current->xhlocks)
5076 return;
5077
5078 /*
                                 5079 * Commit hist_locks against the cross_lock only if the cross_lock
                                 5080 * could depend on acquisitions made after it.
5081 *
5082 * For example, if the cross_lock does not have the 'check' flag
5083 * then we don't need to check dependencies and commit for that.
5084 * Just skip it. In that case, of course, the cross_lock does
5085 * not depend on acquisitions ahead, either.
5086 *
5087 * WARNING: Don't do that in add_xlock() in advance. When an
5088 * acquisition context is different from the commit context,
5089 * invalid(skipped) cross_lock might be accessed.
5090 */
5091 if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock))
5092 return;
5093
5094 raw_local_irq_save(flags);
5095 check_flags(flags);
5096 current->lockdep_recursion = 1;
5097 xlock = &((struct lockdep_map_cross *)lock)->xlock;
5098 commit_xhlocks(xlock);
5099 current->lockdep_recursion = 0;
5100 raw_local_irq_restore(flags);
5101}
5102EXPORT_SYMBOL_GPL(lock_commit_crosslock);
5103
5104/*
5105 * Return: 0 - failure;
5106 * 1 - crosslock, done;
5107 * 2 - normal lock, continue to held_lock[] ops.
5108 */
5109static int lock_release_crosslock(struct lockdep_map *lock)
5110{
5111 if (cross_lock(lock)) {
5112 if (!graph_lock())
5113 return 0;
5114 ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--;
5115 graph_unlock();
5116 return 1;
5117 }
5118 return 2;
5119}
5120
5121static void cross_init(struct lockdep_map *lock, int cross)
5122{
5123 if (cross)
5124 ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0;
5125
5126 lock->cross = cross;
5127
5128 /*
5129 * Crossrelease assumes that the ring buffer size of xhlocks
5130 * is aligned with power of 2. So force it on build.
5131 */
5132 BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1));
5133}
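
A brief note on why the build-time check above matters for the xhlock() indexing; the sizes below are illustrative:

	/*
	 * xhlock_idx is an unsigned int that is only ever incremented and is
	 * allowed to wrap. With MAX_XHLOCKS_NR a power of two, the
	 * (idx % MAX_XHLOCKS_NR) mapping stays contiguous across the wrap,
	 * e.g. for a size of 8:
	 *
	 *	0xfffffffe % 8 == 6,  0xffffffff % 8 == 7,  0x0 % 8 == 0
	 *
	 * whereas a non-power-of-two size (say 10) would jump from slot 5 to
	 * slot 0 at the wrap and silently skip ring entries.
	 */
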
5134
5135void lockdep_init_task(struct task_struct *task)
5136{
5137 int i;
5138
5139 task->xhlock_idx = UINT_MAX;
5140 task->hist_id = 0;
5141
5142 for (i = 0; i < XHLOCK_CTX_NR; i++) {
5143 task->xhlock_idx_hist[i] = UINT_MAX;
5144 task->hist_id_save[i] = 0;
5145 }
5146
5147 task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR,
5148 GFP_KERNEL);
5149}
5150
5151void lockdep_free_task(struct task_struct *task)
5152{
5153 if (task->xhlocks) {
5154 void *tmp = task->xhlocks;
                                 5155		/* Disable crossrelease for current */
5156 task->xhlocks = NULL;
5157 kfree(tmp);
5158 }
5159}
5160#endif
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index c08fbd2f5ba9..1da4669d57a7 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -143,6 +143,8 @@ struct lockdep_stats {
143 int redundant_softirqs_on; 143 int redundant_softirqs_on;
144 int redundant_softirqs_off; 144 int redundant_softirqs_off;
145 int nr_unused_locks; 145 int nr_unused_locks;
146 int nr_redundant_checks;
147 int nr_redundant;
146 int nr_cyclic_checks; 148 int nr_cyclic_checks;
147 int nr_cyclic_check_recursions; 149 int nr_cyclic_check_recursions;
148 int nr_find_usage_forwards_checks; 150 int nr_find_usage_forwards_checks;
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 6d1fcc786081..68d9e267ccd4 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -201,6 +201,10 @@ static void lockdep_stats_debug_show(struct seq_file *m)
201 debug_atomic_read(chain_lookup_hits)); 201 debug_atomic_read(chain_lookup_hits));
202 seq_printf(m, " cyclic checks: %11llu\n", 202 seq_printf(m, " cyclic checks: %11llu\n",
203 debug_atomic_read(nr_cyclic_checks)); 203 debug_atomic_read(nr_cyclic_checks));
204 seq_printf(m, " redundant checks: %11llu\n",
205 debug_atomic_read(nr_redundant_checks));
206 seq_printf(m, " redundant links: %11llu\n",
207 debug_atomic_read(nr_redundant));
204 seq_printf(m, " find-mask forwards checks: %11llu\n", 208 seq_printf(m, " find-mask forwards checks: %11llu\n",
205 debug_atomic_read(nr_find_usage_forwards_checks)); 209 debug_atomic_read(nr_find_usage_forwards_checks));
206 seq_printf(m, " find-mask backwards checks: %11llu\n", 210 seq_printf(m, " find-mask backwards checks: %11llu\n",
diff --git a/kernel/locking/lockdep_states.h b/kernel/locking/lockdep_states.h
index 995b0cc2b84c..35ca09f2ed0b 100644
--- a/kernel/locking/lockdep_states.h
+++ b/kernel/locking/lockdep_states.h
@@ -6,4 +6,3 @@
6 */ 6 */
7LOCKDEP_STATE(HARDIRQ) 7LOCKDEP_STATE(HARDIRQ)
8LOCKDEP_STATE(SOFTIRQ) 8LOCKDEP_STATE(SOFTIRQ)
9LOCKDEP_STATE(RECLAIM_FS)
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index a3167941093b..a74ee6abd039 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -109,6 +109,19 @@ bool osq_lock(struct optimistic_spin_queue *lock)
109 109
110 prev = decode_cpu(old); 110 prev = decode_cpu(old);
111 node->prev = prev; 111 node->prev = prev;
112
113 /*
114 * osq_lock() unqueue
115 *
116 * node->prev = prev osq_wait_next()
117 * WMB MB
118 * prev->next = node next->prev = prev // unqueue-C
119 *
120 * Here 'node->prev' and 'next->prev' are the same variable and we need
121 * to ensure these stores happen in-order to avoid corrupting the list.
122 */
123 smp_wmb();
124
112 WRITE_ONCE(prev->next, node); 125 WRITE_ONCE(prev->next, node);
113 126
114 /* 127 /*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 4ccfcaae5b89..43555681c40b 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -72,7 +72,7 @@ static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
72 struct __qspinlock *l = (void *)lock; 72 struct __qspinlock *l = (void *)lock;
73 73
74 if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) && 74 if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
75 (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0)) { 75 (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
76 qstat_inc(qstat_pv_lock_stealing, true); 76 qstat_inc(qstat_pv_lock_stealing, true);
77 return true; 77 return true;
78 } 78 }
@@ -101,16 +101,16 @@ static __always_inline void clear_pending(struct qspinlock *lock)
101 101
102/* 102/*
103 * The pending bit check in pv_queued_spin_steal_lock() isn't a memory 103 * The pending bit check in pv_queued_spin_steal_lock() isn't a memory
104 * barrier. Therefore, an atomic cmpxchg() is used to acquire the lock 104 * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
105 * just to be sure that it will get it. 105 * lock just to be sure that it will get it.
106 */ 106 */
107static __always_inline int trylock_clear_pending(struct qspinlock *lock) 107static __always_inline int trylock_clear_pending(struct qspinlock *lock)
108{ 108{
109 struct __qspinlock *l = (void *)lock; 109 struct __qspinlock *l = (void *)lock;
110 110
111 return !READ_ONCE(l->locked) && 111 return !READ_ONCE(l->locked) &&
112 (cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL) 112 (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL,
113 == _Q_PENDING_VAL); 113 _Q_LOCKED_VAL) == _Q_PENDING_VAL);
114} 114}
115#else /* _Q_PENDING_BITS == 8 */ 115#else /* _Q_PENDING_BITS == 8 */
116static __always_inline void set_pending(struct qspinlock *lock) 116static __always_inline void set_pending(struct qspinlock *lock)
@@ -138,7 +138,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
138 */ 138 */
139 old = val; 139 old = val;
140 new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; 140 new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
141 val = atomic_cmpxchg(&lock->val, old, new); 141 val = atomic_cmpxchg_acquire(&lock->val, old, new);
142 142
143 if (val == old) 143 if (val == old)
144 return 1; 144 return 1;
@@ -362,8 +362,18 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
362 * observe its next->locked value and advance itself. 362 * observe its next->locked value and advance itself.
363 * 363 *
364 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() 364 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
365 *
366 * The write to next->locked in arch_mcs_spin_unlock_contended()
367 * must be ordered before the read of pn->state in the cmpxchg()
368 * below for the code to work correctly. To guarantee full ordering
369 * irrespective of the success or failure of the cmpxchg(),
370 * a relaxed version with explicit barrier is used. The control
371 * dependency will order the reading of pn->state before any
372 * subsequent writes.
365 */ 373 */
366 if (cmpxchg(&pn->state, vcpu_halted, vcpu_hashed) != vcpu_halted) 374 smp_mb__before_atomic();
375 if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
376 != vcpu_halted)
367 return; 377 return;
368 378
369 /* 379 /*
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 72ad45a9a794..8d039b928d61 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -40,6 +40,9 @@ struct rt_mutex_waiter {
40/* 40/*
41 * Various helpers to access the waiters-tree: 41 * Various helpers to access the waiters-tree:
42 */ 42 */
43
44#ifdef CONFIG_RT_MUTEXES
45
43static inline int rt_mutex_has_waiters(struct rt_mutex *lock) 46static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
44{ 47{
45 return !RB_EMPTY_ROOT(&lock->waiters); 48 return !RB_EMPTY_ROOT(&lock->waiters);
@@ -69,6 +72,32 @@ task_top_pi_waiter(struct task_struct *p)
69 pi_tree_entry); 72 pi_tree_entry);
70} 73}
71 74
75#else
76
77static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
78{
79 return false;
80}
81
82static inline struct rt_mutex_waiter *
83rt_mutex_top_waiter(struct rt_mutex *lock)
84{
85 return NULL;
86}
87
88static inline int task_has_pi_waiters(struct task_struct *p)
89{
90 return false;
91}
92
93static inline struct rt_mutex_waiter *
94task_top_pi_waiter(struct task_struct *p)
95{
96 return NULL;
97}
98
99#endif
100
72/* 101/*
73 * lock->owner state tracking: 102 * lock->owner state tracking:
74 */ 103 */
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 20819df98125..0848634c5512 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -126,7 +126,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem)
126/* 126/*
127 * get a read lock on the semaphore 127 * get a read lock on the semaphore
128 */ 128 */
129void __sched __down_read(struct rw_semaphore *sem) 129int __sched __down_read_common(struct rw_semaphore *sem, int state)
130{ 130{
131 struct rwsem_waiter waiter; 131 struct rwsem_waiter waiter;
132 unsigned long flags; 132 unsigned long flags;
@@ -140,8 +140,6 @@ void __sched __down_read(struct rw_semaphore *sem)
140 goto out; 140 goto out;
141 } 141 }
142 142
143 set_current_state(TASK_UNINTERRUPTIBLE);
144
145 /* set up my own style of waitqueue */ 143 /* set up my own style of waitqueue */
146 waiter.task = current; 144 waiter.task = current;
147 waiter.type = RWSEM_WAITING_FOR_READ; 145 waiter.type = RWSEM_WAITING_FOR_READ;
@@ -149,20 +147,41 @@ void __sched __down_read(struct rw_semaphore *sem)
149 147
150 list_add_tail(&waiter.list, &sem->wait_list); 148 list_add_tail(&waiter.list, &sem->wait_list);
151 149
152 /* we don't need to touch the semaphore struct anymore */
153 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
154
155 /* wait to be given the lock */ 150 /* wait to be given the lock */
156 for (;;) { 151 for (;;) {
157 if (!waiter.task) 152 if (!waiter.task)
158 break; 153 break;
154 if (signal_pending_state(state, current))
155 goto out_nolock;
156 set_current_state(state);
157 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
159 schedule(); 158 schedule();
160 set_current_state(TASK_UNINTERRUPTIBLE); 159 raw_spin_lock_irqsave(&sem->wait_lock, flags);
161 } 160 }
162 161
163 __set_current_state(TASK_RUNNING); 162 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
164 out: 163 out:
165 ; 164 return 0;
165
166out_nolock:
167 /*
                                 168	 * We didn't take the lock, so there must be a writer, which is
                                 169	 * either the owner or the first waiter of the sem. If it's a
                                 170	 * waiter, it will be woken by the current owner. No need to wake anybody.
171 */
172 list_del(&waiter.list);
173 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
174 return -EINTR;
175}
176
177void __sched __down_read(struct rw_semaphore *sem)
178{
179 __down_read_common(sem, TASK_UNINTERRUPTIBLE);
180}
181
182int __sched __down_read_killable(struct rw_semaphore *sem)
183{
184 return __down_read_common(sem, TASK_KILLABLE);
166} 185}
167 186
168/* 187/*
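
A hedged sketch of how the killable read path is consumed one layer up; down_read_killable() is the expected wrapper over the new primitive but is not part of the hunks shown here, and the caller is invented:

	#include <linux/errno.h>
	#include <linux/rwsem.h>

	/* Hypothetical caller: back out cleanly if a fatal signal arrives. */
	static int do_read_side_work(struct rw_semaphore *sem)
	{
		if (down_read_killable(sem))
			return -EINTR;

		/* ... read-side work under the semaphore ... */

		up_read(sem);
		return 0;
	}
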
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 34e727f18e49..02f660666ab8 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -221,8 +221,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
221/* 221/*
222 * Wait for the read lock to be granted 222 * Wait for the read lock to be granted
223 */ 223 */
224__visible 224static inline struct rw_semaphore __sched *
225struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) 225__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
226{ 226{
227 long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; 227 long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
228 struct rwsem_waiter waiter; 228 struct rwsem_waiter waiter;
@@ -255,17 +255,44 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
255 255
256 /* wait to be given the lock */ 256 /* wait to be given the lock */
257 while (true) { 257 while (true) {
258 set_current_state(TASK_UNINTERRUPTIBLE); 258 set_current_state(state);
259 if (!waiter.task) 259 if (!waiter.task)
260 break; 260 break;
261 if (signal_pending_state(state, current)) {
262 raw_spin_lock_irq(&sem->wait_lock);
263 if (waiter.task)
264 goto out_nolock;
265 raw_spin_unlock_irq(&sem->wait_lock);
266 break;
267 }
261 schedule(); 268 schedule();
262 } 269 }
263 270
264 __set_current_state(TASK_RUNNING); 271 __set_current_state(TASK_RUNNING);
265 return sem; 272 return sem;
273out_nolock:
274 list_del(&waiter.list);
275 if (list_empty(&sem->wait_list))
276 atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
277 raw_spin_unlock_irq(&sem->wait_lock);
278 __set_current_state(TASK_RUNNING);
279 return ERR_PTR(-EINTR);
280}
281
282__visible struct rw_semaphore * __sched
283rwsem_down_read_failed(struct rw_semaphore *sem)
284{
285 return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
266} 286}
267EXPORT_SYMBOL(rwsem_down_read_failed); 287EXPORT_SYMBOL(rwsem_down_read_failed);
268 288
289__visible struct rw_semaphore * __sched
290rwsem_down_read_failed_killable(struct rw_semaphore *sem)
291{
292 return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
293}
294EXPORT_SYMBOL(rwsem_down_read_failed_killable);
295
269/* 296/*
270 * This function must be called with the sem->wait_lock held to prevent 297 * This function must be called with the sem->wait_lock held to prevent
271 * race conditions between checking the rwsem wait list and setting the 298 * race conditions between checking the rwsem wait list and setting the
diff --git a/kernel/panic.c b/kernel/panic.c
index a58932b41700..bdd18afa19a4 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -26,6 +26,7 @@
26#include <linux/nmi.h> 26#include <linux/nmi.h>
27#include <linux/console.h> 27#include <linux/console.h>
28#include <linux/bug.h> 28#include <linux/bug.h>
29#include <linux/ratelimit.h>
29 30
30#define PANIC_TIMER_STEP 100 31#define PANIC_TIMER_STEP 100
31#define PANIC_BLINK_SPD 18 32#define PANIC_BLINK_SPD 18
@@ -601,6 +602,17 @@ EXPORT_SYMBOL(__stack_chk_fail);
601 602
602#endif 603#endif
603 604
605#ifdef CONFIG_ARCH_HAS_REFCOUNT
606void refcount_error_report(struct pt_regs *regs, const char *err)
607{
608 WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n",
609 err, (void *)instruction_pointer(regs),
610 current->comm, task_pid_nr(current),
611 from_kuid_munged(&init_user_ns, current_uid()),
612 from_kuid_munged(&init_user_ns, current_euid()));
613}
614#endif
615
604core_param(panic, panic_timeout, int, 0644); 616core_param(panic, panic_timeout, int, 0644);
605core_param(pause_on_oops, pause_on_oops, int, 0644); 617core_param(pause_on_oops, pause_on_oops, int, 0644);
606core_param(panic_on_warn, panic_on_warn, int, 0644); 618core_param(panic_on_warn, panic_on_warn, int, 0644);
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 5d9131aa846f..cc873075c3bd 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -32,6 +32,12 @@ void complete(struct completion *x)
32 unsigned long flags; 32 unsigned long flags;
33 33
34 spin_lock_irqsave(&x->wait.lock, flags); 34 spin_lock_irqsave(&x->wait.lock, flags);
35
36 /*
37 * Perform commit of crossrelease here.
38 */
39 complete_release_commit(x);
40
35 if (x->done != UINT_MAX) 41 if (x->done != UINT_MAX)
36 x->done++; 42 x->done++;
37 __wake_up_locked(&x->wait, TASK_NORMAL, 1); 43 __wake_up_locked(&x->wait, TASK_NORMAL, 1);
@@ -99,9 +105,14 @@ __wait_for_common(struct completion *x,
99{ 105{
100 might_sleep(); 106 might_sleep();
101 107
108 complete_acquire(x);
109
102 spin_lock_irq(&x->wait.lock); 110 spin_lock_irq(&x->wait.lock);
103 timeout = do_wait_for_common(x, action, timeout, state); 111 timeout = do_wait_for_common(x, action, timeout, state);
104 spin_unlock_irq(&x->wait.lock); 112 spin_unlock_irq(&x->wait.lock);
113
114 complete_release(x);
115
105 return timeout; 116 return timeout;
106} 117}
107 118
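complete_acquire()/complete_release() bracket the waiting side and complete_release_commit() marks the completing side, so lockdep's crossrelease machinery can tie a wait_for_completion() in one context to the complete() that satisfies it in another. A minimal sketch of the dependency cycle this now makes reportable; the mutex, the completion and both thread functions are made up for illustration.

#include <linux/completion.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(example_lock);
static DECLARE_COMPLETION(example_done);

/* Thread A: waits for the completion while holding a mutex. */
static void example_waiter(void)
{
	mutex_lock(&example_lock);
	wait_for_completion(&example_done);	/* crossrelease acquire */
	mutex_unlock(&example_lock);
}

/* Thread B: needs the same mutex before it can complete().  With
 * LOCKDEP_COMPLETIONS the lock -> completion -> lock cycle is reported
 * instead of silently hanging both threads.
 */
static void example_completer(void)
{
	mutex_lock(&example_lock);
	complete(&example_done);		/* crossrelease commit */
	mutex_unlock(&example_lock);
}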
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c1fcd96cf432..6d2c7ff9ba98 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1972,8 +1972,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
1972 * reordered with p->state check below. This pairs with mb() in 1972 * reordered with p->state check below. This pairs with mb() in
1973 * set_current_state() the waiting thread does. 1973 * set_current_state() the waiting thread does.
1974 */ 1974 */
1975 smp_mb__before_spinlock();
1976 raw_spin_lock_irqsave(&p->pi_lock, flags); 1975 raw_spin_lock_irqsave(&p->pi_lock, flags);
1976 smp_mb__after_spinlock();
1977 if (!(p->state & state)) 1977 if (!(p->state & state))
1978 goto out; 1978 goto out;
1979 1979
@@ -3296,8 +3296,8 @@ static void __sched notrace __schedule(bool preempt)
3296 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) 3296 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
3297 * done by the caller to avoid the race with signal_wake_up(). 3297 * done by the caller to avoid the race with signal_wake_up().
3298 */ 3298 */
3299 smp_mb__before_spinlock();
3300 rq_lock(rq, &rf); 3299 rq_lock(rq, &rf);
3300 smp_mb__after_spinlock();
3301 3301
3302 /* Promote REQ to ACT */ 3302 /* Promote REQ to ACT */
3303 rq->clock_update_flags <<= 1; 3303 rq->clock_update_flags <<= 1;
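Both scheduler hunks replace smp_mb__before_spinlock() with smp_mb__after_spinlock(): the full barrier is now issued after the lock is held rather than before it is taken. A sketch of the waker-side pattern and the ordering it provides; the condition pointer stands in for whatever the wakee tests before sleeping and is not from the patch.

#include <linux/spinlock.h>

/* The store to the wait condition must not be reordered with the later
 * p->state read; ACQUIRE semantics alone do not forbid that, hence the
 * explicit full barrier once the lock is held.
 */
static void example_wake_path(raw_spinlock_t *pi_lock, int *condition)
{
	unsigned long flags;

	WRITE_ONCE(*condition, 1);		/* [S] publish the condition */
	raw_spin_lock_irqsave(pi_lock, flags);
	smp_mb__after_spinlock();		/* order [S] before [L]      */
	/* [L] ... read p->state and decide whether a wakeup is needed ... */
	raw_spin_unlock_irqrestore(pi_lock, flags);
}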
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 25e5cb1107f3..ab1c7f5409a0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -769,7 +769,7 @@ struct rq {
769#ifdef CONFIG_SCHED_HRTICK 769#ifdef CONFIG_SCHED_HRTICK
770#ifdef CONFIG_SMP 770#ifdef CONFIG_SMP
771 int hrtick_csd_pending; 771 int hrtick_csd_pending;
772 struct call_single_data hrtick_csd; 772 call_single_data_t hrtick_csd;
773#endif 773#endif
774 struct hrtimer hrtick_timer; 774 struct hrtimer hrtick_timer;
775#endif 775#endif
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 3d5610dcce11..2227e183e202 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -33,9 +33,6 @@ void swake_up(struct swait_queue_head *q)
33{ 33{
34 unsigned long flags; 34 unsigned long flags;
35 35
36 if (!swait_active(q))
37 return;
38
39 raw_spin_lock_irqsave(&q->lock, flags); 36 raw_spin_lock_irqsave(&q->lock, flags);
40 swake_up_locked(q); 37 swake_up_locked(q);
41 raw_spin_unlock_irqrestore(&q->lock, flags); 38 raw_spin_unlock_irqrestore(&q->lock, flags);
@@ -51,9 +48,6 @@ void swake_up_all(struct swait_queue_head *q)
51 struct swait_queue *curr; 48 struct swait_queue *curr;
52 LIST_HEAD(tmp); 49 LIST_HEAD(tmp);
53 50
54 if (!swait_active(q))
55 return;
56
57 raw_spin_lock_irq(&q->lock); 51 raw_spin_lock_irq(&q->lock);
58 list_splice_init(&q->task_list, &tmp); 52 list_splice_init(&q->task_list, &tmp);
59 while (!list_empty(&tmp)) { 53 while (!list_empty(&tmp)) {
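Removing the lockless swait_active() checks means swake_up() and swake_up_all() always take q->lock, closing the window where a waiter enqueues itself after the check but before its list addition is visible. A caller that still wants the cheap early-out has to provide the ordering itself, roughly as below; the condition flag and function name are illustrative only.

#include <linux/swait.h>

/* Sketch: skip the lock only when the caller supplies the full barrier
 * that pairs with the waiter's prepare_to_swait()/set_current_state().
 */
static void example_swake_if_needed(struct swait_queue_head *q, int *cond)
{
	WRITE_ONCE(*cond, 1);
	smp_mb();		/* order the cond store against the waiter check */
	if (swait_active(q))
		swake_up(q);
}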
diff --git a/kernel/smp.c b/kernel/smp.c
index 3061483cb3ad..81cfca9b4cc3 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -28,7 +28,7 @@ enum {
28}; 28};
29 29
30struct call_function_data { 30struct call_function_data {
31 struct call_single_data __percpu *csd; 31 call_single_data_t __percpu *csd;
32 cpumask_var_t cpumask; 32 cpumask_var_t cpumask;
33 cpumask_var_t cpumask_ipi; 33 cpumask_var_t cpumask_ipi;
34}; 34};
@@ -51,7 +51,7 @@ int smpcfd_prepare_cpu(unsigned int cpu)
51 free_cpumask_var(cfd->cpumask); 51 free_cpumask_var(cfd->cpumask);
52 return -ENOMEM; 52 return -ENOMEM;
53 } 53 }
54 cfd->csd = alloc_percpu(struct call_single_data); 54 cfd->csd = alloc_percpu(call_single_data_t);
55 if (!cfd->csd) { 55 if (!cfd->csd) {
56 free_cpumask_var(cfd->cpumask); 56 free_cpumask_var(cfd->cpumask);
57 free_cpumask_var(cfd->cpumask_ipi); 57 free_cpumask_var(cfd->cpumask_ipi);
@@ -103,12 +103,12 @@ void __init call_function_init(void)
 103 * previous function call. For multi-cpu calls it's even more interesting 103 * previous function call. For multi-cpu calls it's even more interesting
104 * as we'll have to ensure no other cpu is observing our csd. 104 * as we'll have to ensure no other cpu is observing our csd.
105 */ 105 */
106static __always_inline void csd_lock_wait(struct call_single_data *csd) 106static __always_inline void csd_lock_wait(call_single_data_t *csd)
107{ 107{
108 smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); 108 smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
109} 109}
110 110
111static __always_inline void csd_lock(struct call_single_data *csd) 111static __always_inline void csd_lock(call_single_data_t *csd)
112{ 112{
113 csd_lock_wait(csd); 113 csd_lock_wait(csd);
114 csd->flags |= CSD_FLAG_LOCK; 114 csd->flags |= CSD_FLAG_LOCK;
@@ -116,12 +116,12 @@ static __always_inline void csd_lock(struct call_single_data *csd)
116 /* 116 /*
117 * prevent CPU from reordering the above assignment 117 * prevent CPU from reordering the above assignment
118 * to ->flags with any subsequent assignments to other 118 * to ->flags with any subsequent assignments to other
119 * fields of the specified call_single_data structure: 119 * fields of the specified call_single_data_t structure:
120 */ 120 */
121 smp_wmb(); 121 smp_wmb();
122} 122}
123 123
124static __always_inline void csd_unlock(struct call_single_data *csd) 124static __always_inline void csd_unlock(call_single_data_t *csd)
125{ 125{
126 WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); 126 WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
127 127
@@ -131,14 +131,14 @@ static __always_inline void csd_unlock(struct call_single_data *csd)
131 smp_store_release(&csd->flags, 0); 131 smp_store_release(&csd->flags, 0);
132} 132}
133 133
134static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); 134static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
135 135
136/* 136/*
137 * Insert a previously allocated call_single_data element 137 * Insert a previously allocated call_single_data_t element
138 * for execution on the given CPU. data must already have 138 * for execution on the given CPU. data must already have
139 * ->func, ->info, and ->flags set. 139 * ->func, ->info, and ->flags set.
140 */ 140 */
141static int generic_exec_single(int cpu, struct call_single_data *csd, 141static int generic_exec_single(int cpu, call_single_data_t *csd,
142 smp_call_func_t func, void *info) 142 smp_call_func_t func, void *info)
143{ 143{
144 if (cpu == smp_processor_id()) { 144 if (cpu == smp_processor_id()) {
@@ -210,7 +210,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
210{ 210{
211 struct llist_head *head; 211 struct llist_head *head;
212 struct llist_node *entry; 212 struct llist_node *entry;
213 struct call_single_data *csd, *csd_next; 213 call_single_data_t *csd, *csd_next;
214 static bool warned; 214 static bool warned;
215 215
216 WARN_ON(!irqs_disabled()); 216 WARN_ON(!irqs_disabled());
@@ -268,8 +268,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
268int smp_call_function_single(int cpu, smp_call_func_t func, void *info, 268int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
269 int wait) 269 int wait)
270{ 270{
271 struct call_single_data *csd; 271 call_single_data_t *csd;
272 struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS }; 272 call_single_data_t csd_stack = {
273 .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
274 };
273 int this_cpu; 275 int this_cpu;
274 int err; 276 int err;
275 277
@@ -321,7 +323,7 @@ EXPORT_SYMBOL(smp_call_function_single);
321 * NOTE: Be careful, there is unfortunately no current debugging facility to 323 * NOTE: Be careful, there is unfortunately no current debugging facility to
322 * validate the correctness of this serialization. 324 * validate the correctness of this serialization.
323 */ 325 */
324int smp_call_function_single_async(int cpu, struct call_single_data *csd) 326int smp_call_function_single_async(int cpu, call_single_data_t *csd)
325{ 327{
326 int err = 0; 328 int err = 0;
327 329
@@ -444,7 +446,7 @@ void smp_call_function_many(const struct cpumask *mask,
444 446
445 cpumask_clear(cfd->cpumask_ipi); 447 cpumask_clear(cfd->cpumask_ipi);
446 for_each_cpu(cpu, cfd->cpumask) { 448 for_each_cpu(cpu, cfd->cpumask) {
447 struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); 449 call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
448 450
449 csd_lock(csd); 451 csd_lock(csd);
450 if (wait) 452 if (wait)
@@ -460,7 +462,7 @@ void smp_call_function_many(const struct cpumask *mask,
460 462
461 if (wait) { 463 if (wait) {
462 for_each_cpu(cpu, cfd->cpumask) { 464 for_each_cpu(cpu, cfd->cpumask) {
463 struct call_single_data *csd; 465 call_single_data_t *csd;
464 466
465 csd = per_cpu_ptr(cfd->csd, cpu); 467 csd = per_cpu_ptr(cfd->csd, cpu);
466 csd_lock_wait(csd); 468 csd_lock_wait(csd);
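The smp.c conversion is mechanical: call_single_data_t is the same structure as struct call_single_data but carries an alignment attribute so a csd can never straddle a cache line. A sketch of asynchronous usage with the new type; the callback, the static csd and the wrapper are invented for illustration.

#include <linux/printk.h>
#include <linux/smp.h>

static void example_csd_func(void *info)
{
	pr_info("csd ran on cpu %d\n", smp_processor_id());
}

/* Must not live on the stack for the async variant: the caller returns
 * before the target CPU unlocks the csd.
 */
static call_single_data_t example_csd = {
	.func = example_csd_func,
};

static int example_kick_cpu(int cpu)
{
	return smp_call_function_single_async(cpu, &example_csd);
}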
diff --git a/kernel/up.c b/kernel/up.c
index ee81ac9af4ca..42c46bf3e0a5 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -23,7 +23,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
23} 23}
24EXPORT_SYMBOL(smp_call_function_single); 24EXPORT_SYMBOL(smp_call_function_single);
25 25
26int smp_call_function_single_async(int cpu, struct call_single_data *csd) 26int smp_call_function_single_async(int cpu, call_single_data_t *csd)
27{ 27{
28 unsigned long flags; 28 unsigned long flags;
29 29
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca937b0c3a96..ab3c0dc8c7ed 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2091,8 +2091,30 @@ __acquires(&pool->lock)
2091 2091
2092 spin_unlock_irq(&pool->lock); 2092 spin_unlock_irq(&pool->lock);
2093 2093
2094 lock_map_acquire_read(&pwq->wq->lockdep_map); 2094 lock_map_acquire(&pwq->wq->lockdep_map);
2095 lock_map_acquire(&lockdep_map); 2095 lock_map_acquire(&lockdep_map);
2096 /*
2097 * Strictly speaking we should mark the invariant state without holding
2098 * any locks, that is, before these two lock_map_acquire()'s.
2099 *
2100 * However, that would result in:
2101 *
2102 * A(W1)
2103 * WFC(C)
2104 * A(W1)
2105 * C(C)
2106 *
2107 * Which would create W1->C->W1 dependencies, even though there is no
 2108 * actual deadlock possible. There are two solutions: use a
 2109 * read-recursive acquire on the work(queue) 'locks', which then hits
 2110 * the lockdep limitation on recursive locks, or simply discard
 2111 * these locks.
2112 *
2113 * AFAICT there is no possible deadlock scenario between the
2114 * flush_work() and complete() primitives (except for single-threaded
2115 * workqueues), so hiding them isn't a problem.
2116 */
2117 lockdep_invariant_state(true);
2096 trace_workqueue_execute_start(work); 2118 trace_workqueue_execute_start(work);
2097 worker->current_func(work); 2119 worker->current_func(work);
2098 /* 2120 /*
@@ -2474,7 +2496,16 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
2474 */ 2496 */
2475 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func); 2497 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2476 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); 2498 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2477 init_completion(&barr->done); 2499
2500 /*
2501 * Explicitly init the crosslock for wq_barrier::done, make its lock
2502 * key a subkey of the corresponding work. As a result we won't
2503 * build a dependency between wq_barrier::done and unrelated work.
2504 */
2505 lockdep_init_map_crosslock((struct lockdep_map *)&barr->done.map,
2506 "(complete)wq_barr::done",
2507 target->lockdep_map.key, 1);
2508 __init_completion(&barr->done);
2478 barr->task = current; 2509 barr->task = current;
2479 2510
2480 /* 2511 /*
@@ -2815,16 +2846,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
2815 spin_unlock_irq(&pool->lock); 2846 spin_unlock_irq(&pool->lock);
2816 2847
2817 /* 2848 /*
2818 * If @max_active is 1 or rescuer is in use, flushing another work 2849 * Force a lock recursion deadlock when using flush_work() inside a
2819 * item on the same workqueue may lead to deadlock. Make sure the 2850 * single-threaded or rescuer equipped workqueue.
2820 * flusher is not running on the same workqueue by verifying write 2851 *
 2821 access. 2852 * For single threaded workqueues the deadlock happens when the flushed
 2853 * work is queued after the work issuing the flush_work(). For rescuer
 2854 * equipped workqueues the deadlock happens when the rescuer stalls,
 2855 * blocking forward progress.
2822 */ 2856 */
2823 if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) 2857 if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) {
2824 lock_map_acquire(&pwq->wq->lockdep_map); 2858 lock_map_acquire(&pwq->wq->lockdep_map);
2825 else 2859 lock_map_release(&pwq->wq->lockdep_map);
2826 lock_map_acquire_read(&pwq->wq->lockdep_map); 2860 }
2827 lock_map_release(&pwq->wq->lockdep_map);
2828 2861
2829 return true; 2862 return true;
2830already_gone: 2863already_gone:
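start_flush_work() now acquires and immediately releases the workqueue's lockdep_map whenever the workqueue is single-threaded or has a rescuer, so lockdep records that flush_work() ran under that workqueue even on runs where nothing actually deadlocks. A sketch of the self-flush pattern this is designed to flag; the workqueue and both work items are hypothetical.

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* e.g. alloc_ordered_workqueue(), max_active == 1 */
static struct work_struct example_other_work;	/* also queued on example_wq */

/* A work item on a single-threaded workqueue flushing another item queued
 * on the same workqueue: the flushed item cannot start until this one
 * finishes, so this flush_work() can never return.  With the hunk above,
 * lockdep reports it as recursion on example_wq's lockdep_map.
 */
static void example_work_fn(struct work_struct *work)
{
	flush_work(&example_other_work);
}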
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2a9a8759752b..7396f5044397 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1091,6 +1091,8 @@ config PROVE_LOCKING
1091 select DEBUG_MUTEXES 1091 select DEBUG_MUTEXES
1092 select DEBUG_RT_MUTEXES if RT_MUTEXES 1092 select DEBUG_RT_MUTEXES if RT_MUTEXES
1093 select DEBUG_LOCK_ALLOC 1093 select DEBUG_LOCK_ALLOC
1094 select LOCKDEP_CROSSRELEASE
1095 select LOCKDEP_COMPLETIONS
1094 select TRACE_IRQFLAGS 1096 select TRACE_IRQFLAGS
1095 default n 1097 default n
1096 help 1098 help
@@ -1160,6 +1162,22 @@ config LOCK_STAT
1160 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. 1162 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
1161 (CONFIG_LOCKDEP defines "acquire" and "release" events.) 1163 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
1162 1164
1165config LOCKDEP_CROSSRELEASE
1166 bool
1167 help
 1168 This makes lockdep work for crosslocks, which are locks allowed to
 1169 be released in a different context from the acquisition context.
 1170 Normally a lock must be released in the context that acquired it.
 1171 However, relaxing this constraint lets synchronization primitives
 1172 such as page locks or completions use the lock correctness
 1173 detector, lockdep.
1174
1175config LOCKDEP_COMPLETIONS
1176 bool
1177 help
1178 A deadlock caused by wait_for_completion() and complete() can be
 1179 detected by lockdep using the crossrelease feature.
1180
1163config DEBUG_LOCKDEP 1181config DEBUG_LOCKDEP
1164 bool "Lock dependency engine debugging" 1182 bool "Lock dependency engine debugging"
1165 depends on DEBUG_KERNEL && LOCKDEP 1183 depends on DEBUG_KERNEL && LOCKDEP
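Both new options are select-only and get turned on by PROVE_LOCKING above. The kind of lock the help text describes, acquired in one context and released in another, looks roughly like the page-lock pattern sketched here; the function names are invented and the bio handling is heavily simplified, this is only meant to show why classic lockdep, which expects release in the acquiring context, cannot model it.

#include <linux/bio.h>
#include <linux/pagemap.h>

/* Acquisition context: lock the page and kick off the read. */
static void example_start_read(struct page *page, struct bio *bio)
{
	lock_page(page);	/* acquired in process context        */
	submit_bio(bio);	/* the release happens somewhere else */
}

/* Release context: bio completion, typically interrupt/softirq. */
static void example_end_read(struct bio *bio)
{
	struct page *page = bio->bi_io_vec[0].bv_page;

	unlock_page(page);	/* released in a different context */
	bio_put(bio);
}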
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 6f2b135dc5e8..cd0b5c964bd0 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -363,6 +363,103 @@ static void rsem_AA3(void)
363} 363}
364 364
365/* 365/*
366 * read_lock(A)
367 * spin_lock(B)
368 * spin_lock(B)
369 * write_lock(A)
370 */
371static void rlock_ABBA1(void)
372{
373 RL(X1);
374 L(Y1);
375 U(Y1);
376 RU(X1);
377
378 L(Y1);
379 WL(X1);
380 WU(X1);
381 U(Y1); // should fail
382}
383
384static void rwsem_ABBA1(void)
385{
386 RSL(X1);
387 ML(Y1);
388 MU(Y1);
389 RSU(X1);
390
391 ML(Y1);
392 WSL(X1);
393 WSU(X1);
394 MU(Y1); // should fail
395}
396
397/*
398 * read_lock(A)
399 * spin_lock(B)
400 * spin_lock(B)
401 * read_lock(A)
402 */
403static void rlock_ABBA2(void)
404{
405 RL(X1);
406 L(Y1);
407 U(Y1);
408 RU(X1);
409
410 L(Y1);
411 RL(X1);
412 RU(X1);
413 U(Y1); // should NOT fail
414}
415
416static void rwsem_ABBA2(void)
417{
418 RSL(X1);
419 ML(Y1);
420 MU(Y1);
421 RSU(X1);
422
423 ML(Y1);
424 RSL(X1);
425 RSU(X1);
426 MU(Y1); // should fail
427}
428
429
430/*
431 * write_lock(A)
432 * spin_lock(B)
433 * spin_lock(B)
434 * write_lock(A)
435 */
436static void rlock_ABBA3(void)
437{
438 WL(X1);
439 L(Y1);
440 U(Y1);
441 WU(X1);
442
443 L(Y1);
444 WL(X1);
445 WU(X1);
446 U(Y1); // should fail
447}
448
449static void rwsem_ABBA3(void)
450{
451 WSL(X1);
452 ML(Y1);
453 MU(Y1);
454 WSU(X1);
455
456 ML(Y1);
457 WSL(X1);
458 WSU(X1);
459 MU(Y1); // should fail
460}
461
462/*
366 * ABBA deadlock: 463 * ABBA deadlock:
367 */ 464 */
368 465
@@ -1056,8 +1153,6 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
1056 if (debug_locks != expected) { 1153 if (debug_locks != expected) {
1057 unexpected_testcase_failures++; 1154 unexpected_testcase_failures++;
1058 pr_cont("FAILED|"); 1155 pr_cont("FAILED|");
1059
1060 dump_stack();
1061 } else { 1156 } else {
1062 testcase_successes++; 1157 testcase_successes++;
1063 pr_cont(" ok |"); 1158 pr_cont(" ok |");
@@ -1933,6 +2028,30 @@ void locking_selftest(void)
1933 dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM); 2028 dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
1934 pr_cont("\n"); 2029 pr_cont("\n");
1935 2030
2031 print_testname("mixed read-lock/lock-write ABBA");
2032 pr_cont(" |");
2033 dotest(rlock_ABBA1, FAILURE, LOCKTYPE_RWLOCK);
2034 /*
2035 * Lockdep does indeed fail here, but there's nothing we can do about
2036 * that now. Don't kill lockdep for it.
2037 */
2038 unexpected_testcase_failures--;
2039
2040 pr_cont(" |");
2041 dotest(rwsem_ABBA1, FAILURE, LOCKTYPE_RWSEM);
2042
2043 print_testname("mixed read-lock/lock-read ABBA");
2044 pr_cont(" |");
2045 dotest(rlock_ABBA2, SUCCESS, LOCKTYPE_RWLOCK);
2046 pr_cont(" |");
2047 dotest(rwsem_ABBA2, FAILURE, LOCKTYPE_RWSEM);
2048
2049 print_testname("mixed write-lock/lock-write ABBA");
2050 pr_cont(" |");
2051 dotest(rlock_ABBA3, FAILURE, LOCKTYPE_RWLOCK);
2052 pr_cont(" |");
2053 dotest(rwsem_ABBA3, FAILURE, LOCKTYPE_RWSEM);
2054
1936 printk(" --------------------------------------------------------------------------\n"); 2055 printk(" --------------------------------------------------------------------------\n");
1937 2056
1938 /* 2057 /*
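The selftest shorthand above maps to the kernel locking API as RL/RU = read_lock/read_unlock, WL/WU = write_lock/write_unlock, L/U = spin_lock/spin_unlock, with RSL/WSL/ML the rwsem and mutex counterparts, and each test function encodes two task histories back to back. Expanded into plain calls, rlock_ABBA1 corresponds to the two tasks below; task2's write_lock() must wait for task1's read lock while task1 waits for the spinlock task2 holds, so it is a genuine ABBA. The names are illustrative, and as the comment in locking_selftest() notes, current lockdep still misses this rwlock case.

#include <linux/spinlock.h>

static DEFINE_RWLOCK(example_A);
static DEFINE_SPINLOCK(example_B);

/* History 1: read_lock(A) then spin_lock(B). */
static void example_task1(void)
{
	read_lock(&example_A);
	spin_lock(&example_B);
	spin_unlock(&example_B);
	read_unlock(&example_A);
}

/* History 2: spin_lock(B) then write_lock(A).  Combined with history 1
 * this can deadlock: the writer waits for the reader, the reader waits
 * for B.
 */
static void example_task2(void)
{
	spin_lock(&example_B);
	write_lock(&example_A);
	write_unlock(&example_A);
	spin_unlock(&example_B);
}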
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 90731e3b7e58..3644ff918434 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1510,8 +1510,15 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
1510 } 1510 }
1511 1511
1512 /* 1512 /*
1513 * The page_table_lock above provides a memory barrier 1513 * Since we took the NUMA fault, we must have observed the !accessible
1514 * with change_protection_range. 1514 * bit. Make sure all other CPUs agree with that, to avoid them
1515 * modifying the page we're about to migrate.
1516 *
1517 * Must be done under PTL such that we'll observe the relevant
1518 * inc_tlb_flush_pending().
1519 *
 1520 * We are not sure whether a pending tlb flush here is for a huge page
 1521 * mapping or not; hence use the tlb range variant.
1515 */ 1522 */
1516 if (mm_tlb_flush_pending(vma->vm_mm)) 1523 if (mm_tlb_flush_pending(vma->vm_mm))
1517 flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); 1524 flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
@@ -1521,6 +1528,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
1521 * and access rights restored. 1528 * and access rights restored.
1522 */ 1529 */
1523 spin_unlock(vmf->ptl); 1530 spin_unlock(vmf->ptl);
1531
1524 migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma, 1532 migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
1525 vmf->pmd, pmd, vmf->address, page, target_nid); 1533 vmf->pmd, pmd, vmf->address, page, target_nid);
1526 if (migrated) { 1534 if (migrated) {
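The rewritten comment documents why the flush has to happen under the page-table lock: taking the PTL is what guarantees this CPU observes a concurrent change_protection()'s inc_tlb_flush_pending(). For context, the updater side of that contract looks roughly like the sketch below; it is a simplification of what mm/mprotect.c does, not a copy, and assumes the inc/dec_tlb_flush_pending() helpers from this series.

#include <linux/mm_types.h>

/* Updater-side sketch: raise the pending count before touching PTEs,
 * flush, then lower it, so PTL holders can tell a flush is in flight.
 */
static void example_change_protection(struct mm_struct *mm)
{
	inc_tlb_flush_pending(mm);	/* visible to anyone taking the PTL */
	/* ... modify PTEs under the page table lock ...                   */
	/* ... flush_tlb_range() over the affected range ...               */
	dec_tlb_flush_pending(mm);
}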
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index ca11bc4ce205..6f319fb81718 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -267,13 +267,13 @@ static void check_memory_region(unsigned long addr,
267 check_memory_region_inline(addr, size, write, ret_ip); 267 check_memory_region_inline(addr, size, write, ret_ip);
268} 268}
269 269
270void kasan_check_read(const void *p, unsigned int size) 270void kasan_check_read(const volatile void *p, unsigned int size)
271{ 271{
272 check_memory_region((unsigned long)p, size, false, _RET_IP_); 272 check_memory_region((unsigned long)p, size, false, _RET_IP_);
273} 273}
274EXPORT_SYMBOL(kasan_check_read); 274EXPORT_SYMBOL(kasan_check_read);
275 275
276void kasan_check_write(const void *p, unsigned int size) 276void kasan_check_write(const volatile void *p, unsigned int size)
277{ 277{
278 check_memory_region((unsigned long)p, size, true, _RET_IP_); 278 check_memory_region((unsigned long)p, size, true, _RET_IP_);
279} 279}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1423da8dd16f..9327a940e373 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -66,6 +66,7 @@
66#include <linux/kthread.h> 66#include <linux/kthread.h>
67#include <linux/memcontrol.h> 67#include <linux/memcontrol.h>
68#include <linux/ftrace.h> 68#include <linux/ftrace.h>
69#include <linux/lockdep.h>
69#include <linux/nmi.h> 70#include <linux/nmi.h>
70 71
71#include <asm/sections.h> 72#include <asm/sections.h>
@@ -3513,6 +3514,47 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
3513} 3514}
3514#endif /* CONFIG_COMPACTION */ 3515#endif /* CONFIG_COMPACTION */
3515 3516
3517#ifdef CONFIG_LOCKDEP
3518struct lockdep_map __fs_reclaim_map =
3519 STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
3520
3521static bool __need_fs_reclaim(gfp_t gfp_mask)
3522{
3523 gfp_mask = current_gfp_context(gfp_mask);
3524
3525 /* no reclaim without waiting on it */
3526 if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
3527 return false;
3528
3529 /* this guy won't enter reclaim */
3530 if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
3531 return false;
3532
 3533 /* We're only interested in __GFP_FS allocations for now */
3534 if (!(gfp_mask & __GFP_FS))
3535 return false;
3536
3537 if (gfp_mask & __GFP_NOLOCKDEP)
3538 return false;
3539
3540 return true;
3541}
3542
3543void fs_reclaim_acquire(gfp_t gfp_mask)
3544{
3545 if (__need_fs_reclaim(gfp_mask))
3546 lock_map_acquire(&__fs_reclaim_map);
3547}
3548EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
3549
3550void fs_reclaim_release(gfp_t gfp_mask)
3551{
3552 if (__need_fs_reclaim(gfp_mask))
3553 lock_map_release(&__fs_reclaim_map);
3554}
3555EXPORT_SYMBOL_GPL(fs_reclaim_release);
3556#endif
3557
3516/* Perform direct synchronous page reclaim */ 3558/* Perform direct synchronous page reclaim */
3517static int 3559static int
3518__perform_reclaim(gfp_t gfp_mask, unsigned int order, 3560__perform_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -3527,7 +3569,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
3527 /* We now go into synchronous reclaim */ 3569 /* We now go into synchronous reclaim */
3528 cpuset_memory_pressure_bump(); 3570 cpuset_memory_pressure_bump();
3529 noreclaim_flag = memalloc_noreclaim_save(); 3571 noreclaim_flag = memalloc_noreclaim_save();
3530 lockdep_set_current_reclaim_state(gfp_mask); 3572 fs_reclaim_acquire(gfp_mask);
3531 reclaim_state.reclaimed_slab = 0; 3573 reclaim_state.reclaimed_slab = 0;
3532 current->reclaim_state = &reclaim_state; 3574 current->reclaim_state = &reclaim_state;
3533 3575
@@ -3535,7 +3577,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
3535 ac->nodemask); 3577 ac->nodemask);
3536 3578
3537 current->reclaim_state = NULL; 3579 current->reclaim_state = NULL;
3538 lockdep_clear_current_reclaim_state(); 3580 fs_reclaim_release(gfp_mask);
3539 memalloc_noreclaim_restore(noreclaim_flag); 3581 memalloc_noreclaim_restore(noreclaim_flag);
3540 3582
3541 cond_resched(); 3583 cond_resched();
@@ -4064,7 +4106,8 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
4064 *alloc_flags |= ALLOC_CPUSET; 4106 *alloc_flags |= ALLOC_CPUSET;
4065 } 4107 }
4066 4108
4067 lockdep_trace_alloc(gfp_mask); 4109 fs_reclaim_acquire(gfp_mask);
4110 fs_reclaim_release(gfp_mask);
4068 4111
4069 might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); 4112 might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
4070 4113
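fs_reclaim_acquire()/fs_reclaim_release() turn "this allocation may enter __GFP_FS reclaim" into a fake lock, and the acquire immediately followed by a release in prepare_alloc_pages() exists purely to record that dependency up front. A sketch of the inversion this lets lockdep report; the mutex stands in for some filesystem lock that is both held around allocations and taken again from the reclaim path, and everything here is hypothetical.

#include <linux/gfp.h>
#include <linux/mutex.h>
#include <linux/slab.h>

static DEFINE_MUTEX(example_fs_lock);

/* Path 1: allocate with __GFP_FS while holding the fs lock; this records
 * example_fs_lock -> fs_reclaim.
 */
static void *example_alloc_under_fs_lock(void)
{
	void *p;

	mutex_lock(&example_fs_lock);
	p = kmalloc(128, GFP_KERNEL);	/* GFP_NOFS here would break the cycle */
	mutex_unlock(&example_fs_lock);
	return p;
}

/* Path 2: the shrinker/writeback side takes the same lock from within
 * reclaim, i.e. under fs_reclaim, recording fs_reclaim -> example_fs_lock
 * and completing the cycle lockdep will now complain about.
 */
static void example_shrinker_path(void)
{
	mutex_lock(&example_fs_lock);
	/* ... write back or free objects ... */
	mutex_unlock(&example_fs_lock);
}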
diff --git a/mm/slab.h b/mm/slab.h
index 6885e1192ec5..073362816acc 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -43,6 +43,7 @@ struct kmem_cache {
43#include <linux/kasan.h> 43#include <linux/kasan.h>
44#include <linux/kmemleak.h> 44#include <linux/kmemleak.h>
45#include <linux/random.h> 45#include <linux/random.h>
46#include <linux/sched/mm.h>
46 47
47/* 48/*
48 * State of the slab allocator. 49 * State of the slab allocator.
@@ -412,7 +413,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
412 gfp_t flags) 413 gfp_t flags)
413{ 414{
414 flags &= gfp_allowed_mask; 415 flags &= gfp_allowed_mask;
415 lockdep_trace_alloc(flags); 416
417 fs_reclaim_acquire(flags);
418 fs_reclaim_release(flags);
419
416 might_sleep_if(gfpflags_allow_blocking(flags)); 420 might_sleep_if(gfpflags_allow_blocking(flags));
417 421
418 if (should_failslab(s, flags)) 422 if (should_failslab(s, flags))
diff --git a/mm/slob.c b/mm/slob.c
index 1bae78d71096..a8bd6fa11a66 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -432,7 +432,8 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
432 432
433 gfp &= gfp_allowed_mask; 433 gfp &= gfp_allowed_mask;
434 434
435 lockdep_trace_alloc(gfp); 435 fs_reclaim_acquire(gfp);
436 fs_reclaim_release(gfp);
436 437
437 if (size < PAGE_SIZE - align) { 438 if (size < PAGE_SIZE - align) {
438 if (!size) 439 if (!size)
@@ -538,7 +539,8 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
538 539
539 flags &= gfp_allowed_mask; 540 flags &= gfp_allowed_mask;
540 541
541 lockdep_trace_alloc(flags); 542 fs_reclaim_acquire(flags);
543 fs_reclaim_release(flags);
542 544
543 if (c->size < PAGE_SIZE) { 545 if (c->size < PAGE_SIZE) {
544 b = slob_alloc(c->size, flags, c->align, node); 546 b = slob_alloc(c->size, flags, c->align, node);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1af041930a6..f957afe900ec 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3525,8 +3525,6 @@ static int kswapd(void *p)
3525 }; 3525 };
3526 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); 3526 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
3527 3527
3528 lockdep_set_current_reclaim_state(GFP_KERNEL);
3529
3530 if (!cpumask_empty(cpumask)) 3528 if (!cpumask_empty(cpumask))
3531 set_cpus_allowed_ptr(tsk, cpumask); 3529 set_cpus_allowed_ptr(tsk, cpumask);
3532 current->reclaim_state = &reclaim_state; 3530 current->reclaim_state = &reclaim_state;
@@ -3585,14 +3583,15 @@ kswapd_try_sleep:
3585 */ 3583 */
3586 trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, 3584 trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
3587 alloc_order); 3585 alloc_order);
3586 fs_reclaim_acquire(GFP_KERNEL);
3588 reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); 3587 reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
3588 fs_reclaim_release(GFP_KERNEL);
3589 if (reclaim_order < alloc_order) 3589 if (reclaim_order < alloc_order)
3590 goto kswapd_try_sleep; 3590 goto kswapd_try_sleep;
3591 } 3591 }
3592 3592
3593 tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); 3593 tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
3594 current->reclaim_state = NULL; 3594 current->reclaim_state = NULL;
3595 lockdep_clear_current_reclaim_state();
3596 3595
3597 return 0; 3596 return 0;
3598} 3597}
@@ -3655,14 +3654,14 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
3655 unsigned int noreclaim_flag; 3654 unsigned int noreclaim_flag;
3656 3655
3657 noreclaim_flag = memalloc_noreclaim_save(); 3656 noreclaim_flag = memalloc_noreclaim_save();
3658 lockdep_set_current_reclaim_state(sc.gfp_mask); 3657 fs_reclaim_acquire(sc.gfp_mask);
3659 reclaim_state.reclaimed_slab = 0; 3658 reclaim_state.reclaimed_slab = 0;
3660 p->reclaim_state = &reclaim_state; 3659 p->reclaim_state = &reclaim_state;
3661 3660
3662 nr_reclaimed = do_try_to_free_pages(zonelist, &sc); 3661 nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
3663 3662
3664 p->reclaim_state = NULL; 3663 p->reclaim_state = NULL;
3665 lockdep_clear_current_reclaim_state(); 3664 fs_reclaim_release(sc.gfp_mask);
3666 memalloc_noreclaim_restore(noreclaim_flag); 3665 memalloc_noreclaim_restore(noreclaim_flag);
3667 3666
3668 return nr_reclaimed; 3667 return nr_reclaimed;
@@ -3847,7 +3846,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
3847 */ 3846 */
3848 noreclaim_flag = memalloc_noreclaim_save(); 3847 noreclaim_flag = memalloc_noreclaim_save();
3849 p->flags |= PF_SWAPWRITE; 3848 p->flags |= PF_SWAPWRITE;
3850 lockdep_set_current_reclaim_state(sc.gfp_mask); 3849 fs_reclaim_acquire(sc.gfp_mask);
3851 reclaim_state.reclaimed_slab = 0; 3850 reclaim_state.reclaimed_slab = 0;
3852 p->reclaim_state = &reclaim_state; 3851 p->reclaim_state = &reclaim_state;
3853 3852
@@ -3862,9 +3861,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
3862 } 3861 }
3863 3862
3864 p->reclaim_state = NULL; 3863 p->reclaim_state = NULL;
3864 fs_reclaim_release(gfp_mask);
3865 current->flags &= ~PF_SWAPWRITE; 3865 current->flags &= ~PF_SWAPWRITE;
3866 memalloc_noreclaim_restore(noreclaim_flag); 3866 memalloc_noreclaim_restore(noreclaim_flag);
3867 lockdep_clear_current_reclaim_state();
3868 return sc.nr_reclaimed >= nr_pages; 3867 return sc.nr_reclaimed >= nr_pages;
3869} 3868}
3870 3869
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 62344804baae..38e795e0c4bf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1810,8 +1810,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1810static struct static_key udp_encap_needed __read_mostly; 1810static struct static_key udp_encap_needed __read_mostly;
1811void udp_encap_enable(void) 1811void udp_encap_enable(void)
1812{ 1812{
1813 if (!static_key_enabled(&udp_encap_needed)) 1813 static_key_enable(&udp_encap_needed);
1814 static_key_slow_inc(&udp_encap_needed);
1815} 1814}
1816EXPORT_SYMBOL(udp_encap_enable); 1815EXPORT_SYMBOL(udp_encap_enable);
1817 1816
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d6886228e1d0..56030d45823a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -575,8 +575,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
575static struct static_key udpv6_encap_needed __read_mostly; 575static struct static_key udpv6_encap_needed __read_mostly;
576void udpv6_encap_enable(void) 576void udpv6_encap_enable(void)
577{ 577{
578 if (!static_key_enabled(&udpv6_encap_needed)) 578 static_key_enable(&udpv6_encap_needed);
579 static_key_slow_inc(&udpv6_encap_needed);
580} 579}
581EXPORT_SYMBOL(udpv6_encap_enable); 580EXPORT_SYMBOL(udpv6_encap_enable);
582 581
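Both udp hunks replace the open-coded "if (!static_key_enabled()) static_key_slow_inc()" idiom with static_key_enable(), which performs the enabled check and the increment atomically under the jump-label machinery, so two racing callers can no longer bump the key twice. The resulting usage pattern, with an invented key name:

#include <linux/jump_label.h>

static struct static_key example_encap_needed __read_mostly;

/* Idempotent: safe to call from several places or concurrently, the key
 * ends up enabled exactly once, unlike the old check-then-inc sequence.
 */
void example_encap_enable(void)
{
	static_key_enable(&example_encap_needed);
}

static bool example_encap_active(void)
{
	return static_key_false(&example_encap_needed);
}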