108 files changed, 3460 insertions, 1112 deletions
diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
new file mode 100644
index 000000000000..5550bfdcce5f
--- /dev/null
+++ b/Documentation/atomic_bitops.txt
@@ -0,0 +1,66 @@ | |||
1 | |||
2 | On atomic bitops. | ||
3 | |||
4 | |||
5 | While our bitmap_{}() functions are non-atomic, we have a number of operations | ||
6 | operating on single bits in a bitmap that are atomic. | ||
7 | |||
8 | |||
9 | API | ||
10 | --- | ||
11 | |||
12 | The single bit operations are: | ||
13 | |||
14 | Non-RMW ops: | ||
15 | |||
16 | test_bit() | ||
17 | |||
18 | RMW atomic operations without return value: | ||
19 | |||
20 | {set,clear,change}_bit() | ||
21 | clear_bit_unlock() | ||
22 | |||
23 | RMW atomic operations with return value: | ||
24 | |||
25 | test_and_{set,clear,change}_bit() | ||
26 | test_and_set_bit_lock() | ||
27 | |||
28 | Barriers: | ||
29 | |||
30 | smp_mb__{before,after}_atomic() | ||
31 | |||
32 | |||
33 | All RMW atomic operations have a '__' prefixed variant which is non-atomic. | ||
34 | |||
35 | |||
36 | SEMANTICS | ||
37 | --------- | ||
38 | |||
39 | Non-atomic ops: | ||
40 | |||
41 | In particular __clear_bit_unlock() suffers the same issue as atomic_set(), | ||
42 | which is why the generic version maps to clear_bit_unlock(), see atomic_t.txt. | ||
43 | |||
44 | |||
45 | RMW ops: | ||
46 | |||
47 | The test_and_{}_bit() operations return the original value of the bit. | ||
48 | |||
49 | |||
50 | ORDERING | ||
51 | -------- | ||
52 | |||
53 | Like with atomic_t, the rule of thumb is: | ||
54 | |||
55 | - non-RMW operations are unordered; | ||
56 | |||
57 | - RMW operations that have no return value are unordered; | ||
58 | |||
59 | - RMW operations that have a return value are fully ordered. | ||
60 | |||
61 | Except for test_and_set_bit_lock() which has ACQUIRE semantics and | ||
62 | clear_bit_unlock() which has RELEASE semantics. | ||
63 | |||
64 | Since a platform only has a single means of achieving atomic operations, | ||
65 | the same barriers as for atomic_t are used; see atomic_t.txt. | ||
66 | |||
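To make the ACQUIRE/RELEASE pair above concrete, here is a minimal bit-lock sketch (illustrative only; the flag word, bit number and helper names are made up and are not part of this patch):

    #include <linux/bitops.h>

    #define MY_LOCK_BIT	0		/* hypothetical bit used as a lock */
    static unsigned long my_flags;	/* hypothetical flag word */

    static void my_lock(void)
    {
    	/* ACQUIRE: critical-section accesses cannot move before the bit is set. */
    	while (test_and_set_bit_lock(MY_LOCK_BIT, &my_flags))
    		cpu_relax();
    }

    static void my_unlock(void)
    {
    	/* RELEASE: critical-section accesses cannot move past clearing the bit. */
    	clear_bit_unlock(MY_LOCK_BIT, &my_flags);
    }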
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
new file mode 100644
index 000000000000..913396ac5824
--- /dev/null
+++ b/Documentation/atomic_t.txt
@@ -0,0 +1,242 @@ | |||
1 | |||
2 | On atomic types (atomic_t atomic64_t and atomic_long_t). | ||
3 | |||
4 | The atomic type provides an interface to the architecture's means of atomic | ||
5 | RMW operations between CPUs (atomic operations on MMIO are not supported and | ||
6 | can lead to fatal traps on some platforms). | ||
7 | |||
8 | API | ||
9 | --- | ||
10 | |||
11 | The 'full' API consists of (atomic64_ and atomic_long_ prefixes omitted for | ||
12 | brevity): | ||
13 | |||
14 | Non-RMW ops: | ||
15 | |||
16 | atomic_read(), atomic_set() | ||
17 | atomic_read_acquire(), atomic_set_release() | ||
18 | |||
19 | |||
20 | RMW atomic operations: | ||
21 | |||
22 | Arithmetic: | ||
23 | |||
24 | atomic_{add,sub,inc,dec}() | ||
25 | atomic_{add,sub,inc,dec}_return{,_relaxed,_acquire,_release}() | ||
26 | atomic_fetch_{add,sub,inc,dec}{,_relaxed,_acquire,_release}() | ||
27 | |||
28 | |||
29 | Bitwise: | ||
30 | |||
31 | atomic_{and,or,xor,andnot}() | ||
32 | atomic_fetch_{and,or,xor,andnot}{,_relaxed,_acquire,_release}() | ||
33 | |||
34 | |||
35 | Swap: | ||
36 | |||
37 | atomic_xchg{,_relaxed,_acquire,_release}() | ||
38 | atomic_cmpxchg{,_relaxed,_acquire,_release}() | ||
39 | atomic_try_cmpxchg{,_relaxed,_acquire,_release}() | ||
40 | |||
41 | |||
42 | Reference count (but please see refcount_t): | ||
43 | |||
44 | atomic_add_unless(), atomic_inc_not_zero() | ||
45 | atomic_sub_and_test(), atomic_dec_and_test() | ||
46 | |||
47 | |||
48 | Misc: | ||
49 | |||
50 | atomic_inc_and_test(), atomic_add_negative() | ||
51 | atomic_dec_unless_positive(), atomic_inc_unless_negative() | ||
52 | |||
53 | |||
54 | Barriers: | ||
55 | |||
56 | smp_mb__{before,after}_atomic() | ||
57 | |||
58 | |||
59 | |||
60 | SEMANTICS | ||
61 | --------- | ||
62 | |||
63 | Non-RMW ops: | ||
64 | |||
65 | The non-RMW ops are (typically) regular LOADs and STOREs and are canonically | ||
66 | implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and | ||
67 | smp_store_release() respectively. | ||
68 | |||
69 | The one detail to this is that atomic_set{}() should be observable to the RMW | ||
70 | ops. That is: | ||
71 | |||
72 | C atomic-set | ||
73 | |||
74 | { | ||
75 | atomic_set(v, 1); | ||
76 | } | ||
77 | |||
78 | P1(atomic_t *v) | ||
79 | { | ||
80 | atomic_add_unless(v, 1, 0); | ||
81 | } | ||
82 | |||
83 | P2(atomic_t *v) | ||
84 | { | ||
85 | atomic_set(v, 0); | ||
86 | } | ||
87 | |||
88 | exists | ||
89 | (v=2) | ||
90 | |||
91 | In this case we would expect the atomic_set() from CPU2 to either happen | ||
92 | before the atomic_add_unless(), in which case that latter one would no-op, or | ||
93 | _after_ in which case we'd overwrite its result. In no case is "2" a valid | ||
94 | outcome. | ||
95 | |||
96 | This is typically true on 'normal' platforms, where a regular competing STORE | ||
97 | will invalidate a LL/SC or fail a CMPXCHG. | ||
98 | |||
99 | The obvious case where this is not so is when we need to implement atomic ops | ||
100 | with a lock: | ||
101 | |||
102 |    CPU0                                            CPU1 | ||
103 | ||
104 |    atomic_add_unless(v, 1, 0); | ||
105 |      lock(); | ||
106 |      ret = READ_ONCE(v->counter); // == 1 | ||
107 |                                                    atomic_set(v, 0); | ||
108 |      if (ret != u)                                   WRITE_ONCE(v->counter, 0); | ||
109 |        WRITE_ONCE(v->counter, ret + 1); | ||
110 |      unlock(); | ||
111 | |||
112 | the typical solution is to then implement atomic_set{}() with atomic_xchg(). | ||
113 | |||
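As an illustration of that solution, here is a sketch of how a lock-based fallback might implement the two operations so that atomic_set() participates in the same serialization (arch_atomic_lock()/arch_atomic_unlock() are hypothetical helpers, not the actual generic code):

    static inline void my_atomic_set(atomic_t *v, int i)
    {
    	/*
    	 * Implemented as a real RMW under the same lock as the other
    	 * atomic ops, so a concurrent atomic_add_unless() cannot have
    	 * its update overwritten by a plain store.
    	 */
    	(void)atomic_xchg(v, i);
    }

    static inline int my_atomic_add_unless(atomic_t *v, int a, int u)
    {
    	unsigned long flags;
    	int ret;

    	arch_atomic_lock(&flags);		/* hypothetical */
    	ret = v->counter;
    	if (ret != u)
    		v->counter += a;
    	arch_atomic_unlock(&flags);		/* hypothetical */

    	return ret != u;
    }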
114 | |||
115 | RMW ops: | ||
116 | |||
117 | These come in various forms: | ||
118 | |||
119 | - plain operations without return value: atomic_{}() | ||
120 | |||
121 | - operations which return the modified value: atomic_{}_return() | ||
122 | |||
123 | these are limited to the arithmetic operations because those are | ||
124 | reversible. Bitops are irreversible and therefore the modified value | ||
125 | is of dubious utility. | ||
126 | |||
127 | - operations which return the original value: atomic_fetch_{}() | ||
128 | |||
129 | - swap operations: xchg(), cmpxchg() and try_cmpxchg() | ||
130 | |||
131 | - misc; the special purpose operations that are commonly used and would, | ||
132 | given the interface, normally be implemented using (try_)cmpxchg loops but | ||
133 | are time critical and can, (typically) on LL/SC architectures, be more | ||
134 | efficiently implemented. | ||
135 | |||
136 | All these operations are SMP atomic; that is, the operations (for a single | ||
137 | atomic variable) can be fully ordered and no intermediate state is lost or | ||
138 | visible. | ||
139 | |||
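As an example of the last category, a special-purpose operation such as atomic_inc_not_zero() can, interface-wise, be expressed as a try_cmpxchg() loop; a sketch (the kernel may well use a more efficient arch-specific implementation):

    static inline bool sketch_inc_not_zero(atomic_t *v)
    {
    	int c = atomic_read(v);

    	do {
    		if (!c)		/* already zero: must not increment */
    			return false;
    	} while (!atomic_try_cmpxchg(v, &c, c + 1));

    	return true;
    }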
140 | |||
141 | ORDERING (go read memory-barriers.txt first) | ||
142 | -------- | ||
143 | |||
144 | The rule of thumb: | ||
145 | |||
146 | - non-RMW operations are unordered; | ||
147 | |||
148 | - RMW operations that have no return value are unordered; | ||
149 | |||
150 | - RMW operations that have a return value are fully ordered; | ||
151 | |||
152 | - RMW operations that are conditional are unordered on FAILURE, | ||
153 | otherwise the above rules apply. | ||
154 | |||
155 | Except of course when an operation has an explicit ordering like: | ||
156 | |||
157 | {}_relaxed: unordered | ||
158 | {}_acquire: the R of the RMW (or atomic_read) is an ACQUIRE | ||
159 | {}_release: the W of the RMW (or atomic_set) is a RELEASE | ||
160 | |||
161 | Where 'unordered' is against other memory locations. Address dependencies are | ||
162 | not defeated. | ||
163 | |||
164 | Fully ordered primitives are ordered against everything prior and everything | ||
165 | subsequent. Therefore a fully ordered primitive is like having an smp_mb() | ||
166 | before and an smp_mb() after the primitive. | ||
167 | |||
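To make the _acquire/_release variants concrete, here is a small hand-off sketch (all names are made up; the default fully ordered ops would also work, they simply provide more ordering than is needed here):

    static int payload;
    static atomic_t ready = ATOMIC_INIT(0);

    static void producer(void)
    {
    	payload = 42;				/* A: plain store */
    	atomic_fetch_add_release(1, &ready);	/* B: orders A before the store of B */
    }

    static void consumer(void)
    {
    	if (atomic_read_acquire(&ready))	/* C: orders C before D */
    		WARN_ON(payload != 42);		/* D: guaranteed to observe A */
    }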
168 | |||
169 | The barriers: | ||
170 | |||
171 | smp_mb__{before,after}_atomic() | ||
172 | |||
173 | only apply to the RMW ops and can be used to augment/upgrade the ordering | ||
174 | inherent to the used atomic op. These barriers provide a full smp_mb(). | ||
175 | |||
176 | These helper barriers exist because architectures have varying implicit | ||
177 | ordering on their SMP atomic primitives. For example our TSO architectures | ||
178 | provide fully ordered atomics and these barriers are no-ops. | ||
179 | |||
180 | Thus: | ||
181 | |||
182 | atomic_fetch_add(); | ||
183 | |||
184 | is equivalent to: | ||
185 | |||
186 | smp_mb__before_atomic(); | ||
187 | atomic_fetch_add_relaxed(); | ||
188 | smp_mb__after_atomic(); | ||
189 | |||
190 | However the atomic_fetch_add() might be implemented more efficiently. | ||
191 | |||
192 | Further, while something like: | ||
193 | |||
194 | smp_mb__before_atomic(); | ||
195 | atomic_dec(&X); | ||
196 | |||
197 | is a 'typical' RELEASE pattern, the barrier is strictly stronger than | ||
198 | a RELEASE. Similarly, something like: | ||
199 | |||
200 | atomic_inc(&X); | ||
201 | smp_mb__after_atomic(); | ||
202 | |||
203 | is an ACQUIRE pattern (though very much not typical), but again the barrier is | ||
204 | strictly stronger than ACQUIRE. As illustrated: | ||
205 | |||
206 | C strong-acquire | ||
207 | |||
208 | { | ||
209 | } | ||
210 | |||
211 | P1(int *x, atomic_t *y) | ||
212 | { | ||
213 | r0 = READ_ONCE(*x); | ||
214 | smp_rmb(); | ||
215 | r1 = atomic_read(y); | ||
216 | } | ||
217 | |||
218 | P2(int *x, atomic_t *y) | ||
219 | { | ||
220 | atomic_inc(y); | ||
221 | smp_mb__after_atomic(); | ||
222 | WRITE_ONCE(*x, 1); | ||
223 | } | ||
224 | |||
225 | exists | ||
226 | (r0=1 /\ r1=0) | ||
227 | |||
228 | This should not happen; but a hypothetical atomic_inc_acquire() -- | ||
229 | (void)atomic_fetch_inc_acquire() for instance -- would allow the outcome, | ||
230 | since then: | ||
231 | |||
232 |    P1                              P2 | ||
233 | ||
234 |                                    t = LL.acq *y (0) | ||
235 |                                    t++; | ||
236 |                                    *x = 1; | ||
237 |    r0 = *x (1) | ||
238 |    RMB | ||
239 |    r1 = *y (0) | ||
240 |                                    SC *y, t; | ||
241 | |||
242 | is allowed. | ||
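Finally, a sketch of the 'typical RELEASE pattern' mentioned above, in a made-up object-teardown context (the structure and function are illustrative, not taken from the kernel):

    struct my_obj {
    	atomic_t	refs;
    	bool		dead;
    };

    static void my_obj_mark_dead_and_put(struct my_obj *obj)
    {
    	obj->dead = true;		/* must be visible before the decrement */
    	smp_mb__before_atomic();	/* atomic_dec() on its own is unordered */
    	atomic_dec(&obj->refs);		/* now at least as strong as a RELEASE  */
    }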
diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt
new file mode 100644
index 000000000000..bdf1423d5f99
--- /dev/null
+++ b/Documentation/locking/crossrelease.txt
@@ -0,0 +1,874 @@ | |||
1 | Crossrelease | ||
2 | ============ | ||
3 | |||
4 | Started by Byungchul Park <byungchul.park@lge.com> | ||
5 | |||
6 | Contents: | ||
7 | |||
8 | (*) Background | ||
9 | |||
10 | - What causes deadlock | ||
11 | - How lockdep works | ||
12 | |||
13 | (*) Limitation | ||
14 | |||
15 | - Limit lockdep | ||
16 | - Pros from the limitation | ||
17 | - Cons from the limitation | ||
18 | - Relax the limitation | ||
19 | |||
20 | (*) Crossrelease | ||
21 | |||
22 | - Introduce crossrelease | ||
23 | - Introduce commit | ||
24 | |||
25 | (*) Implementation | ||
26 | |||
27 | - Data structures | ||
28 | - How crossrelease works | ||
29 | |||
30 | (*) Optimizations | ||
31 | |||
32 | - Avoid duplication | ||
33 | - Lockless for hot paths | ||
34 | |||
35 | (*) APPENDIX A: What lockdep does to work aggressively | ||
36 | |||
37 | (*) APPENDIX B: How to avoid adding false dependencies | ||
38 | |||
39 | |||
40 | ========== | ||
41 | Background | ||
42 | ========== | ||
43 | |||
44 | What causes deadlock | ||
45 | -------------------- | ||
46 | |||
47 | A deadlock occurs when a context is waiting for an event to happen, | ||
48 | which is impossible because another (or the same) context that can | ||
49 | trigger the event is also waiting for another (or the same) event to | ||
50 | happen, which is likewise impossible for the same reason. | ||
51 | |||
52 | For example: | ||
53 | |||
54 | A context going to trigger event C is waiting for event A to happen. | ||
55 | A context going to trigger event A is waiting for event B to happen. | ||
56 | A context going to trigger event B is waiting for event C to happen. | ||
57 | |||
58 | A deadlock occurs when these three wait operations run at the same time, | ||
59 | because event C cannot be triggered if event A does not happen, which in | ||
60 | turn cannot be triggered if event B does not happen, which in turn | ||
61 | cannot be triggered if event C does not happen. After all, no event can | ||
62 | be triggered since none of them ever meets its condition to wake up. | ||
63 | |||
64 | A dependency might exist between two waiters, and a deadlock might happen | ||
65 | due to an incorrect relationship between dependencies. Thus, we must | ||
66 | first define what a dependency is. A dependency exists between them if: | ||
67 | |||
68 | 1. There are two waiters waiting for each event at a given time. | ||
69 | 2. The only way to wake up each waiter is to trigger its event. | ||
70 | 3. Whether one can be woken up depends on whether the other can. | ||
71 | |||
72 | Each wait in the example creates its dependency like: | ||
73 | |||
74 | Event C depends on event A. | ||
75 | Event A depends on event B. | ||
76 | Event B depends on event C. | ||
77 | |||
78 | NOTE: Precisely speaking, a dependency is one between whether a | ||
79 | waiter for an event can be woken up and whether another waiter for | ||
80 | another event can be woken up. However from now on, we will describe | ||
81 | a dependency as if it's one between an event and another event for | ||
82 | simplicity. | ||
83 | |||
84 | And they form circular dependencies like: | ||
85 | |||
86 | -> C -> A -> B - | ||
87 | / \ | ||
88 | \ / | ||
89 | ---------------- | ||
90 | |||
91 | where 'A -> B' means that event A depends on event B. | ||
92 | |||
93 | Such circular dependencies lead to a deadlock since no waiter can meet | ||
94 | its condition to wake up as described. | ||
95 | |||
96 | CONCLUSION | ||
97 | |||
98 | Circular dependencies cause a deadlock. | ||
99 | |||
100 | |||
101 | How lockdep works | ||
102 | ----------------- | ||
103 | |||
104 | Lockdep tries to detect a deadlock by checking dependencies created by | ||
105 | lock operations, acquire and release. Waiting for a lock corresponds to | ||
106 | waiting for an event, and releasing a lock corresponds to triggering an | ||
107 | event in the previous section. | ||
108 | |||
109 | In short, lockdep does: | ||
110 | |||
111 | 1. Detect a new dependency. | ||
112 | 2. Add the dependency into a global graph. | ||
113 | 3. Check if that makes dependencies circular. | ||
114 | 4. Report a deadlock or its possibility if so. | ||
115 | |||
116 | For example, consider a graph built by lockdep that looks like: | ||
117 | |||
118 | A -> B - | ||
119 | \ | ||
120 | -> E | ||
121 | / | ||
122 | C -> D - | ||
123 | |||
124 | where A, B,..., E are different lock classes. | ||
125 | |||
126 | Lockdep will add a dependency into the graph on detection of a new | ||
127 | dependency. For example, it will add a dependency 'E -> C' when a new | ||
128 | dependency between lock E and lock C is detected. Then the graph will be: | ||
129 | |||
130 | A -> B - | ||
131 | \ | ||
132 | -> E - | ||
133 | / \ | ||
134 | -> C -> D - \ | ||
135 | / / | ||
136 | \ / | ||
137 | ------------------ | ||
138 | |||
139 | where A, B,..., E are different lock classes. | ||
140 | |||
141 | This graph contains a subgraph which demonstrates circular dependencies: | ||
142 | |||
143 | -> E - | ||
144 | / \ | ||
145 | -> C -> D - \ | ||
146 | / / | ||
147 | \ / | ||
148 | ------------------ | ||
149 | |||
150 | where C, D and E are different lock classes. | ||
151 | |||
152 | This is the condition under which a deadlock might occur. Lockdep | ||
153 | reports it on detection after adding a new dependency. This is how | ||
154 | lockdep works. | ||
155 | |||
156 | CONCLUSION | ||
157 | |||
158 | Lockdep detects a deadlock or its possibility by checking if circular | ||
159 | dependencies were created after adding each new dependency. | ||
160 | |||
161 | |||
162 | ========== | ||
163 | Limitation | ||
164 | ========== | ||
165 | |||
166 | Limit lockdep | ||
167 | ------------- | ||
168 | |||
169 | By limiting lockdep to work only on typical locks, e.g. spin locks and | ||
170 | mutexes, which are released within the acquire context, the | ||
171 | implementation becomes simple but its capacity for detection becomes | ||
172 | limited. Let's check the pros and cons in the next sections. | ||
173 | |||
174 | |||
175 | Pros from the limitation | ||
176 | ------------------------ | ||
177 | |||
178 | Given the limitation, when acquiring a lock, the locks in held_locks | ||
179 | cannot be released while the context is blocked waiting to acquire the | ||
180 | new lock, which means all waiters for the locks in held_locks are | ||
181 | stuck. This is exactly the situation that creates dependencies between | ||
182 | each lock in held_locks and the lock to acquire. | ||
183 | |||
184 | For example: | ||
185 | |||
186 | CONTEXT X | ||
187 | --------- | ||
188 | acquire A | ||
189 | acquire B /* Add a dependency 'A -> B' */ | ||
190 | release B | ||
191 | release A | ||
192 | |||
193 | where A and B are different lock classes. | ||
194 | |||
195 | When acquiring lock A, the held_locks of CONTEXT X is empty thus no | ||
196 | dependency is added. But when acquiring lock B, lockdep detects and adds | ||
197 | a new dependency 'A -> B' between lock A in the held_locks and lock B. | ||
198 | They can be simply added whenever acquiring each lock. | ||
199 | |||
200 | In addition, the data required by lockdep lives in a local structure, | ||
201 | held_locks, embedded in task_struct. By forcing accesses to happen only | ||
202 | within the owning context, lockdep can avoid races on this local data | ||
203 | without using explicit locks. | ||
204 | |||
205 | Lastly, lockdep only needs to keep the locks currently being held to | ||
206 | build a dependency graph. If the limitation were relaxed, it would also | ||
207 | need to keep locks that have already been released, because the decision | ||
208 | whether they created dependencies might be deferred for a long time. | ||
209 | |||
210 | To sum up, we can expect several advantages from the limitation: | ||
211 | |||
212 | 1. Lockdep can easily identify a dependency when acquiring a lock. | ||
213 | 2. Races are avoidable while accessing local locks in a held_locks. | ||
214 | 3. Lockdep only needs to keep locks currently being held. | ||
215 | |||
216 | CONCLUSION | ||
217 | |||
218 | Given the limitation, the implementation becomes simple and efficient. | ||
219 | |||
220 | |||
221 | Cons from the limitation | ||
222 | ------------------------ | ||
223 | |||
224 | Given the limitation, lockdep is applicable only to typical locks. For | ||
225 | example, page locks for page access or completions for synchronization | ||
226 | cannot work with lockdep. | ||
227 | |||
228 | Can we detect deadlocks below, under the limitation? | ||
229 | |||
230 | Example 1: | ||
231 | |||
232 |    CONTEXT X               CONTEXT Y               CONTEXT Z | ||
233 |    ---------               ---------               ---------- | ||
234 |                            mutex_lock A | ||
235 |    lock_page B | ||
236 |                            lock_page B | ||
237 |                                                    mutex_lock A /* DEADLOCK */ | ||
238 |                                                    unlock_page B held by X | ||
239 |                            unlock_page B | ||
240 |                            mutex_unlock A | ||
241 |                                                    mutex_unlock A | ||
242 | |||
243 | where A and B are different lock classes. | ||
244 | |||
245 | No, we cannot. | ||
246 | |||
247 | Example 2: | ||
248 | |||
249 |    CONTEXT X               CONTEXT Y | ||
250 |    ---------               --------- | ||
251 |                            mutex_lock A | ||
252 |    mutex_lock A | ||
253 |                            wait_for_complete B /* DEADLOCK */ | ||
254 |    complete B | ||
255 |                            mutex_unlock A | ||
256 |    mutex_unlock A | ||
257 | |||
258 | where A is a lock class and B is a completion variable. | ||
259 | |||
260 | No, we cannot. | ||
261 | |||
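For reference, Example 2 corresponds to code along these lines (a sketch only; the lock, completion and function names are made up):

    static DEFINE_MUTEX(a);
    static DECLARE_COMPLETION(b);

    static void context_x(void)
    {
    	mutex_lock(&a);		/* blocks: A is already held by context Y */
    	complete(&b);		/* never reached, so Y never wakes up      */
    	mutex_unlock(&a);
    }

    static void context_y(void)
    {
    	mutex_lock(&a);
    	wait_for_completion(&b);	/* DEADLOCK: waits for X, which waits for A */
    	mutex_unlock(&a);
    }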
262 | CONCLUSION | ||
263 | |||
264 | Given the limitation, lockdep cannot detect a deadlock or its | ||
265 | possibility caused by page locks or completions. | ||
266 | |||
267 | |||
268 | Relax the limitation | ||
269 | -------------------- | ||
270 | |||
271 | Under the limitation, things to create dependencies are limited to | ||
272 | typical locks. However, synchronization primitives like page locks and | ||
273 | completions, which are allowed to be released in any context, also | ||
274 | create dependencies and can cause a deadlock. So lockdep should track | ||
275 | these locks to do a better job. We have to relax the limitation for | ||
276 | these locks to work with lockdep. | ||
277 | |||
278 | Detecting dependencies is very important for lockdep to work because | ||
279 | adding a dependency means adding an opportunity to check whether it | ||
280 | causes a deadlock. The more dependencies lockdep adds, the more | ||
281 | thoroughly it works. Thus lockdep has to do its best to detect and add | ||
282 | as many true dependencies into a graph as possible. | ||
283 | |||
284 | For example, considering only typical locks, lockdep builds a graph like: | ||
285 | |||
286 | A -> B - | ||
287 | \ | ||
288 | -> E | ||
289 | / | ||
290 | C -> D - | ||
291 | |||
292 | where A, B,..., E are different lock classes. | ||
293 | |||
294 | On the other hand, under the relaxation, additional dependencies might | ||
295 | be created and added. Assuming additional 'FX -> C' and 'E -> GX' are | ||
296 | added thanks to the relaxation, the graph will be: | ||
297 | |||
298 | A -> B - | ||
299 | \ | ||
300 | -> E -> GX | ||
301 | / | ||
302 | FX -> C -> D - | ||
303 | |||
304 | where A, B,..., E, FX and GX are different lock classes, and a suffix | ||
305 | 'X' is added on non-typical locks. | ||
306 | |||
307 | The latter graph gives us more chances to check circular dependencies | ||
308 | than the former. However, it might suffer performance degradation, since | ||
309 | relaxing the limitation, which is what keeps lockdep's design and | ||
310 | implementation efficient, inevitably introduces some overhead. So lockdep | ||
311 | should provide two options, strong detection and efficient detection. | ||
312 | |||
313 | Choosing efficient detection: | ||
314 | |||
315 | Lockdep works with only locks restricted to be released within the | ||
316 | acquire context. However, lockdep works efficiently. | ||
317 | |||
318 | Choosing strong detection: | ||
319 | |||
320 | Lockdep works with all synchronization primitives. However, lockdep | ||
321 | suffers performance degradation. | ||
322 | |||
323 | CONCLUSION | ||
324 | |||
325 | Relaxing the limitation, lockdep can add additional dependencies giving | ||
326 | additional opportunities to check circular dependencies. | ||
327 | |||
328 | |||
329 | ============ | ||
330 | Crossrelease | ||
331 | ============ | ||
332 | |||
333 | Introduce crossrelease | ||
334 | ---------------------- | ||
335 | |||
336 | In order to allow lockdep to handle additional dependencies created by | ||
337 | locks that might be released in any context, namely 'crosslocks', we | ||
338 | have to be able to identify the dependencies created by crosslocks. The | ||
339 | proposed 'crossrelease' feature provides a way to do that. | ||
340 | ||
341 | The crossrelease feature has to: | ||
342 | |||
343 | 1. Identify dependencies created by crosslocks. | ||
344 | 2. Add the dependencies into a dependency graph. | ||
345 | |||
346 | That's all. Once a meaningful dependency is added to the graph, lockdep | ||
347 | works with the graph as it always has. The most important thing the | ||
348 | crossrelease feature has to do is to correctly identify and add true | ||
349 | dependencies into the global graph. | ||
350 | |||
351 | A dependency, e.g. 'A -> B', can be identified only in A's release | ||
352 | context, because the decision required to identify the dependency, | ||
353 | namely whether A can be released so that a waiter for A can be woken | ||
354 | up, can only be made there. It cannot be made anywhere other than A's | ||
355 | release context. | ||
356 | |||
357 | This is not a problem for typical locks, because each acquire context is | ||
358 | the same as its release context, so lockdep can decide whether a lock | ||
359 | can be released while still in the acquire context. For crosslocks, | ||
360 | however, lockdep cannot make the decision in the acquire context but has | ||
361 | to wait until the release context is identified. | ||
362 | |||
363 | Therefore, a deadlock caused by crosslocks cannot be detected at the | ||
364 | moment it happens, because the dependencies cannot be identified until | ||
365 | the crosslocks are released. However, deadlock possibilities can still be | ||
366 | detected, and that is very worthwhile. See 'APPENDIX A' to check why. | ||
367 | |||
368 | CONCLUSION | ||
369 | |||
370 | Using crossrelease feature, lockdep can work with what might be released | ||
371 | in any context, namely crosslock. | ||
372 | |||
373 | |||
374 | Introduce commit | ||
375 | ---------------- | ||
376 | |||
377 | Since crossrelease defers the work adding true dependencies of | ||
378 | crosslocks until they are actually released, crossrelease has to queue | ||
379 | all acquisitions which might create dependencies with the crosslocks. | ||
380 | Then it identifies dependencies using the queued data in batches at a | ||
381 | proper time. We call it 'commit'. | ||
382 | |||
383 | There are four types of dependencies: | ||
384 | |||
385 | 1. TT type: 'typical lock A -> typical lock B' | ||
386 | |||
387 | Just when acquiring B, lockdep can see it's in the A's release | ||
388 | context. So the dependency between A and B can be identified | ||
389 | immediately. Commit is unnecessary. | ||
390 | |||
391 | 2. TC type: 'typical lock A -> crosslock BX' | ||
392 | |||
393 | Just when acquiring BX, lockdep can see it's in the A's release | ||
394 | context. So the dependency between A and BX can be identified | ||
395 | immediately. Commit is unnecessary, too. | ||
396 | |||
397 | 3. CT type: 'crosslock AX -> typical lock B' | ||
398 | |||
399 | When acquiring B, lockdep cannot identify the dependency because | ||
400 | there's no way to know if it's in the AX's release context. It has | ||
401 | to wait until the decision can be made. Commit is necessary. | ||
402 | |||
403 | 4. CC type: 'crosslock AX -> crosslock BX' | ||
404 | |||
405 | When acquiring BX, lockdep cannot identify the dependency because | ||
406 | there's no way to know if it's in the AX's release context. It has | ||
407 | to wait until the decision can be made. Commit is necessary. | ||
408 | But, handling CC type is not implemented yet. It's a future work. | ||
409 | |||
410 | Lockdep can work without the commit step for typical locks, but the | ||
411 | commit step is necessary once crosslocks are involved. With commit | ||
412 | introduced, lockdep performs three steps. What lockdep does in each step is: | ||
413 | |||
414 | 1. Acquisition: For typical locks, lockdep does what it originally did | ||
415 | and queues the lock so that CT type dependencies can be checked using | ||
416 | it at the commit step. For crosslocks, it saves data which will be | ||
417 | used at the commit step and increases a reference count for it. | ||
418 | |||
419 | 2. Commit: No action is required for typical locks. For crosslocks, | ||
420 | lockdep adds CT type dependencies using the data saved at the | ||
421 | acquisition step. | ||
422 | |||
423 | 3. Release: No changes are required for typical locks. When a crosslock | ||
424 | is released, it decreases a reference count for it. | ||
425 | |||
426 | CONCLUSION | ||
427 | |||
428 | Crossrelease introduces commit step to handle dependencies of crosslocks | ||
429 | in batches at a proper time. | ||
430 | |||
431 | |||
432 | ============== | ||
433 | Implementation | ||
434 | ============== | ||
435 | |||
436 | Data structures | ||
437 | --------------- | ||
438 | |||
439 | Crossrelease introduces two main data structures. | ||
440 | |||
441 | 1. hist_lock | ||
442 | |||
443 | This is an array embedded in task_struct, for keeping lock history so | ||
444 | that dependencies can be added using them at the commit step. Since | ||
445 | it's local data, it can be accessed locklessly in the owner context. | ||
446 | The array is filled at the acquisition step and consumed at the | ||
447 | commit step. And it's managed in a circular manner. | ||
448 | |||
449 | 2. cross_lock | ||
450 | |||
451 | One exists per lockdep_map. It keeps data about the crosslock and is | ||
452 | used at the commit step. | ||
453 | |||
454 | |||
455 | How crossrelease works | ||
456 | ---------------------- | ||
457 | |||
458 | The key to how crossrelease works is to defer the necessary work to an | ||
459 | appropriate point in time and perform it all at once at the commit step. | ||
460 | Let's take a look with examples step by step, starting from how lockdep | ||
461 | works without crossrelease for typical locks. | ||
462 | |||
463 | acquire A /* Push A onto held_locks */ | ||
464 | acquire B /* Push B onto held_locks and add 'A -> B' */ | ||
465 | acquire C /* Push C onto held_locks and add 'B -> C' */ | ||
466 | release C /* Pop C from held_locks */ | ||
467 | release B /* Pop B from held_locks */ | ||
468 | release A /* Pop A from held_locks */ | ||
469 | |||
470 | where A, B and C are different lock classes. | ||
471 | |||
472 | NOTE: This document assumes that readers already understand how | ||
473 | lockdep works without crossrelease thus omits details. But there's | ||
474 | one thing to note. Lockdep pretends to pop a lock from held_locks | ||
475 | when releasing it. But it's subtly different from the original pop | ||
476 | operation because lockdep allows entries other than the top to be popped. | ||
477 | |||
478 | In this case, lockdep adds 'the top of held_locks -> the lock to acquire' | ||
479 | dependency every time acquiring a lock. | ||
480 | |||
481 | After adding 'A -> B', a dependency graph will be: | ||
482 | |||
483 | A -> B | ||
484 | |||
485 | where A and B are different lock classes. | ||
486 | |||
487 | And after adding 'B -> C', the graph will be: | ||
488 | |||
489 | A -> B -> C | ||
490 | |||
491 | where A, B and C are different lock classes. | ||
492 | |||
493 | Let's perform the commit step even for typical locks to add dependencies. | ||
494 | Of course, the commit step is not necessary for them; however, it works | ||
495 | just as well because this is the more general way. | ||
496 | |||
497 | acquire A | ||
498 | /* | ||
499 | * Queue A into hist_locks | ||
500 | * | ||
501 | * In hist_locks: A | ||
502 | * In graph: Empty | ||
503 | */ | ||
504 | |||
505 | acquire B | ||
506 | /* | ||
507 | * Queue B into hist_locks | ||
508 | * | ||
509 | * In hist_locks: A, B | ||
510 | * In graph: Empty | ||
511 | */ | ||
512 | |||
513 | acquire C | ||
514 | /* | ||
515 | * Queue C into hist_locks | ||
516 | * | ||
517 | * In hist_locks: A, B, C | ||
518 | * In graph: Empty | ||
519 | */ | ||
520 | |||
521 | commit C | ||
522 | /* | ||
523 | * Add 'C -> ?' | ||
524 | * Answer the following to decide '?' | ||
525 | * What has been queued since acquire C: Nothing | ||
526 | * | ||
527 | * In hist_locks: A, B, C | ||
528 | * In graph: Empty | ||
529 | */ | ||
530 | |||
531 | release C | ||
532 | |||
533 | commit B | ||
534 | /* | ||
535 | * Add 'B -> ?' | ||
536 | * Answer the following to decide '?' | ||
537 | * What has been queued since acquire B: C | ||
538 | * | ||
539 | * In hist_locks: A, B, C | ||
540 | * In graph: 'B -> C' | ||
541 | */ | ||
542 | |||
543 | release B | ||
544 | |||
545 | commit A | ||
546 | /* | ||
547 | * Add 'A -> ?' | ||
548 | * Answer the following to decide '?' | ||
549 | * What has been queued since acquire A: B, C | ||
550 | * | ||
551 | * In hist_locks: A, B, C | ||
552 | * In graph: 'B -> C', 'A -> B', 'A -> C' | ||
553 | */ | ||
554 | |||
555 | release A | ||
556 | |||
557 | where A, B and C are different lock classes. | ||
558 | |||
559 | In this case, dependencies are added at the commit step as described. | ||
560 | |||
561 | After commits for A, B and C, the graph will be: | ||
562 | |||
563 | A -> B -> C | ||
564 | |||
565 | where A, B and C are different lock classes. | ||
566 | |||
567 | NOTE: A dependency 'A -> C' is optimized out. | ||
568 | |||
569 | We can see that the former graph, built without the commit step, is the | ||
570 | same as the latter graph, built using commit steps. Of course the former | ||
571 | way finishes building the graph earlier, which means we can detect a | ||
572 | deadlock or its possibility sooner. So the former way would be preferred | ||
573 | when possible. But we cannot avoid using the latter way for crosslocks. | ||
574 | |||
575 | Let's look at how commit steps work for crosslocks. In this case, the | ||
576 | commit step is actually performed only on the crosslock BX. And it is | ||
577 | assumed that the BX release context is different from the BX acquire context. | ||
578 | |||
579 |    BX RELEASE CONTEXT                BX ACQUIRE CONTEXT | ||
580 |    ------------------                ------------------ | ||
581 |                                      acquire A | ||
582 |                                      /* | ||
583 |                                       * Push A onto held_locks | ||
584 |                                       * Queue A into hist_locks | ||
585 |                                       * | ||
586 |                                       * In held_locks: A | ||
587 |                                       * In hist_locks: A | ||
588 |                                       * In graph: Empty | ||
589 |                                       */ | ||
590 | ||
591 |                                      acquire BX | ||
592 |                                      /* | ||
593 |                                       * Add 'the top of held_locks -> BX' | ||
594 |                                       * | ||
595 |                                       * In held_locks: A | ||
596 |                                       * In hist_locks: A | ||
597 |                                       * In graph: 'A -> BX' | ||
598 |                                       */ | ||
599 | ||
600 |    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
601 |    It must be guaranteed that the following operations are seen after | ||
602 |    acquiring BX globally. It can be done by things like barrier. | ||
603 |    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
604 | ||
605 |    acquire C | ||
606 |    /* | ||
607 |     * Push C onto held_locks | ||
608 |     * Queue C into hist_locks | ||
609 |     * | ||
610 |     * In held_locks: C | ||
611 |     * In hist_locks: C | ||
612 |     * In graph: 'A -> BX' | ||
613 |     */ | ||
614 | ||
615 |    release C | ||
616 |    /* | ||
617 |     * Pop C from held_locks | ||
618 |     * | ||
619 |     * In held_locks: Empty | ||
620 |     * In hist_locks: C | ||
621 |     * In graph: 'A -> BX' | ||
622 |     */ | ||
623 |                                      acquire D | ||
624 |                                      /* | ||
625 |                                       * Push D onto held_locks | ||
626 |                                       * Queue D into hist_locks | ||
627 |                                       * Add 'the top of held_locks -> D' | ||
628 |                                       * | ||
629 |                                       * In held_locks: A, D | ||
630 |                                       * In hist_locks: A, D | ||
631 |                                       * In graph: 'A -> BX', 'A -> D' | ||
632 |                                       */ | ||
633 |    acquire E | ||
634 |    /* | ||
635 |     * Push E onto held_locks | ||
636 |     * Queue E into hist_locks | ||
637 |     * | ||
638 |     * In held_locks: E | ||
639 |     * In hist_locks: C, E | ||
640 |     * In graph: 'A -> BX', 'A -> D' | ||
641 |     */ | ||
642 | ||
643 |    release E | ||
644 |    /* | ||
645 |     * Pop E from held_locks | ||
646 |     * | ||
647 |     * In held_locks: Empty | ||
648 |     * In hist_locks: D, E | ||
649 |     * In graph: 'A -> BX', 'A -> D' | ||
650 |     */ | ||
651 |                                      release D | ||
652 |                                      /* | ||
653 |                                       * Pop D from held_locks | ||
654 |                                       * | ||
655 |                                       * In held_locks: A | ||
656 |                                       * In hist_locks: A, D | ||
657 |                                       * In graph: 'A -> BX', 'A -> D' | ||
658 |                                       */ | ||
659 |    commit BX | ||
660 |    /* | ||
661 |     * Add 'BX -> ?' | ||
662 |     * What has been queued since acquire BX: C, E | ||
663 |     * | ||
664 |     * In held_locks: Empty | ||
665 |     * In hist_locks: D, E | ||
666 |     * In graph: 'A -> BX', 'A -> D', | ||
667 |     *           'BX -> C', 'BX -> E' | ||
668 |     */ | ||
669 | ||
670 |    release BX | ||
671 |    /* | ||
672 |     * In held_locks: Empty | ||
673 |     * In hist_locks: D, E | ||
674 |     * In graph: 'A -> BX', 'A -> D', | ||
675 |     *           'BX -> C', 'BX -> E' | ||
676 |     */ | ||
677 |                                      release A | ||
678 |                                      /* | ||
679 |                                       * Pop A from held_locks | ||
680 |                                       * | ||
681 |                                       * In held_locks: Empty | ||
682 |                                       * In hist_locks: A, D | ||
683 |                                       * In graph: 'A -> BX', 'A -> D', | ||
684 |                                       *           'BX -> C', 'BX -> E' | ||
685 |                                       */ | ||
686 | |||
687 | where A, BX, C,..., E are different lock classes, and a suffix 'X' is | ||
688 | added on crosslocks. | ||
689 | |||
690 | Crossrelease considers all acquisitions after acquiring BX as candidates | ||
691 | which might create dependencies with BX. True dependencies will be | ||
692 | determined when identifying the release context of BX. Meanwhile, all | ||
693 | typical locks are queued so that they can be used at the commit step. | ||
694 | And then the two dependencies 'BX -> C' and 'BX -> E' are added at the | ||
695 | commit step, when the release context is identified. | ||
696 | |||
697 | The final graph will be, with crossrelease: | ||
698 | |||
699 | -> C | ||
700 | / | ||
701 | -> BX - | ||
702 | / \ | ||
703 | A - -> E | ||
704 | \ | ||
705 | -> D | ||
706 | |||
707 | where A, BX, C,..., E are different lock classes, and a suffix 'X' is | ||
708 | added on crosslocks. | ||
709 | |||
710 | However, the final graph will be, without crossrelease: | ||
711 | |||
712 | A -> D | ||
713 | |||
714 | where A and D are different lock classes. | ||
715 | |||
716 | The former graph has three more dependencies, 'A -> BX', 'BX -> C' and | ||
717 | 'BX -> E' giving additional opportunities to check if they cause | ||
718 | deadlocks. This way lockdep can detect a deadlock or its possibility | ||
719 | caused by crosslocks. | ||
720 | |||
721 | CONCLUSION | ||
722 | |||
723 | We checked how crossrelease works with several examples. | ||
724 | |||
725 | |||
726 | ============= | ||
727 | Optimizations | ||
728 | ============= | ||
729 | |||
730 | Avoid duplication | ||
731 | ----------------- | ||
732 | |||
733 | The crossrelease feature uses a cache like the one lockdep already uses | ||
734 | for dependency chains, but this time for caching CT type dependencies. | ||
735 | Once a dependency is cached, the same dependency will never be added again. | ||
736 | |||
737 | |||
738 | Lockless for hot paths | ||
739 | ---------------------- | ||
740 | |||
741 | To keep all locks for later use at the commit step, crossrelease adopts | ||
742 | a local array embedded in task_struct, which makes access to the data | ||
743 | lockless by forcing it to happen only within the owner context. It's | ||
744 | like how lockdep handles held_locks. A lockless implementation is | ||
745 | important since typical locks are acquired and released very frequently. | ||
746 | |||
747 | |||
748 | ================================================= | ||
749 | APPENDIX A: What lockdep does to work aggressively | ||
750 | ================================================= | ||
751 | |||
752 | A deadlock actually occurs only when all the wait operations creating | ||
753 | circular dependencies run at the same time. Even when they don't, a | ||
754 | potential deadlock exists if the problematic dependencies exist. Thus | ||
755 | it's meaningful to detect not only an actual deadlock but also its | ||
756 | possibility, and the latter is the more valuable. When a deadlock | ||
757 | actually occurs, we can identify what happened in the system by some | ||
758 | means or other even without lockdep. However, there's no way to detect | ||
759 | a mere possibility without lockdep, short of auditing all the code by | ||
760 | hand. Lockdep does both, and crossrelease focuses only on the latter. | ||
761 | |||
762 | Whether or not a deadlock actually occurs depends on several factors. | ||
763 | For example, what order contexts are switched in is a factor. Assuming | ||
764 | circular dependencies exist, a deadlock would occur when contexts are | ||
765 | switched so that all wait operations creating the dependencies run | ||
766 | simultaneously. Thus to detect a deadlock possibility even in the case | ||
767 | that it has not occurred yet, lockdep should consider all possible | ||
768 | combinations of dependencies, trying to: | ||
769 | |||
770 | 1. Use a global dependency graph. | ||
771 | |||
772 | Lockdep combines all dependencies into one global graph and uses them, | ||
773 | regardless of which context generates them or what order contexts are | ||
774 | switched in. Only the aggregated dependencies are considered, so they | ||
775 | are prone to become circular if a problem exists. | ||
776 | |||
777 | 2. Check dependencies between classes instead of instances. | ||
778 | |||
779 | What actually causes a deadlock are instances of lock. However, | ||
780 | lockdep checks dependencies between classes instead of instances. | ||
781 | This way lockdep can detect a deadlock which has not happened yet but | ||
782 | might happen in the future with other instances of the same class. | ||
783 | |||
784 | 3. Assume all acquisitions lead to waiting. | ||
785 | |||
786 | Although locks might be acquired without waiting, and waiting is what | ||
787 | actually creates dependencies, lockdep assumes all acquisitions lead to | ||
788 | waiting since that might become true at some time or other. | ||
789 | |||
790 | CONCLUSION | ||
791 | |||
792 | Lockdep detects not only an actual deadlock but also its possibility, | ||
793 | and the latter is more valuable. | ||
794 | |||
795 | |||
796 | ================================================== | ||
797 | APPENDIX B: How to avoid adding false dependencies | ||
798 | ================================================== | ||
799 | |||
800 | Recall what a dependency is. A dependency exists if: | ||
801 | |||
802 | 1. There are two waiters waiting for each event at a given time. | ||
803 | 2. The only way to wake up each waiter is to trigger its event. | ||
804 | 3. Whether one can be woken up depends on whether the other can. | ||
805 | |||
806 | For example: | ||
807 | |||
808 | acquire A | ||
809 | acquire B /* A dependency 'A -> B' exists */ | ||
810 | release B | ||
811 | release A | ||
812 | |||
813 | where A and B are different lock classes. | ||
814 | |||
815 | A dependency 'A -> B' exists since: | ||
816 | |||
817 | 1. A waiter for A and a waiter for B might exist when acquiring B. | ||
818 | 2. Only way to wake up each is to release what it waits for. | ||
819 | 3. Whether the waiter for A can be woken up depends on whether the | ||
820 | other can. IOW, TASK X cannot release A if it fails to acquire B. | ||
821 | |||
822 | For another example: | ||
823 | |||
824 |    TASK X                  TASK Y | ||
825 |    ------                  ------ | ||
826 |                            acquire AX | ||
827 |    acquire B /* A dependency 'AX -> B' exists */ | ||
828 |    release B | ||
829 |    release AX held by Y | ||
830 | |||
831 | where AX and B are different lock classes, and a suffix 'X' is added | ||
832 | on crosslocks. | ||
833 | |||
834 | Even in this case involving crosslocks, the same rule can be applied. A | ||
835 | dependency 'AX -> B' exists since: | ||
836 | |||
837 | 1. A waiter for AX and a waiter for B might exist when acquiring B. | ||
838 | 2. Only way to wake up each is to release what it waits for. | ||
839 | 3. Whether the waiter for AX can be woken up depends on whether the | ||
840 | other can. IOW, TASK X cannot release AX if it fails to acquire B. | ||
841 | |||
842 | Let's take a look at a more complicated example: | ||
843 | |||
844 |    TASK X                  TASK Y | ||
845 |    ------                  ------ | ||
846 |    acquire B | ||
847 |    release B | ||
848 |    fork Y | ||
849 |                            acquire AX | ||
850 |    acquire C /* A dependency 'AX -> C' exists */ | ||
851 |    release C | ||
852 |    release AX held by Y | ||
853 | |||
854 | where AX, B and C are different lock classes, and a suffix 'X' is | ||
855 | added on crosslocks. | ||
856 | |||
857 | Does a dependency 'AX -> B' exist? Nope. | ||
858 | |||
859 | Two waiters are essential to create a dependency. However, waiters for | ||
860 | AX and B to create 'AX -> B' cannot exist at the same time in this | ||
861 | example. Thus the dependency 'AX -> B' cannot be created. | ||
862 | |||
863 | It would be ideal if the full set of true dependencies could be | ||
864 | considered, but we can be sure of nothing except what actually happened. | ||
865 | By relying on what actually happens at runtime, we at least add only | ||
866 | true dependencies, though they might be a subset of all the true ones. | ||
867 | It's similar to how lockdep works for typical locks: there might be more | ||
868 | true dependencies than lockdep has detected at runtime, but lockdep has | ||
869 | no choice but to rely on what actually happens. Crossrelease relies on the same. | ||
870 | |||
871 | CONCLUSION | ||
872 | |||
873 | Relying on what actually happens, lockdep can avoid adding false | ||
874 | dependencies. | ||
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index e2ee0a1c299a..b759a60624fd 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -498,11 +498,11 @@ And a couple of implicit varieties: | |||
498 | This means that ACQUIRE acts as a minimal "acquire" operation and | 498 | This means that ACQUIRE acts as a minimal "acquire" operation and |
499 | RELEASE acts as a minimal "release" operation. | 499 | RELEASE acts as a minimal "release" operation. |
500 | 500 | ||
501 | A subset of the atomic operations described in core-api/atomic_ops.rst have | 501 | A subset of the atomic operations described in atomic_t.txt have ACQUIRE and |
502 | ACQUIRE and RELEASE variants in addition to fully-ordered and relaxed (no | 502 | RELEASE variants in addition to fully-ordered and relaxed (no barrier |
503 | barrier semantics) definitions. For compound atomics performing both a load | 503 | semantics) definitions. For compound atomics performing both a load and a |
504 | and a store, ACQUIRE semantics apply only to the load and RELEASE semantics | 504 | store, ACQUIRE semantics apply only to the load and RELEASE semantics apply |
505 | apply only to the store portion of the operation. | 505 | only to the store portion of the operation. |
506 | 506 | ||
507 | Memory barriers are only required where there's a possibility of interaction | 507 | Memory barriers are only required where there's a possibility of interaction |
508 | between two CPUs or between a CPU and a device. If it can be guaranteed that | 508 | between two CPUs or between a CPU and a device. If it can be guaranteed that |
@@ -1883,8 +1883,7 @@ There are some more advanced barrier functions: | |||
1883 | This makes sure that the death mark on the object is perceived to be set | 1883 | This makes sure that the death mark on the object is perceived to be set |
1884 | *before* the reference counter is decremented. | 1884 | *before* the reference counter is decremented. |
1885 | 1885 | ||
1886 | See Documentation/core-api/atomic_ops.rst for more information. See the | 1886 | See Documentation/atomic_{t,bitops}.txt for more information. |
1887 | "Atomic operations" subsection for information on where to use these. | ||
1888 | 1887 | ||
1889 | 1888 | ||
1890 | (*) lockless_dereference(); | 1889 | (*) lockless_dereference(); |
@@ -1989,10 +1988,7 @@ for each construct. These operations all imply certain barriers: | |||
1989 | ACQUIRE operation has completed. | 1988 | ACQUIRE operation has completed. |
1990 | 1989 | ||
1991 | Memory operations issued before the ACQUIRE may be completed after | 1990 | Memory operations issued before the ACQUIRE may be completed after |
1992 | the ACQUIRE operation has completed. An smp_mb__before_spinlock(), | 1991 | the ACQUIRE operation has completed. |
1993 | combined with a following ACQUIRE, orders prior stores against | ||
1994 | subsequent loads and stores. Note that this is weaker than smp_mb()! | ||
1995 | The smp_mb__before_spinlock() primitive is free on many architectures. | ||
1996 | 1992 | ||
1997 | (2) RELEASE operation implication: | 1993 | (2) RELEASE operation implication: |
1998 | 1994 | ||
@@ -2510,88 +2506,7 @@ operations are noted specially as some of them imply full memory barriers and | |||
2510 | some don't, but they're very heavily relied on as a group throughout the | 2506 | some don't, but they're very heavily relied on as a group throughout the |
2511 | kernel. | 2507 | kernel. |
2512 | 2508 | ||
2513 | Any atomic operation that modifies some state in memory and returns information | 2509 | See Documentation/atomic_t.txt for more information. |
2514 | about the state (old or new) implies an SMP-conditional general memory barrier | ||
2515 | (smp_mb()) on each side of the actual operation (with the exception of | ||
2516 | explicit lock operations, described later). These include: | ||
2517 | |||
2518 | xchg(); | ||
2519 | atomic_xchg(); atomic_long_xchg(); | ||
2520 | atomic_inc_return(); atomic_long_inc_return(); | ||
2521 | atomic_dec_return(); atomic_long_dec_return(); | ||
2522 | atomic_add_return(); atomic_long_add_return(); | ||
2523 | atomic_sub_return(); atomic_long_sub_return(); | ||
2524 | atomic_inc_and_test(); atomic_long_inc_and_test(); | ||
2525 | atomic_dec_and_test(); atomic_long_dec_and_test(); | ||
2526 | atomic_sub_and_test(); atomic_long_sub_and_test(); | ||
2527 | atomic_add_negative(); atomic_long_add_negative(); | ||
2528 | test_and_set_bit(); | ||
2529 | test_and_clear_bit(); | ||
2530 | test_and_change_bit(); | ||
2531 | |||
2532 | /* when succeeds */ | ||
2533 | cmpxchg(); | ||
2534 | atomic_cmpxchg(); atomic_long_cmpxchg(); | ||
2535 | atomic_add_unless(); atomic_long_add_unless(); | ||
2536 | |||
2537 | These are used for such things as implementing ACQUIRE-class and RELEASE-class | ||
2538 | operations and adjusting reference counters towards object destruction, and as | ||
2539 | such the implicit memory barrier effects are necessary. | ||
2540 | |||
2541 | |||
2542 | The following operations are potential problems as they do _not_ imply memory | ||
2543 | barriers, but might be used for implementing such things as RELEASE-class | ||
2544 | operations: | ||
2545 | |||
2546 | atomic_set(); | ||
2547 | set_bit(); | ||
2548 | clear_bit(); | ||
2549 | change_bit(); | ||
2550 | |||
2551 | With these the appropriate explicit memory barrier should be used if necessary | ||
2552 | (smp_mb__before_atomic() for instance). | ||
2553 | |||
2554 | |||
2555 | The following also do _not_ imply memory barriers, and so may require explicit | ||
2556 | memory barriers under some circumstances (smp_mb__before_atomic() for | ||
2557 | instance): | ||
2558 | |||
2559 | atomic_add(); | ||
2560 | atomic_sub(); | ||
2561 | atomic_inc(); | ||
2562 | atomic_dec(); | ||
2563 | |||
2564 | If they're used for statistics generation, then they probably don't need memory | ||
2565 | barriers, unless there's a coupling between statistical data. | ||
2566 | |||
2567 | If they're used for reference counting on an object to control its lifetime, | ||
2568 | they probably don't need memory barriers because either the reference count | ||
2569 | will be adjusted inside a locked section, or the caller will already hold | ||
2570 | sufficient references to make the lock, and thus a memory barrier unnecessary. | ||
2571 | |||
2572 | If they're used for constructing a lock of some description, then they probably | ||
2573 | do need memory barriers as a lock primitive generally has to do things in a | ||
2574 | specific order. | ||
2575 | |||
2576 | Basically, each usage case has to be carefully considered as to whether memory | ||
2577 | barriers are needed or not. | ||
2578 | |||
2579 | The following operations are special locking primitives: | ||
2580 | |||
2581 | test_and_set_bit_lock(); | ||
2582 | clear_bit_unlock(); | ||
2583 | __clear_bit_unlock(); | ||
2584 | |||
2585 | These implement ACQUIRE-class and RELEASE-class operations. These should be | ||
2586 | used in preference to other operations when implementing locking primitives, | ||
2587 | because their implementations can be optimised on many architectures. | ||
2588 | |||
2589 | [!] Note that special memory barrier primitives are available for these | ||
2590 | situations because on some CPUs the atomic instructions used imply full memory | ||
2591 | barriers, and so barrier instructions are superfluous in conjunction with them, | ||
2592 | and in such cases the special barrier primitives will be no-ops. | ||
2593 | |||
2594 | See Documentation/core-api/atomic_ops.rst for more information. | ||
2595 | 2510 | ||
2596 | 2511 | ||
2597 | ACCESSING DEVICES | 2512 | ACCESSING DEVICES |
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index b83dfa1c0602..ab16efe0c79d 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -149,6 +149,26 @@ static_branch_inc(), will change the branch back to true. Likewise, if the | |||
149 | key is initialized false, a 'static_branch_inc()', will change the branch to | 149 | key is initialized false, a 'static_branch_inc()', will change the branch to |
150 | true. And then a 'static_branch_dec()', will again make the branch false. | 150 | true. And then a 'static_branch_dec()', will again make the branch false. |
151 | 151 | ||
152 | The state and the reference count can be retrieved with 'static_key_enabled()' | ||
153 | and 'static_key_count()'. In general, if you use these functions, they | ||
154 | should be protected with the same mutex used around the enable/disable | ||
155 | or increment/decrement function. | ||
156 | |||
157 | Note that switching branches results in some locks being taken, | ||
158 | particularly the CPU hotplug lock (in order to avoid races against | ||
159 | CPUs being brought online while the kernel is being | ||
160 | patched). Calling the static key API from within a hotplug notifier is | ||
161 | thus a sure deadlock recipe. In order to still allow use of the | ||
162 | functionality, the following functions are provided: | ||
163 | |||
164 | static_key_enable_cpuslocked() | ||
165 | static_key_disable_cpuslocked() | ||
166 | static_branch_enable_cpuslocked() | ||
167 | static_branch_disable_cpuslocked() | ||
168 | |||
169 | These functions are *not* general purpose, and must only be used when | ||
170 | you really know that you're in the above context, and no other. | ||
171 | |||
152 | Where an array of keys is required, it can be defined as:: | 172 | Where an array of keys is required, it can be defined as:: |
153 | 173 | ||
154 | DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); | 174 | DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); |
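For illustration, the mutex-protection pattern described in the static-keys hunk above might look like this (the key, mutex and function names are made up):

    #include <linux/jump_label.h>
    #include <linux/mutex.h>

    static DEFINE_STATIC_KEY_FALSE(my_feature_key);
    static DEFINE_MUTEX(my_feature_mutex);

    static void my_feature_set(bool enable)
    {
    	/* Query and update the key under one mutex to avoid racy toggling. */
    	mutex_lock(&my_feature_mutex);
    	if (enable && !static_key_enabled(&my_feature_key))
    		static_branch_enable(&my_feature_key);
    	else if (!enable && static_key_enabled(&my_feature_key))
    		static_branch_disable(&my_feature_key);
    	mutex_unlock(&my_feature_mutex);
    }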
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 38310dcd6620..bc80fc0e210f 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -1956,10 +1956,7 @@ MMIO 쓰기 배리어 | |||
1956 | 뒤에 완료됩니다. | 1956 | 뒤에 완료됩니다. |
1957 | 1957 | ||
1958 | ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에 | 1958 | ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에 |
1959 | 완료될 수 있습니다. smp_mb__before_spinlock() 뒤에 ACQUIRE 가 실행되는 | 1959 | 완료될 수 있습니다. |
1960 | 코드 블록은 블록 앞의 스토어를 블록 뒤의 로드와 스토어에 대해 순서 | ||
1961 | 맞춥니다. 이건 smp_mb() 보다 완화된 것임을 기억하세요! 많은 아키텍쳐에서 | ||
1962 | smp_mb__before_spinlock() 은 사실 아무일도 하지 않습니다. | ||
1963 | 1960 | ||
1964 | (2) RELEASE 오퍼레이션의 영향: | 1961 | (2) RELEASE 오퍼레이션의 영향: |
1965 | 1962 | ||
diff --git a/arch/Kconfig b/arch/Kconfig
index 21d0089117fe..2520ca5b42eb 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -931,6 +931,18 @@ config STRICT_MODULE_RWX | |||
931 | config ARCH_WANT_RELAX_ORDER | 931 | config ARCH_WANT_RELAX_ORDER |
932 | bool | 932 | bool |
933 | 933 | ||
934 | config ARCH_HAS_REFCOUNT | ||
935 | bool | ||
936 | help | ||
937 | An architecture selects this when it has implemented refcount_t | ||
938 | using open coded assembly primitives that provide an optimized | ||
939 | refcount_t implementation, possibly at the expense of some full | ||
940 | refcount state checks of CONFIG_REFCOUNT_FULL=y. | ||
941 | |||
942 | The refcount overflow check behavior, however, must be retained. | ||
943 | Catching overflows is the primary security concern for protecting | ||
944 | against bugs in reference counts. | ||
945 | |||
934 | config REFCOUNT_FULL | 946 | config REFCOUNT_FULL |
935 | bool "Perform full reference count validation at the expense of speed" | 947 | bool "Perform full reference count validation at the expense of speed" |
936 | help | 948 | help |
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index fb01dfb760c2..05a70edd57b6 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -25,18 +25,10 @@ | |||
25 | : "r" (uaddr), "r"(oparg) \ | 25 | : "r" (uaddr), "r"(oparg) \ |
26 | : "memory") | 26 | : "memory") |
27 | 27 | ||
28 | static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | 28 | static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, |
29 | u32 __user *uaddr) | ||
29 | { | 30 | { |
30 | int op = (encoded_op >> 28) & 7; | ||
31 | int cmp = (encoded_op >> 24) & 15; | ||
32 | int oparg = (encoded_op << 8) >> 20; | ||
33 | int cmparg = (encoded_op << 20) >> 20; | ||
34 | int oldval = 0, ret; | 31 | int oldval = 0, ret; |
35 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
36 | oparg = 1 << oparg; | ||
37 | |||
38 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
39 | return -EFAULT; | ||
40 | 32 | ||
41 | pagefault_disable(); | 33 | pagefault_disable(); |
42 | 34 | ||
@@ -62,17 +54,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | |||
62 | 54 | ||
63 | pagefault_enable(); | 55 | pagefault_enable(); |
64 | 56 | ||
65 | if (!ret) { | 57 | if (!ret) |
66 | switch (cmp) { | 58 | *oval = oldval; |
67 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 59 | |
68 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
69 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
70 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
71 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
72 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
73 | default: ret = -ENOSYS; | ||
74 | } | ||
75 | } | ||
76 | return ret; | 60 | return ret; |
77 | } | 61 | } |
78 | 62 | ||
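The per-architecture futex hunks in this series all follow the same pattern: the encoded_op decoding, the access_ok() check and the cmp/cmparg comparison move out of the arch helpers into common futex code, and each arch_futex_atomic_op_inuser() only performs the user-space RMW and reports the old value through *oval. A hedged sketch of the shape of that shared caller (the exact decoding and error handling live in kernel/futex.c and may differ):

  #include <linux/futex.h>
  #include <linux/bitops.h>
  #include <linux/uaccess.h>

  static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
  {
          unsigned int op  = (encoded_op & 0x70000000) >> 28;
          unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
          /* sign_extend32() avoids the signed-shift tricks the old arch copies used */
          int oparg  = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
          int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
          int oldval, ret;

          if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
                  oparg = 1 << oparg;

          if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
                  return -EFAULT;

          ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
          if (ret)
                  return ret;

          switch (cmp) {
          case FUTEX_OP_CMP_EQ: return oldval == cmparg;
          case FUTEX_OP_CMP_NE: return oldval != cmparg;
          case FUTEX_OP_CMP_LT: return oldval <  cmparg;
          case FUTEX_OP_CMP_GE: return oldval >= cmparg;
          case FUTEX_OP_CMP_LE: return oldval <= cmparg;
          case FUTEX_OP_CMP_GT: return oldval >  cmparg;
          default:              return -ENOSYS;
          }
  }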
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 54b54da6384c..11859287c52a 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h | |||
@@ -123,6 +123,8 @@ static inline void atomic_set(atomic_t *v, int i) | |||
123 | atomic_ops_unlock(flags); | 123 | atomic_ops_unlock(flags); |
124 | } | 124 | } |
125 | 125 | ||
126 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
127 | |||
126 | #endif | 128 | #endif |
127 | 129 | ||
128 | /* | 130 | /* |
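Several lock-based atomic implementations (arc without LL/SC, hexagon, metag, parisc, sparc32, tile) gain the same one-liner in this series: atomic_set_release() is mapped to atomic_set(). On these platforms a plain release store would not be atomic with respect to the lock-protected RMW operations, while atomic_set() takes the same lock and is therefore at least as strong as a release. For everyone else the generic fallback is expected to be a plain release store, roughly:

  #ifndef atomic_set_release
  #define atomic_set_release(v, i)        smp_store_release(&(v)->counter, (i))
  #endif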
diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h index 11e1b1f3acda..eb887dd13e74 100644 --- a/arch/arc/include/asm/futex.h +++ b/arch/arc/include/asm/futex.h | |||
@@ -73,20 +73,11 @@ | |||
73 | 73 | ||
74 | #endif | 74 | #endif |
75 | 75 | ||
76 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 76 | static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, |
77 | u32 __user *uaddr) | ||
77 | { | 78 | { |
78 | int op = (encoded_op >> 28) & 7; | ||
79 | int cmp = (encoded_op >> 24) & 15; | ||
80 | int oparg = (encoded_op << 8) >> 20; | ||
81 | int cmparg = (encoded_op << 20) >> 20; | ||
82 | int oldval = 0, ret; | 79 | int oldval = 0, ret; |
83 | 80 | ||
84 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
85 | oparg = 1 << oparg; | ||
86 | |||
87 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) | ||
88 | return -EFAULT; | ||
89 | |||
90 | #ifndef CONFIG_ARC_HAS_LLSC | 81 | #ifndef CONFIG_ARC_HAS_LLSC |
91 | preempt_disable(); /* to guarantee atomic r-m-w of futex op */ | 82 | preempt_disable(); /* to guarantee atomic r-m-w of futex op */ |
92 | #endif | 83 | #endif |
@@ -118,30 +109,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
118 | preempt_enable(); | 109 | preempt_enable(); |
119 | #endif | 110 | #endif |
120 | 111 | ||
121 | if (!ret) { | 112 | if (!ret) |
122 | switch (cmp) { | 113 | *oval = oldval; |
123 | case FUTEX_OP_CMP_EQ: | 114 | |
124 | ret = (oldval == cmparg); | ||
125 | break; | ||
126 | case FUTEX_OP_CMP_NE: | ||
127 | ret = (oldval != cmparg); | ||
128 | break; | ||
129 | case FUTEX_OP_CMP_LT: | ||
130 | ret = (oldval < cmparg); | ||
131 | break; | ||
132 | case FUTEX_OP_CMP_GE: | ||
133 | ret = (oldval >= cmparg); | ||
134 | break; | ||
135 | case FUTEX_OP_CMP_LE: | ||
136 | ret = (oldval <= cmparg); | ||
137 | break; | ||
138 | case FUTEX_OP_CMP_GT: | ||
139 | ret = (oldval > cmparg); | ||
140 | break; | ||
141 | default: | ||
142 | ret = -ENOSYS; | ||
143 | } | ||
144 | } | ||
145 | return ret; | 115 | return ret; |
146 | } | 116 | } |
147 | 117 | ||
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 6795368ad023..cc414382dab4 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h | |||
@@ -128,20 +128,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | |||
128 | #endif /* !SMP */ | 128 | #endif /* !SMP */ |
129 | 129 | ||
130 | static inline int | 130 | static inline int |
131 | futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | 131 | arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) |
132 | { | 132 | { |
133 | int op = (encoded_op >> 28) & 7; | ||
134 | int cmp = (encoded_op >> 24) & 15; | ||
135 | int oparg = (encoded_op << 8) >> 20; | ||
136 | int cmparg = (encoded_op << 20) >> 20; | ||
137 | int oldval = 0, ret, tmp; | 133 | int oldval = 0, ret, tmp; |
138 | 134 | ||
139 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
140 | oparg = 1 << oparg; | ||
141 | |||
142 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
143 | return -EFAULT; | ||
144 | |||
145 | #ifndef CONFIG_SMP | 135 | #ifndef CONFIG_SMP |
146 | preempt_disable(); | 136 | preempt_disable(); |
147 | #endif | 137 | #endif |
@@ -172,17 +162,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | |||
172 | preempt_enable(); | 162 | preempt_enable(); |
173 | #endif | 163 | #endif |
174 | 164 | ||
175 | if (!ret) { | 165 | if (!ret) |
176 | switch (cmp) { | 166 | *oval = oldval; |
177 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 167 | |
178 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
179 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
180 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
181 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
182 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
183 | default: ret = -ENOSYS; | ||
184 | } | ||
185 | } | ||
186 | return ret; | 168 | return ret; |
187 | } | 169 | } |
188 | 170 | ||
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index f32b42e8725d..5bb2fd4674e7 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h | |||
@@ -48,20 +48,10 @@ do { \ | |||
48 | } while (0) | 48 | } while (0) |
49 | 49 | ||
50 | static inline int | 50 | static inline int |
51 | futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) | 51 | arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) |
52 | { | 52 | { |
53 | int op = (encoded_op >> 28) & 7; | ||
54 | int cmp = (encoded_op >> 24) & 15; | ||
55 | int oparg = (int)(encoded_op << 8) >> 20; | ||
56 | int cmparg = (int)(encoded_op << 20) >> 20; | ||
57 | int oldval = 0, ret, tmp; | 53 | int oldval = 0, ret, tmp; |
58 | 54 | ||
59 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
60 | oparg = 1U << (oparg & 0x1f); | ||
61 | |||
62 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
63 | return -EFAULT; | ||
64 | |||
65 | pagefault_disable(); | 55 | pagefault_disable(); |
66 | 56 | ||
67 | switch (op) { | 57 | switch (op) { |
@@ -91,17 +81,9 @@ futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) | |||
91 | 81 | ||
92 | pagefault_enable(); | 82 | pagefault_enable(); |
93 | 83 | ||
94 | if (!ret) { | 84 | if (!ret) |
95 | switch (cmp) { | 85 | *oval = oldval; |
96 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 86 | |
97 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
98 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
99 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
100 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
101 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
102 | default: ret = -ENOSYS; | ||
103 | } | ||
104 | } | ||
105 | return ret; | 87 | return ret; |
106 | } | 88 | } |
107 | 89 | ||
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index f445bd7f2b9f..95ad7102b63c 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h | |||
@@ -310,14 +310,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) | |||
310 | #define arch_read_relax(lock) cpu_relax() | 310 | #define arch_read_relax(lock) cpu_relax() |
311 | #define arch_write_relax(lock) cpu_relax() | 311 | #define arch_write_relax(lock) cpu_relax() |
312 | 312 | ||
313 | /* | 313 | /* See include/linux/spinlock.h */ |
314 | * Accesses appearing in program order before a spin_lock() operation | 314 | #define smp_mb__after_spinlock() smp_mb() |
315 | * can be reordered with accesses inside the critical section, by virtue | ||
316 | * of arch_spin_lock being constructed using acquire semantics. | ||
317 | * | ||
318 | * In cases where this is problematic (e.g. try_to_wake_up), an | ||
319 | * smp_mb__before_spinlock() can restore the required ordering. | ||
320 | */ | ||
321 | #define smp_mb__before_spinlock() smp_mb() | ||
322 | 315 | ||
323 | #endif /* __ASM_SPINLOCK_H */ | 316 | #endif /* __ASM_SPINLOCK_H */ |
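The removal of smp_mb__before_spinlock() here (and for powerpc further down) pairs with the introduction of smp_mb__after_spinlock(), which is issued after the lock is taken rather than before. A hedged usage sketch of the new pattern, with a hypothetical lock and data:

  #include <linux/spinlock.h>

  static DEFINE_RAW_SPINLOCK(demo_lock);          /* hypothetical */

  static void demo_store_vs_later_loads(int *flag, int *data)
  {
          WRITE_ONCE(*flag, 1);           /* store before the lock */
          raw_spin_lock(&demo_lock);
          smp_mb__after_spinlock();       /* upgrade the ACQUIRE to a full barrier */
          (void)READ_ONCE(*data);         /* now ordered after the store above */
          raw_spin_unlock(&demo_lock);
  }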
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h index 2e1da71e27a4..ab346f5f8820 100644 --- a/arch/frv/include/asm/futex.h +++ b/arch/frv/include/asm/futex.h | |||
@@ -7,7 +7,8 @@ | |||
7 | #include <asm/errno.h> | 7 | #include <asm/errno.h> |
8 | #include <linux/uaccess.h> | 8 | #include <linux/uaccess.h> |
9 | 9 | ||
10 | extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr); | 10 | extern int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, |
11 | u32 __user *uaddr); | ||
11 | 12 | ||
12 | static inline int | 13 | static inline int |
13 | futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | 14 | futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, |
diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c index d155ca9e5098..37f7b2bf7f73 100644 --- a/arch/frv/kernel/futex.c +++ b/arch/frv/kernel/futex.c | |||
@@ -186,20 +186,10 @@ static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_o | |||
186 | /* | 186 | /* |
187 | * do the futex operations | 187 | * do the futex operations |
188 | */ | 188 | */ |
189 | int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 189 | int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) |
190 | { | 190 | { |
191 | int op = (encoded_op >> 28) & 7; | ||
192 | int cmp = (encoded_op >> 24) & 15; | ||
193 | int oparg = (encoded_op << 8) >> 20; | ||
194 | int cmparg = (encoded_op << 20) >> 20; | ||
195 | int oldval = 0, ret; | 191 | int oldval = 0, ret; |
196 | 192 | ||
197 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
198 | oparg = 1 << oparg; | ||
199 | |||
200 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
201 | return -EFAULT; | ||
202 | |||
203 | pagefault_disable(); | 193 | pagefault_disable(); |
204 | 194 | ||
205 | switch (op) { | 195 | switch (op) { |
@@ -225,18 +215,9 @@ int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
225 | 215 | ||
226 | pagefault_enable(); | 216 | pagefault_enable(); |
227 | 217 | ||
228 | if (!ret) { | 218 | if (!ret) |
229 | switch (cmp) { | 219 | *oval = oldval; |
230 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | ||
231 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
232 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
233 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
234 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
235 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
236 | default: ret = -ENOSYS; break; | ||
237 | } | ||
238 | } | ||
239 | 220 | ||
240 | return ret; | 221 | return ret; |
241 | 222 | ||
242 | } /* end futex_atomic_op_inuser() */ | 223 | } /* end arch_futex_atomic_op_inuser() */ |
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index a62ba368b27d..fb3dfb2a667e 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h | |||
@@ -42,6 +42,8 @@ static inline void atomic_set(atomic_t *v, int new) | |||
42 | ); | 42 | ); |
43 | } | 43 | } |
44 | 44 | ||
45 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
46 | |||
45 | /** | 47 | /** |
46 | * atomic_read - reads a word, atomically | 48 | * atomic_read - reads a word, atomically |
47 | * @v: pointer to atomic value | 49 | * @v: pointer to atomic value |
diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h index 7e597f8434da..c607b77c8215 100644 --- a/arch/hexagon/include/asm/futex.h +++ b/arch/hexagon/include/asm/futex.h | |||
@@ -31,18 +31,9 @@ | |||
31 | 31 | ||
32 | 32 | ||
33 | static inline int | 33 | static inline int |
34 | futex_atomic_op_inuser(int encoded_op, int __user *uaddr) | 34 | arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) |
35 | { | 35 | { |
36 | int op = (encoded_op >> 28) & 7; | ||
37 | int cmp = (encoded_op >> 24) & 15; | ||
38 | int oparg = (encoded_op << 8) >> 20; | ||
39 | int cmparg = (encoded_op << 20) >> 20; | ||
40 | int oldval = 0, ret; | 36 | int oldval = 0, ret; |
41 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
42 | oparg = 1 << oparg; | ||
43 | |||
44 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) | ||
45 | return -EFAULT; | ||
46 | 37 | ||
47 | pagefault_disable(); | 38 | pagefault_disable(); |
48 | 39 | ||
@@ -72,30 +63,9 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr) | |||
72 | 63 | ||
73 | pagefault_enable(); | 64 | pagefault_enable(); |
74 | 65 | ||
75 | if (!ret) { | 66 | if (!ret) |
76 | switch (cmp) { | 67 | *oval = oldval; |
77 | case FUTEX_OP_CMP_EQ: | 68 | |
78 | ret = (oldval == cmparg); | ||
79 | break; | ||
80 | case FUTEX_OP_CMP_NE: | ||
81 | ret = (oldval != cmparg); | ||
82 | break; | ||
83 | case FUTEX_OP_CMP_LT: | ||
84 | ret = (oldval < cmparg); | ||
85 | break; | ||
86 | case FUTEX_OP_CMP_GE: | ||
87 | ret = (oldval >= cmparg); | ||
88 | break; | ||
89 | case FUTEX_OP_CMP_LE: | ||
90 | ret = (oldval <= cmparg); | ||
91 | break; | ||
92 | case FUTEX_OP_CMP_GT: | ||
93 | ret = (oldval > cmparg); | ||
94 | break; | ||
95 | default: | ||
96 | ret = -ENOSYS; | ||
97 | } | ||
98 | } | ||
99 | return ret; | 69 | return ret; |
100 | } | 70 | } |
101 | 71 | ||
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h index 76acbcd5c060..6d67dc1eaf2b 100644 --- a/arch/ia64/include/asm/futex.h +++ b/arch/ia64/include/asm/futex.h | |||
@@ -45,18 +45,9 @@ do { \ | |||
45 | } while (0) | 45 | } while (0) |
46 | 46 | ||
47 | static inline int | 47 | static inline int |
48 | futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | 48 | arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) |
49 | { | 49 | { |
50 | int op = (encoded_op >> 28) & 7; | ||
51 | int cmp = (encoded_op >> 24) & 15; | ||
52 | int oparg = (encoded_op << 8) >> 20; | ||
53 | int cmparg = (encoded_op << 20) >> 20; | ||
54 | int oldval = 0, ret; | 50 | int oldval = 0, ret; |
55 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
56 | oparg = 1 << oparg; | ||
57 | |||
58 | if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) | ||
59 | return -EFAULT; | ||
60 | 51 | ||
61 | pagefault_disable(); | 52 | pagefault_disable(); |
62 | 53 | ||
@@ -84,17 +75,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | |||
84 | 75 | ||
85 | pagefault_enable(); | 76 | pagefault_enable(); |
86 | 77 | ||
87 | if (!ret) { | 78 | if (!ret) |
88 | switch (cmp) { | 79 | *oval = oldval; |
89 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 80 | |
90 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
91 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
92 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
93 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
94 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
95 | default: ret = -ENOSYS; | ||
96 | } | ||
97 | } | ||
98 | return ret; | 81 | return ret; |
99 | } | 82 | } |
100 | 83 | ||
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index 6c1380a8a0d4..eee779f26cc4 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h | |||
@@ -37,6 +37,8 @@ static inline int atomic_set(atomic_t *v, int i) | |||
37 | return i; | 37 | return i; |
38 | } | 38 | } |
39 | 39 | ||
40 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
41 | |||
40 | #define ATOMIC_OP(op, c_op) \ | 42 | #define ATOMIC_OP(op, c_op) \ |
41 | static inline void atomic_##op(int i, atomic_t *v) \ | 43 | static inline void atomic_##op(int i, atomic_t *v) \ |
42 | { \ | 44 | { \ |
diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h index 01848f056f43..a9dad9e5e132 100644 --- a/arch/microblaze/include/asm/futex.h +++ b/arch/microblaze/include/asm/futex.h | |||
@@ -29,18 +29,9 @@ | |||
29 | }) | 29 | }) |
30 | 30 | ||
31 | static inline int | 31 | static inline int |
32 | futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 32 | arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) |
33 | { | 33 | { |
34 | int op = (encoded_op >> 28) & 7; | ||
35 | int cmp = (encoded_op >> 24) & 15; | ||
36 | int oparg = (encoded_op << 8) >> 20; | ||
37 | int cmparg = (encoded_op << 20) >> 20; | ||
38 | int oldval = 0, ret; | 34 | int oldval = 0, ret; |
39 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
40 | oparg = 1 << oparg; | ||
41 | |||
42 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
43 | return -EFAULT; | ||
44 | 35 | ||
45 | pagefault_disable(); | 36 | pagefault_disable(); |
46 | 37 | ||
@@ -66,30 +57,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
66 | 57 | ||
67 | pagefault_enable(); | 58 | pagefault_enable(); |
68 | 59 | ||
69 | if (!ret) { | 60 | if (!ret) |
70 | switch (cmp) { | 61 | *oval = oldval; |
71 | case FUTEX_OP_CMP_EQ: | 62 | |
72 | ret = (oldval == cmparg); | ||
73 | break; | ||
74 | case FUTEX_OP_CMP_NE: | ||
75 | ret = (oldval != cmparg); | ||
76 | break; | ||
77 | case FUTEX_OP_CMP_LT: | ||
78 | ret = (oldval < cmparg); | ||
79 | break; | ||
80 | case FUTEX_OP_CMP_GE: | ||
81 | ret = (oldval >= cmparg); | ||
82 | break; | ||
83 | case FUTEX_OP_CMP_LE: | ||
84 | ret = (oldval <= cmparg); | ||
85 | break; | ||
86 | case FUTEX_OP_CMP_GT: | ||
87 | ret = (oldval > cmparg); | ||
88 | break; | ||
89 | default: | ||
90 | ret = -ENOSYS; | ||
91 | } | ||
92 | } | ||
93 | return ret; | 63 | return ret; |
94 | } | 64 | } |
95 | 65 | ||
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index 1de190bdfb9c..a9e61ea54ca9 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h | |||
@@ -83,18 +83,9 @@ | |||
83 | } | 83 | } |
84 | 84 | ||
85 | static inline int | 85 | static inline int |
86 | futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 86 | arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) |
87 | { | 87 | { |
88 | int op = (encoded_op >> 28) & 7; | ||
89 | int cmp = (encoded_op >> 24) & 15; | ||
90 | int oparg = (encoded_op << 8) >> 20; | ||
91 | int cmparg = (encoded_op << 20) >> 20; | ||
92 | int oldval = 0, ret; | 88 | int oldval = 0, ret; |
93 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
94 | oparg = 1 << oparg; | ||
95 | |||
96 | if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) | ||
97 | return -EFAULT; | ||
98 | 89 | ||
99 | pagefault_disable(); | 90 | pagefault_disable(); |
100 | 91 | ||
@@ -125,17 +116,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
125 | 116 | ||
126 | pagefault_enable(); | 117 | pagefault_enable(); |
127 | 118 | ||
128 | if (!ret) { | 119 | if (!ret) |
129 | switch (cmp) { | 120 | *oval = oldval; |
130 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 121 | |
131 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
132 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
133 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
134 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
135 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
136 | default: ret = -ENOSYS; | ||
137 | } | ||
138 | } | ||
139 | return ret; | 122 | return ret; |
140 | } | 123 | } |
141 | 124 | ||
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 6bace7695788..c7cbddfcdc3b 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c | |||
@@ -648,12 +648,12 @@ EXPORT_SYMBOL(flush_tlb_one); | |||
648 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | 648 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST |
649 | 649 | ||
650 | static DEFINE_PER_CPU(atomic_t, tick_broadcast_count); | 650 | static DEFINE_PER_CPU(atomic_t, tick_broadcast_count); |
651 | static DEFINE_PER_CPU(struct call_single_data, tick_broadcast_csd); | 651 | static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd); |
652 | 652 | ||
653 | void tick_broadcast(const struct cpumask *mask) | 653 | void tick_broadcast(const struct cpumask *mask) |
654 | { | 654 | { |
655 | atomic_t *count; | 655 | atomic_t *count; |
656 | struct call_single_data *csd; | 656 | call_single_data_t *csd; |
657 | int cpu; | 657 | int cpu; |
658 | 658 | ||
659 | for_each_cpu(cpu, mask) { | 659 | for_each_cpu(cpu, mask) { |
@@ -674,7 +674,7 @@ static void tick_broadcast_callee(void *info) | |||
674 | 674 | ||
675 | static int __init tick_broadcast_init(void) | 675 | static int __init tick_broadcast_init(void) |
676 | { | 676 | { |
677 | struct call_single_data *csd; | 677 | call_single_data_t *csd; |
678 | int cpu; | 678 | int cpu; |
679 | 679 | ||
680 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 680 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
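The struct call_single_data -> call_single_data_t rename here is mechanical; the point of the typedef is that it can carry an alignment attribute so a csd never straddles two cache lines. A hedged sketch of the shape of the typedef this rename targets (the real definition lives in include/linux/smp.h):

  typedef struct __call_single_data call_single_data_t
          __aligned(sizeof(struct __call_single_data));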
diff --git a/arch/openrisc/include/asm/futex.h b/arch/openrisc/include/asm/futex.h index 778087341977..8fed278a24b8 100644 --- a/arch/openrisc/include/asm/futex.h +++ b/arch/openrisc/include/asm/futex.h | |||
@@ -30,20 +30,10 @@ | |||
30 | }) | 30 | }) |
31 | 31 | ||
32 | static inline int | 32 | static inline int |
33 | futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 33 | arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) |
34 | { | 34 | { |
35 | int op = (encoded_op >> 28) & 7; | ||
36 | int cmp = (encoded_op >> 24) & 15; | ||
37 | int oparg = (encoded_op << 8) >> 20; | ||
38 | int cmparg = (encoded_op << 20) >> 20; | ||
39 | int oldval = 0, ret; | 35 | int oldval = 0, ret; |
40 | 36 | ||
41 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
42 | oparg = 1 << oparg; | ||
43 | |||
44 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
45 | return -EFAULT; | ||
46 | |||
47 | pagefault_disable(); | 37 | pagefault_disable(); |
48 | 38 | ||
49 | switch (op) { | 39 | switch (op) { |
@@ -68,30 +58,9 @@ futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
68 | 58 | ||
69 | pagefault_enable(); | 59 | pagefault_enable(); |
70 | 60 | ||
71 | if (!ret) { | 61 | if (!ret) |
72 | switch (cmp) { | 62 | *oval = oldval; |
73 | case FUTEX_OP_CMP_EQ: | 63 | |
74 | ret = (oldval == cmparg); | ||
75 | break; | ||
76 | case FUTEX_OP_CMP_NE: | ||
77 | ret = (oldval != cmparg); | ||
78 | break; | ||
79 | case FUTEX_OP_CMP_LT: | ||
80 | ret = (oldval < cmparg); | ||
81 | break; | ||
82 | case FUTEX_OP_CMP_GE: | ||
83 | ret = (oldval >= cmparg); | ||
84 | break; | ||
85 | case FUTEX_OP_CMP_LE: | ||
86 | ret = (oldval <= cmparg); | ||
87 | break; | ||
88 | case FUTEX_OP_CMP_GT: | ||
89 | ret = (oldval > cmparg); | ||
90 | break; | ||
91 | default: | ||
92 | ret = -ENOSYS; | ||
93 | } | ||
94 | } | ||
95 | return ret; | 64 | return ret; |
96 | } | 65 | } |
97 | 66 | ||
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 5394b9c5f914..17b98a87e5e2 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h | |||
@@ -65,6 +65,8 @@ static __inline__ void atomic_set(atomic_t *v, int i) | |||
65 | _atomic_spin_unlock_irqrestore(v, flags); | 65 | _atomic_spin_unlock_irqrestore(v, flags); |
66 | } | 66 | } |
67 | 67 | ||
68 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
69 | |||
68 | static __inline__ int atomic_read(const atomic_t *v) | 70 | static __inline__ int atomic_read(const atomic_t *v) |
69 | { | 71 | { |
70 | return READ_ONCE((v)->counter); | 72 | return READ_ONCE((v)->counter); |
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index 0ba14300cd8e..c601aab2fb36 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h | |||
@@ -32,22 +32,12 @@ _futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags) | |||
32 | } | 32 | } |
33 | 33 | ||
34 | static inline int | 34 | static inline int |
35 | futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | 35 | arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) |
36 | { | 36 | { |
37 | unsigned long int flags; | 37 | unsigned long int flags; |
38 | int op = (encoded_op >> 28) & 7; | ||
39 | int cmp = (encoded_op >> 24) & 15; | ||
40 | int oparg = (encoded_op << 8) >> 20; | ||
41 | int cmparg = (encoded_op << 20) >> 20; | ||
42 | int oldval, ret; | 38 | int oldval, ret; |
43 | u32 tmp; | 39 | u32 tmp; |
44 | 40 | ||
45 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
46 | oparg = 1 << oparg; | ||
47 | |||
48 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr))) | ||
49 | return -EFAULT; | ||
50 | |||
51 | _futex_spin_lock_irqsave(uaddr, &flags); | 41 | _futex_spin_lock_irqsave(uaddr, &flags); |
52 | pagefault_disable(); | 42 | pagefault_disable(); |
53 | 43 | ||
@@ -85,17 +75,9 @@ out_pagefault_enable: | |||
85 | pagefault_enable(); | 75 | pagefault_enable(); |
86 | _futex_spin_unlock_irqrestore(uaddr, &flags); | 76 | _futex_spin_unlock_irqrestore(uaddr, &flags); |
87 | 77 | ||
88 | if (ret == 0) { | 78 | if (!ret) |
89 | switch (cmp) { | 79 | *oval = oldval; |
90 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 80 | |
91 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
92 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
93 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
94 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
95 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
96 | default: ret = -ENOSYS; | ||
97 | } | ||
98 | } | ||
99 | return ret; | 81 | return ret; |
100 | } | 82 | } |
101 | 83 | ||
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 25d42bd3f114..9c601adfc500 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h | |||
@@ -74,13 +74,6 @@ do { \ | |||
74 | ___p1; \ | 74 | ___p1; \ |
75 | }) | 75 | }) |
76 | 76 | ||
77 | /* | ||
78 | * This must resolve to hwsync on SMP for the context switch path. | ||
79 | * See _switch, and core scheduler context switch memory ordering | ||
80 | * comments. | ||
81 | */ | ||
82 | #define smp_mb__before_spinlock() smp_mb() | ||
83 | |||
84 | #include <asm-generic/barrier.h> | 77 | #include <asm-generic/barrier.h> |
85 | 78 | ||
86 | #endif /* _ASM_POWERPC_BARRIER_H */ | 79 | #endif /* _ASM_POWERPC_BARRIER_H */ |
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index eaada6c92344..719ed9b61ea7 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h | |||
@@ -29,18 +29,10 @@ | |||
29 | : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ | 29 | : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ |
30 | : "cr0", "memory") | 30 | : "cr0", "memory") |
31 | 31 | ||
32 | static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | 32 | static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, |
33 | u32 __user *uaddr) | ||
33 | { | 34 | { |
34 | int op = (encoded_op >> 28) & 7; | ||
35 | int cmp = (encoded_op >> 24) & 15; | ||
36 | int oparg = (encoded_op << 8) >> 20; | ||
37 | int cmparg = (encoded_op << 20) >> 20; | ||
38 | int oldval = 0, ret; | 35 | int oldval = 0, ret; |
39 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
40 | oparg = 1 << oparg; | ||
41 | |||
42 | if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) | ||
43 | return -EFAULT; | ||
44 | 36 | ||
45 | pagefault_disable(); | 37 | pagefault_disable(); |
46 | 38 | ||
@@ -66,17 +58,9 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | |||
66 | 58 | ||
67 | pagefault_enable(); | 59 | pagefault_enable(); |
68 | 60 | ||
69 | if (!ret) { | 61 | if (!ret) |
70 | switch (cmp) { | 62 | *oval = oldval; |
71 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 63 | |
72 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
73 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
74 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
75 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
76 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
77 | default: ret = -ENOSYS; | ||
78 | } | ||
79 | } | ||
80 | return ret; | 64 | return ret; |
81 | } | 65 | } |
82 | 66 | ||
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index d256e448ea49..edbe571bcc54 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h | |||
@@ -309,5 +309,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) | |||
309 | #define arch_read_relax(lock) __rw_yield(lock) | 309 | #define arch_read_relax(lock) __rw_yield(lock) |
310 | #define arch_write_relax(lock) __rw_yield(lock) | 310 | #define arch_write_relax(lock) __rw_yield(lock) |
311 | 311 | ||
312 | /* See include/linux/spinlock.h */ | ||
313 | #define smp_mb__after_spinlock() smp_mb() | ||
314 | |||
312 | #endif /* __KERNEL__ */ | 315 | #endif /* __KERNEL__ */ |
313 | #endif /* __ASM_SPINLOCK_H */ | 316 | #endif /* __ASM_SPINLOCK_H */ |
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index a4811aa0304d..8f8eec9e1198 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h | |||
@@ -21,17 +21,12 @@ | |||
21 | : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ | 21 | : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ |
22 | "m" (*uaddr) : "cc"); | 22 | "m" (*uaddr) : "cc"); |
23 | 23 | ||
24 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 24 | static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, |
25 | u32 __user *uaddr) | ||
25 | { | 26 | { |
26 | int op = (encoded_op >> 28) & 7; | ||
27 | int cmp = (encoded_op >> 24) & 15; | ||
28 | int oparg = (encoded_op << 8) >> 20; | ||
29 | int cmparg = (encoded_op << 20) >> 20; | ||
30 | int oldval = 0, newval, ret; | 27 | int oldval = 0, newval, ret; |
31 | 28 | ||
32 | load_kernel_asce(); | 29 | load_kernel_asce(); |
33 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
34 | oparg = 1 << oparg; | ||
35 | 30 | ||
36 | pagefault_disable(); | 31 | pagefault_disable(); |
37 | switch (op) { | 32 | switch (op) { |
@@ -60,17 +55,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
60 | } | 55 | } |
61 | pagefault_enable(); | 56 | pagefault_enable(); |
62 | 57 | ||
63 | if (!ret) { | 58 | if (!ret) |
64 | switch (cmp) { | 59 | *oval = oldval; |
65 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 60 | |
66 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
67 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
68 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
69 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
70 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
71 | default: ret = -ENOSYS; | ||
72 | } | ||
73 | } | ||
74 | return ret; | 61 | return ret; |
75 | } | 62 | } |
76 | 63 | ||
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h index d0078747d308..8f8cf941a8cd 100644 --- a/arch/sh/include/asm/futex.h +++ b/arch/sh/include/asm/futex.h | |||
@@ -27,21 +27,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | |||
27 | return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval); | 27 | return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval); |
28 | } | 28 | } |
29 | 29 | ||
30 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 30 | static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, |
31 | u32 __user *uaddr) | ||
31 | { | 32 | { |
32 | int op = (encoded_op >> 28) & 7; | ||
33 | int cmp = (encoded_op >> 24) & 15; | ||
34 | u32 oparg = (encoded_op << 8) >> 20; | ||
35 | u32 cmparg = (encoded_op << 20) >> 20; | ||
36 | u32 oldval, newval, prev; | 33 | u32 oldval, newval, prev; |
37 | int ret; | 34 | int ret; |
38 | 35 | ||
39 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
40 | oparg = 1 << oparg; | ||
41 | |||
42 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
43 | return -EFAULT; | ||
44 | |||
45 | pagefault_disable(); | 36 | pagefault_disable(); |
46 | 37 | ||
47 | do { | 38 | do { |
@@ -80,17 +71,8 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
80 | 71 | ||
81 | pagefault_enable(); | 72 | pagefault_enable(); |
82 | 73 | ||
83 | if (!ret) { | 74 | if (!ret) |
84 | switch (cmp) { | 75 | *oval = oldval; |
85 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | ||
86 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
87 | case FUTEX_OP_CMP_LT: ret = ((int)oldval < (int)cmparg); break; | ||
88 | case FUTEX_OP_CMP_GE: ret = ((int)oldval >= (int)cmparg); break; | ||
89 | case FUTEX_OP_CMP_LE: ret = ((int)oldval <= (int)cmparg); break; | ||
90 | case FUTEX_OP_CMP_GT: ret = ((int)oldval > (int)cmparg); break; | ||
91 | default: ret = -ENOSYS; | ||
92 | } | ||
93 | } | ||
94 | 76 | ||
95 | return ret; | 77 | return ret; |
96 | } | 78 | } |
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index ee3f11c43cda..7643e979e333 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h | |||
@@ -29,6 +29,8 @@ int atomic_xchg(atomic_t *, int); | |||
29 | int __atomic_add_unless(atomic_t *, int, int); | 29 | int __atomic_add_unless(atomic_t *, int, int); |
30 | void atomic_set(atomic_t *, int); | 30 | void atomic_set(atomic_t *, int); |
31 | 31 | ||
32 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
33 | |||
32 | #define atomic_read(v) ACCESS_ONCE((v)->counter) | 34 | #define atomic_read(v) ACCESS_ONCE((v)->counter) |
33 | 35 | ||
34 | #define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) | 36 | #define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) |
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h index 4e899b0dabf7..1cfd89d92208 100644 --- a/arch/sparc/include/asm/futex_64.h +++ b/arch/sparc/include/asm/futex_64.h | |||
@@ -29,22 +29,14 @@ | |||
29 | : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \ | 29 | : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \ |
30 | : "memory") | 30 | : "memory") |
31 | 31 | ||
32 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 32 | static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, |
33 | u32 __user *uaddr) | ||
33 | { | 34 | { |
34 | int op = (encoded_op >> 28) & 7; | ||
35 | int cmp = (encoded_op >> 24) & 15; | ||
36 | int oparg = (encoded_op << 8) >> 20; | ||
37 | int cmparg = (encoded_op << 20) >> 20; | ||
38 | int oldval = 0, ret, tem; | 35 | int oldval = 0, ret, tem; |
39 | 36 | ||
40 | if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))) | ||
41 | return -EFAULT; | ||
42 | if (unlikely((((unsigned long) uaddr) & 0x3UL))) | 37 | if (unlikely((((unsigned long) uaddr) & 0x3UL))) |
43 | return -EINVAL; | 38 | return -EINVAL; |
44 | 39 | ||
45 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
46 | oparg = 1 << oparg; | ||
47 | |||
48 | pagefault_disable(); | 40 | pagefault_disable(); |
49 | 41 | ||
50 | switch (op) { | 42 | switch (op) { |
@@ -69,17 +61,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
69 | 61 | ||
70 | pagefault_enable(); | 62 | pagefault_enable(); |
71 | 63 | ||
72 | if (!ret) { | 64 | if (!ret) |
73 | switch (cmp) { | 65 | *oval = oldval; |
74 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 66 | |
75 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
76 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
77 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
78 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
79 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
80 | default: ret = -ENOSYS; | ||
81 | } | ||
82 | } | ||
83 | return ret; | 67 | return ret; |
84 | } | 68 | } |
85 | 69 | ||
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index a93774255136..53a423e7cb92 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h | |||
@@ -101,6 +101,8 @@ static inline void atomic_set(atomic_t *v, int n) | |||
101 | _atomic_xchg(&v->counter, n); | 101 | _atomic_xchg(&v->counter, n); |
102 | } | 102 | } |
103 | 103 | ||
104 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
105 | |||
104 | /* A 64bit atomic type */ | 106 | /* A 64bit atomic type */ |
105 | 107 | ||
106 | typedef struct { | 108 | typedef struct { |
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index e64a1b75fc38..83c1e639b411 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h | |||
@@ -106,12 +106,9 @@ | |||
106 | lock = __atomic_hashed_lock((int __force *)uaddr) | 106 | lock = __atomic_hashed_lock((int __force *)uaddr) |
107 | #endif | 107 | #endif |
108 | 108 | ||
109 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 109 | static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, |
110 | u32 __user *uaddr) | ||
110 | { | 111 | { |
111 | int op = (encoded_op >> 28) & 7; | ||
112 | int cmp = (encoded_op >> 24) & 15; | ||
113 | int oparg = (encoded_op << 8) >> 20; | ||
114 | int cmparg = (encoded_op << 20) >> 20; | ||
115 | int uninitialized_var(val), ret; | 112 | int uninitialized_var(val), ret; |
116 | 113 | ||
117 | __futex_prolog(); | 114 | __futex_prolog(); |
@@ -119,12 +116,6 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
119 | /* The 32-bit futex code makes this assumption, so validate it here. */ | 116 | /* The 32-bit futex code makes this assumption, so validate it here. */ |
120 | BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int)); | 117 | BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int)); |
121 | 118 | ||
122 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
123 | oparg = 1 << oparg; | ||
124 | |||
125 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
126 | return -EFAULT; | ||
127 | |||
128 | pagefault_disable(); | 119 | pagefault_disable(); |
129 | switch (op) { | 120 | switch (op) { |
130 | case FUTEX_OP_SET: | 121 | case FUTEX_OP_SET: |
@@ -148,30 +139,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
148 | } | 139 | } |
149 | pagefault_enable(); | 140 | pagefault_enable(); |
150 | 141 | ||
151 | if (!ret) { | 142 | if (!ret) |
152 | switch (cmp) { | 143 | *oval = val; |
153 | case FUTEX_OP_CMP_EQ: | 144 | |
154 | ret = (val == cmparg); | ||
155 | break; | ||
156 | case FUTEX_OP_CMP_NE: | ||
157 | ret = (val != cmparg); | ||
158 | break; | ||
159 | case FUTEX_OP_CMP_LT: | ||
160 | ret = (val < cmparg); | ||
161 | break; | ||
162 | case FUTEX_OP_CMP_GE: | ||
163 | ret = (val >= cmparg); | ||
164 | break; | ||
165 | case FUTEX_OP_CMP_LE: | ||
166 | ret = (val <= cmparg); | ||
167 | break; | ||
168 | case FUTEX_OP_CMP_GT: | ||
169 | ret = (val > cmparg); | ||
170 | break; | ||
171 | default: | ||
172 | ret = -ENOSYS; | ||
173 | } | ||
174 | } | ||
175 | return ret; | 145 | return ret; |
176 | } | 146 | } |
177 | 147 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9c95aa417e9b..cce15191e9e9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -55,6 +55,8 @@ config X86 | |||
55 | select ARCH_HAS_KCOV if X86_64 | 55 | select ARCH_HAS_KCOV if X86_64 |
56 | select ARCH_HAS_MMIO_FLUSH | 56 | select ARCH_HAS_MMIO_FLUSH |
57 | select ARCH_HAS_PMEM_API if X86_64 | 57 | select ARCH_HAS_PMEM_API if X86_64 |
58 | # Causing hangs/crashes, see the commit that added this change for details. | ||
59 | select ARCH_HAS_REFCOUNT if BROKEN | ||
58 | select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 | 60 | select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 |
59 | select ARCH_HAS_SET_MEMORY | 61 | select ARCH_HAS_SET_MEMORY |
60 | select ARCH_HAS_SG_CHAIN | 62 | select ARCH_HAS_SG_CHAIN |
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7a9df3beb89b..676ee5807d86 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h | |||
@@ -74,6 +74,9 @@ | |||
74 | # define _ASM_EXTABLE_EX(from, to) \ | 74 | # define _ASM_EXTABLE_EX(from, to) \ |
75 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) | 75 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) |
76 | 76 | ||
77 | # define _ASM_EXTABLE_REFCOUNT(from, to) \ | ||
78 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) | ||
79 | |||
77 | # define _ASM_NOKPROBE(entry) \ | 80 | # define _ASM_NOKPROBE(entry) \ |
78 | .pushsection "_kprobe_blacklist","aw" ; \ | 81 | .pushsection "_kprobe_blacklist","aw" ; \ |
79 | _ASM_ALIGN ; \ | 82 | _ASM_ALIGN ; \ |
@@ -123,6 +126,9 @@ | |||
123 | # define _ASM_EXTABLE_EX(from, to) \ | 126 | # define _ASM_EXTABLE_EX(from, to) \ |
124 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) | 127 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) |
125 | 128 | ||
129 | # define _ASM_EXTABLE_REFCOUNT(from, to) \ | ||
130 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) | ||
131 | |||
126 | /* For C file, we already have NOKPROBE_SYMBOL macro */ | 132 | /* For C file, we already have NOKPROBE_SYMBOL macro */ |
127 | #endif | 133 | #endif |
128 | 134 | ||
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 33380b871463..0874ebda3069 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h | |||
@@ -197,35 +197,56 @@ static inline int atomic_xchg(atomic_t *v, int new) | |||
197 | return xchg(&v->counter, new); | 197 | return xchg(&v->counter, new); |
198 | } | 198 | } |
199 | 199 | ||
200 | #define ATOMIC_OP(op) \ | 200 | static inline void atomic_and(int i, atomic_t *v) |
201 | static inline void atomic_##op(int i, atomic_t *v) \ | 201 | { |
202 | { \ | 202 | asm volatile(LOCK_PREFIX "andl %1,%0" |
203 | asm volatile(LOCK_PREFIX #op"l %1,%0" \ | 203 | : "+m" (v->counter) |
204 | : "+m" (v->counter) \ | 204 | : "ir" (i) |
205 | : "ir" (i) \ | 205 | : "memory"); |
206 | : "memory"); \ | 206 | } |
207 | |||
208 | static inline int atomic_fetch_and(int i, atomic_t *v) | ||
209 | { | ||
210 | int val = atomic_read(v); | ||
211 | |||
212 | do { } while (!atomic_try_cmpxchg(v, &val, val & i)); | ||
213 | |||
214 | return val; | ||
207 | } | 215 | } |
208 | 216 | ||
209 | #define ATOMIC_FETCH_OP(op, c_op) \ | 217 | static inline void atomic_or(int i, atomic_t *v) |
210 | static inline int atomic_fetch_##op(int i, atomic_t *v) \ | 218 | { |
211 | { \ | 219 | asm volatile(LOCK_PREFIX "orl %1,%0" |
212 | int val = atomic_read(v); \ | 220 | : "+m" (v->counter) |
213 | do { \ | 221 | : "ir" (i) |
214 | } while (!atomic_try_cmpxchg(v, &val, val c_op i)); \ | 222 | : "memory"); |
215 | return val; \ | ||
216 | } | 223 | } |
217 | 224 | ||
218 | #define ATOMIC_OPS(op, c_op) \ | 225 | static inline int atomic_fetch_or(int i, atomic_t *v) |
219 | ATOMIC_OP(op) \ | 226 | { |
220 | ATOMIC_FETCH_OP(op, c_op) | 227 | int val = atomic_read(v); |
221 | 228 | ||
222 | ATOMIC_OPS(and, &) | 229 | do { } while (!atomic_try_cmpxchg(v, &val, val | i)); |
223 | ATOMIC_OPS(or , |) | ||
224 | ATOMIC_OPS(xor, ^) | ||
225 | 230 | ||
226 | #undef ATOMIC_OPS | 231 | return val; |
227 | #undef ATOMIC_FETCH_OP | 232 | } |
228 | #undef ATOMIC_OP | 233 | |
234 | static inline void atomic_xor(int i, atomic_t *v) | ||
235 | { | ||
236 | asm volatile(LOCK_PREFIX "xorl %1,%0" | ||
237 | : "+m" (v->counter) | ||
238 | : "ir" (i) | ||
239 | : "memory"); | ||
240 | } | ||
241 | |||
242 | static inline int atomic_fetch_xor(int i, atomic_t *v) | ||
243 | { | ||
244 | int val = atomic_read(v); | ||
245 | |||
246 | do { } while (!atomic_try_cmpxchg(v, &val, val ^ i)); | ||
247 | |||
248 | return val; | ||
249 | } | ||
229 | 250 | ||
230 | /** | 251 | /** |
231 | * __atomic_add_unless - add unless the number is already a given value | 252 | * __atomic_add_unless - add unless the number is already a given value |
@@ -239,10 +260,12 @@ ATOMIC_OPS(xor, ^) | |||
239 | static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) | 260 | static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) |
240 | { | 261 | { |
241 | int c = atomic_read(v); | 262 | int c = atomic_read(v); |
263 | |||
242 | do { | 264 | do { |
243 | if (unlikely(c == u)) | 265 | if (unlikely(c == u)) |
244 | break; | 266 | break; |
245 | } while (!atomic_try_cmpxchg(v, &c, c + a)); | 267 | } while (!atomic_try_cmpxchg(v, &c, c + a)); |
268 | |||
246 | return c; | 269 | return c; |
247 | } | 270 | } |
248 | 271 | ||
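The x86 atomic bitwise ops are converted from macro-generated definitions to plain functions; behaviour is unchanged, but the explicit bodies are easier to read and annotate. The value-returning variants use the usual atomic_try_cmpxchg() loop and return the value observed before the operation, as in this hedged flag-claiming sketch (flag bit and helper are hypothetical):

  #include <linux/atomic.h>

  #define MY_FLAG_BUSY    0x1             /* hypothetical flag bit */

  static bool my_claim(atomic_t *flags)
  {
          /* atomic_fetch_or() returns the value *before* the OR */
          return !(atomic_fetch_or(MY_FLAG_BUSY, flags) & MY_FLAG_BUSY);
  }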
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 71d7705fb303..9e206f31ce2a 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h | |||
@@ -312,37 +312,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) | |||
312 | #undef alternative_atomic64 | 312 | #undef alternative_atomic64 |
313 | #undef __alternative_atomic64 | 313 | #undef __alternative_atomic64 |
314 | 314 | ||
315 | #define ATOMIC64_OP(op, c_op) \ | 315 | static inline void atomic64_and(long long i, atomic64_t *v) |
316 | static inline void atomic64_##op(long long i, atomic64_t *v) \ | 316 | { |
317 | { \ | 317 | long long old, c = 0; |
318 | long long old, c = 0; \ | 318 | |
319 | while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ | 319 | while ((old = atomic64_cmpxchg(v, c, c & i)) != c) |
320 | c = old; \ | 320 | c = old; |
321 | } | 321 | } |
322 | 322 | ||
323 | #define ATOMIC64_FETCH_OP(op, c_op) \ | 323 | static inline long long atomic64_fetch_and(long long i, atomic64_t *v) |
324 | static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ | 324 | { |
325 | { \ | 325 | long long old, c = 0; |
326 | long long old, c = 0; \ | 326 | |
327 | while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ | 327 | while ((old = atomic64_cmpxchg(v, c, c & i)) != c) |
328 | c = old; \ | 328 | c = old; |
329 | return old; \ | 329 | |
330 | return old; | ||
330 | } | 331 | } |
331 | 332 | ||
332 | ATOMIC64_FETCH_OP(add, +) | 333 | static inline void atomic64_or(long long i, atomic64_t *v) |
334 | { | ||
335 | long long old, c = 0; | ||
333 | 336 | ||
334 | #define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) | 337 | while ((old = atomic64_cmpxchg(v, c, c | i)) != c) |
338 | c = old; | ||
339 | } | ||
340 | |||
341 | static inline long long atomic64_fetch_or(long long i, atomic64_t *v) | ||
342 | { | ||
343 | long long old, c = 0; | ||
344 | |||
345 | while ((old = atomic64_cmpxchg(v, c, c | i)) != c) | ||
346 | c = old; | ||
347 | |||
348 | return old; | ||
349 | } | ||
335 | 350 | ||
336 | #define ATOMIC64_OPS(op, c_op) \ | 351 | static inline void atomic64_xor(long long i, atomic64_t *v) |
337 | ATOMIC64_OP(op, c_op) \ | 352 | { |
338 | ATOMIC64_FETCH_OP(op, c_op) | 353 | long long old, c = 0; |
354 | |||
355 | while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) | ||
356 | c = old; | ||
357 | } | ||
339 | 358 | ||
340 | ATOMIC64_OPS(and, &) | 359 | static inline long long atomic64_fetch_xor(long long i, atomic64_t *v) |
341 | ATOMIC64_OPS(or, |) | 360 | { |
342 | ATOMIC64_OPS(xor, ^) | 361 | long long old, c = 0; |
362 | |||
363 | while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) | ||
364 | c = old; | ||
365 | |||
366 | return old; | ||
367 | } | ||
343 | 368 | ||
344 | #undef ATOMIC64_OPS | 369 | static inline long long atomic64_fetch_add(long long i, atomic64_t *v) |
345 | #undef ATOMIC64_FETCH_OP | 370 | { |
346 | #undef ATOMIC64_OP | 371 | long long old, c = 0; |
372 | |||
373 | while ((old = atomic64_cmpxchg(v, c, c + i)) != c) | ||
374 | c = old; | ||
375 | |||
376 | return old; | ||
377 | } | ||
378 | |||
379 | #define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) | ||
347 | 380 | ||
348 | #endif /* _ASM_X86_ATOMIC64_32_H */ | 381 | #endif /* _ASM_X86_ATOMIC64_32_H */ |
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 6189a433c9a9..5d9de36a2f04 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h | |||
@@ -177,7 +177,7 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) | |||
177 | } | 177 | } |
178 | 178 | ||
179 | #define atomic64_try_cmpxchg atomic64_try_cmpxchg | 179 | #define atomic64_try_cmpxchg atomic64_try_cmpxchg |
180 | static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new) | 180 | static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) |
181 | { | 181 | { |
182 | return try_cmpxchg(&v->counter, old, new); | 182 | return try_cmpxchg(&v->counter, old, new); |
183 | } | 183 | } |
@@ -198,7 +198,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new) | |||
198 | */ | 198 | */ |
199 | static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) | 199 | static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) |
200 | { | 200 | { |
201 | long c = atomic64_read(v); | 201 | s64 c = atomic64_read(v); |
202 | do { | 202 | do { |
203 | if (unlikely(c == u)) | 203 | if (unlikely(c == u)) |
204 | return false; | 204 | return false; |
@@ -217,7 +217,7 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) | |||
217 | */ | 217 | */ |
218 | static inline long atomic64_dec_if_positive(atomic64_t *v) | 218 | static inline long atomic64_dec_if_positive(atomic64_t *v) |
219 | { | 219 | { |
220 | long dec, c = atomic64_read(v); | 220 | s64 dec, c = atomic64_read(v); |
221 | do { | 221 | do { |
222 | dec = c - 1; | 222 | dec = c - 1; |
223 | if (unlikely(dec < 0)) | 223 | if (unlikely(dec < 0)) |
@@ -226,34 +226,55 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) | |||
226 | return dec; | 226 | return dec; |
227 | } | 227 | } |
228 | 228 | ||
229 | #define ATOMIC64_OP(op) \ | 229 | static inline void atomic64_and(long i, atomic64_t *v) |
230 | static inline void atomic64_##op(long i, atomic64_t *v) \ | 230 | { |
231 | { \ | 231 | asm volatile(LOCK_PREFIX "andq %1,%0" |
232 | asm volatile(LOCK_PREFIX #op"q %1,%0" \ | 232 | : "+m" (v->counter) |
233 | : "+m" (v->counter) \ | 233 | : "er" (i) |
234 | : "er" (i) \ | 234 | : "memory"); |
235 | : "memory"); \ | ||
236 | } | 235 | } |
237 | 236 | ||
238 | #define ATOMIC64_FETCH_OP(op, c_op) \ | 237 | static inline long atomic64_fetch_and(long i, atomic64_t *v) |
239 | static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ | 238 | { |
240 | { \ | 239 | s64 val = atomic64_read(v); |
241 | long val = atomic64_read(v); \ | 240 | |
242 | do { \ | 241 | do { |
243 | } while (!atomic64_try_cmpxchg(v, &val, val c_op i)); \ | 242 | } while (!atomic64_try_cmpxchg(v, &val, val & i)); |
244 | return val; \ | 243 | return val; |
245 | } | 244 | } |
246 | 245 | ||
247 | #define ATOMIC64_OPS(op, c_op) \ | 246 | static inline void atomic64_or(long i, atomic64_t *v) |
248 | ATOMIC64_OP(op) \ | 247 | { |
249 | ATOMIC64_FETCH_OP(op, c_op) | 248 | asm volatile(LOCK_PREFIX "orq %1,%0" |
249 | : "+m" (v->counter) | ||
250 | : "er" (i) | ||
251 | : "memory"); | ||
252 | } | ||
250 | 253 | ||
251 | ATOMIC64_OPS(and, &) | 254 | static inline long atomic64_fetch_or(long i, atomic64_t *v) |
252 | ATOMIC64_OPS(or, |) | 255 | { |
253 | ATOMIC64_OPS(xor, ^) | 256 | s64 val = atomic64_read(v); |
254 | 257 | ||
255 | #undef ATOMIC64_OPS | 258 | do { |
256 | #undef ATOMIC64_FETCH_OP | 259 | } while (!atomic64_try_cmpxchg(v, &val, val | i)); |
257 | #undef ATOMIC64_OP | 260 | return val; |
261 | } | ||
262 | |||
263 | static inline void atomic64_xor(long i, atomic64_t *v) | ||
264 | { | ||
265 | asm volatile(LOCK_PREFIX "xorq %1,%0" | ||
266 | : "+m" (v->counter) | ||
267 | : "er" (i) | ||
268 | : "memory"); | ||
269 | } | ||
270 | |||
271 | static inline long atomic64_fetch_xor(long i, atomic64_t *v) | ||
272 | { | ||
273 | s64 val = atomic64_read(v); | ||
274 | |||
275 | do { | ||
276 | } while (!atomic64_try_cmpxchg(v, &val, val ^ i)); | ||
277 | return val; | ||
278 | } | ||
258 | 279 | ||
259 | #endif /* _ASM_X86_ATOMIC64_64_H */ | 280 | #endif /* _ASM_X86_ATOMIC64_64_H */ |
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index d90296d061e8..b5069e802d5c 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h | |||
@@ -157,7 +157,7 @@ extern void __add_wrong_size(void) | |||
157 | #define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \ | 157 | #define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \ |
158 | ({ \ | 158 | ({ \ |
159 | bool success; \ | 159 | bool success; \ |
160 | __typeof__(_ptr) _old = (_pold); \ | 160 | __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ |
161 | __typeof__(*(_ptr)) __old = *_old; \ | 161 | __typeof__(*(_ptr)) __old = *_old; \ |
162 | __typeof__(*(_ptr)) __new = (_new); \ | 162 | __typeof__(*(_ptr)) __new = (_new); \ |
163 | switch (size) { \ | 163 | switch (size) { \ |
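The __raw_try_cmpxchg() cast above lets callers pass an 'old' pointer of a compatible but differently-spelled type, which is what the atomic64_64.h hunks rely on when handing an s64 * to try_cmpxchg() on a long counter. As a reminder of the calling convention, try_cmpxchg() writes the current value back through 'old' on failure, so loops need no explicit re-read; a hedged sketch:

  static int demo_inc_below(atomic_t *v, int max)
  {
          int old = atomic_read(v);

          do {
                  if (old >= max)
                          return old;             /* refuse to go past max */
          } while (!atomic_try_cmpxchg(v, &old, old + 1));

          return old + 1;
  }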
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index b4c1f5453436..f4dc9b63bdda 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h | |||
@@ -41,20 +41,11 @@ | |||
41 | "+m" (*uaddr), "=&r" (tem) \ | 41 | "+m" (*uaddr), "=&r" (tem) \ |
42 | : "r" (oparg), "i" (-EFAULT), "1" (0)) | 42 | : "r" (oparg), "i" (-EFAULT), "1" (0)) |
43 | 43 | ||
44 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 44 | static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, |
45 | u32 __user *uaddr) | ||
45 | { | 46 | { |
46 | int op = (encoded_op >> 28) & 7; | ||
47 | int cmp = (encoded_op >> 24) & 15; | ||
48 | int oparg = (encoded_op << 8) >> 20; | ||
49 | int cmparg = (encoded_op << 20) >> 20; | ||
50 | int oldval = 0, ret, tem; | 47 | int oldval = 0, ret, tem; |
51 | 48 | ||
52 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
53 | oparg = 1 << oparg; | ||
54 | |||
55 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
56 | return -EFAULT; | ||
57 | |||
58 | pagefault_disable(); | 49 | pagefault_disable(); |
59 | 50 | ||
60 | switch (op) { | 51 | switch (op) { |
@@ -80,30 +71,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
80 | 71 | ||
81 | pagefault_enable(); | 72 | pagefault_enable(); |
82 | 73 | ||
83 | if (!ret) { | 74 | if (!ret) |
84 | switch (cmp) { | 75 | *oval = oldval; |
85 | case FUTEX_OP_CMP_EQ: | 76 | |
86 | ret = (oldval == cmparg); | ||
87 | break; | ||
88 | case FUTEX_OP_CMP_NE: | ||
89 | ret = (oldval != cmparg); | ||
90 | break; | ||
91 | case FUTEX_OP_CMP_LT: | ||
92 | ret = (oldval < cmparg); | ||
93 | break; | ||
94 | case FUTEX_OP_CMP_GE: | ||
95 | ret = (oldval >= cmparg); | ||
96 | break; | ||
97 | case FUTEX_OP_CMP_LE: | ||
98 | ret = (oldval <= cmparg); | ||
99 | break; | ||
100 | case FUTEX_OP_CMP_GT: | ||
101 | ret = (oldval > cmparg); | ||
102 | break; | ||
103 | default: | ||
104 | ret = -ENOSYS; | ||
105 | } | ||
106 | } | ||
107 | return ret; | 77 | return ret; |
108 | } | 78 | } |
109 | 79 | ||
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h new file mode 100644 index 000000000000..ff871210b9f2 --- /dev/null +++ b/arch/x86/include/asm/refcount.h | |||
@@ -0,0 +1,109 @@ | |||
1 | #ifndef __ASM_X86_REFCOUNT_H | ||
2 | #define __ASM_X86_REFCOUNT_H | ||
3 | /* | ||
4 | * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from | ||
5 | * PaX/grsecurity. | ||
6 | */ | ||
7 | #include <linux/refcount.h> | ||
8 | |||
9 | /* | ||
10 | * This is the first portion of the refcount error handling, which lives in | ||
11 | * .text.unlikely, and is jumped to from the CPU flag check (in the | ||
12 | * following macros). This saves the refcount value location into CX for | ||
13 | * the exception handler to use (in mm/extable.c), and then triggers the | ||
14 | * central refcount exception. The fixup address for the exception points | ||
15 | * back to the regular execution flow in .text. | ||
16 | */ | ||
17 | #define _REFCOUNT_EXCEPTION \ | ||
18 | ".pushsection .text.unlikely\n" \ | ||
19 | "111:\tlea %[counter], %%" _ASM_CX "\n" \ | ||
20 | "112:\t" ASM_UD0 "\n" \ | ||
21 | ASM_UNREACHABLE \ | ||
22 | ".popsection\n" \ | ||
23 | "113:\n" \ | ||
24 | _ASM_EXTABLE_REFCOUNT(112b, 113b) | ||
25 | |||
26 | /* Trigger refcount exception if refcount result is negative. */ | ||
27 | #define REFCOUNT_CHECK_LT_ZERO \ | ||
28 | "js 111f\n\t" \ | ||
29 | _REFCOUNT_EXCEPTION | ||
30 | |||
31 | /* Trigger refcount exception if refcount result is zero or negative. */ | ||
32 | #define REFCOUNT_CHECK_LE_ZERO \ | ||
33 | "jz 111f\n\t" \ | ||
34 | REFCOUNT_CHECK_LT_ZERO | ||
35 | |||
36 | /* Trigger refcount exception unconditionally. */ | ||
37 | #define REFCOUNT_ERROR \ | ||
38 | "jmp 111f\n\t" \ | ||
39 | _REFCOUNT_EXCEPTION | ||
40 | |||
41 | static __always_inline void refcount_add(unsigned int i, refcount_t *r) | ||
42 | { | ||
43 | asm volatile(LOCK_PREFIX "addl %1,%0\n\t" | ||
44 | REFCOUNT_CHECK_LT_ZERO | ||
45 | : [counter] "+m" (r->refs.counter) | ||
46 | : "ir" (i) | ||
47 | : "cc", "cx"); | ||
48 | } | ||
49 | |||
50 | static __always_inline void refcount_inc(refcount_t *r) | ||
51 | { | ||
52 | asm volatile(LOCK_PREFIX "incl %0\n\t" | ||
53 | REFCOUNT_CHECK_LT_ZERO | ||
54 | : [counter] "+m" (r->refs.counter) | ||
55 | : : "cc", "cx"); | ||
56 | } | ||
57 | |||
58 | static __always_inline void refcount_dec(refcount_t *r) | ||
59 | { | ||
60 | asm volatile(LOCK_PREFIX "decl %0\n\t" | ||
61 | REFCOUNT_CHECK_LE_ZERO | ||
62 | : [counter] "+m" (r->refs.counter) | ||
63 | : : "cc", "cx"); | ||
64 | } | ||
65 | |||
66 | static __always_inline __must_check | ||
67 | bool refcount_sub_and_test(unsigned int i, refcount_t *r) | ||
68 | { | ||
69 | GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, | ||
70 | r->refs.counter, "er", i, "%0", e); | ||
71 | } | ||
72 | |||
73 | static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) | ||
74 | { | ||
75 | GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, | ||
76 | r->refs.counter, "%0", e); | ||
77 | } | ||
78 | |||
79 | static __always_inline __must_check | ||
80 | bool refcount_add_not_zero(unsigned int i, refcount_t *r) | ||
81 | { | ||
82 | int c, result; | ||
83 | |||
84 | c = atomic_read(&(r->refs)); | ||
85 | do { | ||
86 | if (unlikely(c == 0)) | ||
87 | return false; | ||
88 | |||
89 | result = c + i; | ||
90 | |||
91 | /* Did we try to increment from/to an undesirable state? */ | ||
92 | if (unlikely(c < 0 || c == INT_MAX || result < c)) { | ||
93 | asm volatile(REFCOUNT_ERROR | ||
94 | : : [counter] "m" (r->refs.counter) | ||
95 | : "cc", "cx"); | ||
96 | break; | ||
97 | } | ||
98 | |||
99 | } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); | ||
100 | |||
101 | return c != 0; | ||
102 | } | ||
103 | |||
104 | static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) | ||
105 | { | ||
106 | return refcount_add_not_zero(1, r); | ||
107 | } | ||
108 | |||
109 | #endif | ||
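
The file above gives CONFIG_ARCH_HAS_REFCOUNT an x86 fast path: the LOCK-prefixed arithmetic runs at full speed, and only a taken "js"/"jz" into the .text.unlikely stub plus the UD0 exception is paid when a counter goes bad. Callers keep using the ordinary refcount_t API unchanged; below is a minimal, hedged sketch of that caller side (struct foo, foo_get() and foo_put() are invented names, not part of the patch):

#include <linux/refcount.h>
#include <linux/slab.h>

struct foo {
	refcount_t ref;
	/* ... payload ... */
};

static struct foo *foo_get(struct foo *f)
{
	/* Fails once the count has already dropped to zero. */
	return refcount_inc_not_zero(&f->ref) ? f : NULL;
}

static void foo_put(struct foo *f)
{
	/*
	 * Only the final put frees; an underflow would take the UD0
	 * path above instead of silently going negative.
	 */
	if (refcount_dec_and_test(&f->ref))
		kfree(f);
}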
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index fb2ddcdf7c73..c076f710de4c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c | |||
@@ -36,6 +36,48 @@ bool ex_handler_fault(const struct exception_table_entry *fixup, | |||
36 | } | 36 | } |
37 | EXPORT_SYMBOL_GPL(ex_handler_fault); | 37 | EXPORT_SYMBOL_GPL(ex_handler_fault); |
38 | 38 | ||
39 | /* | ||
40 | * Handler for UD0 exception following a failed test against the | ||
41 | * result of a refcount inc/dec/add/sub. | ||
42 | */ | ||
43 | bool ex_handler_refcount(const struct exception_table_entry *fixup, | ||
44 | struct pt_regs *regs, int trapnr) | ||
45 | { | ||
46 | /* First unconditionally saturate the refcount. */ | ||
47 | *(int *)regs->cx = INT_MIN / 2; | ||
48 | |||
49 | /* | ||
50 | * Strictly speaking, this reports the fixup destination, not | ||
51 | * the fault location, and not the actually overflowing | ||
52 | * instruction, which is the instruction before the "js", but | ||
53 | * since that instruction could be a variety of lengths, just | ||
54 | * report the location after the overflow, which should be close | ||
55 | * enough for finding the overflow, as it's at least back in | ||
56 | * the function, having returned from .text.unlikely. | ||
57 | */ | ||
58 | regs->ip = ex_fixup_addr(fixup); | ||
59 | |||
60 | /* | ||
61 | * This function has been called because either a negative refcount | ||
62 | * value was seen by any of the refcount functions, or a zero | ||
63 | * refcount value was seen by refcount_dec(). | ||
64 | * | ||
65 | * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result | ||
66 | * wrapped around) will be set. Additionally, seeing the refcount | ||
67 | * reach 0 will set ZF (Zero Flag: result was zero). In each of | ||
68 | * these cases we want a report, since it's a boundary condition. | ||
69 | * | ||
70 | */ | ||
71 | if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) { | ||
72 | bool zero = regs->flags & X86_EFLAGS_ZF; | ||
73 | |||
74 | refcount_error_report(regs, zero ? "hit zero" : "overflow"); | ||
75 | } | ||
76 | |||
77 | return true; | ||
78 | } | ||
79 | EXPORT_SYMBOL_GPL(ex_handler_refcount); | ||
80 | |||
39 | bool ex_handler_ext(const struct exception_table_entry *fixup, | 81 | bool ex_handler_ext(const struct exception_table_entry *fixup, |
40 | struct pt_regs *regs, int trapnr) | 82 | struct pt_regs *regs, int trapnr) |
41 | { | 83 | { |
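
A hedged worked example of what the saturation value in the handler above buys, assuming a 32-bit int so INT_MIN/2 == -0x40000000: once the handler has written that value, a later refcount_inc() yields -0x3fffffff, which is still negative, so the "js" check fires again and the counter is re-saturated; refcount_dec_and_test() likewise keeps seeing a non-zero, negative value and never returns true. The counter is thus pinned roughly 2^30 steps away from both zero and wrap-around, so a buggy counter leaks the object rather than opening a use-after-free window.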
diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h index b39531babec0..eaaf1ebcc7a4 100644 --- a/arch/xtensa/include/asm/futex.h +++ b/arch/xtensa/include/asm/futex.h | |||
@@ -44,18 +44,10 @@ | |||
44 | : "r" (uaddr), "I" (-EFAULT), "r" (oparg) \ | 44 | : "r" (uaddr), "I" (-EFAULT), "r" (oparg) \ |
45 | : "memory") | 45 | : "memory") |
46 | 46 | ||
47 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 47 | static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, |
48 | u32 __user *uaddr) | ||
48 | { | 49 | { |
49 | int op = (encoded_op >> 28) & 7; | ||
50 | int cmp = (encoded_op >> 24) & 15; | ||
51 | int oparg = (encoded_op << 8) >> 20; | ||
52 | int cmparg = (encoded_op << 20) >> 20; | ||
53 | int oldval = 0, ret; | 50 | int oldval = 0, ret; |
54 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
55 | oparg = 1 << oparg; | ||
56 | |||
57 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
58 | return -EFAULT; | ||
59 | 51 | ||
60 | #if !XCHAL_HAVE_S32C1I | 52 | #if !XCHAL_HAVE_S32C1I |
61 | return -ENOSYS; | 53 | return -ENOSYS; |
@@ -89,19 +81,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
89 | 81 | ||
90 | pagefault_enable(); | 82 | pagefault_enable(); |
91 | 83 | ||
92 | if (ret) | 84 | if (!ret) |
93 | return ret; | 85 | *oval = oldval; |
94 | 86 | ||
95 | switch (cmp) { | 87 | return ret; |
96 | case FUTEX_OP_CMP_EQ: return (oldval == cmparg); | ||
97 | case FUTEX_OP_CMP_NE: return (oldval != cmparg); | ||
98 | case FUTEX_OP_CMP_LT: return (oldval < cmparg); | ||
99 | case FUTEX_OP_CMP_GE: return (oldval >= cmparg); | ||
100 | case FUTEX_OP_CMP_LE: return (oldval <= cmparg); | ||
101 | case FUTEX_OP_CMP_GT: return (oldval > cmparg); | ||
102 | } | ||
103 | |||
104 | return -ENOSYS; | ||
105 | } | 88 | } |
106 | 89 | ||
107 | static inline int | 90 | static inline int |
diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 87b7df4851bf..07125e7941f4 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c | |||
@@ -60,7 +60,7 @@ static void trigger_softirq(void *data) | |||
60 | static int raise_blk_irq(int cpu, struct request *rq) | 60 | static int raise_blk_irq(int cpu, struct request *rq) |
61 | { | 61 | { |
62 | if (cpu_online(cpu)) { | 62 | if (cpu_online(cpu)) { |
63 | struct call_single_data *data = &rq->csd; | 63 | call_single_data_t *data = &rq->csd; |
64 | 64 | ||
65 | data->func = trigger_softirq; | 65 | data->func = trigger_softirq; |
66 | data->info = rq; | 66 | data->info = rq; |
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 19182d091587..1893e416e7c0 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c | |||
@@ -2884,7 +2884,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) | |||
2884 | * need to be interruptible while waiting. | 2884 | * need to be interruptible while waiting. |
2885 | */ | 2885 | */ |
2886 | INIT_WORK_ONSTACK(&flush.work, flush_probe); | 2886 | INIT_WORK_ONSTACK(&flush.work, flush_probe); |
2887 | COMPLETION_INITIALIZER_ONSTACK(flush.cmp); | 2887 | init_completion(&flush.cmp); |
2888 | queue_work(nfit_wq, &flush.work); | 2888 | queue_work(nfit_wq, &flush.work); |
2889 | mutex_unlock(&acpi_desc->init_mutex); | 2889 | mutex_unlock(&acpi_desc->init_mutex); |
2890 | 2890 | ||
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 85c24cace973..81142ce781da 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c | |||
@@ -13,7 +13,7 @@ | |||
13 | struct nullb_cmd { | 13 | struct nullb_cmd { |
14 | struct list_head list; | 14 | struct list_head list; |
15 | struct llist_node ll_list; | 15 | struct llist_node ll_list; |
16 | struct call_single_data csd; | 16 | call_single_data_t csd; |
17 | struct request *rq; | 17 | struct request *rq; |
18 | struct bio *bio; | 18 | struct bio *bio; |
19 | unsigned int tag; | 19 | unsigned int tag; |
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 72bbfccef113..fd4b7f684bd0 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c | |||
@@ -455,7 +455,11 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa | |||
455 | per_cpu(timer_unstable_counter_workaround, i) = wa; | 455 | per_cpu(timer_unstable_counter_workaround, i) = wa; |
456 | } | 456 | } |
457 | 457 | ||
458 | static_branch_enable(&arch_timer_read_ool_enabled); | 458 | /* |
459 | * Use the locked version, as we're called from the CPU | ||
460 | * hotplug framework. Otherwise, we end up in deadlock-land. | ||

461 | */ | ||
462 | static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled); | ||
459 | 463 | ||
460 | /* | 464 | /* |
461 | * Don't use the vdso fastpath if errata require using the | 465 | * Don't use the vdso fastpath if errata require using the |
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 71e586d7df71..147f38ea0fcd 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c | |||
@@ -119,13 +119,13 @@ struct cpuidle_coupled { | |||
119 | 119 | ||
120 | #define CPUIDLE_COUPLED_NOT_IDLE (-1) | 120 | #define CPUIDLE_COUPLED_NOT_IDLE (-1) |
121 | 121 | ||
122 | static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); | 122 | static DEFINE_PER_CPU(call_single_data_t, cpuidle_coupled_poke_cb); |
123 | 123 | ||
124 | /* | 124 | /* |
125 | * The cpuidle_coupled_poke_pending mask is used to avoid calling | 125 | * The cpuidle_coupled_poke_pending mask is used to avoid calling |
126 | * __smp_call_function_single with the per cpu call_single_data struct already | 126 | * __smp_call_function_single with the per cpu call_single_data_t struct already |
127 | * in use. This prevents a deadlock where two cpus are waiting for each others | 127 | * in use. This prevents a deadlock where two cpus are waiting for each others |
128 | * call_single_data struct to be available | 128 | * call_single_data_t struct to be available |
129 | */ | 129 | */ |
130 | static cpumask_t cpuidle_coupled_poke_pending; | 130 | static cpumask_t cpuidle_coupled_poke_pending; |
131 | 131 | ||
@@ -339,7 +339,7 @@ static void cpuidle_coupled_handle_poke(void *info) | |||
339 | */ | 339 | */ |
340 | static void cpuidle_coupled_poke(int cpu) | 340 | static void cpuidle_coupled_poke(int cpu) |
341 | { | 341 | { |
342 | struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); | 342 | call_single_data_t *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); |
343 | 343 | ||
344 | if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) | 344 | if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) |
345 | smp_call_function_single_async(cpu, csd); | 345 | smp_call_function_single_async(cpu, csd); |
@@ -651,7 +651,7 @@ int cpuidle_coupled_register_device(struct cpuidle_device *dev) | |||
651 | { | 651 | { |
652 | int cpu; | 652 | int cpu; |
653 | struct cpuidle_device *other_dev; | 653 | struct cpuidle_device *other_dev; |
654 | struct call_single_data *csd; | 654 | call_single_data_t *csd; |
655 | struct cpuidle_coupled *coupled; | 655 | struct cpuidle_coupled *coupled; |
656 | 656 | ||
657 | if (cpumask_empty(&dev->coupled_cpus)) | 657 | if (cpumask_empty(&dev->coupled_cpus)) |
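
The call_single_data_t conversions in this and the neighbouring hunks are mechanical; the typedef (introduced in <linux/smp.h> by this series, presumably as an alignment-enforcing alias of struct __call_single_data) exists so a csd can be kept on a single cache line. A hedged sketch of the usage pattern these call sites follow; my_csd, my_ipi_func() and kick_cpu() are invented names:

#include <linux/smp.h>
#include <linux/percpu.h>

static void my_ipi_func(void *info)
{
	/* runs on the target CPU in IPI context */
}

static DEFINE_PER_CPU(call_single_data_t, my_csd);

static void kick_cpu(int cpu, void *info)
{
	call_single_data_t *csd = &per_cpu(my_csd, cpu);

	csd->func = my_ipi_func;
	csd->info = info;
	/* caller must guarantee this csd is not already in flight */
	smp_call_function_single_async(cpu, csd);
}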
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 48572b157222..a36216bd2a84 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c | |||
@@ -28,6 +28,7 @@ | |||
28 | 28 | ||
29 | #include <linux/debugfs.h> | 29 | #include <linux/debugfs.h> |
30 | #include <linux/sort.h> | 30 | #include <linux/sort.h> |
31 | #include <linux/sched/mm.h> | ||
31 | #include "intel_drv.h" | 32 | #include "intel_drv.h" |
32 | 33 | ||
33 | static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) | 34 | static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) |
@@ -4305,7 +4306,7 @@ i915_drop_caches_set(void *data, u64 val) | |||
4305 | mutex_unlock(&dev->struct_mutex); | 4306 | mutex_unlock(&dev->struct_mutex); |
4306 | } | 4307 | } |
4307 | 4308 | ||
4308 | lockdep_set_current_reclaim_state(GFP_KERNEL); | 4309 | fs_reclaim_acquire(GFP_KERNEL); |
4309 | if (val & DROP_BOUND) | 4310 | if (val & DROP_BOUND) |
4310 | i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND); | 4311 | i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND); |
4311 | 4312 | ||
@@ -4314,7 +4315,7 @@ i915_drop_caches_set(void *data, u64 val) | |||
4314 | 4315 | ||
4315 | if (val & DROP_SHRINK_ALL) | 4316 | if (val & DROP_SHRINK_ALL) |
4316 | i915_gem_shrink_all(dev_priv); | 4317 | i915_gem_shrink_all(dev_priv); |
4317 | lockdep_clear_current_reclaim_state(); | 4318 | fs_reclaim_release(GFP_KERNEL); |
4318 | 4319 | ||
4319 | if (val & DROP_FREED) { | 4320 | if (val & DROP_FREED) { |
4320 | synchronize_rcu(); | 4321 | synchronize_rcu(); |
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 51583ae4b1eb..120b6e537b28 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c | |||
@@ -2468,7 +2468,7 @@ static void liquidio_napi_drv_callback(void *arg) | |||
2468 | if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) { | 2468 | if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) { |
2469 | napi_schedule_irqoff(&droq->napi); | 2469 | napi_schedule_irqoff(&droq->napi); |
2470 | } else { | 2470 | } else { |
2471 | struct call_single_data *csd = &droq->csd; | 2471 | call_single_data_t *csd = &droq->csd; |
2472 | 2472 | ||
2473 | csd->func = napi_schedule_wrapper; | 2473 | csd->func = napi_schedule_wrapper; |
2474 | csd->info = &droq->napi; | 2474 | csd->info = &droq->napi; |
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index 6efd139b894d..f91bc84d1719 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h | |||
@@ -328,7 +328,7 @@ struct octeon_droq { | |||
328 | 328 | ||
329 | u32 cpu_id; | 329 | u32 cpu_id; |
330 | 330 | ||
331 | struct call_single_data csd; | 331 | call_single_data_t csd; |
332 | }; | 332 | }; |
333 | 333 | ||
334 | #define OCT_DROQ_SIZE (sizeof(struct octeon_droq)) | 334 | #define OCT_DROQ_SIZE (sizeof(struct octeon_droq)) |
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 3d424a51cabb..f0fd3adb1693 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c | |||
@@ -446,14 +446,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, | |||
446 | 446 | ||
447 | ovl_path_upper(dentry, &upperpath); | 447 | ovl_path_upper(dentry, &upperpath); |
448 | realfile = ovl_path_open(&upperpath, O_RDONLY); | 448 | realfile = ovl_path_open(&upperpath, O_RDONLY); |
449 | smp_mb__before_spinlock(); | 449 | |
450 | inode_lock(inode); | 450 | inode_lock(inode); |
451 | if (!od->upperfile) { | 451 | if (!od->upperfile) { |
452 | if (IS_ERR(realfile)) { | 452 | if (IS_ERR(realfile)) { |
453 | inode_unlock(inode); | 453 | inode_unlock(inode); |
454 | return PTR_ERR(realfile); | 454 | return PTR_ERR(realfile); |
455 | } | 455 | } |
456 | od->upperfile = realfile; | 456 | smp_store_release(&od->upperfile, realfile); |
457 | } else { | 457 | } else { |
458 | /* somebody has beaten us to it */ | 458 | /* somebody has beaten us to it */ |
459 | if (!IS_ERR(realfile)) | 459 | if (!IS_ERR(realfile)) |
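
With smp_mb__before_spinlock() going away in this series, the publication of od->upperfile is expressed directly: the store becomes a release, which only pays off if the lockless reader pairs it with an acquire. A generic, hedged sketch of that pairing (struct dir_state and both helpers are made up; the real reader sits elsewhere in the overlayfs readdir code):

struct dir_state {				/* stand-in for the real private struct */
	struct file *upperfile;
};

static void publish_upperfile(struct dir_state *od, struct file *realfile)
{
	/* realfile is fully initialized before this line */
	smp_store_release(&od->upperfile, realfile);
}

static struct file *peek_upperfile(struct dir_state *od)
{
	/* pairs with the release above: contents visible if the pointer is */
	return smp_load_acquire(&od->upperfile);
}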
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index b0d5897bc4e6..886085b47c75 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c | |||
@@ -109,27 +109,24 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, | |||
109 | goto out; | 109 | goto out; |
110 | WRITE_ONCE(uwq->waken, true); | 110 | WRITE_ONCE(uwq->waken, true); |
111 | /* | 111 | /* |
112 | * The implicit smp_mb__before_spinlock in try_to_wake_up() | 112 | * The Program-Order guarantees provided by the scheduler |
113 | * renders uwq->waken visible to other CPUs before the task is | 113 | * ensure uwq->waken is visible before the task is woken. |
114 | * waken. | ||
115 | */ | 114 | */ |
116 | ret = wake_up_state(wq->private, mode); | 115 | ret = wake_up_state(wq->private, mode); |
117 | if (ret) | 116 | if (ret) { |
118 | /* | 117 | /* |
119 | * Wake only once, autoremove behavior. | 118 | * Wake only once, autoremove behavior. |
120 | * | 119 | * |
121 | * After the effect of list_del_init is visible to the | 120 | * After the effect of list_del_init is visible to the other |
122 | * other CPUs, the waitqueue may disappear from under | 121 | * CPUs, the waitqueue may disappear from under us, see the |
123 | * us, see the !list_empty_careful() in | 122 | * !list_empty_careful() in handle_userfault(). |
124 | * handle_userfault(). try_to_wake_up() has an | 123 | * |
125 | * implicit smp_mb__before_spinlock, and the | 124 | * try_to_wake_up() has an implicit smp_mb(), and the |
126 | * wq->private is read before calling the extern | 125 | * wq->private is read before calling the extern function |
127 | * function "wake_up_state" (which in turn calls | 126 | * "wake_up_state" (which in turn calls try_to_wake_up). | ||
128 | * try_to_wake_up). While the spin_lock;spin_unlock; | ||
129 | * wouldn't be enough, the smp_mb__before_spinlock is | ||
130 | * enough to avoid an explicit smp_mb() here. | ||
131 | */ | 127 | */ |
132 | list_del_init(&wq->entry); | 128 | list_del_init(&wq->entry); |
129 | } | ||
133 | out: | 130 | out: |
134 | return ret; | 131 | return ret; |
135 | } | 132 | } |
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index dad68bf46c77..8d28eb010d0d 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h | |||
@@ -21,6 +21,8 @@ typedef struct { | |||
21 | extern long long atomic64_read(const atomic64_t *v); | 21 | extern long long atomic64_read(const atomic64_t *v); |
22 | extern void atomic64_set(atomic64_t *v, long long i); | 22 | extern void atomic64_set(atomic64_t *v, long long i); |
23 | 23 | ||
24 | #define atomic64_set_release(v, i) atomic64_set((v), (i)) | ||
25 | |||
24 | #define ATOMIC64_OP(op) \ | 26 | #define ATOMIC64_OP(op) \ |
25 | extern void atomic64_##op(long long a, atomic64_t *v); | 27 | extern void atomic64_##op(long long a, atomic64_t *v); |
26 | 28 | ||
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index bf2d34c9d804..f0d8b1c51343 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h | |||
@@ -13,7 +13,7 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | /** | 15 | /** |
16 | * futex_atomic_op_inuser() - Atomic arithmetic operation with constant | 16 | * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant |
17 | * argument and comparison of the previous | 17 | * argument and comparison of the previous |
18 | * futex value with another constant. | 18 | * futex value with another constant. |
19 | * | 19 | * |
@@ -25,18 +25,11 @@ | |||
25 | * <0 - On error | 25 | * <0 - On error |
26 | */ | 26 | */ |
27 | static inline int | 27 | static inline int |
28 | futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | 28 | arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) |
29 | { | 29 | { |
30 | int op = (encoded_op >> 28) & 7; | ||
31 | int cmp = (encoded_op >> 24) & 15; | ||
32 | int oparg = (encoded_op << 8) >> 20; | ||
33 | int cmparg = (encoded_op << 20) >> 20; | ||
34 | int oldval, ret; | 30 | int oldval, ret; |
35 | u32 tmp; | 31 | u32 tmp; |
36 | 32 | ||
37 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
38 | oparg = 1 << oparg; | ||
39 | |||
40 | preempt_disable(); | 33 | preempt_disable(); |
41 | pagefault_disable(); | 34 | pagefault_disable(); |
42 | 35 | ||
@@ -74,17 +67,9 @@ out_pagefault_enable: | |||
74 | pagefault_enable(); | 67 | pagefault_enable(); |
75 | preempt_enable(); | 68 | preempt_enable(); |
76 | 69 | ||
77 | if (ret == 0) { | 70 | if (ret == 0) |
78 | switch (cmp) { | 71 | *oval = oldval; |
79 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 72 | |
80 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
81 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
82 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
83 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
84 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
85 | default: ret = -ENOSYS; | ||
86 | } | ||
87 | } | ||
88 | return ret; | 73 | return ret; |
89 | } | 74 | } |
90 | 75 | ||
@@ -126,18 +111,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | |||
126 | 111 | ||
127 | #else | 112 | #else |
128 | static inline int | 113 | static inline int |
129 | futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | 114 | arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) |
130 | { | 115 | { |
131 | int op = (encoded_op >> 28) & 7; | ||
132 | int cmp = (encoded_op >> 24) & 15; | ||
133 | int oparg = (encoded_op << 8) >> 20; | ||
134 | int cmparg = (encoded_op << 20) >> 20; | ||
135 | int oldval = 0, ret; | 116 | int oldval = 0, ret; |
136 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | ||
137 | oparg = 1 << oparg; | ||
138 | |||
139 | if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) | ||
140 | return -EFAULT; | ||
141 | 117 | ||
142 | pagefault_disable(); | 118 | pagefault_disable(); |
143 | 119 | ||
@@ -153,17 +129,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) | |||
153 | 129 | ||
154 | pagefault_enable(); | 130 | pagefault_enable(); |
155 | 131 | ||
156 | if (!ret) { | 132 | if (!ret) |
157 | switch (cmp) { | 133 | *oval = oldval; |
158 | case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; | 134 | |
159 | case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; | ||
160 | case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; | ||
161 | case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; | ||
162 | case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; | ||
163 | case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; | ||
164 | default: ret = -ENOSYS; | ||
165 | } | ||
166 | } | ||
167 | return ret; | 135 | return ret; |
168 | } | 136 | } |
169 | 137 | ||
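
The hunks above (and the x86/xtensa ones earlier) all shrink to the same shape because the encoded_op decoding and the cmp/cmparg comparison now live in exactly one place in common futex code. A hedged sketch of that shared caller, assuming the removed per-arch logic is hoisted essentially verbatim (the real function lives in kernel/futex.c in this series):

static int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
	int op = (encoded_op >> 28) & 7;
	int cmp = (encoded_op >> 24) & 15;
	int oparg = (encoded_op << 8) >> 20;
	int cmparg = (encoded_op << 20) >> 20;
	int oldval, ret;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
		oparg = 1 << oparg;

	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
		return -EFAULT;

	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
	if (ret)
		return ret;

	switch (cmp) {
	case FUTEX_OP_CMP_EQ: return oldval == cmparg;
	case FUTEX_OP_CMP_NE: return oldval != cmparg;
	case FUTEX_OP_CMP_LT: return oldval <  cmparg;
	case FUTEX_OP_CMP_GE: return oldval >= cmparg;
	case FUTEX_OP_CMP_LE: return oldval <= cmparg;
	case FUTEX_OP_CMP_GT: return oldval >  cmparg;
	default:              return -ENOSYS;
	}
}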
diff --git a/include/linux/atomic.h b/include/linux/atomic.h index c56be7410130..40d6bfec0e0d 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h | |||
@@ -38,6 +38,9 @@ | |||
38 | * Besides, if an arch has a special barrier for acquire/release, it could | 38 | * Besides, if an arch has a special barrier for acquire/release, it could |
39 | * implement its own __atomic_op_* and use the same framework for building | 39 | * implement its own __atomic_op_* and use the same framework for building |
40 | * variants | 40 | * variants |
41 | * | ||
42 | * If an architecture overrides __atomic_op_acquire() it will probably want | ||
43 | * to define smp_mb__after_spinlock(). | ||
41 | */ | 44 | */ |
42 | #ifndef __atomic_op_acquire | 45 | #ifndef __atomic_op_acquire |
43 | #define __atomic_op_acquire(op, args...) \ | 46 | #define __atomic_op_acquire(op, args...) \ |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2a5d52fa90f5..4b99b13c7e68 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -134,7 +134,7 @@ typedef __u32 __bitwise req_flags_t; | |||
134 | struct request { | 134 | struct request { |
135 | struct list_head queuelist; | 135 | struct list_head queuelist; |
136 | union { | 136 | union { |
137 | struct call_single_data csd; | 137 | call_single_data_t csd; |
138 | u64 fifo_time; | 138 | u64 fifo_time; |
139 | }; | 139 | }; |
140 | 140 | ||
diff --git a/include/linux/completion.h b/include/linux/completion.h index 5d5aaae3af43..cae5400022a3 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h | |||
@@ -9,6 +9,9 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/wait.h> | 11 | #include <linux/wait.h> |
12 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | ||
13 | #include <linux/lockdep.h> | ||
14 | #endif | ||
12 | 15 | ||
13 | /* | 16 | /* |
14 | * struct completion - structure used to maintain state for a "completion" | 17 | * struct completion - structure used to maintain state for a "completion" |
@@ -25,13 +28,53 @@ | |||
25 | struct completion { | 28 | struct completion { |
26 | unsigned int done; | 29 | unsigned int done; |
27 | wait_queue_head_t wait; | 30 | wait_queue_head_t wait; |
31 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | ||
32 | struct lockdep_map_cross map; | ||
33 | #endif | ||
28 | }; | 34 | }; |
29 | 35 | ||
36 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | ||
37 | static inline void complete_acquire(struct completion *x) | ||
38 | { | ||
39 | lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_); | ||
40 | } | ||
41 | |||
42 | static inline void complete_release(struct completion *x) | ||
43 | { | ||
44 | lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_); | ||
45 | } | ||
46 | |||
47 | static inline void complete_release_commit(struct completion *x) | ||
48 | { | ||
49 | lock_commit_crosslock((struct lockdep_map *)&x->map); | ||
50 | } | ||
51 | |||
52 | #define init_completion(x) \ | ||
53 | do { \ | ||
54 | static struct lock_class_key __key; \ | ||
55 | lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ | ||
56 | "(complete)" #x, \ | ||
57 | &__key, 0); \ | ||
58 | __init_completion(x); \ | ||
59 | } while (0) | ||
60 | #else | ||
61 | #define init_completion(x) __init_completion(x) | ||
62 | static inline void complete_acquire(struct completion *x) {} | ||
63 | static inline void complete_release(struct completion *x) {} | ||
64 | static inline void complete_release_commit(struct completion *x) {} | ||
65 | #endif | ||
66 | |||
67 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | ||
68 | #define COMPLETION_INITIALIZER(work) \ | ||
69 | { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ | ||
70 | STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } | ||
71 | #else | ||
30 | #define COMPLETION_INITIALIZER(work) \ | 72 | #define COMPLETION_INITIALIZER(work) \ |
31 | { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | 73 | { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } |
74 | #endif | ||
32 | 75 | ||
33 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ | 76 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ |
34 | ({ init_completion(&work); work; }) | 77 | (*({ init_completion(&work); &work; })) |
35 | 78 | ||
36 | /** | 79 | /** |
37 | * DECLARE_COMPLETION - declare and initialize a completion structure | 80 | * DECLARE_COMPLETION - declare and initialize a completion structure |
@@ -70,7 +113,7 @@ struct completion { | |||
70 | * This inline function will initialize a dynamically created completion | 113 | * This inline function will initialize a dynamically created completion |
71 | * structure. | 114 | * structure. |
72 | */ | 115 | */ |
73 | static inline void init_completion(struct completion *x) | 116 | static inline void __init_completion(struct completion *x) |
74 | { | 117 | { |
75 | x->done = 0; | 118 | x->done = 0; |
76 | init_waitqueue_head(&x->wait); | 119 | init_waitqueue_head(&x->wait); |
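
Two things happen above: init_completion() grows a lockdep crossrelease map under CONFIG_LOCKDEP_COMPLETIONS (the old body moves to __init_completion()), and COMPLETION_INITIALIZER_ONSTACK() is reworked so its statement expression yields the initialized object itself (an lvalue) rather than a temporary copy, which makes it suitable only as an initializer, e.g. via DECLARE_COMPLETION_ONSTACK(). Code that merely needs to set up a completion embedded in an on-stack struct, like the nfit hunk earlier, should call init_completion() directly. A hedged sketch of that idiom; my_ctx and my_flush() are invented, and the work function is assumed to call complete():

struct my_ctx {				/* invented, mirrors the nfit hunk */
	struct work_struct work;
	struct completion cmp;
};

static int my_flush(struct workqueue_struct *wq, struct my_ctx *ctx)
{
	/* field of an on-stack object: plain init_completion() */
	init_completion(&ctx->cmp);

	queue_work(wq, &ctx->work);	/* the work fn calls complete(&ctx->cmp) */
	wait_for_completion(&ctx->cmp);
	return 0;
}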
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 898cfe2eeb42..e74655d941b7 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h | |||
@@ -37,12 +37,6 @@ static inline bool cpusets_enabled(void) | |||
37 | return static_branch_unlikely(&cpusets_enabled_key); | 37 | return static_branch_unlikely(&cpusets_enabled_key); |
38 | } | 38 | } |
39 | 39 | ||
40 | static inline int nr_cpusets(void) | ||
41 | { | ||
42 | /* jump label reference count + the top-level cpuset */ | ||
43 | return static_key_count(&cpusets_enabled_key.key) + 1; | ||
44 | } | ||
45 | |||
46 | static inline void cpuset_inc(void) | 40 | static inline void cpuset_inc(void) |
47 | { | 41 | { |
48 | static_branch_inc(&cpusets_pre_enable_key); | 42 | static_branch_inc(&cpusets_pre_enable_key); |
diff --git a/include/linux/futex.h b/include/linux/futex.h index 7c5b694864cd..f36bfd26f998 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
@@ -54,7 +54,6 @@ union futex_key { | |||
54 | 54 | ||
55 | #ifdef CONFIG_FUTEX | 55 | #ifdef CONFIG_FUTEX |
56 | extern void exit_robust_list(struct task_struct *curr); | 56 | extern void exit_robust_list(struct task_struct *curr); |
57 | extern void exit_pi_state_list(struct task_struct *curr); | ||
58 | #ifdef CONFIG_HAVE_FUTEX_CMPXCHG | 57 | #ifdef CONFIG_HAVE_FUTEX_CMPXCHG |
59 | #define futex_cmpxchg_enabled 1 | 58 | #define futex_cmpxchg_enabled 1 |
60 | #else | 59 | #else |
@@ -64,8 +63,14 @@ extern int futex_cmpxchg_enabled; | |||
64 | static inline void exit_robust_list(struct task_struct *curr) | 63 | static inline void exit_robust_list(struct task_struct *curr) |
65 | { | 64 | { |
66 | } | 65 | } |
66 | #endif | ||
67 | |||
68 | #ifdef CONFIG_FUTEX_PI | ||
69 | extern void exit_pi_state_list(struct task_struct *curr); | ||
70 | #else | ||
67 | static inline void exit_pi_state_list(struct task_struct *curr) | 71 | static inline void exit_pi_state_list(struct task_struct *curr) |
68 | { | 72 | { |
69 | } | 73 | } |
70 | #endif | 74 | #endif |
75 | |||
71 | #endif | 76 | #endif |
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5dd1272d1ab2..5fdd93bb9300 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h | |||
@@ -23,10 +23,26 @@ | |||
23 | # define trace_softirq_context(p) ((p)->softirq_context) | 23 | # define trace_softirq_context(p) ((p)->softirq_context) |
24 | # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) | 24 | # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) |
25 | # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) | 25 | # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) |
26 | # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) | 26 | # define trace_hardirq_enter() \ |
27 | # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) | 27 | do { \ |
28 | # define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | 28 | current->hardirq_context++; \ |
29 | # define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | 29 | crossrelease_hist_start(XHLOCK_HARD); \ |
30 | } while (0) | ||
31 | # define trace_hardirq_exit() \ | ||
32 | do { \ | ||
33 | current->hardirq_context--; \ | ||
34 | crossrelease_hist_end(XHLOCK_HARD); \ | ||
35 | } while (0) | ||
36 | # define lockdep_softirq_enter() \ | ||
37 | do { \ | ||
38 | current->softirq_context++; \ | ||
39 | crossrelease_hist_start(XHLOCK_SOFT); \ | ||
40 | } while (0) | ||
41 | # define lockdep_softirq_exit() \ | ||
42 | do { \ | ||
43 | current->softirq_context--; \ | ||
44 | crossrelease_hist_end(XHLOCK_SOFT); \ | ||
45 | } while (0) | ||
30 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, | 46 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, |
31 | #else | 47 | #else |
32 | # define trace_hardirqs_on() do { } while (0) | 48 | # define trace_hardirqs_on() do { } while (0) |
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 2afd74b9d844..cd5861651b17 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h | |||
@@ -163,6 +163,8 @@ extern void jump_label_apply_nops(struct module *mod); | |||
163 | extern int static_key_count(struct static_key *key); | 163 | extern int static_key_count(struct static_key *key); |
164 | extern void static_key_enable(struct static_key *key); | 164 | extern void static_key_enable(struct static_key *key); |
165 | extern void static_key_disable(struct static_key *key); | 165 | extern void static_key_disable(struct static_key *key); |
166 | extern void static_key_enable_cpuslocked(struct static_key *key); | ||
167 | extern void static_key_disable_cpuslocked(struct static_key *key); | ||
166 | 168 | ||
167 | /* | 169 | /* |
168 | * We should be using ATOMIC_INIT() for initializing .enabled, but | 170 | * We should be using ATOMIC_INIT() for initializing .enabled, but |
@@ -234,24 +236,29 @@ static inline int jump_label_apply_nops(struct module *mod) | |||
234 | 236 | ||
235 | static inline void static_key_enable(struct static_key *key) | 237 | static inline void static_key_enable(struct static_key *key) |
236 | { | 238 | { |
237 | int count = static_key_count(key); | 239 | STATIC_KEY_CHECK_USE(); |
238 | |||
239 | WARN_ON_ONCE(count < 0 || count > 1); | ||
240 | 240 | ||
241 | if (!count) | 241 | if (atomic_read(&key->enabled) != 0) { |
242 | static_key_slow_inc(key); | 242 | WARN_ON_ONCE(atomic_read(&key->enabled) != 1); |
243 | return; | ||
244 | } | ||
245 | atomic_set(&key->enabled, 1); | ||
243 | } | 246 | } |
244 | 247 | ||
245 | static inline void static_key_disable(struct static_key *key) | 248 | static inline void static_key_disable(struct static_key *key) |
246 | { | 249 | { |
247 | int count = static_key_count(key); | 250 | STATIC_KEY_CHECK_USE(); |
248 | |||
249 | WARN_ON_ONCE(count < 0 || count > 1); | ||
250 | 251 | ||
251 | if (count) | 252 | if (atomic_read(&key->enabled) != 1) { |
252 | static_key_slow_dec(key); | 253 | WARN_ON_ONCE(atomic_read(&key->enabled) != 0); |
254 | return; | ||
255 | } | ||
256 | atomic_set(&key->enabled, 0); | ||
253 | } | 257 | } |
254 | 258 | ||
259 | #define static_key_enable_cpuslocked(k) static_key_enable((k)) | ||
260 | #define static_key_disable_cpuslocked(k) static_key_disable((k)) | ||
261 | |||
255 | #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } | 262 | #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } |
256 | #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } | 263 | #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } |
257 | 264 | ||
@@ -413,8 +420,10 @@ extern bool ____wrong_branch_error(void); | |||
413 | * Normal usage; boolean enable/disable. | 420 | * Normal usage; boolean enable/disable. |
414 | */ | 421 | */ |
415 | 422 | ||
416 | #define static_branch_enable(x) static_key_enable(&(x)->key) | 423 | #define static_branch_enable(x) static_key_enable(&(x)->key) |
417 | #define static_branch_disable(x) static_key_disable(&(x)->key) | 424 | #define static_branch_disable(x) static_key_disable(&(x)->key) |
425 | #define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) | ||
426 | #define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) | ||
418 | 427 | ||
419 | #endif /* __ASSEMBLY__ */ | 428 | #endif /* __ASSEMBLY__ */ |
420 | 429 | ||
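
The new *_cpuslocked variants are for callers that already hold the CPU hotplug lock, such as cpuhp callbacks or the arm_arch_timer workaround hunk earlier; the plain variants take that lock themselves. A hedged sketch of the split; my_feature_key, my_cpu_online(), my_late_init() and cpu_has_quirk() are invented names:

#include <linux/jump_label.h>
#include <linux/cpuhotplug.h>

static DEFINE_STATIC_KEY_FALSE(my_feature_key);

static bool cpu_has_quirk(unsigned int cpu);	/* hypothetical detection */

static int my_cpu_online(unsigned int cpu)
{
	/* cpuhp callbacks run with the hotplug lock already held */
	if (cpu_has_quirk(cpu))
		static_branch_enable_cpuslocked(&my_feature_key);
	return 0;
}

static void my_late_init(void)
{
	/* ordinary process context: the plain variant takes the lock itself */
	static_branch_enable(&my_feature_key);
}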
diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h index b7f8aced7870..41960fecf783 100644 --- a/include/linux/kasan-checks.h +++ b/include/linux/kasan-checks.h | |||
@@ -2,11 +2,13 @@ | |||
2 | #define _LINUX_KASAN_CHECKS_H | 2 | #define _LINUX_KASAN_CHECKS_H |
3 | 3 | ||
4 | #ifdef CONFIG_KASAN | 4 | #ifdef CONFIG_KASAN |
5 | void kasan_check_read(const void *p, unsigned int size); | 5 | void kasan_check_read(const volatile void *p, unsigned int size); |
6 | void kasan_check_write(const void *p, unsigned int size); | 6 | void kasan_check_write(const volatile void *p, unsigned int size); |
7 | #else | 7 | #else |
8 | static inline void kasan_check_read(const void *p, unsigned int size) { } | 8 | static inline void kasan_check_read(const volatile void *p, unsigned int size) |
9 | static inline void kasan_check_write(const void *p, unsigned int size) { } | 9 | { } |
10 | static inline void kasan_check_write(const volatile void *p, unsigned int size) | ||
11 | { } | ||
10 | #endif | 12 | #endif |
11 | 13 | ||
12 | #endif | 14 | #endif |
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bd6d96cf80b1..6607225d0ea4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -277,6 +277,13 @@ extern int oops_may_print(void); | |||
277 | void do_exit(long error_code) __noreturn; | 277 | void do_exit(long error_code) __noreturn; |
278 | void complete_and_exit(struct completion *, long) __noreturn; | 278 | void complete_and_exit(struct completion *, long) __noreturn; |
279 | 279 | ||
280 | #ifdef CONFIG_ARCH_HAS_REFCOUNT | ||
281 | void refcount_error_report(struct pt_regs *regs, const char *err); | ||
282 | #else | ||
283 | static inline void refcount_error_report(struct pt_regs *regs, const char *err) | ||
284 | { } | ||
285 | #endif | ||
286 | |||
280 | /* Internal, do not use. */ | 287 | /* Internal, do not use. */ |
281 | int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); | 288 | int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); |
282 | int __must_check _kstrtol(const char *s, unsigned int base, long *res); | 289 | int __must_check _kstrtol(const char *s, unsigned int base, long *res); |
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index fffe49f188e6..bfa8e0b0d6f1 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h | |||
@@ -18,6 +18,8 @@ extern int lock_stat; | |||
18 | 18 | ||
19 | #define MAX_LOCKDEP_SUBCLASSES 8UL | 19 | #define MAX_LOCKDEP_SUBCLASSES 8UL |
20 | 20 | ||
21 | #include <linux/types.h> | ||
22 | |||
21 | #ifdef CONFIG_LOCKDEP | 23 | #ifdef CONFIG_LOCKDEP |
22 | 24 | ||
23 | #include <linux/linkage.h> | 25 | #include <linux/linkage.h> |
@@ -29,7 +31,7 @@ extern int lock_stat; | |||
29 | * We'd rather not expose kernel/lockdep_states.h this wide, but we do need | 31 | * We'd rather not expose kernel/lockdep_states.h this wide, but we do need |
30 | * the total number of states... :-( | 32 | * the total number of states... :-( |
31 | */ | 33 | */ |
32 | #define XXX_LOCK_USAGE_STATES (1+3*4) | 34 | #define XXX_LOCK_USAGE_STATES (1+2*4) |
33 | 35 | ||
34 | /* | 36 | /* |
35 | * NR_LOCKDEP_CACHING_CLASSES ... Number of classes | 37 | * NR_LOCKDEP_CACHING_CLASSES ... Number of classes |
@@ -155,6 +157,12 @@ struct lockdep_map { | |||
155 | int cpu; | 157 | int cpu; |
156 | unsigned long ip; | 158 | unsigned long ip; |
157 | #endif | 159 | #endif |
160 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
161 | /* | ||
162 | * Whether it's a crosslock. | ||
163 | */ | ||
164 | int cross; | ||
165 | #endif | ||
158 | }; | 166 | }; |
159 | 167 | ||
160 | static inline void lockdep_copy_map(struct lockdep_map *to, | 168 | static inline void lockdep_copy_map(struct lockdep_map *to, |
@@ -258,8 +266,95 @@ struct held_lock { | |||
258 | unsigned int hardirqs_off:1; | 266 | unsigned int hardirqs_off:1; |
259 | unsigned int references:12; /* 32 bits */ | 267 | unsigned int references:12; /* 32 bits */ |
260 | unsigned int pin_count; | 268 | unsigned int pin_count; |
269 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
270 | /* | ||
271 | * Generation id. | ||
272 | * | ||
273 | * A value of cross_gen_id will be stored when holding this, | ||
274 | * which is globally increased whenever each crosslock is held. | ||
275 | */ | ||
276 | unsigned int gen_id; | ||
277 | #endif | ||
278 | }; | ||
279 | |||
280 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
281 | #define MAX_XHLOCK_TRACE_ENTRIES 5 | ||
282 | |||
283 | /* | ||
284 | * This is for keeping locks waiting for commit so that true dependencies | ||
285 | * can be added at commit step. | ||
286 | */ | ||
287 | struct hist_lock { | ||
288 | /* | ||
289 | * Id for each entry in the ring buffer. This is used to | ||
290 | * decide whether the ring buffer was overwritten or not. | ||
291 | * | ||
292 | * For example, | ||
293 | * | ||
294 | * |<----------- hist_lock ring buffer size ------->| | ||
295 | * pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii | ||
296 | * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii....................... | ||
297 | * | ||
298 | * where 'p' represents an acquisition in process | ||
299 | * context, 'i' represents an acquisition in irq | ||
300 | * context. | ||
301 | * | ||
302 | * In this example, the ring buffer was overwritten by | ||
303 | * acquisitions in irq context, that should be detected on | ||
304 | * rollback or commit. | ||
305 | */ | ||
306 | unsigned int hist_id; | ||
307 | |||
308 | /* | ||
309 | * Separate stack_trace data. This will be used at commit step. | ||
310 | */ | ||
311 | struct stack_trace trace; | ||
312 | unsigned long trace_entries[MAX_XHLOCK_TRACE_ENTRIES]; | ||
313 | |||
314 | /* | ||
315 | * Separate hlock instance. This will be used at commit step. | ||
316 | * | ||
317 | * TODO: Use a smaller data structure containing only necessary | ||
318 | * data. However, we should make lockdep code able to handle the | ||
319 | * smaller one first. | ||
320 | */ | ||
321 | struct held_lock hlock; | ||
322 | }; | ||
323 | |||
324 | /* | ||
325 | * To initialize a lock as crosslock, lockdep_init_map_crosslock() should | ||
326 | * be called instead of lockdep_init_map(). | ||
327 | */ | ||
328 | struct cross_lock { | ||
329 | /* | ||
330 | * When more than one acquisition of crosslocks are overlapped, | ||
331 | * we have to perform commit for them based on cross_gen_id of | ||
332 | * the first acquisition, which allows us to add more true | ||
333 | * dependencies. | ||
334 | * | ||
335 | * Moreover, when no acquisition of a crosslock is in progress, | ||
336 | * we should not perform commit because the lock might not exist | ||
337 | * any more, which might cause incorrect memory access. So we | ||
338 | * have to track the number of acquisitions of a crosslock. | ||
339 | */ | ||
340 | int nr_acquire; | ||
341 | |||
342 | /* | ||
343 | * Separate hlock instance. This will be used at commit step. | ||
344 | * | ||
345 | * TODO: Use a smaller data structure containing only necessary | ||
346 | * data. However, we should make lockdep code able to handle the | ||
347 | * smaller one first. | ||
348 | */ | ||
349 | struct held_lock hlock; | ||
261 | }; | 350 | }; |
262 | 351 | ||
352 | struct lockdep_map_cross { | ||
353 | struct lockdep_map map; | ||
354 | struct cross_lock xlock; | ||
355 | }; | ||
356 | #endif | ||
357 | |||
263 | /* | 358 | /* |
264 | * Initialization, self-test and debugging-output methods: | 359 | * Initialization, self-test and debugging-output methods: |
265 | */ | 360 | */ |
@@ -282,13 +377,6 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
282 | struct lock_class_key *key, int subclass); | 377 | struct lock_class_key *key, int subclass); |
283 | 378 | ||
284 | /* | 379 | /* |
285 | * To initialize a lockdep_map statically use this macro. | ||
286 | * Note that _name must not be NULL. | ||
287 | */ | ||
288 | #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ | ||
289 | { .name = (_name), .key = (void *)(_key), } | ||
290 | |||
291 | /* | ||
292 | * Reinitialize a lock key - for cases where there is special locking or | 380 | * Reinitialize a lock key - for cases where there is special locking or |
293 | * special initialization of locks so that the validator gets the scope | 381 | * special initialization of locks so that the validator gets the scope |
294 | * of dependencies wrong: they are either too broad (they need a class-split) | 382 | * of dependencies wrong: they are either too broad (they need a class-split) |
@@ -363,10 +451,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock, | |||
363 | 451 | ||
364 | extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); | 452 | extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); |
365 | 453 | ||
366 | extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); | ||
367 | extern void lockdep_clear_current_reclaim_state(void); | ||
368 | extern void lockdep_trace_alloc(gfp_t mask); | ||
369 | |||
370 | struct pin_cookie { unsigned int val; }; | 454 | struct pin_cookie { unsigned int val; }; |
371 | 455 | ||
372 | #define NIL_COOKIE (struct pin_cookie){ .val = 0U, } | 456 | #define NIL_COOKIE (struct pin_cookie){ .val = 0U, } |
@@ -375,7 +459,7 @@ extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); | |||
375 | extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); | 459 | extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); |
376 | extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); | 460 | extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); |
377 | 461 | ||
378 | # define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, | 462 | # define INIT_LOCKDEP .lockdep_recursion = 0, |
379 | 463 | ||
380 | #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) | 464 | #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) |
381 | 465 | ||
@@ -416,9 +500,6 @@ static inline void lockdep_on(void) | |||
416 | # define lock_downgrade(l, i) do { } while (0) | 500 | # define lock_downgrade(l, i) do { } while (0) |
417 | # define lock_set_class(l, n, k, s, i) do { } while (0) | 501 | # define lock_set_class(l, n, k, s, i) do { } while (0) |
418 | # define lock_set_subclass(l, s, i) do { } while (0) | 502 | # define lock_set_subclass(l, s, i) do { } while (0) |
419 | # define lockdep_set_current_reclaim_state(g) do { } while (0) | ||
420 | # define lockdep_clear_current_reclaim_state() do { } while (0) | ||
421 | # define lockdep_trace_alloc(g) do { } while (0) | ||
422 | # define lockdep_info() do { } while (0) | 503 | # define lockdep_info() do { } while (0) |
423 | # define lockdep_init_map(lock, name, key, sub) \ | 504 | # define lockdep_init_map(lock, name, key, sub) \ |
424 | do { (void)(name); (void)(key); } while (0) | 505 | do { (void)(name); (void)(key); } while (0) |
@@ -467,6 +548,58 @@ struct pin_cookie { }; | |||
467 | 548 | ||
468 | #endif /* !LOCKDEP */ | 549 | #endif /* !LOCKDEP */ |
469 | 550 | ||
551 | enum xhlock_context_t { | ||
552 | XHLOCK_HARD, | ||
553 | XHLOCK_SOFT, | ||
554 | XHLOCK_CTX_NR, | ||
555 | }; | ||
556 | |||
557 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
558 | extern void lockdep_init_map_crosslock(struct lockdep_map *lock, | ||
559 | const char *name, | ||
560 | struct lock_class_key *key, | ||
561 | int subclass); | ||
562 | extern void lock_commit_crosslock(struct lockdep_map *lock); | ||
563 | |||
564 | /* | ||
565 | * What we essentially have to initialize is 'nr_acquire'. Other members | ||
566 | * will be initialized in add_xlock(). | ||
567 | */ | ||
568 | #define STATIC_CROSS_LOCK_INIT() \ | ||
569 | { .nr_acquire = 0,} | ||
570 | |||
571 | #define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \ | ||
572 | { .map.name = (_name), .map.key = (void *)(_key), \ | ||
573 | .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), } | ||
574 | |||
575 | /* | ||
576 | * To initialize a lockdep_map statically use this macro. | ||
577 | * Note that _name must not be NULL. | ||
578 | */ | ||
579 | #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ | ||
580 | { .name = (_name), .key = (void *)(_key), .cross = 0, } | ||
581 | |||
582 | extern void crossrelease_hist_start(enum xhlock_context_t c); | ||
583 | extern void crossrelease_hist_end(enum xhlock_context_t c); | ||
584 | extern void lockdep_invariant_state(bool force); | ||
585 | extern void lockdep_init_task(struct task_struct *task); | ||
586 | extern void lockdep_free_task(struct task_struct *task); | ||
587 | #else /* !CROSSRELEASE */ | ||
588 | #define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) | ||
589 | /* | ||
590 | * To initialize a lockdep_map statically use this macro. | ||
591 | * Note that _name must not be NULL. | ||
592 | */ | ||
593 | #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ | ||
594 | { .name = (_name), .key = (void *)(_key), } | ||
595 | |||
596 | static inline void crossrelease_hist_start(enum xhlock_context_t c) {} | ||
597 | static inline void crossrelease_hist_end(enum xhlock_context_t c) {} | ||
598 | static inline void lockdep_invariant_state(bool force) {} | ||
599 | static inline void lockdep_init_task(struct task_struct *task) {} | ||
600 | static inline void lockdep_free_task(struct task_struct *task) {} | ||
601 | #endif /* CROSSRELEASE */ | ||
602 | |||
470 | #ifdef CONFIG_LOCK_STAT | 603 | #ifdef CONFIG_LOCK_STAT |
471 | 604 | ||
472 | extern void lock_contended(struct lockdep_map *lock, unsigned long ip); | 605 | extern void lock_contended(struct lockdep_map *lock, unsigned long ip); |
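
The crossrelease additions above only define the data structures and hooks; the intended flow (a hedged, simplified sketch — the real call sites are the completion helpers added earlier and the scheduler's completion code) is that the waiting side acquires the crosslock's lockdep map before blocking and the releasing side commits the accumulated history when it signals:

/* waiter context */
static void crosslock_wait_side(struct completion *x)
{
	complete_acquire(x);		/* record the dependency before blocking */
	/* ... sleep until x->done is set ... */
	complete_release(x);
}

/* completer context, possibly another task or an IRQ */
static void crosslock_complete_side(struct completion *x)
{
	/* ... set x->done and wake the waiter ... */
	complete_release_commit(x);	/* commit: adds waiter -> completer deps */
}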
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3cadee0a3508..57378c7cb5f8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -526,26 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, | |||
526 | extern void tlb_finish_mmu(struct mmu_gather *tlb, | 526 | extern void tlb_finish_mmu(struct mmu_gather *tlb, |
527 | unsigned long start, unsigned long end); | 527 | unsigned long start, unsigned long end); |
528 | 528 | ||
529 | /* | ||
530 | * Memory barriers to keep this state in sync are graciously provided by | ||
531 | * the page table locks, outside of which no page table modifications happen. | ||
532 | * The barriers are used to ensure the order between tlb_flush_pending updates, | ||
533 | * which happen while the lock is not taken, and the PTE updates, which happen | ||
534 | * while the lock is taken, are serialized. | ||
535 | */ | ||
536 | static inline bool mm_tlb_flush_pending(struct mm_struct *mm) | ||
537 | { | ||
538 | return atomic_read(&mm->tlb_flush_pending) > 0; | ||
539 | } | ||
540 | |||
541 | /* | ||
542 | * Returns true if there are two above TLB batching threads in parallel. | ||
543 | */ | ||
544 | static inline bool mm_tlb_flush_nested(struct mm_struct *mm) | ||
545 | { | ||
546 | return atomic_read(&mm->tlb_flush_pending) > 1; | ||
547 | } | ||
548 | |||
549 | static inline void init_tlb_flush_pending(struct mm_struct *mm) | 529 | static inline void init_tlb_flush_pending(struct mm_struct *mm) |
550 | { | 530 | { |
551 | atomic_set(&mm->tlb_flush_pending, 0); | 531 | atomic_set(&mm->tlb_flush_pending, 0); |
@@ -554,27 +534,82 @@ static inline void init_tlb_flush_pending(struct mm_struct *mm) | |||
554 | static inline void inc_tlb_flush_pending(struct mm_struct *mm) | 534 | static inline void inc_tlb_flush_pending(struct mm_struct *mm) |
555 | { | 535 | { |
556 | atomic_inc(&mm->tlb_flush_pending); | 536 | atomic_inc(&mm->tlb_flush_pending); |
557 | |||
558 | /* | 537 | /* |
559 | * Guarantee that the tlb_flush_pending increase does not leak into the | 538 | * The only time this value is relevant is when there are indeed pages |
560 | * critical section updating the page tables | 539 | * to flush. And we'll only flush pages after changing them, which |
540 | * requires the PTL. | ||
541 | * | ||
542 | * So the ordering here is: | ||
543 | * | ||
544 | * atomic_inc(&mm->tlb_flush_pending); | ||
545 | * spin_lock(&ptl); | ||
546 | * ... | ||
547 | * set_pte_at(); | ||
548 | * spin_unlock(&ptl); | ||
549 | * | ||
550 | * spin_lock(&ptl) | ||
551 | * mm_tlb_flush_pending(); | ||
552 | * .... | ||
553 | * spin_unlock(&ptl); | ||
554 | * | ||
555 | * flush_tlb_range(); | ||
556 | * atomic_dec(&mm->tlb_flush_pending); | ||
557 | * | ||
558 | * Where the increment is constrained by the PTL unlock, it thus | ||
559 | * ensures that the increment is visible if the PTE modification is | ||
560 | * visible. After all, if there is no PTE modification, nobody cares | ||
561 | * about TLB flushes either. | ||
562 | * | ||
563 | * This very much relies on users (mm_tlb_flush_pending() and | ||
564 | * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and | ||
565 | * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc | ||
566 | * locks (PPC) the unlock of one doesn't order against the lock of | ||
567 | * another PTL. | ||
568 | * | ||
569 | * The decrement is ordered by the flush_tlb_range(), such that | ||
570 | * mm_tlb_flush_pending() will not return false unless all flushes have | ||
571 | * completed. | ||
561 | */ | 572 | */ |
562 | smp_mb__before_spinlock(); | ||
563 | } | 573 | } |
564 | 574 | ||
565 | /* Clearing is done after a TLB flush, which also provides a barrier. */ | ||
566 | static inline void dec_tlb_flush_pending(struct mm_struct *mm) | 575 | static inline void dec_tlb_flush_pending(struct mm_struct *mm) |
567 | { | 576 | { |
568 | /* | 577 | /* |
569 | * Guarantee that the tlb_flush_pending does not not leak into the | 578 | * See inc_tlb_flush_pending(). |
570 | * critical section, since we must order the PTE change and changes to | 579 | * |
571 | * the pending TLB flush indication. We could have relied on TLB flush | 580 | * This cannot be smp_mb__before_atomic() because smp_mb() simply does |
572 | * as a memory barrier, but this behavior is not clearly documented. | 581 | * not order against TLB invalidate completion, which is what we need. |
582 | * | ||
583 | * Therefore we must rely on tlb_flush_*() to guarantee order. | ||
573 | */ | 584 | */ |
574 | smp_mb__before_atomic(); | ||
575 | atomic_dec(&mm->tlb_flush_pending); | 585 | atomic_dec(&mm->tlb_flush_pending); |
576 | } | 586 | } |
577 | 587 | ||
588 | static inline bool mm_tlb_flush_pending(struct mm_struct *mm) | ||
589 | { | ||
590 | /* | ||
591 | * Must be called after having acquired the PTL; orders against that | ||
592 | * PTLs release and therefore ensures that if we observe the modified | ||
593 | * PTE we must also observe the increment from inc_tlb_flush_pending(). | ||
594 | * | ||
595 | * That is, it only guarantees to return true if there is a flush | ||
596 | * pending for _this_ PTL. | ||
597 | */ | ||
598 | return atomic_read(&mm->tlb_flush_pending); | ||
599 | } | ||
600 | |||
601 | static inline bool mm_tlb_flush_nested(struct mm_struct *mm) | ||
602 | { | ||
603 | /* | ||
604 | * Similar to mm_tlb_flush_pending(), we must have acquired the PTL | ||
605 | * for which there is a TLB flush pending in order to guarantee | ||
606 | * we've seen both that PTE modification and the increment. | ||
607 | * | ||
608 | * (no requirement on actually still holding the PTL, that is irrelevant) | ||
609 | */ | ||
610 | return atomic_read(&mm->tlb_flush_pending) > 1; | ||
611 | } | ||
612 | |||
578 | struct vm_fault; | 613 | struct vm_fault; |
579 | 614 | ||
580 | struct vm_special_mapping { | 615 | struct vm_special_mapping { |
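
The reshuffled helpers above encode an ordering contract rather than an explicit barrier: the increment must be visible whenever the PTE modification is, and the decrement must trail the actual TLB invalidate. A hedged sketch of the two sides exactly as the comment describes them (writer(), reader() and the argument plumbing are illustrative only):

static void writer(struct vm_area_struct *vma, unsigned long addr,
		   pte_t *ptep, pte_t newpte, spinlock_t *ptl)
{
	struct mm_struct *mm = vma->vm_mm;

	inc_tlb_flush_pending(mm);			/* A: before taking the PTL */

	spin_lock(ptl);
	set_pte_at(mm, addr, ptep, newpte);		/* B: PTE change under the PTL */
	spin_unlock(ptl);				/* publishes A along with B */

	flush_tlb_range(vma, addr, addr + PAGE_SIZE);	/* C: the invalidate itself */
	dec_tlb_flush_pending(mm);			/* D: only after C has completed */
}

static bool reader(struct mm_struct *mm, spinlock_t *ptl)
{
	bool pending;

	spin_lock(ptl);				/* orders against the writer's unlock */
	pending = mm_tlb_flush_pending(mm);	/* sees A if it can see B */
	spin_unlock(ptl);

	return pending;
}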
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c99ba7914c0a..461bd5757af6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -2774,7 +2774,7 @@ struct softnet_data { | |||
2774 | unsigned int input_queue_head ____cacheline_aligned_in_smp; | 2774 | unsigned int input_queue_head ____cacheline_aligned_in_smp; |
2775 | 2775 | ||
2776 | /* Elements below can be accessed between CPUs for RPS/RFS */ | 2776 | /* Elements below can be accessed between CPUs for RPS/RFS */ |
2777 | struct call_single_data csd ____cacheline_aligned_in_smp; | 2777 | call_single_data_t csd ____cacheline_aligned_in_smp; |
2778 | struct softnet_data *rps_ipi_next; | 2778 | struct softnet_data *rps_ipi_next; |
2779 | unsigned int cpu; | 2779 | unsigned int cpu; |
2780 | unsigned int input_queue_tail; | 2780 | unsigned int input_queue_tail; |
diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 591792c8e5b0..48b7c9c68c4d 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h | |||
@@ -53,6 +53,9 @@ extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); | |||
53 | extern __must_check bool refcount_dec_and_test(refcount_t *r); | 53 | extern __must_check bool refcount_dec_and_test(refcount_t *r); |
54 | extern void refcount_dec(refcount_t *r); | 54 | extern void refcount_dec(refcount_t *r); |
55 | #else | 55 | #else |
56 | # ifdef CONFIG_ARCH_HAS_REFCOUNT | ||
57 | # include <asm/refcount.h> | ||
58 | # else | ||
56 | static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) | 59 | static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) |
57 | { | 60 | { |
58 | return atomic_add_unless(&r->refs, i, 0); | 61 | return atomic_add_unless(&r->refs, i, 0); |
@@ -87,6 +90,7 @@ static inline void refcount_dec(refcount_t *r) | |||
87 | { | 90 | { |
88 | atomic_dec(&r->refs); | 91 | atomic_dec(&r->refs); |
89 | } | 92 | } |
93 | # endif /* !CONFIG_ARCH_HAS_REFCOUNT */ | ||
90 | #endif /* CONFIG_REFCOUNT_FULL */ | 94 | #endif /* CONFIG_REFCOUNT_FULL */ |
91 | 95 | ||
92 | extern __must_check bool refcount_dec_if_one(refcount_t *r); | 96 | extern __must_check bool refcount_dec_if_one(refcount_t *r); |
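For reference, the generic fallback above maps refcount_add_not_zero() onto atomic_add_unless(&r->refs, i, 0). A hedged user-space sketch of that "add unless the count is already zero" semantic, written as a plain C11 compare-and-swap loop; the struct and helper here are stand-ins, not the kernel types.

    /* Hedged sketch of "add unless zero"; not the kernel refcount_t. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct refcount { atomic_uint refs; };

    static bool refcount_add_not_zero(unsigned int i, struct refcount *r)
    {
            unsigned int old = atomic_load(&r->refs);

            do {
                    if (old == 0)
                            return false;   /* object already dead: don't resurrect it */
            } while (!atomic_compare_exchange_weak(&r->refs, &old, old + i));

            return true;
    }

    int main(void)
    {
            struct refcount r = { .refs = 1 };

            printf("%d\n", refcount_add_not_zero(1, &r));  /* 1: count went 1 -> 2 */
            atomic_store(&r.refs, 0);
            printf("%d\n", refcount_add_not_zero(1, &r));  /* 0: count stays 0 */
            return 0;
    }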
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index ae0528b834cd..e784761a4443 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h | |||
@@ -32,6 +32,7 @@ struct rw_semaphore { | |||
32 | #define RWSEM_UNLOCKED_VALUE 0x00000000 | 32 | #define RWSEM_UNLOCKED_VALUE 0x00000000 |
33 | 33 | ||
34 | extern void __down_read(struct rw_semaphore *sem); | 34 | extern void __down_read(struct rw_semaphore *sem); |
35 | extern int __must_check __down_read_killable(struct rw_semaphore *sem); | ||
35 | extern int __down_read_trylock(struct rw_semaphore *sem); | 36 | extern int __down_read_trylock(struct rw_semaphore *sem); |
36 | extern void __down_write(struct rw_semaphore *sem); | 37 | extern void __down_write(struct rw_semaphore *sem); |
37 | extern int __must_check __down_write_killable(struct rw_semaphore *sem); | 38 | extern int __must_check __down_write_killable(struct rw_semaphore *sem); |
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index dd1d14250340..0ad7318ff299 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h | |||
@@ -44,6 +44,7 @@ struct rw_semaphore { | |||
44 | }; | 44 | }; |
45 | 45 | ||
46 | extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); | 46 | extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); |
47 | extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); | ||
47 | extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); | 48 | extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); |
48 | extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); | 49 | extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); |
49 | extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); | 50 | extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); |
diff --git a/include/linux/sched.h b/include/linux/sched.h index e5fbce866073..9ba42c663fba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -847,7 +847,17 @@ struct task_struct { | |||
847 | int lockdep_depth; | 847 | int lockdep_depth; |
848 | unsigned int lockdep_recursion; | 848 | unsigned int lockdep_recursion; |
849 | struct held_lock held_locks[MAX_LOCK_DEPTH]; | 849 | struct held_lock held_locks[MAX_LOCK_DEPTH]; |
850 | gfp_t lockdep_reclaim_gfp; | 850 | #endif |
851 | |||
852 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
853 | #define MAX_XHLOCKS_NR 64UL | ||
854 | struct hist_lock *xhlocks; /* Crossrelease history locks */ | ||
855 | unsigned int xhlock_idx; | ||
856 | /* For restoring at history boundaries */ | ||
857 | unsigned int xhlock_idx_hist[XHLOCK_CTX_NR]; | ||
858 | unsigned int hist_id; | ||
859 | /* For overwrite check at each context exit */ | ||
860 | unsigned int hist_id_save[XHLOCK_CTX_NR]; | ||
851 | #endif | 861 | #endif |
852 | 862 | ||
853 | #ifdef CONFIG_UBSAN | 863 | #ifdef CONFIG_UBSAN |
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 2b24a6974847..2b0a281f9d26 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h | |||
@@ -167,6 +167,14 @@ static inline gfp_t current_gfp_context(gfp_t flags) | |||
167 | return flags; | 167 | return flags; |
168 | } | 168 | } |
169 | 169 | ||
170 | #ifdef CONFIG_LOCKDEP | ||
171 | extern void fs_reclaim_acquire(gfp_t gfp_mask); | ||
172 | extern void fs_reclaim_release(gfp_t gfp_mask); | ||
173 | #else | ||
174 | static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } | ||
175 | static inline void fs_reclaim_release(gfp_t gfp_mask) { } | ||
176 | #endif | ||
177 | |||
170 | static inline unsigned int memalloc_noio_save(void) | 178 | static inline unsigned int memalloc_noio_save(void) |
171 | { | 179 | { |
172 | unsigned int flags = current->flags & PF_MEMALLOC_NOIO; | 180 | unsigned int flags = current->flags & PF_MEMALLOC_NOIO; |
diff --git a/include/linux/smp.h b/include/linux/smp.h index 68123c1fe549..98b1fe027fc9 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
@@ -14,13 +14,17 @@ | |||
14 | #include <linux/llist.h> | 14 | #include <linux/llist.h> |
15 | 15 | ||
16 | typedef void (*smp_call_func_t)(void *info); | 16 | typedef void (*smp_call_func_t)(void *info); |
17 | struct call_single_data { | 17 | struct __call_single_data { |
18 | struct llist_node llist; | 18 | struct llist_node llist; |
19 | smp_call_func_t func; | 19 | smp_call_func_t func; |
20 | void *info; | 20 | void *info; |
21 | unsigned int flags; | 21 | unsigned int flags; |
22 | }; | 22 | }; |
23 | 23 | ||
24 | /* Use __aligned() to avoid using 2 cache lines for 1 csd */ | ||
25 | typedef struct __call_single_data call_single_data_t | ||
26 | __aligned(sizeof(struct __call_single_data)); | ||
27 | |||
24 | /* total number of cpus in this system (may exceed NR_CPUS) */ | 28 | /* total number of cpus in this system (may exceed NR_CPUS) */ |
25 | extern unsigned int total_cpus; | 29 | extern unsigned int total_cpus; |
26 | 30 | ||
@@ -48,7 +52,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), | |||
48 | smp_call_func_t func, void *info, bool wait, | 52 | smp_call_func_t func, void *info, bool wait, |
49 | gfp_t gfp_flags); | 53 | gfp_t gfp_flags); |
50 | 54 | ||
51 | int smp_call_function_single_async(int cpu, struct call_single_data *csd); | 55 | int smp_call_function_single_async(int cpu, call_single_data_t *csd); |
52 | 56 | ||
53 | #ifdef CONFIG_SMP | 57 | #ifdef CONFIG_SMP |
54 | 58 | ||
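The __aligned(sizeof(...)) trick above makes the structure's alignment equal to its size, so an array element can never straddle two 64-byte cache lines. A small user-space illustration, assuming GCC/Clang attribute syntax and a typical 64-bit layout where the structure is 32 bytes; the struct below only mimics the member layout.

    /* Hedged illustration only; the struct mimics the member layout, and the
     * numbers assume a typical 64-bit build where it is 32 bytes. */
    #include <stdalign.h>
    #include <stdint.h>
    #include <stdio.h>

    struct csd {
            void *llist, *func, *info;
            unsigned int flags;
    };

    /* same trick as call_single_data_t: align the type to its own size */
    typedef struct csd aligned_csd __attribute__((aligned(sizeof(struct csd))));

    int main(void)
    {
            aligned_csd arr[4];

            printf("size=%zu align=%zu\n", sizeof(aligned_csd), alignof(aligned_csd));
            for (int i = 0; i < 4; i++) {
                    uintptr_t a = (uintptr_t)&arr[i];
                    /* first and last byte of each element share one 64-byte line */
                    printf("elem %d: cache line %lu..%lu\n", i,
                           (unsigned long)(a / 64),
                           (unsigned long)((a + sizeof(aligned_csd) - 1) / 64));
            }
            return 0;
    }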
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index ef018a6e4985..69e079c5ff98 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h | |||
@@ -118,16 +118,39 @@ do { \ | |||
118 | #endif | 118 | #endif |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * Despite its name it doesn't necessarily has to be a full barrier. | 121 | * This barrier must provide two things: |
122 | * It should only guarantee that a STORE before the critical section | 122 | * |
123 | * can not be reordered with LOADs and STOREs inside this section. | 123 | * - it must guarantee a STORE before the spin_lock() is ordered against a |
124 | * spin_lock() is the one-way barrier, this LOAD can not escape out | 124 | * LOAD after it, see the comments at its two usage sites. |
125 | * of the region. So the default implementation simply ensures that | 125 | * |
126 | * a STORE can not move into the critical section, smp_wmb() should | 126 | * - it must ensure the critical section is RCsc. |
127 | * serialize it with another STORE done by spin_lock(). | 127 | * |
128 | * The latter is important for cases where we observe values written by other | ||
129 | * CPUs in spin-loops, without barriers, while being subject to scheduling. | ||
130 | * | ||
131 | * CPU0 CPU1 CPU2 | ||
132 | * | ||
133 | * for (;;) { | ||
134 | * if (READ_ONCE(X)) | ||
135 | * break; | ||
136 | * } | ||
137 | * X=1 | ||
138 | * <sched-out> | ||
139 | * <sched-in> | ||
140 | * r = X; | ||
141 | * | ||
142 | * without transitivity it could be that CPU1 observes X!=0 and breaks the loop, | ||
143 | * we get migrated and CPU2 sees X==0. | ||
144 | * | ||
145 | * Since most load-store architectures implement ACQUIRE with an smp_mb() after | ||
146 | * the LL/SC loop, they need no further barriers. Similarly all our TSO | ||
147 | * architectures imply an smp_mb() for each atomic instruction and equally don't | ||
148 | * need more. | ||
149 | * | ||
150 | * Architectures that can implement ACQUIRE better need to take care. | ||
128 | */ | 151 | */ |
129 | #ifndef smp_mb__before_spinlock | 152 | #ifndef smp_mb__after_spinlock |
130 | #define smp_mb__before_spinlock() smp_wmb() | 153 | #define smp_mb__after_spinlock() do { } while (0) |
131 | #endif | 154 | #endif |
132 | 155 | ||
133 | #ifdef CONFIG_DEBUG_SPINLOCK | 156 | #ifdef CONFIG_DEBUG_SPINLOCK |
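The first guarantee listed in the comment above (a STORE before the lock must not be reordered with a LOAD inside the critical section) is the classic store-buffering shape. Below is a hedged user-space analogue in which pthread_spin_lock() followed by a seq_cst fence stands in for spin_lock(); smp_mb__after_spinlock(); a single run of course proves nothing, it only shows the pattern.

    /* Hedged store-buffering sketch; the seq_cst fence after taking the lock
     * stands in for smp_mb__after_spinlock().  One run proves nothing. */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static pthread_spinlock_t l;
    static atomic_int X, Y;
    static int r0, r1;

    static void *cpu0(void *unused)
    {
            atomic_store_explicit(&X, 1, memory_order_relaxed);   /* STORE before the lock */
            pthread_spin_lock(&l);
            atomic_thread_fence(memory_order_seq_cst);            /* smp_mb__after_spinlock() */
            r0 = atomic_load_explicit(&Y, memory_order_relaxed);  /* LOAD inside the section */
            pthread_spin_unlock(&l);
            return unused;
    }

    static void *cpu1(void *unused)
    {
            atomic_store_explicit(&Y, 1, memory_order_relaxed);
            atomic_thread_fence(memory_order_seq_cst);            /* plain smp_mb() */
            r1 = atomic_load_explicit(&X, memory_order_relaxed);
            return unused;
    }

    int main(void)
    {
            pthread_t a, b;

            pthread_spin_init(&l, PTHREAD_PROCESS_PRIVATE);
            pthread_create(&a, NULL, cpu0, NULL);
            pthread_create(&b, NULL, cpu1, NULL);
            pthread_join(a, NULL);
            pthread_join(b, NULL);
            /* with both full barriers present, r0 == 0 && r1 == 0 is forbidden */
            printf("r0=%d r1=%d\n", r0, r1);
            return 0;
    }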
diff --git a/init/Kconfig b/init/Kconfig index 8514b25db21c..5f0ef850e808 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -1275,12 +1275,17 @@ config BASE_FULL | |||
1275 | config FUTEX | 1275 | config FUTEX |
1276 | bool "Enable futex support" if EXPERT | 1276 | bool "Enable futex support" if EXPERT |
1277 | default y | 1277 | default y |
1278 | select RT_MUTEXES | 1278 | imply RT_MUTEXES |
1279 | help | 1279 | help |
1280 | Disabling this option will cause the kernel to be built without | 1280 | Disabling this option will cause the kernel to be built without |
1281 | support for "fast userspace mutexes". The resulting kernel may not | 1281 | support for "fast userspace mutexes". The resulting kernel may not |
1282 | run glibc-based applications correctly. | 1282 | run glibc-based applications correctly. |
1283 | 1283 | ||
1284 | config FUTEX_PI | ||
1285 | bool | ||
1286 | depends on FUTEX && RT_MUTEXES | ||
1287 | default y | ||
1288 | |||
1284 | config HAVE_FUTEX_CMPXCHG | 1289 | config HAVE_FUTEX_CMPXCHG |
1285 | bool | 1290 | bool |
1286 | depends on FUTEX | 1291 | depends on FUTEX |
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index df403e97b073..2f4039bafebb 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c | |||
@@ -577,6 +577,13 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, | |||
577 | rcu_read_unlock(); | 577 | rcu_read_unlock(); |
578 | } | 578 | } |
579 | 579 | ||
580 | /* Must be called with cpuset_mutex held. */ | ||
581 | static inline int nr_cpusets(void) | ||
582 | { | ||
583 | /* jump label reference count + the top-level cpuset */ | ||
584 | return static_key_count(&cpusets_enabled_key.key) + 1; | ||
585 | } | ||
586 | |||
580 | /* | 587 | /* |
581 | * generate_sched_domains() | 588 | * generate_sched_domains() |
582 | * | 589 | * |
diff --git a/kernel/exit.c b/kernel/exit.c index f9ef3ecc78c1..a35d8a17e01f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -918,6 +918,7 @@ void __noreturn do_exit(long code) | |||
918 | exit_rcu(); | 918 | exit_rcu(); |
919 | exit_tasks_rcu_finish(); | 919 | exit_tasks_rcu_finish(); |
920 | 920 | ||
921 | lockdep_free_task(tsk); | ||
921 | do_task_dead(); | 922 | do_task_dead(); |
922 | } | 923 | } |
923 | EXPORT_SYMBOL_GPL(do_exit); | 924 | EXPORT_SYMBOL_GPL(do_exit); |
diff --git a/kernel/fork.c b/kernel/fork.c index b7e9e57b71ea..dab73d18bc4d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -484,6 +484,8 @@ void __init fork_init(void) | |||
484 | cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", | 484 | cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", |
485 | NULL, free_vm_stack_cache); | 485 | NULL, free_vm_stack_cache); |
486 | #endif | 486 | #endif |
487 | |||
488 | lockdep_init_task(&init_task); | ||
487 | } | 489 | } |
488 | 490 | ||
489 | int __weak arch_dup_task_struct(struct task_struct *dst, | 491 | int __weak arch_dup_task_struct(struct task_struct *dst, |
@@ -1700,6 +1702,7 @@ static __latent_entropy struct task_struct *copy_process( | |||
1700 | p->lockdep_depth = 0; /* no locks held yet */ | 1702 | p->lockdep_depth = 0; /* no locks held yet */ |
1701 | p->curr_chain_key = 0; | 1703 | p->curr_chain_key = 0; |
1702 | p->lockdep_recursion = 0; | 1704 | p->lockdep_recursion = 0; |
1705 | lockdep_init_task(p); | ||
1703 | #endif | 1706 | #endif |
1704 | 1707 | ||
1705 | #ifdef CONFIG_DEBUG_MUTEXES | 1708 | #ifdef CONFIG_DEBUG_MUTEXES |
@@ -1958,6 +1961,7 @@ bad_fork_cleanup_audit: | |||
1958 | bad_fork_cleanup_perf: | 1961 | bad_fork_cleanup_perf: |
1959 | perf_event_free_task(p); | 1962 | perf_event_free_task(p); |
1960 | bad_fork_cleanup_policy: | 1963 | bad_fork_cleanup_policy: |
1964 | lockdep_free_task(p); | ||
1961 | #ifdef CONFIG_NUMA | 1965 | #ifdef CONFIG_NUMA |
1962 | mpol_put(p->mempolicy); | 1966 | mpol_put(p->mempolicy); |
1963 | bad_fork_cleanup_threadgroup_lock: | 1967 | bad_fork_cleanup_threadgroup_lock: |
diff --git a/kernel/futex.c b/kernel/futex.c index f50b434756c1..3d38eaf05492 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -876,6 +876,8 @@ static struct task_struct *futex_find_get_task(pid_t pid) | |||
876 | return p; | 876 | return p; |
877 | } | 877 | } |
878 | 878 | ||
879 | #ifdef CONFIG_FUTEX_PI | ||
880 | |||
879 | /* | 881 | /* |
880 | * This task is holding PI mutexes at exit time => bad. | 882 | * This task is holding PI mutexes at exit time => bad. |
881 | * Kernel cleans up PI-state, but userspace is likely hosed. | 883 | * Kernel cleans up PI-state, but userspace is likely hosed. |
@@ -933,6 +935,8 @@ void exit_pi_state_list(struct task_struct *curr) | |||
933 | raw_spin_unlock_irq(&curr->pi_lock); | 935 | raw_spin_unlock_irq(&curr->pi_lock); |
934 | } | 936 | } |
935 | 937 | ||
938 | #endif | ||
939 | |||
936 | /* | 940 | /* |
937 | * We need to check the following states: | 941 | * We need to check the following states: |
938 | * | 942 | * |
@@ -1547,6 +1551,45 @@ out: | |||
1547 | return ret; | 1551 | return ret; |
1548 | } | 1552 | } |
1549 | 1553 | ||
1554 | static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) | ||
1555 | { | ||
1556 | unsigned int op = (encoded_op & 0x70000000) >> 28; | ||
1557 | unsigned int cmp = (encoded_op & 0x0f000000) >> 24; | ||
1558 | int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 12); | ||
1559 | int cmparg = sign_extend32(encoded_op & 0x00000fff, 12); | ||
1560 | int oldval, ret; | ||
1561 | |||
1562 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { | ||
1563 | if (oparg < 0 || oparg > 31) | ||
1564 | return -EINVAL; | ||
1565 | oparg = 1 << oparg; | ||
1566 | } | ||
1567 | |||
1568 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
1569 | return -EFAULT; | ||
1570 | |||
1571 | ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr); | ||
1572 | if (ret) | ||
1573 | return ret; | ||
1574 | |||
1575 | switch (cmp) { | ||
1576 | case FUTEX_OP_CMP_EQ: | ||
1577 | return oldval == cmparg; | ||
1578 | case FUTEX_OP_CMP_NE: | ||
1579 | return oldval != cmparg; | ||
1580 | case FUTEX_OP_CMP_LT: | ||
1581 | return oldval < cmparg; | ||
1582 | case FUTEX_OP_CMP_GE: | ||
1583 | return oldval >= cmparg; | ||
1584 | case FUTEX_OP_CMP_LE: | ||
1585 | return oldval <= cmparg; | ||
1586 | case FUTEX_OP_CMP_GT: | ||
1587 | return oldval > cmparg; | ||
1588 | default: | ||
1589 | return -ENOSYS; | ||
1590 | } | ||
1591 | } | ||
1592 | |||
1550 | /* | 1593 | /* |
1551 | * Wake up all waiters hashed on the physical page that is mapped | 1594 | * Wake up all waiters hashed on the physical page that is mapped |
1552 | * to this virtual address: | 1595 | * to this virtual address: |
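The bitfield layout that futex_atomic_op_inuser() unpacks in the hunk above can be exercised stand-alone. A hedged sketch with an arbitrary encoded_op value follows; sext() is a local helper taking a field width, not the kernel's sign_extend32(), and no uapi constants are assumed.

    /* Hedged stand-alone decode; sext() is a local helper taking a field
     * width, not the kernel's sign_extend32(), and the value is arbitrary. */
    #include <stdint.h>
    #include <stdio.h>

    static int32_t sext(uint32_t v, int width)
    {
            int shift = 32 - width;

            return (int32_t)(v << shift) >> shift;
    }

    int main(void)
    {
            /* op field = 1, cmp field = 4, oparg = 8, cmparg = -1 (0xfff) */
            uint32_t encoded_op = (1u << 28) | (4u << 24) | (8u << 12) | 0xfffu;

            unsigned int op  = (encoded_op & 0x70000000) >> 28;
            unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
            int oparg  = sext((encoded_op & 0x00fff000) >> 12, 12);
            int cmparg = sext(encoded_op & 0x00000fff, 12);

            /* a flag bit in the top nibble (handled in the kernel code above)
             * additionally turns oparg into a shift count: oparg = 1 << oparg */
            printf("op=%u cmp=%u oparg=%d cmparg=%d\n", op, cmp, oparg, cmparg);
            return 0;
    }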
@@ -1800,6 +1843,15 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |||
1800 | struct futex_q *this, *next; | 1843 | struct futex_q *this, *next; |
1801 | DEFINE_WAKE_Q(wake_q); | 1844 | DEFINE_WAKE_Q(wake_q); |
1802 | 1845 | ||
1846 | /* | ||
1847 | * When PI is not supported: return -ENOSYS if requeue_pi is true, | ||
1848 | * consequently the compiler knows requeue_pi is always false past | ||
1849 | * this point which will optimize away all the conditional code | ||
1850 | * further down. | ||
1851 | */ | ||
1852 | if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi) | ||
1853 | return -ENOSYS; | ||
1854 | |||
1803 | if (requeue_pi) { | 1855 | if (requeue_pi) { |
1804 | /* | 1856 | /* |
1805 | * Requeue PI only works on two distinct uaddrs. This | 1857 | * Requeue PI only works on two distinct uaddrs. This |
@@ -2595,6 +2647,9 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, | |||
2595 | struct futex_q q = futex_q_init; | 2647 | struct futex_q q = futex_q_init; |
2596 | int res, ret; | 2648 | int res, ret; |
2597 | 2649 | ||
2650 | if (!IS_ENABLED(CONFIG_FUTEX_PI)) | ||
2651 | return -ENOSYS; | ||
2652 | |||
2598 | if (refill_pi_state_cache()) | 2653 | if (refill_pi_state_cache()) |
2599 | return -ENOMEM; | 2654 | return -ENOMEM; |
2600 | 2655 | ||
@@ -2774,6 +2829,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) | |||
2774 | struct futex_q *top_waiter; | 2829 | struct futex_q *top_waiter; |
2775 | int ret; | 2830 | int ret; |
2776 | 2831 | ||
2832 | if (!IS_ENABLED(CONFIG_FUTEX_PI)) | ||
2833 | return -ENOSYS; | ||
2834 | |||
2777 | retry: | 2835 | retry: |
2778 | if (get_user(uval, uaddr)) | 2836 | if (get_user(uval, uaddr)) |
2779 | return -EFAULT; | 2837 | return -EFAULT; |
@@ -2984,6 +3042,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |||
2984 | struct futex_q q = futex_q_init; | 3042 | struct futex_q q = futex_q_init; |
2985 | int res, ret; | 3043 | int res, ret; |
2986 | 3044 | ||
3045 | if (!IS_ENABLED(CONFIG_FUTEX_PI)) | ||
3046 | return -ENOSYS; | ||
3047 | |||
2987 | if (uaddr == uaddr2) | 3048 | if (uaddr == uaddr2) |
2988 | return -EINVAL; | 3049 | return -EINVAL; |
2989 | 3050 | ||
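The IS_ENABLED(CONFIG_FUTEX_PI) guards added throughout this file rely on the branch being a compile-time constant, so the PI-only code below each early return is still type-checked but discarded. A simplified user-space illustration; the macro here is a stand-in for the kernel's, which also copes with undefined config symbols.

    /* Hedged illustration; IS_ENABLED() here is a simplified stand-in for the
     * kernel macro, which also copes with undefined config symbols. */
    #include <stdio.h>

    #define CONFIG_FEATURE_PI 0            /* pretend the option is disabled */
    #define IS_ENABLED(option) (option)

    static int requeue(int requeue_pi)
    {
            if (!IS_ENABLED(CONFIG_FEATURE_PI) && requeue_pi)
                    return -1;             /* -ENOSYS in the kernel */

            if (requeue_pi) {
                    /* With the option off, the compiler now knows requeue_pi
                     * is zero here, so this whole block is dead code. */
                    printf("PI requeue path\n");
            }
            return 0;
    }

    int main(void)
    {
            printf("%d\n", requeue(1));    /* prints -1 with the option disabled */
            return 0;
    }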
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index d11c506a6ac3..0bf2e8f5244a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
@@ -79,29 +79,7 @@ int static_key_count(struct static_key *key) | |||
79 | } | 79 | } |
80 | EXPORT_SYMBOL_GPL(static_key_count); | 80 | EXPORT_SYMBOL_GPL(static_key_count); |
81 | 81 | ||
82 | void static_key_enable(struct static_key *key) | 82 | static void static_key_slow_inc_cpuslocked(struct static_key *key) |
83 | { | ||
84 | int count = static_key_count(key); | ||
85 | |||
86 | WARN_ON_ONCE(count < 0 || count > 1); | ||
87 | |||
88 | if (!count) | ||
89 | static_key_slow_inc(key); | ||
90 | } | ||
91 | EXPORT_SYMBOL_GPL(static_key_enable); | ||
92 | |||
93 | void static_key_disable(struct static_key *key) | ||
94 | { | ||
95 | int count = static_key_count(key); | ||
96 | |||
97 | WARN_ON_ONCE(count < 0 || count > 1); | ||
98 | |||
99 | if (count) | ||
100 | static_key_slow_dec(key); | ||
101 | } | ||
102 | EXPORT_SYMBOL_GPL(static_key_disable); | ||
103 | |||
104 | void static_key_slow_inc(struct static_key *key) | ||
105 | { | 83 | { |
106 | int v, v1; | 84 | int v, v1; |
107 | 85 | ||
@@ -125,24 +103,87 @@ void static_key_slow_inc(struct static_key *key) | |||
125 | return; | 103 | return; |
126 | } | 104 | } |
127 | 105 | ||
128 | cpus_read_lock(); | ||
129 | jump_label_lock(); | 106 | jump_label_lock(); |
130 | if (atomic_read(&key->enabled) == 0) { | 107 | if (atomic_read(&key->enabled) == 0) { |
131 | atomic_set(&key->enabled, -1); | 108 | atomic_set(&key->enabled, -1); |
132 | jump_label_update(key); | 109 | jump_label_update(key); |
133 | atomic_set(&key->enabled, 1); | 110 | /* |
111 | * Ensure that if the above cmpxchg loop observes our positive | ||
112 | * value, it must also observe all the text changes. | ||
113 | */ | ||
114 | atomic_set_release(&key->enabled, 1); | ||
134 | } else { | 115 | } else { |
135 | atomic_inc(&key->enabled); | 116 | atomic_inc(&key->enabled); |
136 | } | 117 | } |
137 | jump_label_unlock(); | 118 | jump_label_unlock(); |
119 | } | ||
120 | |||
121 | void static_key_slow_inc(struct static_key *key) | ||
122 | { | ||
123 | cpus_read_lock(); | ||
124 | static_key_slow_inc_cpuslocked(key); | ||
138 | cpus_read_unlock(); | 125 | cpus_read_unlock(); |
139 | } | 126 | } |
140 | EXPORT_SYMBOL_GPL(static_key_slow_inc); | 127 | EXPORT_SYMBOL_GPL(static_key_slow_inc); |
141 | 128 | ||
142 | static void __static_key_slow_dec(struct static_key *key, | 129 | void static_key_enable_cpuslocked(struct static_key *key) |
143 | unsigned long rate_limit, struct delayed_work *work) | 130 | { |
131 | STATIC_KEY_CHECK_USE(); | ||
132 | |||
133 | if (atomic_read(&key->enabled) > 0) { | ||
134 | WARN_ON_ONCE(atomic_read(&key->enabled) != 1); | ||
135 | return; | ||
136 | } | ||
137 | |||
138 | jump_label_lock(); | ||
139 | if (atomic_read(&key->enabled) == 0) { | ||
140 | atomic_set(&key->enabled, -1); | ||
141 | jump_label_update(key); | ||
142 | /* | ||
143 | * See static_key_slow_inc(). | ||
144 | */ | ||
145 | atomic_set_release(&key->enabled, 1); | ||
146 | } | ||
147 | jump_label_unlock(); | ||
148 | } | ||
149 | EXPORT_SYMBOL_GPL(static_key_enable_cpuslocked); | ||
150 | |||
151 | void static_key_enable(struct static_key *key) | ||
152 | { | ||
153 | cpus_read_lock(); | ||
154 | static_key_enable_cpuslocked(key); | ||
155 | cpus_read_unlock(); | ||
156 | } | ||
157 | EXPORT_SYMBOL_GPL(static_key_enable); | ||
158 | |||
159 | void static_key_disable_cpuslocked(struct static_key *key) | ||
160 | { | ||
161 | STATIC_KEY_CHECK_USE(); | ||
162 | |||
163 | if (atomic_read(&key->enabled) != 1) { | ||
164 | WARN_ON_ONCE(atomic_read(&key->enabled) != 0); | ||
165 | return; | ||
166 | } | ||
167 | |||
168 | jump_label_lock(); | ||
169 | if (atomic_cmpxchg(&key->enabled, 1, 0)) | ||
170 | jump_label_update(key); | ||
171 | jump_label_unlock(); | ||
172 | } | ||
173 | EXPORT_SYMBOL_GPL(static_key_disable_cpuslocked); | ||
174 | |||
175 | void static_key_disable(struct static_key *key) | ||
144 | { | 176 | { |
145 | cpus_read_lock(); | 177 | cpus_read_lock(); |
178 | static_key_disable_cpuslocked(key); | ||
179 | cpus_read_unlock(); | ||
180 | } | ||
181 | EXPORT_SYMBOL_GPL(static_key_disable); | ||
182 | |||
183 | static void static_key_slow_dec_cpuslocked(struct static_key *key, | ||
184 | unsigned long rate_limit, | ||
185 | struct delayed_work *work) | ||
186 | { | ||
146 | /* | 187 | /* |
147 | * The negative count check is valid even when a negative | 188 | * The negative count check is valid even when a negative |
148 | * key->enabled is in use by static_key_slow_inc(); a | 189 | * key->enabled is in use by static_key_slow_inc(); a |
@@ -153,7 +194,6 @@ static void __static_key_slow_dec(struct static_key *key, | |||
153 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { | 194 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { |
154 | WARN(atomic_read(&key->enabled) < 0, | 195 | WARN(atomic_read(&key->enabled) < 0, |
155 | "jump label: negative count!\n"); | 196 | "jump label: negative count!\n"); |
156 | cpus_read_unlock(); | ||
157 | return; | 197 | return; |
158 | } | 198 | } |
159 | 199 | ||
@@ -164,6 +204,14 @@ static void __static_key_slow_dec(struct static_key *key, | |||
164 | jump_label_update(key); | 204 | jump_label_update(key); |
165 | } | 205 | } |
166 | jump_label_unlock(); | 206 | jump_label_unlock(); |
207 | } | ||
208 | |||
209 | static void __static_key_slow_dec(struct static_key *key, | ||
210 | unsigned long rate_limit, | ||
211 | struct delayed_work *work) | ||
212 | { | ||
213 | cpus_read_lock(); | ||
214 | static_key_slow_dec_cpuslocked(key, rate_limit, work); | ||
167 | cpus_read_unlock(); | 215 | cpus_read_unlock(); |
168 | } | 216 | } |
169 | 217 | ||
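The atomic_set_release() used above when flipping key->enabled from -1 to 1 is a publish operation: anyone who observes the positive value must also observe the preceding text patching. A hedged user-space analogue with C11 release/acquire follows; a plain int stands in for the patched code, and none of this is the jump-label implementation.

    /* Hedged analogue of the publish step; a plain int stands in for the
     * patched text, and none of this is the jump-label implementation. */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int enabled = -1;        /* -1: initialization in progress */
    static int patched_text;

    static void *enabler(void *unused)
    {
            patched_text = 42;                                         /* "patch the text" */
            atomic_store_explicit(&enabled, 1, memory_order_release);  /* atomic_set_release() */
            return unused;
    }

    static void *user(void *unused)
    {
            while (atomic_load_explicit(&enabled, memory_order_acquire) <= 0)
                    ;                      /* wait until the key reads as enabled */
            printf("text=%d\n", patched_text);  /* guaranteed to print 42 */
            return unused;
    }

    int main(void)
    {
            pthread_t a, b;

            pthread_create(&b, NULL, user, NULL);
            pthread_create(&a, NULL, enabler, NULL);
            pthread_join(a, NULL);
            pthread_join(b, NULL);
            return 0;
    }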
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 7d2499bec5fe..44c8d0d17170 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
@@ -58,6 +58,10 @@ | |||
58 | #define CREATE_TRACE_POINTS | 58 | #define CREATE_TRACE_POINTS |
59 | #include <trace/events/lock.h> | 59 | #include <trace/events/lock.h> |
60 | 60 | ||
61 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
62 | #include <linux/slab.h> | ||
63 | #endif | ||
64 | |||
61 | #ifdef CONFIG_PROVE_LOCKING | 65 | #ifdef CONFIG_PROVE_LOCKING |
62 | int prove_locking = 1; | 66 | int prove_locking = 1; |
63 | module_param(prove_locking, int, 0644); | 67 | module_param(prove_locking, int, 0644); |
@@ -344,14 +348,12 @@ EXPORT_SYMBOL(lockdep_on); | |||
344 | #if VERBOSE | 348 | #if VERBOSE |
345 | # define HARDIRQ_VERBOSE 1 | 349 | # define HARDIRQ_VERBOSE 1 |
346 | # define SOFTIRQ_VERBOSE 1 | 350 | # define SOFTIRQ_VERBOSE 1 |
347 | # define RECLAIM_VERBOSE 1 | ||
348 | #else | 351 | #else |
349 | # define HARDIRQ_VERBOSE 0 | 352 | # define HARDIRQ_VERBOSE 0 |
350 | # define SOFTIRQ_VERBOSE 0 | 353 | # define SOFTIRQ_VERBOSE 0 |
351 | # define RECLAIM_VERBOSE 0 | ||
352 | #endif | 354 | #endif |
353 | 355 | ||
354 | #if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE | 356 | #if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE |
355 | /* | 357 | /* |
356 | * Quick filtering for interesting events: | 358 | * Quick filtering for interesting events: |
357 | */ | 359 | */ |
@@ -726,6 +728,18 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
726 | return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); | 728 | return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); |
727 | } | 729 | } |
728 | 730 | ||
731 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
732 | static void cross_init(struct lockdep_map *lock, int cross); | ||
733 | static int cross_lock(struct lockdep_map *lock); | ||
734 | static int lock_acquire_crosslock(struct held_lock *hlock); | ||
735 | static int lock_release_crosslock(struct lockdep_map *lock); | ||
736 | #else | ||
737 | static inline void cross_init(struct lockdep_map *lock, int cross) {} | ||
738 | static inline int cross_lock(struct lockdep_map *lock) { return 0; } | ||
739 | static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; } | ||
740 | static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; } | ||
741 | #endif | ||
742 | |||
729 | /* | 743 | /* |
730 | * Register a lock's class in the hash-table, if the class is not present | 744 | * Register a lock's class in the hash-table, if the class is not present |
731 | * yet. Otherwise we look it up. We cache the result in the lock object | 745 | * yet. Otherwise we look it up. We cache the result in the lock object |
@@ -1125,22 +1139,41 @@ print_circular_lock_scenario(struct held_lock *src, | |||
1125 | printk(KERN_CONT "\n\n"); | 1139 | printk(KERN_CONT "\n\n"); |
1126 | } | 1140 | } |
1127 | 1141 | ||
1128 | printk(" Possible unsafe locking scenario:\n\n"); | 1142 | if (cross_lock(tgt->instance)) { |
1129 | printk(" CPU0 CPU1\n"); | 1143 | printk(" Possible unsafe locking scenario by crosslock:\n\n"); |
1130 | printk(" ---- ----\n"); | 1144 | printk(" CPU0 CPU1\n"); |
1131 | printk(" lock("); | 1145 | printk(" ---- ----\n"); |
1132 | __print_lock_name(target); | 1146 | printk(" lock("); |
1133 | printk(KERN_CONT ");\n"); | 1147 | __print_lock_name(parent); |
1134 | printk(" lock("); | 1148 | printk(KERN_CONT ");\n"); |
1135 | __print_lock_name(parent); | 1149 | printk(" lock("); |
1136 | printk(KERN_CONT ");\n"); | 1150 | __print_lock_name(target); |
1137 | printk(" lock("); | 1151 | printk(KERN_CONT ");\n"); |
1138 | __print_lock_name(target); | 1152 | printk(" lock("); |
1139 | printk(KERN_CONT ");\n"); | 1153 | __print_lock_name(source); |
1140 | printk(" lock("); | 1154 | printk(KERN_CONT ");\n"); |
1141 | __print_lock_name(source); | 1155 | printk(" unlock("); |
1142 | printk(KERN_CONT ");\n"); | 1156 | __print_lock_name(target); |
1143 | printk("\n *** DEADLOCK ***\n\n"); | 1157 | printk(KERN_CONT ");\n"); |
1158 | printk("\n *** DEADLOCK ***\n\n"); | ||
1159 | } else { | ||
1160 | printk(" Possible unsafe locking scenario:\n\n"); | ||
1161 | printk(" CPU0 CPU1\n"); | ||
1162 | printk(" ---- ----\n"); | ||
1163 | printk(" lock("); | ||
1164 | __print_lock_name(target); | ||
1165 | printk(KERN_CONT ");\n"); | ||
1166 | printk(" lock("); | ||
1167 | __print_lock_name(parent); | ||
1168 | printk(KERN_CONT ");\n"); | ||
1169 | printk(" lock("); | ||
1170 | __print_lock_name(target); | ||
1171 | printk(KERN_CONT ");\n"); | ||
1172 | printk(" lock("); | ||
1173 | __print_lock_name(source); | ||
1174 | printk(KERN_CONT ");\n"); | ||
1175 | printk("\n *** DEADLOCK ***\n\n"); | ||
1176 | } | ||
1144 | } | 1177 | } |
1145 | 1178 | ||
1146 | /* | 1179 | /* |
@@ -1165,7 +1198,12 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, | |||
1165 | pr_warn("%s/%d is trying to acquire lock:\n", | 1198 | pr_warn("%s/%d is trying to acquire lock:\n", |
1166 | curr->comm, task_pid_nr(curr)); | 1199 | curr->comm, task_pid_nr(curr)); |
1167 | print_lock(check_src); | 1200 | print_lock(check_src); |
1168 | pr_warn("\nbut task is already holding lock:\n"); | 1201 | |
1202 | if (cross_lock(check_tgt->instance)) | ||
1203 | pr_warn("\nbut now in release context of a crosslock acquired at the following:\n"); | ||
1204 | else | ||
1205 | pr_warn("\nbut task is already holding lock:\n"); | ||
1206 | |||
1169 | print_lock(check_tgt); | 1207 | print_lock(check_tgt); |
1170 | pr_warn("\nwhich lock already depends on the new lock.\n\n"); | 1208 | pr_warn("\nwhich lock already depends on the new lock.\n\n"); |
1171 | pr_warn("\nthe existing dependency chain (in reverse order) is:\n"); | 1209 | pr_warn("\nthe existing dependency chain (in reverse order) is:\n"); |
@@ -1183,7 +1221,8 @@ static inline int class_equal(struct lock_list *entry, void *data) | |||
1183 | static noinline int print_circular_bug(struct lock_list *this, | 1221 | static noinline int print_circular_bug(struct lock_list *this, |
1184 | struct lock_list *target, | 1222 | struct lock_list *target, |
1185 | struct held_lock *check_src, | 1223 | struct held_lock *check_src, |
1186 | struct held_lock *check_tgt) | 1224 | struct held_lock *check_tgt, |
1225 | struct stack_trace *trace) | ||
1187 | { | 1226 | { |
1188 | struct task_struct *curr = current; | 1227 | struct task_struct *curr = current; |
1189 | struct lock_list *parent; | 1228 | struct lock_list *parent; |
@@ -1193,7 +1232,9 @@ static noinline int print_circular_bug(struct lock_list *this, | |||
1193 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1232 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
1194 | return 0; | 1233 | return 0; |
1195 | 1234 | ||
1196 | if (!save_trace(&this->trace)) | 1235 | if (cross_lock(check_tgt->instance)) |
1236 | this->trace = *trace; | ||
1237 | else if (!save_trace(&this->trace)) | ||
1197 | return 0; | 1238 | return 0; |
1198 | 1239 | ||
1199 | depth = get_lock_depth(target); | 1240 | depth = get_lock_depth(target); |
@@ -1309,6 +1350,19 @@ check_noncircular(struct lock_list *root, struct lock_class *target, | |||
1309 | return result; | 1350 | return result; |
1310 | } | 1351 | } |
1311 | 1352 | ||
1353 | static noinline int | ||
1354 | check_redundant(struct lock_list *root, struct lock_class *target, | ||
1355 | struct lock_list **target_entry) | ||
1356 | { | ||
1357 | int result; | ||
1358 | |||
1359 | debug_atomic_inc(nr_redundant_checks); | ||
1360 | |||
1361 | result = __bfs_forwards(root, target, class_equal, target_entry); | ||
1362 | |||
1363 | return result; | ||
1364 | } | ||
1365 | |||
1312 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | 1366 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) |
1313 | /* | 1367 | /* |
1314 | * Forwards and backwards subgraph searching, for the purposes of | 1368 | * Forwards and backwards subgraph searching, for the purposes of |
@@ -1784,6 +1838,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, | |||
1784 | if (nest) | 1838 | if (nest) |
1785 | return 2; | 1839 | return 2; |
1786 | 1840 | ||
1841 | if (cross_lock(prev->instance)) | ||
1842 | continue; | ||
1843 | |||
1787 | return print_deadlock_bug(curr, prev, next); | 1844 | return print_deadlock_bug(curr, prev, next); |
1788 | } | 1845 | } |
1789 | return 1; | 1846 | return 1; |
@@ -1813,20 +1870,13 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, | |||
1813 | */ | 1870 | */ |
1814 | static int | 1871 | static int |
1815 | check_prev_add(struct task_struct *curr, struct held_lock *prev, | 1872 | check_prev_add(struct task_struct *curr, struct held_lock *prev, |
1816 | struct held_lock *next, int distance, int *stack_saved) | 1873 | struct held_lock *next, int distance, struct stack_trace *trace, |
1874 | int (*save)(struct stack_trace *trace)) | ||
1817 | { | 1875 | { |
1818 | struct lock_list *entry; | 1876 | struct lock_list *entry; |
1819 | int ret; | 1877 | int ret; |
1820 | struct lock_list this; | 1878 | struct lock_list this; |
1821 | struct lock_list *uninitialized_var(target_entry); | 1879 | struct lock_list *uninitialized_var(target_entry); |
1822 | /* | ||
1823 | * Static variable, serialized by the graph_lock(). | ||
1824 | * | ||
1825 | * We use this static variable to save the stack trace in case | ||
1826 | * we call into this function multiple times due to encountering | ||
1827 | * trylocks in the held lock stack. | ||
1828 | */ | ||
1829 | static struct stack_trace trace; | ||
1830 | 1880 | ||
1831 | /* | 1881 | /* |
1832 | * Prove that the new <prev> -> <next> dependency would not | 1882 | * Prove that the new <prev> -> <next> dependency would not |
@@ -1841,7 +1891,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
1841 | this.parent = NULL; | 1891 | this.parent = NULL; |
1842 | ret = check_noncircular(&this, hlock_class(prev), &target_entry); | 1892 | ret = check_noncircular(&this, hlock_class(prev), &target_entry); |
1843 | if (unlikely(!ret)) | 1893 | if (unlikely(!ret)) |
1844 | return print_circular_bug(&this, target_entry, next, prev); | 1894 | return print_circular_bug(&this, target_entry, next, prev, trace); |
1845 | else if (unlikely(ret < 0)) | 1895 | else if (unlikely(ret < 0)) |
1846 | return print_bfs_bug(ret); | 1896 | return print_bfs_bug(ret); |
1847 | 1897 | ||
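Both check_noncircular() and the new check_redundant() are forward BFS walks over the dependency graph: the former asks whether <prev> is already reachable from <next> (the new edge would close a cycle), the latter whether <next> is already reachable from <prev> (the new edge adds nothing). A hedged sketch of that logic on a tiny adjacency matrix, in plain C and nothing like lockdep's real data structures:

    /* Hedged sketch on a tiny adjacency matrix; nothing like lockdep's
     * real graph representation. */
    #include <stdbool.h>
    #include <stdio.h>

    #define NCLASS 8

    static bool edge[NCLASS][NCLASS];

    static bool reachable(int src, int dst)
    {
            bool seen[NCLASS] = { false };
            int queue[NCLASS], head = 0, tail = 0;

            seen[src] = true;
            queue[tail++] = src;
            while (head < tail) {
                    int cur = queue[head++];

                    if (cur == dst)
                            return true;
                    for (int n = 0; n < NCLASS; n++) {
                            if (edge[cur][n] && !seen[n]) {
                                    seen[n] = true;
                                    queue[tail++] = n;
                            }
                    }
            }
            return false;
    }

    static void add_dependency(int prev, int next)
    {
            if (reachable(next, prev)) {           /* check_noncircular() fails */
                    printf("%d -> %d would create a cycle\n", prev, next);
                    return;
            }
            if (reachable(prev, next)) {           /* check_redundant() hits */
                    printf("%d -> %d is redundant\n", prev, next);
                    return;
            }
            edge[prev][next] = true;
            printf("added %d -> %d\n", prev, next);
    }

    int main(void)
    {
            add_dependency(0, 1);
            add_dependency(1, 2);
            add_dependency(0, 2);   /* already implied by 0 -> 1 -> 2 */
            add_dependency(2, 0);   /* would close the cycle 0 -> 1 -> 2 -> 0 */
            return 0;
    }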
@@ -1870,15 +1920,26 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
1870 | if (entry->class == hlock_class(next)) { | 1920 | if (entry->class == hlock_class(next)) { |
1871 | if (distance == 1) | 1921 | if (distance == 1) |
1872 | entry->distance = 1; | 1922 | entry->distance = 1; |
1873 | return 2; | 1923 | return 1; |
1874 | } | 1924 | } |
1875 | } | 1925 | } |
1876 | 1926 | ||
1877 | if (!*stack_saved) { | 1927 | /* |
1878 | if (!save_trace(&trace)) | 1928 | * Is the <prev> -> <next> link redundant? |
1879 | return 0; | 1929 | */ |
1880 | *stack_saved = 1; | 1930 | this.class = hlock_class(prev); |
1931 | this.parent = NULL; | ||
1932 | ret = check_redundant(&this, hlock_class(next), &target_entry); | ||
1933 | if (!ret) { | ||
1934 | debug_atomic_inc(nr_redundant); | ||
1935 | return 2; | ||
1881 | } | 1936 | } |
1937 | if (ret < 0) | ||
1938 | return print_bfs_bug(ret); | ||
1939 | |||
1940 | |||
1941 | if (save && !save(trace)) | ||
1942 | return 0; | ||
1882 | 1943 | ||
1883 | /* | 1944 | /* |
1884 | * Ok, all validations passed, add the new lock | 1945 | * Ok, all validations passed, add the new lock |
@@ -1886,14 +1947,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
1886 | */ | 1947 | */ |
1887 | ret = add_lock_to_list(hlock_class(next), | 1948 | ret = add_lock_to_list(hlock_class(next), |
1888 | &hlock_class(prev)->locks_after, | 1949 | &hlock_class(prev)->locks_after, |
1889 | next->acquire_ip, distance, &trace); | 1950 | next->acquire_ip, distance, trace); |
1890 | 1951 | ||
1891 | if (!ret) | 1952 | if (!ret) |
1892 | return 0; | 1953 | return 0; |
1893 | 1954 | ||
1894 | ret = add_lock_to_list(hlock_class(prev), | 1955 | ret = add_lock_to_list(hlock_class(prev), |
1895 | &hlock_class(next)->locks_before, | 1956 | &hlock_class(next)->locks_before, |
1896 | next->acquire_ip, distance, &trace); | 1957 | next->acquire_ip, distance, trace); |
1897 | if (!ret) | 1958 | if (!ret) |
1898 | return 0; | 1959 | return 0; |
1899 | 1960 | ||
@@ -1901,8 +1962,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
1901 | * Debugging printouts: | 1962 | * Debugging printouts: |
1902 | */ | 1963 | */ |
1903 | if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { | 1964 | if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { |
1904 | /* We drop graph lock, so another thread can overwrite trace. */ | ||
1905 | *stack_saved = 0; | ||
1906 | graph_unlock(); | 1965 | graph_unlock(); |
1907 | printk("\n new dependency: "); | 1966 | printk("\n new dependency: "); |
1908 | print_lock_name(hlock_class(prev)); | 1967 | print_lock_name(hlock_class(prev)); |
@@ -1910,9 +1969,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
1910 | print_lock_name(hlock_class(next)); | 1969 | print_lock_name(hlock_class(next)); |
1911 | printk(KERN_CONT "\n"); | 1970 | printk(KERN_CONT "\n"); |
1912 | dump_stack(); | 1971 | dump_stack(); |
1913 | return graph_lock(); | 1972 | if (!graph_lock()) |
1973 | return 0; | ||
1914 | } | 1974 | } |
1915 | return 1; | 1975 | return 2; |
1916 | } | 1976 | } |
1917 | 1977 | ||
1918 | /* | 1978 | /* |
@@ -1925,8 +1985,9 @@ static int | |||
1925 | check_prevs_add(struct task_struct *curr, struct held_lock *next) | 1985 | check_prevs_add(struct task_struct *curr, struct held_lock *next) |
1926 | { | 1986 | { |
1927 | int depth = curr->lockdep_depth; | 1987 | int depth = curr->lockdep_depth; |
1928 | int stack_saved = 0; | ||
1929 | struct held_lock *hlock; | 1988 | struct held_lock *hlock; |
1989 | struct stack_trace trace; | ||
1990 | int (*save)(struct stack_trace *trace) = save_trace; | ||
1930 | 1991 | ||
1931 | /* | 1992 | /* |
1932 | * Debugging checks. | 1993 | * Debugging checks. |
@@ -1947,21 +2008,36 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) | |||
1947 | int distance = curr->lockdep_depth - depth + 1; | 2008 | int distance = curr->lockdep_depth - depth + 1; |
1948 | hlock = curr->held_locks + depth - 1; | 2009 | hlock = curr->held_locks + depth - 1; |
1949 | /* | 2010 | /* |
1950 | * Only non-recursive-read entries get new dependencies | 2011 | * Only non-crosslock entries get new dependencies added. |
1951 | * added: | 2012 | * Crosslock entries will be added by commit later: |
1952 | */ | 2013 | */ |
1953 | if (hlock->read != 2 && hlock->check) { | 2014 | if (!cross_lock(hlock->instance)) { |
1954 | if (!check_prev_add(curr, hlock, next, | ||
1955 | distance, &stack_saved)) | ||
1956 | return 0; | ||
1957 | /* | 2015 | /* |
1958 | * Stop after the first non-trylock entry, | 2016 | * Only non-recursive-read entries get new dependencies |
1959 | * as non-trylock entries have added their | 2017 | * added: |
1960 | * own direct dependencies already, so this | ||
1961 | * lock is connected to them indirectly: | ||
1962 | */ | 2018 | */ |
1963 | if (!hlock->trylock) | 2019 | if (hlock->read != 2 && hlock->check) { |
1964 | break; | 2020 | int ret = check_prev_add(curr, hlock, next, |
2021 | distance, &trace, save); | ||
2022 | if (!ret) | ||
2023 | return 0; | ||
2024 | |||
2025 | /* | ||
2026 | * Stop saving stack_trace if save_trace() was | ||
2027 | * called at least once: | ||
2028 | */ | ||
2029 | if (save && ret == 2) | ||
2030 | save = NULL; | ||
2031 | |||
2032 | /* | ||
2033 | * Stop after the first non-trylock entry, | ||
2034 | * as non-trylock entries have added their | ||
2035 | * own direct dependencies already, so this | ||
2036 | * lock is connected to them indirectly: | ||
2037 | */ | ||
2038 | if (!hlock->trylock) | ||
2039 | break; | ||
2040 | } | ||
1965 | } | 2041 | } |
1966 | depth--; | 2042 | depth--; |
1967 | /* | 2043 | /* |
@@ -2126,19 +2202,26 @@ static int check_no_collision(struct task_struct *curr, | |||
2126 | } | 2202 | } |
2127 | 2203 | ||
2128 | /* | 2204 | /* |
2129 | * Look up a dependency chain. If the key is not present yet then | 2205 | * This is for building a chain between just two different classes, |
2130 | * add it and return 1 - in this case the new dependency chain is | 2206 | * instead of adding a new hlock upon current, which is done by |
2131 | * validated. If the key is already hashed, return 0. | 2207 | * add_chain_cache(). |
2132 | * (On return with 1 graph_lock is held.) | 2208 | * |
2209 | * This can be called in any context with two classes, while | ||
2210 | * add_chain_cache() must be done within the lock owner's context | ||
2211 | * since it uses hlock which might be racy in another context. | ||
2133 | */ | 2212 | */ |
2134 | static inline int lookup_chain_cache(struct task_struct *curr, | 2213 | static inline int add_chain_cache_classes(unsigned int prev, |
2135 | struct held_lock *hlock, | 2214 | unsigned int next, |
2136 | u64 chain_key) | 2215 | unsigned int irq_context, |
2216 | u64 chain_key) | ||
2137 | { | 2217 | { |
2138 | struct lock_class *class = hlock_class(hlock); | ||
2139 | struct hlist_head *hash_head = chainhashentry(chain_key); | 2218 | struct hlist_head *hash_head = chainhashentry(chain_key); |
2140 | struct lock_chain *chain; | 2219 | struct lock_chain *chain; |
2141 | int i, j; | 2220 | |
2221 | /* | ||
2222 | * Allocate a new chain entry from the static array, and add | ||
2223 | * it to the hash: | ||
2224 | */ | ||
2142 | 2225 | ||
2143 | /* | 2226 | /* |
2144 | * We might need to take the graph lock, ensure we've got IRQs | 2227 | * We might need to take the graph lock, ensure we've got IRQs |
@@ -2147,43 +2230,76 @@ static inline int lookup_chain_cache(struct task_struct *curr, | |||
2147 | */ | 2230 | */ |
2148 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2231 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
2149 | return 0; | 2232 | return 0; |
2233 | |||
2234 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { | ||
2235 | if (!debug_locks_off_graph_unlock()) | ||
2236 | return 0; | ||
2237 | |||
2238 | print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!"); | ||
2239 | dump_stack(); | ||
2240 | return 0; | ||
2241 | } | ||
2242 | |||
2243 | chain = lock_chains + nr_lock_chains++; | ||
2244 | chain->chain_key = chain_key; | ||
2245 | chain->irq_context = irq_context; | ||
2246 | chain->depth = 2; | ||
2247 | if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { | ||
2248 | chain->base = nr_chain_hlocks; | ||
2249 | nr_chain_hlocks += chain->depth; | ||
2250 | chain_hlocks[chain->base] = prev - 1; | ||
2251 | chain_hlocks[chain->base + 1] = next -1; | ||
2252 | } | ||
2253 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
2150 | /* | 2254 | /* |
2151 | * We can walk it lock-free, because entries only get added | 2255 | * Important for check_no_collision(). |
2152 | * to the hash: | ||
2153 | */ | 2256 | */ |
2154 | hlist_for_each_entry_rcu(chain, hash_head, entry) { | 2257 | else { |
2155 | if (chain->chain_key == chain_key) { | 2258 | if (!debug_locks_off_graph_unlock()) |
2156 | cache_hit: | ||
2157 | debug_atomic_inc(chain_lookup_hits); | ||
2158 | if (!check_no_collision(curr, hlock, chain)) | ||
2159 | return 0; | ||
2160 | |||
2161 | if (very_verbose(class)) | ||
2162 | printk("\nhash chain already cached, key: " | ||
2163 | "%016Lx tail class: [%p] %s\n", | ||
2164 | (unsigned long long)chain_key, | ||
2165 | class->key, class->name); | ||
2166 | return 0; | 2259 | return 0; |
2167 | } | 2260 | |
2261 | print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); | ||
2262 | dump_stack(); | ||
2263 | return 0; | ||
2168 | } | 2264 | } |
2169 | if (very_verbose(class)) | 2265 | #endif |
2170 | printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", | 2266 | |
2171 | (unsigned long long)chain_key, class->key, class->name); | 2267 | hlist_add_head_rcu(&chain->entry, hash_head); |
2268 | debug_atomic_inc(chain_lookup_misses); | ||
2269 | inc_chains(); | ||
2270 | |||
2271 | return 1; | ||
2272 | } | ||
2273 | |||
2274 | /* | ||
2275 | * Adds a dependency chain into chain hashtable. And must be called with | ||
2276 | * graph_lock held. | ||
2277 | * | ||
2278 | * Return 0 if fail, and graph_lock is released. | ||
2279 | * Return 1 if succeed, with graph_lock held. | ||
2280 | */ | ||
2281 | static inline int add_chain_cache(struct task_struct *curr, | ||
2282 | struct held_lock *hlock, | ||
2283 | u64 chain_key) | ||
2284 | { | ||
2285 | struct lock_class *class = hlock_class(hlock); | ||
2286 | struct hlist_head *hash_head = chainhashentry(chain_key); | ||
2287 | struct lock_chain *chain; | ||
2288 | int i, j; | ||
2289 | |||
2172 | /* | 2290 | /* |
2173 | * Allocate a new chain entry from the static array, and add | 2291 | * Allocate a new chain entry from the static array, and add |
2174 | * it to the hash: | 2292 | * it to the hash: |
2175 | */ | 2293 | */ |
2176 | if (!graph_lock()) | 2294 | |
2177 | return 0; | ||
2178 | /* | 2295 | /* |
2179 | * We have to walk the chain again locked - to avoid duplicates: | 2296 | * We might need to take the graph lock, ensure we've got IRQs |
2297 | * disabled to make this an IRQ-safe lock.. for recursion reasons | ||
2298 | * lockdep won't complain about its own locking errors. | ||
2180 | */ | 2299 | */ |
2181 | hlist_for_each_entry(chain, hash_head, entry) { | 2300 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
2182 | if (chain->chain_key == chain_key) { | 2301 | return 0; |
2183 | graph_unlock(); | 2302 | |
2184 | goto cache_hit; | ||
2185 | } | ||
2186 | } | ||
2187 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { | 2303 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { |
2188 | if (!debug_locks_off_graph_unlock()) | 2304 | if (!debug_locks_off_graph_unlock()) |
2189 | return 0; | 2305 | return 0; |
@@ -2235,6 +2351,78 @@ cache_hit: | |||
2235 | return 1; | 2351 | return 1; |
2236 | } | 2352 | } |
2237 | 2353 | ||
2354 | /* | ||
2355 | * Look up a dependency chain. | ||
2356 | */ | ||
2357 | static inline struct lock_chain *lookup_chain_cache(u64 chain_key) | ||
2358 | { | ||
2359 | struct hlist_head *hash_head = chainhashentry(chain_key); | ||
2360 | struct lock_chain *chain; | ||
2361 | |||
2362 | /* | ||
2363 | * We can walk it lock-free, because entries only get added | ||
2364 | * to the hash: | ||
2365 | */ | ||
2366 | hlist_for_each_entry_rcu(chain, hash_head, entry) { | ||
2367 | if (chain->chain_key == chain_key) { | ||
2368 | debug_atomic_inc(chain_lookup_hits); | ||
2369 | return chain; | ||
2370 | } | ||
2371 | } | ||
2372 | return NULL; | ||
2373 | } | ||
2374 | |||
2375 | /* | ||
2376 | * If the key is not present yet in dependency chain cache then | ||
2377 | * add it and return 1 - in this case the new dependency chain is | ||
2378 | * validated. If the key is already hashed, return 0. | ||
2379 | * (On return with 1 graph_lock is held.) | ||
2380 | */ | ||
2381 | static inline int lookup_chain_cache_add(struct task_struct *curr, | ||
2382 | struct held_lock *hlock, | ||
2383 | u64 chain_key) | ||
2384 | { | ||
2385 | struct lock_class *class = hlock_class(hlock); | ||
2386 | struct lock_chain *chain = lookup_chain_cache(chain_key); | ||
2387 | |||
2388 | if (chain) { | ||
2389 | cache_hit: | ||
2390 | if (!check_no_collision(curr, hlock, chain)) | ||
2391 | return 0; | ||
2392 | |||
2393 | if (very_verbose(class)) { | ||
2394 | printk("\nhash chain already cached, key: " | ||
2395 | "%016Lx tail class: [%p] %s\n", | ||
2396 | (unsigned long long)chain_key, | ||
2397 | class->key, class->name); | ||
2398 | } | ||
2399 | |||
2400 | return 0; | ||
2401 | } | ||
2402 | |||
2403 | if (very_verbose(class)) { | ||
2404 | printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", | ||
2405 | (unsigned long long)chain_key, class->key, class->name); | ||
2406 | } | ||
2407 | |||
2408 | if (!graph_lock()) | ||
2409 | return 0; | ||
2410 | |||
2411 | /* | ||
2412 | * We have to walk the chain again locked - to avoid duplicates: | ||
2413 | */ | ||
2414 | chain = lookup_chain_cache(chain_key); | ||
2415 | if (chain) { | ||
2416 | graph_unlock(); | ||
2417 | goto cache_hit; | ||
2418 | } | ||
2419 | |||
2420 | if (!add_chain_cache(curr, hlock, chain_key)) | ||
2421 | return 0; | ||
2422 | |||
2423 | return 1; | ||
2424 | } | ||
2425 | |||
2238 | static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | 2426 | static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, |
2239 | struct held_lock *hlock, int chain_head, u64 chain_key) | 2427 | struct held_lock *hlock, int chain_head, u64 chain_key) |
2240 | { | 2428 | { |
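lookup_chain_cache_add() above follows the usual optimistic-lookup-then-locked-recheck shape: probe the hash without the graph lock, and only on a miss take the lock, look again, and insert. A hedged user-space sketch of that shape follows; unlike the kernel code it drops the lock before returning, and the unlocked fast path ignores the RCU that makes the lock-free walk safe in lockdep.

    /* Hedged sketch of the lookup-then-recheck shape; unlike the kernel code
     * the lock is dropped before returning, and the unlocked fast path
     * ignores the RCU that makes it safe in lockdep. */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_CHAINS 64

    static pthread_mutex_t graph_lock = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t chains[MAX_CHAINS];
    static int nr_chains;

    static bool lookup_chain(uint64_t key)
    {
            for (int i = 0; i < nr_chains; i++)
                    if (chains[i] == key)
                            return true;
            return false;
    }

    /* returns true only for the caller that actually inserted the chain */
    static bool lookup_chain_add(uint64_t key)
    {
            if (lookup_chain(key))                 /* fast path, no lock taken */
                    return false;

            pthread_mutex_lock(&graph_lock);
            if (lookup_chain(key)) {               /* re-check: we raced and lost */
                    pthread_mutex_unlock(&graph_lock);
                    return false;
            }
            if (nr_chains == MAX_CHAINS) {         /* "MAX_LOCKDEP_CHAINS too low" */
                    pthread_mutex_unlock(&graph_lock);
                    return false;
            }
            chains[nr_chains++] = key;             /* add_chain_cache() */
            pthread_mutex_unlock(&graph_lock);
            return true;
    }

    int main(void)
    {
            printf("%d\n", lookup_chain_add(0x1234));   /* 1: newly added */
            printf("%d\n", lookup_chain_add(0x1234));   /* 0: cache hit */
            return 0;
    }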
@@ -2245,11 +2433,11 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | |||
2245 | * | 2433 | * |
2246 | * We look up the chain_key and do the O(N^2) check and update of | 2434 | * We look up the chain_key and do the O(N^2) check and update of |
2247 | * the dependencies only if this is a new dependency chain. | 2435 | * the dependencies only if this is a new dependency chain. |
2248 | * (If lookup_chain_cache() returns with 1 it acquires | 2436 | * (If lookup_chain_cache_add() returns with 1 it acquires |
2249 | * graph_lock for us) | 2437 | * graph_lock for us) |
2250 | */ | 2438 | */ |
2251 | if (!hlock->trylock && hlock->check && | 2439 | if (!hlock->trylock && hlock->check && |
2252 | lookup_chain_cache(curr, hlock, chain_key)) { | 2440 | lookup_chain_cache_add(curr, hlock, chain_key)) { |
2253 | /* | 2441 | /* |
2254 | * Check whether last held lock: | 2442 | * Check whether last held lock: |
2255 | * | 2443 | * |
@@ -2277,14 +2465,17 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | |||
2277 | * Add dependency only if this lock is not the head | 2465 | * Add dependency only if this lock is not the head |
2278 | * of the chain, and if it's not a secondary read-lock: | 2466 | * of the chain, and if it's not a secondary read-lock: |
2279 | */ | 2467 | */ |
2280 | if (!chain_head && ret != 2) | 2468 | if (!chain_head && ret != 2) { |
2281 | if (!check_prevs_add(curr, hlock)) | 2469 | if (!check_prevs_add(curr, hlock)) |
2282 | return 0; | 2470 | return 0; |
2471 | } | ||
2472 | |||
2283 | graph_unlock(); | 2473 | graph_unlock(); |
2284 | } else | 2474 | } else { |
2285 | /* after lookup_chain_cache(): */ | 2475 | /* after lookup_chain_cache_add(): */ |
2286 | if (unlikely(!debug_locks)) | 2476 | if (unlikely(!debug_locks)) |
2287 | return 0; | 2477 | return 0; |
2478 | } | ||
2288 | 2479 | ||
2289 | return 1; | 2480 | return 1; |
2290 | } | 2481 | } |
@@ -2567,14 +2758,6 @@ static int SOFTIRQ_verbose(struct lock_class *class) | |||
2567 | return 0; | 2758 | return 0; |
2568 | } | 2759 | } |
2569 | 2760 | ||
2570 | static int RECLAIM_FS_verbose(struct lock_class *class) | ||
2571 | { | ||
2572 | #if RECLAIM_VERBOSE | ||
2573 | return class_filter(class); | ||
2574 | #endif | ||
2575 | return 0; | ||
2576 | } | ||
2577 | |||
2578 | #define STRICT_READ_CHECKS 1 | 2761 | #define STRICT_READ_CHECKS 1 |
2579 | 2762 | ||
2580 | static int (*state_verbose_f[])(struct lock_class *class) = { | 2763 | static int (*state_verbose_f[])(struct lock_class *class) = { |
@@ -2870,57 +3053,6 @@ void trace_softirqs_off(unsigned long ip) | |||
2870 | debug_atomic_inc(redundant_softirqs_off); | 3053 | debug_atomic_inc(redundant_softirqs_off); |
2871 | } | 3054 | } |
2872 | 3055 | ||
2873 | static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) | ||
2874 | { | ||
2875 | struct task_struct *curr = current; | ||
2876 | |||
2877 | if (unlikely(!debug_locks)) | ||
2878 | return; | ||
2879 | |||
2880 | gfp_mask = current_gfp_context(gfp_mask); | ||
2881 | |||
2882 | /* no reclaim without waiting on it */ | ||
2883 | if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) | ||
2884 | return; | ||
2885 | |||
2886 | /* this guy won't enter reclaim */ | ||
2887 | if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) | ||
2888 | return; | ||
2889 | |||
2890 | /* We're only interested __GFP_FS allocations for now */ | ||
2891 | if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS)) | ||
2892 | return; | ||
2893 | |||
2894 | /* | ||
2895 | * Oi! Can't be having __GFP_FS allocations with IRQs disabled. | ||
2896 | */ | ||
2897 | if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) | ||
2898 | return; | ||
2899 | |||
2900 | /* Disable lockdep if explicitly requested */ | ||
2901 | if (gfp_mask & __GFP_NOLOCKDEP) | ||
2902 | return; | ||
2903 | |||
2904 | mark_held_locks(curr, RECLAIM_FS); | ||
2905 | } | ||
2906 | |||
2907 | static void check_flags(unsigned long flags); | ||
2908 | |||
2909 | void lockdep_trace_alloc(gfp_t gfp_mask) | ||
2910 | { | ||
2911 | unsigned long flags; | ||
2912 | |||
2913 | if (unlikely(current->lockdep_recursion)) | ||
2914 | return; | ||
2915 | |||
2916 | raw_local_irq_save(flags); | ||
2917 | check_flags(flags); | ||
2918 | current->lockdep_recursion = 1; | ||
2919 | __lockdep_trace_alloc(gfp_mask, flags); | ||
2920 | current->lockdep_recursion = 0; | ||
2921 | raw_local_irq_restore(flags); | ||
2922 | } | ||
2923 | |||
2924 | static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) | 3056 | static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) |
2925 | { | 3057 | { |
2926 | /* | 3058 | /* |
@@ -2966,22 +3098,6 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) | |||
2966 | } | 3098 | } |
2967 | } | 3099 | } |
2968 | 3100 | ||
2969 | /* | ||
2970 | * We reuse the irq context infrastructure more broadly as a general | ||
2971 | * context checking code. This tests GFP_FS recursion (a lock taken | ||
2972 | * during reclaim for a GFP_FS allocation is held over a GFP_FS | ||
2973 | * allocation). | ||
2974 | */ | ||
2975 | if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) { | ||
2976 | if (hlock->read) { | ||
2977 | if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ)) | ||
2978 | return 0; | ||
2979 | } else { | ||
2980 | if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS)) | ||
2981 | return 0; | ||
2982 | } | ||
2983 | } | ||
2984 | |||
2985 | return 1; | 3101 | return 1; |
2986 | } | 3102 | } |
2987 | 3103 | ||
@@ -3040,10 +3156,6 @@ static inline int separate_irq_context(struct task_struct *curr, | |||
3040 | return 0; | 3156 | return 0; |
3041 | } | 3157 | } |
3042 | 3158 | ||
3043 | void lockdep_trace_alloc(gfp_t gfp_mask) | ||
3044 | { | ||
3045 | } | ||
3046 | |||
3047 | #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ | 3159 | #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ |
3048 | 3160 | ||
3049 | /* | 3161 | /* |
@@ -3116,7 +3228,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
3116 | /* | 3228 | /* |
3117 | * Initialize a lock instance's lock-class mapping info: | 3229 | * Initialize a lock instance's lock-class mapping info: |
3118 | */ | 3230 | */ |
3119 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 3231 | static void __lockdep_init_map(struct lockdep_map *lock, const char *name, |
3120 | struct lock_class_key *key, int subclass) | 3232 | struct lock_class_key *key, int subclass) |
3121 | { | 3233 | { |
3122 | int i; | 3234 | int i; |
@@ -3174,8 +3286,25 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
3174 | raw_local_irq_restore(flags); | 3286 | raw_local_irq_restore(flags); |
3175 | } | 3287 | } |
3176 | } | 3288 | } |
3289 | |||
3290 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | ||
3291 | struct lock_class_key *key, int subclass) | ||
3292 | { | ||
3293 | cross_init(lock, 0); | ||
3294 | __lockdep_init_map(lock, name, key, subclass); | ||
3295 | } | ||
3177 | EXPORT_SYMBOL_GPL(lockdep_init_map); | 3296 | EXPORT_SYMBOL_GPL(lockdep_init_map); |
3178 | 3297 | ||
3298 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
3299 | void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name, | ||
3300 | struct lock_class_key *key, int subclass) | ||
3301 | { | ||
3302 | cross_init(lock, 1); | ||
3303 | __lockdep_init_map(lock, name, key, subclass); | ||
3304 | } | ||
3305 | EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock); | ||
3306 | #endif | ||
3307 | |||
3179 | struct lock_class_key __lockdep_no_validate__; | 3308 | struct lock_class_key __lockdep_no_validate__; |
3180 | EXPORT_SYMBOL_GPL(__lockdep_no_validate__); | 3309 | EXPORT_SYMBOL_GPL(__lockdep_no_validate__); |
3181 | 3310 | ||
@@ -3231,6 +3360,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3231 | int chain_head = 0; | 3360 | int chain_head = 0; |
3232 | int class_idx; | 3361 | int class_idx; |
3233 | u64 chain_key; | 3362 | u64 chain_key; |
3363 | int ret; | ||
3234 | 3364 | ||
3235 | if (unlikely(!debug_locks)) | 3365 | if (unlikely(!debug_locks)) |
3236 | return 0; | 3366 | return 0; |
@@ -3279,7 +3409,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3279 | 3409 | ||
3280 | class_idx = class - lock_classes + 1; | 3410 | class_idx = class - lock_classes + 1; |
3281 | 3411 | ||
3282 | if (depth) { | 3412 | /* TODO: nest_lock is not implemented for crosslock yet. */ |
3413 | if (depth && !cross_lock(lock)) { | ||
3283 | hlock = curr->held_locks + depth - 1; | 3414 | hlock = curr->held_locks + depth - 1; |
3284 | if (hlock->class_idx == class_idx && nest_lock) { | 3415 | if (hlock->class_idx == class_idx && nest_lock) { |
3285 | if (hlock->references) { | 3416 | if (hlock->references) { |
@@ -3367,6 +3498,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3367 | if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) | 3498 | if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) |
3368 | return 0; | 3499 | return 0; |
3369 | 3500 | ||
3501 | ret = lock_acquire_crosslock(hlock); | ||
3502 | /* | ||
3503 | * 2 means normal acquire operations are needed. Otherwise, it's | ||
3504 | * ok just to return with '0:fail, 1:success'. | ||
3505 | */ | ||
3506 | if (ret != 2) | ||
3507 | return ret; | ||
3508 | |||
3370 | curr->curr_chain_key = chain_key; | 3509 | curr->curr_chain_key = chain_key; |
3371 | curr->lockdep_depth++; | 3510 | curr->lockdep_depth++; |
3372 | check_chain_key(curr); | 3511 | check_chain_key(curr); |
@@ -3604,11 +3743,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | |||
3604 | struct task_struct *curr = current; | 3743 | struct task_struct *curr = current; |
3605 | struct held_lock *hlock; | 3744 | struct held_lock *hlock; |
3606 | unsigned int depth; | 3745 | unsigned int depth; |
3607 | int i; | 3746 | int ret, i; |
3608 | 3747 | ||
3609 | if (unlikely(!debug_locks)) | 3748 | if (unlikely(!debug_locks)) |
3610 | return 0; | 3749 | return 0; |
3611 | 3750 | ||
3751 | ret = lock_release_crosslock(lock); | ||
3752 | /* | ||
3753 | * 2 means normal release operations are needed. Otherwise, it's | ||
3754 | * ok just to return with '0:fail, 1:success'. | ||
3755 | */ | ||
3756 | if (ret != 2) | ||
3757 | return ret; | ||
3758 | |||
3612 | depth = curr->lockdep_depth; | 3759 | depth = curr->lockdep_depth; |
3613 | /* | 3760 | /* |
3614 | * So we're all set to release this lock.. wait what lock? We don't | 3761 | * So we're all set to release this lock.. wait what lock? We don't |
@@ -3952,18 +4099,6 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie) | |||
3952 | } | 4099 | } |
3953 | EXPORT_SYMBOL_GPL(lock_unpin_lock); | 4100 | EXPORT_SYMBOL_GPL(lock_unpin_lock); |
3954 | 4101 | ||
3955 | void lockdep_set_current_reclaim_state(gfp_t gfp_mask) | ||
3956 | { | ||
3957 | current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask); | ||
3958 | } | ||
3959 | EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state); | ||
3960 | |||
3961 | void lockdep_clear_current_reclaim_state(void) | ||
3962 | { | ||
3963 | current->lockdep_reclaim_gfp = 0; | ||
3964 | } | ||
3965 | EXPORT_SYMBOL_GPL(lockdep_clear_current_reclaim_state); | ||
3966 | |||
3967 | #ifdef CONFIG_LOCK_STAT | 4102 | #ifdef CONFIG_LOCK_STAT |
3968 | static int | 4103 | static int |
3969 | print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | 4104 | print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, |
@@ -4484,6 +4619,12 @@ asmlinkage __visible void lockdep_sys_exit(void) | |||
4484 | curr->comm, curr->pid); | 4619 | curr->comm, curr->pid); |
4485 | lockdep_print_held_locks(curr); | 4620 | lockdep_print_held_locks(curr); |
4486 | } | 4621 | } |
4622 | |||
4623 | /* | ||
4624 | * The lock history for each syscall should be independent. So wipe the | ||
4625 | * slate clean on return to userspace. | ||
4626 | */ | ||
4627 | lockdep_invariant_state(false); | ||
4487 | } | 4628 | } |
4488 | 4629 | ||
4489 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | 4630 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s) |
@@ -4532,3 +4673,488 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | |||
4532 | dump_stack(); | 4673 | dump_stack(); |
4533 | } | 4674 | } |
4534 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); | 4675 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); |
4676 | |||
4677 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
4678 | |||
4679 | /* | ||
4680 | * Crossrelease works by recording a lock history for each thread and | ||
4681 | * connecting those historic locks that were taken after the | ||
4682 | * wait_for_completion() in the complete() context. | ||
4683 | * | ||
4684 | * Task-A Task-B | ||
4685 | * | ||
4686 | * mutex_lock(&A); | ||
4687 | * mutex_unlock(&A); | ||
4688 | * | ||
4689 | * wait_for_completion(&C); | ||
4690 | * lock_acquire_crosslock(); | ||
4691 | * atomic_inc_return(&cross_gen_id); | ||
4692 | * | | ||
4693 | * | mutex_lock(&B); | ||
4694 | * | mutex_unlock(&B); | ||
4695 | * | | ||
4696 | * | complete(&C); | ||
4697 | * `-- lock_commit_crosslock(); | ||
4698 | * | ||
4699 | * Which will then add a dependency between B and C. | ||
4700 | */ | ||
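To make the picture above concrete, here is a minimal sketch of the kind of deadlock this machinery is meant to expose (the mutex, completion and function names below are invented for illustration and are not part of the patch):

	static DEFINE_MUTEX(m);
	static DECLARE_COMPLETION(done);

	static void waiter(void)		/* plays the role of Task-A */
	{
		mutex_lock(&m);
		wait_for_completion(&done);	/* crosslock acquire: adds m -> done */
		mutex_unlock(&m);
	}

	static void completer(void)		/* plays the role of Task-B */
	{
		mutex_lock(&m);			/* recorded in the lock history */
		mutex_unlock(&m);
		complete(&done);		/* commit adds done -> m */
	}

With plain lockdep the completion never shows up in the dependency graph, because it is "released" by complete() in a different context than the one that waits on it. With crossrelease both edges m -> done and done -> m exist, so the cycle can be reported.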
4701 | |||
4702 | #define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR]) | ||
4703 | |||
4704 | /* | ||
4705 | * Whenever a crosslock is held, cross_gen_id will be increased. | ||
4706 | */ | ||
4707 | static atomic_t cross_gen_id; /* Can be wrapped */ | ||
4708 | |||
4709 | /* | ||
4710 | * Make an entry of the ring buffer invalid. | ||
4711 | */ | ||
4712 | static inline void invalidate_xhlock(struct hist_lock *xhlock) | ||
4713 | { | ||
4714 | /* | ||
4715 | * Normally, xhlock->hlock.instance must be !NULL. | ||
4716 | */ | ||
4717 | xhlock->hlock.instance = NULL; | ||
4718 | } | ||
4719 | |||
4720 | /* | ||
4721 | * Lock history stacks; we have 2 nested lock history stacks: | ||
4722 | * | ||
4723 | * HARD(IRQ) | ||
4724 | * SOFT(IRQ) | ||
4725 | * | ||
4726 | * The thing is that once we complete a HARD/SOFT IRQ the future task locks | ||
4727 | * should not depend on any of the locks observed while running the IRQ. So | ||
4728 | * what we do is rewind the history buffer and erase all our knowledge of that | ||
4729 | * temporal event. | ||
4730 | */ | ||
4731 | |||
4732 | void crossrelease_hist_start(enum xhlock_context_t c) | ||
4733 | { | ||
4734 | struct task_struct *cur = current; | ||
4735 | |||
4736 | if (!cur->xhlocks) | ||
4737 | return; | ||
4738 | |||
4739 | cur->xhlock_idx_hist[c] = cur->xhlock_idx; | ||
4740 | cur->hist_id_save[c] = cur->hist_id; | ||
4741 | } | ||
4742 | |||
4743 | void crossrelease_hist_end(enum xhlock_context_t c) | ||
4744 | { | ||
4745 | struct task_struct *cur = current; | ||
4746 | |||
4747 | if (cur->xhlocks) { | ||
4748 | unsigned int idx = cur->xhlock_idx_hist[c]; | ||
4749 | struct hist_lock *h = &xhlock(idx); | ||
4750 | |||
4751 | cur->xhlock_idx = idx; | ||
4752 | |||
4753 | /* Check if the ring was overwritten. */ | ||
4754 | if (h->hist_id != cur->hist_id_save[c]) | ||
4755 | invalidate_xhlock(h); | ||
4756 | } | ||
4757 | } | ||
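A hedged sketch of how the pair is intended to be used; the real call sites are in the hard/softirq entry and exit paths, which are not part of this hunk, and XHLOCK_SOFT is assumed here to be one of the xhlock_context_t values:

	crossrelease_hist_start(XHLOCK_SOFT);	/* snapshot xhlock_idx / hist_id */
	/* ... run the softirq handler, possibly taking and releasing locks ... */
	crossrelease_hist_end(XHLOCK_SOFT);	/* rewind: drop the IRQ-local history */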
4758 | |||
4759 | /* | ||
4760 | * lockdep_invariant_state() is used to annotate independence inside a task, to | ||
4761 | * make one task look like multiple independent 'tasks'. | ||
4762 | * | ||
4763 | * Take for instance workqueues; each work is independent of the last. The | ||
4764 | * completion of a future work does not depend on the completion of a past work | ||
4765 | * (in general). Therefore we must not carry that (lock) dependency across | ||
4766 | * works. | ||
4767 | * | ||
4768 | * This is true for many things; pretty much all kthreads fall into this | ||
4769 | * pattern, where they have an invariant state and future completions do not | ||
4770 | * depend on past completions. It's just that since they all have the 'same' | ||

4771 | * form -- the kthread does the same over and over -- it doesn't typically | ||
4772 | * matter. | ||
4773 | * | ||
4774 | * The same is true for system-calls, once a system call is completed (we've | ||
4775 | * returned to userspace) the next system call does not depend on the lock | ||
4776 | * history of the previous system call. | ||
4777 | * | ||
4778 | * The key property for independence, this invariant state, is that it must be | ||
4779 | * a point where we hold no locks and have no history. Because if we were to | ||
4780 | * hold locks, the restore at _end() would not necessarily recover its history | ||
4781 | * entry. Similarly, independence by definition means it does not depend on | ||
4782 | * prior state. | ||
4783 | */ | ||
4784 | void lockdep_invariant_state(bool force) | ||
4785 | { | ||
4786 | /* | ||
4787 | * We call this at an invariant point, no current state, no history. | ||
4788 | * Verify the former, enforce the latter. | ||
4789 | */ | ||
4790 | WARN_ON_ONCE(!force && current->lockdep_depth); | ||
4791 | invalidate_xhlock(&xhlock(current->xhlock_idx)); | ||
4792 | } | ||
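As a hypothetical example of the pattern being described, a kthread whose iterations are independent could mark the invariant point at the top of every loop; do_one_item() is an invented placeholder, and the in-tree caller added by this series is process_one_work() in the workqueue hunk further down:

	static int my_worker_thread(void *arg)
	{
		while (!kthread_should_stop()) {
			/*
			 * Each iteration is independent: cut the crossrelease
			 * history here so a completion committed later cannot
			 * be connected to locks taken in earlier iterations.
			 */
			lockdep_invariant_state(true);
			do_one_item(arg);	/* assumed per-iteration work */
		}
		return 0;
	}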
4793 | |||
4794 | static int cross_lock(struct lockdep_map *lock) | ||
4795 | { | ||
4796 | return lock ? lock->cross : 0; | ||
4797 | } | ||
4798 | |||
4799 | /* | ||
4800 | * This is needed to decide the relationship between wrapable variables. | ||
4801 | */ | ||
4802 | static inline int before(unsigned int a, unsigned int b) | ||
4803 | { | ||
4804 | return (int)(a - b) < 0; | ||
4805 | } | ||
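A quick worked example of the wrap-safe comparison, assuming a 32-bit unsigned int and values picked purely for illustration:

	before(UINT_MAX, 2);	/* true:  (int)(UINT_MAX - 2) == -3 < 0, so
				 * UINT_MAX counts as older than the wrapped 2 */
	before(2, UINT_MAX);	/* false: (int)(2 - UINT_MAX) == 3 > 0 */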
4806 | |||
4807 | static inline struct lock_class *xhlock_class(struct hist_lock *xhlock) | ||
4808 | { | ||
4809 | return hlock_class(&xhlock->hlock); | ||
4810 | } | ||
4811 | |||
4812 | static inline struct lock_class *xlock_class(struct cross_lock *xlock) | ||
4813 | { | ||
4814 | return hlock_class(&xlock->hlock); | ||
4815 | } | ||
4816 | |||
4817 | /* | ||
4818 | * Should we check a dependency with the previous one? | ||
4819 | */ | ||
4820 | static inline int depend_before(struct held_lock *hlock) | ||
4821 | { | ||
4822 | return hlock->read != 2 && hlock->check && !hlock->trylock; | ||
4823 | } | ||
4824 | |||
4825 | /* | ||
4826 | * Should we check a dependency with the next one? | ||
4827 | */ | ||
4828 | static inline int depend_after(struct held_lock *hlock) | ||
4829 | { | ||
4830 | return hlock->read != 2 && hlock->check; | ||
4831 | } | ||
4832 | |||
4833 | /* | ||
4834 | * Check if the xhlock is valid, which would be false if, | ||
4835 | * | ||
4836 | * 1. It has not been used since initialization, or | ||
4837 | * 2. It got invalidated. | ||
4838 | * | ||
4839 | * Remember that hist_lock is implemented as a ring buffer. | ||
4840 | */ | ||
4841 | static inline int xhlock_valid(struct hist_lock *xhlock) | ||
4842 | { | ||
4843 | /* | ||
4844 | * xhlock->hlock.instance must be !NULL. | ||
4845 | */ | ||
4846 | return !!xhlock->hlock.instance; | ||
4847 | } | ||
4848 | |||
4849 | /* | ||
4850 | * Record a hist_lock entry. | ||
4851 | * | ||
4852 | * Only irq disable is required. | ||
4853 | */ | ||
4854 | static void add_xhlock(struct held_lock *hlock) | ||
4855 | { | ||
4856 | unsigned int idx = ++current->xhlock_idx; | ||
4857 | struct hist_lock *xhlock = &xhlock(idx); | ||
4858 | |||
4859 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
4860 | /* | ||
4861 | * This can be done locklessly because they are all task-local | ||
4862 | * state, we must however ensure IRQs are disabled. | ||
4863 | */ | ||
4864 | WARN_ON_ONCE(!irqs_disabled()); | ||
4865 | #endif | ||
4866 | |||
4867 | /* Initialize hist_lock's members */ | ||
4868 | xhlock->hlock = *hlock; | ||
4869 | xhlock->hist_id = ++current->hist_id; | ||
4870 | |||
4871 | xhlock->trace.nr_entries = 0; | ||
4872 | xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES; | ||
4873 | xhlock->trace.entries = xhlock->trace_entries; | ||
4874 | xhlock->trace.skip = 3; | ||
4875 | save_stack_trace(&xhlock->trace); | ||
4876 | } | ||
4877 | |||
4878 | static inline int same_context_xhlock(struct hist_lock *xhlock) | ||
4879 | { | ||
4880 | return xhlock->hlock.irq_context == task_irq_context(current); | ||
4881 | } | ||
4882 | |||
4883 | /* | ||
4884 | * This should be lockless as far as possible because this would be | ||
4885 | * called very frequently. | ||
4886 | */ | ||
4887 | static void check_add_xhlock(struct held_lock *hlock) | ||
4888 | { | ||
4889 | /* | ||
4890 | * Record a hist_lock only if acquisitions ahead could | ||
4891 | * depend on the held_lock. For example, if the held_lock | ||
4892 | * is trylock then acquisitions ahead never depend on it. | ||
4893 | * In that case, we don't need to record it. Just return. | ||
4894 | */ | ||
4895 | if (!current->xhlocks || !depend_before(hlock)) | ||
4896 | return; | ||
4897 | |||
4898 | add_xhlock(hlock); | ||
4899 | } | ||
4900 | |||
4901 | /* | ||
4902 | * For crosslock. | ||
4903 | */ | ||
4904 | static int add_xlock(struct held_lock *hlock) | ||
4905 | { | ||
4906 | struct cross_lock *xlock; | ||
4907 | unsigned int gen_id; | ||
4908 | |||
4909 | if (!graph_lock()) | ||
4910 | return 0; | ||
4911 | |||
4912 | xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock; | ||
4913 | |||
4914 | /* | ||
4915 | * When acquisitions for a crosslock are overlapped, we use | ||
4916 | * nr_acquire to perform commit for them, based on cross_gen_id | ||
4917 | * of the first acquisition, which allows us to add additional | ||
4918 | * dependencies. | ||
4919 | * | ||
4920 | * Moreover, when no acquisition of a crosslock is in progress, | ||
4921 | * we should not perform commit because the lock might not exist | ||
4922 | * any more, which might cause incorrect memory access. So we | ||
4923 | * have to track the number of acquisitions of a crosslock. | ||
4924 | * | ||
4925 | * depend_after() is necessary to initialize only the first | ||
4926 | * valid xlock so that the xlock can be used on its commit. | ||
4927 | */ | ||
4928 | if (xlock->nr_acquire++ && depend_after(&xlock->hlock)) | ||
4929 | goto unlock; | ||
4930 | |||
4931 | gen_id = (unsigned int)atomic_inc_return(&cross_gen_id); | ||
4932 | xlock->hlock = *hlock; | ||
4933 | xlock->hlock.gen_id = gen_id; | ||
4934 | unlock: | ||
4935 | graph_unlock(); | ||
4936 | return 1; | ||
4937 | } | ||
4938 | |||
4939 | /* | ||
4940 | * Called for both normal and crosslock acquires. Normal locks will be | ||
4941 | * pushed on the hist_lock queue. Cross locks will record state and | ||
4942 | * stop regular lock_acquire() to avoid being placed on the held_lock | ||
4943 | * stack. | ||
4944 | * | ||
4945 | * Return: 0 - failure; | ||
4946 | * 1 - crosslock, done; | ||
4947 | * 2 - normal lock, continue to held_lock[] ops. | ||
4948 | */ | ||
4949 | static int lock_acquire_crosslock(struct held_lock *hlock) | ||
4950 | { | ||
4951 | /* | ||
4952 | * CONTEXT 1 CONTEXT 2 | ||
4953 | * --------- --------- | ||
4954 | * lock A (cross) | ||
4955 | * X = atomic_inc_return(&cross_gen_id) | ||
4956 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
4957 | * Y = atomic_read_acquire(&cross_gen_id) | ||
4958 | * lock B | ||
4959 | * | ||
4960 | * atomic_read_acquire() is for ordering between A and B, | ||
4961 | * IOW, A happens before B, when CONTEXT 2 sees Y >= X. | ||
4962 | * | ||
4963 | * Pairs with atomic_inc_return() in add_xlock(). | ||
4964 | */ | ||
4965 | hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id); | ||
4966 | |||
4967 | if (cross_lock(hlock->instance)) | ||
4968 | return add_xlock(hlock); | ||
4969 | |||
4970 | check_add_xhlock(hlock); | ||
4971 | return 2; | ||
4972 | } | ||
4973 | |||
4974 | static int copy_trace(struct stack_trace *trace) | ||
4975 | { | ||
4976 | unsigned long *buf = stack_trace + nr_stack_trace_entries; | ||
4977 | unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; | ||
4978 | unsigned int nr = min(max_nr, trace->nr_entries); | ||
4979 | |||
4980 | trace->nr_entries = nr; | ||
4981 | memcpy(buf, trace->entries, nr * sizeof(trace->entries[0])); | ||
4982 | trace->entries = buf; | ||
4983 | nr_stack_trace_entries += nr; | ||
4984 | |||
4985 | if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { | ||
4986 | if (!debug_locks_off_graph_unlock()) | ||
4987 | return 0; | ||
4988 | |||
4989 | print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); | ||
4990 | dump_stack(); | ||
4991 | |||
4992 | return 0; | ||
4993 | } | ||
4994 | |||
4995 | return 1; | ||
4996 | } | ||
4997 | |||
4998 | static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock) | ||
4999 | { | ||
5000 | unsigned int xid, pid; | ||
5001 | u64 chain_key; | ||
5002 | |||
5003 | xid = xlock_class(xlock) - lock_classes; | ||
5004 | chain_key = iterate_chain_key((u64)0, xid); | ||
5005 | pid = xhlock_class(xhlock) - lock_classes; | ||
5006 | chain_key = iterate_chain_key(chain_key, pid); | ||
5007 | |||
5008 | if (lookup_chain_cache(chain_key)) | ||
5009 | return 1; | ||
5010 | |||
5011 | if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context, | ||
5012 | chain_key)) | ||
5013 | return 0; | ||
5014 | |||
5015 | if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1, | ||
5016 | &xhlock->trace, copy_trace)) | ||
5017 | return 0; | ||
5018 | |||
5019 | return 1; | ||
5020 | } | ||
5021 | |||
5022 | static void commit_xhlocks(struct cross_lock *xlock) | ||
5023 | { | ||
5024 | unsigned int cur = current->xhlock_idx; | ||
5025 | unsigned int prev_hist_id = xhlock(cur).hist_id; | ||
5026 | unsigned int i; | ||
5027 | |||
5028 | if (!graph_lock()) | ||
5029 | return; | ||
5030 | |||
5031 | if (xlock->nr_acquire) { | ||
5032 | for (i = 0; i < MAX_XHLOCKS_NR; i++) { | ||
5033 | struct hist_lock *xhlock = &xhlock(cur - i); | ||
5034 | |||
5035 | if (!xhlock_valid(xhlock)) | ||
5036 | break; | ||
5037 | |||
5038 | if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id)) | ||
5039 | break; | ||
5040 | |||
5041 | if (!same_context_xhlock(xhlock)) | ||
5042 | break; | ||
5043 | |||
5044 | /* | ||
5045 | * Filter out the cases where the ring buffer was | ||
5046 | * overwritten and the current entry has a bigger | ||
5047 | * hist_id than the previous one, which is impossible | ||
5048 | * otherwise: | ||
5049 | */ | ||
5050 | if (unlikely(before(prev_hist_id, xhlock->hist_id))) | ||
5051 | break; | ||
5052 | |||
5053 | prev_hist_id = xhlock->hist_id; | ||
5054 | |||
5055 | /* | ||
5056 | * commit_xhlock() returns 0 with graph_lock already | ||
5057 | * released if fail. | ||
5058 | */ | ||
5059 | if (!commit_xhlock(xlock, xhlock)) | ||
5060 | return; | ||
5061 | } | ||
5062 | } | ||
5063 | |||
5064 | graph_unlock(); | ||
5065 | } | ||
5066 | |||
5067 | void lock_commit_crosslock(struct lockdep_map *lock) | ||
5068 | { | ||
5069 | struct cross_lock *xlock; | ||
5070 | unsigned long flags; | ||
5071 | |||
5072 | if (unlikely(!debug_locks || current->lockdep_recursion)) | ||
5073 | return; | ||
5074 | |||
5075 | if (!current->xhlocks) | ||
5076 | return; | ||
5077 | |||
5078 | /* | ||
5079 | * Only commit hist_locks with the cross_lock if the cross_lock | ||
5080 | * could depend on acquisitions made after it. | ||
5081 | * | ||
5082 | * For example, if the cross_lock does not have the 'check' flag | ||
5083 | * then we don't need to check dependencies and commit for that. | ||
5084 | * Just skip it. In that case, of course, the cross_lock does | ||
5085 | * not depend on acquisitions ahead, either. | ||
5086 | * | ||
5087 | * WARNING: Don't do that in add_xlock() in advance. When an | ||
5088 | * acquisition context is different from the commit context, | ||
5089 | * an invalid (skipped) cross_lock might be accessed. | ||
5090 | */ | ||
5091 | if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock)) | ||
5092 | return; | ||
5093 | |||
5094 | raw_local_irq_save(flags); | ||
5095 | check_flags(flags); | ||
5096 | current->lockdep_recursion = 1; | ||
5097 | xlock = &((struct lockdep_map_cross *)lock)->xlock; | ||
5098 | commit_xhlocks(xlock); | ||
5099 | current->lockdep_recursion = 0; | ||
5100 | raw_local_irq_restore(flags); | ||
5101 | } | ||
5102 | EXPORT_SYMBOL_GPL(lock_commit_crosslock); | ||
5103 | |||
5104 | /* | ||
5105 | * Return: 0 - failure; | ||
5106 | * 1 - crosslock, done; | ||
5107 | * 2 - normal lock, continue to held_lock[] ops. | ||
5108 | */ | ||
5109 | static int lock_release_crosslock(struct lockdep_map *lock) | ||
5110 | { | ||
5111 | if (cross_lock(lock)) { | ||
5112 | if (!graph_lock()) | ||
5113 | return 0; | ||
5114 | ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--; | ||
5115 | graph_unlock(); | ||
5116 | return 1; | ||
5117 | } | ||
5118 | return 2; | ||
5119 | } | ||
5120 | |||
5121 | static void cross_init(struct lockdep_map *lock, int cross) | ||
5122 | { | ||
5123 | if (cross) | ||
5124 | ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0; | ||
5125 | |||
5126 | lock->cross = cross; | ||
5127 | |||
5128 | /* | ||
5129 | * Crossrelease assumes that the ring buffer size of xhlocks | ||
5130 | * is aligned with power of 2. So force it on build. | ||
5131 | */ | ||
5132 | BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1)); | ||
5133 | } | ||
5134 | |||
5135 | void lockdep_init_task(struct task_struct *task) | ||
5136 | { | ||
5137 | int i; | ||
5138 | |||
5139 | task->xhlock_idx = UINT_MAX; | ||
5140 | task->hist_id = 0; | ||
5141 | |||
5142 | for (i = 0; i < XHLOCK_CTX_NR; i++) { | ||
5143 | task->xhlock_idx_hist[i] = UINT_MAX; | ||
5144 | task->hist_id_save[i] = 0; | ||
5145 | } | ||
5146 | |||
5147 | task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR, | ||
5148 | GFP_KERNEL); | ||
5149 | } | ||
5150 | |||
5151 | void lockdep_free_task(struct task_struct *task) | ||
5152 | { | ||
5153 | if (task->xhlocks) { | ||
5154 | void *tmp = task->xhlocks; | ||
5155 | /* Disable crossrelease for current */ | ||
5156 | task->xhlocks = NULL; | ||
5157 | kfree(tmp); | ||
5158 | } | ||
5159 | } | ||
5160 | #endif | ||
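For orientation, a hedged sketch of where these two task hooks are expected to be wired up; the fork/exit side lives in other files of this series and is not shown in this diff:

	/* In copy_process(), once the new task_struct exists: */
	lockdep_init_task(p);

	/* In free_task(), before the task_struct itself is freed: */
	lockdep_free_task(tsk);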
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index c08fbd2f5ba9..1da4669d57a7 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h | |||
@@ -143,6 +143,8 @@ struct lockdep_stats { | |||
143 | int redundant_softirqs_on; | 143 | int redundant_softirqs_on; |
144 | int redundant_softirqs_off; | 144 | int redundant_softirqs_off; |
145 | int nr_unused_locks; | 145 | int nr_unused_locks; |
146 | int nr_redundant_checks; | ||
147 | int nr_redundant; | ||
146 | int nr_cyclic_checks; | 148 | int nr_cyclic_checks; |
147 | int nr_cyclic_check_recursions; | 149 | int nr_cyclic_check_recursions; |
148 | int nr_find_usage_forwards_checks; | 150 | int nr_find_usage_forwards_checks; |
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index 6d1fcc786081..68d9e267ccd4 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c | |||
@@ -201,6 +201,10 @@ static void lockdep_stats_debug_show(struct seq_file *m) | |||
201 | debug_atomic_read(chain_lookup_hits)); | 201 | debug_atomic_read(chain_lookup_hits)); |
202 | seq_printf(m, " cyclic checks: %11llu\n", | 202 | seq_printf(m, " cyclic checks: %11llu\n", |
203 | debug_atomic_read(nr_cyclic_checks)); | 203 | debug_atomic_read(nr_cyclic_checks)); |
204 | seq_printf(m, " redundant checks: %11llu\n", | ||
205 | debug_atomic_read(nr_redundant_checks)); | ||
206 | seq_printf(m, " redundant links: %11llu\n", | ||
207 | debug_atomic_read(nr_redundant)); | ||
204 | seq_printf(m, " find-mask forwards checks: %11llu\n", | 208 | seq_printf(m, " find-mask forwards checks: %11llu\n", |
205 | debug_atomic_read(nr_find_usage_forwards_checks)); | 209 | debug_atomic_read(nr_find_usage_forwards_checks)); |
206 | seq_printf(m, " find-mask backwards checks: %11llu\n", | 210 | seq_printf(m, " find-mask backwards checks: %11llu\n", |
diff --git a/kernel/locking/lockdep_states.h b/kernel/locking/lockdep_states.h index 995b0cc2b84c..35ca09f2ed0b 100644 --- a/kernel/locking/lockdep_states.h +++ b/kernel/locking/lockdep_states.h | |||
@@ -6,4 +6,3 @@ | |||
6 | */ | 6 | */ |
7 | LOCKDEP_STATE(HARDIRQ) | 7 | LOCKDEP_STATE(HARDIRQ) |
8 | LOCKDEP_STATE(SOFTIRQ) | 8 | LOCKDEP_STATE(SOFTIRQ) |
9 | LOCKDEP_STATE(RECLAIM_FS) | ||
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index a3167941093b..a74ee6abd039 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c | |||
@@ -109,6 +109,19 @@ bool osq_lock(struct optimistic_spin_queue *lock) | |||
109 | 109 | ||
110 | prev = decode_cpu(old); | 110 | prev = decode_cpu(old); |
111 | node->prev = prev; | 111 | node->prev = prev; |
112 | |||
113 | /* | ||
114 | * osq_lock() unqueue | ||
115 | * | ||
116 | * node->prev = prev osq_wait_next() | ||
117 | * WMB MB | ||
118 | * prev->next = node next->prev = prev // unqueue-C | ||
119 | * | ||
120 | * Here 'node->prev' and 'next->prev' are the same variable and we need | ||
121 | * to ensure these stores happen in-order to avoid corrupting the list. | ||
122 | */ | ||
123 | smp_wmb(); | ||
124 | |||
112 | WRITE_ONCE(prev->next, node); | 125 | WRITE_ONCE(prev->next, node); |
113 | 126 | ||
114 | /* | 127 | /* |
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 4ccfcaae5b89..43555681c40b 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h | |||
@@ -72,7 +72,7 @@ static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock) | |||
72 | struct __qspinlock *l = (void *)lock; | 72 | struct __qspinlock *l = (void *)lock; |
73 | 73 | ||
74 | if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) && | 74 | if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) && |
75 | (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0)) { | 75 | (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) { |
76 | qstat_inc(qstat_pv_lock_stealing, true); | 76 | qstat_inc(qstat_pv_lock_stealing, true); |
77 | return true; | 77 | return true; |
78 | } | 78 | } |
@@ -101,16 +101,16 @@ static __always_inline void clear_pending(struct qspinlock *lock) | |||
101 | 101 | ||
102 | /* | 102 | /* |
103 | * The pending bit check in pv_queued_spin_steal_lock() isn't a memory | 103 | * The pending bit check in pv_queued_spin_steal_lock() isn't a memory |
104 | * barrier. Therefore, an atomic cmpxchg() is used to acquire the lock | 104 | * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the |
105 | * just to be sure that it will get it. | 105 | * lock just to be sure that it will get it. |
106 | */ | 106 | */ |
107 | static __always_inline int trylock_clear_pending(struct qspinlock *lock) | 107 | static __always_inline int trylock_clear_pending(struct qspinlock *lock) |
108 | { | 108 | { |
109 | struct __qspinlock *l = (void *)lock; | 109 | struct __qspinlock *l = (void *)lock; |
110 | 110 | ||
111 | return !READ_ONCE(l->locked) && | 111 | return !READ_ONCE(l->locked) && |
112 | (cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL) | 112 | (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL, |
113 | == _Q_PENDING_VAL); | 113 | _Q_LOCKED_VAL) == _Q_PENDING_VAL); |
114 | } | 114 | } |
115 | #else /* _Q_PENDING_BITS == 8 */ | 115 | #else /* _Q_PENDING_BITS == 8 */ |
116 | static __always_inline void set_pending(struct qspinlock *lock) | 116 | static __always_inline void set_pending(struct qspinlock *lock) |
@@ -138,7 +138,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock) | |||
138 | */ | 138 | */ |
139 | old = val; | 139 | old = val; |
140 | new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; | 140 | new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; |
141 | val = atomic_cmpxchg(&lock->val, old, new); | 141 | val = atomic_cmpxchg_acquire(&lock->val, old, new); |
142 | 142 | ||
143 | if (val == old) | 143 | if (val == old) |
144 | return 1; | 144 | return 1; |
@@ -362,8 +362,18 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) | |||
362 | * observe its next->locked value and advance itself. | 362 | * observe its next->locked value and advance itself. |
363 | * | 363 | * |
364 | * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() | 364 | * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() |
365 | * | ||
366 | * The write to next->locked in arch_mcs_spin_unlock_contended() | ||
367 | * must be ordered before the read of pn->state in the cmpxchg() | ||
368 | * below for the code to work correctly. To guarantee full ordering | ||
369 | * irrespective of the success or failure of the cmpxchg(), | ||
370 | * a relaxed version with explicit barrier is used. The control | ||
371 | * dependency will order the reading of pn->state before any | ||
372 | * subsequent writes. | ||
365 | */ | 373 | */ |
366 | if (cmpxchg(&pn->state, vcpu_halted, vcpu_hashed) != vcpu_halted) | 374 | smp_mb__before_atomic(); |
375 | if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed) | ||
376 | != vcpu_halted) | ||
367 | return; | 377 | return; |
368 | 378 | ||
369 | /* | 379 | /* |
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 72ad45a9a794..8d039b928d61 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h | |||
@@ -40,6 +40,9 @@ struct rt_mutex_waiter { | |||
40 | /* | 40 | /* |
41 | * Various helpers to access the waiters-tree: | 41 | * Various helpers to access the waiters-tree: |
42 | */ | 42 | */ |
43 | |||
44 | #ifdef CONFIG_RT_MUTEXES | ||
45 | |||
43 | static inline int rt_mutex_has_waiters(struct rt_mutex *lock) | 46 | static inline int rt_mutex_has_waiters(struct rt_mutex *lock) |
44 | { | 47 | { |
45 | return !RB_EMPTY_ROOT(&lock->waiters); | 48 | return !RB_EMPTY_ROOT(&lock->waiters); |
@@ -69,6 +72,32 @@ task_top_pi_waiter(struct task_struct *p) | |||
69 | pi_tree_entry); | 72 | pi_tree_entry); |
70 | } | 73 | } |
71 | 74 | ||
75 | #else | ||
76 | |||
77 | static inline int rt_mutex_has_waiters(struct rt_mutex *lock) | ||
78 | { | ||
79 | return false; | ||
80 | } | ||
81 | |||
82 | static inline struct rt_mutex_waiter * | ||
83 | rt_mutex_top_waiter(struct rt_mutex *lock) | ||
84 | { | ||
85 | return NULL; | ||
86 | } | ||
87 | |||
88 | static inline int task_has_pi_waiters(struct task_struct *p) | ||
89 | { | ||
90 | return false; | ||
91 | } | ||
92 | |||
93 | static inline struct rt_mutex_waiter * | ||
94 | task_top_pi_waiter(struct task_struct *p) | ||
95 | { | ||
96 | return NULL; | ||
97 | } | ||
98 | |||
99 | #endif | ||
100 | |||
72 | /* | 101 | /* |
73 | * lock->owner state tracking: | 102 | * lock->owner state tracking: |
74 | */ | 103 | */ |
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 20819df98125..0848634c5512 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c | |||
@@ -126,7 +126,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem) | |||
126 | /* | 126 | /* |
127 | * get a read lock on the semaphore | 127 | * get a read lock on the semaphore |
128 | */ | 128 | */ |
129 | void __sched __down_read(struct rw_semaphore *sem) | 129 | int __sched __down_read_common(struct rw_semaphore *sem, int state) |
130 | { | 130 | { |
131 | struct rwsem_waiter waiter; | 131 | struct rwsem_waiter waiter; |
132 | unsigned long flags; | 132 | unsigned long flags; |
@@ -140,8 +140,6 @@ void __sched __down_read(struct rw_semaphore *sem) | |||
140 | goto out; | 140 | goto out; |
141 | } | 141 | } |
142 | 142 | ||
143 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
144 | |||
145 | /* set up my own style of waitqueue */ | 143 | /* set up my own style of waitqueue */ |
146 | waiter.task = current; | 144 | waiter.task = current; |
147 | waiter.type = RWSEM_WAITING_FOR_READ; | 145 | waiter.type = RWSEM_WAITING_FOR_READ; |
@@ -149,20 +147,41 @@ void __sched __down_read(struct rw_semaphore *sem) | |||
149 | 147 | ||
150 | list_add_tail(&waiter.list, &sem->wait_list); | 148 | list_add_tail(&waiter.list, &sem->wait_list); |
151 | 149 | ||
152 | /* we don't need to touch the semaphore struct anymore */ | ||
153 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
154 | |||
155 | /* wait to be given the lock */ | 150 | /* wait to be given the lock */ |
156 | for (;;) { | 151 | for (;;) { |
157 | if (!waiter.task) | 152 | if (!waiter.task) |
158 | break; | 153 | break; |
154 | if (signal_pending_state(state, current)) | ||
155 | goto out_nolock; | ||
156 | set_current_state(state); | ||
157 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
159 | schedule(); | 158 | schedule(); |
160 | set_current_state(TASK_UNINTERRUPTIBLE); | 159 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
161 | } | 160 | } |
162 | 161 | ||
163 | __set_current_state(TASK_RUNNING); | 162 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
164 | out: | 163 | out: |
165 | ; | 164 | return 0; |
165 | |||
166 | out_nolock: | ||
167 | /* | ||
168 | * We didn't take the lock, so there must be a writer, which is | ||
169 | * either the owner or the first waiter of the sem. If it's a | ||
170 | * waiter, it will be woken by the current owner. No need to wake anybody. | ||
171 | */ | ||
172 | list_del(&waiter.list); | ||
173 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
174 | return -EINTR; | ||
175 | } | ||
176 | |||
177 | void __sched __down_read(struct rw_semaphore *sem) | ||
178 | { | ||
179 | __down_read_common(sem, TASK_UNINTERRUPTIBLE); | ||
180 | } | ||
181 | |||
182 | int __sched __down_read_killable(struct rw_semaphore *sem) | ||
183 | { | ||
184 | return __down_read_common(sem, TASK_KILLABLE); | ||
166 | } | 185 | } |
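A hedged usage sketch of the new killable read path, assuming the usual down_read_killable() wrapper on top of __down_read_killable() (the wrapper itself is not part of this hunk, and mm is assumed to be a struct mm_struct pointer):

	if (down_read_killable(&mm->mmap_sem))
		return -EINTR;	/* a fatal signal arrived while sleeping */
	/* ... read-side critical section ... */
	up_read(&mm->mmap_sem);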
167 | 186 | ||
168 | /* | 187 | /* |
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 34e727f18e49..02f660666ab8 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
@@ -221,8 +221,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
221 | /* | 221 | /* |
222 | * Wait for the read lock to be granted | 222 | * Wait for the read lock to be granted |
223 | */ | 223 | */ |
224 | __visible | 224 | static inline struct rw_semaphore __sched * |
225 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | 225 | __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) |
226 | { | 226 | { |
227 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; | 227 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; |
228 | struct rwsem_waiter waiter; | 228 | struct rwsem_waiter waiter; |
@@ -255,17 +255,44 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | |||
255 | 255 | ||
256 | /* wait to be given the lock */ | 256 | /* wait to be given the lock */ |
257 | while (true) { | 257 | while (true) { |
258 | set_current_state(TASK_UNINTERRUPTIBLE); | 258 | set_current_state(state); |
259 | if (!waiter.task) | 259 | if (!waiter.task) |
260 | break; | 260 | break; |
261 | if (signal_pending_state(state, current)) { | ||
262 | raw_spin_lock_irq(&sem->wait_lock); | ||
263 | if (waiter.task) | ||
264 | goto out_nolock; | ||
265 | raw_spin_unlock_irq(&sem->wait_lock); | ||
266 | break; | ||
267 | } | ||
261 | schedule(); | 268 | schedule(); |
262 | } | 269 | } |
263 | 270 | ||
264 | __set_current_state(TASK_RUNNING); | 271 | __set_current_state(TASK_RUNNING); |
265 | return sem; | 272 | return sem; |
273 | out_nolock: | ||
274 | list_del(&waiter.list); | ||
275 | if (list_empty(&sem->wait_list)) | ||
276 | atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); | ||
277 | raw_spin_unlock_irq(&sem->wait_lock); | ||
278 | __set_current_state(TASK_RUNNING); | ||
279 | return ERR_PTR(-EINTR); | ||
280 | } | ||
281 | |||
282 | __visible struct rw_semaphore * __sched | ||
283 | rwsem_down_read_failed(struct rw_semaphore *sem) | ||
284 | { | ||
285 | return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); | ||
266 | } | 286 | } |
267 | EXPORT_SYMBOL(rwsem_down_read_failed); | 287 | EXPORT_SYMBOL(rwsem_down_read_failed); |
268 | 288 | ||
289 | __visible struct rw_semaphore * __sched | ||
290 | rwsem_down_read_failed_killable(struct rw_semaphore *sem) | ||
291 | { | ||
292 | return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); | ||
293 | } | ||
294 | EXPORT_SYMBOL(rwsem_down_read_failed_killable); | ||
295 | |||
269 | /* | 296 | /* |
270 | * This function must be called with the sem->wait_lock held to prevent | 297 | * This function must be called with the sem->wait_lock held to prevent |
271 | * race conditions between checking the rwsem wait list and setting the | 298 | * race conditions between checking the rwsem wait list and setting the |
diff --git a/kernel/panic.c b/kernel/panic.c index a58932b41700..bdd18afa19a4 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/nmi.h> | 26 | #include <linux/nmi.h> |
27 | #include <linux/console.h> | 27 | #include <linux/console.h> |
28 | #include <linux/bug.h> | 28 | #include <linux/bug.h> |
29 | #include <linux/ratelimit.h> | ||
29 | 30 | ||
30 | #define PANIC_TIMER_STEP 100 | 31 | #define PANIC_TIMER_STEP 100 |
31 | #define PANIC_BLINK_SPD 18 | 32 | #define PANIC_BLINK_SPD 18 |
@@ -601,6 +602,17 @@ EXPORT_SYMBOL(__stack_chk_fail); | |||
601 | 602 | ||
602 | #endif | 603 | #endif |
603 | 604 | ||
605 | #ifdef CONFIG_ARCH_HAS_REFCOUNT | ||
606 | void refcount_error_report(struct pt_regs *regs, const char *err) | ||
607 | { | ||
608 | WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n", | ||
609 | err, (void *)instruction_pointer(regs), | ||
610 | current->comm, task_pid_nr(current), | ||
611 | from_kuid_munged(&init_user_ns, current_uid()), | ||
612 | from_kuid_munged(&init_user_ns, current_euid())); | ||
613 | } | ||
614 | #endif | ||
615 | |||
604 | core_param(panic, panic_timeout, int, 0644); | 616 | core_param(panic, panic_timeout, int, 0644); |
605 | core_param(pause_on_oops, pause_on_oops, int, 0644); | 617 | core_param(pause_on_oops, pause_on_oops, int, 0644); |
606 | core_param(panic_on_warn, panic_on_warn, int, 0644); | 618 | core_param(panic_on_warn, panic_on_warn, int, 0644); |
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 5d9131aa846f..cc873075c3bd 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c | |||
@@ -32,6 +32,12 @@ void complete(struct completion *x) | |||
32 | unsigned long flags; | 32 | unsigned long flags; |
33 | 33 | ||
34 | spin_lock_irqsave(&x->wait.lock, flags); | 34 | spin_lock_irqsave(&x->wait.lock, flags); |
35 | |||
36 | /* | ||
37 | * Perform commit of crossrelease here. | ||
38 | */ | ||
39 | complete_release_commit(x); | ||
40 | |||
35 | if (x->done != UINT_MAX) | 41 | if (x->done != UINT_MAX) |
36 | x->done++; | 42 | x->done++; |
37 | __wake_up_locked(&x->wait, TASK_NORMAL, 1); | 43 | __wake_up_locked(&x->wait, TASK_NORMAL, 1); |
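The complete_release_commit() hook used above is defined in the completion header elsewhere in this series; under CONFIG_LOCKDEP_COMPLETIONS it is expected to boil down to roughly the following (a hedged reconstruction, not the literal header code):

	static inline void complete_release_commit(struct completion *x)
	{
		/* Commit the recorded lock history against the completion's crosslock. */
		lock_commit_crosslock((struct lockdep_map *)&x->map);
	}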
@@ -99,9 +105,14 @@ __wait_for_common(struct completion *x, | |||
99 | { | 105 | { |
100 | might_sleep(); | 106 | might_sleep(); |
101 | 107 | ||
108 | complete_acquire(x); | ||
109 | |||
102 | spin_lock_irq(&x->wait.lock); | 110 | spin_lock_irq(&x->wait.lock); |
103 | timeout = do_wait_for_common(x, action, timeout, state); | 111 | timeout = do_wait_for_common(x, action, timeout, state); |
104 | spin_unlock_irq(&x->wait.lock); | 112 | spin_unlock_irq(&x->wait.lock); |
113 | |||
114 | complete_release(x); | ||
115 | |||
105 | return timeout; | 116 | return timeout; |
106 | } | 117 | } |
107 | 118 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c1fcd96cf432..6d2c7ff9ba98 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1972,8 +1972,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
1972 | * reordered with p->state check below. This pairs with mb() in | 1972 | * reordered with p->state check below. This pairs with mb() in |
1973 | * set_current_state() the waiting thread does. | 1973 | * set_current_state() the waiting thread does. |
1974 | */ | 1974 | */ |
1975 | smp_mb__before_spinlock(); | ||
1976 | raw_spin_lock_irqsave(&p->pi_lock, flags); | 1975 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
1976 | smp_mb__after_spinlock(); | ||
1977 | if (!(p->state & state)) | 1977 | if (!(p->state & state)) |
1978 | goto out; | 1978 | goto out; |
1979 | 1979 | ||
@@ -3296,8 +3296,8 @@ static void __sched notrace __schedule(bool preempt) | |||
3296 | * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) | 3296 | * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) |
3297 | * done by the caller to avoid the race with signal_wake_up(). | 3297 | * done by the caller to avoid the race with signal_wake_up(). |
3298 | */ | 3298 | */ |
3299 | smp_mb__before_spinlock(); | ||
3300 | rq_lock(rq, &rf); | 3299 | rq_lock(rq, &rf); |
3300 | smp_mb__after_spinlock(); | ||
3301 | 3301 | ||
3302 | /* Promote REQ to ACT */ | 3302 | /* Promote REQ to ACT */ |
3303 | rq->clock_update_flags <<= 1; | 3303 | rq->clock_update_flags <<= 1; |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 25e5cb1107f3..ab1c7f5409a0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -769,7 +769,7 @@ struct rq { | |||
769 | #ifdef CONFIG_SCHED_HRTICK | 769 | #ifdef CONFIG_SCHED_HRTICK |
770 | #ifdef CONFIG_SMP | 770 | #ifdef CONFIG_SMP |
771 | int hrtick_csd_pending; | 771 | int hrtick_csd_pending; |
772 | struct call_single_data hrtick_csd; | 772 | call_single_data_t hrtick_csd; |
773 | #endif | 773 | #endif |
774 | struct hrtimer hrtick_timer; | 774 | struct hrtimer hrtick_timer; |
775 | #endif | 775 | #endif |
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 3d5610dcce11..2227e183e202 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c | |||
@@ -33,9 +33,6 @@ void swake_up(struct swait_queue_head *q) | |||
33 | { | 33 | { |
34 | unsigned long flags; | 34 | unsigned long flags; |
35 | 35 | ||
36 | if (!swait_active(q)) | ||
37 | return; | ||
38 | |||
39 | raw_spin_lock_irqsave(&q->lock, flags); | 36 | raw_spin_lock_irqsave(&q->lock, flags); |
40 | swake_up_locked(q); | 37 | swake_up_locked(q); |
41 | raw_spin_unlock_irqrestore(&q->lock, flags); | 38 | raw_spin_unlock_irqrestore(&q->lock, flags); |
@@ -51,9 +48,6 @@ void swake_up_all(struct swait_queue_head *q) | |||
51 | struct swait_queue *curr; | 48 | struct swait_queue *curr; |
52 | LIST_HEAD(tmp); | 49 | LIST_HEAD(tmp); |
53 | 50 | ||
54 | if (!swait_active(q)) | ||
55 | return; | ||
56 | |||
57 | raw_spin_lock_irq(&q->lock); | 51 | raw_spin_lock_irq(&q->lock); |
58 | list_splice_init(&q->task_list, &tmp); | 52 | list_splice_init(&q->task_list, &tmp); |
59 | while (!list_empty(&tmp)) { | 53 | while (!list_empty(&tmp)) { |
diff --git a/kernel/smp.c b/kernel/smp.c index 3061483cb3ad..81cfca9b4cc3 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -28,7 +28,7 @@ enum { | |||
28 | }; | 28 | }; |
29 | 29 | ||
30 | struct call_function_data { | 30 | struct call_function_data { |
31 | struct call_single_data __percpu *csd; | 31 | call_single_data_t __percpu *csd; |
32 | cpumask_var_t cpumask; | 32 | cpumask_var_t cpumask; |
33 | cpumask_var_t cpumask_ipi; | 33 | cpumask_var_t cpumask_ipi; |
34 | }; | 34 | }; |
@@ -51,7 +51,7 @@ int smpcfd_prepare_cpu(unsigned int cpu) | |||
51 | free_cpumask_var(cfd->cpumask); | 51 | free_cpumask_var(cfd->cpumask); |
52 | return -ENOMEM; | 52 | return -ENOMEM; |
53 | } | 53 | } |
54 | cfd->csd = alloc_percpu(struct call_single_data); | 54 | cfd->csd = alloc_percpu(call_single_data_t); |
55 | if (!cfd->csd) { | 55 | if (!cfd->csd) { |
56 | free_cpumask_var(cfd->cpumask); | 56 | free_cpumask_var(cfd->cpumask); |
57 | free_cpumask_var(cfd->cpumask_ipi); | 57 | free_cpumask_var(cfd->cpumask_ipi); |
@@ -103,12 +103,12 @@ void __init call_function_init(void) | |||
103 | * previous function call. For multi-cpu calls its even more interesting | 103 | * previous function call. For multi-cpu calls its even more interesting |
104 | * as we'll have to ensure no other cpu is observing our csd. | 104 | * as we'll have to ensure no other cpu is observing our csd. |
105 | */ | 105 | */ |
106 | static __always_inline void csd_lock_wait(struct call_single_data *csd) | 106 | static __always_inline void csd_lock_wait(call_single_data_t *csd) |
107 | { | 107 | { |
108 | smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); | 108 | smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); |
109 | } | 109 | } |
110 | 110 | ||
111 | static __always_inline void csd_lock(struct call_single_data *csd) | 111 | static __always_inline void csd_lock(call_single_data_t *csd) |
112 | { | 112 | { |
113 | csd_lock_wait(csd); | 113 | csd_lock_wait(csd); |
114 | csd->flags |= CSD_FLAG_LOCK; | 114 | csd->flags |= CSD_FLAG_LOCK; |
@@ -116,12 +116,12 @@ static __always_inline void csd_lock(struct call_single_data *csd) | |||
116 | /* | 116 | /* |
117 | * prevent CPU from reordering the above assignment | 117 | * prevent CPU from reordering the above assignment |
118 | * to ->flags with any subsequent assignments to other | 118 | * to ->flags with any subsequent assignments to other |
119 | * fields of the specified call_single_data structure: | 119 | * fields of the specified call_single_data_t structure: |
120 | */ | 120 | */ |
121 | smp_wmb(); | 121 | smp_wmb(); |
122 | } | 122 | } |
123 | 123 | ||
124 | static __always_inline void csd_unlock(struct call_single_data *csd) | 124 | static __always_inline void csd_unlock(call_single_data_t *csd) |
125 | { | 125 | { |
126 | WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); | 126 | WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); |
127 | 127 | ||
@@ -131,14 +131,14 @@ static __always_inline void csd_unlock(struct call_single_data *csd) | |||
131 | smp_store_release(&csd->flags, 0); | 131 | smp_store_release(&csd->flags, 0); |
132 | } | 132 | } |
133 | 133 | ||
134 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); | 134 | static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); |
135 | 135 | ||
136 | /* | 136 | /* |
137 | * Insert a previously allocated call_single_data element | 137 | * Insert a previously allocated call_single_data_t element |
138 | * for execution on the given CPU. data must already have | 138 | * for execution on the given CPU. data must already have |
139 | * ->func, ->info, and ->flags set. | 139 | * ->func, ->info, and ->flags set. |
140 | */ | 140 | */ |
141 | static int generic_exec_single(int cpu, struct call_single_data *csd, | 141 | static int generic_exec_single(int cpu, call_single_data_t *csd, |
142 | smp_call_func_t func, void *info) | 142 | smp_call_func_t func, void *info) |
143 | { | 143 | { |
144 | if (cpu == smp_processor_id()) { | 144 | if (cpu == smp_processor_id()) { |
@@ -210,7 +210,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) | |||
210 | { | 210 | { |
211 | struct llist_head *head; | 211 | struct llist_head *head; |
212 | struct llist_node *entry; | 212 | struct llist_node *entry; |
213 | struct call_single_data *csd, *csd_next; | 213 | call_single_data_t *csd, *csd_next; |
214 | static bool warned; | 214 | static bool warned; |
215 | 215 | ||
216 | WARN_ON(!irqs_disabled()); | 216 | WARN_ON(!irqs_disabled()); |
@@ -268,8 +268,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) | |||
268 | int smp_call_function_single(int cpu, smp_call_func_t func, void *info, | 268 | int smp_call_function_single(int cpu, smp_call_func_t func, void *info, |
269 | int wait) | 269 | int wait) |
270 | { | 270 | { |
271 | struct call_single_data *csd; | 271 | call_single_data_t *csd; |
272 | struct call_single_data csd_stack = { .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS }; | 272 | call_single_data_t csd_stack = { |
273 | .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS, | ||
274 | }; | ||
273 | int this_cpu; | 275 | int this_cpu; |
274 | int err; | 276 | int err; |
275 | 277 | ||
@@ -321,7 +323,7 @@ EXPORT_SYMBOL(smp_call_function_single); | |||
321 | * NOTE: Be careful, there is unfortunately no current debugging facility to | 323 | * NOTE: Be careful, there is unfortunately no current debugging facility to |
322 | * validate the correctness of this serialization. | 324 | * validate the correctness of this serialization. |
323 | */ | 325 | */ |
324 | int smp_call_function_single_async(int cpu, struct call_single_data *csd) | 326 | int smp_call_function_single_async(int cpu, call_single_data_t *csd) |
325 | { | 327 | { |
326 | int err = 0; | 328 | int err = 0; |
327 | 329 | ||
@@ -444,7 +446,7 @@ void smp_call_function_many(const struct cpumask *mask, | |||
444 | 446 | ||
445 | cpumask_clear(cfd->cpumask_ipi); | 447 | cpumask_clear(cfd->cpumask_ipi); |
446 | for_each_cpu(cpu, cfd->cpumask) { | 448 | for_each_cpu(cpu, cfd->cpumask) { |
447 | struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu); | 449 | call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); |
448 | 450 | ||
449 | csd_lock(csd); | 451 | csd_lock(csd); |
450 | if (wait) | 452 | if (wait) |
@@ -460,7 +462,7 @@ void smp_call_function_many(const struct cpumask *mask, | |||
460 | 462 | ||
461 | if (wait) { | 463 | if (wait) { |
462 | for_each_cpu(cpu, cfd->cpumask) { | 464 | for_each_cpu(cpu, cfd->cpumask) { |
463 | struct call_single_data *csd; | 465 | call_single_data_t *csd; |
464 | 466 | ||
465 | csd = per_cpu_ptr(cfd->csd, cpu); | 467 | csd = per_cpu_ptr(cfd->csd, cpu); |
466 | csd_lock_wait(csd); | 468 | csd_lock_wait(csd); |
diff --git a/kernel/up.c b/kernel/up.c index ee81ac9af4ca..42c46bf3e0a5 100644 --- a/kernel/up.c +++ b/kernel/up.c | |||
@@ -23,7 +23,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | |||
23 | } | 23 | } |
24 | EXPORT_SYMBOL(smp_call_function_single); | 24 | EXPORT_SYMBOL(smp_call_function_single); |
25 | 25 | ||
26 | int smp_call_function_single_async(int cpu, struct call_single_data *csd) | 26 | int smp_call_function_single_async(int cpu, call_single_data_t *csd) |
27 | { | 27 | { |
28 | unsigned long flags; | 28 | unsigned long flags; |
29 | 29 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ca937b0c3a96..ab3c0dc8c7ed 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -2091,8 +2091,30 @@ __acquires(&pool->lock) | |||
2091 | 2091 | ||
2092 | spin_unlock_irq(&pool->lock); | 2092 | spin_unlock_irq(&pool->lock); |
2093 | 2093 | ||
2094 | lock_map_acquire_read(&pwq->wq->lockdep_map); | 2094 | lock_map_acquire(&pwq->wq->lockdep_map); |
2095 | lock_map_acquire(&lockdep_map); | 2095 | lock_map_acquire(&lockdep_map); |
2096 | /* | ||
2097 | * Strictly speaking we should mark the invariant state without holding | ||
2098 | * any locks, that is, before these two lock_map_acquire()'s. | ||
2099 | * | ||
2100 | * However, that would result in: | ||
2101 | * | ||
2102 | * A(W1) | ||
2103 | * WFC(C) | ||
2104 | * A(W1) | ||
2105 | * C(C) | ||
2106 | * | ||
2107 | * Which would create W1->C->W1 dependencies, even though there is no | ||
2108 | * actual deadlock possible. There are two solutions: use a | ||
2109 | * read-recursive acquire on the work(queue) 'locks', which will then | ||
2110 | * hit the lockdep limitation on recursive locks, or simply discard | ||
2111 | * these locks. | ||
2112 | * | ||
2113 | * AFAICT there is no possible deadlock scenario between the | ||
2114 | * flush_work() and complete() primitives (except for single-threaded | ||
2115 | * workqueues), so hiding them isn't a problem. | ||
2116 | */ | ||
2117 | lockdep_invariant_state(true); | ||
2096 | trace_workqueue_execute_start(work); | 2118 | trace_workqueue_execute_start(work); |
2097 | worker->current_func(work); | 2119 | worker->current_func(work); |
2098 | /* | 2120 | /* |
@@ -2474,7 +2496,16 @@ static void insert_wq_barrier(struct pool_workqueue *pwq, | |||
2474 | */ | 2496 | */ |
2475 | INIT_WORK_ONSTACK(&barr->work, wq_barrier_func); | 2497 | INIT_WORK_ONSTACK(&barr->work, wq_barrier_func); |
2476 | __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); | 2498 | __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); |
2477 | init_completion(&barr->done); | 2499 | |
2500 | /* | ||
2501 | * Explicitly init the crosslock for wq_barrier::done, make its lock | ||
2502 | * key a subkey of the corresponding work. As a result we won't | ||
2503 | * build a dependency between wq_barrier::done and unrelated work. | ||
2504 | */ | ||
2505 | lockdep_init_map_crosslock((struct lockdep_map *)&barr->done.map, | ||
2506 | "(complete)wq_barr::done", | ||
2507 | target->lockdep_map.key, 1); | ||
2508 | __init_completion(&barr->done); | ||
2478 | barr->task = current; | 2509 | barr->task = current; |
2479 | 2510 | ||
2480 | /* | 2511 | /* |
@@ -2815,16 +2846,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) | |||
2815 | spin_unlock_irq(&pool->lock); | 2846 | spin_unlock_irq(&pool->lock); |
2816 | 2847 | ||
2817 | /* | 2848 | /* |
2818 | * If @max_active is 1 or rescuer is in use, flushing another work | 2849 | * Force a lock recursion deadlock when using flush_work() inside a |
2819 | * item on the same workqueue may lead to deadlock. Make sure the | 2850 | * single-threaded or rescuer equipped workqueue. |
2820 | * flusher is not running on the same workqueue by verifying write | 2851 | * |
2821 | * access. | 2852 | * For single threaded workqueues the deadlock happens when the work |
2853 | * is after the work issuing the flush_work(). For rescuer equipped | ||
2854 | * workqueues the deadlock happens when the rescuer stalls, blocking | ||
2855 | * forward progress. | ||
2822 | */ | 2856 | */ |
2823 | if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) | 2857 | if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) { |
2824 | lock_map_acquire(&pwq->wq->lockdep_map); | 2858 | lock_map_acquire(&pwq->wq->lockdep_map); |
2825 | else | 2859 | lock_map_release(&pwq->wq->lockdep_map); |
2826 | lock_map_acquire_read(&pwq->wq->lockdep_map); | 2860 | } |
2827 | lock_map_release(&pwq->wq->lockdep_map); | ||
2828 | 2861 | ||
2829 | return true; | 2862 | return true; |
2830 | already_gone: | 2863 | already_gone: |
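The misuse this annotation now turns into an immediate lockdep report can be pictured as follows; the work names are invented, work_b is assumed to be INIT_WORK()'ed elsewhere, and both works are assumed to be queued on the same ordered (max_active == 1) workqueue:

	static struct work_struct work_b;

	static void work_a_fn(struct work_struct *work)
	{
		/*
		 * If work_b sits behind this work on the single-threaded
		 * queue it can never start, so this flush never returns;
		 * the acquire/release pair above makes lockdep flag the
		 * self-recursion instead of silently hanging.
		 */
		flush_work(&work_b);
	}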
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2a9a8759752b..7396f5044397 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -1091,6 +1091,8 @@ config PROVE_LOCKING | |||
1091 | select DEBUG_MUTEXES | 1091 | select DEBUG_MUTEXES |
1092 | select DEBUG_RT_MUTEXES if RT_MUTEXES | 1092 | select DEBUG_RT_MUTEXES if RT_MUTEXES |
1093 | select DEBUG_LOCK_ALLOC | 1093 | select DEBUG_LOCK_ALLOC |
1094 | select LOCKDEP_CROSSRELEASE | ||
1095 | select LOCKDEP_COMPLETIONS | ||
1094 | select TRACE_IRQFLAGS | 1096 | select TRACE_IRQFLAGS |
1095 | default n | 1097 | default n |
1096 | help | 1098 | help |
@@ -1160,6 +1162,22 @@ config LOCK_STAT | |||
1160 | CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. | 1162 | CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. |
1161 | (CONFIG_LOCKDEP defines "acquire" and "release" events.) | 1163 | (CONFIG_LOCKDEP defines "acquire" and "release" events.) |
1162 | 1164 | ||
1165 | config LOCKDEP_CROSSRELEASE | ||
1166 | bool | ||
1167 | help | ||
1168 | This makes lockdep work for crosslock which is a lock allowed to | ||
1169 | be released in a different context from the acquisition context. | ||
1170 | Normally a lock must be released in the context acquiring the lock. | ||
1171 | However, relaxing this constraint lets synchronization primitives | ||
1172 | such as page locks or completions use the lock correctness | ||
1173 | detector, lockdep. | ||
1174 | |||
1175 | config LOCKDEP_COMPLETIONS | ||
1176 | bool | ||
1177 | help | ||
1178 | A deadlock caused by wait_for_completion() and complete() can be | ||
1179 | detected by lockdep using crossrelease feature. | ||
1180 | |||
1163 | config DEBUG_LOCKDEP | 1181 | config DEBUG_LOCKDEP |
1164 | bool "Lock dependency engine debugging" | 1182 | bool "Lock dependency engine debugging" |
1165 | depends on DEBUG_KERNEL && LOCKDEP | 1183 | depends on DEBUG_KERNEL && LOCKDEP |
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 6f2b135dc5e8..cd0b5c964bd0 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c | |||
@@ -363,6 +363,103 @@ static void rsem_AA3(void) | |||
363 | } | 363 | } |
364 | 364 | ||
365 | /* | 365 | /* |
366 | * read_lock(A) | ||
367 | * spin_lock(B) | ||
368 | * spin_lock(B) | ||
369 | * write_lock(A) | ||
370 | */ | ||
371 | static void rlock_ABBA1(void) | ||
372 | { | ||
373 | RL(X1); | ||
374 | L(Y1); | ||
375 | U(Y1); | ||
376 | RU(X1); | ||
377 | |||
378 | L(Y1); | ||
379 | WL(X1); | ||
380 | WU(X1); | ||
381 | U(Y1); // should fail | ||
382 | } | ||
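For readers unfamiliar with the selftest shorthand, the first new case expands to roughly the following, assuming the usual macro meanings (RL/RU read_lock/read_unlock and WL/WU write_lock/write_unlock on rwlock_X1, L/U raw spin lock/unlock on lock_Y1):

	read_lock(&rwlock_X1);
	raw_spin_lock(&lock_Y1);
	raw_spin_unlock(&lock_Y1);
	read_unlock(&rwlock_X1);

	raw_spin_lock(&lock_Y1);
	write_lock(&rwlock_X1);		/* ABBA against the read_lock above */
	write_unlock(&rwlock_X1);
	raw_spin_unlock(&lock_Y1);	/* lockdep is expected to flag this */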
383 | |||
384 | static void rwsem_ABBA1(void) | ||
385 | { | ||
386 | RSL(X1); | ||
387 | ML(Y1); | ||
388 | MU(Y1); | ||
389 | RSU(X1); | ||
390 | |||
391 | ML(Y1); | ||
392 | WSL(X1); | ||
393 | WSU(X1); | ||
394 | MU(Y1); // should fail | ||
395 | } | ||
396 | |||
397 | /* | ||
398 | * read_lock(A) | ||
399 | * spin_lock(B) | ||
400 | * spin_lock(B) | ||
401 | * read_lock(A) | ||
402 | */ | ||
403 | static void rlock_ABBA2(void) | ||
404 | { | ||
405 | RL(X1); | ||
406 | L(Y1); | ||
407 | U(Y1); | ||
408 | RU(X1); | ||
409 | |||
410 | L(Y1); | ||
411 | RL(X1); | ||
412 | RU(X1); | ||
413 | U(Y1); // should NOT fail | ||
414 | } | ||
415 | |||
416 | static void rwsem_ABBA2(void) | ||
417 | { | ||
418 | RSL(X1); | ||
419 | ML(Y1); | ||
420 | MU(Y1); | ||
421 | RSU(X1); | ||
422 | |||
423 | ML(Y1); | ||
424 | RSL(X1); | ||
425 | RSU(X1); | ||
426 | MU(Y1); // should fail | ||
427 | } | ||
428 | |||
429 | |||
430 | /* | ||
431 | * write_lock(A) | ||
432 | * spin_lock(B) | ||
433 | * spin_lock(B) | ||
434 | * write_lock(A) | ||
435 | */ | ||
436 | static void rlock_ABBA3(void) | ||
437 | { | ||
438 | WL(X1); | ||
439 | L(Y1); | ||
440 | U(Y1); | ||
441 | WU(X1); | ||
442 | |||
443 | L(Y1); | ||
444 | WL(X1); | ||
445 | WU(X1); | ||
446 | U(Y1); // should fail | ||
447 | } | ||
448 | |||
449 | static void rwsem_ABBA3(void) | ||
450 | { | ||
451 | WSL(X1); | ||
452 | ML(Y1); | ||
453 | MU(Y1); | ||
454 | WSU(X1); | ||
455 | |||
456 | ML(Y1); | ||
457 | WSL(X1); | ||
458 | WSU(X1); | ||
459 | MU(Y1); // should fail | ||
460 | } | ||
461 | |||
462 | /* | ||
366 | * ABBA deadlock: | 463 | * ABBA deadlock: |
367 | */ | 464 | */ |
368 | 465 | ||
@@ -1056,8 +1153,6 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) | |||
1056 | if (debug_locks != expected) { | 1153 | if (debug_locks != expected) { |
1057 | unexpected_testcase_failures++; | 1154 | unexpected_testcase_failures++; |
1058 | pr_cont("FAILED|"); | 1155 | pr_cont("FAILED|"); |
1059 | |||
1060 | dump_stack(); | ||
1061 | } else { | 1156 | } else { |
1062 | testcase_successes++; | 1157 | testcase_successes++; |
1063 | pr_cont(" ok |"); | 1158 | pr_cont(" ok |"); |
@@ -1933,6 +2028,30 @@ void locking_selftest(void) | |||
1933 | dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM); | 2028 | dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM); |
1934 | pr_cont("\n"); | 2029 | pr_cont("\n"); |
1935 | 2030 | ||
2031 | print_testname("mixed read-lock/lock-write ABBA"); | ||
2032 | pr_cont(" |"); | ||
2033 | dotest(rlock_ABBA1, FAILURE, LOCKTYPE_RWLOCK); | ||
2034 | /* | ||
2035 | * Lockdep does indeed fail here, but there's nothing we can do about | ||
2036 | * that now. Don't kill lockdep for it. | ||
2037 | */ | ||
2038 | unexpected_testcase_failures--; | ||
2039 | |||
2040 | pr_cont(" |"); | ||
2041 | dotest(rwsem_ABBA1, FAILURE, LOCKTYPE_RWSEM); | ||
2042 | |||
2043 | print_testname("mixed read-lock/lock-read ABBA"); | ||
2044 | pr_cont(" |"); | ||
2045 | dotest(rlock_ABBA2, SUCCESS, LOCKTYPE_RWLOCK); | ||
2046 | pr_cont(" |"); | ||
2047 | dotest(rwsem_ABBA2, FAILURE, LOCKTYPE_RWSEM); | ||
2048 | |||
2049 | print_testname("mixed write-lock/lock-write ABBA"); | ||
2050 | pr_cont(" |"); | ||
2051 | dotest(rlock_ABBA3, FAILURE, LOCKTYPE_RWLOCK); | ||
2052 | pr_cont(" |"); | ||
2053 | dotest(rwsem_ABBA3, FAILURE, LOCKTYPE_RWSEM); | ||
2054 | |||
1936 | printk(" --------------------------------------------------------------------------\n"); | 2055 | printk(" --------------------------------------------------------------------------\n"); |
1937 | 2056 | ||
1938 | /* | 2057 | /* |
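The selftests added above hinge on read-lock recursion: an in-kernel rwlock_t reader is never blocked by another reader (even with a writer queued), so the rlock_ABBA2 ordering cannot deadlock and is expected to pass, whereas the rwsem variant is expected to fail because down_read() can queue behind a waiting writer. A minimal sketch of the rlock_ABBA2 pattern using the real rwlock/spinlock API (ctx1()/ctx2() are hypothetical):

	#include <linux/spinlock.h>

	static DEFINE_RWLOCK(A);
	static DEFINE_SPINLOCK(B);

	static void ctx1(void)
	{
		read_lock(&A);
		spin_lock(&B);
		spin_unlock(&B);
		read_unlock(&A);
	}

	static void ctx2(void)
	{
		spin_lock(&B);
		read_lock(&A);		/* never blocks behind ctx1's read lock */
		read_unlock(&A);
		spin_unlock(&B);
	}

Swap ctx2()'s read_lock() for write_lock() and the classic ABBA deadlock is back, which is what rlock_ABBA1 and rlock_ABBA3 exercise.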
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 90731e3b7e58..3644ff918434 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1510,8 +1510,15 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) | |||
1510 | } | 1510 | } |
1511 | 1511 | ||
1512 | /* | 1512 | /* |
1513 | * The page_table_lock above provides a memory barrier | 1513 | * Since we took the NUMA fault, we must have observed the !accessible |
1514 | * with change_protection_range. | 1514 | * bit. Make sure all other CPUs agree with that, to avoid them |
1515 | * modifying the page we're about to migrate. | ||
1516 | * | ||
1517 | * Must be done under PTL such that we'll observe the relevant | ||
1518 | * inc_tlb_flush_pending(). | ||
1519 | * | ||
1520 | * We are not sure whether a pending TLB flush here is for a huge page | ||
1521 | * mapping or not, hence use the TLB range variant. | ||
1515 | */ | 1522 | */ |
1516 | if (mm_tlb_flush_pending(vma->vm_mm)) | 1523 | if (mm_tlb_flush_pending(vma->vm_mm)) |
1517 | flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); | 1524 | flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); |
@@ -1521,6 +1528,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) | |||
1521 | * and access rights restored. | 1528 | * and access rights restored. |
1522 | */ | 1529 | */ |
1523 | spin_unlock(vmf->ptl); | 1530 | spin_unlock(vmf->ptl); |
1531 | |||
1524 | migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma, | 1532 | migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma, |
1525 | vmf->pmd, pmd, vmf->address, page, target_nid); | 1533 | vmf->pmd, pmd, vmf->address, page, target_nid); |
1526 | if (migrated) { | 1534 | if (migrated) { |
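The rewritten comment relies on a specific pairing between the protection-change side and the NUMA-fault side. A hedged sketch of that pairing (both function names are illustrative, condensed from change_protection() and do_huge_pmd_numa_page()):

	#include <linux/mm.h>
	#include <asm/tlbflush.h>

	static void prot_change_side(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end)
	{
		struct mm_struct *mm = vma->vm_mm;

		inc_tlb_flush_pending(mm);	/* visible before the PTEs change */
		/* ... clear the accessible bit under the page table lock ... */
		flush_tlb_range(vma, start, end);
		dec_tlb_flush_pending(mm);
	}

	static void numa_fault_side(struct vm_area_struct *vma,
				    unsigned long start, unsigned long end)
	{
		/*
		 * Runs under the PTL after observing !accessible, so the
		 * inc_tlb_flush_pending() above is guaranteed visible here.
		 */
		if (mm_tlb_flush_pending(vma->vm_mm))
			flush_tlb_range(vma, start, end);
	}

The range flush is used because, as the comment says, the pending flush may or may not cover a huge-page mapping.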
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index ca11bc4ce205..6f319fb81718 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c | |||
@@ -267,13 +267,13 @@ static void check_memory_region(unsigned long addr, | |||
267 | check_memory_region_inline(addr, size, write, ret_ip); | 267 | check_memory_region_inline(addr, size, write, ret_ip); |
268 | } | 268 | } |
269 | 269 | ||
270 | void kasan_check_read(const void *p, unsigned int size) | 270 | void kasan_check_read(const volatile void *p, unsigned int size) |
271 | { | 271 | { |
272 | check_memory_region((unsigned long)p, size, false, _RET_IP_); | 272 | check_memory_region((unsigned long)p, size, false, _RET_IP_); |
273 | } | 273 | } |
274 | EXPORT_SYMBOL(kasan_check_read); | 274 | EXPORT_SYMBOL(kasan_check_read); |
275 | 275 | ||
276 | void kasan_check_write(const void *p, unsigned int size) | 276 | void kasan_check_write(const volatile void *p, unsigned int size) |
277 | { | 277 | { |
278 | check_memory_region((unsigned long)p, size, true, _RET_IP_); | 278 | check_memory_region((unsigned long)p, size, true, _RET_IP_); |
279 | } | 279 | } |
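A minimal sketch of why the volatile qualifier is needed: instrumented wrappers around the atomic bitops receive the 'volatile unsigned long *' that the bitops API uses, and passing that to a plain 'const void *' parameter would discard the qualifier (example_set_bit() is a hypothetical wrapper, not a kernel API):

	#include <linux/kasan-checks.h>
	#include <linux/bitops.h>

	static inline void example_set_bit(long nr, volatile unsigned long *addr)
	{
		/* accepted only now that the prototype is 'const volatile void *' */
		kasan_check_write(addr + BIT_WORD(nr), sizeof(long));
		set_bit(nr, addr);	/* the real atomic bit operation */
	}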
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1423da8dd16f..9327a940e373 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -66,6 +66,7 @@ | |||
66 | #include <linux/kthread.h> | 66 | #include <linux/kthread.h> |
67 | #include <linux/memcontrol.h> | 67 | #include <linux/memcontrol.h> |
68 | #include <linux/ftrace.h> | 68 | #include <linux/ftrace.h> |
69 | #include <linux/lockdep.h> | ||
69 | #include <linux/nmi.h> | 70 | #include <linux/nmi.h> |
70 | 71 | ||
71 | #include <asm/sections.h> | 72 | #include <asm/sections.h> |
@@ -3513,6 +3514,47 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla | |||
3513 | } | 3514 | } |
3514 | #endif /* CONFIG_COMPACTION */ | 3515 | #endif /* CONFIG_COMPACTION */ |
3515 | 3516 | ||
3517 | #ifdef CONFIG_LOCKDEP | ||
3518 | struct lockdep_map __fs_reclaim_map = | ||
3519 | STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map); | ||
3520 | |||
3521 | static bool __need_fs_reclaim(gfp_t gfp_mask) | ||
3522 | { | ||
3523 | gfp_mask = current_gfp_context(gfp_mask); | ||
3524 | |||
3525 | /* no reclaim without waiting on it */ | ||
3526 | if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) | ||
3527 | return false; | ||
3528 | |||
3529 | /* this guy won't enter reclaim */ | ||
3530 | if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) | ||
3531 | return false; | ||
3532 | |||
3533 | /* We're only interested in __GFP_FS allocations for now */ | ||
3534 | if (!(gfp_mask & __GFP_FS)) | ||
3535 | return false; | ||
3536 | |||
3537 | if (gfp_mask & __GFP_NOLOCKDEP) | ||
3538 | return false; | ||
3539 | |||
3540 | return true; | ||
3541 | } | ||
3542 | |||
3543 | void fs_reclaim_acquire(gfp_t gfp_mask) | ||
3544 | { | ||
3545 | if (__need_fs_reclaim(gfp_mask)) | ||
3546 | lock_map_acquire(&__fs_reclaim_map); | ||
3547 | } | ||
3548 | EXPORT_SYMBOL_GPL(fs_reclaim_acquire); | ||
3549 | |||
3550 | void fs_reclaim_release(gfp_t gfp_mask) | ||
3551 | { | ||
3552 | if (__need_fs_reclaim(gfp_mask)) | ||
3553 | lock_map_release(&__fs_reclaim_map); | ||
3554 | } | ||
3555 | EXPORT_SYMBOL_GPL(fs_reclaim_release); | ||
3556 | #endif | ||
3557 | |||
3516 | /* Perform direct synchronous page reclaim */ | 3558 | /* Perform direct synchronous page reclaim */ |
3517 | static int | 3559 | static int |
3518 | __perform_reclaim(gfp_t gfp_mask, unsigned int order, | 3560 | __perform_reclaim(gfp_t gfp_mask, unsigned int order, |
@@ -3527,7 +3569,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, | |||
3527 | /* We now go into synchronous reclaim */ | 3569 | /* We now go into synchronous reclaim */ |
3528 | cpuset_memory_pressure_bump(); | 3570 | cpuset_memory_pressure_bump(); |
3529 | noreclaim_flag = memalloc_noreclaim_save(); | 3571 | noreclaim_flag = memalloc_noreclaim_save(); |
3530 | lockdep_set_current_reclaim_state(gfp_mask); | 3572 | fs_reclaim_acquire(gfp_mask); |
3531 | reclaim_state.reclaimed_slab = 0; | 3573 | reclaim_state.reclaimed_slab = 0; |
3532 | current->reclaim_state = &reclaim_state; | 3574 | current->reclaim_state = &reclaim_state; |
3533 | 3575 | ||
@@ -3535,7 +3577,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, | |||
3535 | ac->nodemask); | 3577 | ac->nodemask); |
3536 | 3578 | ||
3537 | current->reclaim_state = NULL; | 3579 | current->reclaim_state = NULL; |
3538 | lockdep_clear_current_reclaim_state(); | 3580 | fs_reclaim_release(gfp_mask); |
3539 | memalloc_noreclaim_restore(noreclaim_flag); | 3581 | memalloc_noreclaim_restore(noreclaim_flag); |
3540 | 3582 | ||
3541 | cond_resched(); | 3583 | cond_resched(); |
@@ -4064,7 +4106,8 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, | |||
4064 | *alloc_flags |= ALLOC_CPUSET; | 4106 | *alloc_flags |= ALLOC_CPUSET; |
4065 | } | 4107 | } |
4066 | 4108 | ||
4067 | lockdep_trace_alloc(gfp_mask); | 4109 | fs_reclaim_acquire(gfp_mask); |
4110 | fs_reclaim_release(gfp_mask); | ||
4068 | 4111 | ||
4069 | might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); | 4112 | might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); |
4070 | 4113 | ||
diff --git a/mm/slab.h b/mm/slab.h --- a/mm/slab.h +++ b/mm/slab.h | |||
@@ -43,6 +43,7 @@ struct kmem_cache { | |||
43 | #include <linux/kasan.h> | 43 | #include <linux/kasan.h> |
44 | #include <linux/kmemleak.h> | 44 | #include <linux/kmemleak.h> |
45 | #include <linux/random.h> | 45 | #include <linux/random.h> |
46 | #include <linux/sched/mm.h> | ||
46 | 47 | ||
47 | /* | 48 | /* |
48 | * State of the slab allocator. | 49 | * State of the slab allocator. |
@@ -412,7 +413,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, | |||
412 | gfp_t flags) | 413 | gfp_t flags) |
413 | { | 414 | { |
414 | flags &= gfp_allowed_mask; | 415 | flags &= gfp_allowed_mask; |
415 | lockdep_trace_alloc(flags); | 416 | |
417 | fs_reclaim_acquire(flags); | ||
418 | fs_reclaim_release(flags); | ||
419 | |||
416 | might_sleep_if(gfpflags_allow_blocking(flags)); | 420 | might_sleep_if(gfpflags_allow_blocking(flags)); |
417 | 421 | ||
418 | if (should_failslab(s, flags)) | 422 | if (should_failslab(s, flags)) |
diff --git a/mm/slob.c b/mm/slob.c --- a/mm/slob.c +++ b/mm/slob.c | |||
@@ -432,7 +432,8 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) | |||
432 | 432 | ||
433 | gfp &= gfp_allowed_mask; | 433 | gfp &= gfp_allowed_mask; |
434 | 434 | ||
435 | lockdep_trace_alloc(gfp); | 435 | fs_reclaim_acquire(gfp); |
436 | fs_reclaim_release(gfp); | ||
436 | 437 | ||
437 | if (size < PAGE_SIZE - align) { | 438 | if (size < PAGE_SIZE - align) { |
438 | if (!size) | 439 | if (!size) |
@@ -538,7 +539,8 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | |||
538 | 539 | ||
539 | flags &= gfp_allowed_mask; | 540 | flags &= gfp_allowed_mask; |
540 | 541 | ||
541 | lockdep_trace_alloc(flags); | 542 | fs_reclaim_acquire(flags); |
543 | fs_reclaim_release(flags); | ||
542 | 544 | ||
543 | if (c->size < PAGE_SIZE) { | 545 | if (c->size < PAGE_SIZE) { |
544 | b = slob_alloc(c->size, flags, c->align, node); | 546 | b = slob_alloc(c->size, flags, c->align, node); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index a1af041930a6..f957afe900ec 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -3525,8 +3525,6 @@ static int kswapd(void *p) | |||
3525 | }; | 3525 | }; |
3526 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); | 3526 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); |
3527 | 3527 | ||
3528 | lockdep_set_current_reclaim_state(GFP_KERNEL); | ||
3529 | |||
3530 | if (!cpumask_empty(cpumask)) | 3528 | if (!cpumask_empty(cpumask)) |
3531 | set_cpus_allowed_ptr(tsk, cpumask); | 3529 | set_cpus_allowed_ptr(tsk, cpumask); |
3532 | current->reclaim_state = &reclaim_state; | 3530 | current->reclaim_state = &reclaim_state; |
@@ -3585,14 +3583,15 @@ kswapd_try_sleep: | |||
3585 | */ | 3583 | */ |
3586 | trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, | 3584 | trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, |
3587 | alloc_order); | 3585 | alloc_order); |
3586 | fs_reclaim_acquire(GFP_KERNEL); | ||
3588 | reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); | 3587 | reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); |
3588 | fs_reclaim_release(GFP_KERNEL); | ||
3589 | if (reclaim_order < alloc_order) | 3589 | if (reclaim_order < alloc_order) |
3590 | goto kswapd_try_sleep; | 3590 | goto kswapd_try_sleep; |
3591 | } | 3591 | } |
3592 | 3592 | ||
3593 | tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); | 3593 | tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); |
3594 | current->reclaim_state = NULL; | 3594 | current->reclaim_state = NULL; |
3595 | lockdep_clear_current_reclaim_state(); | ||
3596 | 3595 | ||
3597 | return 0; | 3596 | return 0; |
3598 | } | 3597 | } |
@@ -3655,14 +3654,14 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | |||
3655 | unsigned int noreclaim_flag; | 3654 | unsigned int noreclaim_flag; |
3656 | 3655 | ||
3657 | noreclaim_flag = memalloc_noreclaim_save(); | 3656 | noreclaim_flag = memalloc_noreclaim_save(); |
3658 | lockdep_set_current_reclaim_state(sc.gfp_mask); | 3657 | fs_reclaim_acquire(sc.gfp_mask); |
3659 | reclaim_state.reclaimed_slab = 0; | 3658 | reclaim_state.reclaimed_slab = 0; |
3660 | p->reclaim_state = &reclaim_state; | 3659 | p->reclaim_state = &reclaim_state; |
3661 | 3660 | ||
3662 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc); | 3661 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc); |
3663 | 3662 | ||
3664 | p->reclaim_state = NULL; | 3663 | p->reclaim_state = NULL; |
3665 | lockdep_clear_current_reclaim_state(); | 3664 | fs_reclaim_release(sc.gfp_mask); |
3666 | memalloc_noreclaim_restore(noreclaim_flag); | 3665 | memalloc_noreclaim_restore(noreclaim_flag); |
3667 | 3666 | ||
3668 | return nr_reclaimed; | 3667 | return nr_reclaimed; |
@@ -3847,7 +3846,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in | |||
3847 | */ | 3846 | */ |
3848 | noreclaim_flag = memalloc_noreclaim_save(); | 3847 | noreclaim_flag = memalloc_noreclaim_save(); |
3849 | p->flags |= PF_SWAPWRITE; | 3848 | p->flags |= PF_SWAPWRITE; |
3850 | lockdep_set_current_reclaim_state(sc.gfp_mask); | 3849 | fs_reclaim_acquire(sc.gfp_mask); |
3851 | reclaim_state.reclaimed_slab = 0; | 3850 | reclaim_state.reclaimed_slab = 0; |
3852 | p->reclaim_state = &reclaim_state; | 3851 | p->reclaim_state = &reclaim_state; |
3853 | 3852 | ||
@@ -3862,9 +3861,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in | |||
3862 | } | 3861 | } |
3863 | 3862 | ||
3864 | p->reclaim_state = NULL; | 3863 | p->reclaim_state = NULL; |
3864 | fs_reclaim_release(gfp_mask); | ||
3865 | current->flags &= ~PF_SWAPWRITE; | 3865 | current->flags &= ~PF_SWAPWRITE; |
3866 | memalloc_noreclaim_restore(noreclaim_flag); | 3866 | memalloc_noreclaim_restore(noreclaim_flag); |
3867 | lockdep_clear_current_reclaim_state(); | ||
3868 | return sc.nr_reclaimed >= nr_pages; | 3867 | return sc.nr_reclaimed >= nr_pages; |
3869 | } | 3868 | } |
3870 | 3869 | ||
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 62344804baae..38e795e0c4bf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -1810,8 +1810,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1810 | static struct static_key udp_encap_needed __read_mostly; | 1810 | static struct static_key udp_encap_needed __read_mostly; |
1811 | void udp_encap_enable(void) | 1811 | void udp_encap_enable(void) |
1812 | { | 1812 | { |
1813 | if (!static_key_enabled(&udp_encap_needed)) | 1813 | static_key_enable(&udp_encap_needed); |
1814 | static_key_slow_inc(&udp_encap_needed); | ||
1815 | } | 1814 | } |
1816 | EXPORT_SYMBOL(udp_encap_enable); | 1815 | EXPORT_SYMBOL(udp_encap_enable); |
1817 | 1816 | ||
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d6886228e1d0..56030d45823a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -575,8 +575,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, | |||
575 | static struct static_key udpv6_encap_needed __read_mostly; | 575 | static struct static_key udpv6_encap_needed __read_mostly; |
576 | void udpv6_encap_enable(void) | 576 | void udpv6_encap_enable(void) |
577 | { | 577 | { |
578 | if (!static_key_enabled(&udpv6_encap_needed)) | 578 | static_key_enable(&udpv6_encap_needed); |
579 | static_key_slow_inc(&udpv6_encap_needed); | ||
580 | } | 579 | } |
581 | EXPORT_SYMBOL(udpv6_encap_enable); | 580 | EXPORT_SYMBOL(udpv6_encap_enable); |
582 | 581 | ||
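The udp_encap_enable()/udpv6_encap_enable() change replaces an open-coded check-then-increment with static_key_enable(), which may be called concurrently and always leaves the key enabled with a single reference. A hedged sketch of the difference (example_key and the two helpers are hypothetical):

	#include <linux/jump_label.h>

	static struct static_key example_key = STATIC_KEY_INIT_FALSE;

	static void racy_enable(void)			/* the old pattern */
	{
		if (!static_key_enabled(&example_key))	/* two tasks may both see false */
			static_key_slow_inc(&example_key);	/* ... and both increment */
	}

	static void safe_enable(void)			/* the new pattern */
	{
		static_key_enable(&example_key);	/* idempotent, count stays at 1 */
	}

With the old pattern a later static_key_slow_dec() may fail to disable the key, because the reference count can end up above one.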