105 files changed, 4638 insertions, 738 deletions
diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt new file mode 100644 index 000000000000..5550bfdcce5f --- /dev/null +++ b/Documentation/atomic_bitops.txt | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | |||
| 2 | On atomic bitops. | ||
| 3 | |||
| 4 | |||
| 5 | While our bitmap_{}() functions are non-atomic, we have a number of operations | ||
| 6 | operating on single bits in a bitmap that are atomic. | ||
| 7 | |||
| 8 | |||
| 9 | API | ||
| 10 | --- | ||
| 11 | |||
| 12 | The single bit operations are: | ||
| 13 | |||
| 14 | Non-RMW ops: | ||
| 15 | |||
| 16 | test_bit() | ||
| 17 | |||
| 18 | RMW atomic operations without return value: | ||
| 19 | |||
| 20 | {set,clear,change}_bit() | ||
| 21 | clear_bit_unlock() | ||
| 22 | |||
| 23 | RMW atomic operations with return value: | ||
| 24 | |||
| 25 | test_and_{set,clear,change}_bit() | ||
| 26 | test_and_set_bit_lock() | ||
| 27 | |||
| 28 | Barriers: | ||
| 29 | |||
| 30 | smp_mb__{before,after}_atomic() | ||
| 31 | |||
| 32 | |||
| 33 | All RMW atomic operations have a '__' prefixed variant which is non-atomic. | ||
| 34 | |||
| 35 | |||
| 36 | SEMANTICS | ||
| 37 | --------- | ||
| 38 | |||
| 39 | Non-atomic ops: | ||
| 40 | |||
| 41 | In particular __clear_bit_unlock() suffers the same issue as atomic_set(), | ||
| 42 | which is why the generic version maps to clear_bit_unlock(), see atomic_t.txt. | ||
| 43 | |||
| 44 | |||
| 45 | RMW ops: | ||
| 46 | |||
| 47 | The test_and_{}_bit() operations return the original value of the bit. | ||
| 48 | |||
| 49 | |||
| 50 | ORDERING | ||
| 51 | -------- | ||
| 52 | |||
| 53 | Like with atomic_t, the rule of thumb is: | ||
| 54 | |||
| 55 | - non-RMW operations are unordered; | ||
| 56 | |||
| 57 | - RMW operations that have no return value are unordered; | ||
| 58 | |||
| 59 | - RMW operations that have a return value are fully ordered. | ||
| 60 | |||
| 61 | Except for test_and_set_bit_lock() which has ACQUIRE semantics and | ||
| 62 | clear_bit_unlock() which has RELEASE semantics. | ||
| 63 | |||
| 64 | Since a platform only has a single means of achieving atomic operations, | ||
| 65 | the same barriers as for atomic_t are used, see atomic_t.txt. | ||
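For instance, test_and_set_bit_lock() and clear_bit_unlock() are exactly what a
simple bit-based lock needs. The sketch below is illustrative only (the my_*
names are made up; real code should use bit_spin_lock()):

  static unsigned long my_bits;         /* bit 0 serves as a lock bit */

  static void my_bit_lock(void)
  {
        /* ACQUIRE: critical section accesses cannot move up past this */
        while (test_and_set_bit_lock(0, &my_bits))
                cpu_relax();
  }

  static void my_bit_unlock(void)
  {
        /* RELEASE: critical section accesses cannot move down past this */
        clear_bit_unlock(0, &my_bits);
  }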
| 66 | |||
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt new file mode 100644 index 000000000000..eee127115277 --- /dev/null +++ b/Documentation/atomic_t.txt | |||
| @@ -0,0 +1,200 @@ | |||
| 1 | |||
| 2 | On atomic types (atomic_t atomic64_t and atomic_long_t). | ||
| 3 | |||
| 4 | The atomic type provides an interface to the architecture's means of atomic | ||
| 5 | RMW operations between CPUs (atomic operations on MMIO are not supported and | ||
| 6 | can lead to fatal traps on some platforms). | ||
| 7 | |||
| 8 | API | ||
| 9 | --- | ||
| 10 | |||
| 11 | The 'full' API consists of (atomic64_ and atomic_long_ prefixes omitted for | ||
| 12 | brevity): | ||
| 13 | |||
| 14 | Non-RMW ops: | ||
| 15 | |||
| 16 | atomic_read(), atomic_set() | ||
| 17 | atomic_read_acquire(), atomic_set_release() | ||
| 18 | |||
| 19 | |||
| 20 | RMW atomic operations: | ||
| 21 | |||
| 22 | Arithmetic: | ||
| 23 | |||
| 24 | atomic_{add,sub,inc,dec}() | ||
| 25 | atomic_{add,sub,inc,dec}_return{,_relaxed,_acquire,_release}() | ||
| 26 | atomic_fetch_{add,sub,inc,dec}{,_relaxed,_acquire,_release}() | ||
| 27 | |||
| 28 | |||
| 29 | Bitwise: | ||
| 30 | |||
| 31 | atomic_{and,or,xor,andnot}() | ||
| 32 | atomic_fetch_{and,or,xor,andnot}{,_relaxed,_acquire,_release}() | ||
| 33 | |||
| 34 | |||
| 35 | Swap: | ||
| 36 | |||
| 37 | atomic_xchg{,_relaxed,_acquire,_release}() | ||
| 38 | atomic_cmpxchg{,_relaxed,_acquire,_release}() | ||
| 39 | atomic_try_cmpxchg{,_relaxed,_acquire,_release}() | ||
| 40 | |||
| 41 | |||
| 42 | Reference count (but please see refcount_t): | ||
| 43 | |||
| 44 | atomic_add_unless(), atomic_inc_not_zero() | ||
| 45 | atomic_sub_and_test(), atomic_dec_and_test() | ||
| 46 | |||
| 47 | |||
| 48 | Misc: | ||
| 49 | |||
| 50 | atomic_inc_and_test(), atomic_add_negative() | ||
| 51 | atomic_dec_unless_positive(), atomic_inc_unless_negative() | ||
| 52 | |||
| 53 | |||
| 54 | Barriers: | ||
| 55 | |||
| 56 | smp_mb__{before,after}_atomic() | ||
| 57 | |||
| 58 | |||
| 59 | |||
| 60 | SEMANTICS | ||
| 61 | --------- | ||
| 62 | |||
| 63 | Non-RMW ops: | ||
| 64 | |||
| 65 | The non-RMW ops are (typically) regular LOADs and STOREs and are canonically | ||
| 66 | implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and | ||
| 67 | smp_store_release() respectively. | ||
| 68 | |||
| 69 | The one detail to this is that atomic_set{}() should be observable to the RMW | ||
| 70 | ops. That is: | ||
| 71 | |||
| 72 | C atomic-set | ||
| 73 | |||
| 74 | { | ||
| 75 | atomic_set(v, 1); | ||
| 76 | } | ||
| 77 | |||
| 78 | P1(atomic_t *v) | ||
| 79 | { | ||
| 80 | atomic_add_unless(v, 1, 0); | ||
| 81 | } | ||
| 82 | |||
| 83 | P2(atomic_t *v) | ||
| 84 | { | ||
| 85 | atomic_set(v, 0); | ||
| 86 | } | ||
| 87 | |||
| 88 | exists | ||
| 89 | (v=2) | ||
| 90 | |||
| 91 | In this case we would expect the atomic_set() from P2 to either happen | ||
| 92 | before the atomic_add_unless() of P1, in which case the latter would no-op, or | ||
| 93 | _after_ it, in which case we'd overwrite its result. In no case is "2" a valid | ||
| 94 | outcome. | ||
| 95 | |||
| 96 | This is typically true on 'normal' platforms, where a regular competing STORE | ||
| 97 | will invalidate an LL/SC reservation or make a CMPXCHG fail. | ||
| 98 | |||
| 99 | The obvious case where this is not so is when we need to implement atomic ops | ||
| 100 | with a lock: | ||
| 101 | |||
| 102 | CPU0                                          CPU1 | ||
| 103 | |||
| 104 | atomic_add_unless(v, 1, 0); | ||
| 105 |   lock(); | ||
| 106 |   ret = READ_ONCE(v->counter); // == 1 | ||
| 107 |                                               atomic_set(v, 0); | ||
| 108 |   if (ret != u)                                 WRITE_ONCE(v->counter, 0); | ||
| 109 |     WRITE_ONCE(v->counter, ret + 1); | ||
| 110 |   unlock(); | ||
| 111 | |||
| 112 | the typical solution is to then implement atomic_set{}() with atomic_xchg(). | ||
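A sketch of that solution for a hypothetical lock-based architecture
(illustrative only, not any particular architecture's code):

  static inline void my_atomic_set(atomic_t *v, int i)
  {
        /*
         * atomic_xchg() takes the same lock the other RMW ops take, so
         * this store cannot slip into the middle of a locked RMW; the
         * returned old value is simply discarded.
         */
        (void)atomic_xchg(v, i);
  }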
| 113 | |||
| 114 | |||
| 115 | RMW ops: | ||
| 116 | |||
| 117 | These come in various forms: | ||
| 118 | |||
| 119 | - plain operations without return value: atomic_{}() | ||
| 120 | |||
| 121 | - operations which return the modified value: atomic_{}_return() | ||
| 122 | |||
| 123 | these are limited to the arithmetic operations because those are | ||
| 124 | reversible. Bitops are irreversible and therefore the modified value | ||
| 125 | is of dubious utility. | ||
| 126 | |||
| 127 | - operations which return the original value: atomic_fetch_{}() | ||
| 128 | |||
| 129 | - swap operations: xchg(), cmpxchg() and try_cmpxchg() | ||
| 130 | |||
| 131 | - misc; special purpose operations that are commonly used and would, | ||
| 132 | given the interface, normally be implemented using (try_)cmpxchg loops, | ||
| 133 | but are time critical and can, typically on LL/SC architectures, be | ||
| 134 | implemented more efficiently. | ||
| 135 | |||
| 136 | All these operations are SMP atomic; that is, the operations (for a single | ||
| 137 | atomic variable) can be fully ordered and no intermediate state is lost or | ||
| 138 | visible. | ||
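As an illustration of the return-value conventions above, an
increment-unless-zero can be built from an atomic_try_cmpxchg() loop. This is a
sketch only (atomic_inc_not_zero() already exists for real use):

  static inline bool my_inc_not_zero(atomic_t *v)
  {
        int old = atomic_read(v);

        do {
                if (!old)
                        return false;   /* saw zero; do not increment */
                /* on failure, atomic_try_cmpxchg() updates 'old' */
        } while (!atomic_try_cmpxchg(v, &old, old + 1));

        return true;
  }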
| 139 | |||
| 140 | |||
| 141 | ORDERING (go read memory-barriers.txt first) | ||
| 142 | -------- | ||
| 143 | |||
| 144 | The rule of thumb: | ||
| 145 | |||
| 146 | - non-RMW operations are unordered; | ||
| 147 | |||
| 148 | - RMW operations that have no return value are unordered; | ||
| 149 | |||
| 150 | - RMW operations that have a return value are fully ordered; | ||
| 151 | |||
| 152 | - RMW operations that are conditional are unordered on FAILURE, | ||
| 153 | otherwise the above rules apply. | ||
| 154 | |||
| 155 | Except of course when an operation has an explicit ordering like: | ||
| 156 | |||
| 157 | {}_relaxed: unordered | ||
| 158 | {}_acquire: the R of the RMW (or atomic_read) is an ACQUIRE | ||
| 159 | {}_release: the W of the RMW (or atomic_set) is a RELEASE | ||
| 160 | |||
| 161 | Where 'unordered' is against other memory locations. Address dependencies are | ||
| 162 | not defeated. | ||
| 163 | |||
| 164 | Fully ordered primitives are ordered against everything prior and everything | ||
| 165 | subsequent. Therefore a fully ordered primitive is like having an smp_mb() | ||
| 166 | before and an smp_mb() after the primitive. | ||
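For example, in a store-buffering style test built from fully ordered RMW ops
(a sketch; the names are made up), no extra barriers are needed:

  atomic_t flag_a = ATOMIC_INIT(0);
  atomic_t flag_b = ATOMIC_INIT(0);

  int cpu0(void)
  {
        (void)atomic_xchg(&flag_a, 1);  /* fully ordered RMW */
        return atomic_read(&flag_b);
  }

  int cpu1(void)
  {
        (void)atomic_xchg(&flag_b, 1);  /* fully ordered RMW */
        return atomic_read(&flag_a);
  }

Both functions returning 0 is forbidden, exactly as if each xchg were bracketed
by smp_mb().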
| 167 | |||
| 168 | |||
| 169 | The barriers: | ||
| 170 | |||
| 171 | smp_mb__{before,after}_atomic() | ||
| 172 | |||
| 173 | only apply to the RMW ops and can be used to augment/upgrade the ordering | ||
| 174 | inherent to the used atomic op. These barriers provide a full smp_mb(). | ||
| 175 | |||
| 176 | These helper barriers exist because architectures have varying implicit | ||
| 177 | ordering on their SMP atomic primitives. For example our TSO architectures | ||
| 178 | provide fully ordered atomics and these barriers are no-ops. | ||
| 179 | |||
| 180 | Thus: | ||
| 181 | |||
| 182 | atomic_fetch_add(); | ||
| 183 | |||
| 184 | is equivalent to: | ||
| 185 | |||
| 186 | smp_mb__before_atomic(); | ||
| 187 | atomic_fetch_add_relaxed(); | ||
| 188 | smp_mb__after_atomic(); | ||
| 189 | |||
| 190 | However the atomic_fetch_add() might be implemented more efficiently. | ||
| 191 | |||
| 192 | Further, while something like: | ||
| 193 | |||
| 194 | smp_mb__before_atomic(); | ||
| 195 | atomic_dec(&X); | ||
| 196 | |||
| 197 | is a 'typical' RELEASE pattern, the barrier is strictly stronger than | ||
| 198 | a RELEASE. Similarly for something like: | ||
| 199 | |||
| 200 | atomic_inc(&X); | ||
| 201 | smp_mb__after_atomic(); | ||
| 202 |||
| 203 | is an ACQUIRE pattern (though very much not typical), but again the barrier | ||
| 204 | is strictly stronger than ACQUIRE. | ||
diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt new file mode 100644 index 000000000000..bdf1423d5f99 --- /dev/null +++ b/Documentation/locking/crossrelease.txt | |||
| @@ -0,0 +1,874 @@ | |||
| 1 | Crossrelease | ||
| 2 | ============ | ||
| 3 | |||
| 4 | Started by Byungchul Park <byungchul.park@lge.com> | ||
| 5 | |||
| 6 | Contents: | ||
| 7 | |||
| 8 | (*) Background | ||
| 9 | |||
| 10 | - What causes deadlock | ||
| 11 | - How lockdep works | ||
| 12 | |||
| 13 | (*) Limitation | ||
| 14 | |||
| 15 | - Limit lockdep | ||
| 16 | - Pros from the limitation | ||
| 17 | - Cons from the limitation | ||
| 18 | - Relax the limitation | ||
| 19 | |||
| 20 | (*) Crossrelease | ||
| 21 | |||
| 22 | - Introduce crossrelease | ||
| 23 | - Introduce commit | ||
| 24 | |||
| 25 | (*) Implementation | ||
| 26 | |||
| 27 | - Data structures | ||
| 28 | - How crossrelease works | ||
| 29 | |||
| 30 | (*) Optimizations | ||
| 31 | |||
| 32 | - Avoid duplication | ||
| 33 | - Lockless for hot paths | ||
| 34 | |||
| 35 | (*) APPENDIX A: What lockdep does to work aggressively | ||
| 36 | |||
| 37 | (*) APPENDIX B: How to avoid adding false dependencies | ||
| 38 | |||
| 39 | |||
| 40 | ========== | ||
| 41 | Background | ||
| 42 | ========== | ||
| 43 | |||
| 44 | What causes deadlock | ||
| 45 | -------------------- | ||
| 46 | |||
| 47 | A deadlock occurs when a context is waiting for an event to happen, | ||
| 48 | which is impossible because another (or the same) context that can trigger the | ||
| 49 | event is also waiting for another (or the same) event to happen, which is | ||
| 50 | also impossible for the same reason. | ||
| 51 | |||
| 52 | For example: | ||
| 53 | |||
| 54 | A context going to trigger event C is waiting for event A to happen. | ||
| 55 | A context going to trigger event A is waiting for event B to happen. | ||
| 56 | A context going to trigger event B is waiting for event C to happen. | ||
| 57 | |||
| 58 | A deadlock occurs when these three wait operations run at the same time, | ||
| 59 | because event C cannot be triggered if event A does not happen, which in | ||
| 60 | turn cannot be triggered if event B does not happen, which in turn | ||
| 61 | cannot be triggered if event C does not happen. After all, no event can | ||
| 62 | be triggered since none of them can ever meet its condition to wake up. | ||
| 63 | |||
| 64 | A dependency might exist between two waiters and a deadlock might happen | ||
| 65 | due to an incorrect relationship between dependencies. Thus, we must | ||
| 66 | define what a dependency is first. A dependency exists between them if: | ||
| 67 | |||
| 68 | 1. There are two waiters waiting for each event at a given time. | ||
| 69 | 2. The only way to wake up each waiter is to trigger its event. | ||
| 70 | 3. Whether one can be woken up depends on whether the other can. | ||
| 71 | |||
| 72 | Each wait in the example creates its dependency like: | ||
| 73 | |||
| 74 | Event C depends on event A. | ||
| 75 | Event A depends on event B. | ||
| 76 | Event B depends on event C. | ||
| 77 | |||
| 78 | NOTE: Precisely speaking, a dependency is one between whether a | ||
| 79 | waiter for an event can be woken up and whether another waiter for | ||
| 80 | another event can be woken up. However from now on, we will describe | ||
| 81 | a dependency as if it's one between an event and another event for | ||
| 82 | simplicity. | ||
| 83 | |||
| 84 | And they form circular dependencies like: | ||
| 85 | |||
| 86 |  -> C -> A -> B - | ||
| 87 | /                 \ | ||
| 88 | \                 / | ||
| 89 |  ----------------- | ||
| 90 | |||
| 91 | where 'A -> B' means that event A depends on event B. | ||
| 92 | |||
| 93 | Such circular dependencies lead to a deadlock since no waiter can meet | ||
| 94 | its condition to wake up as described. | ||
| 95 | |||
| 96 | CONCLUSION | ||
| 97 | |||
| 98 | Circular dependencies cause a deadlock. | ||
| 99 | |||
| 100 | |||
| 101 | How lockdep works | ||
| 102 | ----------------- | ||
| 103 | |||
| 104 | Lockdep tries to detect a deadlock by checking dependencies created by | ||
| 105 | lock operations, acquire and release. Waiting for a lock corresponds to | ||
| 106 | waiting for an event, and releasing a lock corresponds to triggering an | ||
| 107 | event in the previous section. | ||
| 108 | |||
| 109 | In short, lockdep does: | ||
| 110 | |||
| 111 | 1. Detect a new dependency. | ||
| 112 | 2. Add the dependency into a global graph. | ||
| 113 | 3. Check if that makes dependencies circular. | ||
| 114 | 4. Report a deadlock or its possibility if so. | ||
| 115 | |||
| 116 | For example, consider a graph built by lockdep that looks like: | ||
| 117 | |||
| 118 | A -> B - | ||
| 119 |         \ | ||
| 120 |          -> E | ||
| 121 |         / | ||
| 122 | C -> D - | ||
| 123 | |||
| 124 | where A, B,..., E are different lock classes. | ||
| 125 | |||
| 126 | Lockdep will add a dependency into the graph on detection of a new | ||
| 127 | dependency. For example, it will add a dependency 'E -> C' when a new | ||
| 128 | dependency between lock E and lock C is detected. Then the graph will be: | ||
| 129 | |||
| 130 |         A -> B - | ||
| 131 |                 \ | ||
| 132 |                  -> E - | ||
| 133 |                 /      \ | ||
| 134 |      -> C -> D -        \ | ||
| 135 |     /                   / | ||
| 136 |     \                  / | ||
| 137 |      ------------------ | ||
| 138 | |||
| 139 | where A, B,..., E are different lock classes. | ||
| 140 | |||
| 141 | This graph contains a subgraph which demonstrates circular dependencies: | ||
| 142 | |||
| 143 |                  -> E - | ||
| 144 |                 /      \ | ||
| 145 |      -> C -> D -        \ | ||
| 146 |     /                   / | ||
| 147 |     \                  / | ||
| 148 |      ------------------ | ||
| 149 | |||
| 150 | where C, D and E are different lock classes. | ||
| 151 | |||
| 152 | This is the condition under which a deadlock might occur. Lockdep | ||
| 153 | reports it upon detection, after adding the new dependency. This is how | ||
| 154 | lockdep works. | ||
| 155 | |||
| 156 | CONCLUSION | ||
| 157 | |||
| 158 | Lockdep detects a deadlock or its possibility by checking if circular | ||
| 159 | dependencies were created after adding each new dependency. | ||
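For example, the classic ABBA pattern is reported as soon as both orderings
have been observed, even if no deadlock actually happens in that run. A minimal
sketch (names are made up):

  static DEFINE_MUTEX(a);
  static DEFINE_MUTEX(b);

  static void path_one(void)
  {
        mutex_lock(&a);
        mutex_lock(&b);         /* lockdep records 'A -> B' */
        mutex_unlock(&b);
        mutex_unlock(&a);
  }

  static void path_two(void)
  {
        mutex_lock(&b);
        mutex_lock(&a);         /* 'B -> A' closes the circle: reported */
        mutex_unlock(&a);
        mutex_unlock(&b);
  }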
| 160 | |||
| 161 | |||
| 162 | ========== | ||
| 163 | Limitation | ||
| 164 | ========== | ||
| 165 | |||
| 166 | Limit lockdep | ||
| 167 | ------------- | ||
| 168 | |||
| 169 | By limiting lockdep to work on only typical locks, e.g. spin locks and | ||
| 170 | mutexes, which are released within the acquire context, the | ||
| 171 | implementation becomes simple but its capacity for detection becomes | ||
| 172 | limited. Let's check the pros and cons in the next sections. | ||
| 173 | |||
| 174 | |||
| 175 | Pros from the limitation | ||
| 176 | ------------------------ | ||
| 177 | |||
| 178 | Given the limitation, when a context acquires a lock, the locks already in | ||
| 179 | its held_locks cannot be released while the context is blocked waiting for | ||
| 180 | the new lock, which means all waiters for the locks in held_locks are | ||
| 181 | stuck as well. This is exactly the situation that creates dependencies | ||
| 182 | between each lock in held_locks and the lock being acquired. | ||
| 183 | |||
| 184 | For example: | ||
| 185 | |||
| 186 | CONTEXT X | ||
| 187 | --------- | ||
| 188 | acquire A | ||
| 189 | acquire B /* Add a dependency 'A -> B' */ | ||
| 190 | release B | ||
| 191 | release A | ||
| 192 | |||
| 193 | where A and B are different lock classes. | ||
| 194 | |||
| 195 | When acquiring lock A, the held_locks of CONTEXT X is empty thus no | ||
| 196 | dependency is added. But when acquiring lock B, lockdep detects and adds | ||
| 197 | a new dependency 'A -> B' between lock A in the held_locks and lock B. | ||
| 198 | They can be simply added whenever acquiring each lock. | ||
| 199 | |||
| 200 | And the data required by lockdep exists in a local structure, held_locks, | ||
| 201 | embedded in task_struct. By forcing the data to be accessed only within | ||
| 202 | the owning context, lockdep can avoid races on this local data without | ||
| 203 | taking explicit locks. | ||
| 204 | |||
| 205 | Lastly, lockdep only needs to keep the locks currently being held in order | ||
| 206 | to build a dependency graph. Were the limitation relaxed, it would need to | ||
| 207 | keep even locks already released, because the decision whether they created | ||
| 208 | dependencies might be deferred for a long time. | ||
| 209 | |||
| 210 | To sum up, we can expect several advantages from the limitation: | ||
| 211 | |||
| 212 | 1. Lockdep can easily identify a dependency when acquiring a lock. | ||
| 213 | 2. Races are avoidable while accessing local locks in a held_locks. | ||
| 214 | 3. Lockdep only needs to keep locks currently being held. | ||
| 215 | |||
| 216 | CONCLUSION | ||
| 217 | |||
| 218 | Given the limitation, the implementation becomes simple and efficient. | ||
| 219 | |||
| 220 | |||
| 221 | Cons from the limitation | ||
| 222 | ------------------------ | ||
| 223 | |||
| 224 | Given the limitation, lockdep is applicable only to typical locks. For | ||
| 225 | example, page locks for page access or completions for synchronization | ||
| 226 | cannot work with lockdep. | ||
| 227 | |||
| 228 | Can we detect deadlocks below, under the limitation? | ||
| 229 | |||
| 230 | Example 1: | ||
| 231 | |||
| 232 | CONTEXT X           CONTEXT Y           CONTEXT Z | ||
| 233 | ---------           ---------           ---------- | ||
| 234 |                     mutex_lock A | ||
| 235 | lock_page B | ||
| 236 |                     lock_page B | ||
| 237 |                                         mutex_lock A /* DEADLOCK */ | ||
| 238 |                                         unlock_page B held by X | ||
| 239 |                     unlock_page B | ||
| 240 |                     mutex_unlock A | ||
| 241 |                                         mutex_unlock A | ||
| 242 | |||
| 243 | where A and B are different lock classes. | ||
| 244 | |||
| 245 | No, we cannot. | ||
| 246 | |||
| 247 | Example 2: | ||
| 248 | |||
| 249 | CONTEXT X                             CONTEXT Y | ||
| 250 | ---------                             --------- | ||
| 251 | mutex_lock A | ||
| 252 |                                       mutex_lock A | ||
| 253 | wait_for_complete B /* DEADLOCK */ | ||
| 254 |                                       complete B | ||
| 255 |                                       mutex_unlock A | ||
| 256 | mutex_unlock A | ||
| 257 | |||
| 258 | where A is a lock class and B is a completion variable. | ||
| 259 | |||
| 260 | No, we cannot. | ||
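Example 2 corresponds to code along these lines (a sketch; the variable and
function names are made up, and the column layout above is assumed):

  static DEFINE_MUTEX(a);
  static DECLARE_COMPLETION(b);

  static void context_x(void)
  {
        mutex_lock(&a);
        wait_for_completion(&b);        /* DEADLOCK: Y needs A to complete B */
        mutex_unlock(&a);
  }

  static void context_y(void)
  {
        mutex_lock(&a);                 /* blocks behind X */
        complete(&b);
        mutex_unlock(&a);
  }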
| 261 | |||
| 262 | CONCLUSION | ||
| 263 | |||
| 264 | Given the limitation, lockdep cannot detect a deadlock or its | ||
| 265 | possibility caused by page locks or completions. | ||
| 266 | |||
| 267 | |||
| 268 | Relax the limitation | ||
| 269 | -------------------- | ||
| 270 | |||
| 271 | Under the limitation, things to create dependencies are limited to | ||
| 272 | typical locks. However, synchronization primitives like page locks and | ||
| 273 | completions, which are allowed to be released in any context, also | ||
| 274 | create dependencies and can cause a deadlock. So lockdep should track | ||
| 275 | these locks to do a better job. We have to relax the limitation for | ||
| 276 | these locks to work with lockdep. | ||
| 277 | |||
| 278 | Detecting dependencies is very important for lockdep to work because | ||
| 279 | adding a dependency means adding an opportunity to check whether it | ||
| 280 | causes a deadlock. The more dependencies lockdep adds, the more | ||
| 281 | thoroughly it works. Thus lockdep has to do its best to detect and add as | ||
| 282 | many true dependencies into the graph as possible. | ||
| 283 | |||
| 284 | For example, considering only typical locks, lockdep builds a graph like: | ||
| 285 | |||
| 286 | A -> B - | ||
| 287 |         \ | ||
| 288 |          -> E | ||
| 289 |         / | ||
| 290 | C -> D - | ||
| 291 | |||
| 292 | where A, B,..., E are different lock classes. | ||
| 293 | |||
| 294 | On the other hand, under the relaxation, additional dependencies might | ||
| 295 | be created and added. Assuming additional 'FX -> C' and 'E -> GX' are | ||
| 296 | added thanks to the relaxation, the graph will be: | ||
| 297 | |||
| 298 |       A -> B - | ||
| 299 |               \ | ||
| 300 |                -> E -> GX | ||
| 301 |               / | ||
| 302 | FX -> C -> D - | ||
| 303 | |||
| 304 | where A, B,..., E, FX and GX are different lock classes, and a suffix | ||
| 305 | 'X' is added on non-typical locks. | ||
| 306 | |||
| 307 | The latter graph gives us more chances to check for circular dependencies | ||
| 308 | than the former. However, it might suffer performance degradation, since | ||
| 309 | relaxing the limitation, which is what lets lockdep's design and | ||
| 310 | implementation stay efficient, inevitably introduces some overhead. So | ||
| 311 | lockdep should provide two options, strong detection and efficient detection. | ||
| 312 | |||
| 313 | Choosing efficient detection: | ||
| 314 | |||
| 315 | Lockdep works only with locks restricted to being released within the | ||
| 316 | acquire context. However, lockdep works efficiently. | ||
| 317 | |||
| 318 | Choosing strong detection: | ||
| 319 | |||
| 320 | Lockdep works with all synchronization primitives. However, lockdep | ||
| 321 | suffers performance degradation. | ||
| 322 | |||
| 323 | CONCLUSION | ||
| 324 | |||
| 325 | Relaxing the limitation, lockdep can add additional dependencies giving | ||
| 326 | additional opportunities to check circular dependencies. | ||
| 327 | |||
| 328 | |||
| 329 | ============ | ||
| 330 | Crossrelease | ||
| 331 | ============ | ||
| 332 | |||
| 333 | Introduce crossrelease | ||
| 334 | ---------------------- | ||
| 335 | |||
| 336 | In order to allow lockdep to handle additional dependencies created by | ||
| 337 | locks that might be released in any context, namely 'crosslocks', we have | ||
| 338 | to be able to identify the dependencies created by them. The proposed | ||
| 339 | 'crossrelease' feature provides a way to do that. | ||
| 340 | |||
| 341 | Crossrelease feature has to do: | ||
| 342 | |||
| 343 | 1. Identify dependencies created by crosslocks. | ||
| 344 | 2. Add the dependencies into a dependency graph. | ||
| 345 | |||
| 346 | That's all. Once a meaningful dependency is added into the graph, | ||
| 347 | lockdep works with the graph as it always did. The most important thing the | ||
| 348 | crossrelease feature has to do is to correctly identify and add true | ||
| 349 | dependencies into the global graph. | ||
| 350 | |||
| 351 | A dependency, e.g. 'A -> B', can be identified only in A's release | ||
| 352 | context, because the decision required to identify the dependency, | ||
| 353 | namely whether A can be released so that a waiter for A can be woken | ||
| 354 | up, can be made only there. It cannot be made anywhere other than A's | ||
| 355 | release context. | ||
| 356 | |||
| 357 | This does not matter for typical locks, because each acquire context is the | ||
| 358 | same as its release context, thus lockdep can decide whether a lock can be | ||
| 359 | released in the acquire context. However, for crosslocks, lockdep cannot | ||
| 360 | make the decision in the acquire context but has to wait until the | ||
| 361 | release context is identified. | ||
| 362 | |||
| 363 | Therefore, a deadlock caused by crosslocks cannot be detected at the moment | ||
| 364 | it happens, because the dependencies cannot be identified until the | ||
| 365 | crosslocks are released. However, deadlock possibilities can be detected, | ||
| 366 | and that is well worth doing. See the 'APPENDIX A' section to see why. | ||
| 367 | |||
| 368 | CONCLUSION | ||
| 369 | |||
| 370 | Using crossrelease feature, lockdep can work with what might be released | ||
| 371 | in any context, namely crosslock. | ||
| 372 | |||
| 373 | |||
| 374 | Introduce commit | ||
| 375 | ---------------- | ||
| 376 | |||
| 377 | Since crossrelease defers the work of adding the true dependencies of | ||
| 378 | crosslocks until they are actually released, crossrelease has to queue | ||
| 379 | all acquisitions which might create dependencies with the crosslocks. | ||
| 380 | Then it identifies dependencies using the queued data in batches at a | ||
| 381 | proper time. We call it 'commit'. | ||
| 382 | |||
| 383 | There are four types of dependencies: | ||
| 384 | |||
| 385 | 1. TT type: 'typical lock A -> typical lock B' | ||
| 386 | |||
| 387 | Just when acquiring B, lockdep can see it's in the A's release | ||
| 388 | context. So the dependency between A and B can be identified | ||
| 389 | immediately. Commit is unnecessary. | ||
| 390 | |||
| 391 | 2. TC type: 'typical lock A -> crosslock BX' | ||
| 392 | |||
| 393 | Just when acquiring BX, lockdep can see it's in the A's release | ||
| 394 | context. So the dependency between A and BX can be identified | ||
| 395 | immediately. Commit is unnecessary, too. | ||
| 396 | |||
| 397 | 3. CT type: 'crosslock AX -> typical lock B' | ||
| 398 | |||
| 399 | When acquiring B, lockdep cannot identify the dependency because | ||
| 400 | there's no way to know if it's in the AX's release context. It has | ||
| 401 | to wait until the decision can be made. Commit is necessary. | ||
| 402 | |||
| 403 | 4. CC type: 'crosslock AX -> crosslock BX' | ||
| 404 | |||
| 405 | When acquiring BX, lockdep cannot identify the dependency because | ||
| 406 | there's no way to know if it's in the AX's release context. It has | ||
| 407 | to wait until the decision can be made. Commit is necessary. | ||
| 408 | But handling the CC type is not implemented yet; it's future work. | ||
| 409 | |||
| 410 | Lockdep can work without commit for typical locks, but commit step is | ||
| 411 | necessary once crosslocks are involved. Introducing commit, lockdep | ||
| 412 | performs three steps. What lockdep does in each step is: | ||
| 413 | |||
| 414 | 1. Acquisition: For typical locks, lockdep does what it originally did | ||
| 415 | and queues the lock so that CT type dependencies can be checked using | ||
| 416 | it at the commit step. For crosslocks, it saves data which will be | ||
| 417 | used at the commit step and increases a reference count for it. | ||
| 418 | |||
| 419 | 2. Commit: No action is required for typical locks. For crosslocks, | ||
| 420 | lockdep adds CT type dependencies using the data saved at the | ||
| 421 | acquisition step. | ||
| 422 | |||
| 423 | 3. Release: No changes are required for typical locks. When a crosslock | ||
| 424 | is released, it decreases a reference count for it. | ||
| 425 | |||
| 426 | CONCLUSION | ||
| 427 | |||
| 428 | Crossrelease introduces commit step to handle dependencies of crosslocks | ||
| 429 | in batches at a proper time. | ||
| 430 | |||
| 431 | |||
| 432 | ============== | ||
| 433 | Implementation | ||
| 434 | ============== | ||
| 435 | |||
| 436 | Data structures | ||
| 437 | --------------- | ||
| 438 | |||
| 439 | Crossrelease introduces two main data structures. | ||
| 440 | |||
| 441 | 1. hist_lock | ||
| 442 | |||
| 443 | This is an array embedded in task_struct, for keeping lock history so | ||
| 444 | that dependencies can be added using its entries at the commit step. Since | ||
| 445 | it's local data, it can be accessed locklessly in the owner context. | ||
| 446 | The array is filled at the acquisition step and consumed at the | ||
| 447 | commit step. And it's managed in a circular manner. | ||
| 448 | |||
| 449 | 2. cross_lock | ||
| 450 | |||
| 451 | One of these exists per lockdep_map. It keeps the data of a crosslock | ||
| 452 | and is used at the commit step. | ||
| 453 | |||
| 454 | |||
| 455 | How crossrelease works | ||
| 456 | ---------------------- | ||
| 457 | |||
| 458 | The key to how crossrelease works is to defer the necessary work to an | ||
| 459 | appropriate point in time and perform it all at once at the commit step. | ||
| 460 | Let's take a look with examples step by step, starting from how lockdep | ||
| 461 | works without crossrelease for typical locks. | ||
| 462 | |||
| 463 | acquire A /* Push A onto held_locks */ | ||
| 464 | acquire B /* Push B onto held_locks and add 'A -> B' */ | ||
| 465 | acquire C /* Push C onto held_locks and add 'B -> C' */ | ||
| 466 | release C /* Pop C from held_locks */ | ||
| 467 | release B /* Pop B from held_locks */ | ||
| 468 | release A /* Pop A from held_locks */ | ||
| 469 | |||
| 470 | where A, B and C are different lock classes. | ||
| 471 | |||
| 472 | NOTE: This document assumes that readers already understand how | ||
| 473 | lockdep works without crossrelease thus omits details. But there's | ||
| 474 | one thing to note. Lockdep pretends to pop a lock from held_locks | ||
| 475 | when releasing it. But it's subtly different from the original pop | ||
| 476 | operation because lockdep allows entries other than the top to be popped. | ||
| 477 | |||
| 478 | In this case, lockdep adds 'the top of held_locks -> the lock to acquire' | ||
| 479 | as a dependency every time a lock is acquired. | ||
| 480 | |||
| 481 | After adding 'A -> B', a dependency graph will be: | ||
| 482 | |||
| 483 | A -> B | ||
| 484 | |||
| 485 | where A and B are different lock classes. | ||
| 486 | |||
| 487 | And after adding 'B -> C', the graph will be: | ||
| 488 | |||
| 489 | A -> B -> C | ||
| 490 | |||
| 491 | where A, B and C are different lock classes. | ||
| 492 | |||
| 493 | Let's perform the commit step even for typical locks to add dependencies. | ||
| 494 | Of course, the commit step is not necessary for them; however, it works | ||
| 495 | just as well because it is the more general way. | ||
| 496 | |||
| 497 | acquire A | ||
| 498 | /* | ||
| 499 | * Queue A into hist_locks | ||
| 500 | * | ||
| 501 | * In hist_locks: A | ||
| 502 | * In graph: Empty | ||
| 503 | */ | ||
| 504 | |||
| 505 | acquire B | ||
| 506 | /* | ||
| 507 | * Queue B into hist_locks | ||
| 508 | * | ||
| 509 | * In hist_locks: A, B | ||
| 510 | * In graph: Empty | ||
| 511 | */ | ||
| 512 | |||
| 513 | acquire C | ||
| 514 | /* | ||
| 515 | * Queue C into hist_locks | ||
| 516 | * | ||
| 517 | * In hist_locks: A, B, C | ||
| 518 | * In graph: Empty | ||
| 519 | */ | ||
| 520 | |||
| 521 | commit C | ||
| 522 | /* | ||
| 523 | * Add 'C -> ?' | ||
| 524 | * Answer the following to decide '?' | ||
| 525 | * What has been queued since acquire C: Nothing | ||
| 526 | * | ||
| 527 | * In hist_locks: A, B, C | ||
| 528 | * In graph: Empty | ||
| 529 | */ | ||
| 530 | |||
| 531 | release C | ||
| 532 | |||
| 533 | commit B | ||
| 534 | /* | ||
| 535 | * Add 'B -> ?' | ||
| 536 | * Answer the following to decide '?' | ||
| 537 | * What has been queued since acquire B: C | ||
| 538 | * | ||
| 539 | * In hist_locks: A, B, C | ||
| 540 | * In graph: 'B -> C' | ||
| 541 | */ | ||
| 542 | |||
| 543 | release B | ||
| 544 | |||
| 545 | commit A | ||
| 546 | /* | ||
| 547 | * Add 'A -> ?' | ||
| 548 | * Answer the following to decide '?' | ||
| 549 | * What has been queued since acquire A: B, C | ||
| 550 | * | ||
| 551 | * In hist_locks: A, B, C | ||
| 552 | * In graph: 'B -> C', 'A -> B', 'A -> C' | ||
| 553 | */ | ||
| 554 | |||
| 555 | release A | ||
| 556 | |||
| 557 | where A, B and C are different lock classes. | ||
| 558 | |||
| 559 | In this case, dependencies are added at the commit step as described. | ||
| 560 | |||
| 561 | After commits for A, B and C, the graph will be: | ||
| 562 | |||
| 563 | A -> B -> C | ||
| 564 | |||
| 565 | where A, B and C are different lock classes. | ||
| 566 | |||
| 567 | NOTE: A dependency 'A -> C' is optimized out. | ||
| 568 | |||
| 569 | We can see that the former graph, built without the commit step, is the same | ||
| 570 | as the latter graph, built using commit steps. Of course the former way | ||
| 571 | finishes building the graph earlier, which means we can detect a | ||
| 572 | deadlock or its possibility sooner. So the former way would be preferred | ||
| 573 | when possible. But we cannot avoid using the latter way for crosslocks. | ||
| 574 | |||
| 575 | Let's look at how commit steps work for crosslocks. In this case, the | ||
| 576 | commit step is actually performed only on the crosslock BX. And it assumes | ||
| 577 | that the BX release context is different from the BX acquire context. | ||
| 578 | |||
| 579 | BX RELEASE CONTEXT                            BX ACQUIRE CONTEXT | ||
| 580 | ------------------                            ------------------ | ||
| 581 |                                               acquire A | ||
| 582 |                                               /* | ||
| 583 |                                               * Push A onto held_locks | ||
| 584 |                                               * Queue A into hist_locks | ||
| 585 |                                               * | ||
| 586 |                                               * In held_locks: A | ||
| 587 |                                               * In hist_locks: A | ||
| 588 |                                               * In graph: Empty | ||
| 589 |                                               */ | ||
| 590 | |||
| 591 |                                               acquire BX | ||
| 592 |                                               /* | ||
| 593 |                                               * Add 'the top of held_locks -> BX' | ||
| 594 |                                               * | ||
| 595 |                                               * In held_locks: A | ||
| 596 |                                               * In hist_locks: A | ||
| 597 |                                               * In graph: 'A -> BX' | ||
| 598 |                                               */ | ||
| 599 | |||
| 600 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
| 601 | It must be guaranteed that the following operations are seen after | ||
| 602 | acquiring BX globally. This can be done by things like a barrier. | ||
| 603 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
| 604 | |||
| 605 | acquire C | ||
| 606 | /* | ||
| 607 | * Push C onto held_locks | ||
| 608 | * Queue C into hist_locks | ||
| 609 | * | ||
| 610 | * In held_locks: C | ||
| 611 | * In hist_locks: C | ||
| 612 | * In graph: 'A -> BX' | ||
| 613 | */ | ||
| 614 | |||
| 615 | release C | ||
| 616 | /* | ||
| 617 | * Pop C from held_locks | ||
| 618 | * | ||
| 619 | * In held_locks: Empty | ||
| 620 | * In hist_locks: C | ||
| 621 | * In graph: 'A -> BX' | ||
| 622 | */ | ||
| 623 |                                               acquire D | ||
| 624 |                                               /* | ||
| 625 |                                               * Push D onto held_locks | ||
| 626 |                                               * Queue D into hist_locks | ||
| 627 |                                               * Add 'the top of held_locks -> D' | ||
| 628 |                                               * | ||
| 629 |                                               * In held_locks: A, D | ||
| 630 |                                               * In hist_locks: A, D | ||
| 631 |                                               * In graph: 'A -> BX', 'A -> D' | ||
| 632 |                                               */ | ||
| 633 | acquire E | ||
| 634 | /* | ||
| 635 | * Push E onto held_locks | ||
| 636 | * Queue E into hist_locks | ||
| 637 | * | ||
| 638 | * In held_locks: E | ||
| 639 | * In hist_locks: C, E | ||
| 640 | * In graph: 'A -> BX', 'A -> D' | ||
| 641 | */ | ||
| 642 | |||
| 643 | release E | ||
| 644 | /* | ||
| 645 | * Pop E from held_locks | ||
| 646 | * | ||
| 647 | * In held_locks: Empty | ||
| 648 | * In hist_locks: C, E | ||
| 649 | * In graph: 'A -> BX', 'A -> D' | ||
| 650 | */ | ||
| 651 |                                               release D | ||
| 652 |                                               /* | ||
| 653 |                                               * Pop D from held_locks | ||
| 654 |                                               * | ||
| 655 |                                               * In held_locks: A | ||
| 656 |                                               * In hist_locks: A, D | ||
| 657 |                                               * In graph: 'A -> BX', 'A -> D' | ||
| 658 |                                               */ | ||
| 659 | commit BX | ||
| 660 | /* | ||
| 661 | * Add 'BX -> ?' | ||
| 662 | * What has been queued since acquire BX: C, E | ||
| 663 | * | ||
| 664 | * In held_locks: Empty | ||
| 665 | * In hist_locks: C, E | ||
| 666 | * In graph: 'A -> BX', 'A -> D', | ||
| 667 | * 'BX -> C', 'BX -> E' | ||
| 668 | */ | ||
| 669 | |||
| 670 | release BX | ||
| 671 | /* | ||
| 672 | * In held_locks: Empty | ||
| 673 | * In hist_locks: C, E | ||
| 674 | * In graph: 'A -> BX', 'A -> D', | ||
| 675 | * 'BX -> C', 'BX -> E' | ||
| 676 | */ | ||
| 677 |                                               release A | ||
| 678 |                                               /* | ||
| 679 |                                               * Pop A from held_locks | ||
| 680 |                                               * | ||
| 681 |                                               * In held_locks: Empty | ||
| 682 |                                               * In hist_locks: A, D | ||
| 683 |                                               * In graph: 'A -> BX', 'A -> D', | ||
| 684 |                                               * 'BX -> C', 'BX -> E' | ||
| 685 |                                               */ | ||
| 686 | |||
| 687 | where A, BX, C,..., E are different lock classes, and a suffix 'X' is | ||
| 688 | added on crosslocks. | ||
| 689 | |||
| 690 | Crossrelease considers all acquisitions after acquiring BX as | ||
| 691 | candidates which might create dependencies with BX. True dependencies | ||
| 692 | will be determined when identifying the release context of BX. Meanwhile, | ||
| 693 | all typical locks are queued so that they can be used at the commit step. | ||
| 694 | And then two dependencies 'BX -> C' and 'BX -> E' are added at the | ||
| 695 | commit step when identifying the release context. | ||
| 696 | |||
| 697 | The final graph will be, with crossrelease: | ||
| 698 | |||
| 699 |            -> C | ||
| 700 |           / | ||
| 701 |    -> BX - | ||
| 702 |   /        \ | ||
| 703 |  A -        -> E | ||
| 704 |   \ | ||
| 705 |    -> D | ||
| 706 | |||
| 707 | where A, BX, C,..., E are different lock classes, and a suffix 'X' is | ||
| 708 | added on crosslocks. | ||
| 709 | |||
| 710 | However, the final graph will be, without crossrelease: | ||
| 711 | |||
| 712 | A -> D | ||
| 713 | |||
| 714 | where A and D are different lock classes. | ||
| 715 | |||
| 716 | The former graph has three more dependencies, 'A -> BX', 'BX -> C' and | ||
| 717 | 'BX -> E' giving additional opportunities to check if they cause | ||
| 718 | deadlocks. This way lockdep can detect a deadlock or its possibility | ||
| 719 | caused by crosslocks. | ||
| 720 | |||
| 721 | CONCLUSION | ||
| 722 | |||
| 723 | We checked how crossrelease works with several examples. | ||
| 724 | |||
| 725 | |||
| 726 | ============= | ||
| 727 | Optimizations | ||
| 728 | ============= | ||
| 729 | |||
| 730 | Avoid duplication | ||
| 731 | ----------------- | ||
| 732 | |||
| 733 | The crossrelease feature uses a cache, like the one lockdep already uses for | ||
| 734 | dependency chains, but this time for caching CT type dependencies. | ||
| 735 | Once a dependency is cached, the same dependency will never be added again. | ||
| 736 | |||
| 737 | |||
| 738 | Lockless for hot paths | ||
| 739 | ---------------------- | ||
| 740 | |||
| 741 | To keep all locks for later use at the commit step, crossrelease adopts | ||
| 742 | a local array embedded in task_struct, which makes access to the data | ||
| 743 | lockless by forcing it to happen only within the owner context. It's | ||
| 744 | like how lockdep handles held_locks. A lockless implementation is important | ||
| 745 | since typical locks are very frequently acquired and released. | ||
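A minimal sketch of such a per-task circular history (simplified, with made-up
names and sizes; not the actual hist_lock layout):

  #define MY_XHLOCKS_NR 64

  struct my_hist_lock {
        struct lockdep_map      *instance;      /* which lock was acquired */
        unsigned int            hist_id;        /* generation, detects wrap */
  };

  struct my_task_history {
        struct my_hist_lock     xhlocks[MY_XHLOCKS_NR];
        unsigned int            idx;            /* wraps: circular buffer */
  };

  /* Only ever called by the owner task, so no locking is needed. */
  static void my_queue_hist_lock(struct my_task_history *h,
                                 struct lockdep_map *instance)
  {
        struct my_hist_lock *xh = &h->xhlocks[h->idx % MY_XHLOCKS_NR];

        xh->instance = instance;
        xh->hist_id  = h->idx++;
  }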
| 746 | |||
| 747 | |||
| 748 | ================================================== | ||
| 749 | APPENDIX A: What lockdep does to work aggressively | ||
| 750 | ================================================== | ||
| 751 | |||
| 752 | A deadlock actually occurs when all wait operations creating circular | ||
| 753 | dependencies run at the same time. Even though they don't, a potential | ||
| 754 | deadlock exists if the problematic dependencies exist. Thus it's | ||
| 755 | meaningful to detect not only an actual deadlock but also its potential | ||
| 756 | possibility. The latter is the more valuable. When a deadlock actually | ||
| 757 | occurs, we can identify what happens in the system by some means or | ||
| 758 | other even without lockdep. However, there's no way to detect a mere | ||
| 759 | possibility without lockdep, short of reviewing the whole code by hand, | ||
| 760 | which would be terrible. Lockdep does both, and crossrelease focuses only on the latter. | ||
| 761 | |||
| 762 | Whether or not a deadlock actually occurs depends on several factors. | ||
| 763 | For example, what order contexts are switched in is a factor. Assuming | ||
| 764 | circular dependencies exist, a deadlock would occur when contexts are | ||
| 765 | switched so that all wait operations creating the dependencies run | ||
| 766 | simultaneously. Thus to detect a deadlock possibility even in the case | ||
| 767 | that it has not occurred yet, lockdep should consider all possible | ||
| 768 | combinations of dependencies, trying to: | ||
| 769 | |||
| 770 | 1. Use a global dependency graph. | ||
| 771 | |||
| 772 | Lockdep combines all dependencies into one global graph and uses them, | ||
| 773 | regardless of which context generates them or what order contexts are | ||
| 774 | switched in. Only the aggregated dependencies are considered, so they are | ||
| 775 | prone to become circular if a problem exists. | ||
| 776 | |||
| 777 | 2. Check dependencies between classes instead of instances. | ||
| 778 | |||
| 779 | What actually causes a deadlock are instances of lock. However, | ||
| 780 | lockdep checks dependencies between classes instead of instances. | ||
| 781 | This way lockdep can detect a deadlock which has not happened yet but | ||
| 782 | might happen in the future with other instances of the same class. | ||
| 783 | |||
| 784 | 3. Assume all acquisitions lead to waiting. | ||
| 785 | |||
| 786 | Although locks might be acquired without waiting, and it is the waiting | ||
| 787 | that actually creates dependencies, lockdep assumes all acquisitions lead | ||
| 788 | to waiting since that might become true at some time or another. | ||
| 789 | |||
| 790 | CONCLUSION | ||
| 791 | |||
| 792 | Lockdep detects not only an actual deadlock but also its possibility, | ||
| 793 | and the latter is more valuable. | ||
| 794 | |||
| 795 | |||
| 796 | ================================================== | ||
| 797 | APPENDIX B: How to avoid adding false dependencies | ||
| 798 | ================================================== | ||
| 799 | |||
| 800 | Remind what a dependency is. A dependency exists if: | ||
| 801 | |||
| 802 | 1. There are two waiters waiting for each event at a given time. | ||
| 803 | 2. The only way to wake up each waiter is to trigger its event. | ||
| 804 | 3. Whether one can be woken up depends on whether the other can. | ||
| 805 | |||
| 806 | For example: | ||
| 807 | |||
| 808 | acquire A | ||
| 809 | acquire B /* A dependency 'A -> B' exists */ | ||
| 810 | release B | ||
| 811 | release A | ||
| 812 | |||
| 813 | where A and B are different lock classes. | ||
| 814 | |||
| 815 | A dependency 'A -> B' exists since: | ||
| 816 | |||
| 817 | 1. A waiter for A and a waiter for B might exist when acquiring B. | ||
| 818 | 2. The only way to wake up each waiter is to release what it waits for. | ||
| 819 | 3. Whether the waiter for A can be woken up depends on whether the | ||
| 820 | other can. IOW, TASK X cannot release A if it fails to acquire B. | ||
| 821 | |||
| 822 | For another example: | ||
| 823 | |||
| 824 | TASK X                              TASK Y | ||
| 825 | ------                              ------ | ||
| 826 |                                     acquire AX | ||
| 827 | acquire B /* A dependency 'AX -> B' exists */ | ||
| 828 | release B | ||
| 829 | release AX held by Y | ||
| 830 | |||
| 831 | where AX and B are different lock classes, and a suffix 'X' is added | ||
| 832 | on crosslocks. | ||
| 833 | |||
| 834 | Even in this case involving crosslocks, the same rule can be applied. A | ||
| 835 | depedency 'AX -> B' exists since: | ||
| 836 | |||
| 837 | 1. A waiter for AX and a waiter for B might exist when acquiring B. | ||
| 838 | 2. The only way to wake up each waiter is to release what it waits for. | ||
| 839 | 3. Whether the waiter for AX can be woken up depends on whether the | ||
| 840 | other can. IOW, TASK X cannot release AX if it fails to acquire B. | ||
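Mapping that onto real primitives, AX could be a page lock that is handed off
between tasks (a sketch; the task_* names are illustrative):

  static void task_y(struct page *page)
  {
        lock_page(page);        /* acquire AX */
        /* hand the locked page over to task X ... */
  }

  static void task_x(struct page *page, struct mutex *b)
  {
        mutex_lock(b);          /* acquire B: creates 'AX -> B' */
        /* ... */
        mutex_unlock(b);        /* release B */
        unlock_page(page);      /* release AX, held by Y */
  }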
| 841 | |||
| 842 | Let's take a look at a more complicated example: | ||
| 843 | |||
| 844 | TASK X                              TASK Y | ||
| 845 | ------                              ------ | ||
| 846 | acquire B | ||
| 847 | release B | ||
| 848 | fork Y | ||
| 849 |                                     acquire AX | ||
| 850 | acquire C /* A dependency 'AX -> C' exists */ | ||
| 851 | release C | ||
| 852 | release AX held by Y | ||
| 853 | |||
| 854 | where AX, B and C are different lock classes, and a suffix 'X' is | ||
| 855 | added on crosslocks. | ||
| 856 | |||
| 857 | Does a dependency 'AX -> B' exist? Nope. | ||
| 858 | |||
| 859 | Two waiters are essential to create a dependency. However, waiters for | ||
| 860 | AX and B to create 'AX -> B' cannot exist at the same time in this | ||
| 861 | example. Thus the dependency 'AX -> B' cannot be created. | ||
| 862 | |||
| 863 | It would be ideal if the full set of true dependencies could be considered. | ||
| 864 | But we can be sure of nothing but what actually happened. By relying on | ||
| 865 | what actually happens at runtime, we at least add only true dependencies, | ||
| 866 | though they might be a subset of all the true ones. It's similar to how | ||
| 867 | lockdep works for typical locks. There might be more true dependencies than | ||
| 868 | what lockdep has detected at runtime. Lockdep has no choice but to rely on | ||
| 869 | what actually happens. Crossrelease also relies on it. | ||
| 870 | |||
| 871 | CONCLUSION | ||
| 872 | |||
| 873 | Relying on what actually happens, lockdep can avoid adding false | ||
| 874 | dependencies. | ||
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index c4ddfcd5ee32..d1d1716f904b 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt | |||
| @@ -498,11 +498,11 @@ And a couple of implicit varieties: | |||
| 498 | This means that ACQUIRE acts as a minimal "acquire" operation and | 498 | This means that ACQUIRE acts as a minimal "acquire" operation and |
| 499 | RELEASE acts as a minimal "release" operation. | 499 | RELEASE acts as a minimal "release" operation. |
| 500 | 500 | ||
| 501 | A subset of the atomic operations described in core-api/atomic_ops.rst have | 501 | A subset of the atomic operations described in atomic_t.txt have ACQUIRE and |
| 502 | ACQUIRE and RELEASE variants in addition to fully-ordered and relaxed (no | 502 | RELEASE variants in addition to fully-ordered and relaxed (no barrier |
| 503 | barrier semantics) definitions. For compound atomics performing both a load | 503 | semantics) definitions. For compound atomics performing both a load and a |
| 504 | and a store, ACQUIRE semantics apply only to the load and RELEASE semantics | 504 | store, ACQUIRE semantics apply only to the load and RELEASE semantics apply |
| 505 | apply only to the store portion of the operation. | 505 | only to the store portion of the operation. |
| 506 | 506 | ||
| 507 | Memory barriers are only required where there's a possibility of interaction | 507 | Memory barriers are only required where there's a possibility of interaction |
| 508 | between two CPUs or between a CPU and a device. If it can be guaranteed that | 508 | between two CPUs or between a CPU and a device. If it can be guaranteed that |
| @@ -1876,8 +1876,7 @@ There are some more advanced barrier functions: | |||
| 1876 | This makes sure that the death mark on the object is perceived to be set | 1876 | This makes sure that the death mark on the object is perceived to be set |
| 1877 | *before* the reference counter is decremented. | 1877 | *before* the reference counter is decremented. |
| 1878 | 1878 | ||
| 1879 | See Documentation/core-api/atomic_ops.rst for more information. See the | 1879 | See Documentation/atomic_{t,bitops}.txt for more information. |
| 1880 | "Atomic operations" subsection for information on where to use these. | ||
| 1881 | 1880 | ||
| 1882 | 1881 | ||
| 1883 | (*) lockless_dereference(); | 1882 | (*) lockless_dereference(); |
| @@ -1982,10 +1981,7 @@ for each construct. These operations all imply certain barriers: | |||
| 1982 | ACQUIRE operation has completed. | 1981 | ACQUIRE operation has completed. |
| 1983 | 1982 | ||
| 1984 | Memory operations issued before the ACQUIRE may be completed after | 1983 | Memory operations issued before the ACQUIRE may be completed after |
| 1985 | the ACQUIRE operation has completed. An smp_mb__before_spinlock(), | 1984 | the ACQUIRE operation has completed. |
| 1986 | combined with a following ACQUIRE, orders prior stores against | ||
| 1987 | subsequent loads and stores. Note that this is weaker than smp_mb()! | ||
| 1988 | The smp_mb__before_spinlock() primitive is free on many architectures. | ||
| 1989 | 1985 | ||
| 1990 | (2) RELEASE operation implication: | 1986 | (2) RELEASE operation implication: |
| 1991 | 1987 | ||
| @@ -2503,88 +2499,7 @@ operations are noted specially as some of them imply full memory barriers and | |||
| 2503 | some don't, but they're very heavily relied on as a group throughout the | 2499 | some don't, but they're very heavily relied on as a group throughout the |
| 2504 | kernel. | 2500 | kernel. |
| 2505 | 2501 | ||
| 2506 | Any atomic operation that modifies some state in memory and returns information | 2502 | See Documentation/atomic_t.txt for more information. |
| 2507 | about the state (old or new) implies an SMP-conditional general memory barrier | ||
| 2508 | (smp_mb()) on each side of the actual operation (with the exception of | ||
| 2509 | explicit lock operations, described later). These include: | ||
| 2510 | |||
| 2511 | xchg(); | ||
| 2512 | atomic_xchg(); atomic_long_xchg(); | ||
| 2513 | atomic_inc_return(); atomic_long_inc_return(); | ||
| 2514 | atomic_dec_return(); atomic_long_dec_return(); | ||
| 2515 | atomic_add_return(); atomic_long_add_return(); | ||
| 2516 | atomic_sub_return(); atomic_long_sub_return(); | ||
| 2517 | atomic_inc_and_test(); atomic_long_inc_and_test(); | ||
| 2518 | atomic_dec_and_test(); atomic_long_dec_and_test(); | ||
| 2519 | atomic_sub_and_test(); atomic_long_sub_and_test(); | ||
| 2520 | atomic_add_negative(); atomic_long_add_negative(); | ||
| 2521 | test_and_set_bit(); | ||
| 2522 | test_and_clear_bit(); | ||
| 2523 | test_and_change_bit(); | ||
| 2524 | |||
| 2525 | /* when succeeds */ | ||
| 2526 | cmpxchg(); | ||
| 2527 | atomic_cmpxchg(); atomic_long_cmpxchg(); | ||
| 2528 | atomic_add_unless(); atomic_long_add_unless(); | ||
| 2529 | |||
| 2530 | These are used for such things as implementing ACQUIRE-class and RELEASE-class | ||
| 2531 | operations and adjusting reference counters towards object destruction, and as | ||
| 2532 | such the implicit memory barrier effects are necessary. | ||
| 2533 | |||
| 2534 | |||
| 2535 | The following operations are potential problems as they do _not_ imply memory | ||
| 2536 | barriers, but might be used for implementing such things as RELEASE-class | ||
| 2537 | operations: | ||
| 2538 | |||
| 2539 | atomic_set(); | ||
| 2540 | set_bit(); | ||
| 2541 | clear_bit(); | ||
| 2542 | change_bit(); | ||
| 2543 | |||
| 2544 | With these the appropriate explicit memory barrier should be used if necessary | ||
| 2545 | (smp_mb__before_atomic() for instance). | ||
| 2546 | |||
| 2547 | |||
| 2548 | The following also do _not_ imply memory barriers, and so may require explicit | ||
| 2549 | memory barriers under some circumstances (smp_mb__before_atomic() for | ||
| 2550 | instance): | ||
| 2551 | |||
| 2552 | atomic_add(); | ||
| 2553 | atomic_sub(); | ||
| 2554 | atomic_inc(); | ||
| 2555 | atomic_dec(); | ||
| 2556 | |||
| 2557 | If they're used for statistics generation, then they probably don't need memory | ||
| 2558 | barriers, unless there's a coupling between statistical data. | ||
| 2559 | |||
| 2560 | If they're used for reference counting on an object to control its lifetime, | ||
| 2561 | they probably don't need memory barriers because either the reference count | ||
| 2562 | will be adjusted inside a locked section, or the caller will already hold | ||
| 2563 | sufficient references to make the lock, and thus a memory barrier unnecessary. | ||
| 2564 | |||
| 2565 | If they're used for constructing a lock of some description, then they probably | ||
| 2566 | do need memory barriers as a lock primitive generally has to do things in a | ||
| 2567 | specific order. | ||
| 2568 | |||
| 2569 | Basically, each usage case has to be carefully considered as to whether memory | ||
| 2570 | barriers are needed or not. | ||
| 2571 | |||
| 2572 | The following operations are special locking primitives: | ||
| 2573 | |||
| 2574 | test_and_set_bit_lock(); | ||
| 2575 | clear_bit_unlock(); | ||
| 2576 | __clear_bit_unlock(); | ||
| 2577 | |||
| 2578 | These implement ACQUIRE-class and RELEASE-class operations. These should be | ||
| 2579 | used in preference to other operations when implementing locking primitives, | ||
| 2580 | because their implementations can be optimised on many architectures. | ||
| 2581 | |||
| 2582 | [!] Note that special memory barrier primitives are available for these | ||
| 2583 | situations because on some CPUs the atomic instructions used imply full memory | ||
| 2584 | barriers, and so barrier instructions are superfluous in conjunction with them, | ||
| 2585 | and in such cases the special barrier primitives will be no-ops. | ||
| 2586 | |||
| 2587 | See Documentation/core-api/atomic_ops.rst for more information. | ||
| 2588 | 2503 | ||
| 2589 | 2504 | ||
| 2590 | ACCESSING DEVICES | 2505 | ACCESSING DEVICES |
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index b83dfa1c0602..ab16efe0c79d 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt | |||
| @@ -149,6 +149,26 @@ static_branch_inc(), will change the branch back to true. Likewise, if the | |||
| 149 | key is initialized false, a 'static_branch_inc()', will change the branch to | 149 | key is initialized false, a 'static_branch_inc()', will change the branch to |
| 150 | true. And then a 'static_branch_dec()', will again make the branch false. | 150 | true. And then a 'static_branch_dec()', will again make the branch false. |
| 151 | 151 | ||
| 152 | The state and the reference count can be retrieved with 'static_key_enabled()' | ||
| 153 | and 'static_key_count()'. In general, if you use these functions, they | ||
| 154 | should be protected with the same mutex used around the enable/disable | ||
| 155 | or increment/decrement function. | ||
| 156 | |||
| 157 | Note that switching branches results in some locks being taken, | ||
| 158 | particularly the CPU hotplug lock (in order to avoid races against | ||
| 159 | CPUs being brought online whilst the kernel is getting | ||
| 160 | patched). Calling the static key API from within a hotplug notifier is | ||
| 161 | thus a sure deadlock recipe. In order to still allow use of the | ||
| 162 | functionality, the following functions are provided: | ||
| 163 | |||
| 164 | static_key_enable_cpuslocked() | ||
| 165 | static_key_disable_cpuslocked() | ||
| 166 | static_branch_enable_cpuslocked() | ||
| 167 | static_branch_disable_cpuslocked() | ||
| 168 | |||
| 169 | These functions are *not* general purpose, and must only be used when | ||
| 170 | you really know that you're in the above context, and no other. | ||
| 171 | |||
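For instance, from a CPU hotplug callback, where the hotplug lock is already
held, only the _cpuslocked variants may be used. A sketch (the key and callback
names are made up):

  static DEFINE_STATIC_KEY_FALSE(my_feature_key);

  static int my_cpu_online(unsigned int cpu)
  {
        /*
         * The hotplug lock is already held here; the plain
         * static_branch_enable() would deadlock trying to take it again.
         */
        static_branch_enable_cpuslocked(&my_feature_key);
        return 0;
  }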
| 152 | Where an array of keys is required, it can be defined as:: | 172 | Where an array of keys is required, it can be defined as:: |
| 153 | 173 | ||
| 154 | DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); | 174 | DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count); |
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 38310dcd6620..bc80fc0e210f 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt | |||
| @@ -1956,10 +1956,7 @@ MMIO 쓰기 배리어 | |||
| 1956 | 뒤에 완료됩니다. | 1956 | 뒤에 완료됩니다. |
| 1957 | 1957 | ||
| 1958 | ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에 | 1958 | ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에 |
| 1959 | 완료될 수 있습니다. smp_mb__before_spinlock() 뒤에 ACQUIRE 가 실행되는 | 1959 | 완료될 수 있습니다. |
| 1960 | 코드 블록은 블록 앞의 스토어를 블록 뒤의 로드와 스토어에 대해 순서 | ||
| 1961 | 맞춥니다. 이건 smp_mb() 보다 완화된 것임을 기억하세요! 많은 아키텍쳐에서 | ||
| 1962 | smp_mb__before_spinlock() 은 사실 아무일도 하지 않습니다. | ||
| 1963 | 1960 | ||
| 1964 | (2) RELEASE 오퍼레이션의 영향: | 1961 | (2) RELEASE 오퍼레이션의 영향: |
| 1965 | 1962 | ||
diff --git a/arch/Kconfig b/arch/Kconfig index 21d0089117fe..2520ca5b42eb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
| @@ -931,6 +931,18 @@ config STRICT_MODULE_RWX | |||
| 931 | config ARCH_WANT_RELAX_ORDER | 931 | config ARCH_WANT_RELAX_ORDER |
| 932 | bool | 932 | bool |
| 933 | 933 | ||
| 934 | config ARCH_HAS_REFCOUNT | ||
| 935 | bool | ||
| 936 | help | ||
| 937 | An architecture selects this when it has implemented refcount_t | ||
| 938 | using open-coded assembly primitives that provide an optimized | ||
| 939 | refcount_t implementation, possibly at the expense of some of the | ||
| 940 | full state checks performed by CONFIG_REFCOUNT_FULL=y. | ||
| 941 | |||
| 942 | The refcount overflow check behavior, however, must be retained. | ||
| 943 | Catching overflows is the primary security concern for protecting | ||
| 944 | against bugs in reference counts. | ||
| 945 | |||
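For context, a hedged sketch of the kind of refcount_t usage either
implementation is meant to protect (the struct and helpers are illustrative;
the point is that an increment that would overflow is caught rather than
silently wrapping):

	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct foo {			/* illustrative object */
		refcount_t ref;
	};

	static void foo_get(struct foo *f)
	{
		refcount_inc(&f->ref);	/* WARNs and saturates on overflow */
	}

	static void foo_put(struct foo *f)
	{
		if (refcount_dec_and_test(&f->ref))
			kfree(f);
	}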
| 934 | config REFCOUNT_FULL | 946 | config REFCOUNT_FULL |
| 935 | bool "Perform full reference count validation at the expense of speed" | 947 | bool "Perform full reference count validation at the expense of speed" |
| 936 | help | 948 | help |
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 54b54da6384c..11859287c52a 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h | |||
| @@ -123,6 +123,8 @@ static inline void atomic_set(atomic_t *v, int i) | |||
| 123 | atomic_ops_unlock(flags); | 123 | atomic_ops_unlock(flags); |
| 124 | } | 124 | } |
| 125 | 125 | ||
| 126 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
| 127 | |||
| 126 | #endif | 128 | #endif |
| 127 | 129 | ||
| 128 | /* | 130 | /* |
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index cae331d553f8..ae4241ab19a8 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h | |||
| @@ -358,14 +358,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) | |||
| 358 | #define arch_read_relax(lock) cpu_relax() | 358 | #define arch_read_relax(lock) cpu_relax() |
| 359 | #define arch_write_relax(lock) cpu_relax() | 359 | #define arch_write_relax(lock) cpu_relax() |
| 360 | 360 | ||
| 361 | /* | 361 | /* See include/linux/spinlock.h */ |
| 362 | * Accesses appearing in program order before a spin_lock() operation | 362 | #define smp_mb__after_spinlock() smp_mb() |
| 363 | * can be reordered with accesses inside the critical section, by virtue | ||
| 364 | * of arch_spin_lock being constructed using acquire semantics. | ||
| 365 | * | ||
| 366 | * In cases where this is problematic (e.g. try_to_wake_up), an | ||
| 367 | * smp_mb__before_spinlock() can restore the required ordering. | ||
| 368 | */ | ||
| 369 | #define smp_mb__before_spinlock() smp_mb() | ||
| 370 | 363 | ||
| 371 | #endif /* __ASM_SPINLOCK_H */ | 364 | #endif /* __ASM_SPINLOCK_H */ |
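A hedged sketch of the ordering smp_mb__after_spinlock() provides, modelled
on the try_to_wake_up() style case the removed comment referred to (all
names below are illustrative):

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(my_lock);
	static int my_flag, my_other_flag;

	static int my_waker(void)
	{
		int val;

		WRITE_ONCE(my_flag, 1);		/* store issued before the lock */

		raw_spin_lock(&my_lock);
		smp_mb__after_spinlock();	/* upgrade the ACQUIRE to a full barrier */
		val = READ_ONCE(my_other_flag);	/* now ordered after the store above */
		raw_spin_unlock(&my_lock);

		return val;
	}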
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index a62ba368b27d..fb3dfb2a667e 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h | |||
| @@ -42,6 +42,8 @@ static inline void atomic_set(atomic_t *v, int new) | |||
| 42 | ); | 42 | ); |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
| 46 | |||
| 45 | /** | 47 | /** |
| 46 | * atomic_read - reads a word, atomically | 48 | * atomic_read - reads a word, atomically |
| 47 | * @v: pointer to atomic value | 49 | * @v: pointer to atomic value |
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index 6c1380a8a0d4..eee779f26cc4 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h | |||
| @@ -37,6 +37,8 @@ static inline int atomic_set(atomic_t *v, int i) | |||
| 37 | return i; | 37 | return i; |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
| 41 | |||
| 40 | #define ATOMIC_OP(op, c_op) \ | 42 | #define ATOMIC_OP(op, c_op) \ |
| 41 | static inline void atomic_##op(int i, atomic_t *v) \ | 43 | static inline void atomic_##op(int i, atomic_t *v) \ |
| 42 | { \ | 44 | { \ |
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 5394b9c5f914..17b98a87e5e2 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h | |||
| @@ -65,6 +65,8 @@ static __inline__ void atomic_set(atomic_t *v, int i) | |||
| 65 | _atomic_spin_unlock_irqrestore(v, flags); | 65 | _atomic_spin_unlock_irqrestore(v, flags); |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
| 69 | |||
| 68 | static __inline__ int atomic_read(const atomic_t *v) | 70 | static __inline__ int atomic_read(const atomic_t *v) |
| 69 | { | 71 | { |
| 70 | return READ_ONCE((v)->counter); | 72 | return READ_ONCE((v)->counter); |
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 25d42bd3f114..9c601adfc500 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h | |||
| @@ -74,13 +74,6 @@ do { \ | |||
| 74 | ___p1; \ | 74 | ___p1; \ |
| 75 | }) | 75 | }) |
| 76 | 76 | ||
| 77 | /* | ||
| 78 | * This must resolve to hwsync on SMP for the context switch path. | ||
| 79 | * See _switch, and core scheduler context switch memory ordering | ||
| 80 | * comments. | ||
| 81 | */ | ||
| 82 | #define smp_mb__before_spinlock() smp_mb() | ||
| 83 | |||
| 84 | #include <asm-generic/barrier.h> | 77 | #include <asm-generic/barrier.h> |
| 85 | 78 | ||
| 86 | #endif /* _ASM_POWERPC_BARRIER_H */ | 79 | #endif /* _ASM_POWERPC_BARRIER_H */ |
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 8c1b913de6d7..c1b1ec94b06c 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h | |||
| @@ -342,5 +342,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) | |||
| 342 | #define arch_read_relax(lock) __rw_yield(lock) | 342 | #define arch_read_relax(lock) __rw_yield(lock) |
| 343 | #define arch_write_relax(lock) __rw_yield(lock) | 343 | #define arch_write_relax(lock) __rw_yield(lock) |
| 344 | 344 | ||
| 345 | /* See include/linux/spinlock.h */ | ||
| 346 | #define smp_mb__after_spinlock() smp_mb() | ||
| 347 | |||
| 345 | #endif /* __KERNEL__ */ | 348 | #endif /* __KERNEL__ */ |
| 346 | #endif /* __ASM_SPINLOCK_H */ | 349 | #endif /* __ASM_SPINLOCK_H */ |
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index ee3f11c43cda..7643e979e333 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h | |||
| @@ -29,6 +29,8 @@ int atomic_xchg(atomic_t *, int); | |||
| 29 | int __atomic_add_unless(atomic_t *, int, int); | 29 | int __atomic_add_unless(atomic_t *, int, int); |
| 30 | void atomic_set(atomic_t *, int); | 30 | void atomic_set(atomic_t *, int); |
| 31 | 31 | ||
| 32 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
| 33 | |||
| 32 | #define atomic_read(v) ACCESS_ONCE((v)->counter) | 34 | #define atomic_read(v) ACCESS_ONCE((v)->counter) |
| 33 | 35 | ||
| 34 | #define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) | 36 | #define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v))) |
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index a93774255136..53a423e7cb92 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h | |||
| @@ -101,6 +101,8 @@ static inline void atomic_set(atomic_t *v, int n) | |||
| 101 | _atomic_xchg(&v->counter, n); | 101 | _atomic_xchg(&v->counter, n); |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | #define atomic_set_release(v, i) atomic_set((v), (i)) | ||
| 105 | |||
| 104 | /* A 64bit atomic type */ | 106 | /* A 64bit atomic type */ |
| 105 | 107 | ||
| 106 | typedef struct { | 108 | typedef struct { |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 323cb065be5e..6e01f585d57c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -55,6 +55,7 @@ config X86 | |||
| 55 | select ARCH_HAS_KCOV if X86_64 | 55 | select ARCH_HAS_KCOV if X86_64 |
| 56 | select ARCH_HAS_MMIO_FLUSH | 56 | select ARCH_HAS_MMIO_FLUSH |
| 57 | select ARCH_HAS_PMEM_API if X86_64 | 57 | select ARCH_HAS_PMEM_API if X86_64 |
| 58 | select ARCH_HAS_REFCOUNT | ||
| 58 | select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 | 59 | select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 |
| 59 | select ARCH_HAS_SET_MEMORY | 60 | select ARCH_HAS_SET_MEMORY |
| 60 | select ARCH_HAS_SG_CHAIN | 61 | select ARCH_HAS_SG_CHAIN |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index cd20ca0b4043..1fc519f3c49e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
| @@ -305,8 +305,6 @@ config DEBUG_ENTRY | |||
| 305 | Some of these sanity checks may slow down kernel entries and | 305 | Some of these sanity checks may slow down kernel entries and |
| 306 | exits or otherwise impact performance. | 306 | exits or otherwise impact performance. |
| 307 | 307 | ||
| 308 | This is currently used to help test NMI code. | ||
| 309 | |||
| 310 | If unsure, say N. | 308 | If unsure, say N. |
| 311 | 309 | ||
| 312 | config DEBUG_NMI_SELFTEST | 310 | config DEBUG_NMI_SELFTEST |
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 9976fcecd17e..af28a8a24366 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | # Makefile for the x86 low level entry code | 2 | # Makefile for the x86 low level entry code |
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y | ||
| 6 | OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y | 5 | OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y |
| 7 | 6 | ||
| 8 | CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) | 7 | CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) |
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 05ed3d393da7..640aafebdc00 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | #include <linux/jump_label.h> | 1 | #include <linux/jump_label.h> |
| 2 | #include <asm/unwind_hints.h> | ||
| 2 | 3 | ||
| 3 | /* | 4 | /* |
| 4 | 5 | ||
| @@ -112,6 +113,7 @@ For 32-bit we have the following conventions - kernel is built with | |||
| 112 | movq %rdx, 12*8+\offset(%rsp) | 113 | movq %rdx, 12*8+\offset(%rsp) |
| 113 | movq %rsi, 13*8+\offset(%rsp) | 114 | movq %rsi, 13*8+\offset(%rsp) |
| 114 | movq %rdi, 14*8+\offset(%rsp) | 115 | movq %rdi, 14*8+\offset(%rsp) |
| 116 | UNWIND_HINT_REGS offset=\offset extra=0 | ||
| 115 | .endm | 117 | .endm |
| 116 | .macro SAVE_C_REGS offset=0 | 118 | .macro SAVE_C_REGS offset=0 |
| 117 | SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 | 119 | SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 |
| @@ -136,6 +138,7 @@ For 32-bit we have the following conventions - kernel is built with | |||
| 136 | movq %r12, 3*8+\offset(%rsp) | 138 | movq %r12, 3*8+\offset(%rsp) |
| 137 | movq %rbp, 4*8+\offset(%rsp) | 139 | movq %rbp, 4*8+\offset(%rsp) |
| 138 | movq %rbx, 5*8+\offset(%rsp) | 140 | movq %rbx, 5*8+\offset(%rsp) |
| 141 | UNWIND_HINT_REGS offset=\offset | ||
| 139 | .endm | 142 | .endm |
| 140 | 143 | ||
| 141 | .macro RESTORE_EXTRA_REGS offset=0 | 144 | .macro RESTORE_EXTRA_REGS offset=0 |
| @@ -145,6 +148,7 @@ For 32-bit we have the following conventions - kernel is built with | |||
| 145 | movq 3*8+\offset(%rsp), %r12 | 148 | movq 3*8+\offset(%rsp), %r12 |
| 146 | movq 4*8+\offset(%rsp), %rbp | 149 | movq 4*8+\offset(%rsp), %rbp |
| 147 | movq 5*8+\offset(%rsp), %rbx | 150 | movq 5*8+\offset(%rsp), %rbx |
| 151 | UNWIND_HINT_REGS offset=\offset extra=0 | ||
| 148 | .endm | 152 | .endm |
| 149 | 153 | ||
| 150 | .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 | 154 | .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 |
| @@ -167,6 +171,7 @@ For 32-bit we have the following conventions - kernel is built with | |||
| 167 | .endif | 171 | .endif |
| 168 | movq 13*8(%rsp), %rsi | 172 | movq 13*8(%rsp), %rsi |
| 169 | movq 14*8(%rsp), %rdi | 173 | movq 14*8(%rsp), %rdi |
| 174 | UNWIND_HINT_IRET_REGS offset=16*8 | ||
| 170 | .endm | 175 | .endm |
| 171 | .macro RESTORE_C_REGS | 176 | .macro RESTORE_C_REGS |
| 172 | RESTORE_C_REGS_HELPER 1,1,1,1,1 | 177 | RESTORE_C_REGS_HELPER 1,1,1,1,1 |
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 6d078b89a5e8..64b233ab7cad 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <asm/smap.h> | 36 | #include <asm/smap.h> |
| 37 | #include <asm/pgtable_types.h> | 37 | #include <asm/pgtable_types.h> |
| 38 | #include <asm/export.h> | 38 | #include <asm/export.h> |
| 39 | #include <asm/frame.h> | ||
| 39 | #include <linux/err.h> | 40 | #include <linux/err.h> |
| 40 | 41 | ||
| 41 | .code64 | 42 | .code64 |
| @@ -43,9 +44,10 @@ | |||
| 43 | 44 | ||
| 44 | #ifdef CONFIG_PARAVIRT | 45 | #ifdef CONFIG_PARAVIRT |
| 45 | ENTRY(native_usergs_sysret64) | 46 | ENTRY(native_usergs_sysret64) |
| 47 | UNWIND_HINT_EMPTY | ||
| 46 | swapgs | 48 | swapgs |
| 47 | sysretq | 49 | sysretq |
| 48 | ENDPROC(native_usergs_sysret64) | 50 | END(native_usergs_sysret64) |
| 49 | #endif /* CONFIG_PARAVIRT */ | 51 | #endif /* CONFIG_PARAVIRT */ |
| 50 | 52 | ||
| 51 | .macro TRACE_IRQS_IRETQ | 53 | .macro TRACE_IRQS_IRETQ |
| @@ -134,6 +136,7 @@ ENDPROC(native_usergs_sysret64) | |||
| 134 | */ | 136 | */ |
| 135 | 137 | ||
| 136 | ENTRY(entry_SYSCALL_64) | 138 | ENTRY(entry_SYSCALL_64) |
| 139 | UNWIND_HINT_EMPTY | ||
| 137 | /* | 140 | /* |
| 138 | * Interrupts are off on entry. | 141 | * Interrupts are off on entry. |
| 139 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, | 142 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, |
| @@ -169,6 +172,7 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) | |||
| 169 | pushq %r10 /* pt_regs->r10 */ | 172 | pushq %r10 /* pt_regs->r10 */ |
| 170 | pushq %r11 /* pt_regs->r11 */ | 173 | pushq %r11 /* pt_regs->r11 */ |
| 171 | sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ | 174 | sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ |
| 175 | UNWIND_HINT_REGS extra=0 | ||
| 172 | 176 | ||
| 173 | /* | 177 | /* |
| 174 | * If we need to do entry work or if we guess we'll need to do | 178 | * If we need to do entry work or if we guess we'll need to do |
| @@ -223,6 +227,7 @@ entry_SYSCALL_64_fastpath: | |||
| 223 | movq EFLAGS(%rsp), %r11 | 227 | movq EFLAGS(%rsp), %r11 |
| 224 | RESTORE_C_REGS_EXCEPT_RCX_R11 | 228 | RESTORE_C_REGS_EXCEPT_RCX_R11 |
| 225 | movq RSP(%rsp), %rsp | 229 | movq RSP(%rsp), %rsp |
| 230 | UNWIND_HINT_EMPTY | ||
| 226 | USERGS_SYSRET64 | 231 | USERGS_SYSRET64 |
| 227 | 232 | ||
| 228 | 1: | 233 | 1: |
| @@ -316,6 +321,7 @@ syscall_return_via_sysret: | |||
| 316 | /* rcx and r11 are already restored (see code above) */ | 321 | /* rcx and r11 are already restored (see code above) */ |
| 317 | RESTORE_C_REGS_EXCEPT_RCX_R11 | 322 | RESTORE_C_REGS_EXCEPT_RCX_R11 |
| 318 | movq RSP(%rsp), %rsp | 323 | movq RSP(%rsp), %rsp |
| 324 | UNWIND_HINT_EMPTY | ||
| 319 | USERGS_SYSRET64 | 325 | USERGS_SYSRET64 |
| 320 | 326 | ||
| 321 | opportunistic_sysret_failed: | 327 | opportunistic_sysret_failed: |
| @@ -343,6 +349,7 @@ ENTRY(stub_ptregs_64) | |||
| 343 | DISABLE_INTERRUPTS(CLBR_ANY) | 349 | DISABLE_INTERRUPTS(CLBR_ANY) |
| 344 | TRACE_IRQS_OFF | 350 | TRACE_IRQS_OFF |
| 345 | popq %rax | 351 | popq %rax |
| 352 | UNWIND_HINT_REGS extra=0 | ||
| 346 | jmp entry_SYSCALL64_slow_path | 353 | jmp entry_SYSCALL64_slow_path |
| 347 | 354 | ||
| 348 | 1: | 355 | 1: |
| @@ -351,6 +358,7 @@ END(stub_ptregs_64) | |||
| 351 | 358 | ||
| 352 | .macro ptregs_stub func | 359 | .macro ptregs_stub func |
| 353 | ENTRY(ptregs_\func) | 360 | ENTRY(ptregs_\func) |
| 361 | UNWIND_HINT_FUNC | ||
| 354 | leaq \func(%rip), %rax | 362 | leaq \func(%rip), %rax |
| 355 | jmp stub_ptregs_64 | 363 | jmp stub_ptregs_64 |
| 356 | END(ptregs_\func) | 364 | END(ptregs_\func) |
| @@ -367,6 +375,7 @@ END(ptregs_\func) | |||
| 367 | * %rsi: next task | 375 | * %rsi: next task |
| 368 | */ | 376 | */ |
| 369 | ENTRY(__switch_to_asm) | 377 | ENTRY(__switch_to_asm) |
| 378 | UNWIND_HINT_FUNC | ||
| 370 | /* | 379 | /* |
| 371 | * Save callee-saved registers | 380 | * Save callee-saved registers |
| 372 | * This must match the order in inactive_task_frame | 381 | * This must match the order in inactive_task_frame |
| @@ -406,6 +415,7 @@ END(__switch_to_asm) | |||
| 406 | * r12: kernel thread arg | 415 | * r12: kernel thread arg |
| 407 | */ | 416 | */ |
| 408 | ENTRY(ret_from_fork) | 417 | ENTRY(ret_from_fork) |
| 418 | UNWIND_HINT_EMPTY | ||
| 409 | movq %rax, %rdi | 419 | movq %rax, %rdi |
| 410 | call schedule_tail /* rdi: 'prev' task parameter */ | 420 | call schedule_tail /* rdi: 'prev' task parameter */ |
| 411 | 421 | ||
| @@ -413,6 +423,7 @@ ENTRY(ret_from_fork) | |||
| 413 | jnz 1f /* kernel threads are uncommon */ | 423 | jnz 1f /* kernel threads are uncommon */ |
| 414 | 424 | ||
| 415 | 2: | 425 | 2: |
| 426 | UNWIND_HINT_REGS | ||
| 416 | movq %rsp, %rdi | 427 | movq %rsp, %rdi |
| 417 | call syscall_return_slowpath /* returns with IRQs disabled */ | 428 | call syscall_return_slowpath /* returns with IRQs disabled */ |
| 418 | TRACE_IRQS_ON /* user mode is traced as IRQS on */ | 429 | TRACE_IRQS_ON /* user mode is traced as IRQS on */ |
| @@ -440,13 +451,102 @@ END(ret_from_fork) | |||
| 440 | ENTRY(irq_entries_start) | 451 | ENTRY(irq_entries_start) |
| 441 | vector=FIRST_EXTERNAL_VECTOR | 452 | vector=FIRST_EXTERNAL_VECTOR |
| 442 | .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) | 453 | .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) |
| 454 | UNWIND_HINT_IRET_REGS | ||
| 443 | pushq $(~vector+0x80) /* Note: always in signed byte range */ | 455 | pushq $(~vector+0x80) /* Note: always in signed byte range */ |
| 444 | vector=vector+1 | ||
| 445 | jmp common_interrupt | 456 | jmp common_interrupt |
| 446 | .align 8 | 457 | .align 8 |
| 458 | vector=vector+1 | ||
| 447 | .endr | 459 | .endr |
| 448 | END(irq_entries_start) | 460 | END(irq_entries_start) |
| 449 | 461 | ||
| 462 | .macro DEBUG_ENTRY_ASSERT_IRQS_OFF | ||
| 463 | #ifdef CONFIG_DEBUG_ENTRY | ||
| 464 | pushfq | ||
| 465 | testl $X86_EFLAGS_IF, (%rsp) | ||
| 466 | jz .Lokay_\@ | ||
| 467 | ud2 | ||
| 468 | .Lokay_\@: | ||
| 469 | addq $8, %rsp | ||
| 470 | #endif | ||
| 471 | .endm | ||
| 472 | |||
| 473 | /* | ||
| 474 | * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers | ||
| 475 | * flags and puts old RSP into old_rsp, and leaves all other GPRs alone. | ||
| 476 | * Requires kernel GSBASE. | ||
| 477 | * | ||
| 478 | * The invariant is that, if irq_count != -1, then the IRQ stack is in use. | ||
| 479 | */ | ||
| 480 | .macro ENTER_IRQ_STACK regs=1 old_rsp | ||
| 481 | DEBUG_ENTRY_ASSERT_IRQS_OFF | ||
| 482 | movq %rsp, \old_rsp | ||
| 483 | |||
| 484 | .if \regs | ||
| 485 | UNWIND_HINT_REGS base=\old_rsp | ||
| 486 | .endif | ||
| 487 | |||
| 488 | incl PER_CPU_VAR(irq_count) | ||
| 489 | jnz .Lirq_stack_push_old_rsp_\@ | ||
| 490 | |||
| 491 | /* | ||
| 492 | * Right now, if we just incremented irq_count to zero, we've | ||
| 493 | * claimed the IRQ stack but we haven't switched to it yet. | ||
| 494 | * | ||
| 495 | * If anything is added that can interrupt us here without using IST, | ||
| 496 | * it must be *extremely* careful to limit its stack usage. This | ||
| 497 | * could include kprobes and a hypothetical future IST-less #DB | ||
| 498 | * handler. | ||
| 499 | * | ||
| 500 | * The OOPS unwinder relies on the word at the top of the IRQ | ||
| 501 | * stack linking back to the previous RSP for the entire time we're | ||
| 502 | * on the IRQ stack. For this to work reliably, we need to write | ||
| 503 | * it before we actually move ourselves to the IRQ stack. | ||
| 504 | */ | ||
| 505 | |||
| 506 | movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8) | ||
| 507 | movq PER_CPU_VAR(irq_stack_ptr), %rsp | ||
| 508 | |||
| 509 | #ifdef CONFIG_DEBUG_ENTRY | ||
| 510 | /* | ||
| 511 | * If the first movq above becomes wrong due to IRQ stack layout | ||
| 512 | * changes, the only way we'll notice is if we try to unwind right | ||
| 513 | * here. Assert that we set up the stack right to catch this type | ||
| 514 | * of bug quickly. | ||
| 515 | */ | ||
| 516 | cmpq -8(%rsp), \old_rsp | ||
| 517 | je .Lirq_stack_okay\@ | ||
| 518 | ud2 | ||
| 519 | .Lirq_stack_okay\@: | ||
| 520 | #endif | ||
| 521 | |||
| 522 | .Lirq_stack_push_old_rsp_\@: | ||
| 523 | pushq \old_rsp | ||
| 524 | |||
| 525 | .if \regs | ||
| 526 | UNWIND_HINT_REGS indirect=1 | ||
| 527 | .endif | ||
| 528 | .endm | ||
| 529 | |||
| 530 | /* | ||
| 531 | * Undoes ENTER_IRQ_STACK. | ||
| 532 | */ | ||
| 533 | .macro LEAVE_IRQ_STACK regs=1 | ||
| 534 | DEBUG_ENTRY_ASSERT_IRQS_OFF | ||
| 535 | /* We need to be off the IRQ stack before decrementing irq_count. */ | ||
| 536 | popq %rsp | ||
| 537 | |||
| 538 | .if \regs | ||
| 539 | UNWIND_HINT_REGS | ||
| 540 | .endif | ||
| 541 | |||
| 542 | /* | ||
| 543 | * As in ENTER_IRQ_STACK, irq_count == 0, we are still claiming | ||
| 544 | * the irq stack but we're not on it. | ||
| 545 | */ | ||
| 546 | |||
| 547 | decl PER_CPU_VAR(irq_count) | ||
| 548 | .endm | ||
| 549 | |||
| 450 | /* | 550 | /* |
| 451 | * Interrupt entry/exit. | 551 | * Interrupt entry/exit. |
| 452 | * | 552 | * |
| @@ -485,17 +585,7 @@ END(irq_entries_start) | |||
| 485 | CALL_enter_from_user_mode | 585 | CALL_enter_from_user_mode |
| 486 | 586 | ||
| 487 | 1: | 587 | 1: |
| 488 | /* | 588 | ENTER_IRQ_STACK old_rsp=%rdi |
| 489 | * Save previous stack pointer, optionally switch to interrupt stack. | ||
| 490 | * irq_count is used to check if a CPU is already on an interrupt stack | ||
| 491 | * or not. While this is essentially redundant with preempt_count it is | ||
| 492 | * a little cheaper to use a separate counter in the PDA (short of | ||
| 493 | * moving irq_enter into assembly, which would be too much work) | ||
| 494 | */ | ||
| 495 | movq %rsp, %rdi | ||
| 496 | incl PER_CPU_VAR(irq_count) | ||
| 497 | cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp | ||
| 498 | pushq %rdi | ||
| 499 | /* We entered an interrupt context - irqs are off: */ | 589 | /* We entered an interrupt context - irqs are off: */ |
| 500 | TRACE_IRQS_OFF | 590 | TRACE_IRQS_OFF |
| 501 | 591 | ||
| @@ -515,10 +605,8 @@ common_interrupt: | |||
| 515 | ret_from_intr: | 605 | ret_from_intr: |
| 516 | DISABLE_INTERRUPTS(CLBR_ANY) | 606 | DISABLE_INTERRUPTS(CLBR_ANY) |
| 517 | TRACE_IRQS_OFF | 607 | TRACE_IRQS_OFF |
| 518 | decl PER_CPU_VAR(irq_count) | ||
| 519 | 608 | ||
| 520 | /* Restore saved previous stack */ | 609 | LEAVE_IRQ_STACK |
| 521 | popq %rsp | ||
| 522 | 610 | ||
| 523 | testb $3, CS(%rsp) | 611 | testb $3, CS(%rsp) |
| 524 | jz retint_kernel | 612 | jz retint_kernel |
| @@ -561,6 +649,7 @@ restore_c_regs_and_iret: | |||
| 561 | INTERRUPT_RETURN | 649 | INTERRUPT_RETURN |
| 562 | 650 | ||
| 563 | ENTRY(native_iret) | 651 | ENTRY(native_iret) |
| 652 | UNWIND_HINT_IRET_REGS | ||
| 564 | /* | 653 | /* |
| 565 | * Are we returning to a stack segment from the LDT? Note: in | 654 | * Are we returning to a stack segment from the LDT? Note: in |
| 566 | * 64-bit mode SS:RSP on the exception stack is always valid. | 655 | * 64-bit mode SS:RSP on the exception stack is always valid. |
| @@ -633,6 +722,7 @@ native_irq_return_ldt: | |||
| 633 | orq PER_CPU_VAR(espfix_stack), %rax | 722 | orq PER_CPU_VAR(espfix_stack), %rax |
| 634 | SWAPGS | 723 | SWAPGS |
| 635 | movq %rax, %rsp | 724 | movq %rax, %rsp |
| 725 | UNWIND_HINT_IRET_REGS offset=8 | ||
| 636 | 726 | ||
| 637 | /* | 727 | /* |
| 638 | * At this point, we cannot write to the stack any more, but we can | 728 | * At this point, we cannot write to the stack any more, but we can |
| @@ -654,6 +744,7 @@ END(common_interrupt) | |||
| 654 | */ | 744 | */ |
| 655 | .macro apicinterrupt3 num sym do_sym | 745 | .macro apicinterrupt3 num sym do_sym |
| 656 | ENTRY(\sym) | 746 | ENTRY(\sym) |
| 747 | UNWIND_HINT_IRET_REGS | ||
| 657 | ASM_CLAC | 748 | ASM_CLAC |
| 658 | pushq $~(\num) | 749 | pushq $~(\num) |
| 659 | .Lcommon_\sym: | 750 | .Lcommon_\sym: |
| @@ -740,6 +831,8 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt | |||
| 740 | 831 | ||
| 741 | .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 | 832 | .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 |
| 742 | ENTRY(\sym) | 833 | ENTRY(\sym) |
| 834 | UNWIND_HINT_IRET_REGS offset=8 | ||
| 835 | |||
| 743 | /* Sanity check */ | 836 | /* Sanity check */ |
| 744 | .if \shift_ist != -1 && \paranoid == 0 | 837 | .if \shift_ist != -1 && \paranoid == 0 |
| 745 | .error "using shift_ist requires paranoid=1" | 838 | .error "using shift_ist requires paranoid=1" |
| @@ -763,6 +856,7 @@ ENTRY(\sym) | |||
| 763 | .else | 856 | .else |
| 764 | call error_entry | 857 | call error_entry |
| 765 | .endif | 858 | .endif |
| 859 | UNWIND_HINT_REGS | ||
| 766 | /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ | 860 | /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ |
| 767 | 861 | ||
| 768 | .if \paranoid | 862 | .if \paranoid |
| @@ -860,6 +954,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0 | |||
| 860 | * edi: new selector | 954 | * edi: new selector |
| 861 | */ | 955 | */ |
| 862 | ENTRY(native_load_gs_index) | 956 | ENTRY(native_load_gs_index) |
| 957 | FRAME_BEGIN | ||
| 863 | pushfq | 958 | pushfq |
| 864 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) | 959 | DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) |
| 865 | SWAPGS | 960 | SWAPGS |
| @@ -868,8 +963,9 @@ ENTRY(native_load_gs_index) | |||
| 868 | 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE | 963 | 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE |
| 869 | SWAPGS | 964 | SWAPGS |
| 870 | popfq | 965 | popfq |
| 966 | FRAME_END | ||
| 871 | ret | 967 | ret |
| 872 | END(native_load_gs_index) | 968 | ENDPROC(native_load_gs_index) |
| 873 | EXPORT_SYMBOL(native_load_gs_index) | 969 | EXPORT_SYMBOL(native_load_gs_index) |
| 874 | 970 | ||
| 875 | _ASM_EXTABLE(.Lgs_change, bad_gs) | 971 | _ASM_EXTABLE(.Lgs_change, bad_gs) |
| @@ -892,14 +988,12 @@ bad_gs: | |||
| 892 | ENTRY(do_softirq_own_stack) | 988 | ENTRY(do_softirq_own_stack) |
| 893 | pushq %rbp | 989 | pushq %rbp |
| 894 | mov %rsp, %rbp | 990 | mov %rsp, %rbp |
| 895 | incl PER_CPU_VAR(irq_count) | 991 | ENTER_IRQ_STACK regs=0 old_rsp=%r11 |
| 896 | cmove PER_CPU_VAR(irq_stack_ptr), %rsp | ||
| 897 | push %rbp /* frame pointer backlink */ | ||
| 898 | call __do_softirq | 992 | call __do_softirq |
| 993 | LEAVE_IRQ_STACK regs=0 | ||
| 899 | leaveq | 994 | leaveq |
| 900 | decl PER_CPU_VAR(irq_count) | ||
| 901 | ret | 995 | ret |
| 902 | END(do_softirq_own_stack) | 996 | ENDPROC(do_softirq_own_stack) |
| 903 | 997 | ||
| 904 | #ifdef CONFIG_XEN | 998 | #ifdef CONFIG_XEN |
| 905 | idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 | 999 | idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 |
| @@ -923,14 +1017,14 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ | |||
| 923 | * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will | 1017 | * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will |
| 924 | * see the correct pointer to the pt_regs | 1018 | * see the correct pointer to the pt_regs |
| 925 | */ | 1019 | */ |
| 1020 | UNWIND_HINT_FUNC | ||
| 926 | movq %rdi, %rsp /* we don't return, adjust the stack frame */ | 1021 | movq %rdi, %rsp /* we don't return, adjust the stack frame */ |
| 927 | 11: incl PER_CPU_VAR(irq_count) | 1022 | UNWIND_HINT_REGS |
| 928 | movq %rsp, %rbp | 1023 | |
| 929 | cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp | 1024 | ENTER_IRQ_STACK old_rsp=%r10 |
| 930 | pushq %rbp /* frame pointer backlink */ | ||
| 931 | call xen_evtchn_do_upcall | 1025 | call xen_evtchn_do_upcall |
| 932 | popq %rsp | 1026 | LEAVE_IRQ_STACK |
| 933 | decl PER_CPU_VAR(irq_count) | 1027 | |
| 934 | #ifndef CONFIG_PREEMPT | 1028 | #ifndef CONFIG_PREEMPT |
| 935 | call xen_maybe_preempt_hcall | 1029 | call xen_maybe_preempt_hcall |
| 936 | #endif | 1030 | #endif |
| @@ -951,6 +1045,7 @@ END(xen_do_hypervisor_callback) | |||
| 951 | * with its current contents: any discrepancy means we are in category 1. | 1045 | * with its current contents: any discrepancy means we are in category 1. |
| 952 | */ | 1046 | */ |
| 953 | ENTRY(xen_failsafe_callback) | 1047 | ENTRY(xen_failsafe_callback) |
| 1048 | UNWIND_HINT_EMPTY | ||
| 954 | movl %ds, %ecx | 1049 | movl %ds, %ecx |
| 955 | cmpw %cx, 0x10(%rsp) | 1050 | cmpw %cx, 0x10(%rsp) |
| 956 | jne 1f | 1051 | jne 1f |
| @@ -970,11 +1065,13 @@ ENTRY(xen_failsafe_callback) | |||
| 970 | pushq $0 /* RIP */ | 1065 | pushq $0 /* RIP */ |
| 971 | pushq %r11 | 1066 | pushq %r11 |
| 972 | pushq %rcx | 1067 | pushq %rcx |
| 1068 | UNWIND_HINT_IRET_REGS offset=8 | ||
| 973 | jmp general_protection | 1069 | jmp general_protection |
| 974 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | 1070 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ |
| 975 | movq (%rsp), %rcx | 1071 | movq (%rsp), %rcx |
| 976 | movq 8(%rsp), %r11 | 1072 | movq 8(%rsp), %r11 |
| 977 | addq $0x30, %rsp | 1073 | addq $0x30, %rsp |
| 1074 | UNWIND_HINT_IRET_REGS | ||
| 978 | pushq $-1 /* orig_ax = -1 => not a system call */ | 1075 | pushq $-1 /* orig_ax = -1 => not a system call */ |
| 979 | ALLOC_PT_GPREGS_ON_STACK | 1076 | ALLOC_PT_GPREGS_ON_STACK |
| 980 | SAVE_C_REGS | 1077 | SAVE_C_REGS |
| @@ -1020,6 +1117,7 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec | |||
| 1020 | * Return: ebx=0: need swapgs on exit, ebx=1: otherwise | 1117 | * Return: ebx=0: need swapgs on exit, ebx=1: otherwise |
| 1021 | */ | 1118 | */ |
| 1022 | ENTRY(paranoid_entry) | 1119 | ENTRY(paranoid_entry) |
| 1120 | UNWIND_HINT_FUNC | ||
| 1023 | cld | 1121 | cld |
| 1024 | SAVE_C_REGS 8 | 1122 | SAVE_C_REGS 8 |
| 1025 | SAVE_EXTRA_REGS 8 | 1123 | SAVE_EXTRA_REGS 8 |
| @@ -1047,6 +1145,7 @@ END(paranoid_entry) | |||
| 1047 | * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) | 1145 | * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) |
| 1048 | */ | 1146 | */ |
| 1049 | ENTRY(paranoid_exit) | 1147 | ENTRY(paranoid_exit) |
| 1148 | UNWIND_HINT_REGS | ||
| 1050 | DISABLE_INTERRUPTS(CLBR_ANY) | 1149 | DISABLE_INTERRUPTS(CLBR_ANY) |
| 1051 | TRACE_IRQS_OFF_DEBUG | 1150 | TRACE_IRQS_OFF_DEBUG |
| 1052 | testl %ebx, %ebx /* swapgs needed? */ | 1151 | testl %ebx, %ebx /* swapgs needed? */ |
| @@ -1068,6 +1167,7 @@ END(paranoid_exit) | |||
| 1068 | * Return: EBX=0: came from user mode; EBX=1: otherwise | 1167 | * Return: EBX=0: came from user mode; EBX=1: otherwise |
| 1069 | */ | 1168 | */ |
| 1070 | ENTRY(error_entry) | 1169 | ENTRY(error_entry) |
| 1170 | UNWIND_HINT_FUNC | ||
| 1071 | cld | 1171 | cld |
| 1072 | SAVE_C_REGS 8 | 1172 | SAVE_C_REGS 8 |
| 1073 | SAVE_EXTRA_REGS 8 | 1173 | SAVE_EXTRA_REGS 8 |
| @@ -1152,6 +1252,7 @@ END(error_entry) | |||
| 1152 | * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode | 1252 | * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode |
| 1153 | */ | 1253 | */ |
| 1154 | ENTRY(error_exit) | 1254 | ENTRY(error_exit) |
| 1255 | UNWIND_HINT_REGS | ||
| 1155 | DISABLE_INTERRUPTS(CLBR_ANY) | 1256 | DISABLE_INTERRUPTS(CLBR_ANY) |
| 1156 | TRACE_IRQS_OFF | 1257 | TRACE_IRQS_OFF |
| 1157 | testl %ebx, %ebx | 1258 | testl %ebx, %ebx |
| @@ -1161,6 +1262,7 @@ END(error_exit) | |||
| 1161 | 1262 | ||
| 1162 | /* Runs on exception stack */ | 1263 | /* Runs on exception stack */ |
| 1163 | ENTRY(nmi) | 1264 | ENTRY(nmi) |
| 1265 | UNWIND_HINT_IRET_REGS | ||
| 1164 | /* | 1266 | /* |
| 1165 | * Fix up the exception frame if we're on Xen. | 1267 | * Fix up the exception frame if we're on Xen. |
| 1166 | * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most | 1268 | * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most |
| @@ -1234,11 +1336,13 @@ ENTRY(nmi) | |||
| 1234 | cld | 1336 | cld |
| 1235 | movq %rsp, %rdx | 1337 | movq %rsp, %rdx |
| 1236 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp | 1338 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
| 1339 | UNWIND_HINT_IRET_REGS base=%rdx offset=8 | ||
| 1237 | pushq 5*8(%rdx) /* pt_regs->ss */ | 1340 | pushq 5*8(%rdx) /* pt_regs->ss */ |
| 1238 | pushq 4*8(%rdx) /* pt_regs->rsp */ | 1341 | pushq 4*8(%rdx) /* pt_regs->rsp */ |
| 1239 | pushq 3*8(%rdx) /* pt_regs->flags */ | 1342 | pushq 3*8(%rdx) /* pt_regs->flags */ |
| 1240 | pushq 2*8(%rdx) /* pt_regs->cs */ | 1343 | pushq 2*8(%rdx) /* pt_regs->cs */ |
| 1241 | pushq 1*8(%rdx) /* pt_regs->rip */ | 1344 | pushq 1*8(%rdx) /* pt_regs->rip */ |
| 1345 | UNWIND_HINT_IRET_REGS | ||
| 1242 | pushq $-1 /* pt_regs->orig_ax */ | 1346 | pushq $-1 /* pt_regs->orig_ax */ |
| 1243 | pushq %rdi /* pt_regs->di */ | 1347 | pushq %rdi /* pt_regs->di */ |
| 1244 | pushq %rsi /* pt_regs->si */ | 1348 | pushq %rsi /* pt_regs->si */ |
| @@ -1255,6 +1359,7 @@ ENTRY(nmi) | |||
| 1255 | pushq %r13 /* pt_regs->r13 */ | 1359 | pushq %r13 /* pt_regs->r13 */ |
| 1256 | pushq %r14 /* pt_regs->r14 */ | 1360 | pushq %r14 /* pt_regs->r14 */ |
| 1257 | pushq %r15 /* pt_regs->r15 */ | 1361 | pushq %r15 /* pt_regs->r15 */ |
| 1362 | UNWIND_HINT_REGS | ||
| 1258 | ENCODE_FRAME_POINTER | 1363 | ENCODE_FRAME_POINTER |
| 1259 | 1364 | ||
| 1260 | /* | 1365 | /* |
| @@ -1409,6 +1514,7 @@ first_nmi: | |||
| 1409 | .rept 5 | 1514 | .rept 5 |
| 1410 | pushq 11*8(%rsp) | 1515 | pushq 11*8(%rsp) |
| 1411 | .endr | 1516 | .endr |
| 1517 | UNWIND_HINT_IRET_REGS | ||
| 1412 | 1518 | ||
| 1413 | /* Everything up to here is safe from nested NMIs */ | 1519 | /* Everything up to here is safe from nested NMIs */ |
| 1414 | 1520 | ||
| @@ -1424,6 +1530,7 @@ first_nmi: | |||
| 1424 | pushq $__KERNEL_CS /* CS */ | 1530 | pushq $__KERNEL_CS /* CS */ |
| 1425 | pushq $1f /* RIP */ | 1531 | pushq $1f /* RIP */ |
| 1426 | INTERRUPT_RETURN /* continues at repeat_nmi below */ | 1532 | INTERRUPT_RETURN /* continues at repeat_nmi below */ |
| 1533 | UNWIND_HINT_IRET_REGS | ||
| 1427 | 1: | 1534 | 1: |
| 1428 | #endif | 1535 | #endif |
| 1429 | 1536 | ||
| @@ -1473,6 +1580,7 @@ end_repeat_nmi: | |||
| 1473 | * exceptions might do. | 1580 | * exceptions might do. |
| 1474 | */ | 1581 | */ |
| 1475 | call paranoid_entry | 1582 | call paranoid_entry |
| 1583 | UNWIND_HINT_REGS | ||
| 1476 | 1584 | ||
| 1477 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | 1585 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
| 1478 | movq %rsp, %rdi | 1586 | movq %rsp, %rdi |
| @@ -1510,17 +1618,19 @@ nmi_restore: | |||
| 1510 | END(nmi) | 1618 | END(nmi) |
| 1511 | 1619 | ||
| 1512 | ENTRY(ignore_sysret) | 1620 | ENTRY(ignore_sysret) |
| 1621 | UNWIND_HINT_EMPTY | ||
| 1513 | mov $-ENOSYS, %eax | 1622 | mov $-ENOSYS, %eax |
| 1514 | sysret | 1623 | sysret |
| 1515 | END(ignore_sysret) | 1624 | END(ignore_sysret) |
| 1516 | 1625 | ||
| 1517 | ENTRY(rewind_stack_do_exit) | 1626 | ENTRY(rewind_stack_do_exit) |
| 1627 | UNWIND_HINT_FUNC | ||
| 1518 | /* Prevent any naive code from trying to unwind to our caller. */ | 1628 | /* Prevent any naive code from trying to unwind to our caller. */ |
| 1519 | xorl %ebp, %ebp | 1629 | xorl %ebp, %ebp |
| 1520 | 1630 | ||
| 1521 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rax | 1631 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rax |
| 1522 | leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp | 1632 | leaq -PTREGS_SIZE(%rax), %rsp |
| 1633 | UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE | ||
| 1523 | 1634 | ||
| 1524 | call do_exit | 1635 | call do_exit |
| 1525 | 1: jmp 1b | ||
| 1526 | END(rewind_stack_do_exit) | 1636 | END(rewind_stack_do_exit) |
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 7a9df3beb89b..676ee5807d86 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h | |||
| @@ -74,6 +74,9 @@ | |||
| 74 | # define _ASM_EXTABLE_EX(from, to) \ | 74 | # define _ASM_EXTABLE_EX(from, to) \ |
| 75 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) | 75 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) |
| 76 | 76 | ||
| 77 | # define _ASM_EXTABLE_REFCOUNT(from, to) \ | ||
| 78 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) | ||
| 79 | |||
| 77 | # define _ASM_NOKPROBE(entry) \ | 80 | # define _ASM_NOKPROBE(entry) \ |
| 78 | .pushsection "_kprobe_blacklist","aw" ; \ | 81 | .pushsection "_kprobe_blacklist","aw" ; \ |
| 79 | _ASM_ALIGN ; \ | 82 | _ASM_ALIGN ; \ |
| @@ -123,6 +126,9 @@ | |||
| 123 | # define _ASM_EXTABLE_EX(from, to) \ | 126 | # define _ASM_EXTABLE_EX(from, to) \ |
| 124 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) | 127 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) |
| 125 | 128 | ||
| 129 | # define _ASM_EXTABLE_REFCOUNT(from, to) \ | ||
| 130 | _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) | ||
| 131 | |||
| 126 | /* For C file, we already have NOKPROBE_SYMBOL macro */ | 132 | /* For C file, we already have NOKPROBE_SYMBOL macro */ |
| 127 | #endif | 133 | #endif |
| 128 | 134 | ||
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 33380b871463..0874ebda3069 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h | |||
| @@ -197,35 +197,56 @@ static inline int atomic_xchg(atomic_t *v, int new) | |||
| 197 | return xchg(&v->counter, new); | 197 | return xchg(&v->counter, new); |
| 198 | } | 198 | } |
| 199 | 199 | ||
| 200 | #define ATOMIC_OP(op) \ | 200 | static inline void atomic_and(int i, atomic_t *v) |
| 201 | static inline void atomic_##op(int i, atomic_t *v) \ | 201 | { |
| 202 | { \ | 202 | asm volatile(LOCK_PREFIX "andl %1,%0" |
| 203 | asm volatile(LOCK_PREFIX #op"l %1,%0" \ | 203 | : "+m" (v->counter) |
| 204 | : "+m" (v->counter) \ | 204 | : "ir" (i) |
| 205 | : "ir" (i) \ | 205 | : "memory"); |
| 206 | : "memory"); \ | 206 | } |
| 207 | |||
| 208 | static inline int atomic_fetch_and(int i, atomic_t *v) | ||
| 209 | { | ||
| 210 | int val = atomic_read(v); | ||
| 211 | |||
| 212 | do { } while (!atomic_try_cmpxchg(v, &val, val & i)); | ||
| 213 | |||
| 214 | return val; | ||
| 207 | } | 215 | } |
| 208 | 216 | ||
| 209 | #define ATOMIC_FETCH_OP(op, c_op) \ | 217 | static inline void atomic_or(int i, atomic_t *v) |
| 210 | static inline int atomic_fetch_##op(int i, atomic_t *v) \ | 218 | { |
| 211 | { \ | 219 | asm volatile(LOCK_PREFIX "orl %1,%0" |
| 212 | int val = atomic_read(v); \ | 220 | : "+m" (v->counter) |
| 213 | do { \ | 221 | : "ir" (i) |
| 214 | } while (!atomic_try_cmpxchg(v, &val, val c_op i)); \ | 222 | : "memory"); |
| 215 | return val; \ | ||
| 216 | } | 223 | } |
| 217 | 224 | ||
| 218 | #define ATOMIC_OPS(op, c_op) \ | 225 | static inline int atomic_fetch_or(int i, atomic_t *v) |
| 219 | ATOMIC_OP(op) \ | 226 | { |
| 220 | ATOMIC_FETCH_OP(op, c_op) | 227 | int val = atomic_read(v); |
| 221 | 228 | ||
| 222 | ATOMIC_OPS(and, &) | 229 | do { } while (!atomic_try_cmpxchg(v, &val, val | i)); |
| 223 | ATOMIC_OPS(or , |) | ||
| 224 | ATOMIC_OPS(xor, ^) | ||
| 225 | 230 | ||
| 226 | #undef ATOMIC_OPS | 231 | return val; |
| 227 | #undef ATOMIC_FETCH_OP | 232 | } |
| 228 | #undef ATOMIC_OP | 233 | |
| 234 | static inline void atomic_xor(int i, atomic_t *v) | ||
| 235 | { | ||
| 236 | asm volatile(LOCK_PREFIX "xorl %1,%0" | ||
| 237 | : "+m" (v->counter) | ||
| 238 | : "ir" (i) | ||
| 239 | : "memory"); | ||
| 240 | } | ||
| 241 | |||
| 242 | static inline int atomic_fetch_xor(int i, atomic_t *v) | ||
| 243 | { | ||
| 244 | int val = atomic_read(v); | ||
| 245 | |||
| 246 | do { } while (!atomic_try_cmpxchg(v, &val, val ^ i)); | ||
| 247 | |||
| 248 | return val; | ||
| 249 | } | ||
| 229 | 250 | ||
| 230 | /** | 251 | /** |
| 231 | * __atomic_add_unless - add unless the number is already a given value | 252 | * __atomic_add_unless - add unless the number is already a given value |
| @@ -239,10 +260,12 @@ ATOMIC_OPS(xor, ^) | |||
| 239 | static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) | 260 | static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) |
| 240 | { | 261 | { |
| 241 | int c = atomic_read(v); | 262 | int c = atomic_read(v); |
| 263 | |||
| 242 | do { | 264 | do { |
| 243 | if (unlikely(c == u)) | 265 | if (unlikely(c == u)) |
| 244 | break; | 266 | break; |
| 245 | } while (!atomic_try_cmpxchg(v, &c, c + a)); | 267 | } while (!atomic_try_cmpxchg(v, &c, c + a)); |
| 268 | |||
| 246 | return c; | 269 | return c; |
| 247 | } | 270 | } |
| 248 | 271 | ||
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 71d7705fb303..9e206f31ce2a 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h | |||
| @@ -312,37 +312,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) | |||
| 312 | #undef alternative_atomic64 | 312 | #undef alternative_atomic64 |
| 313 | #undef __alternative_atomic64 | 313 | #undef __alternative_atomic64 |
| 314 | 314 | ||
| 315 | #define ATOMIC64_OP(op, c_op) \ | 315 | static inline void atomic64_and(long long i, atomic64_t *v) |
| 316 | static inline void atomic64_##op(long long i, atomic64_t *v) \ | 316 | { |
| 317 | { \ | 317 | long long old, c = 0; |
| 318 | long long old, c = 0; \ | 318 | |
| 319 | while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ | 319 | while ((old = atomic64_cmpxchg(v, c, c & i)) != c) |
| 320 | c = old; \ | 320 | c = old; |
| 321 | } | 321 | } |
| 322 | 322 | ||
| 323 | #define ATOMIC64_FETCH_OP(op, c_op) \ | 323 | static inline long long atomic64_fetch_and(long long i, atomic64_t *v) |
| 324 | static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ | 324 | { |
| 325 | { \ | 325 | long long old, c = 0; |
| 326 | long long old, c = 0; \ | 326 | |
| 327 | while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ | 327 | while ((old = atomic64_cmpxchg(v, c, c & i)) != c) |
| 328 | c = old; \ | 328 | c = old; |
| 329 | return old; \ | 329 | |
| 330 | return old; | ||
| 330 | } | 331 | } |
| 331 | 332 | ||
| 332 | ATOMIC64_FETCH_OP(add, +) | 333 | static inline void atomic64_or(long long i, atomic64_t *v) |
| 334 | { | ||
| 335 | long long old, c = 0; | ||
| 333 | 336 | ||
| 334 | #define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) | 337 | while ((old = atomic64_cmpxchg(v, c, c | i)) != c) |
| 338 | c = old; | ||
| 339 | } | ||
| 340 | |||
| 341 | static inline long long atomic64_fetch_or(long long i, atomic64_t *v) | ||
| 342 | { | ||
| 343 | long long old, c = 0; | ||
| 344 | |||
| 345 | while ((old = atomic64_cmpxchg(v, c, c | i)) != c) | ||
| 346 | c = old; | ||
| 347 | |||
| 348 | return old; | ||
| 349 | } | ||
| 335 | 350 | ||
| 336 | #define ATOMIC64_OPS(op, c_op) \ | 351 | static inline void atomic64_xor(long long i, atomic64_t *v) |
| 337 | ATOMIC64_OP(op, c_op) \ | 352 | { |
| 338 | ATOMIC64_FETCH_OP(op, c_op) | 353 | long long old, c = 0; |
| 354 | |||
| 355 | while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) | ||
| 356 | c = old; | ||
| 357 | } | ||
| 339 | 358 | ||
| 340 | ATOMIC64_OPS(and, &) | 359 | static inline long long atomic64_fetch_xor(long long i, atomic64_t *v) |
| 341 | ATOMIC64_OPS(or, |) | 360 | { |
| 342 | ATOMIC64_OPS(xor, ^) | 361 | long long old, c = 0; |
| 362 | |||
| 363 | while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) | ||
| 364 | c = old; | ||
| 365 | |||
| 366 | return old; | ||
| 367 | } | ||
| 343 | 368 | ||
| 344 | #undef ATOMIC64_OPS | 369 | static inline long long atomic64_fetch_add(long long i, atomic64_t *v) |
| 345 | #undef ATOMIC64_FETCH_OP | 370 | { |
| 346 | #undef ATOMIC64_OP | 371 | long long old, c = 0; |
| 372 | |||
| 373 | while ((old = atomic64_cmpxchg(v, c, c + i)) != c) | ||
| 374 | c = old; | ||
| 375 | |||
| 376 | return old; | ||
| 377 | } | ||
| 378 | |||
| 379 | #define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) | ||
| 347 | 380 | ||
| 348 | #endif /* _ASM_X86_ATOMIC64_32_H */ | 381 | #endif /* _ASM_X86_ATOMIC64_32_H */ |
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 6189a433c9a9..5d9de36a2f04 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h | |||
| @@ -177,7 +177,7 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) | |||
| 177 | } | 177 | } |
| 178 | 178 | ||
| 179 | #define atomic64_try_cmpxchg atomic64_try_cmpxchg | 179 | #define atomic64_try_cmpxchg atomic64_try_cmpxchg |
| 180 | static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new) | 180 | static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) |
| 181 | { | 181 | { |
| 182 | return try_cmpxchg(&v->counter, old, new); | 182 | return try_cmpxchg(&v->counter, old, new); |
| 183 | } | 183 | } |
| @@ -198,7 +198,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new) | |||
| 198 | */ | 198 | */ |
| 199 | static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) | 199 | static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) |
| 200 | { | 200 | { |
| 201 | long c = atomic64_read(v); | 201 | s64 c = atomic64_read(v); |
| 202 | do { | 202 | do { |
| 203 | if (unlikely(c == u)) | 203 | if (unlikely(c == u)) |
| 204 | return false; | 204 | return false; |
| @@ -217,7 +217,7 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) | |||
| 217 | */ | 217 | */ |
| 218 | static inline long atomic64_dec_if_positive(atomic64_t *v) | 218 | static inline long atomic64_dec_if_positive(atomic64_t *v) |
| 219 | { | 219 | { |
| 220 | long dec, c = atomic64_read(v); | 220 | s64 dec, c = atomic64_read(v); |
| 221 | do { | 221 | do { |
| 222 | dec = c - 1; | 222 | dec = c - 1; |
| 223 | if (unlikely(dec < 0)) | 223 | if (unlikely(dec < 0)) |
| @@ -226,34 +226,55 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) | |||
| 226 | return dec; | 226 | return dec; |
| 227 | } | 227 | } |
| 228 | 228 | ||
| 229 | #define ATOMIC64_OP(op) \ | 229 | static inline void atomic64_and(long i, atomic64_t *v) |
| 230 | static inline void atomic64_##op(long i, atomic64_t *v) \ | 230 | { |
| 231 | { \ | 231 | asm volatile(LOCK_PREFIX "andq %1,%0" |
| 232 | asm volatile(LOCK_PREFIX #op"q %1,%0" \ | 232 | : "+m" (v->counter) |
| 233 | : "+m" (v->counter) \ | 233 | : "er" (i) |
| 234 | : "er" (i) \ | 234 | : "memory"); |
| 235 | : "memory"); \ | ||
| 236 | } | 235 | } |
| 237 | 236 | ||
| 238 | #define ATOMIC64_FETCH_OP(op, c_op) \ | 237 | static inline long atomic64_fetch_and(long i, atomic64_t *v) |
| 239 | static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ | 238 | { |
| 240 | { \ | 239 | s64 val = atomic64_read(v); |
| 241 | long val = atomic64_read(v); \ | 240 | |
| 242 | do { \ | 241 | do { |
| 243 | } while (!atomic64_try_cmpxchg(v, &val, val c_op i)); \ | 242 | } while (!atomic64_try_cmpxchg(v, &val, val & i)); |
| 244 | return val; \ | 243 | return val; |
| 245 | } | 244 | } |
| 246 | 245 | ||
| 247 | #define ATOMIC64_OPS(op, c_op) \ | 246 | static inline void atomic64_or(long i, atomic64_t *v) |
| 248 | ATOMIC64_OP(op) \ | 247 | { |
| 249 | ATOMIC64_FETCH_OP(op, c_op) | 248 | asm volatile(LOCK_PREFIX "orq %1,%0" |
| 249 | : "+m" (v->counter) | ||
| 250 | : "er" (i) | ||
| 251 | : "memory"); | ||
| 252 | } | ||
| 250 | 253 | ||
| 251 | ATOMIC64_OPS(and, &) | 254 | static inline long atomic64_fetch_or(long i, atomic64_t *v) |
| 252 | ATOMIC64_OPS(or, |) | 255 | { |
| 253 | ATOMIC64_OPS(xor, ^) | 256 | s64 val = atomic64_read(v); |
| 254 | 257 | ||
| 255 | #undef ATOMIC64_OPS | 258 | do { |
| 256 | #undef ATOMIC64_FETCH_OP | 259 | } while (!atomic64_try_cmpxchg(v, &val, val | i)); |
| 257 | #undef ATOMIC64_OP | 260 | return val; |
| 261 | } | ||
| 262 | |||
| 263 | static inline void atomic64_xor(long i, atomic64_t *v) | ||
| 264 | { | ||
| 265 | asm volatile(LOCK_PREFIX "xorq %1,%0" | ||
| 266 | : "+m" (v->counter) | ||
| 267 | : "er" (i) | ||
| 268 | : "memory"); | ||
| 269 | } | ||
| 270 | |||
| 271 | static inline long atomic64_fetch_xor(long i, atomic64_t *v) | ||
| 272 | { | ||
| 273 | s64 val = atomic64_read(v); | ||
| 274 | |||
| 275 | do { | ||
| 276 | } while (!atomic64_try_cmpxchg(v, &val, val ^ i)); | ||
| 277 | return val; | ||
| 278 | } | ||
| 258 | 279 | ||
| 259 | #endif /* _ASM_X86_ATOMIC64_64_H */ | 280 | #endif /* _ASM_X86_ATOMIC64_64_H */ |
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index d90296d061e8..b5069e802d5c 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h | |||
| @@ -157,7 +157,7 @@ extern void __add_wrong_size(void) | |||
| 157 | #define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \ | 157 | #define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \ |
| 158 | ({ \ | 158 | ({ \ |
| 159 | bool success; \ | 159 | bool success; \ |
| 160 | __typeof__(_ptr) _old = (_pold); \ | 160 | __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ |
| 161 | __typeof__(*(_ptr)) __old = *_old; \ | 161 | __typeof__(*(_ptr)) __old = *_old; \ |
| 162 | __typeof__(*(_ptr)) __new = (_new); \ | 162 | __typeof__(*(_ptr)) __new = (_new); \ |
| 163 | switch (size) { \ | 163 | switch (size) { \ |
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 48febf07e828..1310e1f1cd65 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h | |||
| @@ -69,6 +69,9 @@ build_mmio_write(__writeb, "b", unsigned char, "q", ) | |||
| 69 | build_mmio_write(__writew, "w", unsigned short, "r", ) | 69 | build_mmio_write(__writew, "w", unsigned short, "r", ) |
| 70 | build_mmio_write(__writel, "l", unsigned int, "r", ) | 70 | build_mmio_write(__writel, "l", unsigned int, "r", ) |
| 71 | 71 | ||
| 72 | #define readb readb | ||
| 73 | #define readw readw | ||
| 74 | #define readl readl | ||
| 72 | #define readb_relaxed(a) __readb(a) | 75 | #define readb_relaxed(a) __readb(a) |
| 73 | #define readw_relaxed(a) __readw(a) | 76 | #define readw_relaxed(a) __readw(a) |
| 74 | #define readl_relaxed(a) __readl(a) | 77 | #define readl_relaxed(a) __readl(a) |
| @@ -76,6 +79,9 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) | |||
| 76 | #define __raw_readw __readw | 79 | #define __raw_readw __readw |
| 77 | #define __raw_readl __readl | 80 | #define __raw_readl __readl |
| 78 | 81 | ||
| 82 | #define writeb writeb | ||
| 83 | #define writew writew | ||
| 84 | #define writel writel | ||
| 79 | #define writeb_relaxed(v, a) __writeb(v, a) | 85 | #define writeb_relaxed(v, a) __writeb(v, a) |
| 80 | #define writew_relaxed(v, a) __writew(v, a) | 86 | #define writew_relaxed(v, a) __writew(v, a) |
| 81 | #define writel_relaxed(v, a) __writel(v, a) | 87 | #define writel_relaxed(v, a) __writel(v, a) |
| @@ -88,13 +94,15 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) | |||
| 88 | #ifdef CONFIG_X86_64 | 94 | #ifdef CONFIG_X86_64 |
| 89 | 95 | ||
| 90 | build_mmio_read(readq, "q", unsigned long, "=r", :"memory") | 96 | build_mmio_read(readq, "q", unsigned long, "=r", :"memory") |
| 97 | build_mmio_read(__readq, "q", unsigned long, "=r", ) | ||
| 91 | build_mmio_write(writeq, "q", unsigned long, "r", :"memory") | 98 | build_mmio_write(writeq, "q", unsigned long, "r", :"memory") |
| 99 | build_mmio_write(__writeq, "q", unsigned long, "r", ) | ||
| 92 | 100 | ||
| 93 | #define readq_relaxed(a) readq(a) | 101 | #define readq_relaxed(a) __readq(a) |
| 94 | #define writeq_relaxed(v, a) writeq(v, a) | 102 | #define writeq_relaxed(v, a) __writeq(v, a) |
| 95 | 103 | ||
| 96 | #define __raw_readq(a) readq(a) | 104 | #define __raw_readq __readq |
| 97 | #define __raw_writeq(val, addr) writeq(val, addr) | 105 | #define __raw_writeq __writeq |
| 98 | 106 | ||
| 99 | /* Let people know that we have them */ | 107 | /* Let people know that we have them */ |
| 100 | #define readq readq | 108 | #define readq readq |
| @@ -119,6 +127,7 @@ static inline phys_addr_t virt_to_phys(volatile void *address) | |||
| 119 | { | 127 | { |
| 120 | return __pa(address); | 128 | return __pa(address); |
| 121 | } | 129 | } |
| 130 | #define virt_to_phys virt_to_phys | ||
| 122 | 131 | ||
| 123 | /** | 132 | /** |
| 124 | * phys_to_virt - map physical address to virtual | 133 | * phys_to_virt - map physical address to virtual |
| @@ -137,6 +146,7 @@ static inline void *phys_to_virt(phys_addr_t address) | |||
| 137 | { | 146 | { |
| 138 | return __va(address); | 147 | return __va(address); |
| 139 | } | 148 | } |
| 149 | #define phys_to_virt phys_to_virt | ||
| 140 | 150 | ||
| 141 | /* | 151 | /* |
| 142 | * Change "struct page" to physical address. | 152 | * Change "struct page" to physical address. |
| @@ -169,11 +179,14 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) | |||
| 169 | * else, you probably want one of the following. | 179 | * else, you probably want one of the following. |
| 170 | */ | 180 | */ |
| 171 | extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); | 181 | extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); |
| 182 | #define ioremap_nocache ioremap_nocache | ||
| 172 | extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); | 183 | extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); |
| 173 | #define ioremap_uc ioremap_uc | 184 | #define ioremap_uc ioremap_uc |
| 174 | 185 | ||
| 175 | extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); | 186 | extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); |
| 187 | #define ioremap_cache ioremap_cache | ||
| 176 | extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); | 188 | extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); |
| 189 | #define ioremap_prot ioremap_prot | ||
| 177 | 190 | ||
| 178 | /** | 191 | /** |
| 179 | * ioremap - map bus memory into CPU space | 192 | * ioremap - map bus memory into CPU space |
| @@ -193,8 +206,10 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) | |||
| 193 | { | 206 | { |
| 194 | return ioremap_nocache(offset, size); | 207 | return ioremap_nocache(offset, size); |
| 195 | } | 208 | } |
| 209 | #define ioremap ioremap | ||
| 196 | 210 | ||
| 197 | extern void iounmap(volatile void __iomem *addr); | 211 | extern void iounmap(volatile void __iomem *addr); |
| 212 | #define iounmap iounmap | ||
| 198 | 213 | ||
| 199 | extern void set_iounmap_nonlazy(void); | 214 | extern void set_iounmap_nonlazy(void); |
| 200 | 215 | ||
| @@ -203,53 +218,6 @@ extern void set_iounmap_nonlazy(void); | |||
| 203 | #include <asm-generic/iomap.h> | 218 | #include <asm-generic/iomap.h> |
| 204 | 219 | ||
| 205 | /* | 220 | /* |
| 206 | * Convert a virtual cached pointer to an uncached pointer | ||
| 207 | */ | ||
| 208 | #define xlate_dev_kmem_ptr(p) p | ||
| 209 | |||
| 210 | /** | ||
| 211 | * memset_io Set a range of I/O memory to a constant value | ||
| 212 | * @addr: The beginning of the I/O-memory range to set | ||
| 213 | * @val: The value to set the memory to | ||
| 214 | * @count: The number of bytes to set | ||
| 215 | * | ||
| 216 | * Set a range of I/O memory to a given value. | ||
| 217 | */ | ||
| 218 | static inline void | ||
| 219 | memset_io(volatile void __iomem *addr, unsigned char val, size_t count) | ||
| 220 | { | ||
| 221 | memset((void __force *)addr, val, count); | ||
| 222 | } | ||
| 223 | |||
| 224 | /** | ||
| 225 | * memcpy_fromio Copy a block of data from I/O memory | ||
| 226 | * @dst: The (RAM) destination for the copy | ||
| 227 | * @src: The (I/O memory) source for the data | ||
| 228 | * @count: The number of bytes to copy | ||
| 229 | * | ||
| 230 | * Copy a block of data from I/O memory. | ||
| 231 | */ | ||
| 232 | static inline void | ||
| 233 | memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) | ||
| 234 | { | ||
| 235 | memcpy(dst, (const void __force *)src, count); | ||
| 236 | } | ||
| 237 | |||
| 238 | /** | ||
| 239 | * memcpy_toio Copy a block of data into I/O memory | ||
| 240 | * @dst: The (I/O memory) destination for the copy | ||
| 241 | * @src: The (RAM) source for the data | ||
| 242 | * @count: The number of bytes to copy | ||
| 243 | * | ||
| 244 | * Copy a block of data to I/O memory. | ||
| 245 | */ | ||
| 246 | static inline void | ||
| 247 | memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) | ||
| 248 | { | ||
| 249 | memcpy((void __force *)dst, src, count); | ||
| 250 | } | ||
| 251 | |||
| 252 | /* | ||
| 253 | * ISA space is 'always mapped' on a typical x86 system, no need to | 221 | * ISA space is 'always mapped' on a typical x86 system, no need to |
| 254 | * explicitly ioremap() it. The fact that the ISA IO space is mapped | 222 | * explicitly ioremap() it. The fact that the ISA IO space is mapped |
| 255 | * to PAGE_OFFSET is pure coincidence - it does not mean ISA values | 223 | * to PAGE_OFFSET is pure coincidence - it does not mean ISA values |
| @@ -341,13 +309,38 @@ BUILDIO(b, b, char) | |||
| 341 | BUILDIO(w, w, short) | 309 | BUILDIO(w, w, short) |
| 342 | BUILDIO(l, , int) | 310 | BUILDIO(l, , int) |
| 343 | 311 | ||
| 312 | #define inb inb | ||
| 313 | #define inw inw | ||
| 314 | #define inl inl | ||
| 315 | #define inb_p inb_p | ||
| 316 | #define inw_p inw_p | ||
| 317 | #define inl_p inl_p | ||
| 318 | #define insb insb | ||
| 319 | #define insw insw | ||
| 320 | #define insl insl | ||
| 321 | |||
| 322 | #define outb outb | ||
| 323 | #define outw outw | ||
| 324 | #define outl outl | ||
| 325 | #define outb_p outb_p | ||
| 326 | #define outw_p outw_p | ||
| 327 | #define outl_p outl_p | ||
| 328 | #define outsb outsb | ||
| 329 | #define outsw outsw | ||
| 330 | #define outsl outsl | ||
| 331 | |||
| 344 | extern void *xlate_dev_mem_ptr(phys_addr_t phys); | 332 | extern void *xlate_dev_mem_ptr(phys_addr_t phys); |
| 345 | extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); | 333 | extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); |
| 346 | 334 | ||
| 335 | #define xlate_dev_mem_ptr xlate_dev_mem_ptr | ||
| 336 | #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr | ||
| 337 | |||
| 347 | extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, | 338 | extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, |
| 348 | enum page_cache_mode pcm); | 339 | enum page_cache_mode pcm); |
| 349 | extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); | 340 | extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); |
| 341 | #define ioremap_wc ioremap_wc | ||
| 350 | extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); | 342 | extern void __iomem *ioremap_wt(resource_size_t offset, unsigned long size); |
| 343 | #define ioremap_wt ioremap_wt | ||
| 351 | 344 | ||
| 352 | extern bool is_early_ioremap_ptep(pte_t *ptep); | 345 | extern bool is_early_ioremap_ptep(pte_t *ptep); |
| 353 | 346 | ||
| @@ -365,6 +358,9 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, | |||
| 365 | 358 | ||
| 366 | #define IO_SPACE_LIMIT 0xffff | 359 | #define IO_SPACE_LIMIT 0xffff |
| 367 | 360 | ||
| 361 | #include <asm-generic/io.h> | ||
| 362 | #undef PCI_IOBASE | ||
| 363 | |||
| 368 | #ifdef CONFIG_MTRR | 364 | #ifdef CONFIG_MTRR |
| 369 | extern int __must_check arch_phys_wc_index(int handle); | 365 | extern int __must_check arch_phys_wc_index(int handle); |
| 370 | #define arch_phys_wc_index arch_phys_wc_index | 366 | #define arch_phys_wc_index arch_phys_wc_index |
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h new file mode 100644 index 000000000000..7dc777a6cb40 --- /dev/null +++ b/arch/x86/include/asm/orc_types.h | |||
| @@ -0,0 +1,107 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation; either version 2 | ||
| 7 | * of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #ifndef _ORC_TYPES_H | ||
| 19 | #define _ORC_TYPES_H | ||
| 20 | |||
| 21 | #include <linux/types.h> | ||
| 22 | #include <linux/compiler.h> | ||
| 23 | |||
| 24 | /* | ||
| 25 | * The ORC_REG_* registers are base registers which are used to find other | ||
| 26 | * registers on the stack. | ||
| 27 | * | ||
| 28 | * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the | ||
| 29 | * address of the previous frame: the caller's SP before it called the current | ||
| 30 | * function. | ||
| 31 | * | ||
| 32 | * ORC_REG_UNDEFINED means the corresponding register's value didn't change in | ||
| 33 | * the current frame. | ||
| 34 | * | ||
| 35 | * The most commonly used base registers are SP and BP -- which the previous SP | ||
| 36 | * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is | ||
| 37 | * usually based on. | ||
| 38 | * | ||
| 39 | * The rest of the base registers are needed for special cases like entry code | ||
| 40 | * and GCC realigned stacks. | ||
| 41 | */ | ||
| 42 | #define ORC_REG_UNDEFINED 0 | ||
| 43 | #define ORC_REG_PREV_SP 1 | ||
| 44 | #define ORC_REG_DX 2 | ||
| 45 | #define ORC_REG_DI 3 | ||
| 46 | #define ORC_REG_BP 4 | ||
| 47 | #define ORC_REG_SP 5 | ||
| 48 | #define ORC_REG_R10 6 | ||
| 49 | #define ORC_REG_R13 7 | ||
| 50 | #define ORC_REG_BP_INDIRECT 8 | ||
| 51 | #define ORC_REG_SP_INDIRECT 9 | ||
| 52 | #define ORC_REG_MAX 15 | ||
| 53 | |||
| 54 | /* | ||
| 55 | * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the | ||
| 56 | * caller's SP right before it made the call). Used for all callable | ||
| 57 | * functions, i.e. all C code and all callable asm functions. | ||
| 58 | * | ||
| 59 | * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points | ||
| 60 | * to a fully populated pt_regs from a syscall, interrupt, or exception. | ||
| 61 | * | ||
| 62 | * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset | ||
| 63 | * points to the iret return frame. | ||
| 64 | * | ||
| 65 | * The UNWIND_HINT macros are used only for the unwind_hint struct. They | ||
| 66 | * aren't used in struct orc_entry due to size and complexity constraints. | ||
| 67 | * Objtool converts them to real types when it converts the hints to orc | ||
| 68 | * entries. | ||
| 69 | */ | ||
| 70 | #define ORC_TYPE_CALL 0 | ||
| 71 | #define ORC_TYPE_REGS 1 | ||
| 72 | #define ORC_TYPE_REGS_IRET 2 | ||
| 73 | #define UNWIND_HINT_TYPE_SAVE 3 | ||
| 74 | #define UNWIND_HINT_TYPE_RESTORE 4 | ||
| 75 | |||
| 76 | #ifndef __ASSEMBLY__ | ||
| 77 | /* | ||
| 78 | * This struct is more or less a vastly simplified version of the DWARF Call | ||
| 79 | * Frame Information standard. It contains only the necessary parts of DWARF | ||
| 80 | * CFI, simplified for ease of access by the in-kernel unwinder. It tells the | ||
| 81 | * unwinder how to find the previous SP and BP (and sometimes entry regs) on | ||
| 82 | * the stack for a given code address. Each instance of the struct corresponds | ||
| 83 | * to one or more code locations. | ||
| 84 | */ | ||
| 85 | struct orc_entry { | ||
| 86 | s16 sp_offset; | ||
| 87 | s16 bp_offset; | ||
| 88 | unsigned sp_reg:4; | ||
| 89 | unsigned bp_reg:4; | ||
| 90 | unsigned type:2; | ||
| 91 | }; | ||
| 92 | |||
| 93 | /* | ||
| 94 | * This struct is used by asm and inline asm code to manually annotate the | ||
| 95 | * location of registers on the stack for the ORC unwinder. | ||
| 96 | * | ||
| 97 | * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. | ||
| 98 | */ | ||
| 99 | struct unwind_hint { | ||
| 100 | u32 ip; | ||
| 101 | s16 sp_offset; | ||
| 102 | u8 sp_reg; | ||
| 103 | u8 type; | ||
| 104 | }; | ||
| 105 | #endif /* __ASSEMBLY__ */ | ||
| 106 | |||
| 107 | #endif /* _ORC_TYPES_H */ | ||
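For intuition about how the orc_entry fields above get consumed, here is a minimal sketch of the lookup step an unwinder could perform. It is illustrative only: orc_find(), cur_sp and cur_bp are assumed helpers/locals, not symbols introduced by this patch, and the indirect and pt_regs cases are omitted.

    /*
     * Illustrative only: recover the caller's SP (the DWARF CFA) for a
     * given instruction pointer, using the sp_reg/sp_offset pair.
     */
    static bool orc_prev_sp(unsigned long ip, unsigned long cur_sp,
                            unsigned long cur_bp, unsigned long *prev_sp)
    {
            struct orc_entry *orc = orc_find(ip);   /* assumed lookup helper */

            if (!orc || orc->sp_reg == ORC_REG_UNDEFINED)
                    return false;                   /* end of stack / unknown */

            switch (orc->sp_reg) {
            case ORC_REG_SP:
                    *prev_sp = cur_sp + orc->sp_offset;
                    return true;
            case ORC_REG_BP:
                    *prev_sp = cur_bp + orc->sp_offset;
                    return true;
            default:
                    return false;                   /* special cases omitted */
            }
    }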
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 028245e1c42b..0b03d655db7c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
| @@ -22,6 +22,7 @@ struct vm86; | |||
| 22 | #include <asm/nops.h> | 22 | #include <asm/nops.h> |
| 23 | #include <asm/special_insns.h> | 23 | #include <asm/special_insns.h> |
| 24 | #include <asm/fpu/types.h> | 24 | #include <asm/fpu/types.h> |
| 25 | #include <asm/unwind_hints.h> | ||
| 25 | 26 | ||
| 26 | #include <linux/personality.h> | 27 | #include <linux/personality.h> |
| 27 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
| @@ -684,6 +685,7 @@ static inline void sync_core(void) | |||
| 684 | unsigned int tmp; | 685 | unsigned int tmp; |
| 685 | 686 | ||
| 686 | asm volatile ( | 687 | asm volatile ( |
| 688 | UNWIND_HINT_SAVE | ||
| 687 | "mov %%ss, %0\n\t" | 689 | "mov %%ss, %0\n\t" |
| 688 | "pushq %q0\n\t" | 690 | "pushq %q0\n\t" |
| 689 | "pushq %%rsp\n\t" | 691 | "pushq %%rsp\n\t" |
| @@ -693,6 +695,7 @@ static inline void sync_core(void) | |||
| 693 | "pushq %q0\n\t" | 695 | "pushq %q0\n\t" |
| 694 | "pushq $1f\n\t" | 696 | "pushq $1f\n\t" |
| 695 | "iretq\n\t" | 697 | "iretq\n\t" |
| 698 | UNWIND_HINT_RESTORE | ||
| 696 | "1:" | 699 | "1:" |
| 697 | : "=&r" (tmp), "+r" (__sp) : : "cc", "memory"); | 700 | : "=&r" (tmp), "+r" (__sp) : : "cc", "memory"); |
| 698 | #endif | 701 | #endif |
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h new file mode 100644 index 000000000000..ff871210b9f2 --- /dev/null +++ b/arch/x86/include/asm/refcount.h | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | #ifndef __ASM_X86_REFCOUNT_H | ||
| 2 | #define __ASM_X86_REFCOUNT_H | ||
| 3 | /* | ||
| 4 | * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from | ||
| 5 | * PaX/grsecurity. | ||
| 6 | */ | ||
| 7 | #include <linux/refcount.h> | ||
| 8 | |||
| 9 | /* | ||
| 10 | * This is the first portion of the refcount error handling, which lives in | ||
| 11 | * .text.unlikely, and is jumped to from the CPU flag check (in the | ||
| 12 | * following macros). This saves the refcount value location into CX for | ||
| 13 | * the exception handler to use (in mm/extable.c), and then triggers the | ||
| 14 | * central refcount exception. The fixup address for the exception points | ||
| 15 | * back to the regular execution flow in .text. | ||
| 16 | */ | ||
| 17 | #define _REFCOUNT_EXCEPTION \ | ||
| 18 | ".pushsection .text.unlikely\n" \ | ||
| 19 | "111:\tlea %[counter], %%" _ASM_CX "\n" \ | ||
| 20 | "112:\t" ASM_UD0 "\n" \ | ||
| 21 | ASM_UNREACHABLE \ | ||
| 22 | ".popsection\n" \ | ||
| 23 | "113:\n" \ | ||
| 24 | _ASM_EXTABLE_REFCOUNT(112b, 113b) | ||
| 25 | |||
| 26 | /* Trigger refcount exception if refcount result is negative. */ | ||
| 27 | #define REFCOUNT_CHECK_LT_ZERO \ | ||
| 28 | "js 111f\n\t" \ | ||
| 29 | _REFCOUNT_EXCEPTION | ||
| 30 | |||
| 31 | /* Trigger refcount exception if refcount result is zero or negative. */ | ||
| 32 | #define REFCOUNT_CHECK_LE_ZERO \ | ||
| 33 | "jz 111f\n\t" \ | ||
| 34 | REFCOUNT_CHECK_LT_ZERO | ||
| 35 | |||
| 36 | /* Trigger refcount exception unconditionally. */ | ||
| 37 | #define REFCOUNT_ERROR \ | ||
| 38 | "jmp 111f\n\t" \ | ||
| 39 | _REFCOUNT_EXCEPTION | ||
| 40 | |||
| 41 | static __always_inline void refcount_add(unsigned int i, refcount_t *r) | ||
| 42 | { | ||
| 43 | asm volatile(LOCK_PREFIX "addl %1,%0\n\t" | ||
| 44 | REFCOUNT_CHECK_LT_ZERO | ||
| 45 | : [counter] "+m" (r->refs.counter) | ||
| 46 | : "ir" (i) | ||
| 47 | : "cc", "cx"); | ||
| 48 | } | ||
| 49 | |||
| 50 | static __always_inline void refcount_inc(refcount_t *r) | ||
| 51 | { | ||
| 52 | asm volatile(LOCK_PREFIX "incl %0\n\t" | ||
| 53 | REFCOUNT_CHECK_LT_ZERO | ||
| 54 | : [counter] "+m" (r->refs.counter) | ||
| 55 | : : "cc", "cx"); | ||
| 56 | } | ||
| 57 | |||
| 58 | static __always_inline void refcount_dec(refcount_t *r) | ||
| 59 | { | ||
| 60 | asm volatile(LOCK_PREFIX "decl %0\n\t" | ||
| 61 | REFCOUNT_CHECK_LE_ZERO | ||
| 62 | : [counter] "+m" (r->refs.counter) | ||
| 63 | : : "cc", "cx"); | ||
| 64 | } | ||
| 65 | |||
| 66 | static __always_inline __must_check | ||
| 67 | bool refcount_sub_and_test(unsigned int i, refcount_t *r) | ||
| 68 | { | ||
| 69 | GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, | ||
| 70 | r->refs.counter, "er", i, "%0", e); | ||
| 71 | } | ||
| 72 | |||
| 73 | static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) | ||
| 74 | { | ||
| 75 | GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, | ||
| 76 | r->refs.counter, "%0", e); | ||
| 77 | } | ||
| 78 | |||
| 79 | static __always_inline __must_check | ||
| 80 | bool refcount_add_not_zero(unsigned int i, refcount_t *r) | ||
| 81 | { | ||
| 82 | int c, result; | ||
| 83 | |||
| 84 | c = atomic_read(&(r->refs)); | ||
| 85 | do { | ||
| 86 | if (unlikely(c == 0)) | ||
| 87 | return false; | ||
| 88 | |||
| 89 | result = c + i; | ||
| 90 | |||
| 91 | /* Did we try to increment from/to an undesirable state? */ | ||
| 92 | if (unlikely(c < 0 || c == INT_MAX || result < c)) { | ||
| 93 | asm volatile(REFCOUNT_ERROR | ||
| 94 | : : [counter] "m" (r->refs.counter) | ||
| 95 | : "cc", "cx"); | ||
| 96 | break; | ||
| 97 | } | ||
| 98 | |||
| 99 | } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); | ||
| 100 | |||
| 101 | return c != 0; | ||
| 102 | } | ||
| 103 | |||
| 104 | static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) | ||
| 105 | { | ||
| 106 | return refcount_add_not_zero(1, r); | ||
| 107 | } | ||
| 108 | |||
| 109 | #endif | ||
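For context, the functions above are the arch override of the generic refcount_t API, so callers look exactly the same as with the generic implementation. A minimal usage sketch follows; the struct and the kfree()-based release are illustrative, not taken from this patch.

    /* Hypothetical refcounted object; obj_get()/obj_put() are illustrative. */
    struct obj {
            refcount_t refs;
            /* payload omitted */
    };

    static struct obj *obj_get(struct obj *o)
    {
            /* LOCK INCL path; 'js' into the saturation handler on overflow */
            refcount_inc(&o->refs);
            return o;
    }

    static void obj_put(struct obj *o)
    {
            /* LOCK DECL + flags test; true exactly when the count hits zero */
            if (refcount_dec_and_test(&o->refs))
                    kfree(o);
    }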
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 661dd305694a..045f99211a99 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h | |||
| @@ -1,45 +1,56 @@ | |||
| 1 | #ifndef _ASM_X86_RMWcc | 1 | #ifndef _ASM_X86_RMWcc |
| 2 | #define _ASM_X86_RMWcc | 2 | #define _ASM_X86_RMWcc |
| 3 | 3 | ||
| 4 | #define __CLOBBERS_MEM "memory" | ||
| 5 | #define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx" | ||
| 6 | |||
| 4 | #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) | 7 | #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) |
| 5 | 8 | ||
| 6 | /* Use asm goto */ | 9 | /* Use asm goto */ |
| 7 | 10 | ||
| 8 | #define __GEN_RMWcc(fullop, var, cc, ...) \ | 11 | #define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ |
| 9 | do { \ | 12 | do { \ |
| 10 | asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ | 13 | asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ |
| 11 | : : "m" (var), ## __VA_ARGS__ \ | 14 | : : [counter] "m" (var), ## __VA_ARGS__ \ |
| 12 | : "memory" : cc_label); \ | 15 | : clobbers : cc_label); \ |
| 13 | return 0; \ | 16 | return 0; \ |
| 14 | cc_label: \ | 17 | cc_label: \ |
| 15 | return 1; \ | 18 | return 1; \ |
| 16 | } while (0) | 19 | } while (0) |
| 17 | 20 | ||
| 18 | #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ | 21 | #define __BINARY_RMWcc_ARG " %1, " |
| 19 | __GEN_RMWcc(op " " arg0, var, cc) | ||
| 20 | 22 | ||
| 21 | #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ | ||
| 22 | __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) | ||
| 23 | 23 | ||
| 24 | #else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ | 24 | #else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ |
| 25 | 25 | ||
| 26 | /* Use flags output or a set instruction */ | 26 | /* Use flags output or a set instruction */ |
| 27 | 27 | ||
| 28 | #define __GEN_RMWcc(fullop, var, cc, ...) \ | 28 | #define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ |
| 29 | do { \ | 29 | do { \ |
| 30 | bool c; \ | 30 | bool c; \ |
| 31 | asm volatile (fullop ";" CC_SET(cc) \ | 31 | asm volatile (fullop ";" CC_SET(cc) \ |
| 32 | : "+m" (var), CC_OUT(cc) (c) \ | 32 | : [counter] "+m" (var), CC_OUT(cc) (c) \ |
| 33 | : __VA_ARGS__ : "memory"); \ | 33 | : __VA_ARGS__ : clobbers); \ |
| 34 | return c; \ | 34 | return c; \ |
| 35 | } while (0) | 35 | } while (0) |
| 36 | 36 | ||
| 37 | #define __BINARY_RMWcc_ARG " %2, " | ||
| 38 | |||
| 39 | #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ | ||
| 40 | |||
| 37 | #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ | 41 | #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ |
| 38 | __GEN_RMWcc(op " " arg0, var, cc) | 42 | __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM) |
| 43 | |||
| 44 | #define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \ | ||
| 45 | __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \ | ||
| 46 | __CLOBBERS_MEM_CC_CX) | ||
| 39 | 47 | ||
| 40 | #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ | 48 | #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ |
| 41 | __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) | 49 | __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \ |
| 50 | __CLOBBERS_MEM, vcon (val)) | ||
| 42 | 51 | ||
| 43 | #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ | 52 | #define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \ |
| 53 | __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \ | ||
| 54 | __CLOBBERS_MEM_CC_CX, vcon (val)) | ||
| 44 | 55 | ||
| 45 | #endif /* _ASM_X86_RMWcc */ | 56 | #endif /* _ASM_X86_RMWcc */ |
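To make the new *_SUFFIXED_RMWcc() variants concrete, here is a rough hand-expansion of the refcount_dec_and_test() use from asm/refcount.h above, in the flags-output (non-asm-goto) configuration. It is an approximation for reading purposes, not compiler output.

    /* Approximate expansion of:
     *   GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
     *                            r->refs.counter, "%0", e);
     */
    bool c;
    asm volatile (LOCK_PREFIX "decl %0\n\t"
                  REFCOUNT_CHECK_LT_ZERO ";" CC_SET(e)
                  : [counter] "+m" (r->refs.counter), CC_OUT(e) (c)
                  : : "memory", "cc", "cx");
    return c;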
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h new file mode 100644 index 000000000000..5e02b11c9b86 --- /dev/null +++ b/arch/x86/include/asm/unwind_hints.h | |||
| @@ -0,0 +1,103 @@ | |||
| 1 | #ifndef _ASM_X86_UNWIND_HINTS_H | ||
| 2 | #define _ASM_X86_UNWIND_HINTS_H | ||
| 3 | |||
| 4 | #include "orc_types.h" | ||
| 5 | |||
| 6 | #ifdef __ASSEMBLY__ | ||
| 7 | |||
| 8 | /* | ||
| 9 | * In asm, there are two kinds of code: normal C-type callable functions and | ||
| 10 | * the rest. The normal callable functions can be called by other code, and | ||
| 11 | * don't do anything unusual with the stack. Such normal callable functions | ||
| 12 | * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this | ||
| 13 | * category. In this case, no special debugging annotations are needed because | ||
| 14 | * objtool can automatically generate the ORC data for the ORC unwinder to read | ||
| 15 | * at runtime. | ||
| 16 | * | ||
| 17 | * Anything which doesn't fall into the above category, such as syscall and | ||
| 18 | * interrupt handlers, tends to not be called directly by other functions, and | ||
| 19 | * often does unusual non-C-function-type things with the stack pointer. Such | ||
| 20 | * code needs to be annotated such that objtool can understand it. The | ||
| 21 | * following CFI hint macros are for this type of code. | ||
| 22 | * | ||
| 23 | * These macros provide hints to objtool about the state of the stack at each | ||
| 24 | * instruction. Objtool starts from the hints and follows the code flow, | ||
| 25 | * making automatic CFI adjustments when it sees pushes and pops, filling out | ||
| 26 | * the debuginfo as necessary. It will also warn if it sees any | ||
| 27 | * inconsistencies. | ||
| 28 | */ | ||
| 29 | .macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL | ||
| 30 | #ifdef CONFIG_STACK_VALIDATION | ||
| 31 | .Lunwind_hint_ip_\@: | ||
| 32 | .pushsection .discard.unwind_hints | ||
| 33 | /* struct unwind_hint */ | ||
| 34 | .long .Lunwind_hint_ip_\@ - . | ||
| 35 | .short \sp_offset | ||
| 36 | .byte \sp_reg | ||
| 37 | .byte \type | ||
| 38 | .popsection | ||
| 39 | #endif | ||
| 40 | .endm | ||
| 41 | |||
| 42 | .macro UNWIND_HINT_EMPTY | ||
| 43 | UNWIND_HINT sp_reg=ORC_REG_UNDEFINED | ||
| 44 | .endm | ||
| 45 | |||
| 46 | .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0 | ||
| 47 | .if \base == %rsp && \indirect | ||
| 48 | .set sp_reg, ORC_REG_SP_INDIRECT | ||
| 49 | .elseif \base == %rsp | ||
| 50 | .set sp_reg, ORC_REG_SP | ||
| 51 | .elseif \base == %rbp | ||
| 52 | .set sp_reg, ORC_REG_BP | ||
| 53 | .elseif \base == %rdi | ||
| 54 | .set sp_reg, ORC_REG_DI | ||
| 55 | .elseif \base == %rdx | ||
| 56 | .set sp_reg, ORC_REG_DX | ||
| 57 | .elseif \base == %r10 | ||
| 58 | .set sp_reg, ORC_REG_R10 | ||
| 59 | .else | ||
| 60 | .error "UNWIND_HINT_REGS: bad base register" | ||
| 61 | .endif | ||
| 62 | |||
| 63 | .set sp_offset, \offset | ||
| 64 | |||
| 65 | .if \iret | ||
| 66 | .set type, ORC_TYPE_REGS_IRET | ||
| 67 | .elseif \extra == 0 | ||
| 68 | .set type, ORC_TYPE_REGS_IRET | ||
| 69 | .set sp_offset, \offset + (16*8) | ||
| 70 | .else | ||
| 71 | .set type, ORC_TYPE_REGS | ||
| 72 | .endif | ||
| 73 | |||
| 74 | UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type | ||
| 75 | .endm | ||
| 76 | |||
| 77 | .macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 | ||
| 78 | UNWIND_HINT_REGS base=\base offset=\offset iret=1 | ||
| 79 | .endm | ||
| 80 | |||
| 81 | .macro UNWIND_HINT_FUNC sp_offset=8 | ||
| 82 | UNWIND_HINT sp_offset=\sp_offset | ||
| 83 | .endm | ||
| 84 | |||
| 85 | #else /* !__ASSEMBLY__ */ | ||
| 86 | |||
| 87 | #define UNWIND_HINT(sp_reg, sp_offset, type) \ | ||
| 88 | "987: \n\t" \ | ||
| 89 | ".pushsection .discard.unwind_hints\n\t" \ | ||
| 90 | /* struct unwind_hint */ \ | ||
| 91 | ".long 987b - .\n\t" \ | ||
| 92 | ".short " __stringify(sp_offset) "\n\t" \ | ||
| 93 | ".byte " __stringify(sp_reg) "\n\t" \ | ||
| 94 | ".byte " __stringify(type) "\n\t" \ | ||
| 95 | ".popsection\n\t" | ||
| 96 | |||
| 97 | #define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE) | ||
| 98 | |||
| 99 | #define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE) | ||
| 100 | |||
| 101 | #endif /* __ASSEMBLY__ */ | ||
| 102 | |||
| 103 | #endif /* _ASM_X86_UNWIND_HINTS_H */ | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index dbce3cca94cb..bd265a4cf108 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
| @@ -94,6 +94,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
| 94 | if (stack_name) | 94 | if (stack_name) |
| 95 | printk("%s <%s>\n", log_lvl, stack_name); | 95 | printk("%s <%s>\n", log_lvl, stack_name); |
| 96 | 96 | ||
| 97 | if (regs && on_stack(&stack_info, regs, sizeof(*regs))) | ||
| 98 | __show_regs(regs, 0); | ||
| 99 | |||
| 97 | /* | 100 | /* |
| 98 | * Scan the stack, printing any text addresses we find. At the | 101 | * Scan the stack, printing any text addresses we find. At the |
| 99 | * same time, follow proper stack frames with the unwinder. | 102 | * same time, follow proper stack frames with the unwinder. |
| @@ -118,10 +121,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
| 118 | * Don't print regs->ip again if it was already printed | 121 | * Don't print regs->ip again if it was already printed |
| 119 | * by __show_regs() below. | 122 | * by __show_regs() below. |
| 120 | */ | 123 | */ |
| 121 | if (regs && stack == ®s->ip) { | 124 | if (regs && stack == ®s->ip) |
| 122 | unwind_next_frame(&state); | 125 | goto next; |
| 123 | continue; | ||
| 124 | } | ||
| 125 | 126 | ||
| 126 | if (stack == ret_addr_p) | 127 | if (stack == ret_addr_p) |
| 127 | reliable = 1; | 128 | reliable = 1; |
| @@ -144,6 +145,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
| 144 | if (!reliable) | 145 | if (!reliable) |
| 145 | continue; | 146 | continue; |
| 146 | 147 | ||
| 148 | next: | ||
| 147 | /* | 149 | /* |
| 148 | * Get the next frame from the unwinder. No need to | 150 | * Get the next frame from the unwinder. No need to |
| 149 | * check for an error: if anything goes wrong, the rest | 151 | * check for an error: if anything goes wrong, the rest |
| @@ -153,7 +155,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
| 153 | 155 | ||
| 154 | /* if the frame has entry regs, print them */ | 156 | /* if the frame has entry regs, print them */ |
| 155 | regs = unwind_get_entry_regs(&state); | 157 | regs = unwind_get_entry_regs(&state); |
| 156 | if (regs) | 158 | if (regs && on_stack(&stack_info, regs, sizeof(*regs))) |
| 157 | __show_regs(regs, 0); | 159 | __show_regs(regs, 0); |
| 158 | } | 160 | } |
| 159 | 161 | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e5f0b40e66d2..4f0481474903 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
| @@ -37,7 +37,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) | |||
| 37 | * This is a software stack, so 'end' can be a valid stack pointer. | 37 | * This is a software stack, so 'end' can be a valid stack pointer. |
| 38 | * It just means the stack is empty. | 38 | * It just means the stack is empty. |
| 39 | */ | 39 | */ |
| 40 | if (stack < begin || stack > end) | 40 | if (stack <= begin || stack > end) |
| 41 | return false; | 41 | return false; |
| 42 | 42 | ||
| 43 | info->type = STACK_TYPE_IRQ; | 43 | info->type = STACK_TYPE_IRQ; |
| @@ -62,7 +62,7 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) | |||
| 62 | * This is a software stack, so 'end' can be a valid stack pointer. | 62 | * This is a software stack, so 'end' can be a valid stack pointer. |
| 63 | * It just means the stack is empty. | 63 | * It just means the stack is empty. |
| 64 | */ | 64 | */ |
| 65 | if (stack < begin || stack > end) | 65 | if (stack <= begin || stack > end) |
| 66 | return false; | 66 | return false; |
| 67 | 67 | ||
| 68 | info->type = STACK_TYPE_SOFTIRQ; | 68 | info->type = STACK_TYPE_SOFTIRQ; |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 3e1471d57487..225af4184f06 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
| @@ -55,7 +55,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info) | |||
| 55 | begin = end - (exception_stack_sizes[k] / sizeof(long)); | 55 | begin = end - (exception_stack_sizes[k] / sizeof(long)); |
| 56 | regs = (struct pt_regs *)end - 1; | 56 | regs = (struct pt_regs *)end - 1; |
| 57 | 57 | ||
| 58 | if (stack < begin || stack >= end) | 58 | if (stack <= begin || stack >= end) |
| 59 | continue; | 59 | continue; |
| 60 | 60 | ||
| 61 | info->type = STACK_TYPE_EXCEPTION + k; | 61 | info->type = STACK_TYPE_EXCEPTION + k; |
| @@ -78,7 +78,7 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info) | |||
| 78 | * This is a software stack, so 'end' can be a valid stack pointer. | 78 | * This is a software stack, so 'end' can be a valid stack pointer. |
| 79 | * It just means the stack is empty. | 79 | * It just means the stack is empty. |
| 80 | */ | 80 | */ |
| 81 | if (stack < begin || stack > end) | 81 | if (stack <= begin || stack > end) |
| 82 | return false; | 82 | return false; |
| 83 | 83 | ||
| 84 | info->type = STACK_TYPE_IRQ; | 84 | info->type = STACK_TYPE_IRQ; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c3169be4c596..2987e3991c2b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -279,6 +279,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 279 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); | 279 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
| 280 | unsigned prev_fsindex, prev_gsindex; | 280 | unsigned prev_fsindex, prev_gsindex; |
| 281 | 281 | ||
| 282 | WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && | ||
| 283 | this_cpu_read(irq_count) != -1); | ||
| 284 | |||
| 282 | switch_fpu_prepare(prev_fpu, cpu); | 285 | switch_fpu_prepare(prev_fpu, cpu); |
| 283 | 286 | ||
| 284 | /* We must save %fs and %gs before load_TLS() because | 287 | /* We must save %fs and %gs before load_TLS() because |
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 0ea8afcb929c..761fc88cd820 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c | |||
| @@ -36,6 +36,48 @@ bool ex_handler_fault(const struct exception_table_entry *fixup, | |||
| 36 | } | 36 | } |
| 37 | EXPORT_SYMBOL_GPL(ex_handler_fault); | 37 | EXPORT_SYMBOL_GPL(ex_handler_fault); |
| 38 | 38 | ||
| 39 | /* | ||
| 40 | * Handler for UD0 exception following a failed test against the | ||
| 41 | * result of a refcount inc/dec/add/sub. | ||
| 42 | */ | ||
| 43 | bool ex_handler_refcount(const struct exception_table_entry *fixup, | ||
| 44 | struct pt_regs *regs, int trapnr) | ||
| 45 | { | ||
| 46 | /* First unconditionally saturate the refcount. */ | ||
| 47 | *(int *)regs->cx = INT_MIN / 2; | ||
| 48 | |||
| 49 | /* | ||
| 50 | * Strictly speaking, this reports the fixup destination, not | ||
| 51 | * the fault location, and not the actually overflowing | ||
| 52 | * instruction, which is the instruction before the "js", but | ||
| 53 | * since that instruction could be a variety of lengths, just | ||
| 54 | * report the location after the overflow, which should be close | ||
| 55 | * enough for finding the overflow, as it's at least back in | ||
| 56 | * the function, having returned from .text.unlikely. | ||
| 57 | */ | ||
| 58 | regs->ip = ex_fixup_addr(fixup); | ||
| 59 | |||
| 60 | /* | ||
| 61 | * This function has been called because either a negative refcount | ||
| 62 | * value was seen by any of the refcount functions, or a zero | ||
| 63 | * refcount value was seen by refcount_dec(). | ||
| 64 | * | ||
| 65 | * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result | ||
| 66 | * wrapped around) will be set. Additionally, seeing the refcount | ||
| 67 | * reach 0 will set ZF (Zero Flag: result was zero). In each of | ||
| 68 | * these cases we want a report, since it's a boundary condition. | ||
| 69 | * | ||
| 70 | */ | ||
| 71 | if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) { | ||
| 72 | bool zero = regs->flags & X86_EFLAGS_ZF; | ||
| 73 | |||
| 74 | refcount_error_report(regs, zero ? "hit zero" : "overflow"); | ||
| 75 | } | ||
| 76 | |||
| 77 | return true; | ||
| 78 | } | ||
| 79 | EXPORT_SYMBOL_GPL(ex_handler_refcount); | ||
| 80 | |||
| 39 | bool ex_handler_ext(const struct exception_table_entry *fixup, | 81 | bool ex_handler_ext(const struct exception_table_entry *fixup, |
| 40 | struct pt_regs *regs, int trapnr) | 82 | struct pt_regs *regs, int trapnr) |
| 41 | { | 83 | { |
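For a sense of why INT_MIN / 2 is used as the saturation value (reasoning, not code from the patch): it parks the counter mid-way between INT_MIN and 0, so roughly a billion further increments or decrements are needed before it could climb back to zero or wrap below INT_MIN, and its sign bit stays set so the 'js' checks keep firing. A standalone back-of-the-envelope check of that arithmetic:

    /* Userspace illustration only; not kernel code. */
    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
            int sat = INT_MIN / 2;                                  /* -1073741824 */

            printf("headroom up to zero:   %d\n", 0 - sat);         /* 1073741824 */
            printf("headroom down to wrap: %d\n", sat - INT_MIN);   /* 1073741824 */
            return 0;
    }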
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 72bbfccef113..fd4b7f684bd0 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c | |||
| @@ -455,7 +455,11 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa | |||
| 455 | per_cpu(timer_unstable_counter_workaround, i) = wa; | 455 | per_cpu(timer_unstable_counter_workaround, i) = wa; |
| 456 | } | 456 | } |
| 457 | 457 | ||
| 458 | static_branch_enable(&arch_timer_read_ool_enabled); | 458 | /* |
| 459 | * Use the locked version, as we're called from the CPU | ||
| 460 | * hotplug framework. Otherwise, we end up in deadlock-land. | ||

| 461 | */ | ||
| 462 | static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled); | ||
| 459 | 463 | ||
| 460 | /* | 464 | /* |
| 461 | * Don't use the vdso fastpath if errata require using the | 465 | * Don't use the vdso fastpath if errata require using the |
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d1bd53b73738..e3a81ed66bc2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | 28 | ||
| 29 | #include <linux/debugfs.h> | 29 | #include <linux/debugfs.h> |
| 30 | #include <linux/sort.h> | 30 | #include <linux/sort.h> |
| 31 | #include <linux/sched/mm.h> | ||
| 31 | #include "intel_drv.h" | 32 | #include "intel_drv.h" |
| 32 | 33 | ||
| 33 | static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) | 34 | static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) |
| @@ -4331,7 +4332,7 @@ i915_drop_caches_set(void *data, u64 val) | |||
| 4331 | mutex_unlock(&dev->struct_mutex); | 4332 | mutex_unlock(&dev->struct_mutex); |
| 4332 | } | 4333 | } |
| 4333 | 4334 | ||
| 4334 | lockdep_set_current_reclaim_state(GFP_KERNEL); | 4335 | fs_reclaim_acquire(GFP_KERNEL); |
| 4335 | if (val & DROP_BOUND) | 4336 | if (val & DROP_BOUND) |
| 4336 | i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND); | 4337 | i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND); |
| 4337 | 4338 | ||
| @@ -4340,7 +4341,7 @@ i915_drop_caches_set(void *data, u64 val) | |||
| 4340 | 4341 | ||
| 4341 | if (val & DROP_SHRINK_ALL) | 4342 | if (val & DROP_SHRINK_ALL) |
| 4342 | i915_gem_shrink_all(dev_priv); | 4343 | i915_gem_shrink_all(dev_priv); |
| 4343 | lockdep_clear_current_reclaim_state(); | 4344 | fs_reclaim_release(GFP_KERNEL); |
| 4344 | 4345 | ||
| 4345 | if (val & DROP_FREED) { | 4346 | if (val & DROP_FREED) { |
| 4346 | synchronize_rcu(); | 4347 | synchronize_rcu(); |
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 3d424a51cabb..f0fd3adb1693 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c | |||
| @@ -446,14 +446,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, | |||
| 446 | 446 | ||
| 447 | ovl_path_upper(dentry, &upperpath); | 447 | ovl_path_upper(dentry, &upperpath); |
| 448 | realfile = ovl_path_open(&upperpath, O_RDONLY); | 448 | realfile = ovl_path_open(&upperpath, O_RDONLY); |
| 449 | smp_mb__before_spinlock(); | 449 | |
| 450 | inode_lock(inode); | 450 | inode_lock(inode); |
| 451 | if (!od->upperfile) { | 451 | if (!od->upperfile) { |
| 452 | if (IS_ERR(realfile)) { | 452 | if (IS_ERR(realfile)) { |
| 453 | inode_unlock(inode); | 453 | inode_unlock(inode); |
| 454 | return PTR_ERR(realfile); | 454 | return PTR_ERR(realfile); |
| 455 | } | 455 | } |
| 456 | od->upperfile = realfile; | 456 | smp_store_release(&od->upperfile, realfile); |
| 457 | } else { | 457 | } else { |
| 458 | /* somebody has beaten us to it */ | 458 | /* somebody has beaten us to it */ |
| 459 | if (!IS_ERR(realfile)) | 459 | if (!IS_ERR(realfile)) |
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index b0d5897bc4e6..886085b47c75 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c | |||
| @@ -109,27 +109,24 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, | |||
| 109 | goto out; | 109 | goto out; |
| 110 | WRITE_ONCE(uwq->waken, true); | 110 | WRITE_ONCE(uwq->waken, true); |
| 111 | /* | 111 | /* |
| 112 | * The implicit smp_mb__before_spinlock in try_to_wake_up() | 112 | * The Program-Order guarantees provided by the scheduler |
| 113 | * renders uwq->waken visible to other CPUs before the task is | 113 | * ensure uwq->waken is visible before the task is woken. |
| 114 | * waken. | ||
| 115 | */ | 114 | */ |
| 116 | ret = wake_up_state(wq->private, mode); | 115 | ret = wake_up_state(wq->private, mode); |
| 117 | if (ret) | 116 | if (ret) { |
| 118 | /* | 117 | /* |
| 119 | * Wake only once, autoremove behavior. | 118 | * Wake only once, autoremove behavior. |
| 120 | * | 119 | * |
| 121 | * After the effect of list_del_init is visible to the | 120 | * After the effect of list_del_init is visible to the other |
| 122 | * other CPUs, the waitqueue may disappear from under | 121 | * CPUs, the waitqueue may disappear from under us, see the |
| 123 | * us, see the !list_empty_careful() in | 122 | * !list_empty_careful() in handle_userfault(). |
| 124 | * handle_userfault(). try_to_wake_up() has an | 123 | * |
| 125 | * implicit smp_mb__before_spinlock, and the | 124 | * try_to_wake_up() has an implicit smp_mb(), and the |
| 126 | * wq->private is read before calling the extern | 125 | * wq->private is read before calling the extern function |
| 127 | * function "wake_up_state" (which in turns calls | 126 | * "wake_up_state" (which in turns calls try_to_wake_up). |
| 128 | * try_to_wake_up). While the spin_lock;spin_unlock; | ||
| 129 | * wouldn't be enough, the smp_mb__before_spinlock is | ||
| 130 | * enough to avoid an explicit smp_mb() here. | ||
| 131 | */ | 127 | */ |
| 132 | list_del_init(&wq->entry); | 128 | list_del_init(&wq->entry); |
| 129 | } | ||
| 133 | out: | 130 | out: |
| 134 | return ret; | 131 | return ret; |
| 135 | } | 132 | } |
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index dad68bf46c77..8d28eb010d0d 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h | |||
| @@ -21,6 +21,8 @@ typedef struct { | |||
| 21 | extern long long atomic64_read(const atomic64_t *v); | 21 | extern long long atomic64_read(const atomic64_t *v); |
| 22 | extern void atomic64_set(atomic64_t *v, long long i); | 22 | extern void atomic64_set(atomic64_t *v, long long i); |
| 23 | 23 | ||
| 24 | #define atomic64_set_release(v, i) atomic64_set((v), (i)) | ||
| 25 | |||
| 24 | #define ATOMIC64_OP(op) \ | 26 | #define ATOMIC64_OP(op) \ |
| 25 | extern void atomic64_##op(long long a, atomic64_t *v); | 27 | extern void atomic64_##op(long long a, atomic64_t *v); |
| 26 | 28 | ||
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 7ef015eb3403..b4531e3b2120 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h | |||
| @@ -915,6 +915,9 @@ extern void ioport_unmap(void __iomem *p); | |||
| 915 | #endif /* CONFIG_GENERIC_IOMAP */ | 915 | #endif /* CONFIG_GENERIC_IOMAP */ |
| 916 | #endif /* CONFIG_HAS_IOPORT_MAP */ | 916 | #endif /* CONFIG_HAS_IOPORT_MAP */ |
| 917 | 917 | ||
| 918 | /* | ||
| 919 | * Convert a virtual cached pointer to an uncached pointer | ||
| 920 | */ | ||
| 918 | #ifndef xlate_dev_kmem_ptr | 921 | #ifndef xlate_dev_kmem_ptr |
| 919 | #define xlate_dev_kmem_ptr xlate_dev_kmem_ptr | 922 | #define xlate_dev_kmem_ptr xlate_dev_kmem_ptr |
| 920 | static inline void *xlate_dev_kmem_ptr(void *addr) | 923 | static inline void *xlate_dev_kmem_ptr(void *addr) |
| @@ -954,6 +957,14 @@ static inline void *bus_to_virt(unsigned long address) | |||
| 954 | 957 | ||
| 955 | #ifndef memset_io | 958 | #ifndef memset_io |
| 956 | #define memset_io memset_io | 959 | #define memset_io memset_io |
| 960 | /** | ||
| 961 | * memset_io Set a range of I/O memory to a constant value | ||
| 962 | * @addr: The beginning of the I/O-memory range to set | ||
| 963 | * @val: The value to set the memory to | ||
| 964 | * @count: The number of bytes to set | ||
| 965 | * | ||
| 966 | * Set a range of I/O memory to a given value. | ||
| 967 | */ | ||
| 957 | static inline void memset_io(volatile void __iomem *addr, int value, | 968 | static inline void memset_io(volatile void __iomem *addr, int value, |
| 958 | size_t size) | 969 | size_t size) |
| 959 | { | 970 | { |
| @@ -963,6 +974,14 @@ static inline void memset_io(volatile void __iomem *addr, int value, | |||
| 963 | 974 | ||
| 964 | #ifndef memcpy_fromio | 975 | #ifndef memcpy_fromio |
| 965 | #define memcpy_fromio memcpy_fromio | 976 | #define memcpy_fromio memcpy_fromio |
| 977 | /** | ||
| 978 | * memcpy_fromio Copy a block of data from I/O memory | ||
| 979 | * @dst: The (RAM) destination for the copy | ||
| 980 | * @src: The (I/O memory) source for the data | ||
| 981 | * @count: The number of bytes to copy | ||
| 982 | * | ||
| 983 | * Copy a block of data from I/O memory. | ||
| 984 | */ | ||
| 966 | static inline void memcpy_fromio(void *buffer, | 985 | static inline void memcpy_fromio(void *buffer, |
| 967 | const volatile void __iomem *addr, | 986 | const volatile void __iomem *addr, |
| 968 | size_t size) | 987 | size_t size) |
| @@ -973,6 +992,14 @@ static inline void memcpy_fromio(void *buffer, | |||
| 973 | 992 | ||
| 974 | #ifndef memcpy_toio | 993 | #ifndef memcpy_toio |
| 975 | #define memcpy_toio memcpy_toio | 994 | #define memcpy_toio memcpy_toio |
| 995 | /** | ||
| 996 | * memcpy_toio Copy a block of data into I/O memory | ||
| 997 | * @dst: The (I/O memory) destination for the copy | ||
| 998 | * @src: The (RAM) source for the data | ||
| 999 | * @count: The number of bytes to copy | ||
| 1000 | * | ||
| 1001 | * Copy a block of data to I/O memory. | ||
| 1002 | */ | ||
| 976 | static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, | 1003 | static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, |
| 977 | size_t size) | 1004 | size_t size) |
| 978 | { | 1005 | { |
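As a usage reminder for the helpers documented above (generic driver pattern, not specific to this series; the bus address, size and offsets are placeholders):

    /* Placeholder bus address/size; real drivers get these from the device. */
    void __iomem *regs = ioremap(0xfebf0000, 0x1000);
    u8 buf[64] = { 0 };

    if (regs) {
            memset_io(regs, 0, 0x100);                     /* clear a window */
            memcpy_toio(regs + 0x200, buf, sizeof(buf));   /* RAM -> device  */
            memcpy_fromio(buf, regs + 0x200, sizeof(buf)); /* device -> RAM  */
            iounmap(regs);
    }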
diff --git a/include/linux/atomic.h b/include/linux/atomic.h index c56be7410130..40d6bfec0e0d 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h | |||
| @@ -38,6 +38,9 @@ | |||
| 38 | * Besides, if an arch has a special barrier for acquire/release, it could | 38 | * Besides, if an arch has a special barrier for acquire/release, it could |
| 39 | * implement its own __atomic_op_* and use the same framework for building | 39 | * implement its own __atomic_op_* and use the same framework for building |
| 40 | * variants | 40 | * variants |
| 41 | * | ||
| 42 | * If an architecture overrides __atomic_op_acquire() it will probably want | ||
| 43 | * to define smp_mb__after_spinlock(). | ||
| 41 | */ | 44 | */ |
| 42 | #ifndef __atomic_op_acquire | 45 | #ifndef __atomic_op_acquire |
| 43 | #define __atomic_op_acquire(op, args...) \ | 46 | #define __atomic_op_acquire(op, args...) \ |
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index bdb80c4aef6e..10825052b03f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h | |||
| @@ -203,11 +203,16 @@ | |||
| 203 | 203 | ||
| 204 | #ifdef CONFIG_STACK_VALIDATION | 204 | #ifdef CONFIG_STACK_VALIDATION |
| 205 | #define annotate_unreachable() ({ \ | 205 | #define annotate_unreachable() ({ \ |
| 206 | asm("%c0:\t\n" \ | 206 | asm("%c0:\n\t" \ |
| 207 | ".pushsection .discard.unreachable\t\n" \ | 207 | ".pushsection .discard.unreachable\n\t" \ |
| 208 | ".long %c0b - .\t\n" \ | 208 | ".long %c0b - .\n\t" \ |
| 209 | ".popsection\t\n" : : "i" (__LINE__)); \ | 209 | ".popsection\n\t" : : "i" (__LINE__)); \ |
| 210 | }) | 210 | }) |
| 211 | #define ASM_UNREACHABLE \ | ||
| 212 | "999:\n\t" \ | ||
| 213 | ".pushsection .discard.unreachable\n\t" \ | ||
| 214 | ".long 999b - .\n\t" \ | ||
| 215 | ".popsection\n\t" | ||
| 211 | #else | 216 | #else |
| 212 | #define annotate_unreachable() | 217 | #define annotate_unreachable() |
| 213 | #endif | 218 | #endif |
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index eca8ad75e28b..e25746d88697 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h | |||
| @@ -185,6 +185,9 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, | |||
| 185 | #endif | 185 | #endif |
| 186 | 186 | ||
| 187 | /* Unreachable code */ | 187 | /* Unreachable code */ |
| 188 | #ifndef ASM_UNREACHABLE | ||
| 189 | # define ASM_UNREACHABLE | ||
| 190 | #endif | ||
| 188 | #ifndef unreachable | 191 | #ifndef unreachable |
| 189 | # define unreachable() do { } while (1) | 192 | # define unreachable() do { } while (1) |
| 190 | #endif | 193 | #endif |
diff --git a/include/linux/completion.h b/include/linux/completion.h index 5d5aaae3af43..791f053f28b7 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h | |||
| @@ -9,6 +9,9 @@ | |||
| 9 | */ | 9 | */ |
| 10 | 10 | ||
| 11 | #include <linux/wait.h> | 11 | #include <linux/wait.h> |
| 12 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | ||
| 13 | #include <linux/lockdep.h> | ||
| 14 | #endif | ||
| 12 | 15 | ||
| 13 | /* | 16 | /* |
| 14 | * struct completion - structure used to maintain state for a "completion" | 17 | * struct completion - structure used to maintain state for a "completion" |
| @@ -25,10 +28,50 @@ | |||
| 25 | struct completion { | 28 | struct completion { |
| 26 | unsigned int done; | 29 | unsigned int done; |
| 27 | wait_queue_head_t wait; | 30 | wait_queue_head_t wait; |
| 31 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | ||
| 32 | struct lockdep_map_cross map; | ||
| 33 | #endif | ||
| 28 | }; | 34 | }; |
| 29 | 35 | ||
| 36 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | ||
| 37 | static inline void complete_acquire(struct completion *x) | ||
| 38 | { | ||
| 39 | lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_); | ||
| 40 | } | ||
| 41 | |||
| 42 | static inline void complete_release(struct completion *x) | ||
| 43 | { | ||
| 44 | lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_); | ||
| 45 | } | ||
| 46 | |||
| 47 | static inline void complete_release_commit(struct completion *x) | ||
| 48 | { | ||
| 49 | lock_commit_crosslock((struct lockdep_map *)&x->map); | ||
| 50 | } | ||
| 51 | |||
| 52 | #define init_completion(x) \ | ||
| 53 | do { \ | ||
| 54 | static struct lock_class_key __key; \ | ||
| 55 | lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ | ||
| 56 | "(complete)" #x, \ | ||
| 57 | &__key, 0); \ | ||
| 58 | __init_completion(x); \ | ||
| 59 | } while (0) | ||
| 60 | #else | ||
| 61 | #define init_completion(x) __init_completion(x) | ||
| 62 | static inline void complete_acquire(struct completion *x) {} | ||
| 63 | static inline void complete_release(struct completion *x) {} | ||
| 64 | static inline void complete_release_commit(struct completion *x) {} | ||
| 65 | #endif | ||
| 66 | |||
| 67 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | ||
| 68 | #define COMPLETION_INITIALIZER(work) \ | ||
| 69 | { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ | ||
| 70 | STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } | ||
| 71 | #else | ||
| 30 | #define COMPLETION_INITIALIZER(work) \ | 72 | #define COMPLETION_INITIALIZER(work) \ |
| 31 | { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | 73 | { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } |
| 74 | #endif | ||
| 32 | 75 | ||
| 33 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ | 76 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ |
| 34 | ({ init_completion(&work); work; }) | 77 | ({ init_completion(&work); work; }) |
| @@ -70,7 +113,7 @@ struct completion { | |||
| 70 | * This inline function will initialize a dynamically created completion | 113 | * This inline function will initialize a dynamically created completion |
| 71 | * structure. | 114 | * structure. |
| 72 | */ | 115 | */ |
| 73 | static inline void init_completion(struct completion *x) | 116 | static inline void __init_completion(struct completion *x) |
| 74 | { | 117 | { |
| 75 | x->done = 0; | 118 | x->done = 0; |
| 76 | init_waitqueue_head(&x->wait); | 119 | init_waitqueue_head(&x->wait); |
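A minimal pairing sketch for the completion API whose lockdep instrumentation is added above (standard usage, not new in this patch; do_setup() and use_results() are placeholders):

    static DECLARE_COMPLETION(setup_done);

    static int setup_thread(void *unused)
    {
            do_setup();                        /* assumed producer-side work      */
            complete(&setup_done);             /* lockdep cross-release: commit   */
            return 0;
    }

    static void wait_for_setup(void)
    {
            wait_for_completion(&setup_done);  /* lockdep cross-release: acquire  */
            use_results();                     /* assumed consumer-side work      */
    }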
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 898cfe2eeb42..e74655d941b7 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h | |||
| @@ -37,12 +37,6 @@ static inline bool cpusets_enabled(void) | |||
| 37 | return static_branch_unlikely(&cpusets_enabled_key); | 37 | return static_branch_unlikely(&cpusets_enabled_key); |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | static inline int nr_cpusets(void) | ||
| 41 | { | ||
| 42 | /* jump label reference count + the top-level cpuset */ | ||
| 43 | return static_key_count(&cpusets_enabled_key.key) + 1; | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline void cpuset_inc(void) | 40 | static inline void cpuset_inc(void) |
| 47 | { | 41 | { |
| 48 | static_branch_inc(&cpusets_pre_enable_key); | 42 | static_branch_inc(&cpusets_pre_enable_key); |
diff --git a/include/linux/futex.h b/include/linux/futex.h index 7c5b694864cd..f36bfd26f998 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
| @@ -54,7 +54,6 @@ union futex_key { | |||
| 54 | 54 | ||
| 55 | #ifdef CONFIG_FUTEX | 55 | #ifdef CONFIG_FUTEX |
| 56 | extern void exit_robust_list(struct task_struct *curr); | 56 | extern void exit_robust_list(struct task_struct *curr); |
| 57 | extern void exit_pi_state_list(struct task_struct *curr); | ||
| 58 | #ifdef CONFIG_HAVE_FUTEX_CMPXCHG | 57 | #ifdef CONFIG_HAVE_FUTEX_CMPXCHG |
| 59 | #define futex_cmpxchg_enabled 1 | 58 | #define futex_cmpxchg_enabled 1 |
| 60 | #else | 59 | #else |
| @@ -64,8 +63,14 @@ extern int futex_cmpxchg_enabled; | |||
| 64 | static inline void exit_robust_list(struct task_struct *curr) | 63 | static inline void exit_robust_list(struct task_struct *curr) |
| 65 | { | 64 | { |
| 66 | } | 65 | } |
| 66 | #endif | ||
| 67 | |||
| 68 | #ifdef CONFIG_FUTEX_PI | ||
| 69 | extern void exit_pi_state_list(struct task_struct *curr); | ||
| 70 | #else | ||
| 67 | static inline void exit_pi_state_list(struct task_struct *curr) | 71 | static inline void exit_pi_state_list(struct task_struct *curr) |
| 68 | { | 72 | { |
| 69 | } | 73 | } |
| 70 | #endif | 74 | #endif |
| 75 | |||
| 71 | #endif | 76 | #endif |
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5dd1272d1ab2..5fdd93bb9300 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h | |||
| @@ -23,10 +23,26 @@ | |||
| 23 | # define trace_softirq_context(p) ((p)->softirq_context) | 23 | # define trace_softirq_context(p) ((p)->softirq_context) |
| 24 | # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) | 24 | # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) |
| 25 | # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) | 25 | # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) |
| 26 | # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) | 26 | # define trace_hardirq_enter() \ |
| 27 | # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) | 27 | do { \ |
| 28 | # define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | 28 | current->hardirq_context++; \ |
| 29 | # define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | 29 | crossrelease_hist_start(XHLOCK_HARD); \ |
| 30 | } while (0) | ||
| 31 | # define trace_hardirq_exit() \ | ||
| 32 | do { \ | ||
| 33 | current->hardirq_context--; \ | ||
| 34 | crossrelease_hist_end(XHLOCK_HARD); \ | ||
| 35 | } while (0) | ||
| 36 | # define lockdep_softirq_enter() \ | ||
| 37 | do { \ | ||
| 38 | current->softirq_context++; \ | ||
| 39 | crossrelease_hist_start(XHLOCK_SOFT); \ | ||
| 40 | } while (0) | ||
| 41 | # define lockdep_softirq_exit() \ | ||
| 42 | do { \ | ||
| 43 | current->softirq_context--; \ | ||
| 44 | crossrelease_hist_end(XHLOCK_SOFT); \ | ||
| 45 | } while (0) | ||
| 30 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, | 46 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, |
| 31 | #else | 47 | #else |
| 32 | # define trace_hardirqs_on() do { } while (0) | 48 | # define trace_hardirqs_on() do { } while (0) |
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 2afd74b9d844..cd5861651b17 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h | |||
| @@ -163,6 +163,8 @@ extern void jump_label_apply_nops(struct module *mod); | |||
| 163 | extern int static_key_count(struct static_key *key); | 163 | extern int static_key_count(struct static_key *key); |
| 164 | extern void static_key_enable(struct static_key *key); | 164 | extern void static_key_enable(struct static_key *key); |
| 165 | extern void static_key_disable(struct static_key *key); | 165 | extern void static_key_disable(struct static_key *key); |
| 166 | extern void static_key_enable_cpuslocked(struct static_key *key); | ||
| 167 | extern void static_key_disable_cpuslocked(struct static_key *key); | ||
| 166 | 168 | ||
| 167 | /* | 169 | /* |
| 168 | * We should be using ATOMIC_INIT() for initializing .enabled, but | 170 | * We should be using ATOMIC_INIT() for initializing .enabled, but |
| @@ -234,24 +236,29 @@ static inline int jump_label_apply_nops(struct module *mod) | |||
| 234 | 236 | ||
| 235 | static inline void static_key_enable(struct static_key *key) | 237 | static inline void static_key_enable(struct static_key *key) |
| 236 | { | 238 | { |
| 237 | int count = static_key_count(key); | 239 | STATIC_KEY_CHECK_USE(); |
| 238 | |||
| 239 | WARN_ON_ONCE(count < 0 || count > 1); | ||
| 240 | 240 | ||
| 241 | if (!count) | 241 | if (atomic_read(&key->enabled) != 0) { |
| 242 | static_key_slow_inc(key); | 242 | WARN_ON_ONCE(atomic_read(&key->enabled) != 1); |
| 243 | return; | ||
| 244 | } | ||
| 245 | atomic_set(&key->enabled, 1); | ||
| 243 | } | 246 | } |
| 244 | 247 | ||
| 245 | static inline void static_key_disable(struct static_key *key) | 248 | static inline void static_key_disable(struct static_key *key) |
| 246 | { | 249 | { |
| 247 | int count = static_key_count(key); | 250 | STATIC_KEY_CHECK_USE(); |
| 248 | |||
| 249 | WARN_ON_ONCE(count < 0 || count > 1); | ||
| 250 | 251 | ||
| 251 | if (count) | 252 | if (atomic_read(&key->enabled) != 1) { |
| 252 | static_key_slow_dec(key); | 253 | WARN_ON_ONCE(atomic_read(&key->enabled) != 0); |
| 254 | return; | ||
| 255 | } | ||
| 256 | atomic_set(&key->enabled, 0); | ||
| 253 | } | 257 | } |
| 254 | 258 | ||
| 259 | #define static_key_enable_cpuslocked(k) static_key_enable((k)) | ||
| 260 | #define static_key_disable_cpuslocked(k) static_key_disable((k)) | ||
| 261 | |||
| 255 | #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } | 262 | #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } |
| 256 | #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } | 263 | #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } |
| 257 | 264 | ||
| @@ -413,8 +420,10 @@ extern bool ____wrong_branch_error(void); | |||
| 413 | * Normal usage; boolean enable/disable. | 420 | * Normal usage; boolean enable/disable. |
| 414 | */ | 421 | */ |
| 415 | 422 | ||
| 416 | #define static_branch_enable(x) static_key_enable(&(x)->key) | 423 | #define static_branch_enable(x) static_key_enable(&(x)->key) |
| 417 | #define static_branch_disable(x) static_key_disable(&(x)->key) | 424 | #define static_branch_disable(x) static_key_disable(&(x)->key) |
| 425 | #define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) | ||
| 426 | #define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) | ||
| 418 | 427 | ||
| 419 | #endif /* __ASSEMBLY__ */ | 428 | #endif /* __ASSEMBLY__ */ |
| 420 | 429 | ||
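To illustrate when the new _cpuslocked variants apply (sketch only; my_feature_key and the enable_*() callers are hypothetical), compare a plain caller with one running under the CPU hotplug lock, as in the arm_arch_timer change earlier in this series:

    static DEFINE_STATIC_KEY_FALSE(my_feature_key);

    static void enable_feature(void)
    {
            /* Plain variant: fine when the hotplug lock is NOT already held. */
            static_branch_enable(&my_feature_key);
    }

    static void enable_feature_cpuhp(void)      /* e.g. from a cpuhp callback */
    {
            /*
             * The hotplug framework already holds the CPU hotplug lock here;
             * re-taking it via the plain variant would deadlock, so use the
             * _cpuslocked flavour instead.
             */
            static_branch_enable_cpuslocked(&my_feature_key);
    }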
diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h index b7f8aced7870..41960fecf783 100644 --- a/include/linux/kasan-checks.h +++ b/include/linux/kasan-checks.h | |||
| @@ -2,11 +2,13 @@ | |||
| 2 | #define _LINUX_KASAN_CHECKS_H | 2 | #define _LINUX_KASAN_CHECKS_H |
| 3 | 3 | ||
| 4 | #ifdef CONFIG_KASAN | 4 | #ifdef CONFIG_KASAN |
| 5 | void kasan_check_read(const void *p, unsigned int size); | 5 | void kasan_check_read(const volatile void *p, unsigned int size); |
| 6 | void kasan_check_write(const void *p, unsigned int size); | 6 | void kasan_check_write(const volatile void *p, unsigned int size); |
| 7 | #else | 7 | #else |
| 8 | static inline void kasan_check_read(const void *p, unsigned int size) { } | 8 | static inline void kasan_check_read(const volatile void *p, unsigned int size) |
| 9 | static inline void kasan_check_write(const void *p, unsigned int size) { } | 9 | { } |
| 10 | static inline void kasan_check_write(const volatile void *p, unsigned int size) | ||
| 11 | { } | ||
| 10 | #endif | 12 | #endif |
| 11 | 13 | ||
| 12 | #endif | 14 | #endif |
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bd6d96cf80b1..6607225d0ea4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
| @@ -277,6 +277,13 @@ extern int oops_may_print(void); | |||
| 277 | void do_exit(long error_code) __noreturn; | 277 | void do_exit(long error_code) __noreturn; |
| 278 | void complete_and_exit(struct completion *, long) __noreturn; | 278 | void complete_and_exit(struct completion *, long) __noreturn; |
| 279 | 279 | ||
| 280 | #ifdef CONFIG_ARCH_HAS_REFCOUNT | ||
| 281 | void refcount_error_report(struct pt_regs *regs, const char *err); | ||
| 282 | #else | ||
| 283 | static inline void refcount_error_report(struct pt_regs *regs, const char *err) | ||
| 284 | { } | ||
| 285 | #endif | ||
| 286 | |||
| 280 | /* Internal, do not use. */ | 287 | /* Internal, do not use. */ |
| 281 | int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); | 288 | int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); |
| 282 | int __must_check _kstrtol(const char *s, unsigned int base, long *res); | 289 | int __must_check _kstrtol(const char *s, unsigned int base, long *res); |
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index fffe49f188e6..fc827cab6d6e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h | |||
| @@ -29,7 +29,7 @@ extern int lock_stat; | |||
| 29 | * We'd rather not expose kernel/lockdep_states.h this wide, but we do need | 29 | * We'd rather not expose kernel/lockdep_states.h this wide, but we do need |
| 30 | * the total number of states... :-( | 30 | * the total number of states... :-( |
| 31 | */ | 31 | */ |
| 32 | #define XXX_LOCK_USAGE_STATES (1+3*4) | 32 | #define XXX_LOCK_USAGE_STATES (1+2*4) |
| 33 | 33 | ||
| 34 | /* | 34 | /* |
| 35 | * NR_LOCKDEP_CACHING_CLASSES ... Number of classes | 35 | * NR_LOCKDEP_CACHING_CLASSES ... Number of classes |
| @@ -155,6 +155,12 @@ struct lockdep_map { | |||
| 155 | int cpu; | 155 | int cpu; |
| 156 | unsigned long ip; | 156 | unsigned long ip; |
| 157 | #endif | 157 | #endif |
| 158 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 159 | /* | ||
| 160 | * Whether it's a crosslock. | ||
| 161 | */ | ||
| 162 | int cross; | ||
| 163 | #endif | ||
| 158 | }; | 164 | }; |
| 159 | 165 | ||
| 160 | static inline void lockdep_copy_map(struct lockdep_map *to, | 166 | static inline void lockdep_copy_map(struct lockdep_map *to, |
| @@ -258,8 +264,95 @@ struct held_lock { | |||
| 258 | unsigned int hardirqs_off:1; | 264 | unsigned int hardirqs_off:1; |
| 259 | unsigned int references:12; /* 32 bits */ | 265 | unsigned int references:12; /* 32 bits */ |
| 260 | unsigned int pin_count; | 266 | unsigned int pin_count; |
| 267 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 268 | /* | ||
| 269 | * Generation id. | ||
| 270 | * | ||
| 271 | * A value of cross_gen_id will be stored when holding this, | ||
| 272 | * which is globally increased whenever each crosslock is held. | ||
| 273 | */ | ||
| 274 | unsigned int gen_id; | ||
| 275 | #endif | ||
| 276 | }; | ||
| 277 | |||
| 278 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 279 | #define MAX_XHLOCK_TRACE_ENTRIES 5 | ||
| 280 | |||
| 281 | /* | ||
| 282 | * This is for keeping locks waiting for commit so that true dependencies | ||
| 283 | * can be added at commit step. | ||
| 284 | */ | ||
| 285 | struct hist_lock { | ||
| 286 | /* | ||
| 287 | * Id for each entry in the ring buffer. This is used to | ||
| 288 | * decide whether the ring buffer was overwritten or not. | ||
| 289 | * | ||
| 290 | * For example, | ||
| 291 | * | ||
| 292 | * |<----------- hist_lock ring buffer size ------->| | ||
| 293 | * pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii | ||
| 294 | * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii....................... | ||
| 295 | * | ||
| 296 | * where 'p' represents an acquisition in process | ||
| 297 | * context, 'i' represents an acquisition in irq | ||
| 298 | * context. | ||
| 299 | * | ||
| 300 | * In this example, the ring buffer was overwritten by | ||
| 301 | * acquisitions in irq context, that should be detected on | ||
| 302 | * rollback or commit. | ||
| 303 | */ | ||
| 304 | unsigned int hist_id; | ||
| 305 | |||
| 306 | /* | ||
| 307 | * Separate stack_trace data. This will be used at the commit step. | ||
| 308 | */ | ||
| 309 | struct stack_trace trace; | ||
| 310 | unsigned long trace_entries[MAX_XHLOCK_TRACE_ENTRIES]; | ||
| 311 | |||
| 312 | /* | ||
| 313 | * Separate hlock instance. This will be used at the commit step. | ||
| 314 | * | ||
| 315 | * TODO: Use a smaller data structure containing only necessary | ||
| 316 | * data. However, we should make lockdep code able to handle the | ||
| 317 | * smaller one first. | ||
| 318 | */ | ||
| 319 | struct held_lock hlock; | ||
| 320 | }; | ||
| 321 | |||
| 322 | /* | ||
| 323 | * To initialize a lock as crosslock, lockdep_init_map_crosslock() should | ||
| 324 | * be called instead of lockdep_init_map(). | ||
| 325 | */ | ||
| 326 | struct cross_lock { | ||
| 327 | /* | ||
| 328 | * When more than one acquisition of crosslocks are overlapped, | ||
| 329 | * we have to perform commit for them based on cross_gen_id of | ||
| 330 | * the first acquisition, which allows us to add more true | ||
| 331 | * dependencies. | ||
| 332 | * | ||
| 333 | * Moreover, when no acquisition of a crosslock is in progress, | ||
| 334 | * we should not perform commit because the lock might not exist | ||
| 335 | * any more, which might cause incorrect memory access. So we | ||
| 336 | * have to track the number of acquisitions of a crosslock. | ||
| 337 | */ | ||
| 338 | int nr_acquire; | ||
| 339 | |||
| 340 | /* | ||
| 341 | * Separate hlock instance. This will be used at the commit step. | ||
| 342 | * | ||
| 343 | * TODO: Use a smaller data structure containing only necessary | ||
| 344 | * data. However, we should make lockdep code able to handle the | ||
| 345 | * smaller one first. | ||
| 346 | */ | ||
| 347 | struct held_lock hlock; | ||
| 261 | }; | 348 | }; |
| 262 | 349 | ||
| 350 | struct lockdep_map_cross { | ||
| 351 | struct lockdep_map map; | ||
| 352 | struct cross_lock xlock; | ||
| 353 | }; | ||
| 354 | #endif | ||
| 355 | |||
| 263 | /* | 356 | /* |
| 264 | * Initialization, self-test and debugging-output methods: | 357 | * Initialization, self-test and debugging-output methods: |
| 265 | */ | 358 | */ |
| @@ -282,13 +375,6 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
| 282 | struct lock_class_key *key, int subclass); | 375 | struct lock_class_key *key, int subclass); |
| 283 | 376 | ||
| 284 | /* | 377 | /* |
| 285 | * To initialize a lockdep_map statically use this macro. | ||
| 286 | * Note that _name must not be NULL. | ||
| 287 | */ | ||
| 288 | #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ | ||
| 289 | { .name = (_name), .key = (void *)(_key), } | ||
| 290 | |||
| 291 | /* | ||
| 292 | * Reinitialize a lock key - for cases where there is special locking or | 378 | * Reinitialize a lock key - for cases where there is special locking or |
| 293 | * special initialization of locks so that the validator gets the scope | 379 | * special initialization of locks so that the validator gets the scope |
| 294 | * of dependencies wrong: they are either too broad (they need a class-split) | 380 | * of dependencies wrong: they are either too broad (they need a class-split) |
| @@ -363,10 +449,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock, | |||
| 363 | 449 | ||
| 364 | extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); | 450 | extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); |
| 365 | 451 | ||
| 366 | extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); | ||
| 367 | extern void lockdep_clear_current_reclaim_state(void); | ||
| 368 | extern void lockdep_trace_alloc(gfp_t mask); | ||
| 369 | |||
| 370 | struct pin_cookie { unsigned int val; }; | 452 | struct pin_cookie { unsigned int val; }; |
| 371 | 453 | ||
| 372 | #define NIL_COOKIE (struct pin_cookie){ .val = 0U, } | 454 | #define NIL_COOKIE (struct pin_cookie){ .val = 0U, } |
| @@ -375,7 +457,7 @@ extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); | |||
| 375 | extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); | 457 | extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); |
| 376 | extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); | 458 | extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); |
| 377 | 459 | ||
| 378 | # define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, | 460 | # define INIT_LOCKDEP .lockdep_recursion = 0, |
| 379 | 461 | ||
| 380 | #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) | 462 | #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) |
| 381 | 463 | ||
| @@ -416,9 +498,6 @@ static inline void lockdep_on(void) | |||
| 416 | # define lock_downgrade(l, i) do { } while (0) | 498 | # define lock_downgrade(l, i) do { } while (0) |
| 417 | # define lock_set_class(l, n, k, s, i) do { } while (0) | 499 | # define lock_set_class(l, n, k, s, i) do { } while (0) |
| 418 | # define lock_set_subclass(l, s, i) do { } while (0) | 500 | # define lock_set_subclass(l, s, i) do { } while (0) |
| 419 | # define lockdep_set_current_reclaim_state(g) do { } while (0) | ||
| 420 | # define lockdep_clear_current_reclaim_state() do { } while (0) | ||
| 421 | # define lockdep_trace_alloc(g) do { } while (0) | ||
| 422 | # define lockdep_info() do { } while (0) | 501 | # define lockdep_info() do { } while (0) |
| 423 | # define lockdep_init_map(lock, name, key, sub) \ | 502 | # define lockdep_init_map(lock, name, key, sub) \ |
| 424 | do { (void)(name); (void)(key); } while (0) | 503 | do { (void)(name); (void)(key); } while (0) |
| @@ -467,6 +546,57 @@ struct pin_cookie { }; | |||
| 467 | 546 | ||
| 468 | #endif /* !LOCKDEP */ | 547 | #endif /* !LOCKDEP */ |
| 469 | 548 | ||
| 549 | enum xhlock_context_t { | ||
| 550 | XHLOCK_HARD, | ||
| 551 | XHLOCK_SOFT, | ||
| 552 | XHLOCK_PROC, | ||
| 553 | XHLOCK_CTX_NR, | ||
| 554 | }; | ||
| 555 | |||
| 556 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 557 | extern void lockdep_init_map_crosslock(struct lockdep_map *lock, | ||
| 558 | const char *name, | ||
| 559 | struct lock_class_key *key, | ||
| 560 | int subclass); | ||
| 561 | extern void lock_commit_crosslock(struct lockdep_map *lock); | ||
| 562 | |||
| 563 | /* | ||
| 564 | * What we essentially have to initialize is 'nr_acquire'. Other members | ||
| 565 | * will be initialized in add_xlock(). | ||
| 566 | */ | ||
| 567 | #define STATIC_CROSS_LOCK_INIT() \ | ||
| 568 | { .nr_acquire = 0,} | ||
| 569 | |||
| 570 | #define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \ | ||
| 571 | { .map.name = (_name), .map.key = (void *)(_key), \ | ||
| 572 | .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), } | ||
| 573 | |||
| 574 | /* | ||
| 575 | * To initialize a lockdep_map statically use this macro. | ||
| 576 | * Note that _name must not be NULL. | ||
| 577 | */ | ||
| 578 | #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ | ||
| 579 | { .name = (_name), .key = (void *)(_key), .cross = 0, } | ||
| 580 | |||
| 581 | extern void crossrelease_hist_start(enum xhlock_context_t c); | ||
| 582 | extern void crossrelease_hist_end(enum xhlock_context_t c); | ||
| 583 | extern void lockdep_init_task(struct task_struct *task); | ||
| 584 | extern void lockdep_free_task(struct task_struct *task); | ||
| 585 | #else | ||
| 586 | #define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) | ||
| 587 | /* | ||
| 588 | * To initialize a lockdep_map statically use this macro. | ||
| 589 | * Note that _name must not be NULL. | ||
| 590 | */ | ||
| 591 | #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ | ||
| 592 | { .name = (_name), .key = (void *)(_key), } | ||
| 593 | |||
| 594 | static inline void crossrelease_hist_start(enum xhlock_context_t c) {} | ||
| 595 | static inline void crossrelease_hist_end(enum xhlock_context_t c) {} | ||
| 596 | static inline void lockdep_init_task(struct task_struct *task) {} | ||
| 597 | static inline void lockdep_free_task(struct task_struct *task) {} | ||
| 598 | #endif | ||
| 599 | |||
| 470 | #ifdef CONFIG_LOCK_STAT | 600 | #ifdef CONFIG_LOCK_STAT |
| 471 | 601 | ||
| 472 | extern void lock_contended(struct lockdep_map *lock, unsigned long ip); | 602 | extern void lock_contended(struct lockdep_map *lock, unsigned long ip); |
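
Taken together, the crossrelease hooks let a primitive whose acquire and release happen in different contexts be annotated: the map is initialised with lockdep_init_map_crosslock(), the acquisition is recorded in the waiting context, and the dependencies are committed and released from whichever context performs the wake-up. A hedged sketch, only meaningful under CONFIG_LOCKDEP_CROSSRELEASE (my_barrier and its helpers are illustrative, not kernel API):

    #include <linux/kernel.h>
    #include <linux/lockdep.h>

    struct my_barrier {
            struct lockdep_map_cross dep_map;       /* lockdep_map + cross_lock state */
            /* ... the primitive's real state ... */
    };

    #define my_barrier_init(b)                                              \
    do {                                                                    \
            static struct lock_class_key __key;                             \
            lockdep_init_map_crosslock((struct lockdep_map *)&(b)->dep_map, \
                                       #b, &__key, 0);                      \
    } while (0)

    static inline void my_barrier_wait_acquire(struct my_barrier *b)
    {
            /* recorded in the context that will wait */
            lock_acquire_exclusive((struct lockdep_map *)&b->dep_map, 0, 0,
                                   NULL, _RET_IP_);
    }

    static inline void my_barrier_wake_release(struct my_barrier *b)
    {
            /* dependencies are only committed once the release side runs */
            lock_commit_crosslock((struct lockdep_map *)&b->dep_map);
            lock_release((struct lockdep_map *)&b->dep_map, 0, _RET_IP_);
    }
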
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3cadee0a3508..dc1edec05a3f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
| @@ -535,6 +535,10 @@ extern void tlb_finish_mmu(struct mmu_gather *tlb, | |||
| 535 | */ | 535 | */ |
| 536 | static inline bool mm_tlb_flush_pending(struct mm_struct *mm) | 536 | static inline bool mm_tlb_flush_pending(struct mm_struct *mm) |
| 537 | { | 537 | { |
| 538 | /* | ||
| 539 | * Must be called with PTL held; such that our PTL acquire will have | ||
| 540 | * observed the store from set_tlb_flush_pending(). | ||
| 541 | */ | ||
| 538 | return atomic_read(&mm->tlb_flush_pending) > 0; | 542 | return atomic_read(&mm->tlb_flush_pending) > 0; |
| 539 | } | 543 | } |
| 540 | 544 | ||
| @@ -556,10 +560,29 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm) | |||
| 556 | atomic_inc(&mm->tlb_flush_pending); | 560 | atomic_inc(&mm->tlb_flush_pending); |
| 557 | 561 | ||
| 558 | /* | 562 | /* |
| 559 | * Guarantee that the tlb_flush_pending increase does not leak into the | 563 | * The only time this value is relevant is when there are indeed pages |
| 560 | * critical section updating the page tables | 564 | * to flush. And we'll only flush pages after changing them, which |
| 565 | * requires the PTL. | ||
| 566 | * | ||
| 567 | * So the ordering here is: | ||
| 568 | * | ||
| 569 | * atomic_inc(&mm->tlb_flush_pending); | ||
| 570 | * spin_lock(&ptl); | ||
| 571 | * ... | ||
| 572 | * set_pte_at(); | ||
| 573 | * spin_unlock(&ptl); | ||
| 574 | * | ||
| 575 | * spin_lock(&ptl) | ||
| 576 | * mm_tlb_flush_pending(); | ||
| 577 | * .... | ||
| 578 | * spin_unlock(&ptl); | ||
| 579 | * | ||
| 580 | * flush_tlb_range(); | ||
| 581 | * atomic_dec(&mm->tlb_flush_pending); | ||
| 582 | * | ||
| 583 | * So the =true store is constrained by the PTL unlock, and the =false | ||
| 584 | * store is constrained by the TLB invalidate. | ||
| 561 | */ | 585 | */ |
| 562 | smp_mb__before_spinlock(); | ||
| 563 | } | 586 | } |
| 564 | 587 | ||
| 565 | /* Clearing is done after a TLB flush, which also provides a barrier. */ | 588 | /* Clearing is done after a TLB flush, which also provides a barrier. */ |
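
The new comment spells out the required ordering; a caller that satisfies it has roughly the following shape (a sketch only, change_ptes_sketch() is hypothetical):

    #include <linux/mm.h>
    #include <asm/tlbflush.h>

    /*
     * Sketch of the pattern described above: the "pending" increment is
     * ordered by the PTL unlock, the decrement by the completed TLB flush.
     */
    static void change_ptes_sketch(struct vm_area_struct *vma, spinlock_t *ptl,
                                   unsigned long start, unsigned long end)
    {
            struct mm_struct *mm = vma->vm_mm;

            inc_tlb_flush_pending(mm);

            spin_lock(ptl);
            /* set_pte_at(...): modify the PTEs under the PTL */
            spin_unlock(ptl);

            flush_tlb_range(vma, start, end);
            dec_tlb_flush_pending(mm);
    }
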
diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 591792c8e5b0..48b7c9c68c4d 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h | |||
| @@ -53,6 +53,9 @@ extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); | |||
| 53 | extern __must_check bool refcount_dec_and_test(refcount_t *r); | 53 | extern __must_check bool refcount_dec_and_test(refcount_t *r); |
| 54 | extern void refcount_dec(refcount_t *r); | 54 | extern void refcount_dec(refcount_t *r); |
| 55 | #else | 55 | #else |
| 56 | # ifdef CONFIG_ARCH_HAS_REFCOUNT | ||
| 57 | # include <asm/refcount.h> | ||
| 58 | # else | ||
| 56 | static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) | 59 | static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) |
| 57 | { | 60 | { |
| 58 | return atomic_add_unless(&r->refs, i, 0); | 61 | return atomic_add_unless(&r->refs, i, 0); |
| @@ -87,6 +90,7 @@ static inline void refcount_dec(refcount_t *r) | |||
| 87 | { | 90 | { |
| 88 | atomic_dec(&r->refs); | 91 | atomic_dec(&r->refs); |
| 89 | } | 92 | } |
| 93 | # endif /* !CONFIG_ARCH_HAS_REFCOUNT */ | ||
| 90 | #endif /* CONFIG_REFCOUNT_FULL */ | 94 | #endif /* CONFIG_REFCOUNT_FULL */ |
| 91 | 95 | ||
| 92 | extern __must_check bool refcount_dec_if_one(refcount_t *r); | 96 | extern __must_check bool refcount_dec_if_one(refcount_t *r); |
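
With CONFIG_ARCH_HAS_REFCOUNT the middle branch pulls in an architecture-provided implementation of the same API, paired with the refcount_error_report() hook added above; callers are unaffected whichever branch is compiled in. A typical lifetime pattern, for reference (struct obj is illustrative):

    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct obj {
            refcount_t ref;
            /* ... payload ... */
    };

    static struct obj *obj_get(struct obj *o)
    {
            refcount_inc(&o->ref);          /* checked/saturating where supported */
            return o;
    }

    static void obj_put(struct obj *o)
    {
            if (refcount_dec_and_test(&o->ref))
                    kfree(o);
    }
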
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index ae0528b834cd..e784761a4443 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h | |||
| @@ -32,6 +32,7 @@ struct rw_semaphore { | |||
| 32 | #define RWSEM_UNLOCKED_VALUE 0x00000000 | 32 | #define RWSEM_UNLOCKED_VALUE 0x00000000 |
| 33 | 33 | ||
| 34 | extern void __down_read(struct rw_semaphore *sem); | 34 | extern void __down_read(struct rw_semaphore *sem); |
| 35 | extern int __must_check __down_read_killable(struct rw_semaphore *sem); | ||
| 35 | extern int __down_read_trylock(struct rw_semaphore *sem); | 36 | extern int __down_read_trylock(struct rw_semaphore *sem); |
| 36 | extern void __down_write(struct rw_semaphore *sem); | 37 | extern void __down_write(struct rw_semaphore *sem); |
| 37 | extern int __must_check __down_write_killable(struct rw_semaphore *sem); | 38 | extern int __must_check __down_write_killable(struct rw_semaphore *sem); |
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index dd1d14250340..0ad7318ff299 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h | |||
| @@ -44,6 +44,7 @@ struct rw_semaphore { | |||
| 44 | }; | 44 | }; |
| 45 | 45 | ||
| 46 | extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); | 46 | extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); |
| 47 | extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); | ||
| 47 | extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); | 48 | extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); |
| 48 | extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); | 49 | extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); |
| 49 | extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); | 50 | extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); |
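
These look like the building blocks of a killable down_read(): the fast path tries the reader increment and, on contention, falls back to the killable slow path, turning a fatal signal into -EINTR. A hedged sketch of how the xadd-based fast path could plug in, modelled on the existing write-side killable variant rather than taken from the actual implementation:

    #include <linux/err.h>
    #include <linux/rwsem.h>

    /* Sketch only: mirrors the structure of __down_write_killable(). */
    static inline int __down_read_killable_sketch(struct rw_semaphore *sem)
    {
            if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
                    if (IS_ERR(rwsem_down_read_failed_killable(sem)))
                            return -EINTR;
            }
            return 0;
    }
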
diff --git a/include/linux/sched.h b/include/linux/sched.h index c05ac5f5aa03..93be319e0cbf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -846,7 +846,17 @@ struct task_struct { | |||
| 846 | int lockdep_depth; | 846 | int lockdep_depth; |
| 847 | unsigned int lockdep_recursion; | 847 | unsigned int lockdep_recursion; |
| 848 | struct held_lock held_locks[MAX_LOCK_DEPTH]; | 848 | struct held_lock held_locks[MAX_LOCK_DEPTH]; |
| 849 | gfp_t lockdep_reclaim_gfp; | 849 | #endif |
| 850 | |||
| 851 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 852 | #define MAX_XHLOCKS_NR 64UL | ||
| 853 | struct hist_lock *xhlocks; /* Crossrelease history locks */ | ||
| 854 | unsigned int xhlock_idx; | ||
| 855 | /* For restoring at history boundaries */ | ||
| 856 | unsigned int xhlock_idx_hist[XHLOCK_CTX_NR]; | ||
| 857 | unsigned int hist_id; | ||
| 858 | /* For overwrite check at each context exit */ | ||
| 859 | unsigned int hist_id_save[XHLOCK_CTX_NR]; | ||
| 850 | #endif | 860 | #endif |
| 851 | 861 | ||
| 852 | #ifdef CONFIG_UBSAN | 862 | #ifdef CONFIG_UBSAN |
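
The new task fields form a small ring buffer of recent acquisitions (xhlocks[], indexed by xhlock_idx) plus saved indices and overwrite-detection ids so a context's history can be rolled back when it exits. A context that should not leak lock history to its caller brackets itself with crossrelease_hist_start()/end(); a hedged sketch for a work-item-like execution unit (run_one_unit() is illustrative):

    #include <linux/lockdep.h>
    #include <linux/workqueue.h>

    /*
     * Illustrative: bracket an independent execution unit so the acquisitions
     * it records in xhlocks[] can be rolled back on exit instead of creating
     * false cross-context dependencies.
     */
    static void run_one_unit(struct work_struct *work)
    {
            crossrelease_hist_start(XHLOCK_PROC);
            work->func(work);
            crossrelease_hist_end(XHLOCK_PROC);
    }
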
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 2b24a6974847..2b0a281f9d26 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h | |||
| @@ -167,6 +167,14 @@ static inline gfp_t current_gfp_context(gfp_t flags) | |||
| 167 | return flags; | 167 | return flags; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | #ifdef CONFIG_LOCKDEP | ||
| 171 | extern void fs_reclaim_acquire(gfp_t gfp_mask); | ||
| 172 | extern void fs_reclaim_release(gfp_t gfp_mask); | ||
| 173 | #else | ||
| 174 | static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } | ||
| 175 | static inline void fs_reclaim_release(gfp_t gfp_mask) { } | ||
| 176 | #endif | ||
| 177 | |||
| 170 | static inline unsigned int memalloc_noio_save(void) | 178 | static inline unsigned int memalloc_noio_save(void) |
| 171 | { | 179 | { |
| 172 | unsigned int flags = current->flags & PF_MEMALLOC_NOIO; | 180 | unsigned int flags = current->flags & PF_MEMALLOC_NOIO; |
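
fs_reclaim_acquire()/release() replace the lockdep_trace_alloc() machinery removed from <linux/lockdep.h> above: direct-reclaim sections are modelled as an ordinary lockdep map, so a lock taken both inside reclaim and while holding a reclaim-unsafe lock gets flagged. A hedged sketch of the kind of call site involved (direct_reclaim_sketch() is illustrative):

    #include <linux/sched/mm.h>
    #include <linux/swap.h>

    /* Illustrative: mark a region in which filesystem reclaim may run. */
    static unsigned long direct_reclaim_sketch(gfp_t gfp_mask, int order,
                                               struct zonelist *zonelist)
    {
            unsigned long progress;

            fs_reclaim_acquire(gfp_mask);
            progress = try_to_free_pages(zonelist, order, gfp_mask, NULL);
            fs_reclaim_release(gfp_mask);

            return progress;
    }
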
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d9510e8522d4..4e8cce19b507 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h | |||
| @@ -118,16 +118,39 @@ do { \ | |||
| 118 | #endif | 118 | #endif |
| 119 | 119 | ||
| 120 | /* | 120 | /* |
| 121 | * Despite its name it doesn't necessarily has to be a full barrier. | 121 | * This barrier must provide two things: |
| 122 | * It should only guarantee that a STORE before the critical section | 122 | * |
| 123 | * can not be reordered with LOADs and STOREs inside this section. | 123 | * - it must guarantee a STORE before the spin_lock() is ordered against a |
| 124 | * spin_lock() is the one-way barrier, this LOAD can not escape out | 124 | * LOAD after it, see the comments at its two usage sites. |
| 125 | * of the region. So the default implementation simply ensures that | 125 | * |
| 126 | * a STORE can not move into the critical section, smp_wmb() should | 126 | * - it must ensure the critical section is RCsc. |
| 127 | * serialize it with another STORE done by spin_lock(). | 127 | * |
| 128 | * The latter is important for cases where we observe values written by other | ||
| 129 | * CPUs in spin-loops, without barriers, while being subject to scheduling. | ||
| 130 | * | ||
| 131 | * CPU0 CPU1 CPU2 | ||
| 132 | * | ||
| 133 | * for (;;) { | ||
| 134 | * if (READ_ONCE(X)) | ||
| 135 | * break; | ||
| 136 | * } | ||
| 137 | * X=1 | ||
| 138 | * <sched-out> | ||
| 139 | * <sched-in> | ||
| 140 | * r = X; | ||
| 141 | * | ||
| 142 | * without transitivity it could be that CPU1 observes X!=0 and breaks the loop, | ||
| 143 | * we get migrated and CPU2 sees X==0. | ||
| 144 | * | ||
| 145 | * Since most load-store architectures implement ACQUIRE with an smp_mb() after | ||
| 146 | * the LL/SC loop, they need no further barriers. Similarly all our TSO | ||
| 147 | * architectures imply an smp_mb() for each atomic instruction and equally don't | ||
| 148 | * need more. | ||
| 149 | * | ||
| 150 | * Architectures that can implement ACQUIRE better need to take care. | ||
| 128 | */ | 151 | */ |
| 129 | #ifndef smp_mb__before_spinlock | 152 | #ifndef smp_mb__after_spinlock |
| 130 | #define smp_mb__before_spinlock() smp_wmb() | 153 | #define smp_mb__after_spinlock() do { } while (0) |
| 131 | #endif | 154 | #endif |
| 132 | 155 | ||
| 133 | /** | 156 | /** |
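
The replacement barrier sits after the lock acquisition rather than before it. A hedged sketch of the intended pattern (publish_then_inspect() is illustrative): a store issued before taking the lock must not be reordered with loads performed inside the critical section.

    #include <linux/compiler.h>
    #include <linux/spinlock.h>

    static void publish_then_inspect(spinlock_t *lock, int *flag, int *state)
    {
            WRITE_ONCE(*flag, 1);           /* STORE before the lock */

            spin_lock(lock);
            smp_mb__after_spinlock();       /* no-op where ACQUIRE is already strong enough */
            if (READ_ONCE(*state)) {
                    /* the STORE above is ordered before this LOAD */
            }
            spin_unlock(lock);
    }
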
diff --git a/init/Kconfig b/init/Kconfig index 8514b25db21c..5f0ef850e808 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -1275,12 +1275,17 @@ config BASE_FULL | |||
| 1275 | config FUTEX | 1275 | config FUTEX |
| 1276 | bool "Enable futex support" if EXPERT | 1276 | bool "Enable futex support" if EXPERT |
| 1277 | default y | 1277 | default y |
| 1278 | select RT_MUTEXES | 1278 | imply RT_MUTEXES |
| 1279 | help | 1279 | help |
| 1280 | Disabling this option will cause the kernel to be built without | 1280 | Disabling this option will cause the kernel to be built without |
| 1281 | support for "fast userspace mutexes". The resulting kernel may not | 1281 | support for "fast userspace mutexes". The resulting kernel may not |
| 1282 | run glibc-based applications correctly. | 1282 | run glibc-based applications correctly. |
| 1283 | 1283 | ||
| 1284 | config FUTEX_PI | ||
| 1285 | bool | ||
| 1286 | depends on FUTEX && RT_MUTEXES | ||
| 1287 | default y | ||
| 1288 | |||
| 1284 | config HAVE_FUTEX_CMPXCHG | 1289 | config HAVE_FUTEX_CMPXCHG |
| 1285 | bool | 1290 | bool |
| 1286 | depends on FUTEX | 1291 | depends on FUTEX |
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 8d5151688504..9ed6a051a1b9 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c | |||
| @@ -577,6 +577,13 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, | |||
| 577 | rcu_read_unlock(); | 577 | rcu_read_unlock(); |
| 578 | } | 578 | } |
| 579 | 579 | ||
| 580 | /* Must be called with cpuset_mutex held. */ | ||
| 581 | static inline int nr_cpusets(void) | ||
| 582 | { | ||
| 583 | /* jump label reference count + the top-level cpuset */ | ||
| 584 | return static_key_count(&cpusets_enabled_key.key) + 1; | ||
| 585 | } | ||
| 586 | |||
| 580 | /* | 587 | /* |
| 581 | * generate_sched_domains() | 588 | * generate_sched_domains() |
| 582 | * | 589 | * |
diff --git a/kernel/exit.c b/kernel/exit.c index c5548faa9f37..fa72d57db747 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -920,6 +920,7 @@ void __noreturn do_exit(long code) | |||
| 920 | exit_rcu(); | 920 | exit_rcu(); |
| 921 | TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); | 921 | TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); |
| 922 | 922 | ||
| 923 | lockdep_free_task(tsk); | ||
| 923 | do_task_dead(); | 924 | do_task_dead(); |
| 924 | } | 925 | } |
| 925 | EXPORT_SYMBOL_GPL(do_exit); | 926 | EXPORT_SYMBOL_GPL(do_exit); |
diff --git a/kernel/fork.c b/kernel/fork.c index e075b7780421..5fc09911fbb9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -484,6 +484,8 @@ void __init fork_init(void) | |||
| 484 | cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", | 484 | cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", |
| 485 | NULL, free_vm_stack_cache); | 485 | NULL, free_vm_stack_cache); |
| 486 | #endif | 486 | #endif |
| 487 | |||
| 488 | lockdep_init_task(&init_task); | ||
| 487 | } | 489 | } |
| 488 | 490 | ||
| 489 | int __weak arch_dup_task_struct(struct task_struct *dst, | 491 | int __weak arch_dup_task_struct(struct task_struct *dst, |
| @@ -1691,6 +1693,7 @@ static __latent_entropy struct task_struct *copy_process( | |||
| 1691 | p->lockdep_depth = 0; /* no locks held yet */ | 1693 | p->lockdep_depth = 0; /* no locks held yet */ |
| 1692 | p->curr_chain_key = 0; | 1694 | p->curr_chain_key = 0; |
| 1693 | p->lockdep_recursion = 0; | 1695 | p->lockdep_recursion = 0; |
| 1696 | lockdep_init_task(p); | ||
| 1694 | #endif | 1697 | #endif |
| 1695 | 1698 | ||
| 1696 | #ifdef CONFIG_DEBUG_MUTEXES | 1699 | #ifdef CONFIG_DEBUG_MUTEXES |
| @@ -1949,6 +1952,7 @@ bad_fork_cleanup_audit: | |||
| 1949 | bad_fork_cleanup_perf: | 1952 | bad_fork_cleanup_perf: |
| 1950 | perf_event_free_task(p); | 1953 | perf_event_free_task(p); |
| 1951 | bad_fork_cleanup_policy: | 1954 | bad_fork_cleanup_policy: |
| 1955 | lockdep_free_task(p); | ||
| 1952 | #ifdef CONFIG_NUMA | 1956 | #ifdef CONFIG_NUMA |
| 1953 | mpol_put(p->mempolicy); | 1957 | mpol_put(p->mempolicy); |
| 1954 | bad_fork_cleanup_threadgroup_lock: | 1958 | bad_fork_cleanup_threadgroup_lock: |
diff --git a/kernel/futex.c b/kernel/futex.c index f50b434756c1..0939255fc750 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -876,6 +876,8 @@ static struct task_struct *futex_find_get_task(pid_t pid) | |||
| 876 | return p; | 876 | return p; |
| 877 | } | 877 | } |
| 878 | 878 | ||
| 879 | #ifdef CONFIG_FUTEX_PI | ||
| 880 | |||
| 879 | /* | 881 | /* |
| 880 | * This task is holding PI mutexes at exit time => bad. | 882 | * This task is holding PI mutexes at exit time => bad. |
| 881 | * Kernel cleans up PI-state, but userspace is likely hosed. | 883 | * Kernel cleans up PI-state, but userspace is likely hosed. |
| @@ -933,6 +935,8 @@ void exit_pi_state_list(struct task_struct *curr) | |||
| 933 | raw_spin_unlock_irq(&curr->pi_lock); | 935 | raw_spin_unlock_irq(&curr->pi_lock); |
| 934 | } | 936 | } |
| 935 | 937 | ||
| 938 | #endif | ||
| 939 | |||
| 936 | /* | 940 | /* |
| 937 | * We need to check the following states: | 941 | * We need to check the following states: |
| 938 | * | 942 | * |
| @@ -1800,6 +1804,15 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |||
| 1800 | struct futex_q *this, *next; | 1804 | struct futex_q *this, *next; |
| 1801 | DEFINE_WAKE_Q(wake_q); | 1805 | DEFINE_WAKE_Q(wake_q); |
| 1802 | 1806 | ||
| 1807 | /* | ||
| 1808 | * When PI is not supported: return -ENOSYS if requeue_pi is true; | ||
| 1809 | * consequently the compiler knows requeue_pi is always false past | ||
| 1810 | * this point, which will optimize away all the conditional code | ||
| 1811 | * further down. | ||
| 1812 | */ | ||
| 1813 | if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi) | ||
| 1814 | return -ENOSYS; | ||
| 1815 | |||
| 1803 | if (requeue_pi) { | 1816 | if (requeue_pi) { |
| 1804 | /* | 1817 | /* |
| 1805 | * Requeue PI only works on two distinct uaddrs. This | 1818 | * Requeue PI only works on two distinct uaddrs. This |
| @@ -2595,6 +2608,9 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, | |||
| 2595 | struct futex_q q = futex_q_init; | 2608 | struct futex_q q = futex_q_init; |
| 2596 | int res, ret; | 2609 | int res, ret; |
| 2597 | 2610 | ||
| 2611 | if (!IS_ENABLED(CONFIG_FUTEX_PI)) | ||
| 2612 | return -ENOSYS; | ||
| 2613 | |||
| 2598 | if (refill_pi_state_cache()) | 2614 | if (refill_pi_state_cache()) |
| 2599 | return -ENOMEM; | 2615 | return -ENOMEM; |
| 2600 | 2616 | ||
| @@ -2774,6 +2790,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) | |||
| 2774 | struct futex_q *top_waiter; | 2790 | struct futex_q *top_waiter; |
| 2775 | int ret; | 2791 | int ret; |
| 2776 | 2792 | ||
| 2793 | if (!IS_ENABLED(CONFIG_FUTEX_PI)) | ||
| 2794 | return -ENOSYS; | ||
| 2795 | |||
| 2777 | retry: | 2796 | retry: |
| 2778 | if (get_user(uval, uaddr)) | 2797 | if (get_user(uval, uaddr)) |
| 2779 | return -EFAULT; | 2798 | return -EFAULT; |
| @@ -2984,6 +3003,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |||
| 2984 | struct futex_q q = futex_q_init; | 3003 | struct futex_q q = futex_q_init; |
| 2985 | int res, ret; | 3004 | int res, ret; |
| 2986 | 3005 | ||
| 3006 | if (!IS_ENABLED(CONFIG_FUTEX_PI)) | ||
| 3007 | return -ENOSYS; | ||
| 3008 | |||
| 2987 | if (uaddr == uaddr2) | 3009 | if (uaddr == uaddr2) |
| 2988 | return -EINVAL; | 3010 | return -EINVAL; |
| 2989 | 3011 | ||
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index d11c506a6ac3..0bf2e8f5244a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
| @@ -79,29 +79,7 @@ int static_key_count(struct static_key *key) | |||
| 79 | } | 79 | } |
| 80 | EXPORT_SYMBOL_GPL(static_key_count); | 80 | EXPORT_SYMBOL_GPL(static_key_count); |
| 81 | 81 | ||
| 82 | void static_key_enable(struct static_key *key) | 82 | static void static_key_slow_inc_cpuslocked(struct static_key *key) |
| 83 | { | ||
| 84 | int count = static_key_count(key); | ||
| 85 | |||
| 86 | WARN_ON_ONCE(count < 0 || count > 1); | ||
| 87 | |||
| 88 | if (!count) | ||
| 89 | static_key_slow_inc(key); | ||
| 90 | } | ||
| 91 | EXPORT_SYMBOL_GPL(static_key_enable); | ||
| 92 | |||
| 93 | void static_key_disable(struct static_key *key) | ||
| 94 | { | ||
| 95 | int count = static_key_count(key); | ||
| 96 | |||
| 97 | WARN_ON_ONCE(count < 0 || count > 1); | ||
| 98 | |||
| 99 | if (count) | ||
| 100 | static_key_slow_dec(key); | ||
| 101 | } | ||
| 102 | EXPORT_SYMBOL_GPL(static_key_disable); | ||
| 103 | |||
| 104 | void static_key_slow_inc(struct static_key *key) | ||
| 105 | { | 83 | { |
| 106 | int v, v1; | 84 | int v, v1; |
| 107 | 85 | ||
| @@ -125,24 +103,87 @@ void static_key_slow_inc(struct static_key *key) | |||
| 125 | return; | 103 | return; |
| 126 | } | 104 | } |
| 127 | 105 | ||
| 128 | cpus_read_lock(); | ||
| 129 | jump_label_lock(); | 106 | jump_label_lock(); |
| 130 | if (atomic_read(&key->enabled) == 0) { | 107 | if (atomic_read(&key->enabled) == 0) { |
| 131 | atomic_set(&key->enabled, -1); | 108 | atomic_set(&key->enabled, -1); |
| 132 | jump_label_update(key); | 109 | jump_label_update(key); |
| 133 | atomic_set(&key->enabled, 1); | 110 | /* |
| 111 | * Ensure that if the above cmpxchg loop observes our positive | ||
| 112 | * value, it must also observe all the text changes. | ||
| 113 | */ | ||
| 114 | atomic_set_release(&key->enabled, 1); | ||
| 134 | } else { | 115 | } else { |
| 135 | atomic_inc(&key->enabled); | 116 | atomic_inc(&key->enabled); |
| 136 | } | 117 | } |
| 137 | jump_label_unlock(); | 118 | jump_label_unlock(); |
| 119 | } | ||
| 120 | |||
| 121 | void static_key_slow_inc(struct static_key *key) | ||
| 122 | { | ||
| 123 | cpus_read_lock(); | ||
| 124 | static_key_slow_inc_cpuslocked(key); | ||
| 138 | cpus_read_unlock(); | 125 | cpus_read_unlock(); |
| 139 | } | 126 | } |
| 140 | EXPORT_SYMBOL_GPL(static_key_slow_inc); | 127 | EXPORT_SYMBOL_GPL(static_key_slow_inc); |
| 141 | 128 | ||
| 142 | static void __static_key_slow_dec(struct static_key *key, | 129 | void static_key_enable_cpuslocked(struct static_key *key) |
| 143 | unsigned long rate_limit, struct delayed_work *work) | 130 | { |
| 131 | STATIC_KEY_CHECK_USE(); | ||
| 132 | |||
| 133 | if (atomic_read(&key->enabled) > 0) { | ||
| 134 | WARN_ON_ONCE(atomic_read(&key->enabled) != 1); | ||
| 135 | return; | ||
| 136 | } | ||
| 137 | |||
| 138 | jump_label_lock(); | ||
| 139 | if (atomic_read(&key->enabled) == 0) { | ||
| 140 | atomic_set(&key->enabled, -1); | ||
| 141 | jump_label_update(key); | ||
| 142 | /* | ||
| 143 | * See static_key_slow_inc(). | ||
| 144 | */ | ||
| 145 | atomic_set_release(&key->enabled, 1); | ||
| 146 | } | ||
| 147 | jump_label_unlock(); | ||
| 148 | } | ||
| 149 | EXPORT_SYMBOL_GPL(static_key_enable_cpuslocked); | ||
| 150 | |||
| 151 | void static_key_enable(struct static_key *key) | ||
| 152 | { | ||
| 153 | cpus_read_lock(); | ||
| 154 | static_key_enable_cpuslocked(key); | ||
| 155 | cpus_read_unlock(); | ||
| 156 | } | ||
| 157 | EXPORT_SYMBOL_GPL(static_key_enable); | ||
| 158 | |||
| 159 | void static_key_disable_cpuslocked(struct static_key *key) | ||
| 160 | { | ||
| 161 | STATIC_KEY_CHECK_USE(); | ||
| 162 | |||
| 163 | if (atomic_read(&key->enabled) != 1) { | ||
| 164 | WARN_ON_ONCE(atomic_read(&key->enabled) != 0); | ||
| 165 | return; | ||
| 166 | } | ||
| 167 | |||
| 168 | jump_label_lock(); | ||
| 169 | if (atomic_cmpxchg(&key->enabled, 1, 0)) | ||
| 170 | jump_label_update(key); | ||
| 171 | jump_label_unlock(); | ||
| 172 | } | ||
| 173 | EXPORT_SYMBOL_GPL(static_key_disable_cpuslocked); | ||
| 174 | |||
| 175 | void static_key_disable(struct static_key *key) | ||
| 144 | { | 176 | { |
| 145 | cpus_read_lock(); | 177 | cpus_read_lock(); |
| 178 | static_key_disable_cpuslocked(key); | ||
| 179 | cpus_read_unlock(); | ||
| 180 | } | ||
| 181 | EXPORT_SYMBOL_GPL(static_key_disable); | ||
| 182 | |||
| 183 | static void static_key_slow_dec_cpuslocked(struct static_key *key, | ||
| 184 | unsigned long rate_limit, | ||
| 185 | struct delayed_work *work) | ||
| 186 | { | ||
| 146 | /* | 187 | /* |
| 147 | * The negative count check is valid even when a negative | 188 | * The negative count check is valid even when a negative |
| 148 | * key->enabled is in use by static_key_slow_inc(); a | 189 | * key->enabled is in use by static_key_slow_inc(); a |
| @@ -153,7 +194,6 @@ static void __static_key_slow_dec(struct static_key *key, | |||
| 153 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { | 194 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { |
| 154 | WARN(atomic_read(&key->enabled) < 0, | 195 | WARN(atomic_read(&key->enabled) < 0, |
| 155 | "jump label: negative count!\n"); | 196 | "jump label: negative count!\n"); |
| 156 | cpus_read_unlock(); | ||
| 157 | return; | 197 | return; |
| 158 | } | 198 | } |
| 159 | 199 | ||
| @@ -164,6 +204,14 @@ static void __static_key_slow_dec(struct static_key *key, | |||
| 164 | jump_label_update(key); | 204 | jump_label_update(key); |
| 165 | } | 205 | } |
| 166 | jump_label_unlock(); | 206 | jump_label_unlock(); |
| 207 | } | ||
| 208 | |||
| 209 | static void __static_key_slow_dec(struct static_key *key, | ||
| 210 | unsigned long rate_limit, | ||
| 211 | struct delayed_work *work) | ||
| 212 | { | ||
| 213 | cpus_read_lock(); | ||
| 214 | static_key_slow_dec_cpuslocked(key, rate_limit, work); | ||
| 167 | cpus_read_unlock(); | 215 | cpus_read_unlock(); |
| 168 | } | 216 | } |
| 169 | 217 | ||
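
The refactoring splits each operation into a _cpuslocked worker plus a thin wrapper that takes cpus_read_lock() itself, so callers that already hold the CPU hotplug lock can use the _cpuslocked variants without deadlocking. A hedged usage sketch (enable_my_feature() is illustrative):

    #include <linux/cpu.h>
    #include <linux/jump_label.h>

    /* Illustrative: flip a key from a section that already excludes CPU hotplug. */
    static void enable_my_feature(struct static_key *key)
    {
            cpus_read_lock();
            /* ... other work that must run with hotplug excluded ... */
            static_key_enable_cpuslocked(key);
            cpus_read_unlock();
    }
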
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 7d2499bec5fe..66011c9f5df3 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
| @@ -58,6 +58,10 @@ | |||
| 58 | #define CREATE_TRACE_POINTS | 58 | #define CREATE_TRACE_POINTS |
| 59 | #include <trace/events/lock.h> | 59 | #include <trace/events/lock.h> |
| 60 | 60 | ||
| 61 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 62 | #include <linux/slab.h> | ||
| 63 | #endif | ||
| 64 | |||
| 61 | #ifdef CONFIG_PROVE_LOCKING | 65 | #ifdef CONFIG_PROVE_LOCKING |
| 62 | int prove_locking = 1; | 66 | int prove_locking = 1; |
| 63 | module_param(prove_locking, int, 0644); | 67 | module_param(prove_locking, int, 0644); |
| @@ -344,14 +348,12 @@ EXPORT_SYMBOL(lockdep_on); | |||
| 344 | #if VERBOSE | 348 | #if VERBOSE |
| 345 | # define HARDIRQ_VERBOSE 1 | 349 | # define HARDIRQ_VERBOSE 1 |
| 346 | # define SOFTIRQ_VERBOSE 1 | 350 | # define SOFTIRQ_VERBOSE 1 |
| 347 | # define RECLAIM_VERBOSE 1 | ||
| 348 | #else | 351 | #else |
| 349 | # define HARDIRQ_VERBOSE 0 | 352 | # define HARDIRQ_VERBOSE 0 |
| 350 | # define SOFTIRQ_VERBOSE 0 | 353 | # define SOFTIRQ_VERBOSE 0 |
| 351 | # define RECLAIM_VERBOSE 0 | ||
| 352 | #endif | 354 | #endif |
| 353 | 355 | ||
| 354 | #if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE | 356 | #if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE |
| 355 | /* | 357 | /* |
| 356 | * Quick filtering for interesting events: | 358 | * Quick filtering for interesting events: |
| 357 | */ | 359 | */ |
| @@ -726,6 +728,18 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | |||
| 726 | return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); | 728 | return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); |
| 727 | } | 729 | } |
| 728 | 730 | ||
| 731 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 732 | static void cross_init(struct lockdep_map *lock, int cross); | ||
| 733 | static int cross_lock(struct lockdep_map *lock); | ||
| 734 | static int lock_acquire_crosslock(struct held_lock *hlock); | ||
| 735 | static int lock_release_crosslock(struct lockdep_map *lock); | ||
| 736 | #else | ||
| 737 | static inline void cross_init(struct lockdep_map *lock, int cross) {} | ||
| 738 | static inline int cross_lock(struct lockdep_map *lock) { return 0; } | ||
| 739 | static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; } | ||
| 740 | static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; } | ||
| 741 | #endif | ||
| 742 | |||
| 729 | /* | 743 | /* |
| 730 | * Register a lock's class in the hash-table, if the class is not present | 744 | * Register a lock's class in the hash-table, if the class is not present |
| 731 | * yet. Otherwise we look it up. We cache the result in the lock object | 745 | * yet. Otherwise we look it up. We cache the result in the lock object |
| @@ -1125,22 +1139,41 @@ print_circular_lock_scenario(struct held_lock *src, | |||
| 1125 | printk(KERN_CONT "\n\n"); | 1139 | printk(KERN_CONT "\n\n"); |
| 1126 | } | 1140 | } |
| 1127 | 1141 | ||
| 1128 | printk(" Possible unsafe locking scenario:\n\n"); | 1142 | if (cross_lock(tgt->instance)) { |
| 1129 | printk(" CPU0 CPU1\n"); | 1143 | printk(" Possible unsafe locking scenario by crosslock:\n\n"); |
| 1130 | printk(" ---- ----\n"); | 1144 | printk(" CPU0 CPU1\n"); |
| 1131 | printk(" lock("); | 1145 | printk(" ---- ----\n"); |
| 1132 | __print_lock_name(target); | 1146 | printk(" lock("); |
| 1133 | printk(KERN_CONT ");\n"); | 1147 | __print_lock_name(parent); |
| 1134 | printk(" lock("); | 1148 | printk(KERN_CONT ");\n"); |
| 1135 | __print_lock_name(parent); | 1149 | printk(" lock("); |
| 1136 | printk(KERN_CONT ");\n"); | 1150 | __print_lock_name(target); |
| 1137 | printk(" lock("); | 1151 | printk(KERN_CONT ");\n"); |
| 1138 | __print_lock_name(target); | 1152 | printk(" lock("); |
| 1139 | printk(KERN_CONT ");\n"); | 1153 | __print_lock_name(source); |
| 1140 | printk(" lock("); | 1154 | printk(KERN_CONT ");\n"); |
| 1141 | __print_lock_name(source); | 1155 | printk(" unlock("); |
| 1142 | printk(KERN_CONT ");\n"); | 1156 | __print_lock_name(target); |
| 1143 | printk("\n *** DEADLOCK ***\n\n"); | 1157 | printk(KERN_CONT ");\n"); |
| 1158 | printk("\n *** DEADLOCK ***\n\n"); | ||
| 1159 | } else { | ||
| 1160 | printk(" Possible unsafe locking scenario:\n\n"); | ||
| 1161 | printk(" CPU0 CPU1\n"); | ||
| 1162 | printk(" ---- ----\n"); | ||
| 1163 | printk(" lock("); | ||
| 1164 | __print_lock_name(target); | ||
| 1165 | printk(KERN_CONT ");\n"); | ||
| 1166 | printk(" lock("); | ||
| 1167 | __print_lock_name(parent); | ||
| 1168 | printk(KERN_CONT ");\n"); | ||
| 1169 | printk(" lock("); | ||
| 1170 | __print_lock_name(target); | ||
| 1171 | printk(KERN_CONT ");\n"); | ||
| 1172 | printk(" lock("); | ||
| 1173 | __print_lock_name(source); | ||
| 1174 | printk(KERN_CONT ");\n"); | ||
| 1175 | printk("\n *** DEADLOCK ***\n\n"); | ||
| 1176 | } | ||
| 1144 | } | 1177 | } |
| 1145 | 1178 | ||
| 1146 | /* | 1179 | /* |
| @@ -1165,7 +1198,12 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, | |||
| 1165 | pr_warn("%s/%d is trying to acquire lock:\n", | 1198 | pr_warn("%s/%d is trying to acquire lock:\n", |
| 1166 | curr->comm, task_pid_nr(curr)); | 1199 | curr->comm, task_pid_nr(curr)); |
| 1167 | print_lock(check_src); | 1200 | print_lock(check_src); |
| 1168 | pr_warn("\nbut task is already holding lock:\n"); | 1201 | |
| 1202 | if (cross_lock(check_tgt->instance)) | ||
| 1203 | pr_warn("\nbut now in release context of a crosslock acquired at the following:\n"); | ||
| 1204 | else | ||
| 1205 | pr_warn("\nbut task is already holding lock:\n"); | ||
| 1206 | |||
| 1169 | print_lock(check_tgt); | 1207 | print_lock(check_tgt); |
| 1170 | pr_warn("\nwhich lock already depends on the new lock.\n\n"); | 1208 | pr_warn("\nwhich lock already depends on the new lock.\n\n"); |
| 1171 | pr_warn("\nthe existing dependency chain (in reverse order) is:\n"); | 1209 | pr_warn("\nthe existing dependency chain (in reverse order) is:\n"); |
| @@ -1183,7 +1221,8 @@ static inline int class_equal(struct lock_list *entry, void *data) | |||
| 1183 | static noinline int print_circular_bug(struct lock_list *this, | 1221 | static noinline int print_circular_bug(struct lock_list *this, |
| 1184 | struct lock_list *target, | 1222 | struct lock_list *target, |
| 1185 | struct held_lock *check_src, | 1223 | struct held_lock *check_src, |
| 1186 | struct held_lock *check_tgt) | 1224 | struct held_lock *check_tgt, |
| 1225 | struct stack_trace *trace) | ||
| 1187 | { | 1226 | { |
| 1188 | struct task_struct *curr = current; | 1227 | struct task_struct *curr = current; |
| 1189 | struct lock_list *parent; | 1228 | struct lock_list *parent; |
| @@ -1193,7 +1232,9 @@ static noinline int print_circular_bug(struct lock_list *this, | |||
| 1193 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) | 1232 | if (!debug_locks_off_graph_unlock() || debug_locks_silent) |
| 1194 | return 0; | 1233 | return 0; |
| 1195 | 1234 | ||
| 1196 | if (!save_trace(&this->trace)) | 1235 | if (cross_lock(check_tgt->instance)) |
| 1236 | this->trace = *trace; | ||
| 1237 | else if (!save_trace(&this->trace)) | ||
| 1197 | return 0; | 1238 | return 0; |
| 1198 | 1239 | ||
| 1199 | depth = get_lock_depth(target); | 1240 | depth = get_lock_depth(target); |
| @@ -1309,6 +1350,19 @@ check_noncircular(struct lock_list *root, struct lock_class *target, | |||
| 1309 | return result; | 1350 | return result; |
| 1310 | } | 1351 | } |
| 1311 | 1352 | ||
| 1353 | static noinline int | ||
| 1354 | check_redundant(struct lock_list *root, struct lock_class *target, | ||
| 1355 | struct lock_list **target_entry) | ||
| 1356 | { | ||
| 1357 | int result; | ||
| 1358 | |||
| 1359 | debug_atomic_inc(nr_redundant_checks); | ||
| 1360 | |||
| 1361 | result = __bfs_forwards(root, target, class_equal, target_entry); | ||
| 1362 | |||
| 1363 | return result; | ||
| 1364 | } | ||
| 1365 | |||
| 1312 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | 1366 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) |
| 1313 | /* | 1367 | /* |
| 1314 | * Forwards and backwards subgraph searching, for the purposes of | 1368 | * Forwards and backwards subgraph searching, for the purposes of |
| @@ -1784,6 +1838,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, | |||
| 1784 | if (nest) | 1838 | if (nest) |
| 1785 | return 2; | 1839 | return 2; |
| 1786 | 1840 | ||
| 1841 | if (cross_lock(prev->instance)) | ||
| 1842 | continue; | ||
| 1843 | |||
| 1787 | return print_deadlock_bug(curr, prev, next); | 1844 | return print_deadlock_bug(curr, prev, next); |
| 1788 | } | 1845 | } |
| 1789 | return 1; | 1846 | return 1; |
| @@ -1813,20 +1870,13 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, | |||
| 1813 | */ | 1870 | */ |
| 1814 | static int | 1871 | static int |
| 1815 | check_prev_add(struct task_struct *curr, struct held_lock *prev, | 1872 | check_prev_add(struct task_struct *curr, struct held_lock *prev, |
| 1816 | struct held_lock *next, int distance, int *stack_saved) | 1873 | struct held_lock *next, int distance, struct stack_trace *trace, |
| 1874 | int (*save)(struct stack_trace *trace)) | ||
| 1817 | { | 1875 | { |
| 1818 | struct lock_list *entry; | 1876 | struct lock_list *entry; |
| 1819 | int ret; | 1877 | int ret; |
| 1820 | struct lock_list this; | 1878 | struct lock_list this; |
| 1821 | struct lock_list *uninitialized_var(target_entry); | 1879 | struct lock_list *uninitialized_var(target_entry); |
| 1822 | /* | ||
| 1823 | * Static variable, serialized by the graph_lock(). | ||
| 1824 | * | ||
| 1825 | * We use this static variable to save the stack trace in case | ||
| 1826 | * we call into this function multiple times due to encountering | ||
| 1827 | * trylocks in the held lock stack. | ||
| 1828 | */ | ||
| 1829 | static struct stack_trace trace; | ||
| 1830 | 1880 | ||
| 1831 | /* | 1881 | /* |
| 1832 | * Prove that the new <prev> -> <next> dependency would not | 1882 | * Prove that the new <prev> -> <next> dependency would not |
| @@ -1841,7 +1891,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
| 1841 | this.parent = NULL; | 1891 | this.parent = NULL; |
| 1842 | ret = check_noncircular(&this, hlock_class(prev), &target_entry); | 1892 | ret = check_noncircular(&this, hlock_class(prev), &target_entry); |
| 1843 | if (unlikely(!ret)) | 1893 | if (unlikely(!ret)) |
| 1844 | return print_circular_bug(&this, target_entry, next, prev); | 1894 | return print_circular_bug(&this, target_entry, next, prev, trace); |
| 1845 | else if (unlikely(ret < 0)) | 1895 | else if (unlikely(ret < 0)) |
| 1846 | return print_bfs_bug(ret); | 1896 | return print_bfs_bug(ret); |
| 1847 | 1897 | ||
| @@ -1870,15 +1920,26 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
| 1870 | if (entry->class == hlock_class(next)) { | 1920 | if (entry->class == hlock_class(next)) { |
| 1871 | if (distance == 1) | 1921 | if (distance == 1) |
| 1872 | entry->distance = 1; | 1922 | entry->distance = 1; |
| 1873 | return 2; | 1923 | return 1; |
| 1874 | } | 1924 | } |
| 1875 | } | 1925 | } |
| 1876 | 1926 | ||
| 1877 | if (!*stack_saved) { | 1927 | /* |
| 1878 | if (!save_trace(&trace)) | 1928 | * Is the <prev> -> <next> link redundant? |
| 1879 | return 0; | 1929 | */ |
| 1880 | *stack_saved = 1; | 1930 | this.class = hlock_class(prev); |
| 1931 | this.parent = NULL; | ||
| 1932 | ret = check_redundant(&this, hlock_class(next), &target_entry); | ||
| 1933 | if (!ret) { | ||
| 1934 | debug_atomic_inc(nr_redundant); | ||
| 1935 | return 2; | ||
| 1881 | } | 1936 | } |
| 1937 | if (ret < 0) | ||
| 1938 | return print_bfs_bug(ret); | ||
| 1939 | |||
| 1940 | |||
| 1941 | if (save && !save(trace)) | ||
| 1942 | return 0; | ||
| 1882 | 1943 | ||
| 1883 | /* | 1944 | /* |
| 1884 | * Ok, all validations passed, add the new lock | 1945 | * Ok, all validations passed, add the new lock |
| @@ -1886,14 +1947,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
| 1886 | */ | 1947 | */ |
| 1887 | ret = add_lock_to_list(hlock_class(next), | 1948 | ret = add_lock_to_list(hlock_class(next), |
| 1888 | &hlock_class(prev)->locks_after, | 1949 | &hlock_class(prev)->locks_after, |
| 1889 | next->acquire_ip, distance, &trace); | 1950 | next->acquire_ip, distance, trace); |
| 1890 | 1951 | ||
| 1891 | if (!ret) | 1952 | if (!ret) |
| 1892 | return 0; | 1953 | return 0; |
| 1893 | 1954 | ||
| 1894 | ret = add_lock_to_list(hlock_class(prev), | 1955 | ret = add_lock_to_list(hlock_class(prev), |
| 1895 | &hlock_class(next)->locks_before, | 1956 | &hlock_class(next)->locks_before, |
| 1896 | next->acquire_ip, distance, &trace); | 1957 | next->acquire_ip, distance, trace); |
| 1897 | if (!ret) | 1958 | if (!ret) |
| 1898 | return 0; | 1959 | return 0; |
| 1899 | 1960 | ||
| @@ -1901,8 +1962,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
| 1901 | * Debugging printouts: | 1962 | * Debugging printouts: |
| 1902 | */ | 1963 | */ |
| 1903 | if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { | 1964 | if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { |
| 1904 | /* We drop graph lock, so another thread can overwrite trace. */ | ||
| 1905 | *stack_saved = 0; | ||
| 1906 | graph_unlock(); | 1965 | graph_unlock(); |
| 1907 | printk("\n new dependency: "); | 1966 | printk("\n new dependency: "); |
| 1908 | print_lock_name(hlock_class(prev)); | 1967 | print_lock_name(hlock_class(prev)); |
| @@ -1910,9 +1969,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
| 1910 | print_lock_name(hlock_class(next)); | 1969 | print_lock_name(hlock_class(next)); |
| 1911 | printk(KERN_CONT "\n"); | 1970 | printk(KERN_CONT "\n"); |
| 1912 | dump_stack(); | 1971 | dump_stack(); |
| 1913 | return graph_lock(); | 1972 | if (!graph_lock()) |
| 1973 | return 0; | ||
| 1914 | } | 1974 | } |
| 1915 | return 1; | 1975 | return 2; |
| 1916 | } | 1976 | } |
| 1917 | 1977 | ||
| 1918 | /* | 1978 | /* |
| @@ -1925,8 +1985,9 @@ static int | |||
| 1925 | check_prevs_add(struct task_struct *curr, struct held_lock *next) | 1985 | check_prevs_add(struct task_struct *curr, struct held_lock *next) |
| 1926 | { | 1986 | { |
| 1927 | int depth = curr->lockdep_depth; | 1987 | int depth = curr->lockdep_depth; |
| 1928 | int stack_saved = 0; | ||
| 1929 | struct held_lock *hlock; | 1988 | struct held_lock *hlock; |
| 1989 | struct stack_trace trace; | ||
| 1990 | int (*save)(struct stack_trace *trace) = save_trace; | ||
| 1930 | 1991 | ||
| 1931 | /* | 1992 | /* |
| 1932 | * Debugging checks. | 1993 | * Debugging checks. |
| @@ -1947,21 +2008,36 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) | |||
| 1947 | int distance = curr->lockdep_depth - depth + 1; | 2008 | int distance = curr->lockdep_depth - depth + 1; |
| 1948 | hlock = curr->held_locks + depth - 1; | 2009 | hlock = curr->held_locks + depth - 1; |
| 1949 | /* | 2010 | /* |
| 1950 | * Only non-recursive-read entries get new dependencies | 2011 | * Only non-crosslock entries get new dependencies added. |
| 1951 | * added: | 2012 | * Crosslock entries will be added by commit later: |
| 1952 | */ | 2013 | */ |
| 1953 | if (hlock->read != 2 && hlock->check) { | 2014 | if (!cross_lock(hlock->instance)) { |
| 1954 | if (!check_prev_add(curr, hlock, next, | ||
| 1955 | distance, &stack_saved)) | ||
| 1956 | return 0; | ||
| 1957 | /* | 2015 | /* |
| 1958 | * Stop after the first non-trylock entry, | 2016 | * Only non-recursive-read entries get new dependencies |
| 1959 | * as non-trylock entries have added their | 2017 | * added: |
| 1960 | * own direct dependencies already, so this | ||
| 1961 | * lock is connected to them indirectly: | ||
| 1962 | */ | 2018 | */ |
| 1963 | if (!hlock->trylock) | 2019 | if (hlock->read != 2 && hlock->check) { |
| 1964 | break; | 2020 | int ret = check_prev_add(curr, hlock, next, |
| 2021 | distance, &trace, save); | ||
| 2022 | if (!ret) | ||
| 2023 | return 0; | ||
| 2024 | |||
| 2025 | /* | ||
| 2026 | * Stop saving stack_trace if save_trace() was | ||
| 2027 | * called at least once: | ||
| 2028 | */ | ||
| 2029 | if (save && ret == 2) | ||
| 2030 | save = NULL; | ||
| 2031 | |||
| 2032 | /* | ||
| 2033 | * Stop after the first non-trylock entry, | ||
| 2034 | * as non-trylock entries have added their | ||
| 2035 | * own direct dependencies already, so this | ||
| 2036 | * lock is connected to them indirectly: | ||
| 2037 | */ | ||
| 2038 | if (!hlock->trylock) | ||
| 2039 | break; | ||
| 2040 | } | ||
| 1965 | } | 2041 | } |
| 1966 | depth--; | 2042 | depth--; |
| 1967 | /* | 2043 | /* |
| @@ -2126,19 +2202,26 @@ static int check_no_collision(struct task_struct *curr, | |||
| 2126 | } | 2202 | } |
| 2127 | 2203 | ||
| 2128 | /* | 2204 | /* |
| 2129 | * Look up a dependency chain. If the key is not present yet then | 2205 | * This is for building a chain between just two different classes, |
| 2130 | * add it and return 1 - in this case the new dependency chain is | 2206 | * instead of adding a new hlock upon current, which is done by |
| 2131 | * validated. If the key is already hashed, return 0. | 2207 | * add_chain_cache(). |
| 2132 | * (On return with 1 graph_lock is held.) | 2208 | * |
| 2209 | * This can be called in any context with two classes, while | ||
| 2210 | * add_chain_cache() must be done within the lock owner's context | ||
| 2211 | * since it uses hlock, which might be racy in another context. | ||
| 2133 | */ | 2212 | */ |
| 2134 | static inline int lookup_chain_cache(struct task_struct *curr, | 2213 | static inline int add_chain_cache_classes(unsigned int prev, |
| 2135 | struct held_lock *hlock, | 2214 | unsigned int next, |
| 2136 | u64 chain_key) | 2215 | unsigned int irq_context, |
| 2216 | u64 chain_key) | ||
| 2137 | { | 2217 | { |
| 2138 | struct lock_class *class = hlock_class(hlock); | ||
| 2139 | struct hlist_head *hash_head = chainhashentry(chain_key); | 2218 | struct hlist_head *hash_head = chainhashentry(chain_key); |
| 2140 | struct lock_chain *chain; | 2219 | struct lock_chain *chain; |
| 2141 | int i, j; | 2220 | |
| 2221 | /* | ||
| 2222 | * Allocate a new chain entry from the static array, and add | ||
| 2223 | * it to the hash: | ||
| 2224 | */ | ||
| 2142 | 2225 | ||
| 2143 | /* | 2226 | /* |
| 2144 | * We might need to take the graph lock, ensure we've got IRQs | 2227 | * We might need to take the graph lock, ensure we've got IRQs |
| @@ -2147,43 +2230,76 @@ static inline int lookup_chain_cache(struct task_struct *curr, | |||
| 2147 | */ | 2230 | */ |
| 2148 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 2231 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
| 2149 | return 0; | 2232 | return 0; |
| 2233 | |||
| 2234 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { | ||
| 2235 | if (!debug_locks_off_graph_unlock()) | ||
| 2236 | return 0; | ||
| 2237 | |||
| 2238 | print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!"); | ||
| 2239 | dump_stack(); | ||
| 2240 | return 0; | ||
| 2241 | } | ||
| 2242 | |||
| 2243 | chain = lock_chains + nr_lock_chains++; | ||
| 2244 | chain->chain_key = chain_key; | ||
| 2245 | chain->irq_context = irq_context; | ||
| 2246 | chain->depth = 2; | ||
| 2247 | if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { | ||
| 2248 | chain->base = nr_chain_hlocks; | ||
| 2249 | nr_chain_hlocks += chain->depth; | ||
| 2250 | chain_hlocks[chain->base] = prev - 1; | ||
| 2251 | chain_hlocks[chain->base + 1] = next - 1; | ||
| 2252 | } | ||
| 2253 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
| 2150 | /* | 2254 | /* |
| 2151 | * We can walk it lock-free, because entries only get added | 2255 | * Important for check_no_collision(). |
| 2152 | * to the hash: | ||
| 2153 | */ | 2256 | */ |
| 2154 | hlist_for_each_entry_rcu(chain, hash_head, entry) { | 2257 | else { |
| 2155 | if (chain->chain_key == chain_key) { | 2258 | if (!debug_locks_off_graph_unlock()) |
| 2156 | cache_hit: | ||
| 2157 | debug_atomic_inc(chain_lookup_hits); | ||
| 2158 | if (!check_no_collision(curr, hlock, chain)) | ||
| 2159 | return 0; | ||
| 2160 | |||
| 2161 | if (very_verbose(class)) | ||
| 2162 | printk("\nhash chain already cached, key: " | ||
| 2163 | "%016Lx tail class: [%p] %s\n", | ||
| 2164 | (unsigned long long)chain_key, | ||
| 2165 | class->key, class->name); | ||
| 2166 | return 0; | 2259 | return 0; |
| 2167 | } | 2260 | |
| 2261 | print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); | ||
| 2262 | dump_stack(); | ||
| 2263 | return 0; | ||
| 2168 | } | 2264 | } |
| 2169 | if (very_verbose(class)) | 2265 | #endif |
| 2170 | printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", | 2266 | |
| 2171 | (unsigned long long)chain_key, class->key, class->name); | 2267 | hlist_add_head_rcu(&chain->entry, hash_head); |
| 2268 | debug_atomic_inc(chain_lookup_misses); | ||
| 2269 | inc_chains(); | ||
| 2270 | |||
| 2271 | return 1; | ||
| 2272 | } | ||
| 2273 | |||
| 2274 | /* | ||
| 2275 | * Adds a dependency chain into chain hashtable. And must be called with | ||
| 2276 | * graph_lock held. | ||
| 2277 | * | ||
| 2278 | * Return 0 if fail, and graph_lock is released. | ||
| 2279 | * Return 1 if succeed, with graph_lock held. | ||
| 2280 | */ | ||
| 2281 | static inline int add_chain_cache(struct task_struct *curr, | ||
| 2282 | struct held_lock *hlock, | ||
| 2283 | u64 chain_key) | ||
| 2284 | { | ||
| 2285 | struct lock_class *class = hlock_class(hlock); | ||
| 2286 | struct hlist_head *hash_head = chainhashentry(chain_key); | ||
| 2287 | struct lock_chain *chain; | ||
| 2288 | int i, j; | ||
| 2289 | |||
| 2172 | /* | 2290 | /* |
| 2173 | * Allocate a new chain entry from the static array, and add | 2291 | * Allocate a new chain entry from the static array, and add |
| 2174 | * it to the hash: | 2292 | * it to the hash: |
| 2175 | */ | 2293 | */ |
| 2176 | if (!graph_lock()) | 2294 | |
| 2177 | return 0; | ||
| 2178 | /* | 2295 | /* |
| 2179 | * We have to walk the chain again locked - to avoid duplicates: | 2296 | * We might need to take the graph lock, ensure we've got IRQs |
| 2297 | * disabled to make this an IRQ-safe lock.. for recursion reasons | ||
| 2298 | * lockdep won't complain about its own locking errors. | ||
| 2180 | */ | 2299 | */ |
| 2181 | hlist_for_each_entry(chain, hash_head, entry) { | 2300 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
| 2182 | if (chain->chain_key == chain_key) { | 2301 | return 0; |
| 2183 | graph_unlock(); | 2302 | |
| 2184 | goto cache_hit; | ||
| 2185 | } | ||
| 2186 | } | ||
| 2187 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { | 2303 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { |
| 2188 | if (!debug_locks_off_graph_unlock()) | 2304 | if (!debug_locks_off_graph_unlock()) |
| 2189 | return 0; | 2305 | return 0; |
| @@ -2235,6 +2351,78 @@ cache_hit: | |||
| 2235 | return 1; | 2351 | return 1; |
| 2236 | } | 2352 | } |
| 2237 | 2353 | ||
| 2354 | /* | ||
| 2355 | * Look up a dependency chain. | ||
| 2356 | */ | ||
| 2357 | static inline struct lock_chain *lookup_chain_cache(u64 chain_key) | ||
| 2358 | { | ||
| 2359 | struct hlist_head *hash_head = chainhashentry(chain_key); | ||
| 2360 | struct lock_chain *chain; | ||
| 2361 | |||
| 2362 | /* | ||
| 2363 | * We can walk it lock-free, because entries only get added | ||
| 2364 | * to the hash: | ||
| 2365 | */ | ||
| 2366 | hlist_for_each_entry_rcu(chain, hash_head, entry) { | ||
| 2367 | if (chain->chain_key == chain_key) { | ||
| 2368 | debug_atomic_inc(chain_lookup_hits); | ||
| 2369 | return chain; | ||
| 2370 | } | ||
| 2371 | } | ||
| 2372 | return NULL; | ||
| 2373 | } | ||
| 2374 | |||
| 2375 | /* | ||
| 2376 | * If the key is not yet present in the dependency chain cache then | ||
| 2377 | * add it and return 1 - in this case the new dependency chain is | ||
| 2378 | * validated. If the key is already hashed, return 0. | ||
| 2379 | * (On return with 1 graph_lock is held.) | ||
| 2380 | */ | ||
| 2381 | static inline int lookup_chain_cache_add(struct task_struct *curr, | ||
| 2382 | struct held_lock *hlock, | ||
| 2383 | u64 chain_key) | ||
| 2384 | { | ||
| 2385 | struct lock_class *class = hlock_class(hlock); | ||
| 2386 | struct lock_chain *chain = lookup_chain_cache(chain_key); | ||
| 2387 | |||
| 2388 | if (chain) { | ||
| 2389 | cache_hit: | ||
| 2390 | if (!check_no_collision(curr, hlock, chain)) | ||
| 2391 | return 0; | ||
| 2392 | |||
| 2393 | if (very_verbose(class)) { | ||
| 2394 | printk("\nhash chain already cached, key: " | ||
| 2395 | "%016Lx tail class: [%p] %s\n", | ||
| 2396 | (unsigned long long)chain_key, | ||
| 2397 | class->key, class->name); | ||
| 2398 | } | ||
| 2399 | |||
| 2400 | return 0; | ||
| 2401 | } | ||
| 2402 | |||
| 2403 | if (very_verbose(class)) { | ||
| 2404 | printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n", | ||
| 2405 | (unsigned long long)chain_key, class->key, class->name); | ||
| 2406 | } | ||
| 2407 | |||
| 2408 | if (!graph_lock()) | ||
| 2409 | return 0; | ||
| 2410 | |||
| 2411 | /* | ||
| 2412 | * We have to walk the chain again locked - to avoid duplicates: | ||
| 2413 | */ | ||
| 2414 | chain = lookup_chain_cache(chain_key); | ||
| 2415 | if (chain) { | ||
| 2416 | graph_unlock(); | ||
| 2417 | goto cache_hit; | ||
| 2418 | } | ||
| 2419 | |||
| 2420 | if (!add_chain_cache(curr, hlock, chain_key)) | ||
| 2421 | return 0; | ||
| 2422 | |||
| 2423 | return 1; | ||
| 2424 | } | ||
| 2425 | |||
| 2238 | static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | 2426 | static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, |
| 2239 | struct held_lock *hlock, int chain_head, u64 chain_key) | 2427 | struct held_lock *hlock, int chain_head, u64 chain_key) |
| 2240 | { | 2428 | { |
| @@ -2245,11 +2433,11 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | |||
| 2245 | * | 2433 | * |
| 2246 | * We look up the chain_key and do the O(N^2) check and update of | 2434 | * We look up the chain_key and do the O(N^2) check and update of |
| 2247 | * the dependencies only if this is a new dependency chain. | 2435 | * the dependencies only if this is a new dependency chain. |
| 2248 | * (If lookup_chain_cache() returns with 1 it acquires | 2436 | * (If lookup_chain_cache_add() returns with 1 it acquires |
| 2249 | * graph_lock for us) | 2437 | * graph_lock for us) |
| 2250 | */ | 2438 | */ |
| 2251 | if (!hlock->trylock && hlock->check && | 2439 | if (!hlock->trylock && hlock->check && |
| 2252 | lookup_chain_cache(curr, hlock, chain_key)) { | 2440 | lookup_chain_cache_add(curr, hlock, chain_key)) { |
| 2253 | /* | 2441 | /* |
| 2254 | * Check whether last held lock: | 2442 | * Check whether last held lock: |
| 2255 | * | 2443 | * |
| @@ -2277,14 +2465,17 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | |||
| 2277 | * Add dependency only if this lock is not the head | 2465 | * Add dependency only if this lock is not the head |
| 2278 | * of the chain, and if it's not a secondary read-lock: | 2466 | * of the chain, and if it's not a secondary read-lock: |
| 2279 | */ | 2467 | */ |
| 2280 | if (!chain_head && ret != 2) | 2468 | if (!chain_head && ret != 2) { |
| 2281 | if (!check_prevs_add(curr, hlock)) | 2469 | if (!check_prevs_add(curr, hlock)) |
| 2282 | return 0; | 2470 | return 0; |
| 2471 | } | ||
| 2472 | |||
| 2283 | graph_unlock(); | 2473 | graph_unlock(); |
| 2284 | } else | 2474 | } else { |
| 2285 | /* after lookup_chain_cache(): */ | 2475 | /* after lookup_chain_cache_add(): */ |
| 2286 | if (unlikely(!debug_locks)) | 2476 | if (unlikely(!debug_locks)) |
| 2287 | return 0; | 2477 | return 0; |
| 2478 | } | ||
| 2288 | 2479 | ||
| 2289 | return 1; | 2480 | return 1; |
| 2290 | } | 2481 | } |
| @@ -2567,14 +2758,6 @@ static int SOFTIRQ_verbose(struct lock_class *class) | |||
| 2567 | return 0; | 2758 | return 0; |
| 2568 | } | 2759 | } |
| 2569 | 2760 | ||
| 2570 | static int RECLAIM_FS_verbose(struct lock_class *class) | ||
| 2571 | { | ||
| 2572 | #if RECLAIM_VERBOSE | ||
| 2573 | return class_filter(class); | ||
| 2574 | #endif | ||
| 2575 | return 0; | ||
| 2576 | } | ||
| 2577 | |||
| 2578 | #define STRICT_READ_CHECKS 1 | 2761 | #define STRICT_READ_CHECKS 1 |
| 2579 | 2762 | ||
| 2580 | static int (*state_verbose_f[])(struct lock_class *class) = { | 2763 | static int (*state_verbose_f[])(struct lock_class *class) = { |
| @@ -2870,57 +3053,6 @@ void trace_softirqs_off(unsigned long ip) | |||
| 2870 | debug_atomic_inc(redundant_softirqs_off); | 3053 | debug_atomic_inc(redundant_softirqs_off); |
| 2871 | } | 3054 | } |
| 2872 | 3055 | ||
| 2873 | static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) | ||
| 2874 | { | ||
| 2875 | struct task_struct *curr = current; | ||
| 2876 | |||
| 2877 | if (unlikely(!debug_locks)) | ||
| 2878 | return; | ||
| 2879 | |||
| 2880 | gfp_mask = current_gfp_context(gfp_mask); | ||
| 2881 | |||
| 2882 | /* no reclaim without waiting on it */ | ||
| 2883 | if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) | ||
| 2884 | return; | ||
| 2885 | |||
| 2886 | /* this guy won't enter reclaim */ | ||
| 2887 | if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) | ||
| 2888 | return; | ||
| 2889 | |||
| 2890 | /* We're only interested __GFP_FS allocations for now */ | ||
| 2891 | if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS)) | ||
| 2892 | return; | ||
| 2893 | |||
| 2894 | /* | ||
| 2895 | * Oi! Can't be having __GFP_FS allocations with IRQs disabled. | ||
| 2896 | */ | ||
| 2897 | if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) | ||
| 2898 | return; | ||
| 2899 | |||
| 2900 | /* Disable lockdep if explicitly requested */ | ||
| 2901 | if (gfp_mask & __GFP_NOLOCKDEP) | ||
| 2902 | return; | ||
| 2903 | |||
| 2904 | mark_held_locks(curr, RECLAIM_FS); | ||
| 2905 | } | ||
| 2906 | |||
| 2907 | static void check_flags(unsigned long flags); | ||
| 2908 | |||
| 2909 | void lockdep_trace_alloc(gfp_t gfp_mask) | ||
| 2910 | { | ||
| 2911 | unsigned long flags; | ||
| 2912 | |||
| 2913 | if (unlikely(current->lockdep_recursion)) | ||
| 2914 | return; | ||
| 2915 | |||
| 2916 | raw_local_irq_save(flags); | ||
| 2917 | check_flags(flags); | ||
| 2918 | current->lockdep_recursion = 1; | ||
| 2919 | __lockdep_trace_alloc(gfp_mask, flags); | ||
| 2920 | current->lockdep_recursion = 0; | ||
| 2921 | raw_local_irq_restore(flags); | ||
| 2922 | } | ||
| 2923 | |||
| 2924 | static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) | 3056 | static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) |
| 2925 | { | 3057 | { |
| 2926 | /* | 3058 | /* |
| @@ -2966,22 +3098,6 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) | |||
| 2966 | } | 3098 | } |
| 2967 | } | 3099 | } |
| 2968 | 3100 | ||
| 2969 | /* | ||
| 2970 | * We reuse the irq context infrastructure more broadly as a general | ||
| 2971 | * context checking code. This tests GFP_FS recursion (a lock taken | ||
| 2972 | * during reclaim for a GFP_FS allocation is held over a GFP_FS | ||
| 2973 | * allocation). | ||
| 2974 | */ | ||
| 2975 | if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) { | ||
| 2976 | if (hlock->read) { | ||
| 2977 | if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ)) | ||
| 2978 | return 0; | ||
| 2979 | } else { | ||
| 2980 | if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS)) | ||
| 2981 | return 0; | ||
| 2982 | } | ||
| 2983 | } | ||
| 2984 | |||
| 2985 | return 1; | 3101 | return 1; |
| 2986 | } | 3102 | } |
| 2987 | 3103 | ||
| @@ -3040,10 +3156,6 @@ static inline int separate_irq_context(struct task_struct *curr, | |||
| 3040 | return 0; | 3156 | return 0; |
| 3041 | } | 3157 | } |
| 3042 | 3158 | ||
| 3043 | void lockdep_trace_alloc(gfp_t gfp_mask) | ||
| 3044 | { | ||
| 3045 | } | ||
| 3046 | |||
| 3047 | #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ | 3159 | #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ |
| 3048 | 3160 | ||
| 3049 | /* | 3161 | /* |
| @@ -3116,7 +3228,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
| 3116 | /* | 3228 | /* |
| 3117 | * Initialize a lock instance's lock-class mapping info: | 3229 | * Initialize a lock instance's lock-class mapping info: |
| 3118 | */ | 3230 | */ |
| 3119 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 3231 | static void __lockdep_init_map(struct lockdep_map *lock, const char *name, |
| 3120 | struct lock_class_key *key, int subclass) | 3232 | struct lock_class_key *key, int subclass) |
| 3121 | { | 3233 | { |
| 3122 | int i; | 3234 | int i; |
| @@ -3174,8 +3286,25 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
| 3174 | raw_local_irq_restore(flags); | 3286 | raw_local_irq_restore(flags); |
| 3175 | } | 3287 | } |
| 3176 | } | 3288 | } |
| 3289 | |||
| 3290 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | ||
| 3291 | struct lock_class_key *key, int subclass) | ||
| 3292 | { | ||
| 3293 | cross_init(lock, 0); | ||
| 3294 | __lockdep_init_map(lock, name, key, subclass); | ||
| 3295 | } | ||
| 3177 | EXPORT_SYMBOL_GPL(lockdep_init_map); | 3296 | EXPORT_SYMBOL_GPL(lockdep_init_map); |
| 3178 | 3297 | ||
| 3298 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 3299 | void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name, | ||
| 3300 | struct lock_class_key *key, int subclass) | ||
| 3301 | { | ||
| 3302 | cross_init(lock, 1); | ||
| 3303 | __lockdep_init_map(lock, name, key, subclass); | ||
| 3304 | } | ||
| 3305 | EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock); | ||
| 3306 | #endif | ||
| 3307 | |||
| 3179 | struct lock_class_key __lockdep_no_validate__; | 3308 | struct lock_class_key __lockdep_no_validate__; |
| 3180 | EXPORT_SYMBOL_GPL(__lockdep_no_validate__); | 3309 | EXPORT_SYMBOL_GPL(__lockdep_no_validate__); |
| 3181 | 3310 | ||
| @@ -3231,6 +3360,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3231 | int chain_head = 0; | 3360 | int chain_head = 0; |
| 3232 | int class_idx; | 3361 | int class_idx; |
| 3233 | u64 chain_key; | 3362 | u64 chain_key; |
| 3363 | int ret; | ||
| 3234 | 3364 | ||
| 3235 | if (unlikely(!debug_locks)) | 3365 | if (unlikely(!debug_locks)) |
| 3236 | return 0; | 3366 | return 0; |
| @@ -3279,7 +3409,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3279 | 3409 | ||
| 3280 | class_idx = class - lock_classes + 1; | 3410 | class_idx = class - lock_classes + 1; |
| 3281 | 3411 | ||
| 3282 | if (depth) { | 3412 | /* TODO: nest_lock is not implemented for crosslock yet. */ |
| 3413 | if (depth && !cross_lock(lock)) { | ||
| 3283 | hlock = curr->held_locks + depth - 1; | 3414 | hlock = curr->held_locks + depth - 1; |
| 3284 | if (hlock->class_idx == class_idx && nest_lock) { | 3415 | if (hlock->class_idx == class_idx && nest_lock) { |
| 3285 | if (hlock->references) { | 3416 | if (hlock->references) { |
| @@ -3367,6 +3498,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3367 | if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) | 3498 | if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) |
| 3368 | return 0; | 3499 | return 0; |
| 3369 | 3500 | ||
| 3501 | ret = lock_acquire_crosslock(hlock); | ||
| 3502 | /* | ||
| 3503 | * 2 means normal acquire operations are needed. Otherwise, it's | ||
| 3504 | * ok just to return with '0:fail, 1:success'. | ||
| 3505 | */ | ||
| 3506 | if (ret != 2) | ||
| 3507 | return ret; | ||
| 3508 | |||
| 3370 | curr->curr_chain_key = chain_key; | 3509 | curr->curr_chain_key = chain_key; |
| 3371 | curr->lockdep_depth++; | 3510 | curr->lockdep_depth++; |
| 3372 | check_chain_key(curr); | 3511 | check_chain_key(curr); |
| @@ -3604,11 +3743,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | |||
| 3604 | struct task_struct *curr = current; | 3743 | struct task_struct *curr = current; |
| 3605 | struct held_lock *hlock; | 3744 | struct held_lock *hlock; |
| 3606 | unsigned int depth; | 3745 | unsigned int depth; |
| 3607 | int i; | 3746 | int ret, i; |
| 3608 | 3747 | ||
| 3609 | if (unlikely(!debug_locks)) | 3748 | if (unlikely(!debug_locks)) |
| 3610 | return 0; | 3749 | return 0; |
| 3611 | 3750 | ||
| 3751 | ret = lock_release_crosslock(lock); | ||
| 3752 | /* | ||
| 3753 | * 2 means normal release operations are needed. Otherwise, it's | ||
| 3754 | * ok just to return with '0:fail, 1:success'. | ||
| 3755 | */ | ||
| 3756 | if (ret != 2) | ||
| 3757 | return ret; | ||
| 3758 | |||
| 3612 | depth = curr->lockdep_depth; | 3759 | depth = curr->lockdep_depth; |
| 3613 | /* | 3760 | /* |
| 3614 | * So we're all set to release this lock.. wait what lock? We don't | 3761 | * So we're all set to release this lock.. wait what lock? We don't |
| @@ -3952,18 +4099,6 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie) | |||
| 3952 | } | 4099 | } |
| 3953 | EXPORT_SYMBOL_GPL(lock_unpin_lock); | 4100 | EXPORT_SYMBOL_GPL(lock_unpin_lock); |
| 3954 | 4101 | ||
| 3955 | void lockdep_set_current_reclaim_state(gfp_t gfp_mask) | ||
| 3956 | { | ||
| 3957 | current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask); | ||
| 3958 | } | ||
| 3959 | EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state); | ||
| 3960 | |||
| 3961 | void lockdep_clear_current_reclaim_state(void) | ||
| 3962 | { | ||
| 3963 | current->lockdep_reclaim_gfp = 0; | ||
| 3964 | } | ||
| 3965 | EXPORT_SYMBOL_GPL(lockdep_clear_current_reclaim_state); | ||
| 3966 | |||
| 3967 | #ifdef CONFIG_LOCK_STAT | 4102 | #ifdef CONFIG_LOCK_STAT |
| 3968 | static int | 4103 | static int |
| 3969 | print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | 4104 | print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, |
| @@ -4484,6 +4619,17 @@ asmlinkage __visible void lockdep_sys_exit(void) | |||
| 4484 | curr->comm, curr->pid); | 4619 | curr->comm, curr->pid); |
| 4485 | lockdep_print_held_locks(curr); | 4620 | lockdep_print_held_locks(curr); |
| 4486 | } | 4621 | } |
| 4622 | |||
| 4623 | /* | ||
| 4624 | * The lock history for each syscall should be independent. So wipe the | ||
| 4625 | * slate clean on return to userspace. | ||
| 4626 | * | ||
| 4627 | * crossrelease_hist_end() works well here even when getting here | ||
| 4628 | * without starting (i.e. just after forking), because it rolls back | ||
| 4629 | * the index to point to the last entry, which is already invalid. | ||
| 4630 | */ | ||
| 4631 | crossrelease_hist_end(XHLOCK_PROC); | ||
| 4632 | crossrelease_hist_start(XHLOCK_PROC); | ||
| 4487 | } | 4633 | } |
| 4488 | 4634 | ||
| 4489 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | 4635 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s) |
| @@ -4532,3 +4678,470 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | |||
| 4532 | dump_stack(); | 4678 | dump_stack(); |
| 4533 | } | 4679 | } |
| 4534 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); | 4680 | EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); |
| 4681 | |||
| 4682 | #ifdef CONFIG_LOCKDEP_CROSSRELEASE | ||
| 4683 | |||
| 4684 | /* | ||
| 4685 | * Crossrelease works by recording a lock history for each thread and | ||
| 4686 | * connecting, in the complete() context, those historic locks that | ||
| 4687 | * were taken after the wait_for_completion(). | ||
| 4688 | * | ||
| 4689 | * Task-A Task-B | ||
| 4690 | * | ||
| 4691 | * mutex_lock(&A); | ||
| 4692 | * mutex_unlock(&A); | ||
| 4693 | * | ||
| 4694 | * wait_for_completion(&C); | ||
| 4695 | * lock_acquire_crosslock(); | ||
| 4696 | * atomic_inc_return(&cross_gen_id); | ||
| 4697 | * | | ||
| 4698 | * | mutex_lock(&B); | ||
| 4699 | * | mutex_unlock(&B); | ||
| 4700 | * | | ||
| 4701 | * | complete(&C); | ||
| 4702 | * `-- lock_commit_crosslock(); | ||
| 4703 | * | ||
| 4704 | * Which will then add a dependency between B and C. | ||
| 4705 | */ | ||
| 4706 | |||
| 4707 | #define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR]) | ||
| 4708 | |||
| 4709 | /* | ||
| 4710 | * Whenever a crosslock is held, cross_gen_id will be increased. | ||
| 4711 | */ | ||
| 4712 | static atomic_t cross_gen_id; /* Can be wrapped */ | ||
| 4713 | |||
| 4714 | /* | ||
| 4715 | * Make an entry of the ring buffer invalid. | ||
| 4716 | */ | ||
| 4717 | static inline void invalidate_xhlock(struct hist_lock *xhlock) | ||
| 4718 | { | ||
| 4719 | /* | ||
| 4720 | * Normally, xhlock->hlock.instance must be !NULL. | ||
| 4721 | */ | ||
| 4722 | xhlock->hlock.instance = NULL; | ||
| 4723 | } | ||
| 4724 | |||
| 4725 | /* | ||
| 4726 | * Lock history stacks; we have 3 nested lock history stacks: | ||
| 4727 | * | ||
| 4728 | * Hard IRQ | ||
| 4729 | * Soft IRQ | ||
| 4730 | * History / Task | ||
| 4731 | * | ||
| 4732 | * The thing is that once we complete a (Hard/Soft) IRQ the future task locks | ||
| 4733 | * should not depend on any of the locks observed while running the IRQ. | ||
| 4734 | * | ||
| 4735 | * So what we do is rewind the history buffer and erase all our knowledge of | ||
| 4736 | * that temporal event. | ||
| 4737 | */ | ||
| 4738 | |||
| 4739 | /* | ||
| 4740 | * We need this to annotate lock history boundaries. Take for instance | ||
| 4741 | * workqueues; each work is independent of the last. The completion of a future | ||
| 4742 | * work does not depend on the completion of a past work (in general). | ||
| 4743 | * Therefore we must not carry that (lock) dependency across works. | ||
| 4744 | * | ||
| 4745 | * This is true for many things; pretty much all kthreads fall into this | ||
| 4746 | * pattern, where they have an 'idle' state and future completions do not | ||
| 4747 | * depend on past completions. It's just that since they all have the 'same' | ||
| 4748 | * form -- the kthread does the same over and over -- it doesn't typically | ||
| 4749 | * matter. | ||
| 4750 | * | ||
| 4751 | * The same is true for system calls; once a system call is completed (we've | ||
| 4752 | * returned to userspace) the next system call does not depend on the lock | ||
| 4753 | * history of the previous system call. | ||
| 4754 | */ | ||
| 4755 | void crossrelease_hist_start(enum xhlock_context_t c) | ||
| 4756 | { | ||
| 4757 | struct task_struct *cur = current; | ||
| 4758 | |||
| 4759 | if (cur->xhlocks) { | ||
| 4760 | cur->xhlock_idx_hist[c] = cur->xhlock_idx; | ||
| 4761 | cur->hist_id_save[c] = cur->hist_id; | ||
| 4762 | } | ||
| 4763 | } | ||
| 4764 | |||
| 4765 | void crossrelease_hist_end(enum xhlock_context_t c) | ||
| 4766 | { | ||
| 4767 | struct task_struct *cur = current; | ||
| 4768 | |||
| 4769 | if (cur->xhlocks) { | ||
| 4770 | unsigned int idx = cur->xhlock_idx_hist[c]; | ||
| 4771 | struct hist_lock *h = &xhlock(idx); | ||
| 4772 | |||
| 4773 | cur->xhlock_idx = idx; | ||
| 4774 | |||
| 4775 | /* Check if the ring was overwritten. */ | ||
| 4776 | if (h->hist_id != cur->hist_id_save[c]) | ||
| 4777 | invalidate_xhlock(h); | ||
| 4778 | } | ||
| 4779 | } | ||
| 4780 | |||
| 4781 | static int cross_lock(struct lockdep_map *lock) | ||
| 4782 | { | ||
| 4783 | return lock ? lock->cross : 0; | ||
| 4784 | } | ||
| 4785 | |||
| 4786 | /* | ||
| 4787 | * This is needed to decide the relationship between wrapable variables. | ||
| 4788 | */ | ||
| 4789 | static inline int before(unsigned int a, unsigned int b) | ||
| 4790 | { | ||
| 4791 | return (int)(a - b) < 0; | ||
| 4792 | } | ||
| 4793 | |||
| 4794 | static inline struct lock_class *xhlock_class(struct hist_lock *xhlock) | ||
| 4795 | { | ||
| 4796 | return hlock_class(&xhlock->hlock); | ||
| 4797 | } | ||
| 4798 | |||
| 4799 | static inline struct lock_class *xlock_class(struct cross_lock *xlock) | ||
| 4800 | { | ||
| 4801 | return hlock_class(&xlock->hlock); | ||
| 4802 | } | ||
| 4803 | |||
| 4804 | /* | ||
| 4805 | * Should we check a dependency with previous one? | ||
| 4806 | */ | ||
| 4807 | static inline int depend_before(struct held_lock *hlock) | ||
| 4808 | { | ||
| 4809 | return hlock->read != 2 && hlock->check && !hlock->trylock; | ||
| 4810 | } | ||
| 4811 | |||
| 4812 | /* | ||
| 4813 | * Should we check a dependency with next one? | ||
| 4814 | */ | ||
| 4815 | static inline int depend_after(struct held_lock *hlock) | ||
| 4816 | { | ||
| 4817 | return hlock->read != 2 && hlock->check; | ||
| 4818 | } | ||
| 4819 | |||
| 4820 | /* | ||
| 4821 | * Check if the xhlock is valid, which would be false if: | ||
| 4822 | * | ||
| 4823 | * 1. It has not been used since initialization, or | ||
| 4824 | * 2. It got invalidated. | ||
| 4825 | * | ||
| 4826 | * Remember that hist_lock is implemented as a ring buffer. | ||
| 4827 | */ | ||
| 4828 | static inline int xhlock_valid(struct hist_lock *xhlock) | ||
| 4829 | { | ||
| 4830 | /* | ||
| 4831 | * xhlock->hlock.instance must be !NULL. | ||
| 4832 | */ | ||
| 4833 | return !!xhlock->hlock.instance; | ||
| 4834 | } | ||
| 4835 | |||
| 4836 | /* | ||
| 4837 | * Record a hist_lock entry. | ||
| 4838 | * | ||
| 4839 | * Only IRQ disabling is required. | ||
| 4840 | */ | ||
| 4841 | static void add_xhlock(struct held_lock *hlock) | ||
| 4842 | { | ||
| 4843 | unsigned int idx = ++current->xhlock_idx; | ||
| 4844 | struct hist_lock *xhlock = &xhlock(idx); | ||
| 4845 | |||
| 4846 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
| 4847 | /* | ||
| 4848 | * This can be done locklessly because they are all task-local | ||
| 4849 | * state, we must however ensure IRQs are disabled. | ||
| 4850 | */ | ||
| 4851 | WARN_ON_ONCE(!irqs_disabled()); | ||
| 4852 | #endif | ||
| 4853 | |||
| 4854 | /* Initialize hist_lock's members */ | ||
| 4855 | xhlock->hlock = *hlock; | ||
| 4856 | xhlock->hist_id = ++current->hist_id; | ||
| 4857 | |||
| 4858 | xhlock->trace.nr_entries = 0; | ||
| 4859 | xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES; | ||
| 4860 | xhlock->trace.entries = xhlock->trace_entries; | ||
| 4861 | xhlock->trace.skip = 3; | ||
| 4862 | save_stack_trace(&xhlock->trace); | ||
| 4863 | } | ||
| 4864 | |||
| 4865 | static inline int same_context_xhlock(struct hist_lock *xhlock) | ||
| 4866 | { | ||
| 4867 | return xhlock->hlock.irq_context == task_irq_context(current); | ||
| 4868 | } | ||
| 4869 | |||
| 4870 | /* | ||
| 4871 | * This should be lockless as far as possible because this would be | ||
| 4872 | * called very frequently. | ||
| 4873 | */ | ||
| 4874 | static void check_add_xhlock(struct held_lock *hlock) | ||
| 4875 | { | ||
| 4876 | /* | ||
| 4877 | * Record a hist_lock, but only if acquisitions ahead | ||
| 4878 | * could depend on the held_lock. For example, if the held_lock | ||
| 4879 | * is a trylock then acquisitions ahead never depend on it. | ||
| 4880 | * In that case, we don't need to record it. Just return. | ||
| 4881 | */ | ||
| 4882 | if (!current->xhlocks || !depend_before(hlock)) | ||
| 4883 | return; | ||
| 4884 | |||
| 4885 | add_xhlock(hlock); | ||
| 4886 | } | ||
| 4887 | |||
| 4888 | /* | ||
| 4889 | * For crosslock. | ||
| 4890 | */ | ||
| 4891 | static int add_xlock(struct held_lock *hlock) | ||
| 4892 | { | ||
| 4893 | struct cross_lock *xlock; | ||
| 4894 | unsigned int gen_id; | ||
| 4895 | |||
| 4896 | if (!graph_lock()) | ||
| 4897 | return 0; | ||
| 4898 | |||
| 4899 | xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock; | ||
| 4900 | |||
| 4901 | /* | ||
| 4902 | * When acquisitions for a crosslock are overlapped, we use | ||
| 4903 | * nr_acquire to perform commit for them, based on cross_gen_id | ||
| 4904 | * of the first acquisition, which allows additional | ||
| 4905 | * dependencies to be added. | ||
| 4906 | * | ||
| 4907 | * Moreover, when no acquisition of a crosslock is in progress, | ||
| 4908 | * we should not perform commit because the lock might not exist | ||
| 4909 | * any more, which might cause incorrect memory access. So we | ||
| 4910 | * have to track the number of acquisitions of a crosslock. | ||
| 4911 | * | ||
| 4912 | * depend_after() is necessary to initialize only the first | ||
| 4913 | * valid xlock so that the xlock can be used on its commit. | ||
| 4914 | */ | ||
| 4915 | if (xlock->nr_acquire++ && depend_after(&xlock->hlock)) | ||
| 4916 | goto unlock; | ||
| 4917 | |||
| 4918 | gen_id = (unsigned int)atomic_inc_return(&cross_gen_id); | ||
| 4919 | xlock->hlock = *hlock; | ||
| 4920 | xlock->hlock.gen_id = gen_id; | ||
| 4921 | unlock: | ||
| 4922 | graph_unlock(); | ||
| 4923 | return 1; | ||
| 4924 | } | ||
| 4925 | |||
| 4926 | /* | ||
| 4927 | * Called for both normal and crosslock acquires. Normal locks will be | ||
| 4928 | * pushed on the hist_lock queue. Cross locks will record state and | ||
| 4929 | * stop regular lock_acquire() to avoid being placed on the held_lock | ||
| 4930 | * stack. | ||
| 4931 | * | ||
| 4932 | * Return: 0 - failure; | ||
| 4933 | * 1 - crosslock, done; | ||
| 4934 | * 2 - normal lock, continue to held_lock[] ops. | ||
| 4935 | */ | ||
| 4936 | static int lock_acquire_crosslock(struct held_lock *hlock) | ||
| 4937 | { | ||
| 4938 | /* | ||
| 4939 | * CONTEXT 1 CONTEXT 2 | ||
| 4940 | * --------- --------- | ||
| 4941 | * lock A (cross) | ||
| 4942 | * X = atomic_inc_return(&cross_gen_id) | ||
| 4943 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
| 4944 | * Y = atomic_read_acquire(&cross_gen_id) | ||
| 4945 | * lock B | ||
| 4946 | * | ||
| 4947 | * atomic_read_acquire() is for ordering between A and B, | ||
| 4948 | * IOW, A happens before B, when CONTEXT 2 sees Y >= X. | ||
| 4949 | * | ||
| 4950 | * Pairs with atomic_inc_return() in add_xlock(). | ||
| 4951 | */ | ||
| 4952 | hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id); | ||
| 4953 | |||
| 4954 | if (cross_lock(hlock->instance)) | ||
| 4955 | return add_xlock(hlock); | ||
| 4956 | |||
| 4957 | check_add_xhlock(hlock); | ||
| 4958 | return 2; | ||
| 4959 | } | ||
| 4960 | |||
| 4961 | static int copy_trace(struct stack_trace *trace) | ||
| 4962 | { | ||
| 4963 | unsigned long *buf = stack_trace + nr_stack_trace_entries; | ||
| 4964 | unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; | ||
| 4965 | unsigned int nr = min(max_nr, trace->nr_entries); | ||
| 4966 | |||
| 4967 | trace->nr_entries = nr; | ||
| 4968 | memcpy(buf, trace->entries, nr * sizeof(trace->entries[0])); | ||
| 4969 | trace->entries = buf; | ||
| 4970 | nr_stack_trace_entries += nr; | ||
| 4971 | |||
| 4972 | if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { | ||
| 4973 | if (!debug_locks_off_graph_unlock()) | ||
| 4974 | return 0; | ||
| 4975 | |||
| 4976 | print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); | ||
| 4977 | dump_stack(); | ||
| 4978 | |||
| 4979 | return 0; | ||
| 4980 | } | ||
| 4981 | |||
| 4982 | return 1; | ||
| 4983 | } | ||
| 4984 | |||
| 4985 | static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock) | ||
| 4986 | { | ||
| 4987 | unsigned int xid, pid; | ||
| 4988 | u64 chain_key; | ||
| 4989 | |||
| 4990 | xid = xlock_class(xlock) - lock_classes; | ||
| 4991 | chain_key = iterate_chain_key((u64)0, xid); | ||
| 4992 | pid = xhlock_class(xhlock) - lock_classes; | ||
| 4993 | chain_key = iterate_chain_key(chain_key, pid); | ||
| 4994 | |||
| 4995 | if (lookup_chain_cache(chain_key)) | ||
| 4996 | return 1; | ||
| 4997 | |||
| 4998 | if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context, | ||
| 4999 | chain_key)) | ||
| 5000 | return 0; | ||
| 5001 | |||
| 5002 | if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1, | ||
| 5003 | &xhlock->trace, copy_trace)) | ||
| 5004 | return 0; | ||
| 5005 | |||
| 5006 | return 1; | ||
| 5007 | } | ||
| 5008 | |||
| 5009 | static void commit_xhlocks(struct cross_lock *xlock) | ||
| 5010 | { | ||
| 5011 | unsigned int cur = current->xhlock_idx; | ||
| 5012 | unsigned int prev_hist_id = xhlock(cur).hist_id; | ||
| 5013 | unsigned int i; | ||
| 5014 | |||
| 5015 | if (!graph_lock()) | ||
| 5016 | return; | ||
| 5017 | |||
| 5018 | if (xlock->nr_acquire) { | ||
| 5019 | for (i = 0; i < MAX_XHLOCKS_NR; i++) { | ||
| 5020 | struct hist_lock *xhlock = &xhlock(cur - i); | ||
| 5021 | |||
| 5022 | if (!xhlock_valid(xhlock)) | ||
| 5023 | break; | ||
| 5024 | |||
| 5025 | if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id)) | ||
| 5026 | break; | ||
| 5027 | |||
| 5028 | if (!same_context_xhlock(xhlock)) | ||
| 5029 | break; | ||
| 5030 | |||
| 5031 | /* | ||
| 5032 | * Filter out the cases where the ring buffer was | ||
| 5033 | * overwritten and the current entry has a bigger | ||
| 5034 | * hist_id than the previous one, which is impossible | ||
| 5035 | * otherwise: | ||
| 5036 | */ | ||
| 5037 | if (unlikely(before(prev_hist_id, xhlock->hist_id))) | ||
| 5038 | break; | ||
| 5039 | |||
| 5040 | prev_hist_id = xhlock->hist_id; | ||
| 5041 | |||
| 5042 | /* | ||
| 5043 | * commit_xhlock() returns 0 with graph_lock already | ||
| 5044 | * released on failure. | ||
| 5045 | */ | ||
| 5046 | if (!commit_xhlock(xlock, xhlock)) | ||
| 5047 | return; | ||
| 5048 | } | ||
| 5049 | } | ||
| 5050 | |||
| 5051 | graph_unlock(); | ||
| 5052 | } | ||
| 5053 | |||
| 5054 | void lock_commit_crosslock(struct lockdep_map *lock) | ||
| 5055 | { | ||
| 5056 | struct cross_lock *xlock; | ||
| 5057 | unsigned long flags; | ||
| 5058 | |||
| 5059 | if (unlikely(!debug_locks || current->lockdep_recursion)) | ||
| 5060 | return; | ||
| 5061 | |||
| 5062 | if (!current->xhlocks) | ||
| 5063 | return; | ||
| 5064 | |||
| 5065 | /* | ||
| 5066 | * Commit hist_locks against the cross_lock, but only if the | ||
| 5067 | * cross_lock could depend on acquisitions made after it. | ||
| 5068 | * | ||
| 5069 | * For example, if the cross_lock does not have the 'check' flag | ||
| 5070 | * then we don't need to check dependencies and commit for that. | ||
| 5071 | * Just skip it. In that case, of course, the cross_lock does | ||
| 5072 | * not depend on acquisitions ahead, either. | ||
| 5073 | * | ||
| 5074 | * WARNING: Don't do this check in add_xlock() in advance. When the | ||
| 5075 | * acquisition context is different from the commit context, an | ||
| 5076 | * invalid (skipped) cross_lock might be accessed. | ||
| 5077 | */ | ||
| 5078 | if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock)) | ||
| 5079 | return; | ||
| 5080 | |||
| 5081 | raw_local_irq_save(flags); | ||
| 5082 | check_flags(flags); | ||
| 5083 | current->lockdep_recursion = 1; | ||
| 5084 | xlock = &((struct lockdep_map_cross *)lock)->xlock; | ||
| 5085 | commit_xhlocks(xlock); | ||
| 5086 | current->lockdep_recursion = 0; | ||
| 5087 | raw_local_irq_restore(flags); | ||
| 5088 | } | ||
| 5089 | EXPORT_SYMBOL_GPL(lock_commit_crosslock); | ||
| 5090 | |||
| 5091 | /* | ||
| 5092 | * Return: 0 - failure; | ||
| 5093 | * 1 - crosslock, done; | ||
| 5094 | * 2 - normal lock, continue to held_lock[] ops. | ||
| 5095 | */ | ||
| 5096 | static int lock_release_crosslock(struct lockdep_map *lock) | ||
| 5097 | { | ||
| 5098 | if (cross_lock(lock)) { | ||
| 5099 | if (!graph_lock()) | ||
| 5100 | return 0; | ||
| 5101 | ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--; | ||
| 5102 | graph_unlock(); | ||
| 5103 | return 1; | ||
| 5104 | } | ||
| 5105 | return 2; | ||
| 5106 | } | ||
| 5107 | |||
| 5108 | static void cross_init(struct lockdep_map *lock, int cross) | ||
| 5109 | { | ||
| 5110 | if (cross) | ||
| 5111 | ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0; | ||
| 5112 | |||
| 5113 | lock->cross = cross; | ||
| 5114 | |||
| 5115 | /* | ||
| 5116 | * Crossrelease assumes that the ring buffer size of xhlocks | ||
| 5117 | * is aligned with power of 2. So force it on build. | ||
| 5118 | */ | ||
| 5119 | BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1)); | ||
| 5120 | } | ||
| 5121 | |||
| 5122 | void lockdep_init_task(struct task_struct *task) | ||
| 5123 | { | ||
| 5124 | int i; | ||
| 5125 | |||
| 5126 | task->xhlock_idx = UINT_MAX; | ||
| 5127 | task->hist_id = 0; | ||
| 5128 | |||
| 5129 | for (i = 0; i < XHLOCK_CTX_NR; i++) { | ||
| 5130 | task->xhlock_idx_hist[i] = UINT_MAX; | ||
| 5131 | task->hist_id_save[i] = 0; | ||
| 5132 | } | ||
| 5133 | |||
| 5134 | task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR, | ||
| 5135 | GFP_KERNEL); | ||
| 5136 | } | ||
| 5137 | |||
| 5138 | void lockdep_free_task(struct task_struct *task) | ||
| 5139 | { | ||
| 5140 | if (task->xhlocks) { | ||
| 5141 | void *tmp = task->xhlocks; | ||
| 5142 | /* Disable crossrelease for current */ | ||
| 5143 | task->xhlocks = NULL; | ||
| 5144 | kfree(tmp); | ||
| 5145 | } | ||
| 5146 | } | ||
| 5147 | #endif | ||
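The crossrelease engine above is easiest to see on the completion case it was written for. Below is a hedged, minimal sketch of the pattern it targets; the names (demo_lock, demo_done, the two functions) are invented for illustration and are not part of the patch. With CONFIG_LOCKDEP_CROSSRELEASE and CONFIG_LOCKDEP_COMPLETIONS enabled, the wait side records demo_lock -> demo_done when the completion's crosslock is acquired under the mutex, and the commit performed by complete() can add demo_done -> demo_lock from the completer's recorded lock history.

  #include <linux/completion.h>
  #include <linux/mutex.h>

  static DEFINE_MUTEX(demo_lock);
  static DECLARE_COMPLETION(demo_done);

  /* Context A: waits for the completion while holding demo_lock. */
  static void demo_waiter(void)
  {
          mutex_lock(&demo_lock);
          wait_for_completion(&demo_done);        /* demo_lock -> demo_done */
          mutex_unlock(&demo_lock);
  }

  /* Context B: needs demo_lock before it can signal the completion. */
  static void demo_completer(void)
  {
          mutex_lock(&demo_lock);
          complete(&demo_done);   /* commit can add demo_done -> demo_lock */
          mutex_unlock(&demo_lock);
  }

On a run where timing avoids the hang (for example the completer wins the race for demo_lock), both edges still end up in the dependency graph over time, which is the point of committing against recorded history rather than the live held_lock stack.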
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index c08fbd2f5ba9..1da4669d57a7 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h | |||
| @@ -143,6 +143,8 @@ struct lockdep_stats { | |||
| 143 | int redundant_softirqs_on; | 143 | int redundant_softirqs_on; |
| 144 | int redundant_softirqs_off; | 144 | int redundant_softirqs_off; |
| 145 | int nr_unused_locks; | 145 | int nr_unused_locks; |
| 146 | int nr_redundant_checks; | ||
| 147 | int nr_redundant; | ||
| 146 | int nr_cyclic_checks; | 148 | int nr_cyclic_checks; |
| 147 | int nr_cyclic_check_recursions; | 149 | int nr_cyclic_check_recursions; |
| 148 | int nr_find_usage_forwards_checks; | 150 | int nr_find_usage_forwards_checks; |
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index 6d1fcc786081..68d9e267ccd4 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c | |||
| @@ -201,6 +201,10 @@ static void lockdep_stats_debug_show(struct seq_file *m) | |||
| 201 | debug_atomic_read(chain_lookup_hits)); | 201 | debug_atomic_read(chain_lookup_hits)); |
| 202 | seq_printf(m, " cyclic checks: %11llu\n", | 202 | seq_printf(m, " cyclic checks: %11llu\n", |
| 203 | debug_atomic_read(nr_cyclic_checks)); | 203 | debug_atomic_read(nr_cyclic_checks)); |
| 204 | seq_printf(m, " redundant checks: %11llu\n", | ||
| 205 | debug_atomic_read(nr_redundant_checks)); | ||
| 206 | seq_printf(m, " redundant links: %11llu\n", | ||
| 207 | debug_atomic_read(nr_redundant)); | ||
| 204 | seq_printf(m, " find-mask forwards checks: %11llu\n", | 208 | seq_printf(m, " find-mask forwards checks: %11llu\n", |
| 205 | debug_atomic_read(nr_find_usage_forwards_checks)); | 209 | debug_atomic_read(nr_find_usage_forwards_checks)); |
| 206 | seq_printf(m, " find-mask backwards checks: %11llu\n", | 210 | seq_printf(m, " find-mask backwards checks: %11llu\n", |
diff --git a/kernel/locking/lockdep_states.h b/kernel/locking/lockdep_states.h index 995b0cc2b84c..35ca09f2ed0b 100644 --- a/kernel/locking/lockdep_states.h +++ b/kernel/locking/lockdep_states.h | |||
| @@ -6,4 +6,3 @@ | |||
| 6 | */ | 6 | */ |
| 7 | LOCKDEP_STATE(HARDIRQ) | 7 | LOCKDEP_STATE(HARDIRQ) |
| 8 | LOCKDEP_STATE(SOFTIRQ) | 8 | LOCKDEP_STATE(SOFTIRQ) |
| 9 | LOCKDEP_STATE(RECLAIM_FS) | ||
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index a3167941093b..a74ee6abd039 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c | |||
| @@ -109,6 +109,19 @@ bool osq_lock(struct optimistic_spin_queue *lock) | |||
| 109 | 109 | ||
| 110 | prev = decode_cpu(old); | 110 | prev = decode_cpu(old); |
| 111 | node->prev = prev; | 111 | node->prev = prev; |
| 112 | |||
| 113 | /* | ||
| 114 | * osq_lock() unqueue | ||
| 115 | * | ||
| 116 | * node->prev = prev osq_wait_next() | ||
| 117 | * WMB MB | ||
| 118 | * prev->next = node next->prev = prev // unqueue-C | ||
| 119 | * | ||
| 120 | * Here 'node->prev' and 'next->prev' are the same variable and we need | ||
| 121 | * to ensure these stores happen in-order to avoid corrupting the list. | ||
| 122 | */ | ||
| 123 | smp_wmb(); | ||
| 124 | |||
| 112 | WRITE_ONCE(prev->next, node); | 125 | WRITE_ONCE(prev->next, node); |
| 113 | 126 | ||
| 114 | /* | 127 | /* |
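The smp_wmb() added above is an instance of the usual initialise-then-publish store ordering. The generic sketch below is illustrative only (it is not the osq code, and the names are made up): the write barrier orders the initialisation of the node before the store that makes it reachable, and the reader pairs with it via smp_rmb() (the address dependency preserved by READ_ONCE() would also suffice).

  struct demo_node {
          int data;
  };

  static struct demo_node *demo_head;

  static void demo_publish(struct demo_node *n)
  {
          n->data = 42;                   /* A: initialise the node      */
          smp_wmb();                      /* order A before B            */
          WRITE_ONCE(demo_head, n);       /* B: make the node reachable  */
  }

  static int demo_consume(void)
  {
          struct demo_node *n = READ_ONCE(demo_head);

          if (!n)
                  return -1;
          smp_rmb();                      /* pairs with the smp_wmb() above */
          return n->data;                 /* guaranteed to observe 42    */
  }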
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 72ad45a9a794..8d039b928d61 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h | |||
| @@ -40,6 +40,9 @@ struct rt_mutex_waiter { | |||
| 40 | /* | 40 | /* |
| 41 | * Various helpers to access the waiters-tree: | 41 | * Various helpers to access the waiters-tree: |
| 42 | */ | 42 | */ |
| 43 | |||
| 44 | #ifdef CONFIG_RT_MUTEXES | ||
| 45 | |||
| 43 | static inline int rt_mutex_has_waiters(struct rt_mutex *lock) | 46 | static inline int rt_mutex_has_waiters(struct rt_mutex *lock) |
| 44 | { | 47 | { |
| 45 | return !RB_EMPTY_ROOT(&lock->waiters); | 48 | return !RB_EMPTY_ROOT(&lock->waiters); |
| @@ -69,6 +72,32 @@ task_top_pi_waiter(struct task_struct *p) | |||
| 69 | pi_tree_entry); | 72 | pi_tree_entry); |
| 70 | } | 73 | } |
| 71 | 74 | ||
| 75 | #else | ||
| 76 | |||
| 77 | static inline int rt_mutex_has_waiters(struct rt_mutex *lock) | ||
| 78 | { | ||
| 79 | return false; | ||
| 80 | } | ||
| 81 | |||
| 82 | static inline struct rt_mutex_waiter * | ||
| 83 | rt_mutex_top_waiter(struct rt_mutex *lock) | ||
| 84 | { | ||
| 85 | return NULL; | ||
| 86 | } | ||
| 87 | |||
| 88 | static inline int task_has_pi_waiters(struct task_struct *p) | ||
| 89 | { | ||
| 90 | return false; | ||
| 91 | } | ||
| 92 | |||
| 93 | static inline struct rt_mutex_waiter * | ||
| 94 | task_top_pi_waiter(struct task_struct *p) | ||
| 95 | { | ||
| 96 | return NULL; | ||
| 97 | } | ||
| 98 | |||
| 99 | #endif | ||
| 100 | |||
| 72 | /* | 101 | /* |
| 73 | * lock->owner state tracking: | 102 | * lock->owner state tracking: |
| 74 | */ | 103 | */ |
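The new stubs let priority-inheritance-aware callers compile without #ifdefs when CONFIG_RT_MUTEXES is off. The helper below is hypothetical (not from the patch) and follows the usual has-waiters/top-waiter pattern; with the stubs it simply collapses to returning the normal priority.

  /* Hypothetical: effective priority of @p including any PI boost. */
  static int demo_boosted_prio(struct task_struct *p)
  {
          if (task_has_pi_waiters(p))
                  return min(p->normal_prio, task_top_pi_waiter(p)->prio);

          return p->normal_prio;
  }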
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 20819df98125..0848634c5512 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c | |||
| @@ -126,7 +126,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem) | |||
| 126 | /* | 126 | /* |
| 127 | * get a read lock on the semaphore | 127 | * get a read lock on the semaphore |
| 128 | */ | 128 | */ |
| 129 | void __sched __down_read(struct rw_semaphore *sem) | 129 | int __sched __down_read_common(struct rw_semaphore *sem, int state) |
| 130 | { | 130 | { |
| 131 | struct rwsem_waiter waiter; | 131 | struct rwsem_waiter waiter; |
| 132 | unsigned long flags; | 132 | unsigned long flags; |
| @@ -140,8 +140,6 @@ void __sched __down_read(struct rw_semaphore *sem) | |||
| 140 | goto out; | 140 | goto out; |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 144 | |||
| 145 | /* set up my own style of waitqueue */ | 143 | /* set up my own style of waitqueue */ |
| 146 | waiter.task = current; | 144 | waiter.task = current; |
| 147 | waiter.type = RWSEM_WAITING_FOR_READ; | 145 | waiter.type = RWSEM_WAITING_FOR_READ; |
| @@ -149,20 +147,41 @@ void __sched __down_read(struct rw_semaphore *sem) | |||
| 149 | 147 | ||
| 150 | list_add_tail(&waiter.list, &sem->wait_list); | 148 | list_add_tail(&waiter.list, &sem->wait_list); |
| 151 | 149 | ||
| 152 | /* we don't need to touch the semaphore struct anymore */ | ||
| 153 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
| 154 | |||
| 155 | /* wait to be given the lock */ | 150 | /* wait to be given the lock */ |
| 156 | for (;;) { | 151 | for (;;) { |
| 157 | if (!waiter.task) | 152 | if (!waiter.task) |
| 158 | break; | 153 | break; |
| 154 | if (signal_pending_state(state, current)) | ||
| 155 | goto out_nolock; | ||
| 156 | set_current_state(state); | ||
| 157 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
| 159 | schedule(); | 158 | schedule(); |
| 160 | set_current_state(TASK_UNINTERRUPTIBLE); | 159 | raw_spin_lock_irqsave(&sem->wait_lock, flags); |
| 161 | } | 160 | } |
| 162 | 161 | ||
| 163 | __set_current_state(TASK_RUNNING); | 162 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); |
| 164 | out: | 163 | out: |
| 165 | ; | 164 | return 0; |
| 165 | |||
| 166 | out_nolock: | ||
| 167 | /* | ||
| 168 | * We didn't take the lock, so that there is a writer, which | ||
| 169 | * is owner or the first waiter of the sem. If it's a waiter, | ||
| 170 | * it will be woken by current owner. Not need to wake anybody. | ||
| 171 | */ | ||
| 172 | list_del(&waiter.list); | ||
| 173 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
| 174 | return -EINTR; | ||
| 175 | } | ||
| 176 | |||
| 177 | void __sched __down_read(struct rw_semaphore *sem) | ||
| 178 | { | ||
| 179 | __down_read_common(sem, TASK_UNINTERRUPTIBLE); | ||
| 180 | } | ||
| 181 | |||
| 182 | int __sched __down_read_killable(struct rw_semaphore *sem) | ||
| 183 | { | ||
| 184 | return __down_read_common(sem, TASK_KILLABLE); | ||
| 166 | } | 185 | } |
| 167 | 186 | ||
| 168 | /* | 187 | /* |
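The killable read path is meant for callers that should not sit in uninterruptible sleep while waiting for the semaphore. A hedged usage sketch, assuming the down_read_killable() wrapper built on the __down_read_killable() added here; the device structure and demo_copy_config() are invented:

  struct demo_dev {
          struct rw_semaphore cfg_rwsem;
          /* configuration protected by cfg_rwsem ... */
  };

  static int demo_copy_config(struct demo_dev *dev);     /* hypothetical */

  static int demo_read_config(struct demo_dev *dev)
  {
          int ret;

          ret = down_read_killable(&dev->cfg_rwsem);
          if (ret)
                  return ret;     /* -EINTR: fatal signal while waiting */

          ret = demo_copy_config(dev);    /* read-side critical section */

          up_read(&dev->cfg_rwsem);
          return ret;
  }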
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 34e727f18e49..02f660666ab8 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
| @@ -221,8 +221,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
| 221 | /* | 221 | /* |
| 222 | * Wait for the read lock to be granted | 222 | * Wait for the read lock to be granted |
| 223 | */ | 223 | */ |
| 224 | __visible | 224 | static inline struct rw_semaphore __sched * |
| 225 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | 225 | __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) |
| 226 | { | 226 | { |
| 227 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; | 227 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; |
| 228 | struct rwsem_waiter waiter; | 228 | struct rwsem_waiter waiter; |
| @@ -255,17 +255,44 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | |||
| 255 | 255 | ||
| 256 | /* wait to be given the lock */ | 256 | /* wait to be given the lock */ |
| 257 | while (true) { | 257 | while (true) { |
| 258 | set_current_state(TASK_UNINTERRUPTIBLE); | 258 | set_current_state(state); |
| 259 | if (!waiter.task) | 259 | if (!waiter.task) |
| 260 | break; | 260 | break; |
| 261 | if (signal_pending_state(state, current)) { | ||
| 262 | raw_spin_lock_irq(&sem->wait_lock); | ||
| 263 | if (waiter.task) | ||
| 264 | goto out_nolock; | ||
| 265 | raw_spin_unlock_irq(&sem->wait_lock); | ||
| 266 | break; | ||
| 267 | } | ||
| 261 | schedule(); | 268 | schedule(); |
| 262 | } | 269 | } |
| 263 | 270 | ||
| 264 | __set_current_state(TASK_RUNNING); | 271 | __set_current_state(TASK_RUNNING); |
| 265 | return sem; | 272 | return sem; |
| 273 | out_nolock: | ||
| 274 | list_del(&waiter.list); | ||
| 275 | if (list_empty(&sem->wait_list)) | ||
| 276 | atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); | ||
| 277 | raw_spin_unlock_irq(&sem->wait_lock); | ||
| 278 | __set_current_state(TASK_RUNNING); | ||
| 279 | return ERR_PTR(-EINTR); | ||
| 280 | } | ||
| 281 | |||
| 282 | __visible struct rw_semaphore * __sched | ||
| 283 | rwsem_down_read_failed(struct rw_semaphore *sem) | ||
| 284 | { | ||
| 285 | return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); | ||
| 266 | } | 286 | } |
| 267 | EXPORT_SYMBOL(rwsem_down_read_failed); | 287 | EXPORT_SYMBOL(rwsem_down_read_failed); |
| 268 | 288 | ||
| 289 | __visible struct rw_semaphore * __sched | ||
| 290 | rwsem_down_read_failed_killable(struct rw_semaphore *sem) | ||
| 291 | { | ||
| 292 | return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); | ||
| 293 | } | ||
| 294 | EXPORT_SYMBOL(rwsem_down_read_failed_killable); | ||
| 295 | |||
| 269 | /* | 296 | /* |
| 270 | * This function must be called with the sem->wait_lock held to prevent | 297 | * This function must be called with the sem->wait_lock held to prevent |
| 271 | * race conditions between checking the rwsem wait list and setting the | 298 | * race conditions between checking the rwsem wait list and setting the |
diff --git a/kernel/panic.c b/kernel/panic.c index a58932b41700..bdd18afa19a4 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <linux/nmi.h> | 26 | #include <linux/nmi.h> |
| 27 | #include <linux/console.h> | 27 | #include <linux/console.h> |
| 28 | #include <linux/bug.h> | 28 | #include <linux/bug.h> |
| 29 | #include <linux/ratelimit.h> | ||
| 29 | 30 | ||
| 30 | #define PANIC_TIMER_STEP 100 | 31 | #define PANIC_TIMER_STEP 100 |
| 31 | #define PANIC_BLINK_SPD 18 | 32 | #define PANIC_BLINK_SPD 18 |
| @@ -601,6 +602,17 @@ EXPORT_SYMBOL(__stack_chk_fail); | |||
| 601 | 602 | ||
| 602 | #endif | 603 | #endif |
| 603 | 604 | ||
| 605 | #ifdef CONFIG_ARCH_HAS_REFCOUNT | ||
| 606 | void refcount_error_report(struct pt_regs *regs, const char *err) | ||
| 607 | { | ||
| 608 | WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n", | ||
| 609 | err, (void *)instruction_pointer(regs), | ||
| 610 | current->comm, task_pid_nr(current), | ||
| 611 | from_kuid_munged(&init_user_ns, current_uid()), | ||
| 612 | from_kuid_munged(&init_user_ns, current_euid())); | ||
| 613 | } | ||
| 614 | #endif | ||
| 615 | |||
| 604 | core_param(panic, panic_timeout, int, 0644); | 616 | core_param(panic, panic_timeout, int, 0644); |
| 605 | core_param(pause_on_oops, pause_on_oops, int, 0644); | 617 | core_param(pause_on_oops, pause_on_oops, int, 0644); |
| 606 | core_param(panic_on_warn, panic_on_warn, int, 0644); | 618 | core_param(panic_on_warn, panic_on_warn, int, 0644); |
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 13fc5ae9bf2f..566b6ec7b6fe 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c | |||
| @@ -32,6 +32,12 @@ void complete(struct completion *x) | |||
| 32 | unsigned long flags; | 32 | unsigned long flags; |
| 33 | 33 | ||
| 34 | spin_lock_irqsave(&x->wait.lock, flags); | 34 | spin_lock_irqsave(&x->wait.lock, flags); |
| 35 | |||
| 36 | /* | ||
| 37 | * Perform commit of crossrelease here. | ||
| 38 | */ | ||
| 39 | complete_release_commit(x); | ||
| 40 | |||
| 35 | if (x->done != UINT_MAX) | 41 | if (x->done != UINT_MAX) |
| 36 | x->done++; | 42 | x->done++; |
| 37 | __wake_up_locked(&x->wait, TASK_NORMAL, 1); | 43 | __wake_up_locked(&x->wait, TASK_NORMAL, 1); |
| @@ -92,9 +98,14 @@ __wait_for_common(struct completion *x, | |||
| 92 | { | 98 | { |
| 93 | might_sleep(); | 99 | might_sleep(); |
| 94 | 100 | ||
| 101 | complete_acquire(x); | ||
| 102 | |||
| 95 | spin_lock_irq(&x->wait.lock); | 103 | spin_lock_irq(&x->wait.lock); |
| 96 | timeout = do_wait_for_common(x, action, timeout, state); | 104 | timeout = do_wait_for_common(x, action, timeout, state); |
| 97 | spin_unlock_irq(&x->wait.lock); | 105 | spin_unlock_irq(&x->wait.lock); |
| 106 | |||
| 107 | complete_release(x); | ||
| 108 | |||
| 98 | return timeout; | 109 | return timeout; |
| 99 | } | 110 | } |
| 100 | 111 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0869b20fba81..9fece583a1f0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -1967,8 +1967,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
| 1967 | * reordered with p->state check below. This pairs with mb() in | 1967 | * reordered with p->state check below. This pairs with mb() in |
| 1968 | * set_current_state() the waiting thread does. | 1968 | * set_current_state() the waiting thread does. |
| 1969 | */ | 1969 | */ |
| 1970 | smp_mb__before_spinlock(); | ||
| 1971 | raw_spin_lock_irqsave(&p->pi_lock, flags); | 1970 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
| 1971 | smp_mb__after_spinlock(); | ||
| 1972 | if (!(p->state & state)) | 1972 | if (!(p->state & state)) |
| 1973 | goto out; | 1973 | goto out; |
| 1974 | 1974 | ||
| @@ -3281,8 +3281,8 @@ static void __sched notrace __schedule(bool preempt) | |||
| 3281 | * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) | 3281 | * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) |
| 3282 | * done by the caller to avoid the race with signal_wake_up(). | 3282 | * done by the caller to avoid the race with signal_wake_up(). |
| 3283 | */ | 3283 | */ |
| 3284 | smp_mb__before_spinlock(); | ||
| 3285 | rq_lock(rq, &rf); | 3284 | rq_lock(rq, &rf); |
| 3285 | smp_mb__after_spinlock(); | ||
| 3286 | 3286 | ||
| 3287 | /* Promote REQ to ACT */ | 3287 | /* Promote REQ to ACT */ |
| 3288 | rq->clock_update_flags <<= 1; | 3288 | rq->clock_update_flags <<= 1; |
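Moving the barrier inside the lock does not change the sleep/wake contract, it just expresses it with the spinlock the code already takes. A simplified sketch of that contract (demo_cond is illustrative; the waker half stands in for a try_to_wake_up() caller):

  static bool demo_cond;

  static void demo_sleeper(void)
  {
          set_current_state(TASK_INTERRUPTIBLE);  /* store ->state; implies a full mb */
          if (!READ_ONCE(demo_cond))
                  schedule();
          __set_current_state(TASK_RUNNING);
  }

  static void demo_waker(struct task_struct *sleeper)
  {
          WRITE_ONCE(demo_cond, true);
          /*
           * No explicit barrier needed here: try_to_wake_up() orders this
           * store against its read of sleeper->state with the
           * smp_mb__after_spinlock() it now issues right after taking
           * sleeper->pi_lock.
           */
          wake_up_process(sleeper);
  }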
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 3d5610dcce11..2227e183e202 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c | |||
| @@ -33,9 +33,6 @@ void swake_up(struct swait_queue_head *q) | |||
| 33 | { | 33 | { |
| 34 | unsigned long flags; | 34 | unsigned long flags; |
| 35 | 35 | ||
| 36 | if (!swait_active(q)) | ||
| 37 | return; | ||
| 38 | |||
| 39 | raw_spin_lock_irqsave(&q->lock, flags); | 36 | raw_spin_lock_irqsave(&q->lock, flags); |
| 40 | swake_up_locked(q); | 37 | swake_up_locked(q); |
| 41 | raw_spin_unlock_irqrestore(&q->lock, flags); | 38 | raw_spin_unlock_irqrestore(&q->lock, flags); |
| @@ -51,9 +48,6 @@ void swake_up_all(struct swait_queue_head *q) | |||
| 51 | struct swait_queue *curr; | 48 | struct swait_queue *curr; |
| 52 | LIST_HEAD(tmp); | 49 | LIST_HEAD(tmp); |
| 53 | 50 | ||
| 54 | if (!swait_active(q)) | ||
| 55 | return; | ||
| 56 | |||
| 57 | raw_spin_lock_irq(&q->lock); | 51 | raw_spin_lock_irq(&q->lock); |
| 58 | list_splice_init(&q->task_list, &tmp); | 52 | list_splice_init(&q->task_list, &tmp); |
| 59 | while (!list_empty(&tmp)) { | 53 | while (!list_empty(&tmp)) { |
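With the unlocked swait_active() test removed from swake_up()/swake_up_all(), a caller that still wants a lock-free fast path must supply its own ordering between its condition store and the waiter check. A hedged sketch of that idiom (the vcpu structure and field are invented):

  struct demo_vcpu {
          bool wakeup_pending;
          struct swait_queue_head wq;
  };

  static void demo_kick(struct demo_vcpu *vcpu)
  {
          WRITE_ONCE(vcpu->wakeup_pending, true);
          /*
           * Order the store above against the swait_active() load below;
           * pairs with the ordering the waiter side gets from taking
           * wq.lock and re-checking wakeup_pending before sleeping.
           */
          smp_mb();
          if (swait_active(&vcpu->wq))
                  swake_up(&vcpu->wq);
  }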
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ca937b0c3a96..f128b3becfe1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -2093,6 +2093,7 @@ __acquires(&pool->lock) | |||
| 2093 | 2093 | ||
| 2094 | lock_map_acquire_read(&pwq->wq->lockdep_map); | 2094 | lock_map_acquire_read(&pwq->wq->lockdep_map); |
| 2095 | lock_map_acquire(&lockdep_map); | 2095 | lock_map_acquire(&lockdep_map); |
| 2096 | crossrelease_hist_start(XHLOCK_PROC); | ||
| 2096 | trace_workqueue_execute_start(work); | 2097 | trace_workqueue_execute_start(work); |
| 2097 | worker->current_func(work); | 2098 | worker->current_func(work); |
| 2098 | /* | 2099 | /* |
| @@ -2100,6 +2101,7 @@ __acquires(&pool->lock) | |||
| 2100 | * point will only record its address. | 2101 | * point will only record its address. |
| 2101 | */ | 2102 | */ |
| 2102 | trace_workqueue_execute_end(work); | 2103 | trace_workqueue_execute_end(work); |
| 2104 | crossrelease_hist_end(XHLOCK_PROC); | ||
| 2103 | lock_map_release(&lockdep_map); | 2105 | lock_map_release(&lockdep_map); |
| 2104 | lock_map_release(&pwq->wq->lockdep_map); | 2106 | lock_map_release(&pwq->wq->lockdep_map); |
| 2105 | 2107 | ||
| @@ -2474,7 +2476,16 @@ static void insert_wq_barrier(struct pool_workqueue *pwq, | |||
| 2474 | */ | 2476 | */ |
| 2475 | INIT_WORK_ONSTACK(&barr->work, wq_barrier_func); | 2477 | INIT_WORK_ONSTACK(&barr->work, wq_barrier_func); |
| 2476 | __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); | 2478 | __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); |
| 2477 | init_completion(&barr->done); | 2479 | |
| 2480 | /* | ||
| 2481 | * Explicitly init the crosslock for wq_barrier::done, make its lock | ||
| 2482 | * key a subkey of the corresponding work. As a result we won't | ||
| 2483 | * build a dependency between wq_barrier::done and unrelated work. | ||
| 2484 | */ | ||
| 2485 | lockdep_init_map_crosslock((struct lockdep_map *)&barr->done.map, | ||
| 2486 | "(complete)wq_barr::done", | ||
| 2487 | target->lockdep_map.key, 1); | ||
| 2488 | __init_completion(&barr->done); | ||
| 2478 | barr->task = current; | 2489 | barr->task = current; |
| 2479 | 2490 | ||
| 2480 | /* | 2491 | /* |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c617b9d1d6cb..e718df3cbd46 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
| @@ -1088,6 +1088,8 @@ config PROVE_LOCKING | |||
| 1088 | select DEBUG_MUTEXES | 1088 | select DEBUG_MUTEXES |
| 1089 | select DEBUG_RT_MUTEXES if RT_MUTEXES | 1089 | select DEBUG_RT_MUTEXES if RT_MUTEXES |
| 1090 | select DEBUG_LOCK_ALLOC | 1090 | select DEBUG_LOCK_ALLOC |
| 1091 | select LOCKDEP_CROSSRELEASE | ||
| 1092 | select LOCKDEP_COMPLETIONS | ||
| 1091 | select TRACE_IRQFLAGS | 1093 | select TRACE_IRQFLAGS |
| 1092 | default n | 1094 | default n |
| 1093 | help | 1095 | help |
| @@ -1157,6 +1159,22 @@ config LOCK_STAT | |||
| 1157 | CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. | 1159 | CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. |
| 1158 | (CONFIG_LOCKDEP defines "acquire" and "release" events.) | 1160 | (CONFIG_LOCKDEP defines "acquire" and "release" events.) |
| 1159 | 1161 | ||
| 1162 | config LOCKDEP_CROSSRELEASE | ||
| 1163 | bool | ||
| 1164 | help | ||
| 1165 | This makes lockdep work for crosslocks, which are locks allowed to | ||
| 1166 | be released in a different context from the acquisition context. | ||
| 1167 | Normally a lock must be released in the context that acquired it. | ||
| 1168 | However, relaxing this constraint lets synchronization primitives | ||
| 1169 | such as page locks or completions use the lock correctness | ||
| 1170 | detector, lockdep. | ||
| 1171 | |||
| 1172 | config LOCKDEP_COMPLETIONS | ||
| 1173 | bool | ||
| 1174 | help | ||
| 1175 | A deadlock caused by wait_for_completion() and complete() can be | ||
| 1176 | detected by lockdep using crossrelease feature. | ||
| 1177 | |||
| 1160 | config DEBUG_LOCKDEP | 1178 | config DEBUG_LOCKDEP |
| 1161 | bool "Lock dependency engine debugging" | 1179 | bool "Lock dependency engine debugging" |
| 1162 | depends on DEBUG_KERNEL && LOCKDEP | 1180 | depends on DEBUG_KERNEL && LOCKDEP |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 90731e3b7e58..3644ff918434 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
| @@ -1510,8 +1510,15 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) | |||
| 1510 | } | 1510 | } |
| 1511 | 1511 | ||
| 1512 | /* | 1512 | /* |
| 1513 | * The page_table_lock above provides a memory barrier | 1513 | * Since we took the NUMA fault, we must have observed the !accessible |
| 1514 | * with change_protection_range. | 1514 | * bit. Make sure all other CPUs agree with that, to avoid them |
| 1515 | * modifying the page we're about to migrate. | ||
| 1516 | * | ||
| 1517 | * Must be done under PTL such that we'll observe the relevant | ||
| 1518 | * inc_tlb_flush_pending(). | ||
| 1519 | * | ||
| 1520 | * We are not sure a pending tlb flush here is for a huge page | ||
| 1521 | * mapping or not. Hence use the TLB range variant. | ||
| 1515 | */ | 1522 | */ |
| 1516 | if (mm_tlb_flush_pending(vma->vm_mm)) | 1523 | if (mm_tlb_flush_pending(vma->vm_mm)) |
| 1517 | flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); | 1524 | flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); |
| @@ -1521,6 +1528,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) | |||
| 1521 | * and access rights restored. | 1528 | * and access rights restored. |
| 1522 | */ | 1529 | */ |
| 1523 | spin_unlock(vmf->ptl); | 1530 | spin_unlock(vmf->ptl); |
| 1531 | |||
| 1524 | migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma, | 1532 | migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma, |
| 1525 | vmf->pmd, pmd, vmf->address, page, target_nid); | 1533 | vmf->pmd, pmd, vmf->address, page, target_nid); |
| 1526 | if (migrated) { | 1534 | if (migrated) { |
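The rewritten comment refers to the pending-TLB-flush handshake between a protection changer and the NUMA fault path. A simplified sketch of the two sides, assuming the inc/dec helpers named in the comment (a paraphrase, not literal kernel code):

  /* Writer, shaped like change_protection() (simplified): */
  static void demo_change_protection(struct vm_area_struct *vma,
                                     unsigned long start, unsigned long end)
  {
          struct mm_struct *mm = vma->vm_mm;

          inc_tlb_flush_pending(mm);      /* visible before PTEs are touched */
          /* ... clear/modify PTEs under the page table lock ... */
          flush_tlb_range(vma, start, end);
          dec_tlb_flush_pending(mm);      /* flush finished, drop the hint   */
  }

  /* Reader, shaped like the huge-page NUMA fault path, called with the
   * page table lock held (simplified): */
  static void demo_numa_fault_sync(struct vm_area_struct *vma, unsigned long haddr)
  {
          if (mm_tlb_flush_pending(vma->vm_mm))
                  flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
  }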
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index ca11bc4ce205..6f319fb81718 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c | |||
| @@ -267,13 +267,13 @@ static void check_memory_region(unsigned long addr, | |||
| 267 | check_memory_region_inline(addr, size, write, ret_ip); | 267 | check_memory_region_inline(addr, size, write, ret_ip); |
| 268 | } | 268 | } |
| 269 | 269 | ||
| 270 | void kasan_check_read(const void *p, unsigned int size) | 270 | void kasan_check_read(const volatile void *p, unsigned int size) |
| 271 | { | 271 | { |
| 272 | check_memory_region((unsigned long)p, size, false, _RET_IP_); | 272 | check_memory_region((unsigned long)p, size, false, _RET_IP_); |
| 273 | } | 273 | } |
| 274 | EXPORT_SYMBOL(kasan_check_read); | 274 | EXPORT_SYMBOL(kasan_check_read); |
| 275 | 275 | ||
| 276 | void kasan_check_write(const void *p, unsigned int size) | 276 | void kasan_check_write(const volatile void *p, unsigned int size) |
| 277 | { | 277 | { |
| 278 | check_memory_region((unsigned long)p, size, true, _RET_IP_); | 278 | check_memory_region((unsigned long)p, size, true, _RET_IP_); |
| 279 | } | 279 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1bad301820c7..471b0526b876 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -66,6 +66,7 @@ | |||
| 66 | #include <linux/kthread.h> | 66 | #include <linux/kthread.h> |
| 67 | #include <linux/memcontrol.h> | 67 | #include <linux/memcontrol.h> |
| 68 | #include <linux/ftrace.h> | 68 | #include <linux/ftrace.h> |
| 69 | #include <linux/lockdep.h> | ||
| 69 | 70 | ||
| 70 | #include <asm/sections.h> | 71 | #include <asm/sections.h> |
| 71 | #include <asm/tlbflush.h> | 72 | #include <asm/tlbflush.h> |
| @@ -3494,6 +3495,47 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla | |||
| 3494 | } | 3495 | } |
| 3495 | #endif /* CONFIG_COMPACTION */ | 3496 | #endif /* CONFIG_COMPACTION */ |
| 3496 | 3497 | ||
| 3498 | #ifdef CONFIG_LOCKDEP | ||
| 3499 | struct lockdep_map __fs_reclaim_map = | ||
| 3500 | STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map); | ||
| 3501 | |||
| 3502 | static bool __need_fs_reclaim(gfp_t gfp_mask) | ||
| 3503 | { | ||
| 3504 | gfp_mask = current_gfp_context(gfp_mask); | ||
| 3505 | |||
| 3506 | /* no reclaim without waiting on it */ | ||
| 3507 | if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) | ||
| 3508 | return false; | ||
| 3509 | |||
| 3510 | /* this guy won't enter reclaim */ | ||
| 3511 | if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) | ||
| 3512 | return false; | ||
| 3513 | |||
| 3514 | /* We're only interested in __GFP_FS allocations for now */ | ||
| 3515 | if (!(gfp_mask & __GFP_FS)) | ||
| 3516 | return false; | ||
| 3517 | |||
| 3518 | if (gfp_mask & __GFP_NOLOCKDEP) | ||
| 3519 | return false; | ||
| 3520 | |||
| 3521 | return true; | ||
| 3522 | } | ||
| 3523 | |||
| 3524 | void fs_reclaim_acquire(gfp_t gfp_mask) | ||
| 3525 | { | ||
| 3526 | if (__need_fs_reclaim(gfp_mask)) | ||
| 3527 | lock_map_acquire(&__fs_reclaim_map); | ||
| 3528 | } | ||
| 3529 | EXPORT_SYMBOL_GPL(fs_reclaim_acquire); | ||
| 3530 | |||
| 3531 | void fs_reclaim_release(gfp_t gfp_mask) | ||
| 3532 | { | ||
| 3533 | if (__need_fs_reclaim(gfp_mask)) | ||
| 3534 | lock_map_release(&__fs_reclaim_map); | ||
| 3535 | } | ||
| 3536 | EXPORT_SYMBOL_GPL(fs_reclaim_release); | ||
| 3537 | #endif | ||
| 3538 | |||
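
A hedged sketch of how this map is meant to be used (the wrapper function below is hypothetical; fs_reclaim_acquire()/fs_reclaim_release() are the helpers added above): allocation entry points acquire and immediately release the fake lock so lockdep records that the calling context may recurse into fs reclaim, while direct reclaim and kswapd (see the mm/vmscan.c hunks below) hold it across the whole reclaim, turning any conflicting lock order into a reportable inversion.

	/* Hypothetical caller, for illustration only. */
	static void *annotated_alloc_sketch(gfp_t gfp_mask)
	{
		/* record "may end up in fs reclaim"; nothing is actually held */
		fs_reclaim_acquire(gfp_mask);
		fs_reclaim_release(gfp_mask);

		/* ... the real allocation, possibly entering direct reclaim ... */
		return NULL;
	}
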
| 3497 | /* Perform direct synchronous page reclaim */ | 3539 | /* Perform direct synchronous page reclaim */ |
| 3498 | static int | 3540 | static int |
| 3499 | __perform_reclaim(gfp_t gfp_mask, unsigned int order, | 3541 | __perform_reclaim(gfp_t gfp_mask, unsigned int order, |
| @@ -3508,7 +3550,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, | |||
| 3508 | /* We now go into synchronous reclaim */ | 3550 | /* We now go into synchronous reclaim */ |
| 3509 | cpuset_memory_pressure_bump(); | 3551 | cpuset_memory_pressure_bump(); |
| 3510 | noreclaim_flag = memalloc_noreclaim_save(); | 3552 | noreclaim_flag = memalloc_noreclaim_save(); |
| 3511 | lockdep_set_current_reclaim_state(gfp_mask); | 3553 | fs_reclaim_acquire(gfp_mask); |
| 3512 | reclaim_state.reclaimed_slab = 0; | 3554 | reclaim_state.reclaimed_slab = 0; |
| 3513 | current->reclaim_state = &reclaim_state; | 3555 | current->reclaim_state = &reclaim_state; |
| 3514 | 3556 | ||
| @@ -3516,7 +3558,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, | |||
| 3516 | ac->nodemask); | 3558 | ac->nodemask); |
| 3517 | 3559 | ||
| 3518 | current->reclaim_state = NULL; | 3560 | current->reclaim_state = NULL; |
| 3519 | lockdep_clear_current_reclaim_state(); | 3561 | fs_reclaim_release(gfp_mask); |
| 3520 | memalloc_noreclaim_restore(noreclaim_flag); | 3562 | memalloc_noreclaim_restore(noreclaim_flag); |
| 3521 | 3563 | ||
| 3522 | cond_resched(); | 3564 | cond_resched(); |
| @@ -4045,7 +4087,8 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, | |||
| 4045 | *alloc_flags |= ALLOC_CPUSET; | 4087 | *alloc_flags |= ALLOC_CPUSET; |
| 4046 | } | 4088 | } |
| 4047 | 4089 | ||
| 4048 | lockdep_trace_alloc(gfp_mask); | 4090 | fs_reclaim_acquire(gfp_mask); |
| 4091 | fs_reclaim_release(gfp_mask); | ||
| 4049 | 4092 | ||
| 4050 | might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); | 4093 | might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); |
| 4051 | 4094 | ||
| @@ -43,6 +43,7 @@ struct kmem_cache { | |||
| 43 | #include <linux/kasan.h> | 43 | #include <linux/kasan.h> |
| 44 | #include <linux/kmemleak.h> | 44 | #include <linux/kmemleak.h> |
| 45 | #include <linux/random.h> | 45 | #include <linux/random.h> |
| 46 | #include <linux/sched/mm.h> | ||
| 46 | 47 | ||
| 47 | /* | 48 | /* |
| 48 | * State of the slab allocator. | 49 | * State of the slab allocator. |
| @@ -412,7 +413,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, | |||
| 412 | gfp_t flags) | 413 | gfp_t flags) |
| 413 | { | 414 | { |
| 414 | flags &= gfp_allowed_mask; | 415 | flags &= gfp_allowed_mask; |
| 415 | lockdep_trace_alloc(flags); | 416 | |
| 417 | fs_reclaim_acquire(flags); | ||
| 418 | fs_reclaim_release(flags); | ||
| 419 | |||
| 416 | might_sleep_if(gfpflags_allow_blocking(flags)); | 420 | might_sleep_if(gfpflags_allow_blocking(flags)); |
| 417 | 421 | ||
| 418 | if (should_failslab(s, flags)) | 422 | if (should_failslab(s, flags)) |
| @@ -432,7 +432,8 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) | |||
| 432 | 432 | ||
| 433 | gfp &= gfp_allowed_mask; | 433 | gfp &= gfp_allowed_mask; |
| 434 | 434 | ||
| 435 | lockdep_trace_alloc(gfp); | 435 | fs_reclaim_acquire(gfp); |
| 436 | fs_reclaim_release(gfp); | ||
| 436 | 437 | ||
| 437 | if (size < PAGE_SIZE - align) { | 438 | if (size < PAGE_SIZE - align) { |
| 438 | if (!size) | 439 | if (!size) |
| @@ -538,7 +539,8 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | |||
| 538 | 539 | ||
| 539 | flags &= gfp_allowed_mask; | 540 | flags &= gfp_allowed_mask; |
| 540 | 541 | ||
| 541 | lockdep_trace_alloc(flags); | 542 | fs_reclaim_acquire(flags); |
| 543 | fs_reclaim_release(flags); | ||
| 542 | 544 | ||
| 543 | if (c->size < PAGE_SIZE) { | 545 | if (c->size < PAGE_SIZE) { |
| 544 | b = slob_alloc(c->size, flags, c->align, node); | 546 | b = slob_alloc(c->size, flags, c->align, node); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index a1af041930a6..f957afe900ec 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -3525,8 +3525,6 @@ static int kswapd(void *p) | |||
| 3525 | }; | 3525 | }; |
| 3526 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); | 3526 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); |
| 3527 | 3527 | ||
| 3528 | lockdep_set_current_reclaim_state(GFP_KERNEL); | ||
| 3529 | |||
| 3530 | if (!cpumask_empty(cpumask)) | 3528 | if (!cpumask_empty(cpumask)) |
| 3531 | set_cpus_allowed_ptr(tsk, cpumask); | 3529 | set_cpus_allowed_ptr(tsk, cpumask); |
| 3532 | current->reclaim_state = &reclaim_state; | 3530 | current->reclaim_state = &reclaim_state; |
| @@ -3585,14 +3583,15 @@ kswapd_try_sleep: | |||
| 3585 | */ | 3583 | */ |
| 3586 | trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, | 3584 | trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, |
| 3587 | alloc_order); | 3585 | alloc_order); |
| 3586 | fs_reclaim_acquire(GFP_KERNEL); | ||
| 3588 | reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); | 3587 | reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); |
| 3588 | fs_reclaim_release(GFP_KERNEL); | ||
| 3589 | if (reclaim_order < alloc_order) | 3589 | if (reclaim_order < alloc_order) |
| 3590 | goto kswapd_try_sleep; | 3590 | goto kswapd_try_sleep; |
| 3591 | } | 3591 | } |
| 3592 | 3592 | ||
| 3593 | tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); | 3593 | tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); |
| 3594 | current->reclaim_state = NULL; | 3594 | current->reclaim_state = NULL; |
| 3595 | lockdep_clear_current_reclaim_state(); | ||
| 3596 | 3595 | ||
| 3597 | return 0; | 3596 | return 0; |
| 3598 | } | 3597 | } |
| @@ -3655,14 +3654,14 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | |||
| 3655 | unsigned int noreclaim_flag; | 3654 | unsigned int noreclaim_flag; |
| 3656 | 3655 | ||
| 3657 | noreclaim_flag = memalloc_noreclaim_save(); | 3656 | noreclaim_flag = memalloc_noreclaim_save(); |
| 3658 | lockdep_set_current_reclaim_state(sc.gfp_mask); | 3657 | fs_reclaim_acquire(sc.gfp_mask); |
| 3659 | reclaim_state.reclaimed_slab = 0; | 3658 | reclaim_state.reclaimed_slab = 0; |
| 3660 | p->reclaim_state = &reclaim_state; | 3659 | p->reclaim_state = &reclaim_state; |
| 3661 | 3660 | ||
| 3662 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc); | 3661 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc); |
| 3663 | 3662 | ||
| 3664 | p->reclaim_state = NULL; | 3663 | p->reclaim_state = NULL; |
| 3665 | lockdep_clear_current_reclaim_state(); | 3664 | fs_reclaim_release(sc.gfp_mask); |
| 3666 | memalloc_noreclaim_restore(noreclaim_flag); | 3665 | memalloc_noreclaim_restore(noreclaim_flag); |
| 3667 | 3666 | ||
| 3668 | return nr_reclaimed; | 3667 | return nr_reclaimed; |
| @@ -3847,7 +3846,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in | |||
| 3847 | */ | 3846 | */ |
| 3848 | noreclaim_flag = memalloc_noreclaim_save(); | 3847 | noreclaim_flag = memalloc_noreclaim_save(); |
| 3849 | p->flags |= PF_SWAPWRITE; | 3848 | p->flags |= PF_SWAPWRITE; |
| 3850 | lockdep_set_current_reclaim_state(sc.gfp_mask); | 3849 | fs_reclaim_acquire(sc.gfp_mask); |
| 3851 | reclaim_state.reclaimed_slab = 0; | 3850 | reclaim_state.reclaimed_slab = 0; |
| 3852 | p->reclaim_state = &reclaim_state; | 3851 | p->reclaim_state = &reclaim_state; |
| 3853 | 3852 | ||
| @@ -3862,9 +3861,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in | |||
| 3862 | } | 3861 | } |
| 3863 | 3862 | ||
| 3864 | p->reclaim_state = NULL; | 3863 | p->reclaim_state = NULL; |
| 3864 | fs_reclaim_release(gfp_mask); | ||
| 3865 | current->flags &= ~PF_SWAPWRITE; | 3865 | current->flags &= ~PF_SWAPWRITE; |
| 3866 | memalloc_noreclaim_restore(noreclaim_flag); | 3866 | memalloc_noreclaim_restore(noreclaim_flag); |
| 3867 | lockdep_clear_current_reclaim_state(); | ||
| 3868 | return sc.nr_reclaimed >= nr_pages; | 3867 | return sc.nr_reclaimed >= nr_pages; |
| 3869 | } | 3868 | } |
| 3870 | 3869 | ||
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cd1d044a7fa5..ebe46ed997cb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
| @@ -1810,8 +1810,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
| 1810 | static struct static_key udp_encap_needed __read_mostly; | 1810 | static struct static_key udp_encap_needed __read_mostly; |
| 1811 | void udp_encap_enable(void) | 1811 | void udp_encap_enable(void) |
| 1812 | { | 1812 | { |
| 1813 | if (!static_key_enabled(&udp_encap_needed)) | 1813 | static_key_enable(&udp_encap_needed); |
| 1814 | static_key_slow_inc(&udp_encap_needed); | ||
| 1815 | } | 1814 | } |
| 1816 | EXPORT_SYMBOL(udp_encap_enable); | 1815 | EXPORT_SYMBOL(udp_encap_enable); |
| 1817 | 1816 | ||
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 20039c8501eb..8cd9b628cdc7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
| @@ -575,8 +575,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, | |||
| 575 | static struct static_key udpv6_encap_needed __read_mostly; | 575 | static struct static_key udpv6_encap_needed __read_mostly; |
| 576 | void udpv6_encap_enable(void) | 576 | void udpv6_encap_enable(void) |
| 577 | { | 577 | { |
| 578 | if (!static_key_enabled(&udpv6_encap_needed)) | 578 | static_key_enable(&udpv6_encap_needed); |
| 579 | static_key_slow_inc(&udpv6_encap_needed); | ||
| 580 | } | 579 | } |
| 581 | EXPORT_SYMBOL(udpv6_encap_enable); | 580 | EXPORT_SYMBOL(udpv6_encap_enable); |
| 582 | 581 | ||
diff --git a/scripts/Makefile.build b/scripts/Makefile.build index f6152c70f7f4..a18cb4496e1e 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build | |||
| @@ -262,6 +262,9 @@ objtool_args = check | |||
| 262 | ifndef CONFIG_FRAME_POINTER | 262 | ifndef CONFIG_FRAME_POINTER |
| 263 | objtool_args += --no-fp | 263 | objtool_args += --no-fp |
| 264 | endif | 264 | endif |
| 265 | ifdef CONFIG_GCOV_KERNEL | ||
| 266 | objtool_args += --no-unreachable | ||
| 267 | endif | ||
| 265 | 268 | ||
| 266 | # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory | 269 | # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory |
| 267 | # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file | 270 | # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file |
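
This build-time hook pairs with the objtool changes further down: instead of objtool scanning every object for __gcov_ symbols at check time (the gcov_enabled() helper removed from tools/objtool/check.c), gcov-instrumented kernels now pass --no-unreachable up front, which suppresses the 'unreachable instruction' warnings that gcov instrumentation tends to trigger.
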
diff --git a/tools/objtool/Build b/tools/objtool/Build index 6f2e1987c4d9..749becdf5b90 100644 --- a/tools/objtool/Build +++ b/tools/objtool/Build | |||
| @@ -1,6 +1,9 @@ | |||
| 1 | objtool-y += arch/$(SRCARCH)/ | 1 | objtool-y += arch/$(SRCARCH)/ |
| 2 | objtool-y += builtin-check.o | 2 | objtool-y += builtin-check.o |
| 3 | objtool-y += builtin-orc.o | ||
| 3 | objtool-y += check.o | 4 | objtool-y += check.o |
| 5 | objtool-y += orc_gen.o | ||
| 6 | objtool-y += orc_dump.o | ||
| 4 | objtool-y += elf.o | 7 | objtool-y += elf.o |
| 5 | objtool-y += special.o | 8 | objtool-y += special.o |
| 6 | objtool-y += objtool.o | 9 | objtool-y += objtool.o |
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 17c1195f11f4..6a1af43862df 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt | |||
| @@ -11,9 +11,6 @@ analyzes every .o file and ensures the validity of its stack metadata. | |||
| 11 | It enforces a set of rules on asm code and C inline assembly code so | 11 | It enforces a set of rules on asm code and C inline assembly code so |
| 12 | that stack traces can be reliable. | 12 | that stack traces can be reliable. |
| 13 | 13 | ||
| 14 | Currently it only checks frame pointer usage, but there are plans to add | ||
| 15 | CFI validation for C files and CFI generation for asm files. | ||
| 16 | |||
| 17 | For each function, it recursively follows all possible code paths and | 14 | For each function, it recursively follows all possible code paths and |
| 18 | validates the correct frame pointer state at each instruction. | 15 | validates the correct frame pointer state at each instruction. |
| 19 | 16 | ||
| @@ -23,6 +20,10 @@ alternative execution paths to a given instruction (or set of | |||
| 23 | instructions). Similarly, it knows how to follow switch statements, for | 20 | instructions). Similarly, it knows how to follow switch statements, for |
| 24 | which gcc sometimes uses jump tables. | 21 | which gcc sometimes uses jump tables. |
| 25 | 22 | ||
| 23 | (Objtool also has an 'orc generate' subcommand which generates debuginfo | ||
| 24 | for the ORC unwinder. See Documentation/x86/orc-unwinder.txt in the | ||
| 25 | kernel tree for more details.) | ||
| 26 | |||
| 26 | 27 | ||
| 27 | Why do we need stack metadata validation? | 28 | Why do we need stack metadata validation? |
| 28 | ----------------------------------------- | 29 | ----------------------------------------- |
| @@ -93,37 +94,14 @@ a) More reliable stack traces for frame pointer enabled kernels | |||
| 93 | or at the very end of the function after the stack frame has been | 94 | or at the very end of the function after the stack frame has been |
| 94 | destroyed. This is an inherent limitation of frame pointers. | 95 | destroyed. This is an inherent limitation of frame pointers. |
| 95 | 96 | ||
| 96 | b) 100% reliable stack traces for DWARF enabled kernels | 97 | b) ORC (Oops Rewind Capability) unwind table generation |
| 97 | |||
| 98 | (NOTE: This is not yet implemented) | ||
| 99 | |||
| 100 | As an alternative to frame pointers, DWARF Call Frame Information | ||
| 101 | (CFI) metadata can be used to walk the stack. Unlike frame pointers, | ||
| 102 | CFI metadata is out of band. So it doesn't affect runtime | ||
| 103 | performance and it can be reliable even when interrupts or exceptions | ||
| 104 | are involved. | ||
| 105 | |||
| 106 | For C code, gcc automatically generates DWARF CFI metadata. But for | ||
| 107 | asm code, generating CFI is a tedious manual approach which requires | ||
| 108 | manually placed .cfi assembler macros to be scattered throughout the | ||
| 109 | code. It's clumsy and very easy to get wrong, and it makes the real | ||
| 110 | code harder to read. | ||
| 111 | |||
| 112 | Stacktool will improve this situation in several ways. For code | ||
| 113 | which already has CFI annotations, it will validate them. For code | ||
| 114 | which doesn't have CFI annotations, it will generate them. So an | ||
| 115 | architecture can opt to strip out all the manual .cfi annotations | ||
| 116 | from their asm code and have objtool generate them instead. | ||
| 117 | 98 | ||
| 118 | We might also add a runtime stack validation debug option where we | 99 | An alternative to frame pointers and DWARF, ORC unwind data can be |
| 119 | periodically walk the stack from schedule() and/or an NMI to ensure | 100 | used to walk the stack. Unlike frame pointers, ORC data is out of |
| 120 | that the stack metadata is sane and that we reach the bottom of the | 101 | band. So it doesn't affect runtime performance and it can be |
| 121 | stack. | 102 | reliable even when interrupts or exceptions are involved. |
| 122 | 103 | ||
| 123 | So the benefit of objtool here will be that external tooling should | 104 | For more details, see Documentation/x86/orc-unwinder.txt. |
| 124 | always show perfect stack traces. And the same will be true for | ||
| 125 | kernel warning/oops traces if the architecture has a runtime DWARF | ||
| 126 | unwinder. | ||
| 127 | 105 | ||
| 128 | c) Higher live patching compatibility rate | 106 | c) Higher live patching compatibility rate |
| 129 | 107 | ||
| @@ -211,7 +189,7 @@ they mean, and suggestions for how to fix them. | |||
| 211 | function, add proper frame pointer logic using the FRAME_BEGIN and | 189 | function, add proper frame pointer logic using the FRAME_BEGIN and |
| 212 | FRAME_END macros. Otherwise, if it's not a callable function, remove | 190 | FRAME_END macros. Otherwise, if it's not a callable function, remove |
| 213 | its ELF function annotation by changing ENDPROC to END, and instead | 191 | its ELF function annotation by changing ENDPROC to END, and instead |
| 214 | use the manual CFI hint macros in asm/undwarf.h. | 192 | use the manual unwind hint macros in asm/unwind_hints.h. |
| 215 | 193 | ||
| 216 | If it's a GCC-compiled .c file, the error may be because the function | 194 | If it's a GCC-compiled .c file, the error may be because the function |
| 217 | uses an inline asm() statement which has a "call" instruction. An | 195 | uses an inline asm() statement which has a "call" instruction. An |
| @@ -231,8 +209,8 @@ they mean, and suggestions for how to fix them. | |||
| 231 | If the error is for an asm file, and the instruction is inside (or | 209 | If the error is for an asm file, and the instruction is inside (or |
| 232 | reachable from) a callable function, the function should be annotated | 210 | reachable from) a callable function, the function should be annotated |
| 233 | with the ENTRY/ENDPROC macros (ENDPROC is the important one). | 211 | with the ENTRY/ENDPROC macros (ENDPROC is the important one). |
| 234 | Otherwise, the code should probably be annotated with the CFI hint | 212 | Otherwise, the code should probably be annotated with the unwind hint |
| 235 | macros in asm/undwarf.h so objtool and the unwinder can know the | 213 | macros in asm/unwind_hints.h so objtool and the unwinder can know the |
| 236 | stack state associated with the code. | 214 | stack state associated with the code. |
| 237 | 215 | ||
| 238 | If you're 100% sure the code won't affect stack traces, or if you're | 216 | If you're 100% sure the code won't affect stack traces, or if you're |
| @@ -258,7 +236,7 @@ they mean, and suggestions for how to fix them. | |||
| 258 | instructions aren't allowed in a callable function, and are most | 236 | instructions aren't allowed in a callable function, and are most |
| 259 | likely part of the kernel entry code. They should usually not have | 237 | likely part of the kernel entry code. They should usually not have |
| 260 | the callable function annotation (ENDPROC) and should always be | 238 | the callable function annotation (ENDPROC) and should always be |
| 261 | annotated with the CFI hint macros in asm/undwarf.h. | 239 | annotated with the unwind hint macros in asm/unwind_hints.h. |
| 262 | 240 | ||
| 263 | 241 | ||
| 264 | 6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame | 242 | 6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame |
| @@ -272,7 +250,7 @@ they mean, and suggestions for how to fix them. | |||
| 272 | 250 | ||
| 273 | If the instruction is not actually in a callable function (e.g. | 251 | If the instruction is not actually in a callable function (e.g. |
| 274 | kernel entry code), change ENDPROC to END and annotate manually with | 252 | kernel entry code), change ENDPROC to END and annotate manually with |
| 275 | the CFI hint macros in asm/undwarf.h. | 253 | the unwind hint macros in asm/unwind_hints.h. |
| 276 | 254 | ||
| 277 | 255 | ||
| 278 | 7. file: warning: objtool: func()+0x5c: stack state mismatch | 256 | 7. file: warning: objtool: func()+0x5c: stack state mismatch |
| @@ -288,8 +266,8 @@ they mean, and suggestions for how to fix them. | |||
| 288 | 266 | ||
| 289 | Another possibility is that the code has some asm or inline asm which | 267 | Another possibility is that the code has some asm or inline asm which |
| 290 | does some unusual things to the stack or the frame pointer. In such | 268 | does some unusual things to the stack or the frame pointer. In such |
| 291 | cases it's probably appropriate to use the CFI hint macros in | 269 | cases it's probably appropriate to use the unwind hint macros in |
| 292 | asm/undwarf.h. | 270 | asm/unwind_hints.h. |
| 293 | 271 | ||
| 294 | 272 | ||
| 295 | 8. file.o: warning: objtool: funcA() falls through to next function funcB() | 273 | 8. file.o: warning: objtool: funcA() falls through to next function funcB() |
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 0e2765e243c0..3a6425fefc43 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile | |||
| @@ -52,6 +52,9 @@ $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN) | |||
| 52 | diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ | 52 | diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ |
| 53 | diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ | 53 | diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ |
| 54 | || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true | 54 | || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true |
| 55 | @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \ | ||
| 56 | diff ../../arch/x86/include/asm/orc_types.h orc_types.h >/dev/null) \ | ||
| 57 | || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true | ||
| 55 | $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ | 58 | $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ |
| 56 | 59 | ||
| 57 | 60 | ||
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 365c34ecab26..57254f5b2779 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c | |||
| @@ -29,7 +29,7 @@ | |||
| 29 | #include "builtin.h" | 29 | #include "builtin.h" |
| 30 | #include "check.h" | 30 | #include "check.h" |
| 31 | 31 | ||
| 32 | bool nofp; | 32 | bool no_fp, no_unreachable; |
| 33 | 33 | ||
| 34 | static const char * const check_usage[] = { | 34 | static const char * const check_usage[] = { |
| 35 | "objtool check [<options>] file.o", | 35 | "objtool check [<options>] file.o", |
| @@ -37,7 +37,8 @@ static const char * const check_usage[] = { | |||
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | const struct option check_options[] = { | 39 | const struct option check_options[] = { |
| 40 | OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"), | 40 | OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), |
| 41 | OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), | ||
| 41 | OPT_END(), | 42 | OPT_END(), |
| 42 | }; | 43 | }; |
| 43 | 44 | ||
| @@ -52,5 +53,5 @@ int cmd_check(int argc, const char **argv) | |||
| 52 | 53 | ||
| 53 | objname = argv[0]; | 54 | objname = argv[0]; |
| 54 | 55 | ||
| 55 | return check(objname, nofp); | 56 | return check(objname, no_fp, no_unreachable, false); |
| 56 | } | 57 | } |
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c new file mode 100644 index 000000000000..4c6b5c9ef073 --- /dev/null +++ b/tools/objtool/builtin-orc.c | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation; either version 2 | ||
| 7 | * of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
| 16 | */ | ||
| 17 | |||
| 18 | /* | ||
| 19 | * objtool orc: | ||
| 20 | * | ||
| 21 | * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip | ||
| 22 | * sections to it, which are used by the in-kernel ORC unwinder. | ||
| 23 | * | ||
| 24 | * This command is a superset of "objtool check". | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <string.h> | ||
| 28 | #include <subcmd/parse-options.h> | ||
| 29 | #include "builtin.h" | ||
| 30 | #include "check.h" | ||
| 31 | |||
| 32 | |||
| 33 | static const char *orc_usage[] = { | ||
| 34 | "objtool orc generate [<options>] file.o", | ||
| 35 | "objtool orc dump file.o", | ||
| 36 | NULL, | ||
| 37 | }; | ||
| 38 | |||
| 39 | extern const struct option check_options[]; | ||
| 40 | extern bool no_fp, no_unreachable; | ||
| 41 | |||
| 42 | int cmd_orc(int argc, const char **argv) | ||
| 43 | { | ||
| 44 | const char *objname; | ||
| 45 | |||
| 46 | argc--; argv++; | ||
| 47 | if (!strncmp(argv[0], "gen", 3)) { | ||
| 48 | argc = parse_options(argc, argv, check_options, orc_usage, 0); | ||
| 49 | if (argc != 1) | ||
| 50 | usage_with_options(orc_usage, check_options); | ||
| 51 | |||
| 52 | objname = argv[0]; | ||
| 53 | |||
| 54 | return check(objname, no_fp, no_unreachable, true); | ||
| 55 | |||
| 56 | } | ||
| 57 | |||
| 58 | if (!strcmp(argv[0], "dump")) { | ||
| 59 | if (argc != 2) | ||
| 60 | usage_with_options(orc_usage, check_options); | ||
| 61 | |||
| 62 | objname = argv[1]; | ||
| 63 | |||
| 64 | return orc_dump(objname); | ||
| 65 | } | ||
| 66 | |||
| 67 | usage_with_options(orc_usage, check_options); | ||
| 68 | |||
| 69 | return 0; | ||
| 70 | } | ||
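
Taking the usage strings above at face value, the new subcommand is invoked as 'objtool orc generate [<options>] file.o', which runs the same validation as 'objtool check' and then writes the .orc_unwind and .orc_unwind_ip sections into the object file, while 'objtool orc dump file.o' prints the ORC data of an already-processed object for inspection.
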
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index 34d2ba78a616..dd526067fed5 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h | |||
| @@ -18,5 +18,6 @@ | |||
| 18 | #define _BUILTIN_H | 18 | #define _BUILTIN_H |
| 19 | 19 | ||
| 20 | extern int cmd_check(int argc, const char **argv); | 20 | extern int cmd_check(int argc, const char **argv); |
| 21 | extern int cmd_orc(int argc, const char **argv); | ||
| 21 | 22 | ||
| 22 | #endif /* _BUILTIN_H */ | 23 | #endif /* _BUILTIN_H */ |
diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2c6d74880403..3436a942b606 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c | |||
| @@ -33,11 +33,11 @@ struct alternative { | |||
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | const char *objname; | 35 | const char *objname; |
| 36 | static bool nofp; | 36 | static bool no_fp; |
| 37 | struct cfi_state initial_func_cfi; | 37 | struct cfi_state initial_func_cfi; |
| 38 | 38 | ||
| 39 | static struct instruction *find_insn(struct objtool_file *file, | 39 | struct instruction *find_insn(struct objtool_file *file, |
| 40 | struct section *sec, unsigned long offset) | 40 | struct section *sec, unsigned long offset) |
| 41 | { | 41 | { |
| 42 | struct instruction *insn; | 42 | struct instruction *insn; |
| 43 | 43 | ||
| @@ -59,19 +59,6 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file, | |||
| 59 | return next; | 59 | return next; |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | static bool gcov_enabled(struct objtool_file *file) | ||
| 63 | { | ||
| 64 | struct section *sec; | ||
| 65 | struct symbol *sym; | ||
| 66 | |||
| 67 | for_each_sec(file, sec) | ||
| 68 | list_for_each_entry(sym, &sec->symbol_list, list) | ||
| 69 | if (!strncmp(sym->name, "__gcov_.", 8)) | ||
| 70 | return true; | ||
| 71 | |||
| 72 | return false; | ||
| 73 | } | ||
| 74 | |||
| 75 | #define func_for_each_insn(file, func, insn) \ | 62 | #define func_for_each_insn(file, func, insn) \ |
| 76 | for (insn = find_insn(file, func->sec, func->offset); \ | 63 | for (insn = find_insn(file, func->sec, func->offset); \ |
| 77 | insn && &insn->list != &file->insn_list && \ | 64 | insn && &insn->list != &file->insn_list && \ |
| @@ -100,7 +87,6 @@ static bool gcov_enabled(struct objtool_file *file) | |||
| 100 | static bool ignore_func(struct objtool_file *file, struct symbol *func) | 87 | static bool ignore_func(struct objtool_file *file, struct symbol *func) |
| 101 | { | 88 | { |
| 102 | struct rela *rela; | 89 | struct rela *rela; |
| 103 | struct instruction *insn; | ||
| 104 | 90 | ||
| 105 | /* check for STACK_FRAME_NON_STANDARD */ | 91 | /* check for STACK_FRAME_NON_STANDARD */ |
| 106 | if (file->whitelist && file->whitelist->rela) | 92 | if (file->whitelist && file->whitelist->rela) |
| @@ -113,11 +99,6 @@ static bool ignore_func(struct objtool_file *file, struct symbol *func) | |||
| 113 | return true; | 99 | return true; |
| 114 | } | 100 | } |
| 115 | 101 | ||
| 116 | /* check if it has a context switching instruction */ | ||
| 117 | func_for_each_insn(file, func, insn) | ||
| 118 | if (insn->type == INSN_CONTEXT_SWITCH) | ||
| 119 | return true; | ||
| 120 | |||
| 121 | return false; | 102 | return false; |
| 122 | } | 103 | } |
| 123 | 104 | ||
| @@ -259,6 +240,11 @@ static int decode_instructions(struct objtool_file *file) | |||
| 259 | if (!(sec->sh.sh_flags & SHF_EXECINSTR)) | 240 | if (!(sec->sh.sh_flags & SHF_EXECINSTR)) |
| 260 | continue; | 241 | continue; |
| 261 | 242 | ||
| 243 | if (strcmp(sec->name, ".altinstr_replacement") && | ||
| 244 | strcmp(sec->name, ".altinstr_aux") && | ||
| 245 | strncmp(sec->name, ".discard.", 9)) | ||
| 246 | sec->text = true; | ||
| 247 | |||
| 262 | for (offset = 0; offset < sec->len; offset += insn->len) { | 248 | for (offset = 0; offset < sec->len; offset += insn->len) { |
| 263 | insn = malloc(sizeof(*insn)); | 249 | insn = malloc(sizeof(*insn)); |
| 264 | if (!insn) { | 250 | if (!insn) { |
| @@ -874,6 +860,99 @@ static int add_switch_table_alts(struct objtool_file *file) | |||
| 874 | return 0; | 860 | return 0; |
| 875 | } | 861 | } |
| 876 | 862 | ||
| 863 | static int read_unwind_hints(struct objtool_file *file) | ||
| 864 | { | ||
| 865 | struct section *sec, *relasec; | ||
| 866 | struct rela *rela; | ||
| 867 | struct unwind_hint *hint; | ||
| 868 | struct instruction *insn; | ||
| 869 | struct cfi_reg *cfa; | ||
| 870 | int i; | ||
| 871 | |||
| 872 | sec = find_section_by_name(file->elf, ".discard.unwind_hints"); | ||
| 873 | if (!sec) | ||
| 874 | return 0; | ||
| 875 | |||
| 876 | relasec = sec->rela; | ||
| 877 | if (!relasec) { | ||
| 878 | WARN("missing .rela.discard.unwind_hints section"); | ||
| 879 | return -1; | ||
| 880 | } | ||
| 881 | |||
| 882 | if (sec->len % sizeof(struct unwind_hint)) { | ||
| 883 | WARN("struct unwind_hint size mismatch"); | ||
| 884 | return -1; | ||
| 885 | } | ||
| 886 | |||
| 887 | file->hints = true; | ||
| 888 | |||
| 889 | for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) { | ||
| 890 | hint = (struct unwind_hint *)sec->data->d_buf + i; | ||
| 891 | |||
| 892 | rela = find_rela_by_dest(sec, i * sizeof(*hint)); | ||
| 893 | if (!rela) { | ||
| 894 | WARN("can't find rela for unwind_hints[%d]", i); | ||
| 895 | return -1; | ||
| 896 | } | ||
| 897 | |||
| 898 | insn = find_insn(file, rela->sym->sec, rela->addend); | ||
| 899 | if (!insn) { | ||
| 900 | WARN("can't find insn for unwind_hints[%d]", i); | ||
| 901 | return -1; | ||
| 902 | } | ||
| 903 | |||
| 904 | cfa = &insn->state.cfa; | ||
| 905 | |||
| 906 | if (hint->type == UNWIND_HINT_TYPE_SAVE) { | ||
| 907 | insn->save = true; | ||
| 908 | continue; | ||
| 909 | |||
| 910 | } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) { | ||
| 911 | insn->restore = true; | ||
| 912 | insn->hint = true; | ||
| 913 | continue; | ||
| 914 | } | ||
| 915 | |||
| 916 | insn->hint = true; | ||
| 917 | |||
| 918 | switch (hint->sp_reg) { | ||
| 919 | case ORC_REG_UNDEFINED: | ||
| 920 | cfa->base = CFI_UNDEFINED; | ||
| 921 | break; | ||
| 922 | case ORC_REG_SP: | ||
| 923 | cfa->base = CFI_SP; | ||
| 924 | break; | ||
| 925 | case ORC_REG_BP: | ||
| 926 | cfa->base = CFI_BP; | ||
| 927 | break; | ||
| 928 | case ORC_REG_SP_INDIRECT: | ||
| 929 | cfa->base = CFI_SP_INDIRECT; | ||
| 930 | break; | ||
| 931 | case ORC_REG_R10: | ||
| 932 | cfa->base = CFI_R10; | ||
| 933 | break; | ||
| 934 | case ORC_REG_R13: | ||
| 935 | cfa->base = CFI_R13; | ||
| 936 | break; | ||
| 937 | case ORC_REG_DI: | ||
| 938 | cfa->base = CFI_DI; | ||
| 939 | break; | ||
| 940 | case ORC_REG_DX: | ||
| 941 | cfa->base = CFI_DX; | ||
| 942 | break; | ||
| 943 | default: | ||
| 944 | WARN_FUNC("unsupported unwind_hint sp base reg %d", | ||
| 945 | insn->sec, insn->offset, hint->sp_reg); | ||
| 946 | return -1; | ||
| 947 | } | ||
| 948 | |||
| 949 | cfa->offset = hint->sp_offset; | ||
| 950 | insn->state.type = hint->type; | ||
| 951 | } | ||
| 952 | |||
| 953 | return 0; | ||
| 954 | } | ||
| 955 | |||
| 877 | static int decode_sections(struct objtool_file *file) | 956 | static int decode_sections(struct objtool_file *file) |
| 878 | { | 957 | { |
| 879 | int ret; | 958 | int ret; |
| @@ -904,6 +983,10 @@ static int decode_sections(struct objtool_file *file) | |||
| 904 | if (ret) | 983 | if (ret) |
| 905 | return ret; | 984 | return ret; |
| 906 | 985 | ||
| 986 | ret = read_unwind_hints(file); | ||
| 987 | if (ret) | ||
| 988 | return ret; | ||
| 989 | |||
| 907 | return 0; | 990 | return 0; |
| 908 | } | 991 | } |
| 909 | 992 | ||
| @@ -947,6 +1030,30 @@ static bool has_valid_stack_frame(struct insn_state *state) | |||
| 947 | return false; | 1030 | return false; |
| 948 | } | 1031 | } |
| 949 | 1032 | ||
| 1033 | static int update_insn_state_regs(struct instruction *insn, struct insn_state *state) | ||
| 1034 | { | ||
| 1035 | struct cfi_reg *cfa = &state->cfa; | ||
| 1036 | struct stack_op *op = &insn->stack_op; | ||
| 1037 | |||
| 1038 | if (cfa->base != CFI_SP) | ||
| 1039 | return 0; | ||
| 1040 | |||
| 1041 | /* push */ | ||
| 1042 | if (op->dest.type == OP_DEST_PUSH) | ||
| 1043 | cfa->offset += 8; | ||
| 1044 | |||
| 1045 | /* pop */ | ||
| 1046 | if (op->src.type == OP_SRC_POP) | ||
| 1047 | cfa->offset -= 8; | ||
| 1048 | |||
| 1049 | /* add immediate to sp */ | ||
| 1050 | if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD && | ||
| 1051 | op->dest.reg == CFI_SP && op->src.reg == CFI_SP) | ||
| 1052 | cfa->offset -= op->src.offset; | ||
| 1053 | |||
| 1054 | return 0; | ||
| 1055 | } | ||
| 1056 | |||
| 950 | static void save_reg(struct insn_state *state, unsigned char reg, int base, | 1057 | static void save_reg(struct insn_state *state, unsigned char reg, int base, |
| 951 | int offset) | 1058 | int offset) |
| 952 | { | 1059 | { |
| @@ -1032,6 +1139,9 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) | |||
| 1032 | return 0; | 1139 | return 0; |
| 1033 | } | 1140 | } |
| 1034 | 1141 | ||
| 1142 | if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET) | ||
| 1143 | return update_insn_state_regs(insn, state); | ||
| 1144 | |||
| 1035 | switch (op->dest.type) { | 1145 | switch (op->dest.type) { |
| 1036 | 1146 | ||
| 1037 | case OP_DEST_REG: | 1147 | case OP_DEST_REG: |
| @@ -1051,7 +1161,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) | |||
| 1051 | regs[CFI_BP].base = CFI_BP; | 1161 | regs[CFI_BP].base = CFI_BP; |
| 1052 | regs[CFI_BP].offset = -state->stack_size; | 1162 | regs[CFI_BP].offset = -state->stack_size; |
| 1053 | state->bp_scratch = false; | 1163 | state->bp_scratch = false; |
| 1054 | } else if (!nofp) { | 1164 | } else if (!no_fp) { |
| 1055 | 1165 | ||
| 1056 | WARN_FUNC("unknown stack-related register move", | 1166 | WARN_FUNC("unknown stack-related register move", |
| 1057 | insn->sec, insn->offset); | 1167 | insn->sec, insn->offset); |
| @@ -1222,7 +1332,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) | |||
| 1222 | } | 1332 | } |
| 1223 | 1333 | ||
| 1224 | /* detect when asm code uses rbp as a scratch register */ | 1334 | /* detect when asm code uses rbp as a scratch register */ |
| 1225 | if (!nofp && insn->func && op->src.reg == CFI_BP && | 1335 | if (!no_fp && insn->func && op->src.reg == CFI_BP && |
| 1226 | cfa->base != CFI_BP) | 1336 | cfa->base != CFI_BP) |
| 1227 | state->bp_scratch = true; | 1337 | state->bp_scratch = true; |
| 1228 | break; | 1338 | break; |
| @@ -1323,6 +1433,10 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state) | |||
| 1323 | break; | 1433 | break; |
| 1324 | } | 1434 | } |
| 1325 | 1435 | ||
| 1436 | } else if (state1->type != state2->type) { | ||
| 1437 | WARN_FUNC("stack state mismatch: type1=%d type2=%d", | ||
| 1438 | insn->sec, insn->offset, state1->type, state2->type); | ||
| 1439 | |||
| 1326 | } else if (state1->drap != state2->drap || | 1440 | } else if (state1->drap != state2->drap || |
| 1327 | (state1->drap && state1->drap_reg != state2->drap_reg)) { | 1441 | (state1->drap && state1->drap_reg != state2->drap_reg)) { |
| 1328 | WARN_FUNC("stack state mismatch: drap1=%d(%d) drap2=%d(%d)", | 1442 | WARN_FUNC("stack state mismatch: drap1=%d(%d) drap2=%d(%d)", |
| @@ -1346,7 +1460,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | |||
| 1346 | struct insn_state state) | 1460 | struct insn_state state) |
| 1347 | { | 1461 | { |
| 1348 | struct alternative *alt; | 1462 | struct alternative *alt; |
| 1349 | struct instruction *insn; | 1463 | struct instruction *insn, *next_insn; |
| 1350 | struct section *sec; | 1464 | struct section *sec; |
| 1351 | struct symbol *func = NULL; | 1465 | struct symbol *func = NULL; |
| 1352 | int ret; | 1466 | int ret; |
| @@ -1361,6 +1475,8 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | |||
| 1361 | } | 1475 | } |
| 1362 | 1476 | ||
| 1363 | while (1) { | 1477 | while (1) { |
| 1478 | next_insn = next_insn_same_sec(file, insn); | ||
| 1479 | |||
| 1364 | if (file->c_file && insn->func) { | 1480 | if (file->c_file && insn->func) { |
| 1365 | if (func && func != insn->func) { | 1481 | if (func && func != insn->func) { |
| 1366 | WARN("%s() falls through to next function %s()", | 1482 | WARN("%s() falls through to next function %s()", |
| @@ -1378,13 +1494,54 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | |||
| 1378 | } | 1494 | } |
| 1379 | 1495 | ||
| 1380 | if (insn->visited) { | 1496 | if (insn->visited) { |
| 1381 | if (!!insn_state_match(insn, &state)) | 1497 | if (!insn->hint && !insn_state_match(insn, &state)) |
| 1382 | return 1; | 1498 | return 1; |
| 1383 | 1499 | ||
| 1384 | return 0; | 1500 | return 0; |
| 1385 | } | 1501 | } |
| 1386 | 1502 | ||
| 1387 | insn->state = state; | 1503 | if (insn->hint) { |
| 1504 | if (insn->restore) { | ||
| 1505 | struct instruction *save_insn, *i; | ||
| 1506 | |||
| 1507 | i = insn; | ||
| 1508 | save_insn = NULL; | ||
| 1509 | func_for_each_insn_continue_reverse(file, func, i) { | ||
| 1510 | if (i->save) { | ||
| 1511 | save_insn = i; | ||
| 1512 | break; | ||
| 1513 | } | ||
| 1514 | } | ||
| 1515 | |||
| 1516 | if (!save_insn) { | ||
| 1517 | WARN_FUNC("no corresponding CFI save for CFI restore", | ||
| 1518 | sec, insn->offset); | ||
| 1519 | return 1; | ||
| 1520 | } | ||
| 1521 | |||
| 1522 | if (!save_insn->visited) { | ||
| 1523 | /* | ||
| 1524 | * Oops, no state to copy yet. | ||
| 1525 | * Hopefully we can reach this | ||
| 1526 | * instruction from another branch | ||
| 1527 | * after the save insn has been | ||
| 1528 | * visited. | ||
| 1529 | */ | ||
| 1530 | if (insn == first) | ||
| 1531 | return 0; | ||
| 1532 | |||
| 1533 | WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", | ||
| 1534 | sec, insn->offset); | ||
| 1535 | return 1; | ||
| 1536 | } | ||
| 1537 | |||
| 1538 | insn->state = save_insn->state; | ||
| 1539 | } | ||
| 1540 | |||
| 1541 | state = insn->state; | ||
| 1542 | |||
| 1543 | } else | ||
| 1544 | insn->state = state; | ||
| 1388 | 1545 | ||
| 1389 | insn->visited = true; | 1546 | insn->visited = true; |
| 1390 | 1547 | ||
| @@ -1423,7 +1580,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | |||
| 1423 | 1580 | ||
| 1424 | /* fallthrough */ | 1581 | /* fallthrough */ |
| 1425 | case INSN_CALL_DYNAMIC: | 1582 | case INSN_CALL_DYNAMIC: |
| 1426 | if (!nofp && func && !has_valid_stack_frame(&state)) { | 1583 | if (!no_fp && func && !has_valid_stack_frame(&state)) { |
| 1427 | WARN_FUNC("call without frame pointer save/setup", | 1584 | WARN_FUNC("call without frame pointer save/setup", |
| 1428 | sec, insn->offset); | 1585 | sec, insn->offset); |
| 1429 | return 1; | 1586 | return 1; |
| @@ -1461,6 +1618,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | |||
| 1461 | 1618 | ||
| 1462 | return 0; | 1619 | return 0; |
| 1463 | 1620 | ||
| 1621 | case INSN_CONTEXT_SWITCH: | ||
| 1622 | if (func && (!next_insn || !next_insn->hint)) { | ||
| 1623 | WARN_FUNC("unsupported instruction in callable function", | ||
| 1624 | sec, insn->offset); | ||
| 1625 | return 1; | ||
| 1626 | } | ||
| 1627 | return 0; | ||
| 1628 | |||
| 1464 | case INSN_STACK: | 1629 | case INSN_STACK: |
| 1465 | if (update_insn_state(insn, &state)) | 1630 | if (update_insn_state(insn, &state)) |
| 1466 | return -1; | 1631 | return -1; |
| @@ -1474,7 +1639,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | |||
| 1474 | if (insn->dead_end) | 1639 | if (insn->dead_end) |
| 1475 | return 0; | 1640 | return 0; |
| 1476 | 1641 | ||
| 1477 | insn = next_insn_same_sec(file, insn); | 1642 | insn = next_insn; |
| 1478 | if (!insn) { | 1643 | if (!insn) { |
| 1479 | WARN("%s: unexpected end of section", sec->name); | 1644 | WARN("%s: unexpected end of section", sec->name); |
| 1480 | return 1; | 1645 | return 1; |
| @@ -1484,6 +1649,27 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | |||
| 1484 | return 0; | 1649 | return 0; |
| 1485 | } | 1650 | } |
| 1486 | 1651 | ||
| 1652 | static int validate_unwind_hints(struct objtool_file *file) | ||
| 1653 | { | ||
| 1654 | struct instruction *insn; | ||
| 1655 | int ret, warnings = 0; | ||
| 1656 | struct insn_state state; | ||
| 1657 | |||
| 1658 | if (!file->hints) | ||
| 1659 | return 0; | ||
| 1660 | |||
| 1661 | clear_insn_state(&state); | ||
| 1662 | |||
| 1663 | for_each_insn(file, insn) { | ||
| 1664 | if (insn->hint && !insn->visited) { | ||
| 1665 | ret = validate_branch(file, insn, state); | ||
| 1666 | warnings += ret; | ||
| 1667 | } | ||
| 1668 | } | ||
| 1669 | |||
| 1670 | return warnings; | ||
| 1671 | } | ||
| 1672 | |||
| 1487 | static bool is_kasan_insn(struct instruction *insn) | 1673 | static bool is_kasan_insn(struct instruction *insn) |
| 1488 | { | 1674 | { |
| 1489 | return (insn->type == INSN_CALL && | 1675 | return (insn->type == INSN_CALL && |
| @@ -1580,15 +1766,6 @@ static int validate_reachable_instructions(struct objtool_file *file) | |||
| 1580 | if (insn->visited || ignore_unreachable_insn(insn)) | 1766 | if (insn->visited || ignore_unreachable_insn(insn)) |
| 1581 | continue; | 1767 | continue; |
| 1582 | 1768 | ||
| 1583 | /* | ||
| 1584 | * gcov produces a lot of unreachable instructions. If we get | ||
| 1585 | * an unreachable warning and the file has gcov enabled, just | ||
| 1586 | * ignore it, and all other such warnings for the file. Do | ||
| 1587 | * this here because this is an expensive function. | ||
| 1588 | */ | ||
| 1589 | if (gcov_enabled(file)) | ||
| 1590 | return 0; | ||
| 1591 | |||
| 1592 | WARN_FUNC("unreachable instruction", insn->sec, insn->offset); | 1769 | WARN_FUNC("unreachable instruction", insn->sec, insn->offset); |
| 1593 | return 1; | 1770 | return 1; |
| 1594 | } | 1771 | } |
| @@ -1613,15 +1790,15 @@ static void cleanup(struct objtool_file *file) | |||
| 1613 | elf_close(file->elf); | 1790 | elf_close(file->elf); |
| 1614 | } | 1791 | } |
| 1615 | 1792 | ||
| 1616 | int check(const char *_objname, bool _nofp) | 1793 | int check(const char *_objname, bool _no_fp, bool no_unreachable, bool orc) |
| 1617 | { | 1794 | { |
| 1618 | struct objtool_file file; | 1795 | struct objtool_file file; |
| 1619 | int ret, warnings = 0; | 1796 | int ret, warnings = 0; |
| 1620 | 1797 | ||
| 1621 | objname = _objname; | 1798 | objname = _objname; |
| 1622 | nofp = _nofp; | 1799 | no_fp = _no_fp; |
| 1623 | 1800 | ||
| 1624 | file.elf = elf_open(objname); | 1801 | file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY); |
| 1625 | if (!file.elf) | 1802 | if (!file.elf) |
| 1626 | return 1; | 1803 | return 1; |
| 1627 | 1804 | ||
| @@ -1629,8 +1806,9 @@ int check(const char *_objname, bool _nofp) | |||
| 1629 | hash_init(file.insn_hash); | 1806 | hash_init(file.insn_hash); |
| 1630 | file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); | 1807 | file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); |
| 1631 | file.rodata = find_section_by_name(file.elf, ".rodata"); | 1808 | file.rodata = find_section_by_name(file.elf, ".rodata"); |
| 1632 | file.ignore_unreachables = false; | ||
| 1633 | file.c_file = find_section_by_name(file.elf, ".comment"); | 1809 | file.c_file = find_section_by_name(file.elf, ".comment"); |
| 1810 | file.ignore_unreachables = no_unreachable; | ||
| 1811 | file.hints = false; | ||
| 1634 | 1812 | ||
| 1635 | arch_initial_func_cfi_state(&initial_func_cfi); | 1813 | arch_initial_func_cfi_state(&initial_func_cfi); |
| 1636 | 1814 | ||
| @@ -1647,6 +1825,11 @@ int check(const char *_objname, bool _nofp) | |||
| 1647 | goto out; | 1825 | goto out; |
| 1648 | warnings += ret; | 1826 | warnings += ret; |
| 1649 | 1827 | ||
| 1828 | ret = validate_unwind_hints(&file); | ||
| 1829 | if (ret < 0) | ||
| 1830 | goto out; | ||
| 1831 | warnings += ret; | ||
| 1832 | |||
| 1650 | if (!warnings) { | 1833 | if (!warnings) { |
| 1651 | ret = validate_reachable_instructions(&file); | 1834 | ret = validate_reachable_instructions(&file); |
| 1652 | if (ret < 0) | 1835 | if (ret < 0) |
| @@ -1654,6 +1837,20 @@ int check(const char *_objname, bool _nofp) | |||
| 1654 | warnings += ret; | 1837 | warnings += ret; |
| 1655 | } | 1838 | } |
| 1656 | 1839 | ||
| 1840 | if (orc) { | ||
| 1841 | ret = create_orc(&file); | ||
| 1842 | if (ret < 0) | ||
| 1843 | goto out; | ||
| 1844 | |||
| 1845 | ret = create_orc_sections(&file); | ||
| 1846 | if (ret < 0) | ||
| 1847 | goto out; | ||
| 1848 | |||
| 1849 | ret = elf_write(file.elf); | ||
| 1850 | if (ret < 0) | ||
| 1851 | goto out; | ||
| 1852 | } | ||
| 1853 | |||
| 1657 | out: | 1854 | out: |
| 1658 | cleanup(&file); | 1855 | cleanup(&file); |
| 1659 | 1856 | ||
diff --git a/tools/objtool/check.h b/tools/objtool/check.h index da85f5b00ec6..c9af11f0c8af 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h | |||
| @@ -22,12 +22,14 @@ | |||
| 22 | #include "elf.h" | 22 | #include "elf.h" |
| 23 | #include "cfi.h" | 23 | #include "cfi.h" |
| 24 | #include "arch.h" | 24 | #include "arch.h" |
| 25 | #include "orc.h" | ||
| 25 | #include <linux/hashtable.h> | 26 | #include <linux/hashtable.h> |
| 26 | 27 | ||
| 27 | struct insn_state { | 28 | struct insn_state { |
| 28 | struct cfi_reg cfa; | 29 | struct cfi_reg cfa; |
| 29 | struct cfi_reg regs[CFI_NUM_REGS]; | 30 | struct cfi_reg regs[CFI_NUM_REGS]; |
| 30 | int stack_size; | 31 | int stack_size; |
| 32 | unsigned char type; | ||
| 31 | bool bp_scratch; | 33 | bool bp_scratch; |
| 32 | bool drap; | 34 | bool drap; |
| 33 | int drap_reg; | 35 | int drap_reg; |
| @@ -41,13 +43,14 @@ struct instruction { | |||
| 41 | unsigned int len; | 43 | unsigned int len; |
| 42 | unsigned char type; | 44 | unsigned char type; |
| 43 | unsigned long immediate; | 45 | unsigned long immediate; |
| 44 | bool alt_group, visited, dead_end, ignore; | 46 | bool alt_group, visited, dead_end, ignore, hint, save, restore; |
| 45 | struct symbol *call_dest; | 47 | struct symbol *call_dest; |
| 46 | struct instruction *jump_dest; | 48 | struct instruction *jump_dest; |
| 47 | struct list_head alts; | 49 | struct list_head alts; |
| 48 | struct symbol *func; | 50 | struct symbol *func; |
| 49 | struct stack_op stack_op; | 51 | struct stack_op stack_op; |
| 50 | struct insn_state state; | 52 | struct insn_state state; |
| 53 | struct orc_entry orc; | ||
| 51 | }; | 54 | }; |
| 52 | 55 | ||
| 53 | struct objtool_file { | 56 | struct objtool_file { |
| @@ -55,12 +58,22 @@ struct objtool_file { | |||
| 55 | struct list_head insn_list; | 58 | struct list_head insn_list; |
| 56 | DECLARE_HASHTABLE(insn_hash, 16); | 59 | DECLARE_HASHTABLE(insn_hash, 16); |
| 57 | struct section *rodata, *whitelist; | 60 | struct section *rodata, *whitelist; |
| 58 | bool ignore_unreachables, c_file; | 61 | bool ignore_unreachables, c_file, hints; |
| 59 | }; | 62 | }; |
| 60 | 63 | ||
| 61 | int check(const char *objname, bool nofp); | 64 | int check(const char *objname, bool no_fp, bool no_unreachable, bool orc); |
| 65 | |||
| 66 | struct instruction *find_insn(struct objtool_file *file, | ||
| 67 | struct section *sec, unsigned long offset); | ||
| 62 | 68 | ||
| 63 | #define for_each_insn(file, insn) \ | 69 | #define for_each_insn(file, insn) \ |
| 64 | list_for_each_entry(insn, &file->insn_list, list) | 70 | list_for_each_entry(insn, &file->insn_list, list) |
| 65 | 71 | ||
| 72 | #define sec_for_each_insn(file, sec, insn) \ | ||
| 73 | for (insn = find_insn(file, sec, 0); \ | ||
| 74 | insn && &insn->list != &file->insn_list && \ | ||
| 75 | insn->sec == sec; \ | ||
| 76 | insn = list_next_entry(insn, list)) | ||
| 77 | |||
| 78 | |||
| 66 | #endif /* _CHECK_H */ | 79 | #endif /* _CHECK_H */ |
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 1a7e8aa2af58..6e9f980a7d26 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c | |||
| @@ -30,16 +30,6 @@ | |||
| 30 | #include "elf.h" | 30 | #include "elf.h" |
| 31 | #include "warn.h" | 31 | #include "warn.h" |
| 32 | 32 | ||
| 33 | /* | ||
| 34 | * Fallback for systems without this "read, mmaping if possible" cmd. | ||
| 35 | */ | ||
| 36 | #ifndef ELF_C_READ_MMAP | ||
| 37 | #define ELF_C_READ_MMAP ELF_C_READ | ||
| 38 | #endif | ||
| 39 | |||
| 40 | #define WARN_ELF(format, ...) \ | ||
| 41 | WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1)) | ||
| 42 | |||
| 43 | struct section *find_section_by_name(struct elf *elf, const char *name) | 33 | struct section *find_section_by_name(struct elf *elf, const char *name) |
| 44 | { | 34 | { |
| 45 | struct section *sec; | 35 | struct section *sec; |
| @@ -349,9 +339,10 @@ static int read_relas(struct elf *elf) | |||
| 349 | return 0; | 339 | return 0; |
| 350 | } | 340 | } |
| 351 | 341 | ||
| 352 | struct elf *elf_open(const char *name) | 342 | struct elf *elf_open(const char *name, int flags) |
| 353 | { | 343 | { |
| 354 | struct elf *elf; | 344 | struct elf *elf; |
| 345 | Elf_Cmd cmd; | ||
| 355 | 346 | ||
| 356 | elf_version(EV_CURRENT); | 347 | elf_version(EV_CURRENT); |
| 357 | 348 | ||
| @@ -364,13 +355,20 @@ struct elf *elf_open(const char *name) | |||
| 364 | 355 | ||
| 365 | INIT_LIST_HEAD(&elf->sections); | 356 | INIT_LIST_HEAD(&elf->sections); |
| 366 | 357 | ||
| 367 | elf->fd = open(name, O_RDONLY); | 358 | elf->fd = open(name, flags); |
| 368 | if (elf->fd == -1) { | 359 | if (elf->fd == -1) { |
| 369 | perror("open"); | 360 | perror("open"); |
| 370 | goto err; | 361 | goto err; |
| 371 | } | 362 | } |
| 372 | 363 | ||
| 373 | elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL); | 364 | if ((flags & O_ACCMODE) == O_RDONLY) |
| 365 | cmd = ELF_C_READ_MMAP; | ||
| 366 | else if ((flags & O_ACCMODE) == O_RDWR) | ||
| 367 | cmd = ELF_C_RDWR; | ||
| 368 | else /* O_WRONLY */ | ||
| 369 | cmd = ELF_C_WRITE; | ||
| 370 | |||
| 371 | elf->elf = elf_begin(elf->fd, cmd, NULL); | ||
| 374 | if (!elf->elf) { | 372 | if (!elf->elf) { |
| 375 | WARN_ELF("elf_begin"); | 373 | WARN_ELF("elf_begin"); |
| 376 | goto err; | 374 | goto err; |
| @@ -397,6 +395,194 @@ err: | |||
| 397 | return NULL; | 395 | return NULL; |
| 398 | } | 396 | } |
| 399 | 397 | ||
| 398 | struct section *elf_create_section(struct elf *elf, const char *name, | ||
| 399 | size_t entsize, int nr) | ||
| 400 | { | ||
| 401 | struct section *sec, *shstrtab; | ||
| 402 | size_t size = entsize * nr; | ||
| 403 | struct Elf_Scn *s; | ||
| 404 | Elf_Data *data; | ||
| 405 | |||
| 406 | sec = malloc(sizeof(*sec)); | ||
| 407 | if (!sec) { | ||
| 408 | perror("malloc"); | ||
| 409 | return NULL; | ||
| 410 | } | ||
| 411 | memset(sec, 0, sizeof(*sec)); | ||
| 412 | |||
| 413 | INIT_LIST_HEAD(&sec->symbol_list); | ||
| 414 | INIT_LIST_HEAD(&sec->rela_list); | ||
| 415 | hash_init(sec->rela_hash); | ||
| 416 | hash_init(sec->symbol_hash); | ||
| 417 | |||
| 418 | list_add_tail(&sec->list, &elf->sections); | ||
| 419 | |||
| 420 | s = elf_newscn(elf->elf); | ||
| 421 | if (!s) { | ||
| 422 | WARN_ELF("elf_newscn"); | ||
| 423 | return NULL; | ||
| 424 | } | ||
| 425 | |||
| 426 | sec->name = strdup(name); | ||
| 427 | if (!sec->name) { | ||
| 428 | perror("strdup"); | ||
| 429 | return NULL; | ||
| 430 | } | ||
| 431 | |||
| 432 | sec->idx = elf_ndxscn(s); | ||
| 433 | sec->len = size; | ||
| 434 | sec->changed = true; | ||
| 435 | |||
| 436 | sec->data = elf_newdata(s); | ||
| 437 | if (!sec->data) { | ||
| 438 | WARN_ELF("elf_newdata"); | ||
| 439 | return NULL; | ||
| 440 | } | ||
| 441 | |||
| 442 | sec->data->d_size = size; | ||
| 443 | sec->data->d_align = 1; | ||
| 444 | |||
| 445 | if (size) { | ||
| 446 | sec->data->d_buf = malloc(size); | ||
| 447 | if (!sec->data->d_buf) { | ||
| 448 | perror("malloc"); | ||
| 449 | return NULL; | ||
| 450 | } | ||
| 451 | memset(sec->data->d_buf, 0, size); | ||
| 452 | } | ||
| 453 | |||
| 454 | if (!gelf_getshdr(s, &sec->sh)) { | ||
| 455 | WARN_ELF("gelf_getshdr"); | ||
| 456 | return NULL; | ||
| 457 | } | ||
| 458 | |||
| 459 | sec->sh.sh_size = size; | ||
| 460 | sec->sh.sh_entsize = entsize; | ||
| 461 | sec->sh.sh_type = SHT_PROGBITS; | ||
| 462 | sec->sh.sh_addralign = 1; | ||
| 463 | sec->sh.sh_flags = SHF_ALLOC; | ||
| 464 | |||
| 465 | |||
| 466 | /* Add section name to .shstrtab */ | ||
| 467 | shstrtab = find_section_by_name(elf, ".shstrtab"); | ||
| 468 | if (!shstrtab) { | ||
| 469 | WARN("can't find .shstrtab section"); | ||
| 470 | return NULL; | ||
| 471 | } | ||
| 472 | |||
| 473 | s = elf_getscn(elf->elf, shstrtab->idx); | ||
| 474 | if (!s) { | ||
| 475 | WARN_ELF("elf_getscn"); | ||
| 476 | return NULL; | ||
| 477 | } | ||
| 478 | |||
| 479 | data = elf_newdata(s); | ||
| 480 | if (!data) { | ||
| 481 | WARN_ELF("elf_newdata"); | ||
| 482 | return NULL; | ||
| 483 | } | ||
| 484 | |||
| 485 | data->d_buf = sec->name; | ||
| 486 | data->d_size = strlen(name) + 1; | ||
| 487 | data->d_align = 1; | ||
| 488 | |||
| 489 | sec->sh.sh_name = shstrtab->len; | ||
| 490 | |||
| 491 | shstrtab->len += strlen(name) + 1; | ||
| 492 | shstrtab->changed = true; | ||
| 493 | |||
| 494 | return sec; | ||
| 495 | } | ||
| 496 | |||
| 497 | struct section *elf_create_rela_section(struct elf *elf, struct section *base) | ||
| 498 | { | ||
| 499 | char *relaname; | ||
| 500 | struct section *sec; | ||
| 501 | |||
| 502 | relaname = malloc(strlen(base->name) + strlen(".rela") + 1); | ||
| 503 | if (!relaname) { | ||
| 504 | perror("malloc"); | ||
| 505 | return NULL; | ||
| 506 | } | ||
| 507 | strcpy(relaname, ".rela"); | ||
| 508 | strcat(relaname, base->name); | ||
| 509 | |||
| 510 | sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0); | ||
| 511 | if (!sec) | ||
| 512 | return NULL; | ||
| 513 | |||
| 514 | base->rela = sec; | ||
| 515 | sec->base = base; | ||
| 516 | |||
| 517 | sec->sh.sh_type = SHT_RELA; | ||
| 518 | sec->sh.sh_addralign = 8; | ||
| 519 | sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx; | ||
| 520 | sec->sh.sh_info = base->idx; | ||
| 521 | sec->sh.sh_flags = SHF_INFO_LINK; | ||
| 522 | |||
| 523 | return sec; | ||
| 524 | } | ||
| 525 | |||
| 526 | int elf_rebuild_rela_section(struct section *sec) | ||
| 527 | { | ||
| 528 | struct rela *rela; | ||
| 529 | int nr, idx = 0, size; | ||
| 530 | GElf_Rela *relas; | ||
| 531 | |||
| 532 | nr = 0; | ||
| 533 | list_for_each_entry(rela, &sec->rela_list, list) | ||
| 534 | nr++; | ||
| 535 | |||
| 536 | size = nr * sizeof(*relas); | ||
| 537 | relas = malloc(size); | ||
| 538 | if (!relas) { | ||
| 539 | perror("malloc"); | ||
| 540 | return -1; | ||
| 541 | } | ||
| 542 | |||
| 543 | sec->data->d_buf = relas; | ||
| 544 | sec->data->d_size = size; | ||
| 545 | |||
| 546 | sec->sh.sh_size = size; | ||
| 547 | |||
| 548 | idx = 0; | ||
| 549 | list_for_each_entry(rela, &sec->rela_list, list) { | ||
| 550 | relas[idx].r_offset = rela->offset; | ||
| 551 | relas[idx].r_addend = rela->addend; | ||
| 552 | relas[idx].r_info = GELF_R_INFO(rela->sym->idx, rela->type); | ||
| 553 | idx++; | ||
| 554 | } | ||
| 555 | |||
| 556 | return 0; | ||
| 557 | } | ||
| 558 | |||
| 559 | int elf_write(struct elf *elf) | ||
| 560 | { | ||
| 561 | struct section *sec; | ||
| 562 | Elf_Scn *s; | ||
| 563 | |||
| 564 | list_for_each_entry(sec, &elf->sections, list) { | ||
| 565 | if (sec->changed) { | ||
| 566 | s = elf_getscn(elf->elf, sec->idx); | ||
| 567 | if (!s) { | ||
| 568 | WARN_ELF("elf_getscn"); | ||
| 569 | return -1; | ||
| 570 | } | ||
| 571 | if (!gelf_update_shdr (s, &sec->sh)) { | ||
| 572 | WARN_ELF("gelf_update_shdr"); | ||
| 573 | return -1; | ||
| 574 | } | ||
| 575 | } | ||
| 576 | } | ||
| 577 | |||
| 578 | if (elf_update(elf->elf, ELF_C_WRITE) < 0) { | ||
| 579 | WARN_ELF("elf_update"); | ||
| 580 | return -1; | ||
| 581 | } | ||
| 582 | |||
| 583 | return 0; | ||
| 584 | } | ||
| 585 | |||
| 400 | void elf_close(struct elf *elf) | 586 | void elf_close(struct elf *elf) |
| 401 | { | 587 | { |
| 402 | struct section *sec, *tmpsec; | 588 | struct section *sec, *tmpsec; |
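The helpers added above give objtool a small write-side ELF API: open the
object read-write, create a section (plus an optional companion .rela
section), serialize the accumulated rela entries, and write the file back
out.  A minimal sketch of how a caller might string these together; the
object path and section name are illustrative only, and the real caller is
the ORC generation code in orc_gen.c below:

	#include <fcntl.h>
	#include "elf.h"

	static int add_blob_section(const char *objname)
	{
		struct elf *elf;
		struct section *sec, *rela;

		elf = elf_open(objname, O_RDWR);	/* maps to ELF_C_RDWR */
		if (!elf)
			return -1;

		/* 16 zero-filled entries of 8 bytes; the object must have .shstrtab */
		sec = elf_create_section(elf, ".example_blob", 8, 16);
		if (!sec)
			return -1;

		rela = elf_create_rela_section(elf, sec);
		if (!rela)
			return -1;

		/* ... append struct rela entries to rela->rela_list here ... */

		if (elf_rebuild_rela_section(rela))	/* serialize rela_list */
			return -1;

		return elf_write(elf);	/* update changed headers, write the file */
	}
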
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 343968b778cb..d86e2ff14466 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h | |||
| @@ -28,6 +28,13 @@ | |||
| 28 | # define elf_getshdrstrndx elf_getshstrndx | 28 | # define elf_getshdrstrndx elf_getshstrndx |
| 29 | #endif | 29 | #endif |
| 30 | 30 | ||
| 31 | /* | ||
| 32 | * Fallback for systems without this "read, mmapping if possible" cmd. | ||
| 33 | */ | ||
| 34 | #ifndef ELF_C_READ_MMAP | ||
| 35 | #define ELF_C_READ_MMAP ELF_C_READ | ||
| 36 | #endif | ||
| 37 | |||
| 31 | struct section { | 38 | struct section { |
| 32 | struct list_head list; | 39 | struct list_head list; |
| 33 | GElf_Shdr sh; | 40 | GElf_Shdr sh; |
| @@ -41,6 +48,7 @@ struct section { | |||
| 41 | char *name; | 48 | char *name; |
| 42 | int idx; | 49 | int idx; |
| 43 | unsigned int len; | 50 | unsigned int len; |
| 51 | bool changed, text; | ||
| 44 | }; | 52 | }; |
| 45 | 53 | ||
| 46 | struct symbol { | 54 | struct symbol { |
| @@ -75,7 +83,7 @@ struct elf { | |||
| 75 | }; | 83 | }; |
| 76 | 84 | ||
| 77 | 85 | ||
| 78 | struct elf *elf_open(const char *name); | 86 | struct elf *elf_open(const char *name, int flags); |
| 79 | struct section *find_section_by_name(struct elf *elf, const char *name); | 87 | struct section *find_section_by_name(struct elf *elf, const char *name); |
| 80 | struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); | 88 | struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); |
| 81 | struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); | 89 | struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); |
| @@ -83,6 +91,11 @@ struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); | |||
| 83 | struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, | 91 | struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, |
| 84 | unsigned int len); | 92 | unsigned int len); |
| 85 | struct symbol *find_containing_func(struct section *sec, unsigned long offset); | 93 | struct symbol *find_containing_func(struct section *sec, unsigned long offset); |
| 94 | struct section *elf_create_section(struct elf *elf, const char *name, size_t | ||
| 95 | entsize, int nr); | ||
| 96 | struct section *elf_create_rela_section(struct elf *elf, struct section *base); | ||
| 97 | int elf_rebuild_rela_section(struct section *sec); | ||
| 98 | int elf_write(struct elf *elf); | ||
| 86 | void elf_close(struct elf *elf); | 99 | void elf_close(struct elf *elf); |
| 87 | 100 | ||
| 88 | #define for_each_sec(file, sec) \ | 101 | #define for_each_sec(file, sec) \ |
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index ecc5b1b5d15d..31e0f9143840 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c | |||
| @@ -42,10 +42,11 @@ struct cmd_struct { | |||
| 42 | }; | 42 | }; |
| 43 | 43 | ||
| 44 | static const char objtool_usage_string[] = | 44 | static const char objtool_usage_string[] = |
| 45 | "objtool [OPTIONS] COMMAND [ARGS]"; | 45 | "objtool COMMAND [ARGS]"; |
| 46 | 46 | ||
| 47 | static struct cmd_struct objtool_cmds[] = { | 47 | static struct cmd_struct objtool_cmds[] = { |
| 48 | {"check", cmd_check, "Perform stack metadata validation on an object file" }, | 48 | {"check", cmd_check, "Perform stack metadata validation on an object file" }, |
| 49 | {"orc", cmd_orc, "Generate in-place ORC unwind tables for an object file" }, | ||
| 49 | }; | 50 | }; |
| 50 | 51 | ||
| 51 | bool help; | 52 | bool help; |
diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h new file mode 100644 index 000000000000..a4139e386ef3 --- /dev/null +++ b/tools/objtool/orc.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation; either version 2 | ||
| 7 | * of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #ifndef _ORC_H | ||
| 19 | #define _ORC_H | ||
| 20 | |||
| 21 | #include "orc_types.h" | ||
| 22 | |||
| 23 | struct objtool_file; | ||
| 24 | |||
| 25 | int create_orc(struct objtool_file *file); | ||
| 26 | int create_orc_sections(struct objtool_file *file); | ||
| 27 | |||
| 28 | int orc_dump(const char *objname); | ||
| 29 | |||
| 30 | #endif /* _ORC_H */ | ||
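Taken together, these three entry points describe the ORC flow: generate
per-instruction ORC data from objtool's CFI state, emit it into new ELF
sections, and (separately) dump it back out for inspection.  A rough sketch
of the generation side, assuming 'file' is an objtool_file that has already
been through the checker (the actual driver is the new "objtool orc"
subcommand):

	if (create_orc(file))			/* fill insn->orc from CFI state */
		return -1;
	if (create_orc_sections(file))		/* emit .orc_unwind and .orc_unwind_ip */
		return -1;
	return elf_write(file->elf);		/* write the object back out */
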
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c new file mode 100644 index 000000000000..36c5bf6a2675 --- /dev/null +++ b/tools/objtool/orc_dump.c | |||
| @@ -0,0 +1,212 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation; either version 2 | ||
| 7 | * of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <unistd.h> | ||
| 19 | #include "orc.h" | ||
| 20 | #include "warn.h" | ||
| 21 | |||
| 22 | static const char *reg_name(unsigned int reg) | ||
| 23 | { | ||
| 24 | switch (reg) { | ||
| 25 | case ORC_REG_PREV_SP: | ||
| 26 | return "prevsp"; | ||
| 27 | case ORC_REG_DX: | ||
| 28 | return "dx"; | ||
| 29 | case ORC_REG_DI: | ||
| 30 | return "di"; | ||
| 31 | case ORC_REG_BP: | ||
| 32 | return "bp"; | ||
| 33 | case ORC_REG_SP: | ||
| 34 | return "sp"; | ||
| 35 | case ORC_REG_R10: | ||
| 36 | return "r10"; | ||
| 37 | case ORC_REG_R13: | ||
| 38 | return "r13"; | ||
| 39 | case ORC_REG_BP_INDIRECT: | ||
| 40 | return "bp(ind)"; | ||
| 41 | case ORC_REG_SP_INDIRECT: | ||
| 42 | return "sp(ind)"; | ||
| 43 | default: | ||
| 44 | return "?"; | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | static const char *orc_type_name(unsigned int type) | ||
| 49 | { | ||
| 50 | switch (type) { | ||
| 51 | case ORC_TYPE_CALL: | ||
| 52 | return "call"; | ||
| 53 | case ORC_TYPE_REGS: | ||
| 54 | return "regs"; | ||
| 55 | case ORC_TYPE_REGS_IRET: | ||
| 56 | return "iret"; | ||
| 57 | default: | ||
| 58 | return "?"; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | static void print_reg(unsigned int reg, int offset) | ||
| 63 | { | ||
| 64 | if (reg == ORC_REG_BP_INDIRECT) | ||
| 65 | printf("(bp%+d)", offset); | ||
| 66 | else if (reg == ORC_REG_SP_INDIRECT) | ||
| 67 | printf("(sp%+d)", offset); | ||
| 68 | else if (reg == ORC_REG_UNDEFINED) | ||
| 69 | printf("(und)"); | ||
| 70 | else | ||
| 71 | printf("%s%+d", reg_name(reg), offset); | ||
| 72 | } | ||
| 73 | |||
| 74 | int orc_dump(const char *_objname) | ||
| 75 | { | ||
| 76 | int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0; | ||
| 77 | struct orc_entry *orc = NULL; | ||
| 78 | char *name; | ||
| 79 | unsigned long nr_sections, orc_ip_addr = 0; | ||
| 80 | size_t shstrtab_idx; | ||
| 81 | Elf *elf; | ||
| 82 | Elf_Scn *scn; | ||
| 83 | GElf_Shdr sh; | ||
| 84 | GElf_Rela rela; | ||
| 85 | GElf_Sym sym; | ||
| 86 | Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL; | ||
| 87 | |||
| 88 | |||
| 89 | objname = _objname; | ||
| 90 | |||
| 91 | elf_version(EV_CURRENT); | ||
| 92 | |||
| 93 | fd = open(objname, O_RDONLY); | ||
| 94 | if (fd == -1) { | ||
| 95 | perror("open"); | ||
| 96 | return -1; | ||
| 97 | } | ||
| 98 | |||
| 99 | elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); | ||
| 100 | if (!elf) { | ||
| 101 | WARN_ELF("elf_begin"); | ||
| 102 | return -1; | ||
| 103 | } | ||
| 104 | |||
| 105 | if (elf_getshdrnum(elf, &nr_sections)) { | ||
| 106 | WARN_ELF("elf_getshdrnum"); | ||
| 107 | return -1; | ||
| 108 | } | ||
| 109 | |||
| 110 | if (elf_getshdrstrndx(elf, &shstrtab_idx)) { | ||
| 111 | WARN_ELF("elf_getshdrstrndx"); | ||
| 112 | return -1; | ||
| 113 | } | ||
| 114 | |||
| 115 | for (i = 0; i < nr_sections; i++) { | ||
| 116 | scn = elf_getscn(elf, i); | ||
| 117 | if (!scn) { | ||
| 118 | WARN_ELF("elf_getscn"); | ||
| 119 | return -1; | ||
| 120 | } | ||
| 121 | |||
| 122 | if (!gelf_getshdr(scn, &sh)) { | ||
| 123 | WARN_ELF("gelf_getshdr"); | ||
| 124 | return -1; | ||
| 125 | } | ||
| 126 | |||
| 127 | name = elf_strptr(elf, shstrtab_idx, sh.sh_name); | ||
| 128 | if (!name) { | ||
| 129 | WARN_ELF("elf_strptr"); | ||
| 130 | return -1; | ||
| 131 | } | ||
| 132 | |||
| 133 | data = elf_getdata(scn, NULL); | ||
| 134 | if (!data) { | ||
| 135 | WARN_ELF("elf_getdata"); | ||
| 136 | return -1; | ||
| 137 | } | ||
| 138 | |||
| 139 | if (!strcmp(name, ".symtab")) { | ||
| 140 | symtab = data; | ||
| 141 | } else if (!strcmp(name, ".orc_unwind")) { | ||
| 142 | orc = data->d_buf; | ||
| 143 | orc_size = sh.sh_size; | ||
| 144 | } else if (!strcmp(name, ".orc_unwind_ip")) { | ||
| 145 | orc_ip = data->d_buf; | ||
| 146 | orc_ip_addr = sh.sh_addr; | ||
| 147 | } else if (!strcmp(name, ".rela.orc_unwind_ip")) { | ||
| 148 | rela_orc_ip = data; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | if (!symtab || !orc || !orc_ip) | ||
| 153 | return 0; | ||
| 154 | |||
| 155 | if (orc_size % sizeof(*orc) != 0) { | ||
| 156 | WARN("bad .orc_unwind section size"); | ||
| 157 | return -1; | ||
| 158 | } | ||
| 159 | |||
| 160 | nr_entries = orc_size / sizeof(*orc); | ||
| 161 | for (i = 0; i < nr_entries; i++) { | ||
| 162 | if (rela_orc_ip) { | ||
| 163 | if (!gelf_getrela(rela_orc_ip, i, &rela)) { | ||
| 164 | WARN_ELF("gelf_getrela"); | ||
| 165 | return -1; | ||
| 166 | } | ||
| 167 | |||
| 168 | if (!gelf_getsym(symtab, GELF_R_SYM(rela.r_info), &sym)) { | ||
| 169 | WARN_ELF("gelf_getsym"); | ||
| 170 | return -1; | ||
| 171 | } | ||
| 172 | |||
| 173 | scn = elf_getscn(elf, sym.st_shndx); | ||
| 174 | if (!scn) { | ||
| 175 | WARN_ELF("elf_getscn"); | ||
| 176 | return -1; | ||
| 177 | } | ||
| 178 | |||
| 179 | if (!gelf_getshdr(scn, &sh)) { | ||
| 180 | WARN_ELF("gelf_getshdr"); | ||
| 181 | return -1; | ||
| 182 | } | ||
| 183 | |||
| 184 | name = elf_strptr(elf, shstrtab_idx, sh.sh_name); | ||
| 185 | if (!name || !*name) { | ||
| 186 | WARN_ELF("elf_strptr"); | ||
| 187 | return -1; | ||
| 188 | } | ||
| 189 | |||
| 190 | printf("%s+%lx:", name, rela.r_addend); | ||
| 191 | |||
| 192 | } else { | ||
| 193 | printf("%lx:", orc_ip_addr + (i * sizeof(int)) + orc_ip[i]); | ||
| 194 | } | ||
| 195 | |||
| 196 | |||
| 197 | printf(" sp:"); | ||
| 198 | |||
| 199 | print_reg(orc[i].sp_reg, orc[i].sp_offset); | ||
| 200 | |||
| 201 | printf(" bp:"); | ||
| 202 | |||
| 203 | print_reg(orc[i].bp_reg, orc[i].bp_offset); | ||
| 204 | |||
| 205 | printf(" type:%s\n", orc_type_name(orc[i].type)); | ||
| 206 | } | ||
| 207 | |||
| 208 | elf_end(elf); | ||
| 209 | close(fd); | ||
| 210 | |||
| 211 | return 0; | ||
| 212 | } | ||
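Two details of the dumper are worth spelling out.  First, when the input has
no .rela.orc_unwind_ip (i.e. a linked image), each .orc_unwind_ip word is a
self-relative 32-bit offset, so the instruction address is the address of the
word plus its value, which is exactly what the printf above computes.  A
restatement of that decoding, as an illustrative helper:

	/* address described by entry i of a relocation-free .orc_unwind_ip */
	static unsigned long orc_ip_addr_of(unsigned long ip_sec_addr,
					    const int *ip, int i)
	{
		return ip_sec_addr + i * sizeof(int) + ip[i];
	}

Second, the output format follows directly from the printf calls: one line
per entry, for example (values illustrative only):

	.text+28: sp:sp+8 bp:(und) type:call
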
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c new file mode 100644 index 000000000000..e5ca31429c9b --- /dev/null +++ b/tools/objtool/orc_gen.c | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation; either version 2 | ||
| 7 | * of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <stdlib.h> | ||
| 19 | #include <string.h> | ||
| 20 | |||
| 21 | #include "orc.h" | ||
| 22 | #include "check.h" | ||
| 23 | #include "warn.h" | ||
| 24 | |||
| 25 | int create_orc(struct objtool_file *file) | ||
| 26 | { | ||
| 27 | struct instruction *insn; | ||
| 28 | |||
| 29 | for_each_insn(file, insn) { | ||
| 30 | struct orc_entry *orc = &insn->orc; | ||
| 31 | struct cfi_reg *cfa = &insn->state.cfa; | ||
| 32 | struct cfi_reg *bp = &insn->state.regs[CFI_BP]; | ||
| 33 | |||
| 34 | if (cfa->base == CFI_UNDEFINED) { | ||
| 35 | orc->sp_reg = ORC_REG_UNDEFINED; | ||
| 36 | continue; | ||
| 37 | } | ||
| 38 | |||
| 39 | switch (cfa->base) { | ||
| 40 | case CFI_SP: | ||
| 41 | orc->sp_reg = ORC_REG_SP; | ||
| 42 | break; | ||
| 43 | case CFI_SP_INDIRECT: | ||
| 44 | orc->sp_reg = ORC_REG_SP_INDIRECT; | ||
| 45 | break; | ||
| 46 | case CFI_BP: | ||
| 47 | orc->sp_reg = ORC_REG_BP; | ||
| 48 | break; | ||
| 49 | case CFI_BP_INDIRECT: | ||
| 50 | orc->sp_reg = ORC_REG_BP_INDIRECT; | ||
| 51 | break; | ||
| 52 | case CFI_R10: | ||
| 53 | orc->sp_reg = ORC_REG_R10; | ||
| 54 | break; | ||
| 55 | case CFI_R13: | ||
| 56 | orc->sp_reg = ORC_REG_R13; | ||
| 57 | break; | ||
| 58 | case CFI_DI: | ||
| 59 | orc->sp_reg = ORC_REG_DI; | ||
| 60 | break; | ||
| 61 | case CFI_DX: | ||
| 62 | orc->sp_reg = ORC_REG_DX; | ||
| 63 | break; | ||
| 64 | default: | ||
| 65 | WARN_FUNC("unknown CFA base reg %d", | ||
| 66 | insn->sec, insn->offset, cfa->base); | ||
| 67 | return -1; | ||
| 68 | } | ||
| 69 | |||
| 70 | switch(bp->base) { | ||
| 71 | case CFI_UNDEFINED: | ||
| 72 | orc->bp_reg = ORC_REG_UNDEFINED; | ||
| 73 | break; | ||
| 74 | case CFI_CFA: | ||
| 75 | orc->bp_reg = ORC_REG_PREV_SP; | ||
| 76 | break; | ||
| 77 | case CFI_BP: | ||
| 78 | orc->bp_reg = ORC_REG_BP; | ||
| 79 | break; | ||
| 80 | default: | ||
| 81 | WARN_FUNC("unknown BP base reg %d", | ||
| 82 | insn->sec, insn->offset, bp->base); | ||
| 83 | return -1; | ||
| 84 | } | ||
| 85 | |||
| 86 | orc->sp_offset = cfa->offset; | ||
| 87 | orc->bp_offset = bp->offset; | ||
| 88 | orc->type = insn->state.type; | ||
| 89 | } | ||
| 90 | |||
| 91 | return 0; | ||
| 92 | } | ||
| 93 | |||
| 94 | static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, | ||
| 95 | unsigned int idx, struct section *insn_sec, | ||
| 96 | unsigned long insn_off, struct orc_entry *o) | ||
| 97 | { | ||
| 98 | struct orc_entry *orc; | ||
| 99 | struct rela *rela; | ||
| 100 | |||
| 101 | /* populate ORC data */ | ||
| 102 | orc = (struct orc_entry *)u_sec->data->d_buf + idx; | ||
| 103 | memcpy(orc, o, sizeof(*orc)); | ||
| 104 | |||
| 105 | /* populate rela for ip */ | ||
| 106 | rela = malloc(sizeof(*rela)); | ||
| 107 | if (!rela) { | ||
| 108 | perror("malloc"); | ||
| 109 | return -1; | ||
| 110 | } | ||
| 111 | memset(rela, 0, sizeof(*rela)); | ||
| 112 | |||
| 113 | rela->sym = insn_sec->sym; | ||
| 114 | rela->addend = insn_off; | ||
| 115 | rela->type = R_X86_64_PC32; | ||
| 116 | rela->offset = idx * sizeof(int); | ||
| 117 | |||
| 118 | list_add_tail(&rela->list, &ip_relasec->rela_list); | ||
| 119 | hash_add(ip_relasec->rela_hash, &rela->hash, rela->offset); | ||
| 120 | |||
| 121 | return 0; | ||
| 122 | } | ||
| 123 | |||
| 124 | int create_orc_sections(struct objtool_file *file) | ||
| 125 | { | ||
| 126 | struct instruction *insn, *prev_insn; | ||
| 127 | struct section *sec, *u_sec, *ip_relasec; | ||
| 128 | unsigned int idx; | ||
| 129 | |||
| 130 | struct orc_entry empty = { | ||
| 131 | .sp_reg = ORC_REG_UNDEFINED, | ||
| 132 | .bp_reg = ORC_REG_UNDEFINED, | ||
| 133 | .type = ORC_TYPE_CALL, | ||
| 134 | }; | ||
| 135 | |||
| 136 | sec = find_section_by_name(file->elf, ".orc_unwind"); | ||
| 137 | if (sec) { | ||
| 138 | WARN("file already has .orc_unwind section, skipping"); | ||
| 139 | return -1; | ||
| 140 | } | ||
| 141 | |||
| 142 | /* count the number of needed orcs */ | ||
| 143 | idx = 0; | ||
| 144 | for_each_sec(file, sec) { | ||
| 145 | if (!sec->text) | ||
| 146 | continue; | ||
| 147 | |||
| 148 | prev_insn = NULL; | ||
| 149 | sec_for_each_insn(file, sec, insn) { | ||
| 150 | if (!prev_insn || | ||
| 151 | memcmp(&insn->orc, &prev_insn->orc, | ||
| 152 | sizeof(struct orc_entry))) { | ||
| 153 | idx++; | ||
| 154 | } | ||
| 155 | prev_insn = insn; | ||
| 156 | } | ||
| 157 | |||
| 158 | /* section terminator */ | ||
| 159 | if (prev_insn) | ||
| 160 | idx++; | ||
| 161 | } | ||
| 162 | if (!idx) | ||
| 163 | return -1; | ||
| 164 | |||
| 165 | |||
| 166 | /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ | ||
| 167 | sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); | ||
| 168 | |||
| 169 | ip_relasec = elf_create_rela_section(file->elf, sec); | ||
| 170 | if (!ip_relasec) | ||
| 171 | return -1; | ||
| 172 | |||
| 173 | /* create .orc_unwind section */ | ||
| 174 | u_sec = elf_create_section(file->elf, ".orc_unwind", | ||
| 175 | sizeof(struct orc_entry), idx); | ||
| 176 | |||
| 177 | /* populate sections */ | ||
| 178 | idx = 0; | ||
| 179 | for_each_sec(file, sec) { | ||
| 180 | if (!sec->text) | ||
| 181 | continue; | ||
| 182 | |||
| 183 | prev_insn = NULL; | ||
| 184 | sec_for_each_insn(file, sec, insn) { | ||
| 185 | if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc, | ||
| 186 | sizeof(struct orc_entry))) { | ||
| 187 | |||
| 188 | if (create_orc_entry(u_sec, ip_relasec, idx, | ||
| 189 | insn->sec, insn->offset, | ||
| 190 | &insn->orc)) | ||
| 191 | return -1; | ||
| 192 | |||
| 193 | idx++; | ||
| 194 | } | ||
| 195 | prev_insn = insn; | ||
| 196 | } | ||
| 197 | |||
| 198 | /* section terminator */ | ||
| 199 | if (prev_insn) { | ||
| 200 | if (create_orc_entry(u_sec, ip_relasec, idx, | ||
| 201 | prev_insn->sec, | ||
| 202 | prev_insn->offset + prev_insn->len, | ||
| 203 | &empty)) | ||
| 204 | return -1; | ||
| 205 | |||
| 206 | idx++; | ||
| 207 | } | ||
| 208 | } | ||
| 209 | |||
| 210 | if (elf_rebuild_rela_section(ip_relasec)) | ||
| 211 | return -1; | ||
| 212 | |||
| 213 | return 0; | ||
| 214 | } | ||
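To make the CFI to ORC mapping in create_orc() concrete: immediately after a
"push %rbp" in an ordinary C function, objtool's state is cfa = {CFI_SP, 16}
and regs[CFI_BP] = {CFI_CFA, -16} (return address and saved RBP on the
stack), so the switch statements above would produce an entry like the
following (a worked example, not code from this patch):

	struct orc_entry after_push_rbp = {
		.sp_reg    = ORC_REG_SP,	/* CFA = current SP + 16 */
		.sp_offset = 16,
		.bp_reg    = ORC_REG_PREV_SP,	/* saved RBP lives at CFA - 16 */
		.bp_offset = -16,
		.type      = ORC_TYPE_CALL,
	};
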
diff --git a/tools/objtool/orc_types.h b/tools/objtool/orc_types.h new file mode 100644 index 000000000000..9c9dc579bd7d --- /dev/null +++ b/tools/objtool/orc_types.h | |||
| @@ -0,0 +1,107 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public License | ||
| 6 | * as published by the Free Software Foundation; either version 2 | ||
| 7 | * of the License, or (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #ifndef _ORC_TYPES_H | ||
| 19 | #define _ORC_TYPES_H | ||
| 20 | |||
| 21 | #include <linux/types.h> | ||
| 22 | #include <linux/compiler.h> | ||
| 23 | |||
| 24 | /* | ||
| 25 | * The ORC_REG_* registers are base registers which are used to find other | ||
| 26 | * registers on the stack. | ||
| 27 | * | ||
| 28 | * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the | ||
| 29 | * address of the previous frame: the caller's SP before it called the current | ||
| 30 | * function. | ||
| 31 | * | ||
| 32 | * ORC_REG_UNDEFINED means the corresponding register's value didn't change in | ||
| 33 | * the current frame. | ||
| 34 | * | ||
| 35 | * The most commonly used base registers are SP and BP -- which the previous SP | ||
| 36 | * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is | ||
| 37 | * usually based on. | ||
| 38 | * | ||
| 39 | * The rest of the base registers are needed for special cases like entry code | ||
| 40 | * and GCC realigned stacks. | ||
| 41 | */ | ||
| 42 | #define ORC_REG_UNDEFINED 0 | ||
| 43 | #define ORC_REG_PREV_SP 1 | ||
| 44 | #define ORC_REG_DX 2 | ||
| 45 | #define ORC_REG_DI 3 | ||
| 46 | #define ORC_REG_BP 4 | ||
| 47 | #define ORC_REG_SP 5 | ||
| 48 | #define ORC_REG_R10 6 | ||
| 49 | #define ORC_REG_R13 7 | ||
| 50 | #define ORC_REG_BP_INDIRECT 8 | ||
| 51 | #define ORC_REG_SP_INDIRECT 9 | ||
| 52 | #define ORC_REG_MAX 15 | ||
| 53 | |||
| 54 | /* | ||
| 55 | * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the | ||
| 56 | * caller's SP right before it made the call). Used for all callable | ||
| 57 | * functions, i.e. all C code and all callable asm functions. | ||
| 58 | * | ||
| 59 | * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points | ||
| 60 | * to a fully populated pt_regs from a syscall, interrupt, or exception. | ||
| 61 | * | ||
| 62 | * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset | ||
| 63 | * points to the iret return frame. | ||
| 64 | * | ||
| 65 | * The UNWIND_HINT macros are used only for the unwind_hint struct. They | ||
| 66 | * aren't used in struct orc_entry due to size and complexity constraints. | ||
| 67 | * Objtool converts them to real types when it converts the hints to orc | ||
| 68 | * entries. | ||
| 69 | */ | ||
| 70 | #define ORC_TYPE_CALL 0 | ||
| 71 | #define ORC_TYPE_REGS 1 | ||
| 72 | #define ORC_TYPE_REGS_IRET 2 | ||
| 73 | #define UNWIND_HINT_TYPE_SAVE 3 | ||
| 74 | #define UNWIND_HINT_TYPE_RESTORE 4 | ||
| 75 | |||
| 76 | #ifndef __ASSEMBLY__ | ||
| 77 | /* | ||
| 78 | * This struct is more or less a vastly simplified version of the DWARF Call | ||
| 79 | * Frame Information standard. It contains only the necessary parts of DWARF | ||
| 80 | * CFI, simplified for ease of access by the in-kernel unwinder. It tells the | ||
| 81 | * unwinder how to find the previous SP and BP (and sometimes entry regs) on | ||
| 82 | * the stack for a given code address. Each instance of the struct corresponds | ||
| 83 | * to one or more code locations. | ||
| 84 | */ | ||
| 85 | struct orc_entry { | ||
| 86 | s16 sp_offset; | ||
| 87 | s16 bp_offset; | ||
| 88 | unsigned sp_reg:4; | ||
| 89 | unsigned bp_reg:4; | ||
| 90 | unsigned type:2; | ||
| 91 | } __packed; | ||
| 92 | |||
| 93 | /* | ||
| 94 | * This struct is used by asm and inline asm code to manually annotate the | ||
| 95 | * location of registers on the stack for the ORC unwinder. | ||
| 96 | * | ||
| 97 | * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*. | ||
| 98 | */ | ||
| 99 | struct unwind_hint { | ||
| 100 | u32 ip; | ||
| 101 | s16 sp_offset; | ||
| 102 | u8 sp_reg; | ||
| 103 | u8 type; | ||
| 104 | }; | ||
| 105 | #endif /* __ASSEMBLY__ */ | ||
| 106 | |||
| 107 | #endif /* _ORC_TYPES_H */ | ||
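For a sense of how these types are meant to be consumed, here is a heavily
simplified sketch of one unwind step for ORC_TYPE_CALL entries (the real
consumer is the in-kernel x86 ORC unwinder; the function name and the
reduced set of handled base registers are invented for illustration):

	#include "orc_types.h"

	/* previous frame's SP for a CALL-type entry, or 0 if unsupported */
	static unsigned long orc_prev_sp(const struct orc_entry *e,
					 unsigned long sp, unsigned long bp)
	{
		switch (e->sp_reg) {
		case ORC_REG_SP:		/* typical C code */
			return sp + e->sp_offset;
		case ORC_REG_BP:		/* frame-pointer based frames */
			return bp + e->sp_offset;
		default:			/* entry regs etc. elided */
			return 0;
		}
	}
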
