Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/lockdep.c        |  29
 -rw-r--r--  kernel/mutex.c          | 151
 -rw-r--r--  kernel/rtmutex-tester.c |   5
 -rw-r--r--  kernel/sched/core.c     |  45
 -rw-r--r--  kernel/sched/features.h |   7
 5 files changed, 165 insertions, 72 deletions
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8a0efac4f99d..6a3bccba7e7d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -380,6 +380,13 @@ static int verbose(struct lock_class *class)
 unsigned long nr_stack_trace_entries;
 static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
 
+static void print_lockdep_off(const char *bug_msg)
+{
+	printk(KERN_DEBUG "%s\n", bug_msg);
+	printk(KERN_DEBUG "turning off the locking correctness validator.\n");
+	printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n");
+}
+
 static int save_trace(struct stack_trace *trace)
 {
 	trace->nr_entries = 0;
@@ -409,8 +416,7 @@ static int save_trace(struct stack_trace *trace)
 		if (!debug_locks_off_graph_unlock())
 			return 0;
 
-		printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
-		printk("turning off the locking correctness validator.\n");
+		print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
 		dump_stack();
 
 		return 0;
@@ -763,8 +769,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	}
 	raw_local_irq_restore(flags);
 
-	printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
-	printk("turning off the locking correctness validator.\n");
+	print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
 	dump_stack();
 	return NULL;
 }
@@ -834,8 +839,7 @@ static struct lock_list *alloc_list_entry(void)
 		if (!debug_locks_off_graph_unlock())
 			return NULL;
 
-		printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
-		printk("turning off the locking correctness validator.\n");
+		print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!");
 		dump_stack();
 		return NULL;
 	}
@@ -2000,7 +2004,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	struct lock_class *class = hlock_class(hlock);
 	struct list_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
-	struct held_lock *hlock_curr, *hlock_next;
+	struct held_lock *hlock_curr;
 	int i, j;
 
 	/*
@@ -2048,8 +2052,7 @@ cache_hit:
 		if (!debug_locks_off_graph_unlock())
 			return 0;
 
-		printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
-		printk("turning off the locking correctness validator.\n");
+		print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
 		dump_stack();
 		return 0;
 	}
@@ -2057,12 +2060,10 @@ cache_hit:
 	chain->chain_key = chain_key;
 	chain->irq_context = hlock->irq_context;
 	/* Find the first held_lock of current chain */
-	hlock_next = hlock;
 	for (i = curr->lockdep_depth - 1; i >= 0; i--) {
 		hlock_curr = curr->held_locks + i;
-		if (hlock_curr->irq_context != hlock_next->irq_context)
+		if (hlock_curr->irq_context != hlock->irq_context)
 			break;
-		hlock_next = hlock;
 	}
 	i++;
 	chain->depth = curr->lockdep_depth + 1 - i;
@@ -3190,9 +3191,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 #endif
 	if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
 		debug_locks_off();
-		printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n",
+		print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!");
+		printk(KERN_DEBUG "depth: %i max: %lu!\n",
 		       curr->lockdep_depth, MAX_LOCK_DEPTH);
-		printk("turning off the locking correctness validator.\n");
 
 		lockdep_print_held_locks(current);
 		debug_show_all_locks();
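The lookup_chain_cache() hunk above can drop hlock_next because the variable was set to hlock before the loop and reset to hlock at the end of every iteration, so it could never differ from hlock; comparing against hlock->irq_context directly is equivalent. A minimal, self-contained sketch of the resulting scan, using a stripped-down held_lock and a hypothetical helper name rather than the kernel code:

/* Stripped-down stand-in for the kernel's struct held_lock. */
struct held_lock {
	unsigned int irq_context;
};

/*
 * Walk the held-lock stack backwards and return the index of the first
 * entry still in the same irq context as the lock being added, mirroring
 * the simplified loop in lookup_chain_cache(); the new chain's depth is
 * then lockdep_depth + 1 - (returned index).
 */
static int first_held_lock_in_context(const struct held_lock *held_locks,
				      int lockdep_depth,
				      const struct held_lock *hlock)
{
	int i;

	for (i = lockdep_depth - 1; i >= 0; i--) {
		if (held_locks[i].irq_context != hlock->irq_context)
			break;	/* crossed into an earlier irq context */
	}
	return i + 1;
}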
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 52f23011b6e0..ad53a664f113 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -37,6 +37,12 @@
 # include <asm/mutex.h>
 #endif
 
+/*
+ * A negative mutex count indicates that waiters are sleeping waiting for the
+ * mutex.
+ */
+#define	MUTEX_SHOW_NO_WAITER(mutex)	(atomic_read(&(mutex)->count) >= 0)
+
 void
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
@@ -44,6 +50,9 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 	spin_lock_init(&lock->wait_lock);
 	INIT_LIST_HEAD(&lock->wait_list);
 	mutex_clear_owner(lock);
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+	lock->spin_mlock = NULL;
+#endif
 
 	debug_mutex_init(lock, name, key);
 }
@@ -95,6 +104,124 @@ void __sched mutex_lock(struct mutex *lock)
 EXPORT_SYMBOL(mutex_lock);
 #endif
 
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+/*
+ * In order to avoid a stampede of mutex spinners from acquiring the mutex
+ * more or less simultaneously, the spinners need to acquire a MCS lock
+ * first before spinning on the owner field.
+ *
+ * We don't inline mspin_lock() so that perf can correctly account for the
+ * time spent in this lock function.
+ */
+struct mspin_node {
+	struct mspin_node *next ;
+	int		  locked;	/* 1 if lock acquired */
+};
+#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
+
+static noinline
+void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
+{
+	struct mspin_node *prev;
+
+	/* Init node */
+	node->locked = 0;
+	node->next   = NULL;
+
+	prev = xchg(lock, node);
+	if (likely(prev == NULL)) {
+		/* Lock acquired */
+		node->locked = 1;
+		return;
+	}
+	ACCESS_ONCE(prev->next) = node;
+	smp_wmb();
+	/* Wait until the lock holder passes the lock down */
+	while (!ACCESS_ONCE(node->locked))
+		arch_mutex_cpu_relax();
+}
+
+static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
+{
+	struct mspin_node *next = ACCESS_ONCE(node->next);
+
+	if (likely(!next)) {
+		/*
+		 * Release the lock by setting it to NULL
+		 */
+		if (cmpxchg(lock, node, NULL) == node)
+			return;
+		/* Wait until the next pointer is set */
+		while (!(next = ACCESS_ONCE(node->next)))
+			arch_mutex_cpu_relax();
+	}
+	ACCESS_ONCE(next->locked) = 1;
+	smp_wmb();
+}
+
+/*
+ * Mutex spinning code migrated from kernel/sched/core.c
+ */
+
+static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
+{
+	if (lock->owner != owner)
+		return false;
+
+	/*
+	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
+	 * lock->owner still matches owner, if that fails, owner might
+	 * point to free()d memory, if it still matches, the rcu_read_lock()
+	 * ensures the memory stays valid.
+	 */
+	barrier();
+
+	return owner->on_cpu;
+}
+
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+static noinline
+int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+{
+	rcu_read_lock();
+	while (owner_running(lock, owner)) {
+		if (need_resched())
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+	rcu_read_unlock();
+
+	/*
+	 * We break out the loop above on need_resched() and when the
+	 * owner changed, which is a sign for heavy contention. Return
+	 * success only when lock->owner is NULL.
+	 */
+	return lock->owner == NULL;
+}
+
+/*
+ * Initial check for entering the mutex spinning loop
+ */
+static inline int mutex_can_spin_on_owner(struct mutex *lock)
+{
+	int retval = 1;
+
+	rcu_read_lock();
+	if (lock->owner)
+		retval = lock->owner->on_cpu;
+	rcu_read_unlock();
+	/*
+	 * if lock->owner is not set, the mutex owner may have just acquired
+	 * it and not set the owner yet or the mutex has been released.
+	 */
+	return retval;
+}
+#endif
+
 static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
 
 /**
@@ -158,25 +285,39 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 *
 	 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
 	 * to serialize everything.
+	 *
+	 * The mutex spinners are queued up using MCS lock so that only one
+	 * spinner can compete for the mutex. However, if mutex spinning isn't
+	 * going to happen, there is no point in going through the lock/unlock
+	 * overhead.
 	 */
+	if (!mutex_can_spin_on_owner(lock))
+		goto slowpath;
 
 	for (;;) {
 		struct task_struct *owner;
+		struct mspin_node  node;
 
 		/*
 		 * If there's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
+		mspin_lock(MLOCK(lock), &node);
 		owner = ACCESS_ONCE(lock->owner);
-		if (owner && !mutex_spin_on_owner(lock, owner))
+		if (owner && !mutex_spin_on_owner(lock, owner)) {
+			mspin_unlock(MLOCK(lock), &node);
 			break;
+		}
 
-		if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
+		if ((atomic_read(&lock->count) == 1) &&
+		    (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
 			lock_acquired(&lock->dep_map, ip);
 			mutex_set_owner(lock);
+			mspin_unlock(MLOCK(lock), &node);
 			preempt_enable();
 			return 0;
 		}
+		mspin_unlock(MLOCK(lock), &node);
 
 		/*
 		 * When there's no owner, we might have preempted between the
@@ -195,6 +336,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 */
 		arch_mutex_cpu_relax();
 	}
+slowpath:
 #endif
 	spin_lock_mutex(&lock->wait_lock, flags);
 
@@ -205,7 +347,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	list_add_tail(&waiter.list, &lock->wait_list);
 	waiter.task = task;
 
-	if (atomic_xchg(&lock->count, -1) == 1)
+	if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, -1) == 1))
 		goto done;
 
 	lock_contended(&lock->dep_map, ip);
@@ -220,7 +362,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * that when we release the lock, we properly wake up the
 		 * other waiters:
 		 */
-		if (atomic_xchg(&lock->count, -1) == 1)
+		if (MUTEX_SHOW_NO_WAITER(lock) &&
+		    (atomic_xchg(&lock->count, -1) == 1))
 			break;
 
 		/*
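The mspin_lock()/mspin_unlock() pair added above is an MCS-style queued lock: each spinner enqueues a node that lives on its own stack (the struct mspin_node node in __mutex_lock_common()) and spins on a flag inside that node rather than on a shared word, so waiting CPUs stop bouncing the mutex cacheline and the lock is handed to exactly one successor at a time. Below is a rough userspace sketch of the same handoff written with C11 atomics so it can be compiled outside the kernel; the names (mcs_node, mcs_lock, mcs_unlock) are illustrative only, and the kernel version relies on xchg/cmpxchg, ACCESS_ONCE() and smp_wmb() instead of these primitives:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct mcs_node {
	struct mcs_node *_Atomic next;	/* successor in the wait queue */
	atomic_bool locked;		/* true once the lock is handed to us */
};

static void mcs_lock(struct mcs_node *_Atomic *lock, struct mcs_node *node)
{
	struct mcs_node *prev;

	atomic_store_explicit(&node->next, NULL, memory_order_relaxed);
	atomic_store_explicit(&node->locked, false, memory_order_relaxed);

	/* Swing the tail pointer to ourselves; the old tail is our predecessor. */
	prev = atomic_exchange_explicit(lock, node, memory_order_acq_rel);
	if (prev == NULL)
		return;		/* queue was empty: we own the lock */

	/* Link in behind the predecessor, then spin on our private flag. */
	atomic_store_explicit(&prev->next, node, memory_order_release);
	while (!atomic_load_explicit(&node->locked, memory_order_acquire))
		;		/* a cpu_relax()/pause hint would go here */
}

static void mcs_unlock(struct mcs_node *_Atomic *lock, struct mcs_node *node)
{
	struct mcs_node *next = atomic_load_explicit(&node->next, memory_order_acquire);

	if (next == NULL) {
		struct mcs_node *expected = node;

		/* No visible successor: try to mark the queue empty again. */
		if (atomic_compare_exchange_strong_explicit(lock, &expected, NULL,
				memory_order_acq_rel, memory_order_acquire))
			return;

		/* A successor won the tail swap; wait for it to link itself in. */
		while (!(next = atomic_load_explicit(&node->next, memory_order_acquire)))
			;
	}
	/* Hand the lock directly to the successor. */
	atomic_store_explicit(&next->locked, true, memory_order_release);
}

The other recurring change in this file is the MUTEX_SHOW_NO_WAITER()/atomic_read() test placed in front of atomic_cmpxchg()/atomic_xchg(): reading the count first skips the locked read-modify-write (which dirties the cacheline) whenever it is already known it cannot take the lock, the same test-and-test-and-set idea used in classic spinlocks.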
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 7890b10084a7..1d96dd0d93c1 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
+#include <linux/stat.h>
 
 #include "rtmutex.h"
 
@@ -366,8 +367,8 @@ static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *at
 	return curr - buf;
 }
 
-static DEVICE_ATTR(status, 0600, sysfs_test_status, NULL);
-static DEVICE_ATTR(command, 0600, NULL, sysfs_test_command);
+static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL);
+static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command);
 
 static struct bus_type rttest_subsys = {
 	.name	= "rttest",
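The rtmutex-tester change is a permission tightening as much as a readability cleanup: with the symbolic macros (made available by the added <linux/stat.h> include), each attribute keeps only the bit it actually uses, whereas 0600 granted both. For reference, the standard owner mode-bit values, shown here purely as an illustration:

#define S_IRUSR 00400	/* owner may read  */
#define S_IWUSR 00200	/* owner may write */

/*
 * 0600 == S_IRUSR | S_IWUSR: the status attribute has no store handler, so it
 * keeps only the read bit, and the command attribute has no show handler, so
 * it keeps only the write bit.
 */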
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67d04651f44b..42053547e0f5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2999,51 +2999,6 @@ void __sched schedule_preempt_disabled(void)
 	preempt_disable();
 }
 
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-	if (lock->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * lock->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
-
-	return owner->on_cpu;
-}
-
-/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
- */
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
-{
-	if (!sched_feat(OWNER_SPIN))
-		return 0;
-
-	rcu_read_lock();
-	while (owner_running(lock, owner)) {
-		if (need_resched())
-			break;
-
-		arch_mutex_cpu_relax();
-	}
-	rcu_read_unlock();
-
-	/*
-	 * We break out the loop above on need_resched() and when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when lock->owner is NULL.
-	 */
-	return lock->owner == NULL;
-}
-#endif
-
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1ad1d2b5395f..99399f8e4799 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -46,13 +46,6 @@ SCHED_FEAT(DOUBLE_TICK, false)
 SCHED_FEAT(LB_BIAS, true)
 
 /*
- * Spin-wait on mutex acquisition when the mutex owner is running on
- * another cpu -- assumes that when the owner is running, it will soon
- * release the lock. Decreases scheduling overhead.
- */
-SCHED_FEAT(OWNER_SPIN, true)
-
-/*
  * Decrement CPU power based on time not spent running tasks
  */
 SCHED_FEAT(NONTASK_POWER, true)