aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/mutex.h3
-rw-r--r--include/linux/sched.h1
-rw-r--r--kernel/lockdep.c29
-rw-r--r--kernel/mutex.c151
-rw-r--r--kernel/rtmutex-tester.c5
-rw-r--r--kernel/sched/core.c45
-rw-r--r--kernel/sched/features.h7
7 files changed, 168 insertions, 73 deletions
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 9121595a8ebf..433da8a1a426 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -53,6 +53,9 @@ struct mutex {
53#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) 53#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
54 struct task_struct *owner; 54 struct task_struct *owner;
55#endif 55#endif
56#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
57 void *spin_mlock; /* Spinner MCS lock */
58#endif
56#ifdef CONFIG_DEBUG_MUTEXES 59#ifdef CONFIG_DEBUG_MUTEXES
57 const char *name; 60 const char *name;
58 void *magic; 61 void *magic;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e692a022527b..2d02c76a01be 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -321,7 +321,6 @@ extern signed long schedule_timeout_killable(signed long timeout);
321extern signed long schedule_timeout_uninterruptible(signed long timeout); 321extern signed long schedule_timeout_uninterruptible(signed long timeout);
322asmlinkage void schedule(void); 322asmlinkage void schedule(void);
323extern void schedule_preempt_disabled(void); 323extern void schedule_preempt_disabled(void);
324extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
325 324
326struct nsproxy; 325struct nsproxy;
327struct user_namespace; 326struct user_namespace;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8a0efac4f99d..6a3bccba7e7d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -380,6 +380,13 @@ static int verbose(struct lock_class *class)
380unsigned long nr_stack_trace_entries; 380unsigned long nr_stack_trace_entries;
381static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; 381static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
382 382
383static void print_lockdep_off(const char *bug_msg)
384{
385 printk(KERN_DEBUG "%s\n", bug_msg);
386 printk(KERN_DEBUG "turning off the locking correctness validator.\n");
387 printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n");
388}
389
383static int save_trace(struct stack_trace *trace) 390static int save_trace(struct stack_trace *trace)
384{ 391{
385 trace->nr_entries = 0; 392 trace->nr_entries = 0;
@@ -409,8 +416,7 @@ static int save_trace(struct stack_trace *trace)
409 if (!debug_locks_off_graph_unlock()) 416 if (!debug_locks_off_graph_unlock())
410 return 0; 417 return 0;
411 418
412 printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n"); 419 print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
413 printk("turning off the locking correctness validator.\n");
414 dump_stack(); 420 dump_stack();
415 421
416 return 0; 422 return 0;
@@ -763,8 +769,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
763 } 769 }
764 raw_local_irq_restore(flags); 770 raw_local_irq_restore(flags);
765 771
766 printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); 772 print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
767 printk("turning off the locking correctness validator.\n");
768 dump_stack(); 773 dump_stack();
769 return NULL; 774 return NULL;
770 } 775 }
@@ -834,8 +839,7 @@ static struct lock_list *alloc_list_entry(void)
834 if (!debug_locks_off_graph_unlock()) 839 if (!debug_locks_off_graph_unlock())
835 return NULL; 840 return NULL;
836 841
837 printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); 842 print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!");
838 printk("turning off the locking correctness validator.\n");
839 dump_stack(); 843 dump_stack();
840 return NULL; 844 return NULL;
841 } 845 }
@@ -2000,7 +2004,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
2000 struct lock_class *class = hlock_class(hlock); 2004 struct lock_class *class = hlock_class(hlock);
2001 struct list_head *hash_head = chainhashentry(chain_key); 2005 struct list_head *hash_head = chainhashentry(chain_key);
2002 struct lock_chain *chain; 2006 struct lock_chain *chain;
2003 struct held_lock *hlock_curr, *hlock_next; 2007 struct held_lock *hlock_curr;
2004 int i, j; 2008 int i, j;
2005 2009
2006 /* 2010 /*
@@ -2048,8 +2052,7 @@ cache_hit:
2048 if (!debug_locks_off_graph_unlock()) 2052 if (!debug_locks_off_graph_unlock())
2049 return 0; 2053 return 0;
2050 2054
2051 printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); 2055 print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
2052 printk("turning off the locking correctness validator.\n");
2053 dump_stack(); 2056 dump_stack();
2054 return 0; 2057 return 0;
2055 } 2058 }
@@ -2057,12 +2060,10 @@ cache_hit:
2057 chain->chain_key = chain_key; 2060 chain->chain_key = chain_key;
2058 chain->irq_context = hlock->irq_context; 2061 chain->irq_context = hlock->irq_context;
2059 /* Find the first held_lock of current chain */ 2062 /* Find the first held_lock of current chain */
2060 hlock_next = hlock;
2061 for (i = curr->lockdep_depth - 1; i >= 0; i--) { 2063 for (i = curr->lockdep_depth - 1; i >= 0; i--) {
2062 hlock_curr = curr->held_locks + i; 2064 hlock_curr = curr->held_locks + i;
2063 if (hlock_curr->irq_context != hlock_next->irq_context) 2065 if (hlock_curr->irq_context != hlock->irq_context)
2064 break; 2066 break;
2065 hlock_next = hlock;
2066 } 2067 }
2067 i++; 2068 i++;
2068 chain->depth = curr->lockdep_depth + 1 - i; 2069 chain->depth = curr->lockdep_depth + 1 - i;
@@ -3190,9 +3191,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3190#endif 3191#endif
3191 if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { 3192 if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
3192 debug_locks_off(); 3193 debug_locks_off();
3193 printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n", 3194 print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!");
3195 printk(KERN_DEBUG "depth: %i max: %lu!\n",
3194 curr->lockdep_depth, MAX_LOCK_DEPTH); 3196 curr->lockdep_depth, MAX_LOCK_DEPTH);
3195 printk("turning off the locking correctness validator.\n");
3196 3197
3197 lockdep_print_held_locks(current); 3198 lockdep_print_held_locks(current);
3198 debug_show_all_locks(); 3199 debug_show_all_locks();
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 52f23011b6e0..ad53a664f113 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -37,6 +37,12 @@
37# include <asm/mutex.h> 37# include <asm/mutex.h>
38#endif 38#endif
39 39
40/*
41 * A negative mutex count indicates that waiters are sleeping waiting for the
42 * mutex.
43 */
44#define MUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) >= 0)
45
40void 46void
41__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) 47__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
42{ 48{
@@ -44,6 +50,9 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
44 spin_lock_init(&lock->wait_lock); 50 spin_lock_init(&lock->wait_lock);
45 INIT_LIST_HEAD(&lock->wait_list); 51 INIT_LIST_HEAD(&lock->wait_list);
46 mutex_clear_owner(lock); 52 mutex_clear_owner(lock);
53#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
54 lock->spin_mlock = NULL;
55#endif
47 56
48 debug_mutex_init(lock, name, key); 57 debug_mutex_init(lock, name, key);
49} 58}
@@ -95,6 +104,124 @@ void __sched mutex_lock(struct mutex *lock)
95EXPORT_SYMBOL(mutex_lock); 104EXPORT_SYMBOL(mutex_lock);
96#endif 105#endif
97 106
107#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
108/*
109 * In order to avoid a stampede of mutex spinners from acquiring the mutex
110 * more or less simultaneously, the spinners need to acquire a MCS lock
111 * first before spinning on the owner field.
112 *
113 * We don't inline mspin_lock() so that perf can correctly account for the
114 * time spent in this lock function.
115 */
116struct mspin_node {
117 struct mspin_node *next ;
118 int locked; /* 1 if lock acquired */
119};
120#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock))
121
122static noinline
123void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
124{
125 struct mspin_node *prev;
126
127 /* Init node */
128 node->locked = 0;
129 node->next = NULL;
130
131 prev = xchg(lock, node);
132 if (likely(prev == NULL)) {
133 /* Lock acquired */
134 node->locked = 1;
135 return;
136 }
137 ACCESS_ONCE(prev->next) = node;
138 smp_wmb();
139 /* Wait until the lock holder passes the lock down */
140 while (!ACCESS_ONCE(node->locked))
141 arch_mutex_cpu_relax();
142}
143
144static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
145{
146 struct mspin_node *next = ACCESS_ONCE(node->next);
147
148 if (likely(!next)) {
149 /*
150 * Release the lock by setting it to NULL
151 */
152 if (cmpxchg(lock, node, NULL) == node)
153 return;
154 /* Wait until the next pointer is set */
155 while (!(next = ACCESS_ONCE(node->next)))
156 arch_mutex_cpu_relax();
157 }
158 ACCESS_ONCE(next->locked) = 1;
159 smp_wmb();
160}
161
162/*
163 * Mutex spinning code migrated from kernel/sched/core.c
164 */
165
166static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
167{
168 if (lock->owner != owner)
169 return false;
170
171 /*
172 * Ensure we emit the owner->on_cpu, dereference _after_ checking
173 * lock->owner still matches owner, if that fails, owner might
174 * point to free()d memory, if it still matches, the rcu_read_lock()
175 * ensures the memory stays valid.
176 */
177 barrier();
178
179 return owner->on_cpu;
180}
181
182/*
183 * Look out! "owner" is an entirely speculative pointer
184 * access and not reliable.
185 */
186static noinline
187int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
188{
189 rcu_read_lock();
190 while (owner_running(lock, owner)) {
191 if (need_resched())
192 break;
193
194 arch_mutex_cpu_relax();
195 }
196 rcu_read_unlock();
197
198 /*
199 * We break out the loop above on need_resched() and when the
200 * owner changed, which is a sign for heavy contention. Return
201 * success only when lock->owner is NULL.
202 */
203 return lock->owner == NULL;
204}
205
206/*
207 * Initial check for entering the mutex spinning loop
208 */
209static inline int mutex_can_spin_on_owner(struct mutex *lock)
210{
211 int retval = 1;
212
213 rcu_read_lock();
214 if (lock->owner)
215 retval = lock->owner->on_cpu;
216 rcu_read_unlock();
217 /*
218 * if lock->owner is not set, the mutex owner may have just acquired
219 * it and not set the owner yet or the mutex has been released.
220 */
221 return retval;
222}
223#endif
224
98static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); 225static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
99 226
100/** 227/**
@@ -158,25 +285,39 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
158 * 285 *
159 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock 286 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
160 * to serialize everything. 287 * to serialize everything.
288 *
289 * The mutex spinners are queued up using MCS lock so that only one
290 * spinner can compete for the mutex. However, if mutex spinning isn't
291 * going to happen, there is no point in going through the lock/unlock
292 * overhead.
161 */ 293 */
294 if (!mutex_can_spin_on_owner(lock))
295 goto slowpath;
162 296
163 for (;;) { 297 for (;;) {
164 struct task_struct *owner; 298 struct task_struct *owner;
299 struct mspin_node node;
165 300
166 /* 301 /*
167 * If there's an owner, wait for it to either 302 * If there's an owner, wait for it to either
168 * release the lock or go to sleep. 303 * release the lock or go to sleep.
169 */ 304 */
305 mspin_lock(MLOCK(lock), &node);
170 owner = ACCESS_ONCE(lock->owner); 306 owner = ACCESS_ONCE(lock->owner);
171 if (owner && !mutex_spin_on_owner(lock, owner)) 307 if (owner && !mutex_spin_on_owner(lock, owner)) {
308 mspin_unlock(MLOCK(lock), &node);
172 break; 309 break;
310 }
173 311
174 if (atomic_cmpxchg(&lock->count, 1, 0) == 1) { 312 if ((atomic_read(&lock->count) == 1) &&
313 (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
175 lock_acquired(&lock->dep_map, ip); 314 lock_acquired(&lock->dep_map, ip);
176 mutex_set_owner(lock); 315 mutex_set_owner(lock);
316 mspin_unlock(MLOCK(lock), &node);
177 preempt_enable(); 317 preempt_enable();
178 return 0; 318 return 0;
179 } 319 }
320 mspin_unlock(MLOCK(lock), &node);
180 321
181 /* 322 /*
182 * When there's no owner, we might have preempted between the 323 * When there's no owner, we might have preempted between the
@@ -195,6 +336,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
195 */ 336 */
196 arch_mutex_cpu_relax(); 337 arch_mutex_cpu_relax();
197 } 338 }
339slowpath:
198#endif 340#endif
199 spin_lock_mutex(&lock->wait_lock, flags); 341 spin_lock_mutex(&lock->wait_lock, flags);
200 342
@@ -205,7 +347,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
205 list_add_tail(&waiter.list, &lock->wait_list); 347 list_add_tail(&waiter.list, &lock->wait_list);
206 waiter.task = task; 348 waiter.task = task;
207 349
208 if (atomic_xchg(&lock->count, -1) == 1) 350 if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, -1) == 1))
209 goto done; 351 goto done;
210 352
211 lock_contended(&lock->dep_map, ip); 353 lock_contended(&lock->dep_map, ip);
@@ -220,7 +362,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
220 * that when we release the lock, we properly wake up the 362 * that when we release the lock, we properly wake up the
221 * other waiters: 363 * other waiters:
222 */ 364 */
223 if (atomic_xchg(&lock->count, -1) == 1) 365 if (MUTEX_SHOW_NO_WAITER(lock) &&
366 (atomic_xchg(&lock->count, -1) == 1))
224 break; 367 break;
225 368
226 /* 369 /*
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 7890b10084a7..1d96dd0d93c1 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -14,6 +14,7 @@
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/timer.h> 15#include <linux/timer.h>
16#include <linux/freezer.h> 16#include <linux/freezer.h>
17#include <linux/stat.h>
17 18
18#include "rtmutex.h" 19#include "rtmutex.h"
19 20
@@ -366,8 +367,8 @@ static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *at
366 return curr - buf; 367 return curr - buf;
367} 368}
368 369
369static DEVICE_ATTR(status, 0600, sysfs_test_status, NULL); 370static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL);
370static DEVICE_ATTR(command, 0600, NULL, sysfs_test_command); 371static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command);
371 372
372static struct bus_type rttest_subsys = { 373static struct bus_type rttest_subsys = {
373 .name = "rttest", 374 .name = "rttest",
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67d04651f44b..42053547e0f5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2999,51 +2999,6 @@ void __sched schedule_preempt_disabled(void)
2999 preempt_disable(); 2999 preempt_disable();
3000} 3000}
3001 3001
3002#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
3003
3004static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
3005{
3006 if (lock->owner != owner)
3007 return false;
3008
3009 /*
3010 * Ensure we emit the owner->on_cpu, dereference _after_ checking
3011 * lock->owner still matches owner, if that fails, owner might
3012 * point to free()d memory, if it still matches, the rcu_read_lock()
3013 * ensures the memory stays valid.
3014 */
3015 barrier();
3016
3017 return owner->on_cpu;
3018}
3019
3020/*
3021 * Look out! "owner" is an entirely speculative pointer
3022 * access and not reliable.
3023 */
3024int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
3025{
3026 if (!sched_feat(OWNER_SPIN))
3027 return 0;
3028
3029 rcu_read_lock();
3030 while (owner_running(lock, owner)) {
3031 if (need_resched())
3032 break;
3033
3034 arch_mutex_cpu_relax();
3035 }
3036 rcu_read_unlock();
3037
3038 /*
3039 * We break out the loop above on need_resched() and when the
3040 * owner changed, which is a sign for heavy contention. Return
3041 * success only when lock->owner is NULL.
3042 */
3043 return lock->owner == NULL;
3044}
3045#endif
3046
3047#ifdef CONFIG_PREEMPT 3002#ifdef CONFIG_PREEMPT
3048/* 3003/*
3049 * this is the entry point to schedule() from in-kernel preemption 3004 * this is the entry point to schedule() from in-kernel preemption
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1ad1d2b5395f..99399f8e4799 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -46,13 +46,6 @@ SCHED_FEAT(DOUBLE_TICK, false)
46SCHED_FEAT(LB_BIAS, true) 46SCHED_FEAT(LB_BIAS, true)
47 47
48/* 48/*
49 * Spin-wait on mutex acquisition when the mutex owner is running on
50 * another cpu -- assumes that when the owner is running, it will soon
51 * release the lock. Decreases scheduling overhead.
52 */
53SCHED_FEAT(OWNER_SPIN, true)
54
55/*
56 * Decrement CPU power based on time not spent running tasks 49 * Decrement CPU power based on time not spent running tasks
57 */ 50 */
58SCHED_FEAT(NONTASK_POWER, true) 51SCHED_FEAT(NONTASK_POWER, true)