author	Linus Torvalds <torvalds@linux-foundation.org>	2013-04-29 11:21:37 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-04-29 11:21:37 -0400
commit	916bb6d76dfa49b540baa3f7262792d1de7f1c24 (patch)
tree	8548db9da607ad8616125130969e7c2165ad00d5 /kernel
parent	d0b8883800c913f5cc0eb273c052bcac94ad44d8 (diff)
parent	2c522836627c6e78660f8bd52cdb4cdcb75e3e3c (diff)
Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking changes from Ingo Molnar:
 "The most noticeable change are mutex speedups from Waiman Long, for
  higher loads. These scalability changes should be most noticeable on
  larger server systems.

  There are also cleanups, fixes and debuggability improvements."

* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  lockdep: Consolidate bug messages into a single print_lockdep_off() function
  lockdep: Print out additional debugging advice when we hit lockdep BUGs
  mutex: Back out architecture specific check for negative mutex count
  mutex: Queue mutex spinners with MCS lock to reduce cacheline contention
  mutex: Make more scalable by doing less atomic operations
  mutex: Move mutex spinning code from sched/core.c back to mutex.c
  locking/rtmutex/tester: Set correct permissions on sysfs files
  lockdep: Remove unnecessary 'hlock_next' variable
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/lockdep.c	29
-rw-r--r--	kernel/mutex.c	151
-rw-r--r--	kernel/rtmutex-tester.c	5
-rw-r--r--	kernel/sched/core.c	45
-rw-r--r--	kernel/sched/features.h	7
5 files changed, 165 insertions(+), 72 deletions(-)
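
The centerpiece of the mutex work is the MCS-style spinner queue added to kernel/mutex.c below (mspin_lock()/mspin_unlock()): each would-be spinner waits on a flag in its own queue node instead of every CPU pounding the same lock word, so only the head of the queue competes for the mutex. As a reading aid, here is a minimal user-space sketch of that queueing idea using C11 atomics in place of the kernel's xchg()/cmpxchg()/ACCESS_ONCE() primitives; the mcs_* names are illustrative and not part of the patch.

#include <stdatomic.h>
#include <stddef.h>

struct mcs_node {
	_Atomic(struct mcs_node *) next;
	atomic_int locked;		/* set to 1 when the lock is handed to us */
};

/* Acquire: join the tail of the queue, then spin only on our own node. */
static void mcs_lock(_Atomic(struct mcs_node *) *tail, struct mcs_node *node)
{
	atomic_store(&node->next, NULL);
	atomic_store(&node->locked, 0);

	struct mcs_node *prev = atomic_exchange(tail, node);
	if (prev == NULL)
		return;				/* queue was empty: we own the lock */

	atomic_store(&prev->next, node);	/* publish ourselves to the predecessor */
	while (!atomic_load(&node->locked))
		;				/* spin on a private cache line */
}

/* Release: hand the lock to the next queued node, if any. */
static void mcs_unlock(_Atomic(struct mcs_node *) *tail, struct mcs_node *node)
{
	struct mcs_node *next = atomic_load(&node->next);

	if (next == NULL) {
		struct mcs_node *expected = node;
		/* No visible successor: try to reset the queue to empty. */
		if (atomic_compare_exchange_strong(tail, &expected, NULL))
			return;
		/* A successor is mid-enqueue; wait for its next pointer. */
		while ((next = atomic_load(&node->next)) == NULL)
			;
	}
	atomic_store(&next->locked, 1);
}

The kernel version in the diff below follows the same shape, with xchg() for the tail swap, cmpxchg() for the empty-queue release, and arch_mutex_cpu_relax() in the spin loops.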
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8a0efac4f99d..6a3bccba7e7d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -380,6 +380,13 @@ static int verbose(struct lock_class *class)
 unsigned long nr_stack_trace_entries;
 static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
 
+static void print_lockdep_off(const char *bug_msg)
+{
+	printk(KERN_DEBUG "%s\n", bug_msg);
+	printk(KERN_DEBUG "turning off the locking correctness validator.\n");
+	printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n");
+}
+
 static int save_trace(struct stack_trace *trace)
 {
 	trace->nr_entries = 0;
@@ -409,8 +416,7 @@ static int save_trace(struct stack_trace *trace)
 		if (!debug_locks_off_graph_unlock())
 			return 0;
 
-		printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
-		printk("turning off the locking correctness validator.\n");
+		print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
 		dump_stack();
 
 		return 0;
@@ -763,8 +769,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 		}
 		raw_local_irq_restore(flags);
 
-		printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
-		printk("turning off the locking correctness validator.\n");
+		print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
 		dump_stack();
 		return NULL;
 	}
@@ -834,8 +839,7 @@ static struct lock_list *alloc_list_entry(void)
 		if (!debug_locks_off_graph_unlock())
 			return NULL;
 
-		printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
-		printk("turning off the locking correctness validator.\n");
+		print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!");
 		dump_stack();
 		return NULL;
 	}
@@ -2000,7 +2004,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	struct lock_class *class = hlock_class(hlock);
 	struct list_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
-	struct held_lock *hlock_curr, *hlock_next;
+	struct held_lock *hlock_curr;
 	int i, j;
 
 	/*
@@ -2048,8 +2052,7 @@ cache_hit:
 		if (!debug_locks_off_graph_unlock())
 			return 0;
 
-		printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
-		printk("turning off the locking correctness validator.\n");
+		print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
 		dump_stack();
 		return 0;
 	}
@@ -2057,12 +2060,10 @@ cache_hit:
 	chain->chain_key = chain_key;
 	chain->irq_context = hlock->irq_context;
 	/* Find the first held_lock of current chain */
-	hlock_next = hlock;
 	for (i = curr->lockdep_depth - 1; i >= 0; i--) {
 		hlock_curr = curr->held_locks + i;
-		if (hlock_curr->irq_context != hlock_next->irq_context)
+		if (hlock_curr->irq_context != hlock->irq_context)
 			break;
-		hlock_next = hlock;
 	}
 	i++;
 	chain->depth = curr->lockdep_depth + 1 - i;
@@ -3190,9 +3191,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 #endif
 	if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
 		debug_locks_off();
-		printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n",
+		print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!");
+		printk(KERN_DEBUG "depth: %i max: %lu!\n",
 		       curr->lockdep_depth, MAX_LOCK_DEPTH);
-		printk("turning off the locking correctness validator.\n");
 
 		lockdep_print_held_locks(current);
 		debug_show_all_locks();
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 52f23011b6e0..ad53a664f113 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -37,6 +37,12 @@
 # include <asm/mutex.h>
 #endif
 
+/*
+ * A negative mutex count indicates that waiters are sleeping waiting for the
+ * mutex.
+ */
+#define	MUTEX_SHOW_NO_WAITER(mutex)	(atomic_read(&(mutex)->count) >= 0)
+
 void
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
@@ -44,6 +50,9 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 	spin_lock_init(&lock->wait_lock);
 	INIT_LIST_HEAD(&lock->wait_list);
 	mutex_clear_owner(lock);
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+	lock->spin_mlock = NULL;
+#endif
 
 	debug_mutex_init(lock, name, key);
 }
@@ -95,6 +104,124 @@ void __sched mutex_lock(struct mutex *lock)
 EXPORT_SYMBOL(mutex_lock);
 #endif
 
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+/*
+ * In order to avoid a stampede of mutex spinners from acquiring the mutex
+ * more or less simultaneously, the spinners need to acquire a MCS lock
+ * first before spinning on the owner field.
+ *
+ * We don't inline mspin_lock() so that perf can correctly account for the
+ * time spent in this lock function.
+ */
+struct mspin_node {
+	struct mspin_node *next ;
+	int		  locked;	/* 1 if lock acquired */
+};
+#define	MLOCK(mutex)	((struct mspin_node **)&((mutex)->spin_mlock))
+
+static noinline
+void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
+{
+	struct mspin_node *prev;
+
+	/* Init node */
+	node->locked = 0;
+	node->next   = NULL;
+
+	prev = xchg(lock, node);
+	if (likely(prev == NULL)) {
+		/* Lock acquired */
+		node->locked = 1;
+		return;
+	}
+	ACCESS_ONCE(prev->next) = node;
+	smp_wmb();
+	/* Wait until the lock holder passes the lock down */
+	while (!ACCESS_ONCE(node->locked))
+		arch_mutex_cpu_relax();
+}
+
+static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
+{
+	struct mspin_node *next = ACCESS_ONCE(node->next);
+
+	if (likely(!next)) {
+		/*
+		 * Release the lock by setting it to NULL
+		 */
+		if (cmpxchg(lock, node, NULL) == node)
+			return;
+		/* Wait until the next pointer is set */
+		while (!(next = ACCESS_ONCE(node->next)))
+			arch_mutex_cpu_relax();
+	}
+	ACCESS_ONCE(next->locked) = 1;
+	smp_wmb();
+}
+
+/*
+ * Mutex spinning code migrated from kernel/sched/core.c
+ */
+
+static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
+{
+	if (lock->owner != owner)
+		return false;
+
+	/*
+	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
+	 * lock->owner still matches owner, if that fails, owner might
+	 * point to free()d memory, if it still matches, the rcu_read_lock()
+	 * ensures the memory stays valid.
+	 */
+	barrier();
+
+	return owner->on_cpu;
+}
+
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+static noinline
+int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+{
+	rcu_read_lock();
+	while (owner_running(lock, owner)) {
+		if (need_resched())
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+	rcu_read_unlock();
+
+	/*
+	 * We break out the loop above on need_resched() and when the
+	 * owner changed, which is a sign for heavy contention. Return
+	 * success only when lock->owner is NULL.
+	 */
+	return lock->owner == NULL;
+}
+
+/*
+ * Initial check for entering the mutex spinning loop
+ */
+static inline int mutex_can_spin_on_owner(struct mutex *lock)
+{
+	int retval = 1;
+
+	rcu_read_lock();
+	if (lock->owner)
+		retval = lock->owner->on_cpu;
+	rcu_read_unlock();
+	/*
+	 * if lock->owner is not set, the mutex owner may have just acquired
+	 * it and not set the owner yet or the mutex has been released.
+	 */
+	return retval;
+}
+#endif
+
 static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
 
 /**
@@ -158,25 +285,39 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 *
 	 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
 	 * to serialize everything.
+	 *
+	 * The mutex spinners are queued up using MCS lock so that only one
+	 * spinner can compete for the mutex. However, if mutex spinning isn't
+	 * going to happen, there is no point in going through the lock/unlock
+	 * overhead.
 	 */
+	if (!mutex_can_spin_on_owner(lock))
+		goto slowpath;
 
 	for (;;) {
 		struct task_struct *owner;
+		struct mspin_node  node;
 
 		/*
 		 * If there's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
+		mspin_lock(MLOCK(lock), &node);
 		owner = ACCESS_ONCE(lock->owner);
-		if (owner && !mutex_spin_on_owner(lock, owner))
+		if (owner && !mutex_spin_on_owner(lock, owner)) {
+			mspin_unlock(MLOCK(lock), &node);
 			break;
+		}
 
-		if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
+		if ((atomic_read(&lock->count) == 1) &&
+		    (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
 			lock_acquired(&lock->dep_map, ip);
 			mutex_set_owner(lock);
+			mspin_unlock(MLOCK(lock), &node);
 			preempt_enable();
 			return 0;
 		}
+		mspin_unlock(MLOCK(lock), &node);
 
 		/*
 		 * When there's no owner, we might have preempted between the
@@ -195,6 +336,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 */
 		arch_mutex_cpu_relax();
 	}
+slowpath:
 #endif
 	spin_lock_mutex(&lock->wait_lock, flags);
 
@@ -205,7 +347,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	list_add_tail(&waiter.list, &lock->wait_list);
 	waiter.task = task;
 
-	if (atomic_xchg(&lock->count, -1) == 1)
+	if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, -1) == 1))
 		goto done;
 
 	lock_contended(&lock->dep_map, ip);
@@ -220,7 +362,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * that when we release the lock, we properly wake up the
 		 * other waiters:
 		 */
-		if (atomic_xchg(&lock->count, -1) == 1)
+		if (MUTEX_SHOW_NO_WAITER(lock) &&
+		    (atomic_xchg(&lock->count, -1) == 1))
 			break;
 
 		/*
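
Beyond the spinner queue, the mutex hunks above also cut down on atomic read-modify-write operations: the code now does a plain atomic_read() and only issues atomic_cmpxchg()/atomic_xchg() when the operation can actually succeed (the new MUTEX_SHOW_NO_WAITER() check), because even a failed RMW pulls the lock's cache line in exclusive state. A rough stand-alone illustration of this check-before-RMW pattern in C11 atomics follows; try_take() and its count encoding (1 = unlocked, 0 = locked, negative = waiters, mirroring struct mutex) are assumptions for the sketch, not kernel API.

#include <stdatomic.h>
#include <stdbool.h>

/* Lock word convention borrowed from struct mutex:
 * 1 = unlocked, 0 = locked with no waiters, <0 = locked with waiters. */
static bool try_take(atomic_int *count)
{
	/* Cheap shared read first: a failed compare-exchange still takes the
	 * cache line exclusively, so skip the RMW unless it can succeed. */
	if (atomic_load_explicit(count, memory_order_relaxed) != 1)
		return false;

	int expected = 1;
	return atomic_compare_exchange_strong(count, &expected, 0);
}

The same shape appears twice in __mutex_lock_common() above: atomic_read(&lock->count) == 1 guards the cmpxchg in the spin loop, and MUTEX_SHOW_NO_WAITER() guards the xchg to -1 in the wait loop.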
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 7890b10084a7..1d96dd0d93c1 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
+#include <linux/stat.h>
 
 #include "rtmutex.h"
 
@@ -366,8 +367,8 @@ static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *at
 	return curr - buf;
 }
 
-static DEVICE_ATTR(status, 0600, sysfs_test_status, NULL);
-static DEVICE_ATTR(command, 0600, NULL, sysfs_test_command);
+static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL);
+static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command);
 
 static struct bus_type rttest_subsys = {
 	.name = "rttest",
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67d04651f44b..42053547e0f5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2999,51 +2999,6 @@ void __sched schedule_preempt_disabled(void)
 	preempt_disable();
 }
 
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-	if (lock->owner != owner)
-		return false;
-
-	/*
-	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
-	 * lock->owner still matches owner, if that fails, owner might
-	 * point to free()d memory, if it still matches, the rcu_read_lock()
-	 * ensures the memory stays valid.
-	 */
-	barrier();
-
-	return owner->on_cpu;
-}
-
-/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
- */
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
-{
-	if (!sched_feat(OWNER_SPIN))
-		return 0;
-
-	rcu_read_lock();
-	while (owner_running(lock, owner)) {
-		if (need_resched())
-			break;
-
-		arch_mutex_cpu_relax();
-	}
-	rcu_read_unlock();
-
-	/*
-	 * We break out the loop above on need_resched() and when the
-	 * owner changed, which is a sign for heavy contention. Return
-	 * success only when lock->owner is NULL.
-	 */
-	return lock->owner == NULL;
-}
-#endif
-
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 1ad1d2b5395f..99399f8e4799 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -46,13 +46,6 @@ SCHED_FEAT(DOUBLE_TICK, false)
 SCHED_FEAT(LB_BIAS, true)
 
 /*
- * Spin-wait on mutex acquisition when the mutex owner is running on
- * another cpu -- assumes that when the owner is running, it will soon
- * release the lock. Decreases scheduling overhead.
- */
-SCHED_FEAT(OWNER_SPIN, true)
-
-/*
  * Decrement CPU power based on time not spent running tasks
  */
 SCHED_FEAT(NONTASK_POWER, true)