author	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-22 22:25:09 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-22 22:25:09 -0500
commit	3b5d8510b94a95e493e8c4951ffc3d1cf6a6792d (patch)
tree	4493367715c7a15e9057e6c72fa3c3edfd0605ad
parent	c47f39e3b75e1138823984ad5079547c7a41b726 (diff)
parent	41ef8f826692c8f65882bec0a8211bd4d1d2d19a (diff)
Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core locking changes from Ingo Molnar:
 "The biggest change is the rwsem lock-steal improvements, both to the
  assembly optimized and the spinlock based variants.

  The other notable change is the clean up of the seqlock implementation
  to be based on the seqcount infrastructure.

  The rest is assorted smaller debuggability, cleanup and continued -rt
  locking changes."

* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  rwsem-spinlock: Implement writer lock-stealing for better scalability
  futex: Revert "futex: Mark get_robust_list as deprecated"
  generic: Use raw local irq variant for generic cmpxchg
  lockdep: Selftest: convert spinlock to raw spinlock
  seqlock: Use seqcount infrastructure
  seqlock: Remove unused functions
  ntp: Make ntp_lock raw
  intel_idle: Convert i7300_idle_lock to raw_spinlock
  locking: Various static lock initializer fixes
  lockdep: Print more info when MAX_LOCK_DEPTH is exceeded
  rwsem: Implement writer lock-stealing for better scalability
  lockdep: Silence warning if CONFIG_LOCKDEP isn't set
  watchdog: Use local_clock for get_timestamp()
  lockdep: Rename print_unlock_inbalance_bug() to print_unlock_imbalance_bug()
  locking/stat: Fix a typo
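The headline rwsem change replaces the old "grant the lock to the queued writer at wakeup" protocol with one where the waker merely wakes the first queued writer and lets it race to claim the count, so a writer that is already running on a CPU can steal the lock without a sleep/wakeup round trip. A toy user-space rendering of the race follows (illustrative names only; the real code operates on sem->activity and sem->count, as the lib/rwsem*.c hunks below show):

/*
 * Toy illustration of writer lock-stealing, not the kernel code.
 * Whichever writer observes activity == 0 first takes the lock;
 * a queued writer that loses the race simply goes back to sleep.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wakeup    = PTHREAD_COND_INITIALIZER;
static int activity;			/* 0 == free, -1 == writer holds it */

static void write_lock(void)
{
	pthread_mutex_lock(&wait_lock);
	while (activity != 0)			/* steal if free ... */
		pthread_cond_wait(&wakeup, &wait_lock);	/* ... else sleep */
	activity = -1;				/* we won the race */
	pthread_mutex_unlock(&wait_lock);
}

static void write_unlock(void)
{
	pthread_mutex_lock(&wait_lock);
	activity = 0;
	pthread_cond_signal(&wakeup);		/* wake one waiter; it re-checks */
	pthread_mutex_unlock(&wait_lock);
}

static void *writer(void *arg)
{
	write_lock();
	printf("writer %ld got the lock\n", (long)arg);
	write_unlock();
	return NULL;
}

int main(void)
{
	pthread_t t[2];

	pthread_create(&t[0], NULL, writer, (void *)0L);
	pthread_create(&t[1], NULL, writer, (void *)1L);
	pthread_join(t[0], NULL);
	pthread_join(t[1], NULL);
	return 0;
}

The point of the change is visible in the loop: a writer that finds the lock free takes it immediately, even if it was queued behind a writer that has not been scheduled yet.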
-rw-r--r--	Documentation/lockstat.txt	2
-rw-r--r--	drivers/char/random.c	6
-rw-r--r--	drivers/idle/i7300_idle.c	8
-rw-r--r--	drivers/usb/chipidea/debug.c	2
-rw-r--r--	fs/file.c	2
-rw-r--r--	include/asm-generic/cmpxchg-local.h	8
-rw-r--r--	include/linux/idr.h	2
-rw-r--r--	include/linux/lockdep.h	2
-rw-r--r--	include/linux/seqlock.h	193
-rw-r--r--	kernel/futex.c	2
-rw-r--r--	kernel/futex_compat.c	2
-rw-r--r--	kernel/lockdep.c	15
-rw-r--r--	kernel/time/ntp.c	26
-rw-r--r--	kernel/watchdog.c	10
-rw-r--r--	lib/locking-selftest.c	34
-rw-r--r--	lib/rwsem-spinlock.c	69
-rw-r--r--	lib/rwsem.c	75
17 files changed, 221 insertions(+), 237 deletions(-)
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index cef00d42ed5b..dd2f7b26ca30 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -65,7 +65,7 @@ that had to wait on lock acquisition.
 
  - CONFIGURATION
 
-Lock statistics are enabled via CONFIG_LOCK_STATS.
+Lock statistics are enabled via CONFIG_LOCK_STAT.
 
  - USAGE
 
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 85e81ec1451e..594bda9dcfc8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -445,7 +445,7 @@ static struct entropy_store input_pool = {
 	.poolinfo = &poolinfo_table[0],
 	.name = "input",
 	.limit = 1,
-	.lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
+	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
 	.pool = input_pool_data
 };
 
@@ -454,7 +454,7 @@ static struct entropy_store blocking_pool = {
 	.name = "blocking",
 	.limit = 1,
 	.pull = &input_pool,
-	.lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
+	.lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
 	.pool = blocking_pool_data
 };
 
@@ -462,7 +462,7 @@ static struct entropy_store nonblocking_pool = {
 	.poolinfo = &poolinfo_table[1],
 	.name = "nonblocking",
 	.pull = &input_pool,
-	.lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
+	.lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
 	.pool = nonblocking_pool_data
 };
 
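These three hunks, together with the chipidea, fs/file.c and idr.h hunks below, make up "locking: Various static lock initializer fixes": __SPIN_LOCK_UNLOCKED() and __RW_LOCK_UNLOCKED() expect the lock variable itself, whose name lockdep stringifies for the lock class, not a pointer to it. A hedged sketch of the intended pattern (my_dev is an illustrative name):

#include <linux/spinlock.h>

/* For a standalone lock, DEFINE_SPINLOCK() is the usual spelling: */
static DEFINE_SPINLOCK(my_lock);

/* Inside a static struct initializer, pass the member, not its address: */
struct my_dev {
	spinlock_t	lock;
	int		count;
};

static struct my_dev my_dev = {
	.lock	= __SPIN_LOCK_UNLOCKED(my_dev.lock),	/* not &my_dev.lock */
	.count	= 0,
};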
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index fa080ebd568f..ffeebc7e9f1c 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -75,7 +75,7 @@ static unsigned long past_skip;
 
 static struct pci_dev *fbd_dev;
 
-static spinlock_t i7300_idle_lock;
+static raw_spinlock_t i7300_idle_lock;
 static int i7300_idle_active;
 
 static u8 i7300_idle_thrtctl_saved;
@@ -457,7 +457,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
 		idle_begin_time = ktime_get();
 	}
 
-	spin_lock_irqsave(&i7300_idle_lock, flags);
+	raw_spin_lock_irqsave(&i7300_idle_lock, flags);
 	if (val == IDLE_START) {
 
 		cpumask_set_cpu(smp_processor_id(), idle_cpumask);
@@ -506,7 +506,7 @@ static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
 		}
 	}
 end:
-	spin_unlock_irqrestore(&i7300_idle_lock, flags);
+	raw_spin_unlock_irqrestore(&i7300_idle_lock, flags);
 	return 0;
 }
 
@@ -548,7 +548,7 @@ struct debugfs_file_info {
 
 static int __init i7300_idle_init(void)
 {
-	spin_lock_init(&i7300_idle_lock);
+	raw_spin_lock_init(&i7300_idle_lock);
 	total_us = 0;
 
 	if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index 3bc244d2636a..a62c4a47d52c 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -222,7 +222,7 @@ static struct {
 } dbg_data = {
 	.idx = 0,
 	.tty = 0,
-	.lck = __RW_LOCK_UNLOCKED(lck)
+	.lck = __RW_LOCK_UNLOCKED(dbg_data.lck)
 };
 
 /**
diff --git a/fs/file.c b/fs/file.c
index 2b3570b7caeb..3906d9577a18 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -516,7 +516,7 @@ struct files_struct init_files = {
 		.close_on_exec	= init_files.close_on_exec_init,
 		.open_fds	= init_files.open_fds_init,
 	},
-	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
+	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
 };
 
 /*
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
index 2533fddd34a6..d8d4c898c1bb 100644
--- a/include/asm-generic/cmpxchg-local.h
+++ b/include/asm-generic/cmpxchg-local.h
@@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
 	if (size == 8 && sizeof(unsigned long) != 8)
 		wrong_size_cmpxchg(ptr);
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	switch (size) {
 	case 1: prev = *(u8 *)ptr;
 		if (prev == old)
@@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
 	default:
 		wrong_size_cmpxchg(ptr);
 	}
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 	return prev;
 }
 
@@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr,
 	u64 prev;
 	unsigned long flags;
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	prev = *(u64 *)ptr;
 	if (prev == old)
 		*(u64 *)ptr = new;
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 	return prev;
 }
 
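Switching to raw_local_irq_save()/raw_local_irq_restore() keeps this emulation out of the irq-tracing and lockdep machinery, which matters because the generic cmpxchg fallback can be called from code those facilities themselves instrument. The logic is unchanged: the compare-and-swap is made atomic against the local CPU by masking interrupts around it. A user-space sketch of that logic, with a mutex standing in for the interrupt masking (illustration only, not kernel code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t fake_irq_mask = PTHREAD_MUTEX_INITIALIZER;

static unsigned long cmpxchg_local_emulated(unsigned long *ptr,
					    unsigned long old,
					    unsigned long new)
{
	unsigned long prev;

	pthread_mutex_lock(&fake_irq_mask);	/* ~ raw_local_irq_save() */
	prev = *ptr;
	if (prev == old)
		*ptr = new;
	pthread_mutex_unlock(&fake_irq_mask);	/* ~ raw_local_irq_restore() */
	return prev;				/* caller checks prev == old */
}

int main(void)
{
	unsigned long v = 0;

	if (cmpxchg_local_emulated(&v, 0, 1) == 0)
		printf("swap succeeded, v = %lu\n", v);
	return 0;
}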
diff --git a/include/linux/idr.h b/include/linux/idr.h
index de7e190f1af4..e5eb125effe6 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -136,7 +136,7 @@ struct ida {
 	struct ida_bitmap	*free_bitmap;
 };
 
-#define IDA_INIT(name)		{ .idr = IDR_INIT(name), .free_bitmap = NULL, }
+#define IDA_INIT(name)		{ .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
 #define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
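This belongs to the same static-initializer series: IDR_INIT() builds a static lock initializer from its argument, so IDA_INIT() must hand it the embedded .idr member rather than the whole ida. A sketch of what a definition expands to after the fix (assuming this kernel's IDR_INIT()):

#include <linux/idr.h>

static DEFINE_IDA(my_ida);	/* my_ida is an illustrative name */
/*
 * roughly equivalent to:
 *
 *	struct ida my_ida = {
 *		.idr		= IDR_INIT((my_ida).idr),
 *		.free_bitmap	= NULL,
 *	};
 */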
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index bfe88c4aa251..f1e877b79ed8 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -412,7 +412,7 @@ struct lock_class_key { };
 
 #define lockdep_depth(tsk)	(0)
 
-#define lockdep_assert_held(l)			do { } while (0)
+#define lockdep_assert_held(l)			do { (void)(l); } while (0)
 
 #define lockdep_recursing(tsk)			(0)
 
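The added (void)(l) is what silences the warning named in the changelog ("lockdep: Silence warning if CONFIG_LOCKDEP isn't set"): with the old empty stub, a variable whose only use was the assertion looked unused to the compiler. A standalone sketch of the effect, with stub macros mimicking the two versions (illustration only):

#include <stdio.h>

/* Stand-ins for the !CONFIG_LOCKDEP stubs: */
#define old_lockdep_assert_held(l)	do { } while (0)
#define new_lockdep_assert_held(l)	do { (void)(l); } while (0)

static int shared;

static void bump(void)
{
	int *lock = &shared;		/* only "use" is the assertion */

	new_lockdep_assert_held(lock);	/* old stub: -Wunused-variable here */
	shared++;
}

int main(void)
{
	bump();
	printf("shared = %d\n", shared);
	return 0;
}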
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 600060e25ec6..18299057402f 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -30,92 +30,12 @@
 #include <linux/preempt.h>
 #include <asm/processor.h>
 
-typedef struct {
-	unsigned sequence;
-	spinlock_t lock;
-} seqlock_t;
-
-/*
- * These macros triggered gcc-3.x compile-time problems.  We think these are
- * OK now.  Be cautious.
- */
-#define __SEQLOCK_UNLOCKED(lockname) \
-		 { 0, __SPIN_LOCK_UNLOCKED(lockname) }
-
-#define seqlock_init(x)					\
-	do {						\
-		(x)->sequence = 0;			\
-		spin_lock_init(&(x)->lock);		\
-	} while (0)
-
-#define DEFINE_SEQLOCK(x) \
-		seqlock_t x = __SEQLOCK_UNLOCKED(x)
-
-/* Lock out other writers and update the count.
- * Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
- */
-static inline void write_seqlock(seqlock_t *sl)
-{
-	spin_lock(&sl->lock);
-	++sl->sequence;
-	smp_wmb();
-}
-
-static inline void write_sequnlock(seqlock_t *sl)
-{
-	smp_wmb();
-	sl->sequence++;
-	spin_unlock(&sl->lock);
-}
-
-static inline int write_tryseqlock(seqlock_t *sl)
-{
-	int ret = spin_trylock(&sl->lock);
-
-	if (ret) {
-		++sl->sequence;
-		smp_wmb();
-	}
-	return ret;
-}
-
-/* Start of read calculation -- fetch last complete writer token */
-static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
-{
-	unsigned ret;
-
-repeat:
-	ret = ACCESS_ONCE(sl->sequence);
-	if (unlikely(ret & 1)) {
-		cpu_relax();
-		goto repeat;
-	}
-	smp_rmb();
-
-	return ret;
-}
-
-/*
- * Test if reader processed invalid data.
- *
- * If sequence value changed then writer changed data while in section.
- */
-static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
-{
-	smp_rmb();
-
-	return unlikely(sl->sequence != start);
-}
-
-
 /*
  * Version using sequence counter only.
  * This can be used when code has its own mutex protecting the
  * updating starting before the write_seqcount_beqin() and ending
  * after the write_seqcount_end().
  */
-
 typedef struct seqcount {
 	unsigned sequence;
 } seqcount_t;
@@ -218,7 +138,6 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
 	smp_rmb();
-
 	return __read_seqcount_retry(s, start);
 }
 
@@ -252,31 +171,101 @@ static inline void write_seqcount_barrier(seqcount_t *s)
 	s->sequence+=2;
 }
 
+typedef struct {
+	struct seqcount seqcount;
+	spinlock_t lock;
+} seqlock_t;
+
 /*
- * Possible sw/hw IRQ protected versions of the interfaces.
+ * These macros triggered gcc-3.x compile-time problems.  We think these are
+ * OK now.  Be cautious.
  */
-#define write_seqlock_irqsave(lock, flags)				\
-	do { local_irq_save(flags); write_seqlock(lock); } while (0)
-#define write_seqlock_irq(lock)						\
-	do { local_irq_disable();   write_seqlock(lock); } while (0)
-#define write_seqlock_bh(lock)						\
-	do { local_bh_disable();    write_seqlock(lock); } while (0)
+#define __SEQLOCK_UNLOCKED(lockname)			\
+	{						\
+		.seqcount = SEQCNT_ZERO,		\
+		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
+	}
+
+#define seqlock_init(x)					\
+	do {						\
+		seqcount_init(&(x)->seqcount);		\
+		spin_lock_init(&(x)->lock);		\
+	} while (0)
 
-#define write_sequnlock_irqrestore(lock, flags)				\
-	do { write_sequnlock(lock); local_irq_restore(flags); } while(0)
-#define write_sequnlock_irq(lock)					\
-	do { write_sequnlock(lock); local_irq_enable(); } while(0)
-#define write_sequnlock_bh(lock)					\
-	do { write_sequnlock(lock); local_bh_enable(); } while(0)
+#define DEFINE_SEQLOCK(x) \
+		seqlock_t x = __SEQLOCK_UNLOCKED(x)
 
-#define read_seqbegin_irqsave(lock, flags)				\
-	({ local_irq_save(flags);   read_seqbegin(lock); })
+/*
+ * Read side functions for starting and finalizing a read side section.
+ */
+static inline unsigned read_seqbegin(const seqlock_t *sl)
+{
+	return read_seqcount_begin(&sl->seqcount);
+}
 
-#define read_seqretry_irqrestore(lock, iv, flags)			\
-	({								\
-		int ret = read_seqretry(lock, iv);			\
-		local_irq_restore(flags);				\
-		ret;							\
-	})
+static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
+{
+	return read_seqcount_retry(&sl->seqcount, start);
+}
+
+/*
+ * Lock out other writers and update the count.
+ * Acts like a normal spin_lock/unlock.
+ * Don't need preempt_disable() because that is in the spin_lock already.
+ */
+static inline void write_seqlock(seqlock_t *sl)
+{
+	spin_lock(&sl->lock);
+	write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock(seqlock_t *sl)
+{
+	write_seqcount_end(&sl->seqcount);
+	spin_unlock(&sl->lock);
+}
+
+static inline void write_seqlock_bh(seqlock_t *sl)
+{
+	spin_lock_bh(&sl->lock);
+	write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock_bh(seqlock_t *sl)
+{
+	write_seqcount_end(&sl->seqcount);
+	spin_unlock_bh(&sl->lock);
+}
+
+static inline void write_seqlock_irq(seqlock_t *sl)
+{
+	spin_lock_irq(&sl->lock);
+	write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock_irq(seqlock_t *sl)
+{
+	write_seqcount_end(&sl->seqcount);
+	spin_unlock_irq(&sl->lock);
+}
+
+static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sl->lock, flags);
+	write_seqcount_begin(&sl->seqcount);
+	return flags;
+}
+
+#define write_seqlock_irqsave(lock, flags)				\
+	do { flags = __write_seqlock_irqsave(lock); } while (0)
+
+static inline void
+write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
+{
+	write_seqcount_end(&sl->seqcount);
+	spin_unlock_irqrestore(&sl->lock, flags);
+}
 
 #endif /* __LINUX_SEQLOCK_H */
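Callers are unaffected by the rewrite: read_seqbegin()/read_seqretry() and the write_seqlock() family keep their signatures while the sequence bookkeeping moves into the embedded seqcount. The usual usage pattern against this API, as a sketch with illustrative names:

#include <linux/seqlock.h>
#include <linux/time.h>

static DEFINE_SEQLOCK(state_lock);
static struct timespec last_update;

/* Writer side: serialized by the spinlock inside the seqlock. */
static void set_last_update(struct timespec now)
{
	write_seqlock(&state_lock);
	last_update = now;
	write_sequnlock(&state_lock);
}

/* Reader side: lockless; retry if a writer raced with the copy. */
static struct timespec get_last_update(void)
{
	struct timespec ts;
	unsigned seq;

	do {
		seq = read_seqbegin(&state_lock);
		ts = last_update;
	} while (read_seqretry(&state_lock, seq));

	return ts;
}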
diff --git a/kernel/futex.c b/kernel/futex.c
index 9618b6e9fb36..fbc07a29ec53 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2472,8 +2472,6 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
 	if (!futex_cmpxchg_enabled)
 		return -ENOSYS;
 
-	WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
-
 	rcu_read_lock();
 
 	ret = -ESRCH;
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 83e368b005fc..a9642d528630 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -142,8 +142,6 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 	if (!futex_cmpxchg_enabled)
 		return -ENOSYS;
 
-	WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
-
 	rcu_read_lock();
 
 	ret = -ESRCH;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 7981e5b2350d..8a0efac4f99d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3190,9 +3190,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 #endif
 	if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
 		debug_locks_off();
-		printk("BUG: MAX_LOCK_DEPTH too low!\n");
+		printk("BUG: MAX_LOCK_DEPTH too low, depth: %i  max: %lu!\n",
+		       curr->lockdep_depth, MAX_LOCK_DEPTH);
 		printk("turning off the locking correctness validator.\n");
+
+		lockdep_print_held_locks(current);
+		debug_show_all_locks();
 		dump_stack();
+
 		return 0;
 	}
 
@@ -3203,7 +3208,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 }
 
 static int
-print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
+print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
 			unsigned long ip)
 {
 	if (!debug_locks_off())
@@ -3246,7 +3251,7 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
 		return 0;
 
 	if (curr->lockdep_depth <= 0)
-		return print_unlock_inbalance_bug(curr, lock, ip);
+		return print_unlock_imbalance_bug(curr, lock, ip);
 
 	return 1;
 }
@@ -3317,7 +3322,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
 			goto found_it;
 		prev_hlock = hlock;
 	}
-	return print_unlock_inbalance_bug(curr, lock, ip);
+	return print_unlock_imbalance_bug(curr, lock, ip);
 
 found_it:
 	lockdep_init_map(lock, name, key, 0);
@@ -3384,7 +3389,7 @@ lock_release_non_nested(struct task_struct *curr,
 			goto found_it;
 		prev_hlock = hlock;
 	}
-	return print_unlock_inbalance_bug(curr, lock, ip);
+	return print_unlock_imbalance_bug(curr, lock, ip);
 
 found_it:
 	if (hlock->instance == lock)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index b10a42bb0165..072bb066bb7d 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -23,7 +23,7 @@
  * NTP timekeeping variables:
  */
 
-DEFINE_SPINLOCK(ntp_lock);
+DEFINE_RAW_SPINLOCK(ntp_lock);
 
 
 /* USER_HZ period (usecs): */
@@ -348,7 +348,7 @@ void ntp_clear(void)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ntp_lock, flags);
+	raw_spin_lock_irqsave(&ntp_lock, flags);
 
 	time_adjust = 0;		/* stop active adjtime() */
 	time_status |= STA_UNSYNC;
@@ -362,7 +362,7 @@ void ntp_clear(void)
 
 	/* Clear PPS state variables */
 	pps_clear();
-	spin_unlock_irqrestore(&ntp_lock, flags);
+	raw_spin_unlock_irqrestore(&ntp_lock, flags);
 
 }
 
@@ -372,9 +372,9 @@ u64 ntp_tick_length(void)
 	unsigned long flags;
 	s64 ret;
 
-	spin_lock_irqsave(&ntp_lock, flags);
+	raw_spin_lock_irqsave(&ntp_lock, flags);
 	ret = tick_length;
-	spin_unlock_irqrestore(&ntp_lock, flags);
+	raw_spin_unlock_irqrestore(&ntp_lock, flags);
 	return ret;
 }
 
@@ -395,7 +395,7 @@ int second_overflow(unsigned long secs)
 	int leap = 0;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ntp_lock, flags);
+	raw_spin_lock_irqsave(&ntp_lock, flags);
 
 	/*
 	 * Leap second processing. If in leap-insert state at the end of the
@@ -479,7 +479,7 @@ int second_overflow(unsigned long secs)
 		time_adjust = 0;
 
 out:
-	spin_unlock_irqrestore(&ntp_lock, flags);
+	raw_spin_unlock_irqrestore(&ntp_lock, flags);
 
 	return leap;
 }
@@ -672,7 +672,7 @@ int do_adjtimex(struct timex *txc)
 
 	getnstimeofday(&ts);
 
-	spin_lock_irq(&ntp_lock);
+	raw_spin_lock_irq(&ntp_lock);
 
 	if (txc->modes & ADJ_ADJTIME) {
 		long save_adjust = time_adjust;
@@ -714,7 +714,7 @@ int do_adjtimex(struct timex *txc)
 	/* fill PPS status fields */
 	pps_fill_timex(txc);
 
-	spin_unlock_irq(&ntp_lock);
+	raw_spin_unlock_irq(&ntp_lock);
 
 	txc->time.tv_sec = ts.tv_sec;
 	txc->time.tv_usec = ts.tv_nsec;
@@ -912,7 +912,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 
 	pts_norm = pps_normalize_ts(*phase_ts);
 
-	spin_lock_irqsave(&ntp_lock, flags);
+	raw_spin_lock_irqsave(&ntp_lock, flags);
 
 	/* clear the error bits, they will be set again if needed */
 	time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
@@ -925,7 +925,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 	 * just start the frequency interval */
 	if (unlikely(pps_fbase.tv_sec == 0)) {
 		pps_fbase = *raw_ts;
-		spin_unlock_irqrestore(&ntp_lock, flags);
+		raw_spin_unlock_irqrestore(&ntp_lock, flags);
 		return;
 	}
 
@@ -940,7 +940,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 		time_status |= STA_PPSJITTER;
 		/* restart the frequency calibration interval */
 		pps_fbase = *raw_ts;
-		spin_unlock_irqrestore(&ntp_lock, flags);
+		raw_spin_unlock_irqrestore(&ntp_lock, flags);
 		pr_err("hardpps: PPSJITTER: bad pulse\n");
 		return;
 	}
@@ -957,7 +957,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 
 	hardpps_update_phase(pts_norm.nsec);
 
-	spin_unlock_irqrestore(&ntp_lock, flags);
+	raw_spin_unlock_irqrestore(&ntp_lock, flags);
 }
 EXPORT_SYMBOL(hardpps);
 
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 27689422aa92..4a944676358e 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -113,9 +113,9 @@ static int get_softlockup_thresh(void)
  * resolution, and we don't need to waste time with a big divide when
  * 2^30ns == 1.074s.
  */
-static unsigned long get_timestamp(int this_cpu)
+static unsigned long get_timestamp(void)
 {
-	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
+	return local_clock() >> 30LL;  /* 2^30 ~= 10^9 */
 }
 
 static void set_sample_period(void)
@@ -133,9 +133,7 @@ static void set_sample_period(void)
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
-	int this_cpu = smp_processor_id();
-
-	__this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
+	__this_cpu_write(watchdog_touch_ts, get_timestamp());
 }
 
 void touch_softlockup_watchdog(void)
@@ -196,7 +194,7 @@ static int is_hardlockup(void)
 
 static int is_softlockup(unsigned long touch_ts)
 {
-	unsigned long now = get_timestamp(smp_processor_id());
+	unsigned long now = get_timestamp();
 
 	/* Warn about unreasonable delays: */
 	if (time_after(now, touch_ts + get_softlockup_thresh()))
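The shift in get_timestamp() is a cheap stand-in for dividing nanoseconds by 10^9: 2^30 = 1073741824, so local_clock() >> 30 yields units of about 1.074 s, which is plenty for softlockup timestamps. local_clock() also makes the explicit CPU argument unnecessary, since it always reads the current CPU's clock. The arithmetic, as a runnable check:

#include <stdio.h>

int main(void)
{
	unsigned long long ns = 5000000000ULL;	/* 5 s in nanoseconds */

	/* 2^30 ns == 1.073741824 s, so >> 30 approximates division by 1e9 */
	printf("%llu ns -> %llu ~seconds\n", ns, ns >> 30);	/* prints 4 */
	return 0;
}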
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 7aae0f2a5e0a..c3eb261a7df3 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -47,10 +47,10 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose);
  * Normal standalone locks, for the circular and irq-context
  * dependency tests:
  */
-static DEFINE_SPINLOCK(lock_A);
-static DEFINE_SPINLOCK(lock_B);
-static DEFINE_SPINLOCK(lock_C);
-static DEFINE_SPINLOCK(lock_D);
+static DEFINE_RAW_SPINLOCK(lock_A);
+static DEFINE_RAW_SPINLOCK(lock_B);
+static DEFINE_RAW_SPINLOCK(lock_C);
+static DEFINE_RAW_SPINLOCK(lock_D);
 
 static DEFINE_RWLOCK(rwlock_A);
 static DEFINE_RWLOCK(rwlock_B);
@@ -73,12 +73,12 @@ static DECLARE_RWSEM(rwsem_D);
  * but X* and Y* are different classes. We do this so that
  * we do not trigger a real lockup:
  */
-static DEFINE_SPINLOCK(lock_X1);
-static DEFINE_SPINLOCK(lock_X2);
-static DEFINE_SPINLOCK(lock_Y1);
-static DEFINE_SPINLOCK(lock_Y2);
-static DEFINE_SPINLOCK(lock_Z1);
-static DEFINE_SPINLOCK(lock_Z2);
+static DEFINE_RAW_SPINLOCK(lock_X1);
+static DEFINE_RAW_SPINLOCK(lock_X2);
+static DEFINE_RAW_SPINLOCK(lock_Y1);
+static DEFINE_RAW_SPINLOCK(lock_Y2);
+static DEFINE_RAW_SPINLOCK(lock_Z1);
+static DEFINE_RAW_SPINLOCK(lock_Z2);
 
 static DEFINE_RWLOCK(rwlock_X1);
 static DEFINE_RWLOCK(rwlock_X2);
@@ -107,10 +107,10 @@ static DECLARE_RWSEM(rwsem_Z2);
  */
 #define INIT_CLASS_FUNC(class) 				\
 static noinline void					\
-init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
-		 struct rw_semaphore *rwsem)		\
+init_class_##class(raw_spinlock_t *lock, rwlock_t *rwlock, \
+	struct mutex *mutex, struct rw_semaphore *rwsem)\
 {							\
-	spin_lock_init(lock);				\
+	raw_spin_lock_init(lock);			\
 	rwlock_init(rwlock);				\
 	mutex_init(mutex);				\
 	init_rwsem(rwsem);				\
@@ -168,10 +168,10 @@ static void init_shared_classes(void)
  * Shortcuts for lock/unlock API variants, to keep
  * the testcases compact:
  */
-#define L(x)			spin_lock(&lock_##x)
-#define U(x)			spin_unlock(&lock_##x)
+#define L(x)			raw_spin_lock(&lock_##x)
+#define U(x)			raw_spin_unlock(&lock_##x)
 #define LU(x)			L(x); U(x)
-#define SI(x)			spin_lock_init(&lock_##x)
+#define SI(x)			raw_spin_lock_init(&lock_##x)
 
 #define WL(x)			write_lock(&rwlock_##x)
 #define WU(x)			write_unlock(&rwlock_##x)
@@ -911,7 +911,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 
 #define I2(x)					\
 	do {					\
-		spin_lock_init(&lock_##x);	\
+		raw_spin_lock_init(&lock_##x);	\
 		rwlock_init(&rwlock_##x);	\
 		mutex_init(&mutex_##x);		\
 		init_rwsem(&rwsem_##x);		\
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 7e0d6a58fc83..7542afbb22b3 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -73,20 +73,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 		goto dont_wake_writers;
 	}
 
-	/* if we are allowed to wake writers try to grant a single write lock
-	 * if there's a writer at the front of the queue
-	 * - we leave the 'waiting count' incremented to signify potential
-	 *   contention
+	/*
+	 * as we support write lock stealing, we can't set sem->activity
+	 * to -1 here to indicate we get the lock. Instead, we wake it up
+	 * to let it go get it again.
 	 */
 	if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
-		sem->activity = -1;
-		list_del(&waiter->list);
-		tsk = waiter->task;
-		/* Don't touch waiter after ->task has been NULLed */
-		smp_mb();
-		waiter->task = NULL;
-		wake_up_process(tsk);
-		put_task_struct(tsk);
+		wake_up_process(waiter->task);
 		goto out;
 	}
 
@@ -121,18 +114,10 @@ static inline struct rw_semaphore *
 __rwsem_wake_one_writer(struct rw_semaphore *sem)
 {
 	struct rwsem_waiter *waiter;
-	struct task_struct *tsk;
-
-	sem->activity = -1;
 
 	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-	list_del(&waiter->list);
+	wake_up_process(waiter->task);
 
-	tsk = waiter->task;
-	smp_mb();
-	waiter->task = NULL;
-	wake_up_process(tsk);
-	put_task_struct(tsk);
 	return sem;
 }
 
@@ -204,7 +189,6 @@ int __down_read_trylock(struct rw_semaphore *sem)
 
 /*
  * get a write lock on the semaphore
- * - we increment the waiting count anyway to indicate an exclusive lock
  */
 void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
@@ -214,37 +198,32 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	if (sem->activity == 0 && list_empty(&sem->wait_list)) {
-		/* granted */
-		sem->activity = -1;
-		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-		goto out;
-	}
-
-	tsk = current;
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
 	/* set up my own style of waitqueue */
+	tsk = current;
 	waiter.task = tsk;
 	waiter.flags = RWSEM_WAITING_FOR_WRITE;
-	get_task_struct(tsk);
-
 	list_add_tail(&waiter.list, &sem->wait_list);
 
-	/* we don't need to touch the semaphore struct anymore */
-	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	/* wait to be given the lock */
+	/* wait for someone to release the lock */
 	for (;;) {
-		if (!waiter.task)
+		/*
+		 * That is the key to support write lock stealing: allows the
+		 * task already on CPU to get the lock soon rather than put
+		 * itself into sleep and waiting for system woke it or someone
+		 * else in the head of the wait list up.
+		 */
+		if (sem->activity == 0)
 			break;
-		schedule();
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+		schedule();
+		raw_spin_lock_irqsave(&sem->wait_lock, flags);
 	}
+	/* got the lock */
+	sem->activity = -1;
+	list_del(&waiter.list);
 
-	tsk->state = TASK_RUNNING;
- out:
-	;
+	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 }
 
 void __sched __down_write(struct rw_semaphore *sem)
@@ -262,8 +241,8 @@ int __down_write_trylock(struct rw_semaphore *sem)
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	if (sem->activity == 0 && list_empty(&sem->wait_list)) {
-		/* granted */
+	if (sem->activity == 0) {
+		/* got the lock */
 		sem->activity = -1;
 		ret = 1;
 	}
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 8337e1b9bb8d..ad5e0df16ab4 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -2,6 +2,8 @@
  *
  * Written by David Howells (dhowells@redhat.com).
  * Derived from arch/i386/kernel/semaphore.c
+ *
+ * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
@@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
 	struct rwsem_waiter *waiter;
 	struct task_struct *tsk;
 	struct list_head *next;
-	signed long oldcount, woken, loop, adjustment;
+	signed long woken, loop, adjustment;
 
 	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
 	if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
@@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
 	 */
 	goto out;
 
-	/* There's a writer at the front of the queue - try to grant it the
-	 * write lock. However, we only wake this writer if we can transition
-	 * the active part of the count from 0 -> 1
-	 */
-	adjustment = RWSEM_ACTIVE_WRITE_BIAS;
-	if (waiter->list.next == &sem->wait_list)
-		adjustment -= RWSEM_WAITING_BIAS;
-
- try_again_write:
-	oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
-	if (oldcount & RWSEM_ACTIVE_MASK)
-		/* Someone grabbed the sem already */
-		goto undo_write;
-
-	/* We must be careful not to touch 'waiter' after we set ->task = NULL.
-	 * It is an allocated on the waiter's stack and may become invalid at
-	 * any time after that point (due to a wakeup from another source).
-	 */
-	list_del(&waiter->list);
-	tsk = waiter->task;
-	smp_mb();
-	waiter->task = NULL;
-	wake_up_process(tsk);
-	put_task_struct(tsk);
+	/* Wake up the writing waiter and let the task grab the sem: */
+	wake_up_process(waiter->task);
 	goto out;
 
  readers_only:
@@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
 
  out:
 	return sem;
+}
+
+/* Try to get write sem, caller holds sem->wait_lock: */
+static int try_get_writer_sem(struct rw_semaphore *sem,
+					struct rwsem_waiter *waiter)
+{
+	struct rwsem_waiter *fwaiter;
+	long oldcount, adjustment;
 
-	/* undo the change to the active count, but check for a transition
-	 * 1->0 */
- undo_write:
+	/* only steal when first waiter is writing */
+	fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+	if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE))
+		return 0;
+
+	adjustment = RWSEM_ACTIVE_WRITE_BIAS;
+	/* Only one waiter in the queue: */
+	if (fwaiter == waiter && waiter->list.next == &sem->wait_list)
+		adjustment -= RWSEM_WAITING_BIAS;
+
+try_again_write:
+	oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+	if (!(oldcount & RWSEM_ACTIVE_MASK)) {
+		/* No active lock: */
+		struct task_struct *tsk = waiter->task;
+
+		list_del(&waiter->list);
+		smp_mb();
+		put_task_struct(tsk);
+		tsk->state = TASK_RUNNING;
+		return 1;
+	}
+	/* some one grabbed the sem already */
 	if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
-		goto out;
+		return 0;
 	goto try_again_write;
 }
 
@@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
 	for (;;) {
 		if (!waiter.task)
 			break;
+
+		raw_spin_lock_irq(&sem->wait_lock);
+		/* Try to get the writer sem, may steal from the head writer: */
+		if (flags == RWSEM_WAITING_FOR_WRITE)
+			if (try_get_writer_sem(sem, &waiter)) {
+				raw_spin_unlock_irq(&sem->wait_lock);
+				return sem;
+			}
+		raw_spin_unlock_irq(&sem->wait_lock);
 		schedule();
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	}