aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/locking
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/locktorture.c529
-rw-r--r--kernel/locking/mcs_spinlock.h3
-rw-r--r--kernel/locking/mutex-debug.c2
-rw-r--r--kernel/locking/mutex.c422
-rw-r--r--kernel/locking/mutex.h2
-rw-r--r--kernel/locking/rtmutex.c2
-rw-r--r--kernel/locking/rwsem-xadd.c27
-rw-r--r--kernel/locking/semaphore.c12
8 files changed, 702 insertions, 297 deletions
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 0955b885d0dc..ec8cce259779 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -20,30 +20,20 @@
20 * Author: Paul E. McKenney <paulmck@us.ibm.com> 20 * Author: Paul E. McKenney <paulmck@us.ibm.com>
21 * Based on kernel/rcu/torture.c. 21 * Based on kernel/rcu/torture.c.
22 */ 22 */
23#include <linux/types.h>
24#include <linux/kernel.h> 23#include <linux/kernel.h>
25#include <linux/init.h>
26#include <linux/module.h> 24#include <linux/module.h>
27#include <linux/kthread.h> 25#include <linux/kthread.h>
28#include <linux/err.h>
29#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27#include <linux/rwlock.h>
28#include <linux/mutex.h>
29#include <linux/rwsem.h>
30#include <linux/smp.h> 30#include <linux/smp.h>
31#include <linux/interrupt.h> 31#include <linux/interrupt.h>
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/atomic.h> 33#include <linux/atomic.h>
34#include <linux/bitops.h>
35#include <linux/completion.h>
36#include <linux/moduleparam.h> 34#include <linux/moduleparam.h>
37#include <linux/percpu.h>
38#include <linux/notifier.h>
39#include <linux/reboot.h>
40#include <linux/freezer.h>
41#include <linux/cpu.h>
42#include <linux/delay.h> 35#include <linux/delay.h>
43#include <linux/stat.h>
44#include <linux/slab.h> 36#include <linux/slab.h>
45#include <linux/trace_clock.h>
46#include <asm/byteorder.h>
47#include <linux/torture.h> 37#include <linux/torture.h>
48 38
49MODULE_LICENSE("GPL"); 39MODULE_LICENSE("GPL");
@@ -51,6 +41,8 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
51 41
52torture_param(int, nwriters_stress, -1, 42torture_param(int, nwriters_stress, -1,
53 "Number of write-locking stress-test threads"); 43 "Number of write-locking stress-test threads");
44torture_param(int, nreaders_stress, -1,
45 "Number of read-locking stress-test threads");
54torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); 46torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
55torture_param(int, onoff_interval, 0, 47torture_param(int, onoff_interval, 0,
56 "Time between CPU hotplugs (s), 0=disable"); 48 "Time between CPU hotplugs (s), 0=disable");
@@ -66,30 +58,28 @@ torture_param(bool, verbose, true,
66static char *torture_type = "spin_lock"; 58static char *torture_type = "spin_lock";
67module_param(torture_type, charp, 0444); 59module_param(torture_type, charp, 0444);
68MODULE_PARM_DESC(torture_type, 60MODULE_PARM_DESC(torture_type,
69 "Type of lock to torture (spin_lock, spin_lock_irq, ...)"); 61 "Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)");
70
71static atomic_t n_lock_torture_errors;
72 62
73static struct task_struct *stats_task; 63static struct task_struct *stats_task;
74static struct task_struct **writer_tasks; 64static struct task_struct **writer_tasks;
65static struct task_struct **reader_tasks;
75 66
76static int nrealwriters_stress;
77static bool lock_is_write_held; 67static bool lock_is_write_held;
68static bool lock_is_read_held;
78 69
79struct lock_writer_stress_stats { 70struct lock_stress_stats {
80 long n_write_lock_fail; 71 long n_lock_fail;
81 long n_write_lock_acquired; 72 long n_lock_acquired;
82}; 73};
83static struct lock_writer_stress_stats *lwsa;
84 74
85#if defined(MODULE) 75#if defined(MODULE)
86#define LOCKTORTURE_RUNNABLE_INIT 1 76#define LOCKTORTURE_RUNNABLE_INIT 1
87#else 77#else
88#define LOCKTORTURE_RUNNABLE_INIT 0 78#define LOCKTORTURE_RUNNABLE_INIT 0
89#endif 79#endif
90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; 80int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
91module_param(locktorture_runnable, int, 0444); 81module_param(torture_runnable, int, 0444);
92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at module init"); 82MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
93 83
94/* Forward reference. */ 84/* Forward reference. */
95static void lock_torture_cleanup(void); 85static void lock_torture_cleanup(void);
@@ -102,12 +92,25 @@ struct lock_torture_ops {
102 int (*writelock)(void); 92 int (*writelock)(void);
103 void (*write_delay)(struct torture_random_state *trsp); 93 void (*write_delay)(struct torture_random_state *trsp);
104 void (*writeunlock)(void); 94 void (*writeunlock)(void);
95 int (*readlock)(void);
96 void (*read_delay)(struct torture_random_state *trsp);
97 void (*readunlock)(void);
105 unsigned long flags; 98 unsigned long flags;
106 const char *name; 99 const char *name;
107}; 100};
108 101
109static struct lock_torture_ops *cur_ops; 102struct lock_torture_cxt {
110 103 int nrealwriters_stress;
104 int nrealreaders_stress;
105 bool debug_lock;
106 atomic_t n_lock_torture_errors;
107 struct lock_torture_ops *cur_ops;
108 struct lock_stress_stats *lwsa; /* writer statistics */
109 struct lock_stress_stats *lrsa; /* reader statistics */
110};
111static struct lock_torture_cxt cxt = { 0, 0, false,
112 ATOMIC_INIT(0),
113 NULL, NULL};
111/* 114/*
112 * Definitions for lock torture testing. 115 * Definitions for lock torture testing.
113 */ 116 */
@@ -123,10 +126,10 @@ static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
123 126
124 /* We want a long delay occasionally to force massive contention. */ 127 /* We want a long delay occasionally to force massive contention. */
125 if (!(torture_random(trsp) % 128 if (!(torture_random(trsp) %
126 (nrealwriters_stress * 2000 * longdelay_us))) 129 (cxt.nrealwriters_stress * 2000 * longdelay_us)))
127 mdelay(longdelay_us); 130 mdelay(longdelay_us);
128#ifdef CONFIG_PREEMPT 131#ifdef CONFIG_PREEMPT
129 if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) 132 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
130 preempt_schedule(); /* Allow test to be preempted. */ 133 preempt_schedule(); /* Allow test to be preempted. */
131#endif 134#endif
132} 135}
@@ -140,6 +143,9 @@ static struct lock_torture_ops lock_busted_ops = {
140 .writelock = torture_lock_busted_write_lock, 143 .writelock = torture_lock_busted_write_lock,
141 .write_delay = torture_lock_busted_write_delay, 144 .write_delay = torture_lock_busted_write_delay,
142 .writeunlock = torture_lock_busted_write_unlock, 145 .writeunlock = torture_lock_busted_write_unlock,
146 .readlock = NULL,
147 .read_delay = NULL,
148 .readunlock = NULL,
143 .name = "lock_busted" 149 .name = "lock_busted"
144}; 150};
145 151
@@ -160,13 +166,13 @@ static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
160 * we want a long delay occasionally to force massive contention. 166 * we want a long delay occasionally to force massive contention.
161 */ 167 */
162 if (!(torture_random(trsp) % 168 if (!(torture_random(trsp) %
163 (nrealwriters_stress * 2000 * longdelay_us))) 169 (cxt.nrealwriters_stress * 2000 * longdelay_us)))
164 mdelay(longdelay_us); 170 mdelay(longdelay_us);
165 if (!(torture_random(trsp) % 171 if (!(torture_random(trsp) %
166 (nrealwriters_stress * 2 * shortdelay_us))) 172 (cxt.nrealwriters_stress * 2 * shortdelay_us)))
167 udelay(shortdelay_us); 173 udelay(shortdelay_us);
168#ifdef CONFIG_PREEMPT 174#ifdef CONFIG_PREEMPT
169 if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) 175 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
170 preempt_schedule(); /* Allow test to be preempted. */ 176 preempt_schedule(); /* Allow test to be preempted. */
171#endif 177#endif
172} 178}
@@ -180,39 +186,253 @@ static struct lock_torture_ops spin_lock_ops = {
180 .writelock = torture_spin_lock_write_lock, 186 .writelock = torture_spin_lock_write_lock,
181 .write_delay = torture_spin_lock_write_delay, 187 .write_delay = torture_spin_lock_write_delay,
182 .writeunlock = torture_spin_lock_write_unlock, 188 .writeunlock = torture_spin_lock_write_unlock,
189 .readlock = NULL,
190 .read_delay = NULL,
191 .readunlock = NULL,
183 .name = "spin_lock" 192 .name = "spin_lock"
184}; 193};
185 194
186static int torture_spin_lock_write_lock_irq(void) 195static int torture_spin_lock_write_lock_irq(void)
187__acquires(torture_spinlock_irq) 196__acquires(torture_spinlock)
188{ 197{
189 unsigned long flags; 198 unsigned long flags;
190 199
191 spin_lock_irqsave(&torture_spinlock, flags); 200 spin_lock_irqsave(&torture_spinlock, flags);
192 cur_ops->flags = flags; 201 cxt.cur_ops->flags = flags;
193 return 0; 202 return 0;
194} 203}
195 204
196static void torture_lock_spin_write_unlock_irq(void) 205static void torture_lock_spin_write_unlock_irq(void)
197__releases(torture_spinlock) 206__releases(torture_spinlock)
198{ 207{
199 spin_unlock_irqrestore(&torture_spinlock, cur_ops->flags); 208 spin_unlock_irqrestore(&torture_spinlock, cxt.cur_ops->flags);
200} 209}
201 210
202static struct lock_torture_ops spin_lock_irq_ops = { 211static struct lock_torture_ops spin_lock_irq_ops = {
203 .writelock = torture_spin_lock_write_lock_irq, 212 .writelock = torture_spin_lock_write_lock_irq,
204 .write_delay = torture_spin_lock_write_delay, 213 .write_delay = torture_spin_lock_write_delay,
205 .writeunlock = torture_lock_spin_write_unlock_irq, 214 .writeunlock = torture_lock_spin_write_unlock_irq,
215 .readlock = NULL,
216 .read_delay = NULL,
217 .readunlock = NULL,
206 .name = "spin_lock_irq" 218 .name = "spin_lock_irq"
207}; 219};
208 220
221static DEFINE_RWLOCK(torture_rwlock);
222
223static int torture_rwlock_write_lock(void) __acquires(torture_rwlock)
224{
225 write_lock(&torture_rwlock);
226 return 0;
227}
228
229static void torture_rwlock_write_delay(struct torture_random_state *trsp)
230{
231 const unsigned long shortdelay_us = 2;
232 const unsigned long longdelay_ms = 100;
233
234 /* We want a short delay mostly to emulate likely code, and
235 * we want a long delay occasionally to force massive contention.
236 */
237 if (!(torture_random(trsp) %
238 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
239 mdelay(longdelay_ms);
240 else
241 udelay(shortdelay_us);
242}
243
244static void torture_rwlock_write_unlock(void) __releases(torture_rwlock)
245{
246 write_unlock(&torture_rwlock);
247}
248
249static int torture_rwlock_read_lock(void) __acquires(torture_rwlock)
250{
251 read_lock(&torture_rwlock);
252 return 0;
253}
254
255static void torture_rwlock_read_delay(struct torture_random_state *trsp)
256{
257 const unsigned long shortdelay_us = 10;
258 const unsigned long longdelay_ms = 100;
259
260 /* We want a short delay mostly to emulate likely code, and
261 * we want a long delay occasionally to force massive contention.
262 */
263 if (!(torture_random(trsp) %
264 (cxt.nrealreaders_stress * 2000 * longdelay_ms)))
265 mdelay(longdelay_ms);
266 else
267 udelay(shortdelay_us);
268}
269
270static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
271{
272 read_unlock(&torture_rwlock);
273}
274
275static struct lock_torture_ops rw_lock_ops = {
276 .writelock = torture_rwlock_write_lock,
277 .write_delay = torture_rwlock_write_delay,
278 .writeunlock = torture_rwlock_write_unlock,
279 .readlock = torture_rwlock_read_lock,
280 .read_delay = torture_rwlock_read_delay,
281 .readunlock = torture_rwlock_read_unlock,
282 .name = "rw_lock"
283};
284
285static int torture_rwlock_write_lock_irq(void) __acquires(torture_rwlock)
286{
287 unsigned long flags;
288
289 write_lock_irqsave(&torture_rwlock, flags);
290 cxt.cur_ops->flags = flags;
291 return 0;
292}
293
294static void torture_rwlock_write_unlock_irq(void)
295__releases(torture_rwlock)
296{
297 write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
298}
299
300static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock)
301{
302 unsigned long flags;
303
304 read_lock_irqsave(&torture_rwlock, flags);
305 cxt.cur_ops->flags = flags;
306 return 0;
307}
308
309static void torture_rwlock_read_unlock_irq(void)
310__releases(torture_rwlock)
311{
312 write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
313}
314
315static struct lock_torture_ops rw_lock_irq_ops = {
316 .writelock = torture_rwlock_write_lock_irq,
317 .write_delay = torture_rwlock_write_delay,
318 .writeunlock = torture_rwlock_write_unlock_irq,
319 .readlock = torture_rwlock_read_lock_irq,
320 .read_delay = torture_rwlock_read_delay,
321 .readunlock = torture_rwlock_read_unlock_irq,
322 .name = "rw_lock_irq"
323};
324
325static DEFINE_MUTEX(torture_mutex);
326
327static int torture_mutex_lock(void) __acquires(torture_mutex)
328{
329 mutex_lock(&torture_mutex);
330 return 0;
331}
332
333static void torture_mutex_delay(struct torture_random_state *trsp)
334{
335 const unsigned long longdelay_ms = 100;
336
337 /* We want a long delay occasionally to force massive contention. */
338 if (!(torture_random(trsp) %
339 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
340 mdelay(longdelay_ms * 5);
341 else
342 mdelay(longdelay_ms / 5);
343#ifdef CONFIG_PREEMPT
344 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
345 preempt_schedule(); /* Allow test to be preempted. */
346#endif
347}
348
349static void torture_mutex_unlock(void) __releases(torture_mutex)
350{
351 mutex_unlock(&torture_mutex);
352}
353
354static struct lock_torture_ops mutex_lock_ops = {
355 .writelock = torture_mutex_lock,
356 .write_delay = torture_mutex_delay,
357 .writeunlock = torture_mutex_unlock,
358 .readlock = NULL,
359 .read_delay = NULL,
360 .readunlock = NULL,
361 .name = "mutex_lock"
362};
363
364static DECLARE_RWSEM(torture_rwsem);
365static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
366{
367 down_write(&torture_rwsem);
368 return 0;
369}
370
371static void torture_rwsem_write_delay(struct torture_random_state *trsp)
372{
373 const unsigned long longdelay_ms = 100;
374
375 /* We want a long delay occasionally to force massive contention. */
376 if (!(torture_random(trsp) %
377 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
378 mdelay(longdelay_ms * 10);
379 else
380 mdelay(longdelay_ms / 10);
381#ifdef CONFIG_PREEMPT
382 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
383 preempt_schedule(); /* Allow test to be preempted. */
384#endif
385}
386
387static void torture_rwsem_up_write(void) __releases(torture_rwsem)
388{
389 up_write(&torture_rwsem);
390}
391
392static int torture_rwsem_down_read(void) __acquires(torture_rwsem)
393{
394 down_read(&torture_rwsem);
395 return 0;
396}
397
398static void torture_rwsem_read_delay(struct torture_random_state *trsp)
399{
400 const unsigned long longdelay_ms = 100;
401
402 /* We want a long delay occasionally to force massive contention. */
403 if (!(torture_random(trsp) %
404 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
405 mdelay(longdelay_ms * 2);
406 else
407 mdelay(longdelay_ms / 2);
408#ifdef CONFIG_PREEMPT
409 if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000)))
410 preempt_schedule(); /* Allow test to be preempted. */
411#endif
412}
413
414static void torture_rwsem_up_read(void) __releases(torture_rwsem)
415{
416 up_read(&torture_rwsem);
417}
418
419static struct lock_torture_ops rwsem_lock_ops = {
420 .writelock = torture_rwsem_down_write,
421 .write_delay = torture_rwsem_write_delay,
422 .writeunlock = torture_rwsem_up_write,
423 .readlock = torture_rwsem_down_read,
424 .read_delay = torture_rwsem_read_delay,
425 .readunlock = torture_rwsem_up_read,
426 .name = "rwsem_lock"
427};
428
209/* 429/*
210 * Lock torture writer kthread. Repeatedly acquires and releases 430 * Lock torture writer kthread. Repeatedly acquires and releases
211 * the lock, checking for duplicate acquisitions. 431 * the lock, checking for duplicate acquisitions.
212 */ 432 */
213static int lock_torture_writer(void *arg) 433static int lock_torture_writer(void *arg)
214{ 434{
215 struct lock_writer_stress_stats *lwsp = arg; 435 struct lock_stress_stats *lwsp = arg;
216 static DEFINE_TORTURE_RANDOM(rand); 436 static DEFINE_TORTURE_RANDOM(rand);
217 437
218 VERBOSE_TOROUT_STRING("lock_torture_writer task started"); 438 VERBOSE_TOROUT_STRING("lock_torture_writer task started");
@@ -221,14 +441,19 @@ static int lock_torture_writer(void *arg)
221 do { 441 do {
222 if ((torture_random(&rand) & 0xfffff) == 0) 442 if ((torture_random(&rand) & 0xfffff) == 0)
223 schedule_timeout_uninterruptible(1); 443 schedule_timeout_uninterruptible(1);
224 cur_ops->writelock(); 444
445 cxt.cur_ops->writelock();
225 if (WARN_ON_ONCE(lock_is_write_held)) 446 if (WARN_ON_ONCE(lock_is_write_held))
226 lwsp->n_write_lock_fail++; 447 lwsp->n_lock_fail++;
227 lock_is_write_held = 1; 448 lock_is_write_held = 1;
228 lwsp->n_write_lock_acquired++; 449 if (WARN_ON_ONCE(lock_is_read_held))
229 cur_ops->write_delay(&rand); 450 lwsp->n_lock_fail++; /* rare, but... */
451
452 lwsp->n_lock_acquired++;
453 cxt.cur_ops->write_delay(&rand);
230 lock_is_write_held = 0; 454 lock_is_write_held = 0;
231 cur_ops->writeunlock(); 455 cxt.cur_ops->writeunlock();
456
232 stutter_wait("lock_torture_writer"); 457 stutter_wait("lock_torture_writer");
233 } while (!torture_must_stop()); 458 } while (!torture_must_stop());
234 torture_kthread_stopping("lock_torture_writer"); 459 torture_kthread_stopping("lock_torture_writer");
@@ -236,32 +461,66 @@ static int lock_torture_writer(void *arg)
236} 461}
237 462
238/* 463/*
464 * Lock torture reader kthread. Repeatedly acquires and releases
465 * the reader lock.
466 */
467static int lock_torture_reader(void *arg)
468{
469 struct lock_stress_stats *lrsp = arg;
470 static DEFINE_TORTURE_RANDOM(rand);
471
472 VERBOSE_TOROUT_STRING("lock_torture_reader task started");
473 set_user_nice(current, MAX_NICE);
474
475 do {
476 if ((torture_random(&rand) & 0xfffff) == 0)
477 schedule_timeout_uninterruptible(1);
478
479 cxt.cur_ops->readlock();
480 lock_is_read_held = 1;
481 if (WARN_ON_ONCE(lock_is_write_held))
482 lrsp->n_lock_fail++; /* rare, but... */
483
484 lrsp->n_lock_acquired++;
485 cxt.cur_ops->read_delay(&rand);
486 lock_is_read_held = 0;
487 cxt.cur_ops->readunlock();
488
489 stutter_wait("lock_torture_reader");
490 } while (!torture_must_stop());
491 torture_kthread_stopping("lock_torture_reader");
492 return 0;
493}
494
495/*
239 * Create an lock-torture-statistics message in the specified buffer. 496 * Create an lock-torture-statistics message in the specified buffer.
240 */ 497 */
241static void lock_torture_printk(char *page) 498static void __torture_print_stats(char *page,
499 struct lock_stress_stats *statp, bool write)
242{ 500{
243 bool fail = 0; 501 bool fail = 0;
244 int i; 502 int i, n_stress;
245 long max = 0; 503 long max = 0;
246 long min = lwsa[0].n_write_lock_acquired; 504 long min = statp[0].n_lock_acquired;
247 long long sum = 0; 505 long long sum = 0;
248 506
249 for (i = 0; i < nrealwriters_stress; i++) { 507 n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
250 if (lwsa[i].n_write_lock_fail) 508 for (i = 0; i < n_stress; i++) {
509 if (statp[i].n_lock_fail)
251 fail = true; 510 fail = true;
252 sum += lwsa[i].n_write_lock_acquired; 511 sum += statp[i].n_lock_acquired;
253 if (max < lwsa[i].n_write_lock_fail) 512 if (max < statp[i].n_lock_fail)
254 max = lwsa[i].n_write_lock_fail; 513 max = statp[i].n_lock_fail;
255 if (min > lwsa[i].n_write_lock_fail) 514 if (min > statp[i].n_lock_fail)
256 min = lwsa[i].n_write_lock_fail; 515 min = statp[i].n_lock_fail;
257 } 516 }
258 page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG);
259 page += sprintf(page, 517 page += sprintf(page,
260 "Writes: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", 518 "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
519 write ? "Writes" : "Reads ",
261 sum, max, min, max / 2 > min ? "???" : "", 520 sum, max, min, max / 2 > min ? "???" : "",
262 fail, fail ? "!!!" : ""); 521 fail, fail ? "!!!" : "");
263 if (fail) 522 if (fail)
264 atomic_inc(&n_lock_torture_errors); 523 atomic_inc(&cxt.n_lock_torture_errors);
265} 524}
266 525
267/* 526/*
@@ -274,18 +533,35 @@ static void lock_torture_printk(char *page)
274 */ 533 */
275static void lock_torture_stats_print(void) 534static void lock_torture_stats_print(void)
276{ 535{
277 int size = nrealwriters_stress * 200 + 8192; 536 int size = cxt.nrealwriters_stress * 200 + 8192;
278 char *buf; 537 char *buf;
279 538
539 if (cxt.cur_ops->readlock)
540 size += cxt.nrealreaders_stress * 200 + 8192;
541
280 buf = kmalloc(size, GFP_KERNEL); 542 buf = kmalloc(size, GFP_KERNEL);
281 if (!buf) { 543 if (!buf) {
282 pr_err("lock_torture_stats_print: Out of memory, need: %d", 544 pr_err("lock_torture_stats_print: Out of memory, need: %d",
283 size); 545 size);
284 return; 546 return;
285 } 547 }
286 lock_torture_printk(buf); 548
549 __torture_print_stats(buf, cxt.lwsa, true);
287 pr_alert("%s", buf); 550 pr_alert("%s", buf);
288 kfree(buf); 551 kfree(buf);
552
553 if (cxt.cur_ops->readlock) {
554 buf = kmalloc(size, GFP_KERNEL);
555 if (!buf) {
556 pr_err("lock_torture_stats_print: Out of memory, need: %d",
557 size);
558 return;
559 }
560
561 __torture_print_stats(buf, cxt.lrsa, false);
562 pr_alert("%s", buf);
563 kfree(buf);
564 }
289} 565}
290 566
291/* 567/*
@@ -312,9 +588,10 @@ lock_torture_print_module_parms(struct lock_torture_ops *cur_ops,
312 const char *tag) 588 const char *tag)
313{ 589{
314 pr_alert("%s" TORTURE_FLAG 590 pr_alert("%s" TORTURE_FLAG
315 "--- %s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", 591 "--- %s%s: nwriters_stress=%d nreaders_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n",
316 torture_type, tag, nrealwriters_stress, stat_interval, verbose, 592 torture_type, tag, cxt.debug_lock ? " [debug]": "",
317 shuffle_interval, stutter, shutdown_secs, 593 cxt.nrealwriters_stress, cxt.nrealreaders_stress, stat_interval,
594 verbose, shuffle_interval, stutter, shutdown_secs,
318 onoff_interval, onoff_holdoff); 595 onoff_interval, onoff_holdoff);
319} 596}
320 597
@@ -322,46 +599,59 @@ static void lock_torture_cleanup(void)
322{ 599{
323 int i; 600 int i;
324 601
325 if (torture_cleanup()) 602 if (torture_cleanup_begin())
326 return; 603 return;
327 604
328 if (writer_tasks) { 605 if (writer_tasks) {
329 for (i = 0; i < nrealwriters_stress; i++) 606 for (i = 0; i < cxt.nrealwriters_stress; i++)
330 torture_stop_kthread(lock_torture_writer, 607 torture_stop_kthread(lock_torture_writer,
331 writer_tasks[i]); 608 writer_tasks[i]);
332 kfree(writer_tasks); 609 kfree(writer_tasks);
333 writer_tasks = NULL; 610 writer_tasks = NULL;
334 } 611 }
335 612
613 if (reader_tasks) {
614 for (i = 0; i < cxt.nrealreaders_stress; i++)
615 torture_stop_kthread(lock_torture_reader,
616 reader_tasks[i]);
617 kfree(reader_tasks);
618 reader_tasks = NULL;
619 }
620
336 torture_stop_kthread(lock_torture_stats, stats_task); 621 torture_stop_kthread(lock_torture_stats, stats_task);
337 lock_torture_stats_print(); /* -After- the stats thread is stopped! */ 622 lock_torture_stats_print(); /* -After- the stats thread is stopped! */
338 623
339 if (atomic_read(&n_lock_torture_errors)) 624 if (atomic_read(&cxt.n_lock_torture_errors))
340 lock_torture_print_module_parms(cur_ops, 625 lock_torture_print_module_parms(cxt.cur_ops,
341 "End of test: FAILURE"); 626 "End of test: FAILURE");
342 else if (torture_onoff_failures()) 627 else if (torture_onoff_failures())
343 lock_torture_print_module_parms(cur_ops, 628 lock_torture_print_module_parms(cxt.cur_ops,
344 "End of test: LOCK_HOTPLUG"); 629 "End of test: LOCK_HOTPLUG");
345 else 630 else
346 lock_torture_print_module_parms(cur_ops, 631 lock_torture_print_module_parms(cxt.cur_ops,
347 "End of test: SUCCESS"); 632 "End of test: SUCCESS");
633 torture_cleanup_end();
348} 634}
349 635
350static int __init lock_torture_init(void) 636static int __init lock_torture_init(void)
351{ 637{
352 int i; 638 int i, j;
353 int firsterr = 0; 639 int firsterr = 0;
354 static struct lock_torture_ops *torture_ops[] = { 640 static struct lock_torture_ops *torture_ops[] = {
355 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, 641 &lock_busted_ops,
642 &spin_lock_ops, &spin_lock_irq_ops,
643 &rw_lock_ops, &rw_lock_irq_ops,
644 &mutex_lock_ops,
645 &rwsem_lock_ops,
356 }; 646 };
357 647
358 if (!torture_init_begin(torture_type, verbose, &locktorture_runnable)) 648 if (!torture_init_begin(torture_type, verbose, &torture_runnable))
359 return -EBUSY; 649 return -EBUSY;
360 650
361 /* Process args and tell the world that the torturer is on the job. */ 651 /* Process args and tell the world that the torturer is on the job. */
362 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 652 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
363 cur_ops = torture_ops[i]; 653 cxt.cur_ops = torture_ops[i];
364 if (strcmp(torture_type, cur_ops->name) == 0) 654 if (strcmp(torture_type, cxt.cur_ops->name) == 0)
365 break; 655 break;
366 } 656 }
367 if (i == ARRAY_SIZE(torture_ops)) { 657 if (i == ARRAY_SIZE(torture_ops)) {
@@ -374,31 +664,69 @@ static int __init lock_torture_init(void)
374 torture_init_end(); 664 torture_init_end();
375 return -EINVAL; 665 return -EINVAL;
376 } 666 }
377 if (cur_ops->init) 667 if (cxt.cur_ops->init)
378 cur_ops->init(); /* no "goto unwind" prior to this point!!! */ 668 cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
379 669
380 if (nwriters_stress >= 0) 670 if (nwriters_stress >= 0)
381 nrealwriters_stress = nwriters_stress; 671 cxt.nrealwriters_stress = nwriters_stress;
382 else 672 else
383 nrealwriters_stress = 2 * num_online_cpus(); 673 cxt.nrealwriters_stress = 2 * num_online_cpus();
384 lock_torture_print_module_parms(cur_ops, "Start of test"); 674
675#ifdef CONFIG_DEBUG_MUTEXES
676 if (strncmp(torture_type, "mutex", 5) == 0)
677 cxt.debug_lock = true;
678#endif
679#ifdef CONFIG_DEBUG_SPINLOCK
680 if ((strncmp(torture_type, "spin", 4) == 0) ||
681 (strncmp(torture_type, "rw_lock", 7) == 0))
682 cxt.debug_lock = true;
683#endif
385 684
386 /* Initialize the statistics so that each run gets its own numbers. */ 685 /* Initialize the statistics so that each run gets its own numbers. */
387 686
388 lock_is_write_held = 0; 687 lock_is_write_held = 0;
389 lwsa = kmalloc(sizeof(*lwsa) * nrealwriters_stress, GFP_KERNEL); 688 cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL);
390 if (lwsa == NULL) { 689 if (cxt.lwsa == NULL) {
391 VERBOSE_TOROUT_STRING("lwsa: Out of memory"); 690 VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory");
392 firsterr = -ENOMEM; 691 firsterr = -ENOMEM;
393 goto unwind; 692 goto unwind;
394 } 693 }
395 for (i = 0; i < nrealwriters_stress; i++) { 694 for (i = 0; i < cxt.nrealwriters_stress; i++) {
396 lwsa[i].n_write_lock_fail = 0; 695 cxt.lwsa[i].n_lock_fail = 0;
397 lwsa[i].n_write_lock_acquired = 0; 696 cxt.lwsa[i].n_lock_acquired = 0;
398 } 697 }
399 698
400 /* Start up the kthreads. */ 699 if (cxt.cur_ops->readlock) {
700 if (nreaders_stress >= 0)
701 cxt.nrealreaders_stress = nreaders_stress;
702 else {
703 /*
704 * By default distribute evenly the number of
705 * readers and writers. We still run the same number
706 * of threads as the writer-only locks default.
707 */
708 if (nwriters_stress < 0) /* user doesn't care */
709 cxt.nrealwriters_stress = num_online_cpus();
710 cxt.nrealreaders_stress = cxt.nrealwriters_stress;
711 }
712
713 lock_is_read_held = 0;
714 cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL);
715 if (cxt.lrsa == NULL) {
716 VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
717 firsterr = -ENOMEM;
718 kfree(cxt.lwsa);
719 goto unwind;
720 }
721
722 for (i = 0; i < cxt.nrealreaders_stress; i++) {
723 cxt.lrsa[i].n_lock_fail = 0;
724 cxt.lrsa[i].n_lock_acquired = 0;
725 }
726 }
727 lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
401 728
729 /* Prepare torture context. */
402 if (onoff_interval > 0) { 730 if (onoff_interval > 0) {
403 firsterr = torture_onoff_init(onoff_holdoff * HZ, 731 firsterr = torture_onoff_init(onoff_holdoff * HZ,
404 onoff_interval * HZ); 732 onoff_interval * HZ);
@@ -422,18 +750,51 @@ static int __init lock_torture_init(void)
422 goto unwind; 750 goto unwind;
423 } 751 }
424 752
425 writer_tasks = kzalloc(nrealwriters_stress * sizeof(writer_tasks[0]), 753 writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]),
426 GFP_KERNEL); 754 GFP_KERNEL);
427 if (writer_tasks == NULL) { 755 if (writer_tasks == NULL) {
428 VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); 756 VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
429 firsterr = -ENOMEM; 757 firsterr = -ENOMEM;
430 goto unwind; 758 goto unwind;
431 } 759 }
432 for (i = 0; i < nrealwriters_stress; i++) { 760
433 firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i], 761 if (cxt.cur_ops->readlock) {
762 reader_tasks = kzalloc(cxt.nrealreaders_stress * sizeof(reader_tasks[0]),
763 GFP_KERNEL);
764 if (reader_tasks == NULL) {
765 VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
766 firsterr = -ENOMEM;
767 goto unwind;
768 }
769 }
770
771 /*
772 * Create the kthreads and start torturing (oh, those poor little locks).
773 *
774 * TODO: Note that we interleave writers with readers, giving writers a
775 * slight advantage, by creating its kthread first. This can be modified
776 * for very specific needs, or even let the user choose the policy, if
777 * ever wanted.
778 */
779 for (i = 0, j = 0; i < cxt.nrealwriters_stress ||
780 j < cxt.nrealreaders_stress; i++, j++) {
781 if (i >= cxt.nrealwriters_stress)
782 goto create_reader;
783
784 /* Create writer. */
785 firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
434 writer_tasks[i]); 786 writer_tasks[i]);
435 if (firsterr) 787 if (firsterr)
436 goto unwind; 788 goto unwind;
789
790 create_reader:
791 if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress))
792 continue;
793 /* Create reader. */
794 firsterr = torture_create_kthread(lock_torture_reader, &cxt.lrsa[j],
795 reader_tasks[j]);
796 if (firsterr)
797 goto unwind;
437 } 798 }
438 if (stat_interval > 0) { 799 if (stat_interval > 0) {
439 firsterr = torture_create_kthread(lock_torture_stats, NULL, 800 firsterr = torture_create_kthread(lock_torture_stats, NULL,
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 23e89c5930e9..4d60986fcbee 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -56,9 +56,6 @@ do { \
56 * If the lock has already been acquired, then this will proceed to spin 56 * If the lock has already been acquired, then this will proceed to spin
57 * on this node->locked until the previous lock holder sets the node->locked 57 * on this node->locked until the previous lock holder sets the node->locked
58 * in mcs_spin_unlock(). 58 * in mcs_spin_unlock().
59 *
60 * We don't inline mcs_spin_lock() so that perf can correctly account for the
61 * time spent in this lock function.
62 */ 59 */
63static inline 60static inline
64void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) 61void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 5cf6731b98e9..3ef3736002d8 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -80,13 +80,13 @@ void debug_mutex_unlock(struct mutex *lock)
80 DEBUG_LOCKS_WARN_ON(lock->owner != current); 80 DEBUG_LOCKS_WARN_ON(lock->owner != current);
81 81
82 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 82 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
83 mutex_clear_owner(lock);
84 } 83 }
85 84
86 /* 85 /*
87 * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug 86 * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug
88 * mutexes so that we can do it here after we've verified state. 87 * mutexes so that we can do it here after we've verified state.
89 */ 88 */
89 mutex_clear_owner(lock);
90 atomic_set(&lock->count, 1); 90 atomic_set(&lock->count, 1);
91} 91}
92 92
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index ae712b25e492..454195194d4a 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -15,7 +15,7 @@
15 * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale 15 * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
16 * and Sven Dietrich. 16 * and Sven Dietrich.
17 * 17 *
18 * Also see Documentation/mutex-design.txt. 18 * Also see Documentation/locking/mutex-design.txt.
19 */ 19 */
20#include <linux/mutex.h> 20#include <linux/mutex.h>
21#include <linux/ww_mutex.h> 21#include <linux/ww_mutex.h>
@@ -106,6 +106,92 @@ void __sched mutex_lock(struct mutex *lock)
106EXPORT_SYMBOL(mutex_lock); 106EXPORT_SYMBOL(mutex_lock);
107#endif 107#endif
108 108
109static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
110 struct ww_acquire_ctx *ww_ctx)
111{
112#ifdef CONFIG_DEBUG_MUTEXES
113 /*
114 * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
115 * but released with a normal mutex_unlock in this call.
116 *
117 * This should never happen, always use ww_mutex_unlock.
118 */
119 DEBUG_LOCKS_WARN_ON(ww->ctx);
120
121 /*
122 * Not quite done after calling ww_acquire_done() ?
123 */
124 DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
125
126 if (ww_ctx->contending_lock) {
127 /*
128 * After -EDEADLK you tried to
129 * acquire a different ww_mutex? Bad!
130 */
131 DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
132
133 /*
134 * You called ww_mutex_lock after receiving -EDEADLK,
135 * but 'forgot' to unlock everything else first?
136 */
137 DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
138 ww_ctx->contending_lock = NULL;
139 }
140
141 /*
142 * Naughty, using a different class will lead to undefined behavior!
143 */
144 DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
145#endif
146 ww_ctx->acquired++;
147}
148
149/*
150 * after acquiring lock with fastpath or when we lost out in contested
151 * slowpath, set ctx and wake up any waiters so they can recheck.
152 *
153 * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
154 * as the fastpath and opportunistic spinning are disabled in that case.
155 */
156static __always_inline void
157ww_mutex_set_context_fastpath(struct ww_mutex *lock,
158 struct ww_acquire_ctx *ctx)
159{
160 unsigned long flags;
161 struct mutex_waiter *cur;
162
163 ww_mutex_lock_acquired(lock, ctx);
164
165 lock->ctx = ctx;
166
167 /*
168 * The lock->ctx update should be visible on all cores before
169 * the atomic read is done, otherwise contended waiters might be
170 * missed. The contended waiters will either see ww_ctx == NULL
171 * and keep spinning, or it will acquire wait_lock, add itself
172 * to waiter list and sleep.
173 */
174 smp_mb(); /* ^^^ */
175
176 /*
177 * Check if lock is contended, if not there is nobody to wake up
178 */
179 if (likely(atomic_read(&lock->base.count) == 0))
180 return;
181
182 /*
183 * Uh oh, we raced in fastpath, wake up everyone in this case,
184 * so they can see the new lock->ctx.
185 */
186 spin_lock_mutex(&lock->base.wait_lock, flags);
187 list_for_each_entry(cur, &lock->base.wait_list, list) {
188 debug_mutex_wake_waiter(&lock->base, cur);
189 wake_up_process(cur->task);
190 }
191 spin_unlock_mutex(&lock->base.wait_lock, flags);
192}
193
194
109#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 195#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
110/* 196/*
111 * In order to avoid a stampede of mutex spinners from acquiring the mutex 197 * In order to avoid a stampede of mutex spinners from acquiring the mutex
@@ -180,6 +266,135 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
180 */ 266 */
181 return retval; 267 return retval;
182} 268}
269
270/*
271 * Atomically try to take the lock when it is available
272 */
273static inline bool mutex_try_to_acquire(struct mutex *lock)
274{
275 return !mutex_is_locked(lock) &&
276 (atomic_cmpxchg(&lock->count, 1, 0) == 1);
277}
278
279/*
280 * Optimistic spinning.
281 *
282 * We try to spin for acquisition when we find that the lock owner
283 * is currently running on a (different) CPU and while we don't
284 * need to reschedule. The rationale is that if the lock owner is
285 * running, it is likely to release the lock soon.
286 *
287 * Since this needs the lock owner, and this mutex implementation
288 * doesn't track the owner atomically in the lock field, we need to
289 * track it non-atomically.
290 *
291 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
292 * to serialize everything.
293 *
294 * The mutex spinners are queued up using MCS lock so that only one
295 * spinner can compete for the mutex. However, if mutex spinning isn't
296 * going to happen, there is no point in going through the lock/unlock
297 * overhead.
298 *
299 * Returns true when the lock was taken, otherwise false, indicating
300 * that we need to jump to the slowpath and sleep.
301 */
302static bool mutex_optimistic_spin(struct mutex *lock,
303 struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
304{
305 struct task_struct *task = current;
306
307 if (!mutex_can_spin_on_owner(lock))
308 goto done;
309
310 if (!osq_lock(&lock->osq))
311 goto done;
312
313 while (true) {
314 struct task_struct *owner;
315
316 if (use_ww_ctx && ww_ctx->acquired > 0) {
317 struct ww_mutex *ww;
318
319 ww = container_of(lock, struct ww_mutex, base);
320 /*
321 * If ww->ctx is set the contents are undefined, only
322 * by acquiring wait_lock there is a guarantee that
323 * they are not invalid when reading.
324 *
325 * As such, when deadlock detection needs to be
326 * performed the optimistic spinning cannot be done.
327 */
328 if (ACCESS_ONCE(ww->ctx))
329 break;
330 }
331
332 /*
333 * If there's an owner, wait for it to either
334 * release the lock or go to sleep.
335 */
336 owner = ACCESS_ONCE(lock->owner);
337 if (owner && !mutex_spin_on_owner(lock, owner))
338 break;
339
340 /* Try to acquire the mutex if it is unlocked. */
341 if (mutex_try_to_acquire(lock)) {
342 lock_acquired(&lock->dep_map, ip);
343
344 if (use_ww_ctx) {
345 struct ww_mutex *ww;
346 ww = container_of(lock, struct ww_mutex, base);
347
348 ww_mutex_set_context_fastpath(ww, ww_ctx);
349 }
350
351 mutex_set_owner(lock);
352 osq_unlock(&lock->osq);
353 return true;
354 }
355
356 /*
357 * When there's no owner, we might have preempted between the
358 * owner acquiring the lock and setting the owner field. If
359 * we're an RT task that will live-lock because we won't let
360 * the owner complete.
361 */
362 if (!owner && (need_resched() || rt_task(task)))
363 break;
364
365 /*
366 * The cpu_relax() call is a compiler barrier which forces
367 * everything in this loop to be re-loaded. We don't need
368 * memory barriers as we'll eventually observe the right
369 * values at the cost of a few extra spins.
370 */
371 cpu_relax_lowlatency();
372 }
373
374 osq_unlock(&lock->osq);
375done:
376 /*
377 * If we fell out of the spin path because of need_resched(),
378 * reschedule now, before we try-lock the mutex. This avoids getting
379 * scheduled out right after we obtained the mutex.
380 */
381 if (need_resched()) {
382 /*
383 * We _should_ have TASK_RUNNING here, but just in case
384 * we do not, make it so, otherwise we might get stuck.
385 */
386 __set_current_state(TASK_RUNNING);
387 schedule_preempt_disabled();
388 }
389
390 return false;
391}
392#else
393static bool mutex_optimistic_spin(struct mutex *lock,
394 struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
395{
396 return false;
397}
183#endif 398#endif
184 399
185__visible __used noinline 400__visible __used noinline
@@ -277,91 +492,6 @@ __mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
277 return 0; 492 return 0;
278} 493}
279 494
280static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
281 struct ww_acquire_ctx *ww_ctx)
282{
283#ifdef CONFIG_DEBUG_MUTEXES
284 /*
285 * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
286 * but released with a normal mutex_unlock in this call.
287 *
288 * This should never happen, always use ww_mutex_unlock.
289 */
290 DEBUG_LOCKS_WARN_ON(ww->ctx);
291
292 /*
293 * Not quite done after calling ww_acquire_done() ?
294 */
295 DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
296
297 if (ww_ctx->contending_lock) {
298 /*
299 * After -EDEADLK you tried to
300 * acquire a different ww_mutex? Bad!
301 */
302 DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
303
304 /*
305 * You called ww_mutex_lock after receiving -EDEADLK,
306 * but 'forgot' to unlock everything else first?
307 */
308 DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
309 ww_ctx->contending_lock = NULL;
310 }
311
312 /*
313 * Naughty, using a different class will lead to undefined behavior!
314 */
315 DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
316#endif
317 ww_ctx->acquired++;
318}
319
320/*
321 * after acquiring lock with fastpath or when we lost out in contested
322 * slowpath, set ctx and wake up any waiters so they can recheck.
323 *
324 * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
325 * as the fastpath and opportunistic spinning are disabled in that case.
326 */
327static __always_inline void
328ww_mutex_set_context_fastpath(struct ww_mutex *lock,
329 struct ww_acquire_ctx *ctx)
330{
331 unsigned long flags;
332 struct mutex_waiter *cur;
333
334 ww_mutex_lock_acquired(lock, ctx);
335
336 lock->ctx = ctx;
337
338 /*
339 * The lock->ctx update should be visible on all cores before
340 * the atomic read is done, otherwise contended waiters might be
341 * missed. The contended waiters will either see ww_ctx == NULL
342 * and keep spinning, or it will acquire wait_lock, add itself
343 * to waiter list and sleep.
344 */
345 smp_mb(); /* ^^^ */
346
347 /*
348 * Check if lock is contended, if not there is nobody to wake up
349 */
350 if (likely(atomic_read(&lock->base.count) == 0))
351 return;
352
353 /*
354 * Uh oh, we raced in fastpath, wake up everyone in this case,
355 * so they can see the new lock->ctx.
356 */
357 spin_lock_mutex(&lock->base.wait_lock, flags);
358 list_for_each_entry(cur, &lock->base.wait_list, list) {
359 debug_mutex_wake_waiter(&lock->base, cur);
360 wake_up_process(cur->task);
361 }
362 spin_unlock_mutex(&lock->base.wait_lock, flags);
363}
364
365/* 495/*
366 * Lock a mutex (possibly interruptible), slowpath: 496 * Lock a mutex (possibly interruptible), slowpath:
367 */ 497 */
@@ -378,104 +508,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
378 preempt_disable(); 508 preempt_disable();
379 mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); 509 mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
380 510
381#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 511 if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
382 /* 512 /* got the lock, yay! */
383 * Optimistic spinning. 513 preempt_enable();
384 * 514 return 0;
385 * We try to spin for acquisition when we find that the lock owner
386 * is currently running on a (different) CPU and while we don't
387 * need to reschedule. The rationale is that if the lock owner is
388 * running, it is likely to release the lock soon.
389 *
390 * Since this needs the lock owner, and this mutex implementation
391 * doesn't track the owner atomically in the lock field, we need to
392 * track it non-atomically.
393 *
394 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
395 * to serialize everything.
396 *
397 * The mutex spinners are queued up using MCS lock so that only one
398 * spinner can compete for the mutex. However, if mutex spinning isn't
399 * going to happen, there is no point in going through the lock/unlock
400 * overhead.
401 */
402 if (!mutex_can_spin_on_owner(lock))
403 goto slowpath;
404
405 if (!osq_lock(&lock->osq))
406 goto slowpath;
407
408 for (;;) {
409 struct task_struct *owner;
410
411 if (use_ww_ctx && ww_ctx->acquired > 0) {
412 struct ww_mutex *ww;
413
414 ww = container_of(lock, struct ww_mutex, base);
415 /*
416 * If ww->ctx is set the contents are undefined, only
417 * by acquiring wait_lock there is a guarantee that
418 * they are not invalid when reading.
419 *
420 * As such, when deadlock detection needs to be
421 * performed the optimistic spinning cannot be done.
422 */
423 if (ACCESS_ONCE(ww->ctx))
424 break;
425 }
426
427 /*
428 * If there's an owner, wait for it to either
429 * release the lock or go to sleep.
430 */
431 owner = ACCESS_ONCE(lock->owner);
432 if (owner && !mutex_spin_on_owner(lock, owner))
433 break;
434
435 /* Try to acquire the mutex if it is unlocked. */
436 if (!mutex_is_locked(lock) &&
437 (atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
438 lock_acquired(&lock->dep_map, ip);
439 if (use_ww_ctx) {
440 struct ww_mutex *ww;
441 ww = container_of(lock, struct ww_mutex, base);
442
443 ww_mutex_set_context_fastpath(ww, ww_ctx);
444 }
445
446 mutex_set_owner(lock);
447 osq_unlock(&lock->osq);
448 preempt_enable();
449 return 0;
450 }
451
452 /*
453 * When there's no owner, we might have preempted between the
454 * owner acquiring the lock and setting the owner field. If
455 * we're an RT task that will live-lock because we won't let
456 * the owner complete.
457 */
458 if (!owner && (need_resched() || rt_task(task)))
459 break;
460
461 /*
462 * The cpu_relax() call is a compiler barrier which forces
463 * everything in this loop to be re-loaded. We don't need
464 * memory barriers as we'll eventually observe the right
465 * values at the cost of a few extra spins.
466 */
467 cpu_relax_lowlatency();
468 } 515 }
469 osq_unlock(&lock->osq); 516
470slowpath:
471 /*
472 * If we fell out of the spin path because of need_resched(),
473 * reschedule now, before we try-lock the mutex. This avoids getting
474 * scheduled out right after we obtained the mutex.
475 */
476 if (need_resched())
477 schedule_preempt_disabled();
478#endif
479 spin_lock_mutex(&lock->wait_lock, flags); 517 spin_lock_mutex(&lock->wait_lock, flags);
480 518
481 /* 519 /*
@@ -679,15 +717,21 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
679 * Release the lock, slowpath: 717 * Release the lock, slowpath:
680 */ 718 */
681static inline void 719static inline void
682__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) 720__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
683{ 721{
684 struct mutex *lock = container_of(lock_count, struct mutex, count);
685 unsigned long flags; 722 unsigned long flags;
686 723
687 /* 724 /*
688 * some architectures leave the lock unlocked in the fastpath failure 725 * As a performance measure, release the lock before doing other
726 * wakeup related duties to follow. This allows other tasks to acquire
727 * the lock sooner, while still handling cleanups in past unlock calls.
728 * This can be done as we do not enforce strict equivalence between the
729 * mutex counter and wait_list.
730 *
731 *
732 * Some architectures leave the lock unlocked in the fastpath failure
689 * case, others need to leave it locked. In the latter case we have to 733 * case, others need to leave it locked. In the latter case we have to
690 * unlock it here 734 * unlock it here - as the lock counter is currently 0 or negative.
691 */ 735 */
692 if (__mutex_slowpath_needs_to_unlock()) 736 if (__mutex_slowpath_needs_to_unlock())
693 atomic_set(&lock->count, 1); 737 atomic_set(&lock->count, 1);
@@ -716,7 +760,9 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
716__visible void 760__visible void
717__mutex_unlock_slowpath(atomic_t *lock_count) 761__mutex_unlock_slowpath(atomic_t *lock_count)
718{ 762{
719 __mutex_unlock_common_slowpath(lock_count, 1); 763 struct mutex *lock = container_of(lock_count, struct mutex, count);
764
765 __mutex_unlock_common_slowpath(lock, 1);
720} 766}
721 767
722#ifndef CONFIG_DEBUG_LOCK_ALLOC 768#ifndef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 4115fbf83b12..5cda397607f2 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -16,7 +16,7 @@
16#define mutex_remove_waiter(lock, waiter, ti) \ 16#define mutex_remove_waiter(lock, waiter, ti) \
17 __list_del((waiter)->list.prev, (waiter)->list.next) 17 __list_del((waiter)->list.prev, (waiter)->list.next)
18 18
19#ifdef CONFIG_SMP 19#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
20static inline void mutex_set_owner(struct mutex *lock) 20static inline void mutex_set_owner(struct mutex *lock)
21{ 21{
22 lock->owner = current; 22 lock->owner = current;
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index a0ea2a141b3b..7c98873a3077 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -8,7 +8,7 @@
8 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt 8 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
9 * Copyright (C) 2006 Esben Nielsen 9 * Copyright (C) 2006 Esben Nielsen
10 * 10 *
11 * See Documentation/rt-mutex-design.txt for details. 11 * See Documentation/locking/rt-mutex-design.txt for details.
12 */ 12 */
13#include <linux/spinlock.h> 13#include <linux/spinlock.h>
14#include <linux/export.h> 14#include <linux/export.h>
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index d6203faf2eb1..7628c3fc37ca 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -246,19 +246,22 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
246 246
247 return sem; 247 return sem;
248} 248}
249EXPORT_SYMBOL(rwsem_down_read_failed);
249 250
250static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) 251static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
251{ 252{
252 if (!(count & RWSEM_ACTIVE_MASK)) { 253 /*
253 /* try acquiring the write lock */ 254 * Try acquiring the write lock. Check count first in order
254 if (sem->count == RWSEM_WAITING_BIAS && 255 * to reduce unnecessary expensive cmpxchg() operations.
255 cmpxchg(&sem->count, RWSEM_WAITING_BIAS, 256 */
256 RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { 257 if (count == RWSEM_WAITING_BIAS &&
257 if (!list_is_singular(&sem->wait_list)) 258 cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
258 rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); 259 RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
259 return true; 260 if (!list_is_singular(&sem->wait_list))
260 } 261 rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
262 return true;
261 } 263 }
264
262 return false; 265 return false;
263} 266}
264 267
@@ -465,6 +468,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
465 468
466 return sem; 469 return sem;
467} 470}
471EXPORT_SYMBOL(rwsem_down_write_failed);
468 472
469/* 473/*
470 * handle waking up a waiter on the semaphore 474 * handle waking up a waiter on the semaphore
@@ -485,6 +489,7 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
485 489
486 return sem; 490 return sem;
487} 491}
492EXPORT_SYMBOL(rwsem_wake);
488 493
489/* 494/*
490 * downgrade a write lock into a read lock 495 * downgrade a write lock into a read lock
@@ -506,8 +511,4 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
506 511
507 return sem; 512 return sem;
508} 513}
509
510EXPORT_SYMBOL(rwsem_down_read_failed);
511EXPORT_SYMBOL(rwsem_down_write_failed);
512EXPORT_SYMBOL(rwsem_wake);
513EXPORT_SYMBOL(rwsem_downgrade_wake); 514EXPORT_SYMBOL(rwsem_downgrade_wake);
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 6815171a4fff..b8120abe594b 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -36,7 +36,7 @@
36static noinline void __down(struct semaphore *sem); 36static noinline void __down(struct semaphore *sem);
37static noinline int __down_interruptible(struct semaphore *sem); 37static noinline int __down_interruptible(struct semaphore *sem);
38static noinline int __down_killable(struct semaphore *sem); 38static noinline int __down_killable(struct semaphore *sem);
39static noinline int __down_timeout(struct semaphore *sem, long jiffies); 39static noinline int __down_timeout(struct semaphore *sem, long timeout);
40static noinline void __up(struct semaphore *sem); 40static noinline void __up(struct semaphore *sem);
41 41
42/** 42/**
@@ -145,14 +145,14 @@ EXPORT_SYMBOL(down_trylock);
145/** 145/**
146 * down_timeout - acquire the semaphore within a specified time 146 * down_timeout - acquire the semaphore within a specified time
147 * @sem: the semaphore to be acquired 147 * @sem: the semaphore to be acquired
148 * @jiffies: how long to wait before failing 148 * @timeout: how long to wait before failing
149 * 149 *
150 * Attempts to acquire the semaphore. If no more tasks are allowed to 150 * Attempts to acquire the semaphore. If no more tasks are allowed to
151 * acquire the semaphore, calling this function will put the task to sleep. 151 * acquire the semaphore, calling this function will put the task to sleep.
152 * If the semaphore is not released within the specified number of jiffies, 152 * If the semaphore is not released within the specified number of jiffies,
153 * this function returns -ETIME. It returns 0 if the semaphore was acquired. 153 * this function returns -ETIME. It returns 0 if the semaphore was acquired.
154 */ 154 */
155int down_timeout(struct semaphore *sem, long jiffies) 155int down_timeout(struct semaphore *sem, long timeout)
156{ 156{
157 unsigned long flags; 157 unsigned long flags;
158 int result = 0; 158 int result = 0;
@@ -161,7 +161,7 @@ int down_timeout(struct semaphore *sem, long jiffies)
161 if (likely(sem->count > 0)) 161 if (likely(sem->count > 0))
162 sem->count--; 162 sem->count--;
163 else 163 else
164 result = __down_timeout(sem, jiffies); 164 result = __down_timeout(sem, timeout);
165 raw_spin_unlock_irqrestore(&sem->lock, flags); 165 raw_spin_unlock_irqrestore(&sem->lock, flags);
166 166
167 return result; 167 return result;
@@ -248,9 +248,9 @@ static noinline int __sched __down_killable(struct semaphore *sem)
248 return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT); 248 return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT);
249} 249}
250 250
251static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) 251static noinline int __sched __down_timeout(struct semaphore *sem, long timeout)
252{ 252{
253 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); 253 return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout);
254} 254}
255 255
256static noinline void __sched __up(struct semaphore *sem) 256static noinline void __sched __up(struct semaphore *sem)