Diffstat (limited to 'kernel/rtmutex.c')
-rw-r--r--  kernel/rtmutex.c  621
1 file changed, 551 insertions, 70 deletions
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a9604815786a..23dd44372d8b 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -8,12 +8,20 @@
8 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt 8 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
9 * Copyright (C) 2006 Esben Nielsen 9 * Copyright (C) 2006 Esben Nielsen
10 * 10 *
11 * Adaptive Spinlocks:
12 * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
13 * and Peter Morreale,
14 * Adaptive Spinlocks simplification:
15 * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
16 *
11 * See Documentation/rt-mutex-design.txt for details. 17 * See Documentation/rt-mutex-design.txt for details.
12 */ 18 */
13#include <linux/spinlock.h> 19#include <linux/spinlock.h>
14#include <linux/module.h> 20#include <linux/module.h>
15#include <linux/sched.h> 21#include <linux/sched.h>
16#include <linux/timer.h> 22#include <linux/timer.h>
23#include <linux/hardirq.h>
24#include <linux/semaphore.h>
17 25
18#include "rtmutex_common.h" 26#include "rtmutex_common.h"
19 27
@@ -97,6 +105,22 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
97} 105}
98#endif 106#endif
99 107
108int pi_initialized;
109
110/*
111 * we initialize the wait_list runtime. (Could be done build-time and/or
112 * boot-time.)
113 */
114static inline void init_lists(struct rt_mutex *lock)
115{
116 if (unlikely(!lock->wait_list.prio_list.prev)) {
117 plist_head_init_raw(&lock->wait_list, &lock->wait_lock);
118#ifdef CONFIG_DEBUG_RT_MUTEXES
119 pi_initialized++;
120#endif
121 }
122}
123
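
On -rt nearly every spinlock becomes an rtmutex, including statically defined ones whose priority-sorted wait_list cannot be filled in by the static initializer, so the list is set up lazily on first contention; pi_initialized is only a debug counter. A sketch of the call pattern (not part of the patch, condensed from the slow paths added further down); init_lists() is only ever called with wait_lock already held, so the check cannot race:

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	init_lists(lock);	/* one-time lazy setup of lock->wait_list */
	/* ... contended slow path continues under wait_lock ... */
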
100/* 124/*
101 * Calculate task priority from the waiter list priority 125 * Calculate task priority from the waiter list priority
102 * 126 *
@@ -253,13 +277,13 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
253 plist_add(&waiter->list_entry, &lock->wait_list); 277 plist_add(&waiter->list_entry, &lock->wait_list);
254 278
255 /* Release the task */ 279 /* Release the task */
256 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 280 raw_spin_unlock(&task->pi_lock);
257 put_task_struct(task); 281 put_task_struct(task);
258 282
259 /* Grab the next task */ 283 /* Grab the next task */
260 task = rt_mutex_owner(lock); 284 task = rt_mutex_owner(lock);
261 get_task_struct(task); 285 get_task_struct(task);
262 raw_spin_lock_irqsave(&task->pi_lock, flags); 286 raw_spin_lock(&task->pi_lock);
263 287
264 if (waiter == rt_mutex_top_waiter(lock)) { 288 if (waiter == rt_mutex_top_waiter(lock)) {
265 /* Boost the owner */ 289 /* Boost the owner */
@@ -277,10 +301,10 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
277 __rt_mutex_adjust_prio(task); 301 __rt_mutex_adjust_prio(task);
278 } 302 }
279 303
280 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 304 raw_spin_unlock(&task->pi_lock);
281 305
282 top_waiter = rt_mutex_top_waiter(lock); 306 top_waiter = rt_mutex_top_waiter(lock);
283 raw_spin_unlock(&lock->wait_lock); 307 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
284 308
285 if (!detect_deadlock && waiter != top_waiter) 309 if (!detect_deadlock && waiter != top_waiter)
286 goto out_put_task; 310 goto out_put_task;
@@ -301,11 +325,10 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
301 * lock yet]: 325 * lock yet]:
302 */ 326 */
303static inline int try_to_steal_lock(struct rt_mutex *lock, 327static inline int try_to_steal_lock(struct rt_mutex *lock,
304 struct task_struct *task) 328 struct task_struct *task, int mode)
305{ 329{
306 struct task_struct *pendowner = rt_mutex_owner(lock); 330 struct task_struct *pendowner = rt_mutex_owner(lock);
307 struct rt_mutex_waiter *next; 331 struct rt_mutex_waiter *next;
308 unsigned long flags;
309 332
310 if (!rt_mutex_owner_pending(lock)) 333 if (!rt_mutex_owner_pending(lock))
311 return 0; 334 return 0;
@@ -313,9 +336,9 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
313 if (pendowner == task) 336 if (pendowner == task)
314 return 1; 337 return 1;
315 338
316 raw_spin_lock_irqsave(&pendowner->pi_lock, flags); 339 raw_spin_lock(&pendowner->pi_lock);
317 if (task->prio >= pendowner->prio) { 340 if (!lock_is_stealable(task, pendowner, mode)) {
318 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags); 341 raw_spin_unlock(&pendowner->pi_lock);
319 return 0; 342 return 0;
320 } 343 }
321 344
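
The new mode argument distinguishes normal stealing, which requires strictly higher priority, from the lateral stealing used by the spinlock slow path below, where equal priority is enough. lock_is_stealable() itself is not shown in this diff; a plausible sketch of the helper and the mode constants (an assumption, not taken from the hunk):

	#define STEAL_NORMAL	0
	#define STEAL_LATERAL	1

	static inline int lock_is_stealable(struct task_struct *task,
					    struct task_struct *pendowner, int mode)
	{
		if (mode == STEAL_NORMAL || rt_task(task)) {
			if (task->prio >= pendowner->prio)
				return 0;	/* need strictly higher priority */
		} else if (task->prio > pendowner->prio)
			return 0;		/* lateral: equal priority may steal */
		return 1;
	}
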
@@ -325,7 +348,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
325 * priority. 348 * priority.
326 */ 349 */
327 if (likely(!rt_mutex_has_waiters(lock))) { 350 if (likely(!rt_mutex_has_waiters(lock))) {
328 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags); 351 raw_spin_unlock(&pendowner->pi_lock);
329 return 1; 352 return 1;
330 } 353 }
331 354
@@ -333,7 +356,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
333 next = rt_mutex_top_waiter(lock); 356 next = rt_mutex_top_waiter(lock);
334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters); 357 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
335 __rt_mutex_adjust_prio(pendowner); 358 __rt_mutex_adjust_prio(pendowner);
336 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags); 359 raw_spin_unlock(&pendowner->pi_lock);
337 360
338 /* 361 /*
339 * We are going to steal the lock and a waiter was 362 * We are going to steal the lock and a waiter was
@@ -350,10 +373,10 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
350 * might be task: 373 * might be task:
351 */ 374 */
352 if (likely(next->task != task)) { 375 if (likely(next->task != task)) {
353 raw_spin_lock_irqsave(&task->pi_lock, flags); 376 raw_spin_lock(&task->pi_lock);
354 plist_add(&next->pi_list_entry, &task->pi_waiters); 377 plist_add(&next->pi_list_entry, &task->pi_waiters);
355 __rt_mutex_adjust_prio(task); 378 __rt_mutex_adjust_prio(task);
356 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 379 raw_spin_unlock(&task->pi_lock);
357 } 380 }
358 return 1; 381 return 1;
359} 382}
@@ -367,7 +390,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock,
367 * 390 *
368 * Must be called with lock->wait_lock held. 391 * Must be called with lock->wait_lock held.
369 */ 392 */
370static int try_to_take_rt_mutex(struct rt_mutex *lock) 393static int do_try_to_take_rt_mutex(struct rt_mutex *lock, int mode)
371{ 394{
372 /* 395 /*
373 * We have to be careful here if the atomic speedups are 396 * We have to be careful here if the atomic speedups are
@@ -390,7 +413,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
390 */ 413 */
391 mark_rt_mutex_waiters(lock); 414 mark_rt_mutex_waiters(lock);
392 415
393 if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) 416 if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current, mode))
394 return 0; 417 return 0;
395 418
396 /* We got the lock. */ 419 /* We got the lock. */
@@ -403,6 +426,11 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
403 return 1; 426 return 1;
404} 427}
405 428
429static inline int try_to_take_rt_mutex(struct rt_mutex *lock)
430{
431 return do_try_to_take_rt_mutex(lock, STEAL_NORMAL);
432}
433
406/* 434/*
407 * Task blocks on lock. 435 * Task blocks on lock.
408 * 436 *
@@ -413,14 +441,13 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
413static int task_blocks_on_rt_mutex(struct rt_mutex *lock, 441static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
414 struct rt_mutex_waiter *waiter, 442 struct rt_mutex_waiter *waiter,
415 struct task_struct *task, 443 struct task_struct *task,
416 int detect_deadlock) 444 int detect_deadlock, unsigned long flags)
417{ 445{
418 struct task_struct *owner = rt_mutex_owner(lock); 446 struct task_struct *owner = rt_mutex_owner(lock);
419 struct rt_mutex_waiter *top_waiter = waiter; 447 struct rt_mutex_waiter *top_waiter = waiter;
420 unsigned long flags;
421 int chain_walk = 0, res; 448 int chain_walk = 0, res;
422 449
423 raw_spin_lock_irqsave(&task->pi_lock, flags); 450 raw_spin_lock(&task->pi_lock);
424 __rt_mutex_adjust_prio(task); 451 __rt_mutex_adjust_prio(task);
425 waiter->task = task; 452 waiter->task = task;
426 waiter->lock = lock; 453 waiter->lock = lock;
@@ -434,17 +461,17 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
434 461
435 task->pi_blocked_on = waiter; 462 task->pi_blocked_on = waiter;
436 463
437 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 464 raw_spin_unlock(&task->pi_lock);
438 465
439 if (waiter == rt_mutex_top_waiter(lock)) { 466 if (waiter == rt_mutex_top_waiter(lock)) {
440 raw_spin_lock_irqsave(&owner->pi_lock, flags); 467 raw_spin_lock(&owner->pi_lock);
441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); 468 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
442 plist_add(&waiter->pi_list_entry, &owner->pi_waiters); 469 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
443 470
444 __rt_mutex_adjust_prio(owner); 471 __rt_mutex_adjust_prio(owner);
445 if (owner->pi_blocked_on) 472 if (owner->pi_blocked_on)
446 chain_walk = 1; 473 chain_walk = 1;
447 raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 474 raw_spin_unlock(&owner->pi_lock);
448 } 475 }
449 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) 476 else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
450 chain_walk = 1; 477 chain_walk = 1;
@@ -459,12 +486,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
459 */ 486 */
460 get_task_struct(owner); 487 get_task_struct(owner);
461 488
462 raw_spin_unlock(&lock->wait_lock); 489 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
463 490
464 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 491 res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
465 task); 492 task);
466 493
467 raw_spin_lock(&lock->wait_lock); 494 raw_spin_lock_irq(&lock->wait_lock);
468 495
469 return res; 496 return res;
470} 497}
@@ -477,13 +504,13 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
477 * 504 *
478 * Called with lock->wait_lock held. 505 * Called with lock->wait_lock held.
479 */ 506 */
480static void wakeup_next_waiter(struct rt_mutex *lock) 507static void wakeup_next_waiter(struct rt_mutex *lock, int savestate)
481{ 508{
482 struct rt_mutex_waiter *waiter; 509 struct rt_mutex_waiter *waiter;
483 struct task_struct *pendowner; 510 struct task_struct *pendowner;
484 unsigned long flags; 511 struct rt_mutex_waiter *next;
485 512
486 raw_spin_lock_irqsave(&current->pi_lock, flags); 513 raw_spin_lock(&current->pi_lock);
487 514
488 waiter = rt_mutex_top_waiter(lock); 515 waiter = rt_mutex_top_waiter(lock);
489 plist_del(&waiter->list_entry, &lock->wait_list); 516 plist_del(&waiter->list_entry, &lock->wait_list);
@@ -498,9 +525,44 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
498 pendowner = waiter->task; 525 pendowner = waiter->task;
499 waiter->task = NULL; 526 waiter->task = NULL;
500 527
528 /*
529 * Do the wakeup before the ownership change to give any spinning
530 * waiter grantees a headstart over the other threads that will
531 * trigger once owner changes.
532 */
533 if (!savestate)
534 wake_up_process(pendowner);
535 else {
536 /*
537 * We can skip the actual (expensive) wakeup if the
538 * waiter is already running, but we have to be careful
539 * of race conditions because they may be about to sleep.
540 *
541 * The waiter-side protocol has the following pattern:
542 * 1: Set state != RUNNING
543 * 2: Conditionally sleep if waiter->task != NULL;
544 *
545 * And the owner-side has the following:
546 * A: Set waiter->task = NULL
547 * B: Conditionally wake if the state != RUNNING
548 *
549 * As long as we ensure 1->2 order, and A->B order, we
550 * will never miss a wakeup.
551 *
552 * Therefore, this barrier ensures that waiter->task = NULL
553 * is visible before we test the pendowner->state. The
554 * corresponding barrier is in the sleep logic.
555 */
556 smp_mb();
557
558 /* If !RUNNING && !RUNNING_MUTEX */
559 if (pendowner->state & ~TASK_RUNNING_MUTEX)
560 wake_up_process_mutex(pendowner);
561 }
562
501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); 563 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
502 564
503 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 565 raw_spin_unlock(&current->pi_lock);
504 566
505 /* 567 /*
506 * Clear the pi_blocked_on variable and enqueue a possible 568 * Clear the pi_blocked_on variable and enqueue a possible
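
The two-sided protocol spelled out in the comment above can be condensed as follows (a sketch, not part of the patch; the waiter side lives in rt_spin_lock_slowlock() further down):

	/* waiter side (sketch) */
	saved_state = rt_set_current_blocked_state(current->state);	/* 1: leave RUNNING  */
	if (waiter.task)						/* 2: still waiting? */
		schedule_rt_mutex(lock);				/*    then sleep     */

	/* owner side (this function) */
	waiter->task = NULL;						/* A */
	smp_mb();							/* order A before B  */
	if (pendowner->state & ~TASK_RUNNING_MUTEX)			/* B: not running?   */
		wake_up_process_mutex(pendowner);			/*    then wake      */

Because 1 happens before 2 and A before B, either the waiter observes waiter->task == NULL and never sleeps, or the owner observes the non-running state and issues the wakeup, so a wakeup cannot be lost. The xchg() inside rt_set_current_blocked_state() provides the corresponding barrier on the waiter side.
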
@@ -509,7 +571,13 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
509 * waiter with higher priority than pending-owner->normal_prio 571 * waiter with higher priority than pending-owner->normal_prio
510 * is blocked on the unboosted (pending) owner. 572 * is blocked on the unboosted (pending) owner.
511 */ 573 */
512 raw_spin_lock_irqsave(&pendowner->pi_lock, flags); 574
575 if (rt_mutex_has_waiters(lock))
576 next = rt_mutex_top_waiter(lock);
577 else
578 next = NULL;
579
580 raw_spin_lock(&pendowner->pi_lock);
513 581
514 WARN_ON(!pendowner->pi_blocked_on); 582 WARN_ON(!pendowner->pi_blocked_on);
515 WARN_ON(pendowner->pi_blocked_on != waiter); 583 WARN_ON(pendowner->pi_blocked_on != waiter);
@@ -517,15 +585,10 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
517 585
518 pendowner->pi_blocked_on = NULL; 586 pendowner->pi_blocked_on = NULL;
519 587
520 if (rt_mutex_has_waiters(lock)) { 588 if (next)
521 struct rt_mutex_waiter *next;
522
523 next = rt_mutex_top_waiter(lock);
524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters); 589 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
525 }
526 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
527 590
528 wake_up_process(pendowner); 591 raw_spin_unlock(&pendowner->pi_lock);
529} 592}
530 593
531/* 594/*
@@ -534,22 +597,22 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
534 * Must be called with lock->wait_lock held 597 * Must be called with lock->wait_lock held
535 */ 598 */
536static void remove_waiter(struct rt_mutex *lock, 599static void remove_waiter(struct rt_mutex *lock,
537 struct rt_mutex_waiter *waiter) 600 struct rt_mutex_waiter *waiter,
601 unsigned long flags)
538{ 602{
539 int first = (waiter == rt_mutex_top_waiter(lock)); 603 int first = (waiter == rt_mutex_top_waiter(lock));
540 struct task_struct *owner = rt_mutex_owner(lock); 604 struct task_struct *owner = rt_mutex_owner(lock);
541 unsigned long flags;
542 int chain_walk = 0; 605 int chain_walk = 0;
543 606
544 raw_spin_lock_irqsave(&current->pi_lock, flags); 607 raw_spin_lock(&current->pi_lock);
545 plist_del(&waiter->list_entry, &lock->wait_list); 608 plist_del(&waiter->list_entry, &lock->wait_list);
546 waiter->task = NULL; 609 waiter->task = NULL;
547 current->pi_blocked_on = NULL; 610 current->pi_blocked_on = NULL;
548 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 611 raw_spin_unlock(&current->pi_lock);
549 612
550 if (first && owner != current) { 613 if (first && owner != current) {
551 614
552 raw_spin_lock_irqsave(&owner->pi_lock, flags); 615 raw_spin_lock(&owner->pi_lock);
553 616
554 plist_del(&waiter->pi_list_entry, &owner->pi_waiters); 617 plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
555 618
@@ -564,7 +627,7 @@ static void remove_waiter(struct rt_mutex *lock,
564 if (owner->pi_blocked_on) 627 if (owner->pi_blocked_on)
565 chain_walk = 1; 628 chain_walk = 1;
566 629
567 raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 630 raw_spin_unlock(&owner->pi_lock);
568 } 631 }
569 632
570 WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); 633 WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
@@ -575,11 +638,11 @@ static void remove_waiter(struct rt_mutex *lock,
575 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 638 /* gets dropped in rt_mutex_adjust_prio_chain()! */
576 get_task_struct(owner); 639 get_task_struct(owner);
577 640
578 raw_spin_unlock(&lock->wait_lock); 641 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
579 642
580 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); 643 rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
581 644
582 raw_spin_lock(&lock->wait_lock); 645 raw_spin_lock_irq(&lock->wait_lock);
583} 646}
584 647
585/* 648/*
@@ -600,18 +663,392 @@ void rt_mutex_adjust_pi(struct task_struct *task)
600 return; 663 return;
601 } 664 }
602 665
603 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
604
605 /* gets dropped in rt_mutex_adjust_prio_chain()! */ 666 /* gets dropped in rt_mutex_adjust_prio_chain()! */
606 get_task_struct(task); 667 get_task_struct(task);
668 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
607 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); 669 rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
608} 670}
609 671
672/*
673 * preemptible spin_lock functions:
674 */
675
676#ifdef CONFIG_PREEMPT_RT
677
678static inline void
679rt_spin_lock_fastlock(struct rt_mutex *lock,
680 void (*slowfn)(struct rt_mutex *lock))
681{
682 /* Temporary HACK! */
683 if (likely(!current->in_printk))
684 might_sleep();
685 else if (in_atomic() || irqs_disabled())
686 /* don't grab locks for printk in atomic */
687 return;
688
689 if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
690 rt_mutex_deadlock_account_lock(lock, current);
691 else
692 slowfn(lock);
693}
694
695static inline void
696rt_spin_lock_fastunlock(struct rt_mutex *lock,
697 void (*slowfn)(struct rt_mutex *lock))
698{
699 /* Temporary HACK! */
700 if (unlikely(rt_mutex_owner(lock) != current) && current->in_printk)
701 /* don't grab locks for printk in atomic */
702 return;
703
704 if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
705 rt_mutex_deadlock_account_unlock(current);
706 else
707 slowfn(lock);
708}
709
710
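
Both fast paths hinge on rt_mutex_cmpxchg(), defined elsewhere in this file: the owner field doubles as the lock word, with NULL meaning unlocked. A sketch of what is assumed (where cmpxchg cannot be used, for example with rtmutex debugging, the macro is defined to always fail so every operation takes the slow path):

	# define rt_mutex_cmpxchg(l, c, n)	(cmpxchg(&(l)->owner, c, n) == c)
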
711#ifdef CONFIG_SMP
712static int adaptive_wait(struct rt_mutex_waiter *waiter,
713 struct task_struct *orig_owner)
714{
715 for (;;) {
716
717 /* we are the owner? */
718 if (!waiter->task)
719 return 0;
720
721 /* Owner changed? Then lets update the original */
722 if (orig_owner != rt_mutex_owner(waiter->lock))
723 return 0;
724
725 /* Owner went to bed, so should we */
726 if (!task_is_current(orig_owner))
727 return 1;
728
729 cpu_relax();
730 }
731}
732#else
733static int adaptive_wait(struct rt_mutex_waiter *waiter,
734 struct task_struct *orig_owner)
735{
736 return 1;
737}
738#endif
739
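
On UP the owner cannot be executing on another CPU, so spinning can never make progress and the stub tells the caller to sleep immediately. Usage sketch, condensed from rt_spin_lock_slowlock() below: spin only while it pays off, otherwise block.

	if (adaptive_wait(&waiter, orig_owner)) {
		/* owner went (or is) off-CPU: fall back to a real sleep */
		if (waiter.task)
			schedule_rt_mutex(lock);
	}
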
740/*
741 * The state setting needs to preserve the original state and needs to
742 * take care of non rtmutex wakeups.
743 *
744 * Called with rtmutex->wait_lock held to serialize against rtmutex
745 * wakeups().
746 */
747static inline unsigned long
748rt_set_current_blocked_state(unsigned long saved_state)
749{
750 unsigned long state, block_state;
751
752 /*
753 * If state is TASK_INTERRUPTIBLE, then we set the state for
754 * blocking to TASK_INTERRUPTIBLE as well, otherwise we would
755 * miss real wakeups via wake_up_interruptible(). If such a
756 * wakeup happens we see the running state and preserve it in
757 * saved_state. Now we can ignore further wakeups as we will
758 * return in state running from our "spin" sleep.
759 */
760 if (saved_state == TASK_INTERRUPTIBLE ||
761 saved_state == TASK_STOPPED)
762 block_state = saved_state;
763 else
764 block_state = TASK_UNINTERRUPTIBLE;
765
766 state = xchg(&current->state, block_state);
767 /*
768 * Take care of non rtmutex wakeups. rtmutex wakeups
769 * or TASK_RUNNING_MUTEX to (UN)INTERRUPTIBLE.
770 */
771 if (state == TASK_RUNNING)
772 saved_state = TASK_RUNNING;
773
774 return saved_state;
775}
776
777static inline void rt_restore_current_state(unsigned long saved_state)
778{
779 unsigned long state = xchg(&current->state, saved_state);
780
781 if (state == TASK_RUNNING)
782 current->state = TASK_RUNNING;
783}
784
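
The effect of the save/restore pair is that sleeping on a converted spinlock is invisible to whatever sleep/wakeup protocol the task was already engaged in. A hypothetical illustration (dev and dev->lock are made-up names, not from this patch):

	set_current_state(TASK_INTERRUPTIBLE);	/* caller's own wait protocol        */
	spin_lock(&dev->lock);			/* may block on -rt: state is saved,
						 * then restored once the lock is taken */
	/* ... check the wait condition under the lock ... */
	spin_unlock(&dev->lock);
	schedule();				/* a real wakeup that arrived while
						 * blocked on the lock is not lost   */
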
785/*
786 * Slow path lock function spin_lock style: this variant is very
787 * careful not to miss any non-lock wakeups.
788 *
789 * The wakeup side uses wake_up_process_mutex, which, combined with
790 * the xchg code of this function is a transparent sleep/wakeup
791 * mechanism nested within any existing sleep/wakeup mechanism. This
792 * enables the seemless use of arbitrary (blocking) spinlocks within
793 * sleep/wakeup event loops.
794 */
795static void noinline __sched
796rt_spin_lock_slowlock(struct rt_mutex *lock)
797{
798 struct rt_mutex_waiter waiter;
799 unsigned long saved_state, flags;
800 struct task_struct *orig_owner;
801
802 debug_rt_mutex_init_waiter(&waiter);
803 waiter.task = NULL;
804
805 raw_spin_lock_irqsave(&lock->wait_lock, flags);
806 init_lists(lock);
807
808 BUG_ON(rt_mutex_owner(lock) == current);
809
810 /*
811 * Here we save whatever state the task was in originally,
812 * we'll restore it at the end of the function and we'll take
813 * any intermediate wakeup into account as well, independently
814 * of the lock sleep/wakeup mechanism. When we get a real
815 * wakeup the task->state is TASK_RUNNING and we change
816 * saved_state accordingly. If we did not get a real wakeup
817 * then we return with the saved state. We need to be careful
818 * about original state TASK_INTERRUPTIBLE as well, as we
819 * could miss a wakeup_interruptible()
820 */
821 saved_state = rt_set_current_blocked_state(current->state);
822
823 for (;;) {
824 int saved_lock_depth = current->lock_depth;
825
826 /* Try to acquire the lock */
827 if (do_try_to_take_rt_mutex(lock, STEAL_LATERAL))
828 break;
829
830 /*
831 * waiter.task is NULL the first time we come here and
832 * when we have been woken up by the previous owner
 833 * but the lock got stolen by a higher prio task.
834 */
835 if (!waiter.task) {
836 task_blocks_on_rt_mutex(lock, &waiter, current, 0,
837 flags);
838 /* Wakeup during boost ? */
839 if (unlikely(!waiter.task))
840 continue;
841 }
842
843 /*
844 * Prevent schedule() to drop BKL, while waiting for
845 * the lock ! We restore lock_depth when we come back.
846 */
847 current->lock_depth = -1;
848 orig_owner = rt_mutex_owner(lock);
849 get_task_struct(orig_owner);
850 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
851
852 debug_rt_mutex_print_deadlock(&waiter);
853
854 if (adaptive_wait(&waiter, orig_owner)) {
855 put_task_struct(orig_owner);
856
857 if (waiter.task)
858 schedule_rt_mutex(lock);
859 } else
860 put_task_struct(orig_owner);
861
862 raw_spin_lock_irqsave(&lock->wait_lock, flags);
863 current->lock_depth = saved_lock_depth;
864 saved_state = rt_set_current_blocked_state(saved_state);
865 }
866
867 rt_restore_current_state(saved_state);
868
869 /*
870 * Extremely rare case, if we got woken up by a non-mutex wakeup,
871 * and we managed to steal the lock despite us not being the
872 * highest-prio waiter (due to SCHED_OTHER changing prio), then we
873 * can end up with a non-NULL waiter.task:
874 */
875 if (unlikely(waiter.task))
876 remove_waiter(lock, &waiter, flags);
877 /*
878 * try_to_take_rt_mutex() sets the waiter bit
879 * unconditionally. We might have to fix that up:
880 */
881 fixup_rt_mutex_waiters(lock);
882
883 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
884
885 debug_rt_mutex_free_waiter(&waiter);
886}
887
888/*
889 * Slow path to release a rt_mutex spin_lock style
890 */
891static void noinline __sched
892rt_spin_lock_slowunlock(struct rt_mutex *lock)
893{
894 unsigned long flags;
895
896 raw_spin_lock_irqsave(&lock->wait_lock, flags);
897
898 debug_rt_mutex_unlock(lock);
899
900 rt_mutex_deadlock_account_unlock(current);
901
902 if (!rt_mutex_has_waiters(lock)) {
903 lock->owner = NULL;
904 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
905 return;
906 }
907
908 wakeup_next_waiter(lock, 1);
909
910 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
911
 912 /* Undo pi boosting when necessary */
913 rt_mutex_adjust_prio(current);
914}
915
916void __lockfunc rt_spin_lock(spinlock_t *lock)
917{
918 rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
919 spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
920}
921EXPORT_SYMBOL(rt_spin_lock);
922
923void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
924{
925 rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock);
926}
927EXPORT_SYMBOL(__rt_spin_lock);
928
929#ifdef CONFIG_DEBUG_LOCK_ALLOC
930
931void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
932{
933 rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
934 spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
935}
936EXPORT_SYMBOL(rt_spin_lock_nested);
937
938#endif
939
940void __lockfunc rt_spin_unlock(spinlock_t *lock)
941{
942 /* NOTE: we always pass in '1' for nested, for simplicity */
943 spin_release(&lock->dep_map, 1, _RET_IP_);
944 rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
945}
946EXPORT_SYMBOL(rt_spin_unlock);
947
948void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
949{
950 rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
951}
952EXPORT_SYMBOL(__rt_spin_unlock);
953
954/*
955 * Wait for the lock to get unlocked: instead of polling for an unlock
956 * (like raw spinlocks do), we lock and unlock, to force the kernel to
957 * schedule if there's contention:
958 */
959void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
960{
961 spin_lock(lock);
962 spin_unlock(lock);
963}
964EXPORT_SYMBOL(rt_spin_unlock_wait);
965
966int __lockfunc rt_spin_trylock(spinlock_t *lock)
967{
968 int ret = rt_mutex_trylock(&lock->lock);
969
970 if (ret)
971 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
972
973 return ret;
974}
975EXPORT_SYMBOL(rt_spin_trylock);
976
977int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
978{
979 int ret;
980
981 *flags = 0;
982 ret = rt_mutex_trylock(&lock->lock);
983 if (ret)
984 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
985
986 return ret;
987}
988EXPORT_SYMBOL(rt_spin_trylock_irqsave);
989
990int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
991{
992 /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
993 if (atomic_add_unless(atomic, -1, 1))
994 return 0;
995 rt_spin_lock(lock);
996 if (atomic_dec_and_test(atomic))
997 return 1;
998 rt_spin_unlock(lock);
999 return 0;
1000}
1001EXPORT_SYMBOL(atomic_dec_and_spin_lock);
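
atomic_dec_and_spin_lock() is the sleeping-spinlock counterpart of atomic_dec_and_lock(): it returns 1 only when the counter reached zero, and then the lock is held. Usage sketch (obj and obj_lock are hypothetical):

	if (atomic_dec_and_spin_lock(&obj->refcount, &obj_lock)) {
		/* last reference dropped: tear down under the lock */
		list_del(&obj->node);
		rt_spin_unlock(&obj_lock);
		kfree(obj);
	}
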
1002
1003void
1004__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
1005{
1006#ifdef CONFIG_DEBUG_LOCK_ALLOC
1007 /*
1008 * Make sure we are not reinitializing a held lock:
1009 */
1010 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
1011 lockdep_init_map(&lock->dep_map, name, key, 0);
1012#endif
1013 __rt_mutex_init(&lock->lock, name);
1014}
1015EXPORT_SYMBOL(__rt_spin_lock_init);
1016
1017#endif
1018
1019static inline int rt_release_bkl(struct rt_mutex *lock, unsigned long flags)
1020{
1021 int saved_lock_depth = current->lock_depth;
1022
1023#ifdef CONFIG_LOCK_KERNEL
1024 current->lock_depth = -1;
1025 /*
1026 * try_to_take_lock set the waiters, make sure it's
1027 * still correct.
1028 */
1029 fixup_rt_mutex_waiters(lock);
1030 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1031
1032 mutex_unlock(&kernel_sem);
1033
1034 raw_spin_lock_irq(&lock->wait_lock);
1035#endif
1036 return saved_lock_depth;
1037}
1038
1039static inline void rt_reacquire_bkl(int saved_lock_depth)
1040{
1041#ifdef CONFIG_LOCK_KERNEL
1042 mutex_lock(&kernel_sem);
1043 current->lock_depth = saved_lock_depth;
1044#endif
1045}
1046
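
The pair above lets the slow path drop the Big Kernel Lock, held as the kernel_sem mutex on this tree, before blocking on the rtmutex and take it back afterwards, avoiding the scheduler deadlock mentioned in rt_mutex_slowlock() below. Condensed from that function (sketch):

	if (unlikely(current->lock_depth >= 0))
		saved_lock_depth = rt_release_bkl(lock, flags);	/* give the BKL up   */

	/* ... blocking wait/acquire loop ... */

	if (unlikely(saved_lock_depth >= 0))
		rt_reacquire_bkl(saved_lock_depth);		/* take the BKL back */
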
610/** 1047/**
611 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop 1048 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
612 * @lock: the rt_mutex to take 1049 * @lock: the rt_mutex to take
613 * @state: the state the task should block in (TASK_INTERRUPTIBLE 1050 * @state: the state the task should block in (TASK_INTERRUPTIBLE
614 * or TASK_UNINTERRUPTIBLE) 1051 * or TASK_UNINTERRUPTIBLE)
615 * @timeout: the pre-initialized and started timer, or NULL for none 1052 * @timeout: the pre-initialized and started timer, or NULL for none
616 * @waiter: the pre-initialized rt_mutex_waiter 1053 * @waiter: the pre-initialized rt_mutex_waiter
617 * @detect_deadlock: passed to task_blocks_on_rt_mutex 1054 * @detect_deadlock: passed to task_blocks_on_rt_mutex
@@ -622,7 +1059,7 @@ static int __sched
622__rt_mutex_slowlock(struct rt_mutex *lock, int state, 1059__rt_mutex_slowlock(struct rt_mutex *lock, int state,
623 struct hrtimer_sleeper *timeout, 1060 struct hrtimer_sleeper *timeout,
624 struct rt_mutex_waiter *waiter, 1061 struct rt_mutex_waiter *waiter,
625 int detect_deadlock) 1062 int detect_deadlock, unsigned long flags)
626{ 1063{
627 int ret = 0; 1064 int ret = 0;
628 1065
@@ -652,7 +1089,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
652 */ 1089 */
653 if (!waiter->task) { 1090 if (!waiter->task) {
654 ret = task_blocks_on_rt_mutex(lock, waiter, current, 1091 ret = task_blocks_on_rt_mutex(lock, waiter, current,
655 detect_deadlock); 1092 detect_deadlock, flags);
656 /* 1093 /*
657 * If we got woken up by the owner then start loop 1094 * If we got woken up by the owner then start loop
658 * all over without going into schedule to try 1095 * all over without going into schedule to try
@@ -672,14 +1109,15 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
672 break; 1109 break;
673 } 1110 }
674 1111
675 raw_spin_unlock(&lock->wait_lock); 1112 raw_spin_unlock_irq(&lock->wait_lock);
676 1113
677 debug_rt_mutex_print_deadlock(waiter); 1114 debug_rt_mutex_print_deadlock(waiter);
678 1115
679 if (waiter->task) 1116 if (waiter->task)
680 schedule_rt_mutex(lock); 1117 schedule_rt_mutex(lock);
681 1118
682 raw_spin_lock(&lock->wait_lock); 1119 raw_spin_lock_irq(&lock->wait_lock);
1120
683 set_current_state(state); 1121 set_current_state(state);
684 } 1122 }
685 1123
@@ -694,20 +1132,29 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
694 struct hrtimer_sleeper *timeout, 1132 struct hrtimer_sleeper *timeout,
695 int detect_deadlock) 1133 int detect_deadlock)
696{ 1134{
1135 int ret = 0, saved_lock_depth = -1;
697 struct rt_mutex_waiter waiter; 1136 struct rt_mutex_waiter waiter;
698 int ret = 0; 1137 unsigned long flags;
699 1138
700 debug_rt_mutex_init_waiter(&waiter); 1139 debug_rt_mutex_init_waiter(&waiter);
701 waiter.task = NULL; 1140 waiter.task = NULL;
702 1141
703 raw_spin_lock(&lock->wait_lock); 1142 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1143 init_lists(lock);
704 1144
705 /* Try to acquire the lock again: */ 1145 /* Try to acquire the lock again: */
706 if (try_to_take_rt_mutex(lock)) { 1146 if (try_to_take_rt_mutex(lock)) {
707 raw_spin_unlock(&lock->wait_lock); 1147 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
708 return 0; 1148 return 0;
709 } 1149 }
710 1150
1151 /*
1152 * We drop the BKL here before we go into the wait loop to avoid a
1153 * possible deadlock in the scheduler.
1154 */
1155 if (unlikely(current->lock_depth >= 0))
1156 saved_lock_depth = rt_release_bkl(lock, flags);
1157
711 set_current_state(state); 1158 set_current_state(state);
712 1159
713 /* Setup the timer, when timeout != NULL */ 1160 /* Setup the timer, when timeout != NULL */
@@ -718,12 +1165,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
718 } 1165 }
719 1166
720 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, 1167 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
721 detect_deadlock); 1168 detect_deadlock, flags);
722 1169
723 set_current_state(TASK_RUNNING); 1170 set_current_state(TASK_RUNNING);
724 1171
725 if (unlikely(waiter.task)) 1172 if (unlikely(waiter.task))
726 remove_waiter(lock, &waiter); 1173 remove_waiter(lock, &waiter, flags);
727 1174
728 /* 1175 /*
729 * try_to_take_rt_mutex() sets the waiter bit 1176 * try_to_take_rt_mutex() sets the waiter bit
@@ -731,7 +1178,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
731 */ 1178 */
732 fixup_rt_mutex_waiters(lock); 1179 fixup_rt_mutex_waiters(lock);
733 1180
734 raw_spin_unlock(&lock->wait_lock); 1181 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
735 1182
736 /* Remove pending timer: */ 1183 /* Remove pending timer: */
737 if (unlikely(timeout)) 1184 if (unlikely(timeout))
@@ -745,6 +1192,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
745 if (unlikely(ret)) 1192 if (unlikely(ret))
746 rt_mutex_adjust_prio(current); 1193 rt_mutex_adjust_prio(current);
747 1194
 1195 /* Must we reacquire the BKL? */
1196 if (unlikely(saved_lock_depth >= 0))
1197 rt_reacquire_bkl(saved_lock_depth);
1198
748 debug_rt_mutex_free_waiter(&waiter); 1199 debug_rt_mutex_free_waiter(&waiter);
749 1200
750 return ret; 1201 return ret;
@@ -756,12 +1207,15 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
756static inline int 1207static inline int
757rt_mutex_slowtrylock(struct rt_mutex *lock) 1208rt_mutex_slowtrylock(struct rt_mutex *lock)
758{ 1209{
1210 unsigned long flags;
759 int ret = 0; 1211 int ret = 0;
760 1212
761 raw_spin_lock(&lock->wait_lock); 1213 raw_spin_lock_irqsave(&lock->wait_lock, flags);
762 1214
763 if (likely(rt_mutex_owner(lock) != current)) { 1215 if (likely(rt_mutex_owner(lock) != current)) {
764 1216
1217 init_lists(lock);
1218
765 ret = try_to_take_rt_mutex(lock); 1219 ret = try_to_take_rt_mutex(lock);
766 /* 1220 /*
767 * try_to_take_rt_mutex() sets the lock waiters 1221 * try_to_take_rt_mutex() sets the lock waiters
@@ -770,7 +1224,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
770 fixup_rt_mutex_waiters(lock); 1224 fixup_rt_mutex_waiters(lock);
771 } 1225 }
772 1226
773 raw_spin_unlock(&lock->wait_lock); 1227 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
774 1228
775 return ret; 1229 return ret;
776} 1230}
@@ -781,7 +1235,9 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
781static void __sched 1235static void __sched
782rt_mutex_slowunlock(struct rt_mutex *lock) 1236rt_mutex_slowunlock(struct rt_mutex *lock)
783{ 1237{
784 raw_spin_lock(&lock->wait_lock); 1238 unsigned long flags;
1239
1240 raw_spin_lock_irqsave(&lock->wait_lock, flags);
785 1241
786 debug_rt_mutex_unlock(lock); 1242 debug_rt_mutex_unlock(lock);
787 1243
@@ -789,13 +1245,13 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
789 1245
790 if (!rt_mutex_has_waiters(lock)) { 1246 if (!rt_mutex_has_waiters(lock)) {
791 lock->owner = NULL; 1247 lock->owner = NULL;
792 raw_spin_unlock(&lock->wait_lock); 1248 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
793 return; 1249 return;
794 } 1250 }
795 1251
796 wakeup_next_waiter(lock); 1252 wakeup_next_waiter(lock, 0);
797 1253
798 raw_spin_unlock(&lock->wait_lock); 1254 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
799 1255
800 /* Undo pi boosting if necessary: */ 1256 /* Undo pi boosting if necessary: */
801 rt_mutex_adjust_prio(current); 1257 rt_mutex_adjust_prio(current);
@@ -857,6 +1313,27 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
857} 1313}
858 1314
859/** 1315/**
1316 * rt_mutex_lock_killable - lock a rt_mutex killable
1317 *
1318 * @lock: the rt_mutex to be locked
1319 * @detect_deadlock: deadlock detection on/off
1320 *
1321 * Returns:
1322 * 0 on success
1323 * -EINTR when interrupted by a signal
1324 * -EDEADLK when the lock would deadlock (when deadlock detection is on)
1325 */
1326int __sched rt_mutex_lock_killable(struct rt_mutex *lock,
1327 int detect_deadlock)
1328{
1329 might_sleep();
1330
1331 return rt_mutex_fastlock(lock, TASK_KILLABLE,
1332 detect_deadlock, rt_mutex_slowlock);
1333}
1334EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
1335
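
A usage sketch for the new killable variant (my_lock is a hypothetical rt_mutex); with detect_deadlock off, the wait is only broken by a fatal signal:

	ret = rt_mutex_lock_killable(&my_lock, 0);
	if (ret)
		return ret;	/* -EINTR: killed while waiting, lock not taken */
	/* ... critical section ... */
	rt_mutex_unlock(&my_lock);
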
1336/**
860 * rt_mutex_lock - lock a rt_mutex 1337 * rt_mutex_lock - lock a rt_mutex
861 * 1338 *
862 * @lock: the rt_mutex to be locked 1339 * @lock: the rt_mutex to be locked
@@ -1030,13 +1507,15 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1030 struct rt_mutex_waiter *waiter, 1507 struct rt_mutex_waiter *waiter,
1031 struct task_struct *task, int detect_deadlock) 1508 struct task_struct *task, int detect_deadlock)
1032{ 1509{
1510 unsigned long flags;
1033 int ret; 1511 int ret;
1034 1512
1035 raw_spin_lock(&lock->wait_lock); 1513 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1036 1514
1037 mark_rt_mutex_waiters(lock); 1515 mark_rt_mutex_waiters(lock);
1038 1516
1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { 1517 if (!rt_mutex_owner(lock) ||
1518 try_to_steal_lock(lock, task, STEAL_NORMAL)) {
1040 /* We got the lock for task. */ 1519 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock); 1520 debug_rt_mutex_lock(lock);
1042 rt_mutex_set_owner(lock, task, 0); 1521 rt_mutex_set_owner(lock, task, 0);
@@ -1045,7 +1524,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1045 return 1; 1524 return 1;
1046 } 1525 }
1047 1526
1048 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 1527 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock,
1528 flags);
1049 1529
1050 if (ret && !waiter->task) { 1530 if (ret && !waiter->task) {
1051 /* 1531 /*
@@ -1056,7 +1536,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1056 */ 1536 */
1057 ret = 0; 1537 ret = 0;
1058 } 1538 }
1059 raw_spin_unlock(&lock->wait_lock); 1539 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1060 1540
1061 debug_rt_mutex_print_deadlock(waiter); 1541 debug_rt_mutex_print_deadlock(waiter);
1062 1542
@@ -1104,19 +1584,20 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1104 struct rt_mutex_waiter *waiter, 1584 struct rt_mutex_waiter *waiter,
1105 int detect_deadlock) 1585 int detect_deadlock)
1106{ 1586{
1587 unsigned long flags;
1107 int ret; 1588 int ret;
1108 1589
1109 raw_spin_lock(&lock->wait_lock); 1590 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1110 1591
1111 set_current_state(TASK_INTERRUPTIBLE); 1592 set_current_state(TASK_INTERRUPTIBLE);
1112 1593
1113 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, 1594 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
1114 detect_deadlock); 1595 detect_deadlock, flags);
1115 1596
1116 set_current_state(TASK_RUNNING); 1597 set_current_state(TASK_RUNNING);
1117 1598
1118 if (unlikely(waiter->task)) 1599 if (unlikely(waiter->task))
1119 remove_waiter(lock, waiter); 1600 remove_waiter(lock, waiter, flags);
1120 1601
1121 /* 1602 /*
1122 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might 1603 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
@@ -1124,7 +1605,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1124 */ 1605 */
1125 fixup_rt_mutex_waiters(lock); 1606 fixup_rt_mutex_waiters(lock);
1126 1607
1127 raw_spin_unlock(&lock->wait_lock); 1608 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1128 1609
1129 /* 1610 /*
1130 * Readjust priority, when we did not get the lock. We might have been 1611 * Readjust priority, when we did not get the lock. We might have been