aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2011-01-31 09:08:43 -0500
committerThomas Gleixner <tglx@linutronix.de>2011-01-31 09:09:14 -0500
commit51563cd53c4b1c1790fccd2e0af0e2b756589af9 (patch)
treed2fedfc654ab4fa011feaca262f95481a89e232a /kernel
parentd123375425d7df4b6081a631fc1203fceafa59b2 (diff)
parent8161239a8bcce9ad6b537c04a1fa3b5c68bae693 (diff)
Merge branch 'tip/rtmutex' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into core/locking
*git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace tip/rtmutex: rtmutex: Simplify PI algorithm and make highest prio task get lock
Diffstat (limited to 'kernel')
-rw-r--r--kernel/futex.c22
-rw-r--r--kernel/params.c65
-rw-r--r--kernel/perf_event.c46
-rw-r--r--kernel/printk.c100
-rw-r--r--kernel/rtmutex-debug.c1
-rw-r--r--kernel/rtmutex.c318
-rw-r--r--kernel/rtmutex_common.h16
-rw-r--r--kernel/sched_fair.c78
-rw-r--r--kernel/sysctl.c3
-rw-r--r--kernel/time/tick-sched.c7
10 files changed, 321 insertions, 335 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index b766d28accd6..64c38115c7b6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1556,10 +1556,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1556 1556
1557 /* 1557 /*
1558 * We are here either because we stole the rtmutex from the 1558 * We are here either because we stole the rtmutex from the
1559 * pending owner or we are the pending owner which failed to 1559 * previous highest priority waiter or we are the highest priority
1560 * get the rtmutex. We have to replace the pending owner TID 1560 * waiter but failed to get the rtmutex the first time.
1561 * in the user space variable. This must be atomic as we have 1561 * We have to replace the newowner TID in the user space variable.
1562 * to preserve the owner died bit here. 1562 * This must be atomic as we have to preserve the owner died bit here.
1563 * 1563 *
1564 * Note: We write the user space value _before_ changing the pi_state 1564 * Note: We write the user space value _before_ changing the pi_state
1565 * because we can fault here. Imagine swapped out pages or a fork 1565 * because we can fault here. Imagine swapped out pages or a fork
@@ -1608,8 +1608,8 @@ retry:
1608 1608
1609 /* 1609 /*
1610 * To handle the page fault we need to drop the hash bucket 1610 * To handle the page fault we need to drop the hash bucket
1611 * lock here. That gives the other task (either the pending 1611 * lock here. That gives the other task (either the highest priority
1612 * owner itself or the task which stole the rtmutex) the 1612 * waiter itself or the task which stole the rtmutex) the
1613 * chance to try the fixup of the pi_state. So once we are 1613 * chance to try the fixup of the pi_state. So once we are
1614 * back from handling the fault we need to check the pi_state 1614 * back from handling the fault we need to check the pi_state
1615 * after reacquiring the hash bucket lock and before trying to 1615 * after reacquiring the hash bucket lock and before trying to
@@ -1685,18 +1685,20 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1685 /* 1685 /*
1686 * pi_state is incorrect, some other task did a lock steal and 1686 * pi_state is incorrect, some other task did a lock steal and
1687 * we returned due to timeout or signal without taking the 1687 * we returned due to timeout or signal without taking the
1688 * rt_mutex. Too late. We can access the rt_mutex_owner without 1688 * rt_mutex. Too late.
1689 * locking, as the other task is now blocked on the hash bucket
1690 * lock. Fix the state up.
1691 */ 1689 */
1690 raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
1692 owner = rt_mutex_owner(&q->pi_state->pi_mutex); 1691 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1692 if (!owner)
1693 owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
1694 raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
1693 ret = fixup_pi_state_owner(uaddr, q, owner); 1695 ret = fixup_pi_state_owner(uaddr, q, owner);
1694 goto out; 1696 goto out;
1695 } 1697 }
1696 1698
1697 /* 1699 /*
1698 * Paranoia check. If we did not take the lock, then we should not be 1700 * Paranoia check. If we did not take the lock, then we should not be
1699 * the owner, nor the pending owner, of the rt_mutex. 1701 * the owner of the rt_mutex.
1700 */ 1702 */
1701 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) 1703 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1702 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " 1704 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
diff --git a/kernel/params.c b/kernel/params.c
index 08107d181758..0da1411222b9 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -719,9 +719,7 @@ void destroy_params(const struct kernel_param *params, unsigned num)
719 params[i].ops->free(params[i].arg); 719 params[i].ops->free(params[i].arg);
720} 720}
721 721
722static void __init kernel_add_sysfs_param(const char *name, 722static struct module_kobject * __init locate_module_kobject(const char *name)
723 struct kernel_param *kparam,
724 unsigned int name_skip)
725{ 723{
726 struct module_kobject *mk; 724 struct module_kobject *mk;
727 struct kobject *kobj; 725 struct kobject *kobj;
@@ -729,10 +727,7 @@ static void __init kernel_add_sysfs_param(const char *name,
729 727
730 kobj = kset_find_obj(module_kset, name); 728 kobj = kset_find_obj(module_kset, name);
731 if (kobj) { 729 if (kobj) {
732 /* We already have one. Remove params so we can add more. */
733 mk = to_module_kobject(kobj); 730 mk = to_module_kobject(kobj);
734 /* We need to remove it before adding parameters. */
735 sysfs_remove_group(&mk->kobj, &mk->mp->grp);
736 } else { 731 } else {
737 mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); 732 mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
738 BUG_ON(!mk); 733 BUG_ON(!mk);
@@ -743,15 +738,36 @@ static void __init kernel_add_sysfs_param(const char *name,
743 "%s", name); 738 "%s", name);
744 if (err) { 739 if (err) {
745 kobject_put(&mk->kobj); 740 kobject_put(&mk->kobj);
746 printk(KERN_ERR "Module '%s' failed add to sysfs, " 741 printk(KERN_ERR
747 "error number %d\n", name, err); 742 "Module '%s' failed add to sysfs, error number %d\n",
748 printk(KERN_ERR "The system will be unstable now.\n"); 743 name, err);
749 return; 744 printk(KERN_ERR
745 "The system will be unstable now.\n");
746 return NULL;
750 } 747 }
751 /* So that exit path is even. */ 748
749 /* So that we hold reference in both cases. */
752 kobject_get(&mk->kobj); 750 kobject_get(&mk->kobj);
753 } 751 }
754 752
753 return mk;
754}
755
756static void __init kernel_add_sysfs_param(const char *name,
757 struct kernel_param *kparam,
758 unsigned int name_skip)
759{
760 struct module_kobject *mk;
761 int err;
762
763 mk = locate_module_kobject(name);
764 if (!mk)
765 return;
766
767 /* We need to remove old parameters before adding more. */
768 if (mk->mp)
769 sysfs_remove_group(&mk->kobj, &mk->mp->grp);
770
755 /* These should not fail at boot. */ 771 /* These should not fail at boot. */
756 err = add_sysfs_param(mk, kparam, kparam->name + name_skip); 772 err = add_sysfs_param(mk, kparam, kparam->name + name_skip);
757 BUG_ON(err); 773 BUG_ON(err);
@@ -796,6 +812,32 @@ static void __init param_sysfs_builtin(void)
796 } 812 }
797} 813}
798 814
815ssize_t __modver_version_show(struct module_attribute *mattr,
816 struct module *mod, char *buf)
817{
818 struct module_version_attribute *vattr =
819 container_of(mattr, struct module_version_attribute, mattr);
820
821 return sprintf(buf, "%s\n", vattr->version);
822}
823
824extern struct module_version_attribute __start___modver[], __stop___modver[];
825
826static void __init version_sysfs_builtin(void)
827{
828 const struct module_version_attribute *vattr;
829 struct module_kobject *mk;
830 int err;
831
832 for (vattr = __start___modver; vattr < __stop___modver; vattr++) {
833 mk = locate_module_kobject(vattr->module_name);
834 if (mk) {
835 err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr);
836 kobject_uevent(&mk->kobj, KOBJ_ADD);
837 kobject_put(&mk->kobj);
838 }
839 }
840}
799 841
800/* module-related sysfs stuff */ 842/* module-related sysfs stuff */
801 843
@@ -875,6 +917,7 @@ static int __init param_sysfs_init(void)
875 } 917 }
876 module_sysfs_initialized = 1; 918 module_sysfs_initialized = 1;
877 919
920 version_sysfs_builtin();
878 param_sysfs_builtin(); 921 param_sysfs_builtin();
879 922
880 return 0; 923 return 0;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 84522c796987..126a302c481c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2201,13 +2201,6 @@ find_lively_task_by_vpid(pid_t vpid)
2201 if (!task) 2201 if (!task)
2202 return ERR_PTR(-ESRCH); 2202 return ERR_PTR(-ESRCH);
2203 2203
2204 /*
2205 * Can't attach events to a dying task.
2206 */
2207 err = -ESRCH;
2208 if (task->flags & PF_EXITING)
2209 goto errout;
2210
2211 /* Reuse ptrace permission checks for now. */ 2204 /* Reuse ptrace permission checks for now. */
2212 err = -EACCES; 2205 err = -EACCES;
2213 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 2206 if (!ptrace_may_access(task, PTRACE_MODE_READ))
@@ -2268,14 +2261,27 @@ retry:
2268 2261
2269 get_ctx(ctx); 2262 get_ctx(ctx);
2270 2263
2271 if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { 2264 err = 0;
2272 /* 2265 mutex_lock(&task->perf_event_mutex);
2273 * We raced with some other task; use 2266 /*
2274 * the context they set. 2267 * If it has already passed perf_event_exit_task().
2275 */ 2268 * we must see PF_EXITING, it takes this mutex too.
2269 */
2270 if (task->flags & PF_EXITING)
2271 err = -ESRCH;
2272 else if (task->perf_event_ctxp[ctxn])
2273 err = -EAGAIN;
2274 else
2275 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
2276 mutex_unlock(&task->perf_event_mutex);
2277
2278 if (unlikely(err)) {
2276 put_task_struct(task); 2279 put_task_struct(task);
2277 kfree(ctx); 2280 kfree(ctx);
2278 goto retry; 2281
2282 if (err == -EAGAIN)
2283 goto retry;
2284 goto errout;
2279 } 2285 }
2280 } 2286 }
2281 2287
@@ -5374,6 +5380,8 @@ free_dev:
5374 goto out; 5380 goto out;
5375} 5381}
5376 5382
5383static struct lock_class_key cpuctx_mutex;
5384
5377int perf_pmu_register(struct pmu *pmu, char *name, int type) 5385int perf_pmu_register(struct pmu *pmu, char *name, int type)
5378{ 5386{
5379 int cpu, ret; 5387 int cpu, ret;
@@ -5422,6 +5430,7 @@ skip_type:
5422 5430
5423 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 5431 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
5424 __perf_event_init_context(&cpuctx->ctx); 5432 __perf_event_init_context(&cpuctx->ctx);
5433 lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
5425 cpuctx->ctx.type = cpu_context; 5434 cpuctx->ctx.type = cpu_context;
5426 cpuctx->ctx.pmu = pmu; 5435 cpuctx->ctx.pmu = pmu;
5427 cpuctx->jiffies_interval = 1; 5436 cpuctx->jiffies_interval = 1;
@@ -6127,7 +6136,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
6127 * scheduled, so we are now safe from rescheduling changing 6136 * scheduled, so we are now safe from rescheduling changing
6128 * our context. 6137 * our context.
6129 */ 6138 */
6130 child_ctx = child->perf_event_ctxp[ctxn]; 6139 child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
6131 task_ctx_sched_out(child_ctx, EVENT_ALL); 6140 task_ctx_sched_out(child_ctx, EVENT_ALL);
6132 6141
6133 /* 6142 /*
@@ -6440,11 +6449,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6440 unsigned long flags; 6449 unsigned long flags;
6441 int ret = 0; 6450 int ret = 0;
6442 6451
6443 child->perf_event_ctxp[ctxn] = NULL;
6444
6445 mutex_init(&child->perf_event_mutex);
6446 INIT_LIST_HEAD(&child->perf_event_list);
6447
6448 if (likely(!parent->perf_event_ctxp[ctxn])) 6452 if (likely(!parent->perf_event_ctxp[ctxn]))
6449 return 0; 6453 return 0;
6450 6454
@@ -6533,6 +6537,10 @@ int perf_event_init_task(struct task_struct *child)
6533{ 6537{
6534 int ctxn, ret; 6538 int ctxn, ret;
6535 6539
6540 memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp));
6541 mutex_init(&child->perf_event_mutex);
6542 INIT_LIST_HEAD(&child->perf_event_list);
6543
6536 for_each_task_context_nr(ctxn) { 6544 for_each_task_context_nr(ctxn) {
6537 ret = perf_event_init_context(child, ctxn); 6545 ret = perf_event_init_context(child, ctxn);
6538 if (ret) 6546 if (ret)
diff --git a/kernel/printk.c b/kernel/printk.c
index 53d9a9ec88e6..2ddbdc73aade 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -97,7 +97,7 @@ static int console_locked, console_suspended;
97/* 97/*
98 * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars 98 * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
99 * It is also used in interesting ways to provide interlocking in 99 * It is also used in interesting ways to provide interlocking in
100 * release_console_sem(). 100 * console_unlock();.
101 */ 101 */
102static DEFINE_SPINLOCK(logbuf_lock); 102static DEFINE_SPINLOCK(logbuf_lock);
103 103
@@ -501,7 +501,7 @@ static void _call_console_drivers(unsigned start,
501/* 501/*
502 * Call the console drivers, asking them to write out 502 * Call the console drivers, asking them to write out
503 * log_buf[start] to log_buf[end - 1]. 503 * log_buf[start] to log_buf[end - 1].
504 * The console_sem must be held. 504 * The console_lock must be held.
505 */ 505 */
506static void call_console_drivers(unsigned start, unsigned end) 506static void call_console_drivers(unsigned start, unsigned end)
507{ 507{
@@ -604,11 +604,11 @@ static int have_callable_console(void)
604 * 604 *
605 * This is printk(). It can be called from any context. We want it to work. 605 * This is printk(). It can be called from any context. We want it to work.
606 * 606 *
607 * We try to grab the console_sem. If we succeed, it's easy - we log the output and 607 * We try to grab the console_lock. If we succeed, it's easy - we log the output and
608 * call the console drivers. If we fail to get the semaphore we place the output 608 * call the console drivers. If we fail to get the semaphore we place the output
609 * into the log buffer and return. The current holder of the console_sem will 609 * into the log buffer and return. The current holder of the console_sem will
610 * notice the new output in release_console_sem() and will send it to the 610 * notice the new output in console_unlock(); and will send it to the
611 * consoles before releasing the semaphore. 611 * consoles before releasing the lock.
612 * 612 *
613 * One effect of this deferred printing is that code which calls printk() and 613 * One effect of this deferred printing is that code which calls printk() and
614 * then changes console_loglevel may break. This is because console_loglevel 614 * then changes console_loglevel may break. This is because console_loglevel
@@ -659,19 +659,19 @@ static inline int can_use_console(unsigned int cpu)
659/* 659/*
660 * Try to get console ownership to actually show the kernel 660 * Try to get console ownership to actually show the kernel
661 * messages from a 'printk'. Return true (and with the 661 * messages from a 'printk'. Return true (and with the
662 * console_semaphore held, and 'console_locked' set) if it 662 * console_lock held, and 'console_locked' set) if it
663 * is successful, false otherwise. 663 * is successful, false otherwise.
664 * 664 *
665 * This gets called with the 'logbuf_lock' spinlock held and 665 * This gets called with the 'logbuf_lock' spinlock held and
666 * interrupts disabled. It should return with 'lockbuf_lock' 666 * interrupts disabled. It should return with 'lockbuf_lock'
667 * released but interrupts still disabled. 667 * released but interrupts still disabled.
668 */ 668 */
669static int acquire_console_semaphore_for_printk(unsigned int cpu) 669static int console_trylock_for_printk(unsigned int cpu)
670 __releases(&logbuf_lock) 670 __releases(&logbuf_lock)
671{ 671{
672 int retval = 0; 672 int retval = 0;
673 673
674 if (!try_acquire_console_sem()) { 674 if (console_trylock()) {
675 retval = 1; 675 retval = 1;
676 676
677 /* 677 /*
@@ -827,12 +827,12 @@ asmlinkage int vprintk(const char *fmt, va_list args)
827 * actual magic (print out buffers, wake up klogd, 827 * actual magic (print out buffers, wake up klogd,
828 * etc). 828 * etc).
829 * 829 *
830 * The acquire_console_semaphore_for_printk() function 830 * The console_trylock_for_printk() function
831 * will release 'logbuf_lock' regardless of whether it 831 * will release 'logbuf_lock' regardless of whether it
832 * actually gets the semaphore or not. 832 * actually gets the semaphore or not.
833 */ 833 */
834 if (acquire_console_semaphore_for_printk(this_cpu)) 834 if (console_trylock_for_printk(this_cpu))
835 release_console_sem(); 835 console_unlock();
836 836
837 lockdep_on(); 837 lockdep_on();
838out_restore_irqs: 838out_restore_irqs:
@@ -993,7 +993,7 @@ void suspend_console(void)
993 if (!console_suspend_enabled) 993 if (!console_suspend_enabled)
994 return; 994 return;
995 printk("Suspending console(s) (use no_console_suspend to debug)\n"); 995 printk("Suspending console(s) (use no_console_suspend to debug)\n");
996 acquire_console_sem(); 996 console_lock();
997 console_suspended = 1; 997 console_suspended = 1;
998 up(&console_sem); 998 up(&console_sem);
999} 999}
@@ -1004,7 +1004,7 @@ void resume_console(void)
1004 return; 1004 return;
1005 down(&console_sem); 1005 down(&console_sem);
1006 console_suspended = 0; 1006 console_suspended = 0;
1007 release_console_sem(); 1007 console_unlock();
1008} 1008}
1009 1009
1010/** 1010/**
@@ -1027,21 +1027,21 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self,
1027 case CPU_DYING: 1027 case CPU_DYING:
1028 case CPU_DOWN_FAILED: 1028 case CPU_DOWN_FAILED:
1029 case CPU_UP_CANCELED: 1029 case CPU_UP_CANCELED:
1030 acquire_console_sem(); 1030 console_lock();
1031 release_console_sem(); 1031 console_unlock();
1032 } 1032 }
1033 return NOTIFY_OK; 1033 return NOTIFY_OK;
1034} 1034}
1035 1035
1036/** 1036/**
1037 * acquire_console_sem - lock the console system for exclusive use. 1037 * console_lock - lock the console system for exclusive use.
1038 * 1038 *
1039 * Acquires a semaphore which guarantees that the caller has 1039 * Acquires a lock which guarantees that the caller has
1040 * exclusive access to the console system and the console_drivers list. 1040 * exclusive access to the console system and the console_drivers list.
1041 * 1041 *
1042 * Can sleep, returns nothing. 1042 * Can sleep, returns nothing.
1043 */ 1043 */
1044void acquire_console_sem(void) 1044void console_lock(void)
1045{ 1045{
1046 BUG_ON(in_interrupt()); 1046 BUG_ON(in_interrupt());
1047 down(&console_sem); 1047 down(&console_sem);
@@ -1050,21 +1050,29 @@ void acquire_console_sem(void)
1050 console_locked = 1; 1050 console_locked = 1;
1051 console_may_schedule = 1; 1051 console_may_schedule = 1;
1052} 1052}
1053EXPORT_SYMBOL(acquire_console_sem); 1053EXPORT_SYMBOL(console_lock);
1054 1054
1055int try_acquire_console_sem(void) 1055/**
1056 * console_trylock - try to lock the console system for exclusive use.
1057 *
1058 * Tried to acquire a lock which guarantees that the caller has
1059 * exclusive access to the console system and the console_drivers list.
1060 *
1061 * returns 1 on success, and 0 on failure to acquire the lock.
1062 */
1063int console_trylock(void)
1056{ 1064{
1057 if (down_trylock(&console_sem)) 1065 if (down_trylock(&console_sem))
1058 return -1; 1066 return 0;
1059 if (console_suspended) { 1067 if (console_suspended) {
1060 up(&console_sem); 1068 up(&console_sem);
1061 return -1; 1069 return 0;
1062 } 1070 }
1063 console_locked = 1; 1071 console_locked = 1;
1064 console_may_schedule = 0; 1072 console_may_schedule = 0;
1065 return 0; 1073 return 1;
1066} 1074}
1067EXPORT_SYMBOL(try_acquire_console_sem); 1075EXPORT_SYMBOL(console_trylock);
1068 1076
1069int is_console_locked(void) 1077int is_console_locked(void)
1070{ 1078{
@@ -1095,20 +1103,20 @@ void wake_up_klogd(void)
1095} 1103}
1096 1104
1097/** 1105/**
1098 * release_console_sem - unlock the console system 1106 * console_unlock - unlock the console system
1099 * 1107 *
1100 * Releases the semaphore which the caller holds on the console system 1108 * Releases the console_lock which the caller holds on the console system
1101 * and the console driver list. 1109 * and the console driver list.
1102 * 1110 *
1103 * While the semaphore was held, console output may have been buffered 1111 * While the console_lock was held, console output may have been buffered
1104 * by printk(). If this is the case, release_console_sem() emits 1112 * by printk(). If this is the case, console_unlock(); emits
1105 * the output prior to releasing the semaphore. 1113 * the output prior to releasing the lock.
1106 * 1114 *
1107 * If there is output waiting for klogd, we wake it up. 1115 * If there is output waiting for klogd, we wake it up.
1108 * 1116 *
1109 * release_console_sem() may be called from any context. 1117 * console_unlock(); may be called from any context.
1110 */ 1118 */
1111void release_console_sem(void) 1119void console_unlock(void)
1112{ 1120{
1113 unsigned long flags; 1121 unsigned long flags;
1114 unsigned _con_start, _log_end; 1122 unsigned _con_start, _log_end;
@@ -1141,7 +1149,7 @@ void release_console_sem(void)
1141 if (wake_klogd) 1149 if (wake_klogd)
1142 wake_up_klogd(); 1150 wake_up_klogd();
1143} 1151}
1144EXPORT_SYMBOL(release_console_sem); 1152EXPORT_SYMBOL(console_unlock);
1145 1153
1146/** 1154/**
1147 * console_conditional_schedule - yield the CPU if required 1155 * console_conditional_schedule - yield the CPU if required
@@ -1150,7 +1158,7 @@ EXPORT_SYMBOL(release_console_sem);
1150 * if this CPU should yield the CPU to another task, do 1158 * if this CPU should yield the CPU to another task, do
1151 * so here. 1159 * so here.
1152 * 1160 *
1153 * Must be called within acquire_console_sem(). 1161 * Must be called within console_lock();.
1154 */ 1162 */
1155void __sched console_conditional_schedule(void) 1163void __sched console_conditional_schedule(void)
1156{ 1164{
@@ -1171,14 +1179,14 @@ void console_unblank(void)
1171 if (down_trylock(&console_sem) != 0) 1179 if (down_trylock(&console_sem) != 0)
1172 return; 1180 return;
1173 } else 1181 } else
1174 acquire_console_sem(); 1182 console_lock();
1175 1183
1176 console_locked = 1; 1184 console_locked = 1;
1177 console_may_schedule = 0; 1185 console_may_schedule = 0;
1178 for_each_console(c) 1186 for_each_console(c)
1179 if ((c->flags & CON_ENABLED) && c->unblank) 1187 if ((c->flags & CON_ENABLED) && c->unblank)
1180 c->unblank(); 1188 c->unblank();
1181 release_console_sem(); 1189 console_unlock();
1182} 1190}
1183 1191
1184/* 1192/*
@@ -1189,7 +1197,7 @@ struct tty_driver *console_device(int *index)
1189 struct console *c; 1197 struct console *c;
1190 struct tty_driver *driver = NULL; 1198 struct tty_driver *driver = NULL;
1191 1199
1192 acquire_console_sem(); 1200 console_lock();
1193 for_each_console(c) { 1201 for_each_console(c) {
1194 if (!c->device) 1202 if (!c->device)
1195 continue; 1203 continue;
@@ -1197,7 +1205,7 @@ struct tty_driver *console_device(int *index)
1197 if (driver) 1205 if (driver)
1198 break; 1206 break;
1199 } 1207 }
1200 release_console_sem(); 1208 console_unlock();
1201 return driver; 1209 return driver;
1202} 1210}
1203 1211
@@ -1208,17 +1216,17 @@ struct tty_driver *console_device(int *index)
1208 */ 1216 */
1209void console_stop(struct console *console) 1217void console_stop(struct console *console)
1210{ 1218{
1211 acquire_console_sem(); 1219 console_lock();
1212 console->flags &= ~CON_ENABLED; 1220 console->flags &= ~CON_ENABLED;
1213 release_console_sem(); 1221 console_unlock();
1214} 1222}
1215EXPORT_SYMBOL(console_stop); 1223EXPORT_SYMBOL(console_stop);
1216 1224
1217void console_start(struct console *console) 1225void console_start(struct console *console)
1218{ 1226{
1219 acquire_console_sem(); 1227 console_lock();
1220 console->flags |= CON_ENABLED; 1228 console->flags |= CON_ENABLED;
1221 release_console_sem(); 1229 console_unlock();
1222} 1230}
1223EXPORT_SYMBOL(console_start); 1231EXPORT_SYMBOL(console_start);
1224 1232
@@ -1340,7 +1348,7 @@ void register_console(struct console *newcon)
1340 * Put this console in the list - keep the 1348 * Put this console in the list - keep the
1341 * preferred driver at the head of the list. 1349 * preferred driver at the head of the list.
1342 */ 1350 */
1343 acquire_console_sem(); 1351 console_lock();
1344 if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { 1352 if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {
1345 newcon->next = console_drivers; 1353 newcon->next = console_drivers;
1346 console_drivers = newcon; 1354 console_drivers = newcon;
@@ -1352,14 +1360,14 @@ void register_console(struct console *newcon)
1352 } 1360 }
1353 if (newcon->flags & CON_PRINTBUFFER) { 1361 if (newcon->flags & CON_PRINTBUFFER) {
1354 /* 1362 /*
1355 * release_console_sem() will print out the buffered messages 1363 * console_unlock(); will print out the buffered messages
1356 * for us. 1364 * for us.
1357 */ 1365 */
1358 spin_lock_irqsave(&logbuf_lock, flags); 1366 spin_lock_irqsave(&logbuf_lock, flags);
1359 con_start = log_start; 1367 con_start = log_start;
1360 spin_unlock_irqrestore(&logbuf_lock, flags); 1368 spin_unlock_irqrestore(&logbuf_lock, flags);
1361 } 1369 }
1362 release_console_sem(); 1370 console_unlock();
1363 console_sysfs_notify(); 1371 console_sysfs_notify();
1364 1372
1365 /* 1373 /*
@@ -1396,7 +1404,7 @@ int unregister_console(struct console *console)
1396 return braille_unregister_console(console); 1404 return braille_unregister_console(console);
1397#endif 1405#endif
1398 1406
1399 acquire_console_sem(); 1407 console_lock();
1400 if (console_drivers == console) { 1408 if (console_drivers == console) {
1401 console_drivers=console->next; 1409 console_drivers=console->next;
1402 res = 0; 1410 res = 0;
@@ -1418,7 +1426,7 @@ int unregister_console(struct console *console)
1418 if (console_drivers != NULL && console->flags & CON_CONSDEV) 1426 if (console_drivers != NULL && console->flags & CON_CONSDEV)
1419 console_drivers->flags |= CON_CONSDEV; 1427 console_drivers->flags |= CON_CONSDEV;
1420 1428
1421 release_console_sem(); 1429 console_unlock();
1422 console_sysfs_notify(); 1430 console_sysfs_notify();
1423 return res; 1431 return res;
1424} 1432}
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index ddabb54bb5c8..3c7cbc2c33be 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -215,7 +215,6 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
215 put_pid(waiter->deadlock_task_pid); 215 put_pid(waiter->deadlock_task_pid);
216 TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); 216 TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
217 TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); 217 TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
218 TRACE_WARN_ON(waiter->task);
219 memset(waiter, 0x22, sizeof(*waiter)); 218 memset(waiter, 0x22, sizeof(*waiter));
220} 219}
221 220
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a9604815786a..ab449117aaf2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -20,41 +20,34 @@
20/* 20/*
21 * lock->owner state tracking: 21 * lock->owner state tracking:
22 * 22 *
23 * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1 23 * lock->owner holds the task_struct pointer of the owner. Bit 0
24 * are used to keep track of the "owner is pending" and "lock has 24 * is used to keep track of the "lock has waiters" state.
25 * waiters" state.
26 * 25 *
27 * owner bit1 bit0 26 * owner bit0
28 * NULL 0 0 lock is free (fast acquire possible) 27 * NULL 0 lock is free (fast acquire possible)
29 * NULL 0 1 invalid state 28 * NULL 1 lock is free and has waiters and the top waiter
30 * NULL 1 0 Transitional State* 29 * is going to take the lock*
31 * NULL 1 1 invalid state 30 * taskpointer 0 lock is held (fast release possible)
32 * taskpointer 0 0 lock is held (fast release possible) 31 * taskpointer 1 lock is held and has waiters**
33 * taskpointer 0 1 task is pending owner
34 * taskpointer 1 0 lock is held and has waiters
35 * taskpointer 1 1 task is pending owner and lock has more waiters
36 *
37 * Pending ownership is assigned to the top (highest priority)
38 * waiter of the lock, when the lock is released. The thread is woken
39 * up and can now take the lock. Until the lock is taken (bit 0
40 * cleared) a competing higher priority thread can steal the lock
41 * which puts the woken up thread back on the waiters list.
42 * 32 *
43 * The fast atomic compare exchange based acquire and release is only 33 * The fast atomic compare exchange based acquire and release is only
44 * possible when bit 0 and 1 of lock->owner are 0. 34 * possible when bit 0 of lock->owner is 0.
35 *
36 * (*) It also can be a transitional state when grabbing the lock
37 * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
38 * we need to set the bit0 before looking at the lock, and the owner may be
39 * NULL in this small time, hence this can be a transitional state.
45 * 40 *
46 * (*) There's a small time where the owner can be NULL and the 41 * (**) There is a small time when bit 0 is set but there are no
47 * "lock has waiters" bit is set. This can happen when grabbing the lock. 42 * waiters. This can happen when grabbing the lock in the slow path.
48 * To prevent a cmpxchg of the owner releasing the lock, we need to set this 43 * To prevent a cmpxchg of the owner releasing the lock, we need to
49 * bit before looking at the lock, hence the reason this is a transitional 44 * set this bit before looking at the lock.
50 * state.
51 */ 45 */
52 46
53static void 47static void
54rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, 48rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
55 unsigned long mask)
56{ 49{
57 unsigned long val = (unsigned long)owner | mask; 50 unsigned long val = (unsigned long)owner;
58 51
59 if (rt_mutex_has_waiters(lock)) 52 if (rt_mutex_has_waiters(lock))
60 val |= RT_MUTEX_HAS_WAITERS; 53 val |= RT_MUTEX_HAS_WAITERS;
@@ -203,15 +196,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
203 * reached or the state of the chain has changed while we 196 * reached or the state of the chain has changed while we
204 * dropped the locks. 197 * dropped the locks.
205 */ 198 */
206 if (!waiter || !waiter->task) 199 if (!waiter)
207 goto out_unlock_pi; 200 goto out_unlock_pi;
208 201
209 /* 202 /*
210 * Check the orig_waiter state. After we dropped the locks, 203 * Check the orig_waiter state. After we dropped the locks,
211 * the previous owner of the lock might have released the lock 204 * the previous owner of the lock might have released the lock.
212 * and made us the pending owner:
213 */ 205 */
214 if (orig_waiter && !orig_waiter->task) 206 if (orig_waiter && !rt_mutex_owner(orig_lock))
215 goto out_unlock_pi; 207 goto out_unlock_pi;
216 208
217 /* 209 /*
@@ -254,6 +246,17 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
254 246
255 /* Release the task */ 247 /* Release the task */
256 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 248 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
249 if (!rt_mutex_owner(lock)) {
250 /*
251 * If the requeue above changed the top waiter, then we need
252 * to wake the new top waiter up to try to get the lock.
253 */
254
255 if (top_waiter != rt_mutex_top_waiter(lock))
256 wake_up_process(rt_mutex_top_waiter(lock)->task);
257 raw_spin_unlock(&lock->wait_lock);
258 goto out_put_task;
259 }
257 put_task_struct(task); 260 put_task_struct(task);
258 261
259 /* Grab the next task */ 262 /* Grab the next task */
@@ -296,78 +299,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
296} 299}
297 300
298/* 301/*
299 * Optimization: check if we can steal the lock from the
300 * assigned pending owner [which might not have taken the
301 * lock yet]:
302 */
303static inline int try_to_steal_lock(struct rt_mutex *lock,
304 struct task_struct *task)
305{
306 struct task_struct *pendowner = rt_mutex_owner(lock);
307 struct rt_mutex_waiter *next;
308 unsigned long flags;
309
310 if (!rt_mutex_owner_pending(lock))
311 return 0;
312
313 if (pendowner == task)
314 return 1;
315
316 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
317 if (task->prio >= pendowner->prio) {
318 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
319 return 0;
320 }
321
322 /*
323 * Check if a waiter is enqueued on the pending owners
324 * pi_waiters list. Remove it and readjust pending owners
325 * priority.
326 */
327 if (likely(!rt_mutex_has_waiters(lock))) {
328 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
329 return 1;
330 }
331
332 /* No chain handling, pending owner is not blocked on anything: */
333 next = rt_mutex_top_waiter(lock);
334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
335 __rt_mutex_adjust_prio(pendowner);
336 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
337
338 /*
339 * We are going to steal the lock and a waiter was
340 * enqueued on the pending owners pi_waiters queue. So
341 * we have to enqueue this waiter into
342 * task->pi_waiters list. This covers the case,
343 * where task is boosted because it holds another
344 * lock and gets unboosted because the booster is
345 * interrupted, so we would delay a waiter with higher
346 * priority as task->normal_prio.
347 *
348 * Note: in the rare case of a SCHED_OTHER task changing
349 * its priority and thus stealing the lock, next->task
350 * might be task:
351 */
352 if (likely(next->task != task)) {
353 raw_spin_lock_irqsave(&task->pi_lock, flags);
354 plist_add(&next->pi_list_entry, &task->pi_waiters);
355 __rt_mutex_adjust_prio(task);
356 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
357 }
358 return 1;
359}
360
361/*
362 * Try to take an rt-mutex 302 * Try to take an rt-mutex
363 * 303 *
364 * This fails
365 * - when the lock has a real owner
366 * - when a different pending owner exists and has higher priority than current
367 *
368 * Must be called with lock->wait_lock held. 304 * Must be called with lock->wait_lock held.
305 *
306 * @lock: the lock to be acquired.
307 * @task: the task which wants to acquire the lock
308 * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
369 */ 309 */
370static int try_to_take_rt_mutex(struct rt_mutex *lock) 310static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
311 struct rt_mutex_waiter *waiter)
371{ 312{
372 /* 313 /*
373 * We have to be careful here if the atomic speedups are 314 * We have to be careful here if the atomic speedups are
@@ -390,15 +331,52 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
390 */ 331 */
391 mark_rt_mutex_waiters(lock); 332 mark_rt_mutex_waiters(lock);
392 333
393 if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) 334 if (rt_mutex_owner(lock))
394 return 0; 335 return 0;
395 336
337 /*
338 * It will get the lock because of one of these conditions:
339 * 1) there is no waiter
340 * 2) higher priority than waiters
341 * 3) it is top waiter
342 */
343 if (rt_mutex_has_waiters(lock)) {
344 if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
345 if (!waiter || waiter != rt_mutex_top_waiter(lock))
346 return 0;
347 }
348 }
349
350 if (waiter || rt_mutex_has_waiters(lock)) {
351 unsigned long flags;
352 struct rt_mutex_waiter *top;
353
354 raw_spin_lock_irqsave(&task->pi_lock, flags);
355
356 /* remove the queued waiter. */
357 if (waiter) {
358 plist_del(&waiter->list_entry, &lock->wait_list);
359 task->pi_blocked_on = NULL;
360 }
361
362 /*
363 * We have to enqueue the top waiter(if it exists) into
364 * task->pi_waiters list.
365 */
366 if (rt_mutex_has_waiters(lock)) {
367 top = rt_mutex_top_waiter(lock);
368 top->pi_list_entry.prio = top->list_entry.prio;
369 plist_add(&top->pi_list_entry, &task->pi_waiters);
370 }
371 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
372 }
373
396 /* We got the lock. */ 374 /* We got the lock. */
397 debug_rt_mutex_lock(lock); 375 debug_rt_mutex_lock(lock);
398 376
399 rt_mutex_set_owner(lock, current, 0); 377 rt_mutex_set_owner(lock, task);
400 378
401 rt_mutex_deadlock_account_lock(lock, current); 379 rt_mutex_deadlock_account_lock(lock, task);
402 380
403 return 1; 381 return 1;
404} 382}
@@ -436,6 +414,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
436 414
437 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 415 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
438 416
417 if (!owner)
418 return 0;
419
439 if (waiter == rt_mutex_top_waiter(lock)) { 420 if (waiter == rt_mutex_top_waiter(lock)) {
440 raw_spin_lock_irqsave(&owner->pi_lock, flags); 421 raw_spin_lock_irqsave(&owner->pi_lock, flags);
441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); 422 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -472,21 +453,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
472/* 453/*
473 * Wake up the next waiter on the lock. 454 * Wake up the next waiter on the lock.
474 * 455 *
475 * Remove the top waiter from the current tasks waiter list and from 456 * Remove the top waiter from the current tasks waiter list and wake it up.
476 * the lock waiter list. Set it as pending owner. Then wake it up.
477 * 457 *
478 * Called with lock->wait_lock held. 458 * Called with lock->wait_lock held.
479 */ 459 */
480static void wakeup_next_waiter(struct rt_mutex *lock) 460static void wakeup_next_waiter(struct rt_mutex *lock)
481{ 461{
482 struct rt_mutex_waiter *waiter; 462 struct rt_mutex_waiter *waiter;
483 struct task_struct *pendowner;
484 unsigned long flags; 463 unsigned long flags;
485 464
486 raw_spin_lock_irqsave(&current->pi_lock, flags); 465 raw_spin_lock_irqsave(&current->pi_lock, flags);
487 466
488 waiter = rt_mutex_top_waiter(lock); 467 waiter = rt_mutex_top_waiter(lock);
489 plist_del(&waiter->list_entry, &lock->wait_list);
490 468
491 /* 469 /*
492 * Remove it from current->pi_waiters. We do not adjust a 470 * Remove it from current->pi_waiters. We do not adjust a
@@ -495,43 +473,19 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
495 * lock->wait_lock. 473 * lock->wait_lock.
496 */ 474 */
497 plist_del(&waiter->pi_list_entry, &current->pi_waiters); 475 plist_del(&waiter->pi_list_entry, &current->pi_waiters);
498 pendowner = waiter->task;
499 waiter->task = NULL;
500 476
501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); 477 rt_mutex_set_owner(lock, NULL);
502 478
503 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 479 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
504 480
505 /* 481 wake_up_process(waiter->task);
506 * Clear the pi_blocked_on variable and enqueue a possible
507 * waiter into the pi_waiters list of the pending owner. This
508 * prevents that in case the pending owner gets unboosted a
509 * waiter with higher priority than pending-owner->normal_prio
510 * is blocked on the unboosted (pending) owner.
511 */
512 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
513
514 WARN_ON(!pendowner->pi_blocked_on);
515 WARN_ON(pendowner->pi_blocked_on != waiter);
516 WARN_ON(pendowner->pi_blocked_on->lock != lock);
517
518 pendowner->pi_blocked_on = NULL;
519
520 if (rt_mutex_has_waiters(lock)) {
521 struct rt_mutex_waiter *next;
522
523 next = rt_mutex_top_waiter(lock);
524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
525 }
526 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
527
528 wake_up_process(pendowner);
529} 482}
530 483
531/* 484/*
532 * Remove a waiter from a lock 485 * Remove a waiter from a lock and give up
533 * 486 *
534 * Must be called with lock->wait_lock held 487 * Must be called with lock->wait_lock held and
488 * have just failed to try_to_take_rt_mutex().
535 */ 489 */
536static void remove_waiter(struct rt_mutex *lock, 490static void remove_waiter(struct rt_mutex *lock,
537 struct rt_mutex_waiter *waiter) 491 struct rt_mutex_waiter *waiter)
@@ -543,11 +497,13 @@ static void remove_waiter(struct rt_mutex *lock,
543 497
544 raw_spin_lock_irqsave(&current->pi_lock, flags); 498 raw_spin_lock_irqsave(&current->pi_lock, flags);
545 plist_del(&waiter->list_entry, &lock->wait_list); 499 plist_del(&waiter->list_entry, &lock->wait_list);
546 waiter->task = NULL;
547 current->pi_blocked_on = NULL; 500 current->pi_blocked_on = NULL;
548 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 501 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
549 502
550 if (first && owner != current) { 503 if (!owner)
504 return;
505
506 if (first) {
551 507
552 raw_spin_lock_irqsave(&owner->pi_lock, flags); 508 raw_spin_lock_irqsave(&owner->pi_lock, flags);
553 509
@@ -614,21 +570,19 @@ void rt_mutex_adjust_pi(struct task_struct *task)
614 * or TASK_UNINTERRUPTIBLE) 570 * or TASK_UNINTERRUPTIBLE)
615 * @timeout: the pre-initialized and started timer, or NULL for none 571 * @timeout: the pre-initialized and started timer, or NULL for none
616 * @waiter: the pre-initialized rt_mutex_waiter 572 * @waiter: the pre-initialized rt_mutex_waiter
617 * @detect_deadlock: passed to task_blocks_on_rt_mutex
618 * 573 *
619 * lock->wait_lock must be held by the caller. 574 * lock->wait_lock must be held by the caller.
620 */ 575 */
621static int __sched 576static int __sched
622__rt_mutex_slowlock(struct rt_mutex *lock, int state, 577__rt_mutex_slowlock(struct rt_mutex *lock, int state,
623 struct hrtimer_sleeper *timeout, 578 struct hrtimer_sleeper *timeout,
624 struct rt_mutex_waiter *waiter, 579 struct rt_mutex_waiter *waiter)
625 int detect_deadlock)
626{ 580{
627 int ret = 0; 581 int ret = 0;
628 582
629 for (;;) { 583 for (;;) {
630 /* Try to acquire the lock: */ 584 /* Try to acquire the lock: */
631 if (try_to_take_rt_mutex(lock)) 585 if (try_to_take_rt_mutex(lock, current, waiter))
632 break; 586 break;
633 587
634 /* 588 /*
@@ -645,39 +599,11 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
645 break; 599 break;
646 } 600 }
647 601
648 /*
649 * waiter->task is NULL the first time we come here and
650 * when we have been woken up by the previous owner
651 * but the lock got stolen by a higher prio task.
652 */
653 if (!waiter->task) {
654 ret = task_blocks_on_rt_mutex(lock, waiter, current,
655 detect_deadlock);
656 /*
657 * If we got woken up by the owner then start loop
658 * all over without going into schedule to try
659 * to get the lock now:
660 */
661 if (unlikely(!waiter->task)) {
662 /*
663 * Reset the return value. We might
664 * have returned with -EDEADLK and the
665 * owner released the lock while we
666 * were walking the pi chain.
667 */
668 ret = 0;
669 continue;
670 }
671 if (unlikely(ret))
672 break;
673 }
674
675 raw_spin_unlock(&lock->wait_lock); 602 raw_spin_unlock(&lock->wait_lock);
676 603
677 debug_rt_mutex_print_deadlock(waiter); 604 debug_rt_mutex_print_deadlock(waiter);
678 605
679 if (waiter->task) 606 schedule_rt_mutex(lock);
680 schedule_rt_mutex(lock);
681 607
682 raw_spin_lock(&lock->wait_lock); 608 raw_spin_lock(&lock->wait_lock);
683 set_current_state(state); 609 set_current_state(state);
@@ -698,12 +624,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
698 int ret = 0; 624 int ret = 0;
699 625
700 debug_rt_mutex_init_waiter(&waiter); 626 debug_rt_mutex_init_waiter(&waiter);
701 waiter.task = NULL;
702 627
703 raw_spin_lock(&lock->wait_lock); 628 raw_spin_lock(&lock->wait_lock);
704 629
705 /* Try to acquire the lock again: */ 630 /* Try to acquire the lock again: */
706 if (try_to_take_rt_mutex(lock)) { 631 if (try_to_take_rt_mutex(lock, current, NULL)) {
707 raw_spin_unlock(&lock->wait_lock); 632 raw_spin_unlock(&lock->wait_lock);
708 return 0; 633 return 0;
709 } 634 }
@@ -717,12 +642,14 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
717 timeout->task = NULL; 642 timeout->task = NULL;
718 } 643 }
719 644
720 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, 645 ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
721 detect_deadlock); 646
647 if (likely(!ret))
648 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
722 649
723 set_current_state(TASK_RUNNING); 650 set_current_state(TASK_RUNNING);
724 651
725 if (unlikely(waiter.task)) 652 if (unlikely(ret))
726 remove_waiter(lock, &waiter); 653 remove_waiter(lock, &waiter);
727 654
728 /* 655 /*
@@ -737,14 +664,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
737 if (unlikely(timeout)) 664 if (unlikely(timeout))
738 hrtimer_cancel(&timeout->timer); 665 hrtimer_cancel(&timeout->timer);
739 666
740 /*
741 * Readjust priority, when we did not get the lock. We might
742 * have been the pending owner and boosted. Since we did not
743 * take the lock, the PI boost has to go.
744 */
745 if (unlikely(ret))
746 rt_mutex_adjust_prio(current);
747
748 debug_rt_mutex_free_waiter(&waiter); 667 debug_rt_mutex_free_waiter(&waiter);
749 668
750 return ret; 669 return ret;
@@ -762,7 +681,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
762 681
763 if (likely(rt_mutex_owner(lock) != current)) { 682 if (likely(rt_mutex_owner(lock) != current)) {
764 683
765 ret = try_to_take_rt_mutex(lock); 684 ret = try_to_take_rt_mutex(lock, current, NULL);
766 /* 685 /*
767 * try_to_take_rt_mutex() sets the lock waiters 686 * try_to_take_rt_mutex() sets the lock waiters
768 * bit unconditionally. Clean this up. 687 * bit unconditionally. Clean this up.
@@ -992,7 +911,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
992{ 911{
993 __rt_mutex_init(lock, NULL); 912 __rt_mutex_init(lock, NULL);
994 debug_rt_mutex_proxy_lock(lock, proxy_owner); 913 debug_rt_mutex_proxy_lock(lock, proxy_owner);
995 rt_mutex_set_owner(lock, proxy_owner, 0); 914 rt_mutex_set_owner(lock, proxy_owner);
996 rt_mutex_deadlock_account_lock(lock, proxy_owner); 915 rt_mutex_deadlock_account_lock(lock, proxy_owner);
997} 916}
998 917
@@ -1008,7 +927,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1008 struct task_struct *proxy_owner) 927 struct task_struct *proxy_owner)
1009{ 928{
1010 debug_rt_mutex_proxy_unlock(lock); 929 debug_rt_mutex_proxy_unlock(lock);
1011 rt_mutex_set_owner(lock, NULL, 0); 930 rt_mutex_set_owner(lock, NULL);
1012 rt_mutex_deadlock_account_unlock(proxy_owner); 931 rt_mutex_deadlock_account_unlock(proxy_owner);
1013} 932}
1014 933
@@ -1034,20 +953,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1034 953
1035 raw_spin_lock(&lock->wait_lock); 954 raw_spin_lock(&lock->wait_lock);
1036 955
1037 mark_rt_mutex_waiters(lock); 956 if (try_to_take_rt_mutex(lock, task, NULL)) {
1038
1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock);
1042 rt_mutex_set_owner(lock, task, 0);
1043 raw_spin_unlock(&lock->wait_lock); 957 raw_spin_unlock(&lock->wait_lock);
1044 rt_mutex_deadlock_account_lock(lock, task);
1045 return 1; 958 return 1;
1046 } 959 }
1047 960
1048 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 961 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1049 962
1050 if (ret && !waiter->task) { 963 if (ret && !rt_mutex_owner(lock)) {
1051 /* 964 /*
1052 * Reset the return value. We might have 965 * Reset the return value. We might have
1053 * returned with -EDEADLK and the owner 966 * returned with -EDEADLK and the owner
@@ -1056,6 +969,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1056 */ 969 */
1057 ret = 0; 970 ret = 0;
1058 } 971 }
972
973 if (unlikely(ret))
974 remove_waiter(lock, waiter);
975
1059 raw_spin_unlock(&lock->wait_lock); 976 raw_spin_unlock(&lock->wait_lock);
1060 977
1061 debug_rt_mutex_print_deadlock(waiter); 978 debug_rt_mutex_print_deadlock(waiter);
@@ -1110,12 +1027,11 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1110 1027
1111 set_current_state(TASK_INTERRUPTIBLE); 1028 set_current_state(TASK_INTERRUPTIBLE);
1112 1029
1113 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, 1030 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1114 detect_deadlock);
1115 1031
1116 set_current_state(TASK_RUNNING); 1032 set_current_state(TASK_RUNNING);
1117 1033
1118 if (unlikely(waiter->task)) 1034 if (unlikely(ret))
1119 remove_waiter(lock, waiter); 1035 remove_waiter(lock, waiter);
1120 1036
1121 /* 1037 /*
@@ -1126,13 +1042,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1126 1042
1127 raw_spin_unlock(&lock->wait_lock); 1043 raw_spin_unlock(&lock->wait_lock);
1128 1044
1129 /*
1130 * Readjust priority, when we did not get the lock. We might have been
1131 * the pending owner and boosted. Since we did not take the lock, the
1132 * PI boost has to go.
1133 */
1134 if (unlikely(ret))
1135 rt_mutex_adjust_prio(current);
1136
1137 return ret; 1045 return ret;
1138} 1046}
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 97a2f81866af..53a66c85261b 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -91,9 +91,8 @@ task_top_pi_waiter(struct task_struct *p)
91/* 91/*
92 * lock->owner state tracking: 92 * lock->owner state tracking:
93 */ 93 */
94#define RT_MUTEX_OWNER_PENDING 1UL 94#define RT_MUTEX_HAS_WAITERS 1UL
95#define RT_MUTEX_HAS_WAITERS 2UL 95#define RT_MUTEX_OWNER_MASKALL 1UL
96#define RT_MUTEX_OWNER_MASKALL 3UL
97 96
98static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) 97static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
99{ 98{
@@ -101,17 +100,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
101 ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); 100 ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
102} 101}
103 102
104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
105{
106 return (struct task_struct *)
107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
108}
109
110static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock)
111{
112 return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
113}
114
115/* 103/*
116 * PI-futex support (proxy locking functions, etc.): 104 * PI-futex support (proxy locking functions, etc.):
117 */ 105 */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 77e9166d7bbf..354769979c02 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -699,7 +699,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
699 cfs_rq->nr_running--; 699 cfs_rq->nr_running--;
700} 700}
701 701
702#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED 702#ifdef CONFIG_FAIR_GROUP_SCHED
703# ifdef CONFIG_SMP
703static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, 704static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
704 int global_update) 705 int global_update)
705{ 706{
@@ -762,6 +763,51 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
762 list_del_leaf_cfs_rq(cfs_rq); 763 list_del_leaf_cfs_rq(cfs_rq);
763} 764}
764 765
766static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
767 long weight_delta)
768{
769 long load_weight, load, shares;
770
771 load = cfs_rq->load.weight + weight_delta;
772
773 load_weight = atomic_read(&tg->load_weight);
774 load_weight -= cfs_rq->load_contribution;
775 load_weight += load;
776
777 shares = (tg->shares * load);
778 if (load_weight)
779 shares /= load_weight;
780
781 if (shares < MIN_SHARES)
782 shares = MIN_SHARES;
783 if (shares > tg->shares)
784 shares = tg->shares;
785
786 return shares;
787}
788
789static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
790{
791 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
792 update_cfs_load(cfs_rq, 0);
793 update_cfs_shares(cfs_rq, 0);
794 }
795}
796# else /* CONFIG_SMP */
797static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
798{
799}
800
801static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
802 long weight_delta)
803{
804 return tg->shares;
805}
806
807static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
808{
809}
810# endif /* CONFIG_SMP */
765static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, 811static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
766 unsigned long weight) 812 unsigned long weight)
767{ 813{
@@ -782,7 +828,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
782{ 828{
783 struct task_group *tg; 829 struct task_group *tg;
784 struct sched_entity *se; 830 struct sched_entity *se;
785 long load_weight, load, shares; 831 long shares;
786 832
787 if (!cfs_rq) 833 if (!cfs_rq)
788 return; 834 return;
@@ -791,32 +837,14 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
791 se = tg->se[cpu_of(rq_of(cfs_rq))]; 837 se = tg->se[cpu_of(rq_of(cfs_rq))];
792 if (!se) 838 if (!se)
793 return; 839 return;
794 840#ifndef CONFIG_SMP
795 load = cfs_rq->load.weight + weight_delta; 841 if (likely(se->load.weight == tg->shares))
796 842 return;
797 load_weight = atomic_read(&tg->load_weight); 843#endif
798 load_weight -= cfs_rq->load_contribution; 844 shares = calc_cfs_shares(cfs_rq, tg, weight_delta);
799 load_weight += load;
800
801 shares = (tg->shares * load);
802 if (load_weight)
803 shares /= load_weight;
804
805 if (shares < MIN_SHARES)
806 shares = MIN_SHARES;
807 if (shares > tg->shares)
808 shares = tg->shares;
809 845
810 reweight_entity(cfs_rq_of(se), se, shares); 846 reweight_entity(cfs_rq_of(se), se, shares);
811} 847}
812
813static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
814{
815 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
816 update_cfs_load(cfs_rq, 0);
817 update_cfs_shares(cfs_rq, 0);
818 }
819}
820#else /* CONFIG_FAIR_GROUP_SCHED */ 848#else /* CONFIG_FAIR_GROUP_SCHED */
821static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) 849static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
822{ 850{
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bc86bb32e126..0f1bd83db985 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -170,7 +170,8 @@ static int proc_taint(struct ctl_table *table, int write,
170#endif 170#endif
171 171
172#ifdef CONFIG_MAGIC_SYSRQ 172#ifdef CONFIG_MAGIC_SYSRQ
173static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */ 173/* Note: sysrq code uses it's own private copy */
174static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
174 175
175static int sysrq_sysctl_handler(ctl_table *table, int write, 176static int sysrq_sysctl_handler(ctl_table *table, int write,
176 void __user *buffer, size_t *lenp, 177 void __user *buffer, size_t *lenp,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3e216e01bbd1..c55ea2433471 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -642,8 +642,7 @@ static void tick_nohz_switch_to_nohz(void)
642 } 642 }
643 local_irq_enable(); 643 local_irq_enable();
644 644
645 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", 645 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
646 smp_processor_id());
647} 646}
648 647
649/* 648/*
@@ -795,8 +794,10 @@ void tick_setup_sched_timer(void)
795 } 794 }
796 795
797#ifdef CONFIG_NO_HZ 796#ifdef CONFIG_NO_HZ
798 if (tick_nohz_enabled) 797 if (tick_nohz_enabled) {
799 ts->nohz_mode = NOHZ_MODE_HIGHRES; 798 ts->nohz_mode = NOHZ_MODE_HIGHRES;
799 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
800 }
800#endif 801#endif
801} 802}
802#endif /* HIGH_RES_TIMERS */ 803#endif /* HIGH_RES_TIMERS */