Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditfilter.c          |   2
-rw-r--r--  kernel/exit.c                 |  31
-rw-r--r--  kernel/fork.c                 |   9
-rw-r--r--  kernel/futex.c                | 269
-rw-r--r--  kernel/futex_compat.c         |   9
-rw-r--r--  kernel/irq/spurious.c         |  46
-rw-r--r--  kernel/kallsyms.c             |   3
-rw-r--r--  kernel/kthread.c              |   7
-rw-r--r--  kernel/power/disk.c           |   3
-rw-r--r--  kernel/power/main.c           |  19
-rw-r--r--  kernel/power/process.c        |  57
-rw-r--r--  kernel/power/swap.c           |   2
-rw-r--r--  kernel/profile.c              |   1
-rw-r--r--  kernel/rtmutex.c              |  24
-rw-r--r--  kernel/sched.c                |   4
-rw-r--r--  kernel/signal.c               |  38
-rw-r--r--  kernel/sysctl.c               |   2
-rw-r--r--  kernel/time/clocksource.c     |  10
-rw-r--r--  kernel/time/ntp.c             |   2
-rw-r--r--  kernel/time/tick-broadcast.c  |  17
-rw-r--r--  kernel/time/tick-sched.c      |  28
-rw-r--r--  kernel/time/timekeeping.c     |   2
-rw-r--r--  kernel/time/timer_stats.c     |  44
-rw-r--r--  kernel/timer.c                |  12
-rw-r--r--  kernel/workqueue.c            |  84
25 files changed, 457 insertions(+), 268 deletions(-)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 6c61263ff96d..74cc0fc6bb81 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -311,6 +311,7 @@ int audit_match_class(int class, unsigned syscall)
 	return classes[class][AUDIT_WORD(syscall)] & AUDIT_BIT(syscall);
 }
 
+#ifdef CONFIG_AUDITSYSCALL
 static inline int audit_match_class_bits(int class, u32 *mask)
 {
 	int i;
@@ -347,6 +348,7 @@ static int audit_match_signal(struct audit_entry *entry)
 		return 1;
 	}
 }
+#endif
 
 /* Common user-space to kernel rule translation. */
 static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
diff --git a/kernel/exit.c b/kernel/exit.c
index c6d14b8008dd..5c8ecbaa19a5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -762,11 +762,8 @@ static void exit_notify(struct task_struct *tsk)
 	read_lock(&tasklist_lock);
 	spin_lock_irq(&tsk->sighand->siglock);
 	for (t = next_thread(tsk); t != tsk; t = next_thread(t))
-		if (!signal_pending(t) && !(t->flags & PF_EXITING)) {
-			recalc_sigpending_tsk(t);
-			if (signal_pending(t))
-				signal_wake_up(t, 0);
-		}
+		if (!signal_pending(t) && !(t->flags & PF_EXITING))
+			recalc_sigpending_and_wake(t);
 	spin_unlock_irq(&tsk->sighand->siglock);
 	read_unlock(&tasklist_lock);
 }
@@ -895,13 +892,29 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		printk(KERN_ALERT
 			"Fixing recursive fault but reboot is needed!\n");
+		/*
+		 * We can do this unlocked here. The futex code uses
+		 * this flag just to verify whether the pi state
+		 * cleanup has been done or not. In the worst case it
+		 * loops once more. We pretend that the cleanup was
+		 * done as there is no way to return. Either the
+		 * OWNER_DIED bit is set by now or we push the blocked
+		 * task into the wait for ever nirwana as well.
+		 */
+		tsk->flags |= PF_EXITPIDONE;
 		if (tsk->io_context)
 			exit_io_context();
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
 
+	/*
+	 * tsk->flags are checked in the futex code to protect against
+	 * an exiting task cleaning up the robust pi futexes.
+	 */
+	spin_lock_irq(&tsk->pi_lock);
 	tsk->flags |= PF_EXITING;
+	spin_unlock_irq(&tsk->pi_lock);
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -915,7 +928,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	}
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
 	}
 	acct_collect(code, group_dead);
@@ -968,6 +981,12 @@ fastcall NORET_TYPE void do_exit(long code)
 	 * Make sure we are holding no locks:
 	 */
 	debug_check_no_locks_held(tsk);
+	/*
+	 * We can do this unlocked here. The futex code uses this flag
+	 * just to verify whether the pi state cleanup has been done
+	 * or not. In the worst case it loops once more.
+	 */
+	tsk->flags |= PF_EXITPIDONE;
 
 	if (tsk->io_context)
 		exit_io_context();
diff --git a/kernel/fork.c b/kernel/fork.c
index 49530e40ea8b..73ad5cda1bcd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -45,6 +45,7 @@
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
+#include <linux/freezer.h>
 #include <linux/delayacct.h>
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
@@ -1405,7 +1406,9 @@ long do_fork(unsigned long clone_flags,
 	}
 
 	if (clone_flags & CLONE_VFORK) {
+		freezer_do_not_count();
 		wait_for_completion(&vfork);
+		freezer_count();
 		if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
 			current->ptrace_message = nr;
 			ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
@@ -1427,10 +1430,8 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep,
 {
 	struct sighand_struct *sighand = data;
 
-	if (flags & SLAB_CTOR_CONSTRUCTOR) {
-		spin_lock_init(&sighand->siglock);
-		INIT_LIST_HEAD(&sighand->signalfd_list);
-	}
+	spin_lock_init(&sighand->siglock);
+	INIT_LIST_HEAD(&sighand->signalfd_list);
 }
 
 void __init proc_caches_init(void)
diff --git a/kernel/futex.c b/kernel/futex.c
index b7ce15c67e32..3b7f7713d9a4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -430,10 +430,6 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 		p = NULL;
 		goto out_unlock;
 	}
-	if (p->exit_state != 0) {
-		p = NULL;
-		goto out_unlock;
-	}
 	get_task_struct(p);
 out_unlock:
 	rcu_read_unlock();
@@ -502,7 +498,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	struct futex_q *this, *next;
 	struct plist_head *head;
 	struct task_struct *p;
-	pid_t pid;
+	pid_t pid = uval & FUTEX_TID_MASK;
 
 	head = &hb->chain;
 
@@ -520,6 +516,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 				return -EINVAL;
 
 			WARN_ON(!atomic_read(&pi_state->refcount));
+			WARN_ON(pid && pi_state->owner &&
+				pi_state->owner->pid != pid);
 
 			atomic_inc(&pi_state->refcount);
 			*ps = pi_state;
@@ -530,15 +528,33 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 
 	/*
 	 * We are the first waiter - try to look up the real owner and attach
-	 * the new pi_state to it, but bail out when the owner died bit is set
-	 * and TID = 0:
+	 * the new pi_state to it, but bail out when TID = 0
 	 */
-	pid = uval & FUTEX_TID_MASK;
-	if (!pid && (uval & FUTEX_OWNER_DIED))
+	if (!pid)
 		return -ESRCH;
 	p = futex_find_get_task(pid);
-	if (!p)
-		return -ESRCH;
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	/*
+	 * We need to look at the task state flags to figure out,
+	 * whether the task is exiting. To protect against the do_exit
+	 * change of the task flags, we do this protected by
+	 * p->pi_lock:
+	 */
+	spin_lock_irq(&p->pi_lock);
+	if (unlikely(p->flags & PF_EXITING)) {
+		/*
+		 * The task is on the way out. When PF_EXITPIDONE is
+		 * set, we know that the task has finished the
+		 * cleanup:
+		 */
+		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+
+		spin_unlock_irq(&p->pi_lock);
+		put_task_struct(p);
+		return ret;
+	}
 
 	pi_state = alloc_pi_state();
 
@@ -551,7 +567,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	/* Store the key for possible exit cleanups: */
 	pi_state->key = *key;
 
-	spin_lock_irq(&p->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
@@ -618,6 +633,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	 * preserve the owner died bit.)
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
+		int ret = 0;
+
 		newval = FUTEX_WAITERS | new_owner->pid;
 		/* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
@@ -625,10 +642,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 		pagefault_disable();
 		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
 		pagefault_enable();
+
 		if (curval == -EFAULT)
-			return -EFAULT;
+			ret = -EFAULT;
 		if (curval != uval)
-			return -EINVAL;
+			ret = -EINVAL;
+		if (ret) {
+			spin_unlock(&pi_state->pi_mutex.wait_lock);
+			return ret;
+		}
 	}
 
 	spin_lock_irq(&pi_state->owner->pi_lock);
@@ -1174,7 +1196,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
 #ifdef CONFIG_DEBUG_PI_LIST
 			this->list.plist.lock = &hb2->lock;
 #endif
 		}
 		this->key = key2;
 		get_futex_key_refs(&key2);
 		drop_count++;
@@ -1326,12 +1348,10 @@ static void unqueue_me_pi(struct futex_q *q)
 /*
  * Fixup the pi_state owner with current.
  *
- * The cur->mm semaphore must be held, it is released at return of this
- * function.
+ * Must be called with hash bucket lock held and mm->sem held for non
+ * private futexes.
  */
-static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
-				struct futex_q *q,
-				struct futex_hash_bucket *hb,
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 				struct task_struct *curr)
 {
 	u32 newtid = curr->pid | FUTEX_WAITERS;
@@ -1355,23 +1375,24 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
 	list_add(&pi_state->list, &curr->pi_state_list);
 	spin_unlock_irq(&curr->pi_lock);
 
-	/* Unqueue and drop the lock */
-	unqueue_me_pi(q);
-	if (fshared)
-		up_read(fshared);
 	/*
 	 * We own it, so we have to replace the pending owner
 	 * TID. This must be atomic as we have preserve the
 	 * owner died bit here.
 	 */
-	ret = get_user(uval, uaddr);
+	ret = get_futex_value_locked(&uval, uaddr);
+
 	while (!ret) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
+
+		pagefault_disable();
 		curval = futex_atomic_cmpxchg_inatomic(uaddr,
 						       uval, newval);
+		pagefault_enable();
+
 		if (curval == -EFAULT)
 			ret = -EFAULT;
 		if (curval == uval)
 			break;
 		uval = curval;
@@ -1553,10 +1574,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 			 */
 			uaddr = q.pi_state->key.uaddr;
 
-			/* mmap_sem and hash_bucket lock are unlocked at
-			   return of this function */
-			ret = fixup_pi_state_owner(uaddr, fshared,
-						   &q, hb, curr);
+			ret = fixup_pi_state_owner(uaddr, &q, curr);
 		} else {
 			/*
 			 * Catch the rare case, where the lock was released
@@ -1567,12 +1585,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 			if (rt_mutex_trylock(&q.pi_state->pi_mutex))
 				ret = 0;
 		}
-		/* Unqueue and drop the lock */
-		unqueue_me_pi(&q);
-		if (fshared)
-			up_read(fshared);
 	}
 
+	/* Unqueue and drop the lock */
+	unqueue_me_pi(&q);
+	if (fshared)
+		up_read(fshared);
+
 	debug_rt_mutex_free_waiter(&q.waiter);
 
 	return ret;
@@ -1688,7 +1707,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	struct futex_hash_bucket *hb;
 	u32 uval, newval, curval;
 	struct futex_q q;
-	int ret, lock_held, attempt = 0;
+	int ret, lock_taken, ownerdied = 0, attempt = 0;
 
 	if (refill_pi_state_cache())
 		return -ENOMEM;
@@ -1709,10 +1728,11 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(ret != 0))
 		goto out_release_sem;
 
+retry_unlocked:
 	hb = queue_lock(&q, -1, NULL);
 
 retry_locked:
-	lock_held = 0;
+	ret = lock_taken = 0;
 
 	/*
 	 * To avoid races, we attempt to take the lock here again
@@ -1728,43 +1748,44 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(curval == -EFAULT))
 		goto uaddr_faulted;
 
-	/* We own the lock already */
+	/*
+	 * Detect deadlocks. In case of REQUEUE_PI this is a valid
+	 * situation and we return success to user space.
+	 */
 	if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
-		if (!detect && 0)
-			force_sig(SIGKILL, current);
-		/*
-		 * Normally, this check is done in user space.
-		 * In case of requeue, the owner may attempt to lock this futex,
-		 * even if the ownership has already been given by the previous
-		 * waker.
-		 * In the usual case, this is a case of deadlock, but not in case
-		 * of REQUEUE_PI.
-		 */
 		if (!(curval & FUTEX_WAITER_REQUEUED))
 			ret = -EDEADLK;
 		goto out_unlock_release_sem;
 	}
 
 	/*
-	 * Surprise - we got the lock. Just return
-	 * to userspace:
+	 * Surprise - we got the lock. Just return to userspace:
 	 */
 	if (unlikely(!curval))
 		goto out_unlock_release_sem;
 
 	uval = curval;
+
 	/*
-	 * In case of a requeue, check if there already is an owner
-	 * If not, just take the futex.
+	 * Set the WAITERS flag, so the owner will know it has someone
+	 * to wake at next unlock
 	 */
-	if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) {
-		/* set current as futex owner */
-		newval = curval | current->pid;
-		lock_held = 1;
-	} else
-		/* Set the WAITERS flag, so the owner will know it has someone
-		   to wake at next unlock */
-		newval = curval | FUTEX_WAITERS;
+	newval = curval | FUTEX_WAITERS;
+
+	/*
+	 * There are two cases, where a futex might have no owner (the
+	 * owner TID is 0): OWNER_DIED or REQUEUE. We take over the
+	 * futex in this case. We also do an unconditional take over,
+	 * when the owner of the futex died.
+	 *
+	 * This is safe as we are protected by the hash bucket lock !
+	 */
+	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
+		/* Keep the OWNER_DIED and REQUEUE bits */
+		newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+		ownerdied = 0;
+		lock_taken = 1;
+	}
 
 	pagefault_disable();
 	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -1775,8 +1796,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(curval != uval))
 		goto retry_locked;
 
-	if (lock_held) {
-		set_pi_futex_owner(hb, &q.key, curr);
+	/*
+	 * We took the lock due to requeue or owner died take over.
+	 */
+	if (unlikely(lock_taken)) {
+		/* For requeue we need to fixup the pi_futex */
+		if (curval & FUTEX_WAITER_REQUEUED)
+			set_pi_futex_owner(hb, &q.key, curr);
 		goto out_unlock_release_sem;
 	}
 
@@ -1787,34 +1813,40 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
 
 	if (unlikely(ret)) {
-		/*
-		 * There were no waiters and the owner task lookup
-		 * failed. When the OWNER_DIED bit is set, then we
-		 * know that this is a robust futex and we actually
-		 * take the lock. This is safe as we are protected by
-		 * the hash bucket lock. We also set the waiters bit
-		 * unconditionally here, to simplify glibc handling of
-		 * multiple tasks racing to acquire the lock and
-		 * cleanup the problems which were left by the dead
-		 * owner.
-		 */
-		if (curval & FUTEX_OWNER_DIED) {
-			uval = newval;
-			newval = current->pid |
-				FUTEX_OWNER_DIED | FUTEX_WAITERS;
+		switch (ret) {
 
-			pagefault_disable();
-			curval = futex_atomic_cmpxchg_inatomic(uaddr,
-							       uval, newval);
-			pagefault_enable();
+		case -EAGAIN:
+			/*
+			 * Task is exiting and we just wait for the
+			 * exit to complete.
+			 */
+			queue_unlock(&q, hb);
+			if (fshared)
+				up_read(fshared);
+			cond_resched();
+			goto retry;
 
-			if (unlikely(curval == -EFAULT))
+		case -ESRCH:
+			/*
+			 * No owner found for this futex. Check if the
+			 * OWNER_DIED bit is set to figure out whether
+			 * this is a robust futex or not.
+			 */
+			if (get_futex_value_locked(&curval, uaddr))
 				goto uaddr_faulted;
-			if (unlikely(curval != uval))
+
+			/*
+			 * We simply start over in case of a robust
+			 * futex. The code above will take the futex
+			 * and return happy.
+			 */
+			if (curval & FUTEX_OWNER_DIED) {
+				ownerdied = 1;
 				goto retry_locked;
-			ret = 0;
+			}
+		default:
+			goto out_unlock_release_sem;
 		}
-		goto out_unlock_release_sem;
 	}
 
 	/*
@@ -1845,31 +1877,42 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		down_read(fshared);
 	spin_lock(q.lock_ptr);
 
-	/*
-	 * Got the lock. We might not be the anticipated owner if we
-	 * did a lock-steal - fix up the PI-state in that case.
-	 */
-	if (!ret && q.pi_state->owner != curr)
-		/* mmap_sem is unlocked at return of this function */
-		ret = fixup_pi_state_owner(uaddr, fshared, &q, hb, curr);
-	else {
+	if (!ret) {
+		/*
+		 * Got the lock. We might not be the anticipated owner
+		 * if we did a lock-steal - fix up the PI-state in
+		 * that case:
+		 */
+		if (q.pi_state->owner != curr)
+			ret = fixup_pi_state_owner(uaddr, &q, curr);
+	} else {
 		/*
 		 * Catch the rare case, where the lock was released
-		 * when we were on the way back before we locked
-		 * the hash bucket.
+		 * when we were on the way back before we locked the
+		 * hash bucket.
 		 */
-		if (ret && q.pi_state->owner == curr) {
-			if (rt_mutex_trylock(&q.pi_state->pi_mutex))
-				ret = 0;
+		if (q.pi_state->owner == curr &&
+		    rt_mutex_trylock(&q.pi_state->pi_mutex)) {
+			ret = 0;
+		} else {
+			/*
+			 * Paranoia check. If we did not take the lock
+			 * in the trylock above, then we should not be
+			 * the owner of the rtmutex, neither the real
+			 * nor the pending one:
+			 */
+			if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
+				printk(KERN_ERR "futex_lock_pi: ret = %d "
+				       "pi-mutex: %p pi-state %p\n", ret,
+				       q.pi_state->pi_mutex.owner,
+				       q.pi_state->owner);
 		}
-		/* Unqueue and drop the lock */
-		unqueue_me_pi(&q);
-		if (fshared)
-			up_read(fshared);
 	}
 
-	if (!detect && ret == -EDEADLK && 0)
-		force_sig(SIGKILL, current);
+	/* Unqueue and drop the lock */
+	unqueue_me_pi(&q);
+	if (fshared)
+		up_read(fshared);
 
 	return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
@@ -1887,16 +1930,19 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 * non-atomically. Therefore, if get_user below is not
 	 * enough, we need to handle the fault ourselves, while
 	 * still holding the mmap_sem.
+	 *
+	 * ... and hb->lock. :-) --ANK
 	 */
+	queue_unlock(&q, hb);
+
 	if (attempt++) {
 		ret = futex_handle_fault((unsigned long)uaddr, fshared,
 					 attempt);
 		if (ret)
-			goto out_unlock_release_sem;
-		goto retry_locked;
+			goto out_release_sem;
+		goto retry_unlocked;
 	}
 
-	queue_unlock(&q, hb);
 	if (fshared)
 		up_read(fshared);
 
@@ -1940,9 +1986,9 @@ retry:
 		goto out;
 
 	hb = hash_futex(&key);
+retry_unlocked:
 	spin_lock(&hb->lock);
 
-retry_locked:
 	/*
 	 * To avoid races, try to do the TID -> 0 atomic transition
 	 * again. If it succeeds then we can return without waking
@@ -2005,16 +2051,19 @@ pi_faulted:
 	 * non-atomically. Therefore, if get_user below is not
 	 * enough, we need to handle the fault ourselves, while
 	 * still holding the mmap_sem.
+	 *
+	 * ... and hb->lock. --ANK
 	 */
+	spin_unlock(&hb->lock);
+
 	if (attempt++) {
 		ret = futex_handle_fault((unsigned long)uaddr, fshared,
 					 attempt);
 		if (ret)
-			goto out_unlock;
-		goto retry_locked;
+			goto out;
+		goto retry_unlocked;
 	}
 
-	spin_unlock(&hb->lock);
 	if (fshared)
 		up_read(fshared);
 
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 338a9b489fbc..27478948b318 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -144,20 +144,21 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
 	struct timespec ts;
 	ktime_t t, *tp = NULL;
 	int val2 = 0;
+	int cmd = op & FUTEX_CMD_MASK;
 
-	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
+	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
 		if (get_compat_timespec(&ts, utime))
 			return -EFAULT;
 		if (!timespec_valid(&ts))
 			return -EINVAL;
 
 		t = timespec_to_ktime(ts);
-		if (op == FUTEX_WAIT)
+		if (cmd == FUTEX_WAIT)
 			t = ktime_add(ktime_get(), t);
 		tp = &t;
 	}
-	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
-	    || op == FUTEX_CMP_REQUEUE_PI)
+	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE
+	    || cmd == FUTEX_CMP_REQUEUE_PI)
 		val2 = (int) (unsigned long) utime;
 
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index b0d81aae472f..bd9e272d55e9 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -135,6 +135,39 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
 	}
 }
 
+static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
+{
+	struct irqaction *action;
+
+	if (!irqfixup)
+		return 0;
+
+	/* We didn't actually handle the IRQ - see if it was misrouted? */
+	if (action_ret == IRQ_NONE)
+		return 1;
+
+	/*
+	 * But for 'irqfixup == 2' we also do it for handled interrupts if
+	 * they are marked as IRQF_IRQPOLL (or for irq zero, which is the
+	 * traditional PC timer interrupt.. Legacy)
+	 */
+	if (irqfixup < 2)
+		return 0;
+
+	if (!irq)
+		return 1;
+
+	/*
+	 * Since we don't get the descriptor lock, "action" can
+	 * change under us. We don't really care, but we don't
+	 * want to follow a NULL pointer. So tell the compiler to
+	 * just load it once by using a barrier.
+	 */
+	action = desc->action;
+	barrier();
+	return action && (action->flags & IRQF_IRQPOLL);
+}
+
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
@@ -144,15 +177,10 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		report_bad_irq(irq, desc, action_ret);
 	}
 
-	if (unlikely(irqfixup)) {
-		/* Don't punish working computers */
-		if ((irqfixup == 2 && ((irq == 0) ||
-				(desc->action->flags & IRQF_IRQPOLL))) ||
-				action_ret == IRQ_NONE) {
-			int ok = misrouted_irq(irq);
-			if (action_ret == IRQ_NONE)
-				desc->irqs_unhandled -= ok;
-		}
+	if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
+		int ok = misrouted_irq(irq);
+		if (action_ret == IRQ_NONE)
+			desc->irqs_unhandled -= ok;
 	}
 
 	desc->irq_count++;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index f1bda23140b2..fed54418626c 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -257,7 +257,8 @@ const char *kallsyms_lookup(unsigned long addr,
 		pos = get_symbol_pos(addr, symbolsize, offset);
 		/* Grab name */
 		kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
-		*modname = NULL;
+		if (modname)
+			*modname = NULL;
 		return namebuf;
 	}
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index df8a8e8f6ca4..bbd51b81a3e8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -70,7 +70,7 @@ static int kthread(void *_create)
 	data = create->data;
 
 	/* OK, tell user we're spawned, wait for stop or wakeup */
-	__set_current_state(TASK_INTERRUPTIBLE);
+	__set_current_state(TASK_UNINTERRUPTIBLE);
 	complete(&create->started);
 	schedule();
 
@@ -162,7 +162,10 @@ EXPORT_SYMBOL(kthread_create);
  */
 void kthread_bind(struct task_struct *k, unsigned int cpu)
 {
-	BUG_ON(k->state != TASK_INTERRUPTIBLE);
+	if (k->state != TASK_UNINTERRUPTIBLE) {
+		WARN_ON(1);
+		return;
+	}
 	/* Must have done schedule() in kthread() before we set_task_cpu */
 	wait_task_inactive(k);
 	set_task_cpu(k, cpu);
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b5f0543ed84d..f445b9cd60fb 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -416,7 +416,8 @@ static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
 
 	mutex_lock(&pm_mutex);
 	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
-		if (!strncmp(buf, hibernation_modes[i], len)) {
+		if (len == strlen(hibernation_modes[i])
+		    && !strncmp(buf, hibernation_modes[i], len)) {
 			mode = i;
 			break;
 		}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 40d56a31245e..8812985f3029 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -97,25 +97,26 @@ static int suspend_prepare(suspend_state_t state)
 		}
 	}
 
-	if (pm_ops->prepare) {
-		if ((error = pm_ops->prepare(state)))
-			goto Thaw;
-	}
-
 	suspend_console();
 	error = device_suspend(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "Some devices failed to suspend\n");
-		goto Resume_devices;
+		goto Resume_console;
 	}
+	if (pm_ops->prepare) {
+		if ((error = pm_ops->prepare(state)))
+			goto Resume_devices;
+	}
+
 	error = disable_nonboot_cpus();
 	if (!error)
 		return 0;
 
 	enable_nonboot_cpus();
- Resume_devices:
 	pm_finish(state);
+ Resume_devices:
 	device_resume();
+ Resume_console:
 	resume_console();
 Thaw:
 	thaw_processes();
@@ -289,13 +290,13 @@ static ssize_t state_store(struct kset *kset, const char *buf, size_t n)
 	len = p ? p - buf : n;
 
 	/* First, check if we are requested to hibernate */
-	if (!strncmp(buf, "disk", len)) {
+	if (len == 4 && !strncmp(buf, "disk", len)) {
 		error = hibernate();
 		return error ? error : n;
 	}
 
 	for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
-		if (*s && !strncmp(buf, *s, len))
+		if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
 			break;
 	}
 	if (state < PM_SUSPEND_MAX && *s)
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 088419387388..e0233d8422b9 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -31,16 +31,36 @@ static inline int freezeable(struct task_struct * p)
 	return 1;
 }
 
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+	if (!unlikely(current->flags & PF_NOFREEZE)) {
+		current->flags |= PF_FROZEN;
+		wmb();
+	}
+	clear_tsk_thread_flag(current, TIF_FREEZE);
+}
+
 /* Refrigerator is place where frozen processes are stored :-). */
 void refrigerator(void)
 {
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
 	long save;
+
+	task_lock(current);
+	if (freezing(current)) {
+		frozen_process();
+		task_unlock(current);
+	} else {
+		task_unlock(current);
+		return;
+	}
 	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
 
-	frozen_process(current);
 	spin_lock_irq(&current->sighand->siglock);
 	recalc_sigpending(); /* We sent fake signal, clean it up */
 	spin_unlock_irq(&current->sighand->siglock);
@@ -81,7 +101,7 @@ static void cancel_freezing(struct task_struct *p)
 		pr_debug(" clean up: %s\n", p->comm);
 		do_not_freeze(p);
 		spin_lock_irqsave(&p->sighand->siglock, flags);
-		recalc_sigpending_tsk(p);
+		recalc_sigpending_and_wake(p);
 		spin_unlock_irqrestore(&p->sighand->siglock, flags);
 	}
 }
@@ -112,22 +132,12 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
 			cancel_freezing(p);
 			continue;
 		}
-		if (is_user_space(p)) {
-			if (!freeze_user_space)
-				continue;
-
-			/* Freeze the task unless there is a vfork
-			 * completion pending
-			 */
-			if (!p->vfork_done)
-				freeze_process(p);
-		} else {
-			if (freeze_user_space)
-				continue;
-
-			freeze_process(p);
-		}
-		todo++;
+		if (freeze_user_space && !is_user_space(p))
+			continue;
+
+		freeze_process(p);
+		if (!freezer_should_skip(p))
+			todo++;
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 	yield();			/* Yield is okay here */
@@ -149,13 +159,16 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
 		       TIMEOUT / HZ, todo);
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (is_user_space(p) == !freeze_user_space)
+			if (freeze_user_space && !is_user_space(p))
 				continue;
 
-			if (freezeable(p) && !frozen(p))
+			task_lock(p);
+			if (freezeable(p) && !frozen(p) &&
+			    !freezer_should_skip(p))
 				printk(KERN_ERR " %s\n", p->comm);
 
 			cancel_freezing(p);
+			task_unlock(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 	}
@@ -200,9 +213,7 @@ static void thaw_tasks(int thaw_user_space)
 		if (is_user_space(p) == !thaw_user_space)
 			continue;
 
-		if (!thaw_process(p))
-			printk(KERN_WARNING " Strange, %s not stopped\n",
-				p->comm );
+		thaw_process(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 }
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b8b235cc19d1..8b1a1b837145 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -584,7 +584,7 @@ int swsusp_check(void)
 	resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
 	if (!IS_ERR(resume_bdev)) {
 		set_blocksize(resume_bdev, PAGE_SIZE);
-		memset(swsusp_header, 0, sizeof(PAGE_SIZE));
+		memset(swsusp_header, 0, PAGE_SIZE);
 		error = bio_read_page(swsusp_resume_block,
 				      swsusp_header, NULL);
 		if (error)
diff --git a/kernel/profile.c b/kernel/profile.c
index cc91b9bf759d..5b20fe977bed 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -26,6 +26,7 @@
 #include <asm/sections.h>
 #include <asm/semaphore.h>
 #include <asm/irq_regs.h>
+#include <asm/ptrace.h>
 
 struct profile_hit {
 	u32 pc, hits;
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 12879f6c1ec3..a6fbb4130521 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -189,6 +189,19 @@ int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	if (!waiter || !waiter->task)
 		goto out_unlock_pi;
 
+	/*
+	 * Check the orig_waiter state. After we dropped the locks,
+	 * the previous owner of the lock might have released the lock
+	 * and made us the pending owner:
+	 */
+	if (orig_waiter && !orig_waiter->task)
+		goto out_unlock_pi;
+
+	/*
+	 * Drop out, when the task has no waiters. Note,
+	 * top_waiter can be NULL, when we are in the deboosting
+	 * mode!
+	 */
 	if (top_waiter && (!task_has_pi_waiters(task) ||
 			   top_waiter != task_top_pi_waiter(task)))
 		goto out_unlock_pi;
@@ -636,9 +649,16 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		 * all over without going into schedule to try
 		 * to get the lock now:
 		 */
-		if (unlikely(!waiter.task))
+		if (unlikely(!waiter.task)) {
+			/*
+			 * Reset the return value. We might
+			 * have returned with -EDEADLK and the
+			 * owner released the lock while we
+			 * were walking the pi chain.
+			 */
+			ret = 0;
 			continue;
-
+		}
 		if (unlikely(ret))
 			break;
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index 799d23b4e35d..13cdab3b4c48 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4775,9 +4775,7 @@ int __sched cond_resched_softirq(void)
 	BUG_ON(!in_softirq());
 
 	if (need_resched() && system_state == SYSTEM_RUNNING) {
-		raw_local_irq_disable();
-		_local_bh_enable();
-		raw_local_irq_enable();
+		local_bh_enable();
 		__cond_resched();
 		local_bh_disable();
 		return 1;
diff --git a/kernel/signal.c b/kernel/signal.c
index 364fc95bf97c..fe590e00db8d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -96,20 +96,38 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
 
 #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
 
-fastcall void recalc_sigpending_tsk(struct task_struct *t)
+static int recalc_sigpending_tsk(struct task_struct *t)
 {
 	if (t->signal->group_stop_count > 0 ||
 	    (freezing(t)) ||
 	    PENDING(&t->pending, &t->blocked) ||
-	    PENDING(&t->signal->shared_pending, &t->blocked))
+	    PENDING(&t->signal->shared_pending, &t->blocked)) {
 		set_tsk_thread_flag(t, TIF_SIGPENDING);
-	else
-		clear_tsk_thread_flag(t, TIF_SIGPENDING);
+		return 1;
+	}
+	/*
+	 * We must never clear the flag in another thread, or in current
+	 * when it's possible the current syscall is returning -ERESTART*.
+	 * So we don't clear it here, and only callers who know they should do.
+	 */
+	return 0;
+}
+
+/*
+ * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
+ * This is superfluous when called on current, the wakeup is a harmless no-op.
+ */
+void recalc_sigpending_and_wake(struct task_struct *t)
+{
+	if (recalc_sigpending_tsk(t))
+		signal_wake_up(t, 0);
 }
 
 void recalc_sigpending(void)
 {
-	recalc_sigpending_tsk(current);
+	if (!recalc_sigpending_tsk(current))
+		clear_thread_flag(TIF_SIGPENDING);
+
 }
 
 /* Given the mask, find the first available signal that should be serviced. */
@@ -373,7 +391,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 			}
 		}
 	}
-	recalc_sigpending_tsk(tsk);
+	if (likely(tsk == current))
+		recalc_sigpending();
 	if (signr && unlikely(sig_kernel_stop(signr))) {
 		/*
 		 * Set a marker that we have dequeued a stop signal. Our
@@ -744,7 +763,7 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 		action->sa.sa_handler = SIG_DFL;
 		if (blocked) {
 			sigdelset(&t->blocked, sig);
-			recalc_sigpending_tsk(t);
+			recalc_sigpending_and_wake(t);
 		}
 	}
 	ret = specific_send_sig_info(sig, info, t);
@@ -1568,8 +1587,9 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
 	/*
 	 * Queued signals ignored us while we were stopped for tracing.
 	 * So check for any that we should take before resuming user mode.
+	 * This sets TIF_SIGPENDING, but never clears it.
 	 */
-	recalc_sigpending();
+	recalc_sigpending_tsk(current);
 }
 
 void ptrace_notify(int exit_code)
@@ -2273,7 +2293,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 		rm_from_queue_full(&mask, &t->signal->shared_pending);
 		do {
 			rm_from_queue_full(&mask, &t->pending);
-			recalc_sigpending_tsk(t);
+			recalc_sigpending_and_wake(t);
 			t = next_thread(t);
 		} while (t != current);
 	}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4073353abd4f..30ee462ee79f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -227,7 +227,7 @@ static ctl_table kern_table[] = {
 		.ctl_name	= KERN_CORE_PATTERN,
 		.procname	= "core_pattern",
 		.data		= core_pattern,
-		.maxlen		= 128,
+		.maxlen		= CORENAME_MAX_SIZE,
 		.mode		= 0644,
 		.proc_handler	= &proc_dostring,
 		.strategy	= &sysctl_string,
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3db5c3c460d7..51b6a6a6158c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -74,7 +74,7 @@ static struct clocksource *watchdog;
 static struct timer_list watchdog_timer;
 static DEFINE_SPINLOCK(watchdog_lock);
 static cycle_t watchdog_last;
-static int watchdog_resumed;
+static unsigned long watchdog_resumed;
 
 /*
  * Interval: 0.5sec Threshold: 0.0625s
@@ -104,9 +104,7 @@ static void clocksource_watchdog(unsigned long data)
 
 	spin_lock(&watchdog_lock);
 
-	resumed = watchdog_resumed;
-	if (unlikely(resumed))
-		watchdog_resumed = 0;
+	resumed = test_and_clear_bit(0, &watchdog_resumed);
 
 	wdnow = watchdog->read();
 	wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
@@ -151,9 +149,7 @@ static void clocksource_watchdog(unsigned long data)
 }
 static void clocksource_resume_watchdog(void)
 {
-	spin_lock(&watchdog_lock);
-	watchdog_resumed = 1;
-	spin_unlock(&watchdog_lock);
+	set_bit(0, &watchdog_resumed);
 }
 
 static void clocksource_check_watchdog(struct clocksource *cs)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index cb25649c6f50..87aa5ff931e0 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -11,6 +11,8 @@
 #include <linux/mm.h>
 #include <linux/time.h>
 #include <linux/timex.h>
+#include <linux/jiffies.h>
+#include <linux/hrtimer.h>
 
 #include <asm/div64.h>
 #include <asm/timex.h>
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index eadfce2fff74..8001d37071f5 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -243,11 +243,18 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
 {
 	int cpu = get_cpu();
 
-	if (cpu == *oncpu)
-		tick_do_broadcast_on_off(&reason);
-	else
-		smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
-					 &reason, 1, 1);
+	if (!cpu_isset(*oncpu, cpu_online_map)) {
+		printk(KERN_ERR "tick-braodcast: ignoring broadcast for "
+		       "offline CPU #%d\n", *oncpu);
+	} else {
+
+		if (cpu == *oncpu)
+			tick_do_broadcast_on_off(&reason);
+		else
+			smp_call_function_single(*oncpu,
+						 tick_do_broadcast_on_off,
+						 &reason, 1, 1);
+	}
 	put_cpu();
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3483e6cb9549..52db9e3c526e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -167,9 +167,15 @@ void tick_nohz_stop_sched_tick(void)
 		goto end;
 
 	cpu = smp_processor_id();
-	if (unlikely(local_softirq_pending()))
-		printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
-		       local_softirq_pending());
+	if (unlikely(local_softirq_pending())) {
+		static int ratelimit;
+
+		if (ratelimit < 10) {
+			printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
+			       local_softirq_pending());
+			ratelimit++;
+		}
+	}
 
 	now = ktime_get();
 	/*
@@ -241,6 +247,21 @@ void tick_nohz_stop_sched_tick(void)
 		if (cpu == tick_do_timer_cpu)
 			tick_do_timer_cpu = -1;
 
+		ts->idle_sleeps++;
+
+		/*
+		 * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
+		 * there is no timer pending or at least extremly far
+		 * into the future (12 days for HZ=1000). In this case
+		 * we simply stop the tick timer:
+		 */
+		if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
+			ts->idle_expires.tv64 = KTIME_MAX;
+			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+				hrtimer_cancel(&ts->sched_timer);
+			goto out;
+		}
+
 		/*
 		 * calculate the expiry time for the next timer wheel
 		 * timer
@@ -248,7 +269,6 @@ void tick_nohz_stop_sched_tick(void)
 		expires = ktime_add_ns(last_update, tick_period.tv64 *
 				       delta_jiffies);
 		ts->idle_expires = expires;
-		ts->idle_sleeps++;
 
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 			hrtimer_start(&ts->sched_timer, expires,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f9217bf644f6..3d1042f82a68 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -273,6 +273,8 @@ static int timekeeping_resume(struct sys_device *dev)
 	unsigned long flags;
 	unsigned long now = read_persistent_clock();
 
+	clocksource_resume();
+
 	write_seqlock_irqsave(&xtime_lock, flags);
 
 	if (now && (now > timekeeping_suspend_time)) {
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 868f1bceb07f..321693724ad7 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -117,21 +117,6 @@ static struct entry entries[MAX_ENTRIES];
 
 static atomic_t overflow_count;
 
-static void reset_entries(void)
-{
-	nr_entries = 0;
-	memset(entries, 0, sizeof(entries));
-	atomic_set(&overflow_count, 0);
-}
-
-static struct entry *alloc_entry(void)
-{
-	if (nr_entries >= MAX_ENTRIES)
-		return NULL;
-
-	return entries + nr_entries++;
-}
-
 /*
  * The entries are in a hash-table, for fast lookup:
  */
@@ -149,6 +134,22 @@ static struct entry *alloc_entry(void)
 
 static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
 
+static void reset_entries(void)
+{
+	nr_entries = 0;
+	memset(entries, 0, sizeof(entries));
+	memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
+	atomic_set(&overflow_count, 0);
+}
+
+static struct entry *alloc_entry(void)
+{
+	if (nr_entries >= MAX_ENTRIES)
+		return NULL;
+
+	return entries + nr_entries++;
+}
+
 static int match_entries(struct entry *entry1, struct entry *entry2)
 {
 	return entry1->timer == entry2->timer &&
@@ -202,12 +203,15 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
 	if (curr) {
 		*curr = *entry;
 		curr->count = 0;
+		curr->next = NULL;
 		memcpy(curr->comm, comm, TASK_COMM_LEN);
+
+		smp_mb(); /* Ensure that curr is initialized before insert */
+
 		if (prev)
 			prev->next = curr;
 		else
 			*head = curr;
-		curr->next = NULL;
 	}
  out_unlock:
 	spin_unlock(&table_lock);
@@ -232,10 +236,15 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
 	/*
 	 * It doesnt matter which lock we take:
 	 */
-	spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id());
+	spinlock_t *lock;
 	struct entry *entry, input;
 	unsigned long flags;
 
+	if (likely(!active))
+		return;
+
+	lock = &per_cpu(lookup_lock, raw_smp_processor_id());
+
 	input.timer = timer;
 	input.start_func = startf;
 	input.expire_func = timerf;
@@ -360,6 +369,7 @@ static ssize_t tstats_write(struct file *file, const char __user *buf,
 		if (!active) {
 			reset_entries();
 			time_start = ktime_get();
+			smp_mb();
 			active = 1;
 		}
 		break;
diff --git a/kernel/timer.c b/kernel/timer.c
index a6c580ac084b..1a69705c2fb9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -666,7 +666,7 @@ static inline void __run_timers(tvec_base_t *base)
 static unsigned long __next_timer_interrupt(tvec_base_t *base)
 {
 	unsigned long timer_jiffies = base->timer_jiffies;
-	unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
+	unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
 	int index, slot, array, found = 0;
 	struct timer_list *nte;
 	tvec_t *varray[4];
@@ -752,6 +752,14 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
 
 	tsdelta = ktime_to_timespec(hr_delta);
 	delta = timespec_to_jiffies(&tsdelta);
+
+	/*
+	 * Limit the delta to the max value, which is checked in
+	 * tick_nohz_stop_sched_tick():
+	 */
+	if (delta > NEXT_TIMER_MAX_DELTA)
+		delta = NEXT_TIMER_MAX_DELTA;
+
 	/*
 	 * Take rounding errors in to account and make sure, that it
 	 * expires in the next tick. Otherwise we go into an endless
@@ -1499,8 +1507,6 @@ unregister_time_interpolator(struct time_interpolator *ti)
 			prev = &curr->next;
 	}
 
-	clocksource_resume();
-
 	write_seqlock_irqsave(&xtime_lock, flags);
 	if (ti == time_interpolator) {
 		/* we lost the best time-interpolator: */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index fb56fedd5c02..3bebf73be976 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -47,7 +47,6 @@ struct cpu_workqueue_struct {
 
 	struct workqueue_struct *wq;
 	struct task_struct *thread;
-	int should_stop;
 
 	int run_depth;		/* Detect run_workqueue() recursion depth */
 } ____cacheline_aligned;
@@ -71,7 +70,13 @@ static LIST_HEAD(workqueues);
 
 static int singlethread_cpu __read_mostly;
 static cpumask_t cpu_singlethread_map __read_mostly;
-/* optimization, we could use cpu_possible_map */
+/*
+ * _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD
+ * flushes cwq->worklist. This means that flush_workqueue/wait_on_work
+ * which comes in between can't use for_each_online_cpu(). We could
+ * use cpu_possible_map, the cpumask below is more a documentation
+ * than optimization.
+ */
 static cpumask_t cpu_populated_map __read_mostly;
 
 /* If it's single threaded, it isn't in the list of workqueues. */
@@ -272,24 +277,6 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 	spin_unlock_irq(&cwq->lock);
 }
 
-/*
- * NOTE: the caller must not touch *cwq if this func returns true
- */
-static int cwq_should_stop(struct cpu_workqueue_struct *cwq)
-{
-	int should_stop = cwq->should_stop;
-
-	if (unlikely(should_stop)) {
-		spin_lock_irq(&cwq->lock);
-		should_stop = cwq->should_stop && list_empty(&cwq->worklist);
-		if (should_stop)
-			cwq->thread = NULL;
-		spin_unlock_irq(&cwq->lock);
-	}
-
-	return should_stop;
-}
-
 static int worker_thread(void *__cwq)
 {
 	struct cpu_workqueue_struct *cwq = __cwq;
@@ -302,14 +289,15 @@ static int worker_thread(void *__cwq)
 
 	for (;;) {
 		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
-		if (!freezing(current) && !cwq->should_stop
-		    && list_empty(&cwq->worklist))
+		if (!freezing(current) &&
+		    !kthread_should_stop() &&
+		    list_empty(&cwq->worklist))
 			schedule();
 		finish_wait(&cwq->more_work, &wait);
 
 		try_to_freeze();
 
-		if (cwq_should_stop(cwq))
+		if (kthread_should_stop())
 			break;
 
 		run_workqueue(cwq);
@@ -340,18 +328,21 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 	insert_work(cwq, &barr->work, tail);
 }
 
-static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
+static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
 {
+	int active;
+
 	if (cwq->thread == current) {
 		/*
 		 * Probably keventd trying to flush its own queue. So simply run
 		 * it by hand rather than deadlocking.
 		 */
 		run_workqueue(cwq);
+		active = 1;
 	} else {
 		struct wq_barrier barr;
-		int active = 0;
 
+		active = 0;
 		spin_lock_irq(&cwq->lock);
 		if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
 			insert_wq_barrier(cwq, &barr, 1);
@@ -362,6 +353,8 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
 		if (active)
 			wait_for_completion(&barr.done);
 	}
+
+	return active;
 }
 
 /**
@@ -674,7 +667,6 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 		return PTR_ERR(p);
 
 	cwq->thread = p;
-	cwq->should_stop = 0;
 
 	return 0;
 }
@@ -740,29 +732,27 @@ EXPORT_SYMBOL_GPL(__create_workqueue);
 
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
-	struct wq_barrier barr;
-	int alive = 0;
-
-	spin_lock_irq(&cwq->lock);
-	if (cwq->thread != NULL) {
-		insert_wq_barrier(cwq, &barr, 1);
-		cwq->should_stop = 1;
-		alive = 1;
-	}
-	spin_unlock_irq(&cwq->lock);
+	/*
+	 * Our caller is either destroy_workqueue() or CPU_DEAD,
+	 * workqueue_mutex protects cwq->thread
+	 */
+	if (cwq->thread == NULL)
+		return;
 
-	if (alive) {
-		wait_for_completion(&barr.done);
+	/*
+	 * If the caller is CPU_DEAD the single flush_cpu_workqueue()
+	 * is not enough, a concurrent flush_workqueue() can insert a
+	 * barrier after us.
+	 * When ->worklist becomes empty it is safe to exit because no
+	 * more work_structs can be queued on this cwq: flush_workqueue
+	 * checks list_empty(), and a "normal" queue_work() can't use
+	 * a dead CPU.
+	 */
+	while (flush_cpu_workqueue(cwq))
+		;
 
-		while (unlikely(cwq->thread != NULL))
-			cpu_relax();
-		/*
-		 * Wait until cwq->thread unlocks cwq->lock,
-		 * it won't touch *cwq after that.
-		 */
-		smp_rmb();
-		spin_unlock_wait(&cwq->lock);
-	}
+	kthread_stop(cwq->thread);
+	cwq->thread = NULL;
 }
 
 /**