Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditfilter.c          |   2
-rw-r--r--  kernel/exit.c                 |  31
-rw-r--r--  kernel/fork.c                 |   9
-rw-r--r--  kernel/futex.c                | 269
-rw-r--r--  kernel/futex_compat.c         |   9
-rw-r--r--  kernel/irq/spurious.c         |  46
-rw-r--r--  kernel/kallsyms.c             |   3
-rw-r--r--  kernel/kthread.c              |   7
-rw-r--r--  kernel/power/disk.c           |   3
-rw-r--r--  kernel/power/main.c           |  19
-rw-r--r--  kernel/power/process.c        |  57
-rw-r--r--  kernel/power/swap.c           |   2
-rw-r--r--  kernel/profile.c              |   1
-rw-r--r--  kernel/rtmutex.c              |  24
-rw-r--r--  kernel/sched.c                |   4
-rw-r--r--  kernel/signal.c               |  38
-rw-r--r--  kernel/sysctl.c               |   2
-rw-r--r--  kernel/time/clocksource.c     |  10
-rw-r--r--  kernel/time/ntp.c             |   2
-rw-r--r--  kernel/time/tick-broadcast.c  |  17
-rw-r--r--  kernel/time/tick-sched.c      |  28
-rw-r--r--  kernel/time/timekeeping.c     |   2
-rw-r--r--  kernel/time/timer_stats.c     |  44
-rw-r--r--  kernel/timer.c                |  12
-rw-r--r--  kernel/workqueue.c            |  84
25 files changed, 457 insertions(+), 268 deletions(-)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 6c61263ff96d..74cc0fc6bb81 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -311,6 +311,7 @@ int audit_match_class(int class, unsigned syscall)
 	return classes[class][AUDIT_WORD(syscall)] & AUDIT_BIT(syscall);
 }
 
+#ifdef CONFIG_AUDITSYSCALL
 static inline int audit_match_class_bits(int class, u32 *mask)
 {
 	int i;
@@ -347,6 +348,7 @@ static int audit_match_signal(struct audit_entry *entry)
 		return 1;
 	}
 }
+#endif
 
 /* Common user-space to kernel rule translation. */
 static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
diff --git a/kernel/exit.c b/kernel/exit.c
index c6d14b8008dd..5c8ecbaa19a5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -762,11 +762,8 @@ static void exit_notify(struct task_struct *tsk)
 	read_lock(&tasklist_lock);
 	spin_lock_irq(&tsk->sighand->siglock);
 	for (t = next_thread(tsk); t != tsk; t = next_thread(t))
-		if (!signal_pending(t) && !(t->flags & PF_EXITING)) {
-			recalc_sigpending_tsk(t);
-			if (signal_pending(t))
-				signal_wake_up(t, 0);
-		}
+		if (!signal_pending(t) && !(t->flags & PF_EXITING))
+			recalc_sigpending_and_wake(t);
 	spin_unlock_irq(&tsk->sighand->siglock);
 	read_unlock(&tasklist_lock);
 }
@@ -895,13 +892,29 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (unlikely(tsk->flags & PF_EXITING)) {
 		printk(KERN_ALERT
 			"Fixing recursive fault but reboot is needed!\n");
+		/*
+		 * We can do this unlocked here. The futex code uses
+		 * this flag just to verify whether the pi state
+		 * cleanup has been done or not. In the worst case it
+		 * loops once more. We pretend that the cleanup was
+		 * done as there is no way to return. Either the
+		 * OWNER_DIED bit is set by now or we push the blocked
+		 * task into the wait for ever nirwana as well.
+		 */
+		tsk->flags |= PF_EXITPIDONE;
 		if (tsk->io_context)
 			exit_io_context();
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule();
 	}
 
+	/*
+	 * tsk->flags are checked in the futex code to protect against
+	 * an exiting task cleaning up the robust pi futexes.
+	 */
+	spin_lock_irq(&tsk->pi_lock);
 	tsk->flags |= PF_EXITING;
+	spin_unlock_irq(&tsk->pi_lock);
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -915,7 +928,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	}
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
-		hrtimer_cancel(&tsk->signal->real_timer);
+		hrtimer_cancel(&tsk->signal->real_timer);
 		exit_itimers(tsk->signal);
 	}
 	acct_collect(code, group_dead);
@@ -968,6 +981,12 @@ fastcall NORET_TYPE void do_exit(long code)
 	 * Make sure we are holding no locks:
 	 */
 	debug_check_no_locks_held(tsk);
+	/*
+	 * We can do this unlocked here. The futex code uses this flag
+	 * just to verify whether the pi state cleanup has been done
+	 * or not. In the worst case it loops once more.
+	 */
+	tsk->flags |= PF_EXITPIDONE;
 
 	if (tsk->io_context)
 		exit_io_context();
diff --git a/kernel/fork.c b/kernel/fork.c
index 49530e40ea8b..73ad5cda1bcd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -45,6 +45,7 @@
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
+#include <linux/freezer.h>
 #include <linux/delayacct.h>
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
@@ -1405,7 +1406,9 @@ long do_fork(unsigned long clone_flags,
 	}
 
 	if (clone_flags & CLONE_VFORK) {
+		freezer_do_not_count();
 		wait_for_completion(&vfork);
+		freezer_count();
 		if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
 			current->ptrace_message = nr;
 			ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
@@ -1427,10 +1430,8 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep,
 {
 	struct sighand_struct *sighand = data;
 
-	if (flags & SLAB_CTOR_CONSTRUCTOR) {
-		spin_lock_init(&sighand->siglock);
-		INIT_LIST_HEAD(&sighand->signalfd_list);
-	}
+	spin_lock_init(&sighand->siglock);
+	INIT_LIST_HEAD(&sighand->signalfd_list);
 }
 
 void __init proc_caches_init(void)
diff --git a/kernel/futex.c b/kernel/futex.c
index b7ce15c67e32..3b7f7713d9a4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -430,10 +430,6 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 		p = NULL;
 		goto out_unlock;
 	}
-	if (p->exit_state != 0) {
-		p = NULL;
-		goto out_unlock;
-	}
 	get_task_struct(p);
 out_unlock:
 	rcu_read_unlock();
@@ -502,7 +498,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	struct futex_q *this, *next;
 	struct plist_head *head;
 	struct task_struct *p;
-	pid_t pid;
+	pid_t pid = uval & FUTEX_TID_MASK;
 
 	head = &hb->chain;
 
@@ -520,6 +516,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 				return -EINVAL;
 
 			WARN_ON(!atomic_read(&pi_state->refcount));
+			WARN_ON(pid && pi_state->owner &&
+				pi_state->owner->pid != pid);
 
 			atomic_inc(&pi_state->refcount);
 			*ps = pi_state;
@@ -530,15 +528,33 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 
 	/*
 	 * We are the first waiter - try to look up the real owner and attach
-	 * the new pi_state to it, but bail out when the owner died bit is set
-	 * and TID = 0:
+	 * the new pi_state to it, but bail out when TID = 0
 	 */
-	pid = uval & FUTEX_TID_MASK;
-	if (!pid && (uval & FUTEX_OWNER_DIED))
+	if (!pid)
 		return -ESRCH;
 	p = futex_find_get_task(pid);
-	if (!p)
-		return -ESRCH;
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
+	/*
+	 * We need to look at the task state flags to figure out,
+	 * whether the task is exiting. To protect against the do_exit
+	 * change of the task flags, we do this protected by
+	 * p->pi_lock:
+	 */
+	spin_lock_irq(&p->pi_lock);
+	if (unlikely(p->flags & PF_EXITING)) {
+		/*
+		 * The task is on the way out. When PF_EXITPIDONE is
+		 * set, we know that the task has finished the
+		 * cleanup:
+		 */
+		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+
+		spin_unlock_irq(&p->pi_lock);
+		put_task_struct(p);
+		return ret;
+	}
 
 	pi_state = alloc_pi_state();
 
@@ -551,7 +567,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	/* Store the key for possible exit cleanups: */
 	pi_state->key = *key;
 
-	spin_lock_irq(&p->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
@@ -618,6 +633,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	 * preserve the owner died bit.)
 	 */
 	if (!(uval & FUTEX_OWNER_DIED)) {
+		int ret = 0;
+
 		newval = FUTEX_WAITERS | new_owner->pid;
 		/* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
@@ -625,10 +642,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 		pagefault_disable();
 		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
 		pagefault_enable();
+
 		if (curval == -EFAULT)
-			return -EFAULT;
+			ret = -EFAULT;
 		if (curval != uval)
-			return -EINVAL;
+			ret = -EINVAL;
+		if (ret) {
+			spin_unlock(&pi_state->pi_mutex.wait_lock);
+			return ret;
+		}
 	}
 
 	spin_lock_irq(&pi_state->owner->pi_lock);
@@ -1174,7 +1196,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
 #ifdef CONFIG_DEBUG_PI_LIST
 			this->list.plist.lock = &hb2->lock;
 #endif
-		}
+		}
 		this->key = key2;
 		get_futex_key_refs(&key2);
 		drop_count++;
@@ -1326,12 +1348,10 @@ static void unqueue_me_pi(struct futex_q *q)
 /*
  * Fixup the pi_state owner with current.
  *
- * The cur->mm semaphore must be held, it is released at return of this
- * function.
+ * Must be called with hash bucket lock held and mm->sem held for non
+ * private futexes.
  */
-static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
-				struct futex_q *q,
-				struct futex_hash_bucket *hb,
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 				struct task_struct *curr)
 {
 	u32 newtid = curr->pid | FUTEX_WAITERS;
@@ -1355,23 +1375,24 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
 	list_add(&pi_state->list, &curr->pi_state_list);
 	spin_unlock_irq(&curr->pi_lock);
 
-	/* Unqueue and drop the lock */
-	unqueue_me_pi(q);
-	if (fshared)
-		up_read(fshared);
 	/*
 	 * We own it, so we have to replace the pending owner
 	 * TID. This must be atomic as we have preserve the
 	 * owner died bit here.
 	 */
-	ret = get_user(uval, uaddr);
+	ret = get_futex_value_locked(&uval, uaddr);
+
 	while (!ret) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 		newval |= (uval & FUTEX_WAITER_REQUEUED);
+
+		pagefault_disable();
 		curval = futex_atomic_cmpxchg_inatomic(uaddr,
 						       uval, newval);
+		pagefault_enable();
+
 		if (curval == -EFAULT)
 			ret = -EFAULT;
 		if (curval == uval)
 			break;
 		uval = curval;
@@ -1553,10 +1574,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 			 */
 			uaddr = q.pi_state->key.uaddr;
 
-			/* mmap_sem and hash_bucket lock are unlocked at
-			   return of this function */
-			ret = fixup_pi_state_owner(uaddr, fshared,
-						   &q, hb, curr);
+			ret = fixup_pi_state_owner(uaddr, &q, curr);
 		} else {
 			/*
 			 * Catch the rare case, where the lock was released
@@ -1567,12 +1585,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 			if (rt_mutex_trylock(&q.pi_state->pi_mutex))
 				ret = 0;
 		}
-		/* Unqueue and drop the lock */
-		unqueue_me_pi(&q);
-		if (fshared)
-			up_read(fshared);
 	}
 
+	/* Unqueue and drop the lock */
+	unqueue_me_pi(&q);
+	if (fshared)
+		up_read(fshared);
+
 	debug_rt_mutex_free_waiter(&q.waiter);
 
 	return ret;
@@ -1688,7 +1707,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	struct futex_hash_bucket *hb;
 	u32 uval, newval, curval;
 	struct futex_q q;
-	int ret, lock_held, attempt = 0;
+	int ret, lock_taken, ownerdied = 0, attempt = 0;
 
 	if (refill_pi_state_cache())
 		return -ENOMEM;
@@ -1709,10 +1728,11 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(ret != 0))
 		goto out_release_sem;
 
+retry_unlocked:
 	hb = queue_lock(&q, -1, NULL);
 
 retry_locked:
-	lock_held = 0;
+	ret = lock_taken = 0;
 
 	/*
 	 * To avoid races, we attempt to take the lock here again
@@ -1728,43 +1748,44 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(curval == -EFAULT))
 		goto uaddr_faulted;
 
-	/* We own the lock already */
+	/*
+	 * Detect deadlocks. In case of REQUEUE_PI this is a valid
+	 * situation and we return success to user space.
+	 */
 	if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
-		if (!detect && 0)
-			force_sig(SIGKILL, current);
-		/*
-		 * Normally, this check is done in user space.
-		 * In case of requeue, the owner may attempt to lock this futex,
-		 * even if the ownership has already been given by the previous
-		 * waker.
-		 * In the usual case, this is a case of deadlock, but not in case
-		 * of REQUEUE_PI.
-		 */
 		if (!(curval & FUTEX_WAITER_REQUEUED))
 			ret = -EDEADLK;
 		goto out_unlock_release_sem;
 	}
 
 	/*
-	 * Surprise - we got the lock. Just return
-	 * to userspace:
+	 * Surprise - we got the lock. Just return to userspace:
 	 */
 	if (unlikely(!curval))
 		goto out_unlock_release_sem;
 
 	uval = curval;
+
 	/*
-	 * In case of a requeue, check if there already is an owner
-	 * If not, just take the futex.
+	 * Set the WAITERS flag, so the owner will know it has someone
+	 * to wake at next unlock
 	 */
-	if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) {
-		/* set current as futex owner */
-		newval = curval | current->pid;
-		lock_held = 1;
-	} else
-		/* Set the WAITERS flag, so the owner will know it has someone
-		   to wake at next unlock */
-		newval = curval | FUTEX_WAITERS;
+	newval = curval | FUTEX_WAITERS;
+
+	/*
+	 * There are two cases, where a futex might have no owner (the
+	 * owner TID is 0): OWNER_DIED or REQUEUE. We take over the
+	 * futex in this case. We also do an unconditional take over,
+	 * when the owner of the futex died.
+	 *
+	 * This is safe as we are protected by the hash bucket lock !
+	 */
+	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
+		/* Keep the OWNER_DIED and REQUEUE bits */
+		newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+		ownerdied = 0;
+		lock_taken = 1;
+	}
 
 	pagefault_disable();
 	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -1775,8 +1796,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(curval != uval))
 		goto retry_locked;
 
-	if (lock_held) {
-		set_pi_futex_owner(hb, &q.key, curr);
+	/*
+	 * We took the lock due to requeue or owner died take over.
+	 */
+	if (unlikely(lock_taken)) {
+		/* For requeue we need to fixup the pi_futex */
+		if (curval & FUTEX_WAITER_REQUEUED)
+			set_pi_futex_owner(hb, &q.key, curr);
 		goto out_unlock_release_sem;
 	}
 
@@ -1787,34 +1813,40 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
 
 	if (unlikely(ret)) {
-		/*
-		 * There were no waiters and the owner task lookup
-		 * failed. When the OWNER_DIED bit is set, then we
-		 * know that this is a robust futex and we actually
-		 * take the lock. This is safe as we are protected by
-		 * the hash bucket lock. We also set the waiters bit
-		 * unconditionally here, to simplify glibc handling of
-		 * multiple tasks racing to acquire the lock and
-		 * cleanup the problems which were left by the dead
-		 * owner.
-		 */
-		if (curval & FUTEX_OWNER_DIED) {
-			uval = newval;
-			newval = current->pid |
-				FUTEX_OWNER_DIED | FUTEX_WAITERS;
+		switch (ret) {
 
-			pagefault_disable();
-			curval = futex_atomic_cmpxchg_inatomic(uaddr,
-							       uval, newval);
-			pagefault_enable();
+		case -EAGAIN:
+			/*
+			 * Task is exiting and we just wait for the
+			 * exit to complete.
+			 */
+			queue_unlock(&q, hb);
+			if (fshared)
+				up_read(fshared);
+			cond_resched();
+			goto retry;
 
-			if (unlikely(curval == -EFAULT))
+		case -ESRCH:
+			/*
+			 * No owner found for this futex. Check if the
+			 * OWNER_DIED bit is set to figure out whether
+			 * this is a robust futex or not.
+			 */
+			if (get_futex_value_locked(&curval, uaddr))
 				goto uaddr_faulted;
-			if (unlikely(curval != uval))
+
+			/*
+			 * We simply start over in case of a robust
+			 * futex. The code above will take the futex
+			 * and return happy.
+			 */
+			if (curval & FUTEX_OWNER_DIED) {
+				ownerdied = 1;
 				goto retry_locked;
-			ret = 0;
+			}
+		default:
+			goto out_unlock_release_sem;
 		}
-		goto out_unlock_release_sem;
 	}
 
 	/*
@@ -1845,31 +1877,42 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		down_read(fshared);
 	spin_lock(q.lock_ptr);
 
-	/*
-	 * Got the lock. We might not be the anticipated owner if we
-	 * did a lock-steal - fix up the PI-state in that case.
-	 */
-	if (!ret && q.pi_state->owner != curr)
-		/* mmap_sem is unlocked at return of this function */
-		ret = fixup_pi_state_owner(uaddr, fshared, &q, hb, curr);
-	else {
+	if (!ret) {
+		/*
+		 * Got the lock. We might not be the anticipated owner
+		 * if we did a lock-steal - fix up the PI-state in
+		 * that case:
+		 */
+		if (q.pi_state->owner != curr)
+			ret = fixup_pi_state_owner(uaddr, &q, curr);
+	} else {
 		/*
 		 * Catch the rare case, where the lock was released
-		 * when we were on the way back before we locked
-		 * the hash bucket.
+		 * when we were on the way back before we locked the
+		 * hash bucket.
 		 */
-		if (ret && q.pi_state->owner == curr) {
-			if (rt_mutex_trylock(&q.pi_state->pi_mutex))
-				ret = 0;
+		if (q.pi_state->owner == curr &&
+		    rt_mutex_trylock(&q.pi_state->pi_mutex)) {
+			ret = 0;
+		} else {
+			/*
+			 * Paranoia check. If we did not take the lock
+			 * in the trylock above, then we should not be
+			 * the owner of the rtmutex, neither the real
+			 * nor the pending one:
+			 */
+			if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
+				printk(KERN_ERR "futex_lock_pi: ret = %d "
+				       "pi-mutex: %p pi-state %p\n", ret,
+				       q.pi_state->pi_mutex.owner,
+				       q.pi_state->owner);
 		}
-		/* Unqueue and drop the lock */
-		unqueue_me_pi(&q);
-		if (fshared)
-			up_read(fshared);
 	}
 
-	if (!detect && ret == -EDEADLK && 0)
-		force_sig(SIGKILL, current);
+	/* Unqueue and drop the lock */
+	unqueue_me_pi(&q);
+	if (fshared)
+		up_read(fshared);
 
 	return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
@@ -1887,16 +1930,19 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 * non-atomically. Therefore, if get_user below is not
 	 * enough, we need to handle the fault ourselves, while
 	 * still holding the mmap_sem.
+	 *
+	 * ... and hb->lock. :-) --ANK
 	 */
+	queue_unlock(&q, hb);
+
 	if (attempt++) {
 		ret = futex_handle_fault((unsigned long)uaddr, fshared,
 					 attempt);
 		if (ret)
-			goto out_unlock_release_sem;
-		goto retry_locked;
+			goto out_release_sem;
+		goto retry_unlocked;
 	}
 
-	queue_unlock(&q, hb);
 	if (fshared)
 		up_read(fshared);
 
@@ -1940,9 +1986,9 @@ retry:
 		goto out;
 
 	hb = hash_futex(&key);
+retry_unlocked:
 	spin_lock(&hb->lock);
 
-retry_locked:
 	/*
 	 * To avoid races, try to do the TID -> 0 atomic transition
 	 * again. If it succeeds then we can return without waking
@@ -2005,16 +2051,19 @@ pi_faulted:
 	 * non-atomically. Therefore, if get_user below is not
 	 * enough, we need to handle the fault ourselves, while
 	 * still holding the mmap_sem.
+	 *
+	 * ... and hb->lock. --ANK
 	 */
+	spin_unlock(&hb->lock);
+
 	if (attempt++) {
 		ret = futex_handle_fault((unsigned long)uaddr, fshared,
 					 attempt);
 		if (ret)
-			goto out_unlock;
-		goto retry_locked;
+			goto out;
+		goto retry_unlocked;
 	}
 
-	spin_unlock(&hb->lock);
 	if (fshared)
 		up_read(fshared);
 
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 338a9b489fbc..27478948b318 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -144,20 +144,21 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
 	struct timespec ts;
 	ktime_t t, *tp = NULL;
 	int val2 = 0;
+	int cmd = op & FUTEX_CMD_MASK;
 
-	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
+	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
 		if (get_compat_timespec(&ts, utime))
 			return -EFAULT;
 		if (!timespec_valid(&ts))
 			return -EINVAL;
 
 		t = timespec_to_ktime(ts);
-		if (op == FUTEX_WAIT)
+		if (cmd == FUTEX_WAIT)
 			t = ktime_add(ktime_get(), t);
 		tp = &t;
 	}
-	if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
-	    || op == FUTEX_CMP_REQUEUE_PI)
+	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE
+	    || cmd == FUTEX_CMP_REQUEUE_PI)
 		val2 = (int) (unsigned long) utime;
 
 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index b0d81aae472f..bd9e272d55e9 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -135,6 +135,39 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
 	}
 }
 
+static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
+{
+	struct irqaction *action;
+
+	if (!irqfixup)
+		return 0;
+
+	/* We didn't actually handle the IRQ - see if it was misrouted? */
+	if (action_ret == IRQ_NONE)
+		return 1;
+
+	/*
+	 * But for 'irqfixup == 2' we also do it for handled interrupts if
+	 * they are marked as IRQF_IRQPOLL (or for irq zero, which is the
+	 * traditional PC timer interrupt.. Legacy)
+	 */
+	if (irqfixup < 2)
+		return 0;
+
+	if (!irq)
+		return 1;
+
+	/*
+	 * Since we don't get the descriptor lock, "action" can
+	 * change under us. We don't really care, but we don't
+	 * want to follow a NULL pointer. So tell the compiler to
+	 * just load it once by using a barrier.
+	 */
+	action = desc->action;
+	barrier();
+	return action && (action->flags & IRQF_IRQPOLL);
+}
+
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
@@ -144,15 +177,10 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		report_bad_irq(irq, desc, action_ret);
 	}
 
-	if (unlikely(irqfixup)) {
-		/* Don't punish working computers */
-		if ((irqfixup == 2 && ((irq == 0) ||
-				(desc->action->flags & IRQF_IRQPOLL))) ||
-				action_ret == IRQ_NONE) {
-			int ok = misrouted_irq(irq);
-			if (action_ret == IRQ_NONE)
-				desc->irqs_unhandled -= ok;
-		}
+	if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
+		int ok = misrouted_irq(irq);
+		if (action_ret == IRQ_NONE)
+			desc->irqs_unhandled -= ok;
 	}
 
 	desc->irq_count++;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index f1bda23140b2..fed54418626c 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -257,7 +257,8 @@ const char *kallsyms_lookup(unsigned long addr,
 		pos = get_symbol_pos(addr, symbolsize, offset);
 		/* Grab name */
 		kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
-		*modname = NULL;
+		if (modname)
+			*modname = NULL;
 		return namebuf;
 	}
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index df8a8e8f6ca4..bbd51b81a3e8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -70,7 +70,7 @@ static int kthread(void *_create)
 	data = create->data;
 
 	/* OK, tell user we're spawned, wait for stop or wakeup */
-	__set_current_state(TASK_INTERRUPTIBLE);
+	__set_current_state(TASK_UNINTERRUPTIBLE);
 	complete(&create->started);
 	schedule();
 
@@ -162,7 +162,10 @@ EXPORT_SYMBOL(kthread_create);
  */
 void kthread_bind(struct task_struct *k, unsigned int cpu)
 {
-	BUG_ON(k->state != TASK_INTERRUPTIBLE);
+	if (k->state != TASK_UNINTERRUPTIBLE) {
+		WARN_ON(1);
+		return;
+	}
 	/* Must have done schedule() in kthread() before we set_task_cpu */
 	wait_task_inactive(k);
 	set_task_cpu(k, cpu);
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b5f0543ed84d..f445b9cd60fb 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -416,7 +416,8 @@ static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
 
 	mutex_lock(&pm_mutex);
 	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
-		if (!strncmp(buf, hibernation_modes[i], len)) {
+		if (len == strlen(hibernation_modes[i])
+		    && !strncmp(buf, hibernation_modes[i], len)) {
 			mode = i;
 			break;
 		}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 40d56a31245e..8812985f3029 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -97,25 +97,26 @@ static int suspend_prepare(suspend_state_t state)
 		}
 	}
 
-	if (pm_ops->prepare) {
-		if ((error = pm_ops->prepare(state)))
-			goto Thaw;
-	}
-
 	suspend_console();
 	error = device_suspend(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "Some devices failed to suspend\n");
-		goto Resume_devices;
+		goto Resume_console;
 	}
+	if (pm_ops->prepare) {
+		if ((error = pm_ops->prepare(state)))
+			goto Resume_devices;
+	}
+
 	error = disable_nonboot_cpus();
 	if (!error)
 		return 0;
 
 	enable_nonboot_cpus();
- Resume_devices:
 	pm_finish(state);
+ Resume_devices:
 	device_resume();
+ Resume_console:
 	resume_console();
  Thaw:
 	thaw_processes();
@@ -289,13 +290,13 @@ static ssize_t state_store(struct kset *kset, const char *buf, size_t n)
 	len = p ? p - buf : n;
 
 	/* First, check if we are requested to hibernate */
-	if (!strncmp(buf, "disk", len)) {
+	if (len == 4 && !strncmp(buf, "disk", len)) {
 		error = hibernate();
 		return error ? error : n;
 	}
 
 	for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
-		if (*s && !strncmp(buf, *s, len))
+		if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
 			break;
 	}
 	if (state < PM_SUSPEND_MAX && *s)
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 088419387388..e0233d8422b9 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -31,16 +31,36 @@ static inline int freezeable(struct task_struct * p)
 	return 1;
 }
 
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+	if (!unlikely(current->flags & PF_NOFREEZE)) {
+		current->flags |= PF_FROZEN;
+		wmb();
+	}
+	clear_tsk_thread_flag(current, TIF_FREEZE);
+}
+
 /* Refrigerator is place where frozen processes are stored :-). */
 void refrigerator(void)
 {
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
 	long save;
+
+	task_lock(current);
+	if (freezing(current)) {
+		frozen_process();
+		task_unlock(current);
+	} else {
+		task_unlock(current);
+		return;
+	}
 	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
 
-	frozen_process(current);
 	spin_lock_irq(&current->sighand->siglock);
 	recalc_sigpending(); /* We sent fake signal, clean it up */
 	spin_unlock_irq(&current->sighand->siglock);
@@ -81,7 +101,7 @@ static void cancel_freezing(struct task_struct *p)
 		pr_debug(" clean up: %s\n", p->comm);
 		do_not_freeze(p);
 		spin_lock_irqsave(&p->sighand->siglock, flags);
-		recalc_sigpending_tsk(p);
+		recalc_sigpending_and_wake(p);
 		spin_unlock_irqrestore(&p->sighand->siglock, flags);
 	}
 }
@@ -112,22 +132,12 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
 				cancel_freezing(p);
 				continue;
 			}
-			if (is_user_space(p)) {
-				if (!freeze_user_space)
-					continue;
-
-				/* Freeze the task unless there is a vfork
-				 * completion pending
-				 */
-				if (!p->vfork_done)
-					freeze_process(p);
-			} else {
-				if (freeze_user_space)
-					continue;
-
-				freeze_process(p);
-			}
-			todo++;
+			if (freeze_user_space && !is_user_space(p))
+				continue;
+
+			freeze_process(p);
+			if (!freezer_should_skip(p))
+				todo++;
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 		yield();	/* Yield is okay here */
@@ -149,13 +159,16 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
 				TIMEOUT / HZ, todo);
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (is_user_space(p) == !freeze_user_space)
+			if (freeze_user_space && !is_user_space(p))
 				continue;
 
-			if (freezeable(p) && !frozen(p))
+			task_lock(p);
+			if (freezeable(p) && !frozen(p) &&
+			    !freezer_should_skip(p))
 				printk(KERN_ERR " %s\n", p->comm);
 
 			cancel_freezing(p);
+			task_unlock(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 	}
@@ -200,9 +213,7 @@ static void thaw_tasks(int thaw_user_space)
 		if (is_user_space(p) == !thaw_user_space)
 			continue;
 
-		if (!thaw_process(p))
-			printk(KERN_WARNING " Strange, %s not stopped\n",
-				p->comm );
+		thaw_process(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 }
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b8b235cc19d1..8b1a1b837145 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -584,7 +584,7 @@ int swsusp_check(void)
 	resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
 	if (!IS_ERR(resume_bdev)) {
 		set_blocksize(resume_bdev, PAGE_SIZE);
-		memset(swsusp_header, 0, sizeof(PAGE_SIZE));
+		memset(swsusp_header, 0, PAGE_SIZE);
 		error = bio_read_page(swsusp_resume_block,
 					swsusp_header, NULL);
 		if (error)
diff --git a/kernel/profile.c b/kernel/profile.c
index cc91b9bf759d..5b20fe977bed 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -26,6 +26,7 @@
 #include <asm/sections.h>
 #include <asm/semaphore.h>
 #include <asm/irq_regs.h>
+#include <asm/ptrace.h>
 
 struct profile_hit {
 	u32 pc, hits;
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 12879f6c1ec3..a6fbb4130521 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -189,6 +189,19 @@ int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	if (!waiter || !waiter->task)
 		goto out_unlock_pi;
 
+	/*
+	 * Check the orig_waiter state. After we dropped the locks,
+	 * the previous owner of the lock might have released the lock
+	 * and made us the pending owner:
+	 */
+	if (orig_waiter && !orig_waiter->task)
+		goto out_unlock_pi;
+
+	/*
+	 * Drop out, when the task has no waiters. Note,
+	 * top_waiter can be NULL, when we are in the deboosting
+	 * mode!
+	 */
 	if (top_waiter && (!task_has_pi_waiters(task) ||
 			   top_waiter != task_top_pi_waiter(task)))
 		goto out_unlock_pi;
@@ -636,9 +649,16 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 			 * all over without going into schedule to try
 			 * to get the lock now:
 			 */
-			if (unlikely(!waiter.task))
+			if (unlikely(!waiter.task)) {
+				/*
+				 * Reset the return value. We might
+				 * have returned with -EDEADLK and the
+				 * owner released the lock while we
+				 * were walking the pi chain.
+				 */
+				ret = 0;
 				continue;
-
+			}
 			if (unlikely(ret))
 				break;
 		}
diff --git a/kernel/sched.c b/kernel/sched.c
index 799d23b4e35d..13cdab3b4c48 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4775,9 +4775,7 @@ int __sched cond_resched_softirq(void)
 	BUG_ON(!in_softirq());
 
 	if (need_resched() && system_state == SYSTEM_RUNNING) {
-		raw_local_irq_disable();
-		_local_bh_enable();
-		raw_local_irq_enable();
+		local_bh_enable();
 		__cond_resched();
 		local_bh_disable();
 		return 1;
diff --git a/kernel/signal.c b/kernel/signal.c
index 364fc95bf97c..fe590e00db8d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -96,20 +96,38 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
 
 #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
 
-fastcall void recalc_sigpending_tsk(struct task_struct *t)
+static int recalc_sigpending_tsk(struct task_struct *t)
 {
 	if (t->signal->group_stop_count > 0 ||
 	    (freezing(t)) ||
 	    PENDING(&t->pending, &t->blocked) ||
-	    PENDING(&t->signal->shared_pending, &t->blocked))
+	    PENDING(&t->signal->shared_pending, &t->blocked)) {
 		set_tsk_thread_flag(t, TIF_SIGPENDING);
-	else
-		clear_tsk_thread_flag(t, TIF_SIGPENDING);
+		return 1;
+	}
+	/*
+	 * We must never clear the flag in another thread, or in current
+	 * when it's possible the current syscall is returning -ERESTART*.
+	 * So we don't clear it here, and only callers who know they should do.
+	 */
+	return 0;
+}
+
+/*
+ * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
+ * This is superfluous when called on current, the wakeup is a harmless no-op.
+ */
+void recalc_sigpending_and_wake(struct task_struct *t)
+{
+	if (recalc_sigpending_tsk(t))
+		signal_wake_up(t, 0);
 }
 
 void recalc_sigpending(void)
 {
-	recalc_sigpending_tsk(current);
+	if (!recalc_sigpending_tsk(current))
+		clear_thread_flag(TIF_SIGPENDING);
+
 }
 
 /* Given the mask, find the first available signal that should be serviced. */
@@ -373,7 +391,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 			}
 		}
 	}
-	recalc_sigpending_tsk(tsk);
+	if (likely(tsk == current))
+		recalc_sigpending();
 	if (signr && unlikely(sig_kernel_stop(signr))) {
 		/*
 		 * Set a marker that we have dequeued a stop signal. Our
@@ -744,7 +763,7 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 		action->sa.sa_handler = SIG_DFL;
 		if (blocked) {
 			sigdelset(&t->blocked, sig);
-			recalc_sigpending_tsk(t);
+			recalc_sigpending_and_wake(t);
 		}
 	}
 	ret = specific_send_sig_info(sig, info, t);
@@ -1568,8 +1587,9 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
 	/*
 	 * Queued signals ignored us while we were stopped for tracing.
 	 * So check for any that we should take before resuming user mode.
+	 * This sets TIF_SIGPENDING, but never clears it.
 	 */
-	recalc_sigpending();
+	recalc_sigpending_tsk(current);
 }
 
 void ptrace_notify(int exit_code)
@@ -2273,7 +2293,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 		rm_from_queue_full(&mask, &t->signal->shared_pending);
 		do {
 			rm_from_queue_full(&mask, &t->pending);
-			recalc_sigpending_tsk(t);
+			recalc_sigpending_and_wake(t);
 			t = next_thread(t);
 		} while (t != current);
 	}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4073353abd4f..30ee462ee79f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -227,7 +227,7 @@ static ctl_table kern_table[] = {
 		.ctl_name	= KERN_CORE_PATTERN,
 		.procname	= "core_pattern",
 		.data		= core_pattern,
-		.maxlen		= 128,
+		.maxlen		= CORENAME_MAX_SIZE,
 		.mode		= 0644,
 		.proc_handler	= &proc_dostring,
 		.strategy	= &sysctl_string,
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3db5c3c460d7..51b6a6a6158c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -74,7 +74,7 @@ static struct clocksource *watchdog;
 static struct timer_list watchdog_timer;
 static DEFINE_SPINLOCK(watchdog_lock);
 static cycle_t watchdog_last;
-static int watchdog_resumed;
+static unsigned long watchdog_resumed;
 
 /*
  * Interval: 0.5sec Threshold: 0.0625s
@@ -104,9 +104,7 @@ static void clocksource_watchdog(unsigned long data)
 
 	spin_lock(&watchdog_lock);
 
-	resumed = watchdog_resumed;
-	if (unlikely(resumed))
-		watchdog_resumed = 0;
+	resumed = test_and_clear_bit(0, &watchdog_resumed);
 
 	wdnow = watchdog->read();
 	wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
@@ -151,9 +149,7 @@ static void clocksource_watchdog(unsigned long data)
 }
 static void clocksource_resume_watchdog(void)
 {
-	spin_lock(&watchdog_lock);
-	watchdog_resumed = 1;
-	spin_unlock(&watchdog_lock);
+	set_bit(0, &watchdog_resumed);
 }
 
 static void clocksource_check_watchdog(struct clocksource *cs)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index cb25649c6f50..87aa5ff931e0 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -11,6 +11,8 @@
 #include <linux/mm.h>
 #include <linux/time.h>
 #include <linux/timex.h>
+#include <linux/jiffies.h>
+#include <linux/hrtimer.h>
 
 #include <asm/div64.h>
 #include <asm/timex.h>
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index eadfce2fff74..8001d37071f5 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -243,11 +243,18 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu) | |||
243 | { | 243 | { |
244 | int cpu = get_cpu(); | 244 | int cpu = get_cpu(); |
245 | 245 | ||
246 | if (cpu == *oncpu) | 246 | if (!cpu_isset(*oncpu, cpu_online_map)) { |
247 | tick_do_broadcast_on_off(&reason); | 247 | printk(KERN_ERR "tick-braodcast: ignoring broadcast for " |
248 | else | 248 | "offline CPU #%d\n", *oncpu); |
249 | smp_call_function_single(*oncpu, tick_do_broadcast_on_off, | 249 | } else { |
250 | &reason, 1, 1); | 250 | |
251 | if (cpu == *oncpu) | ||
252 | tick_do_broadcast_on_off(&reason); | ||
253 | else | ||
254 | smp_call_function_single(*oncpu, | ||
255 | tick_do_broadcast_on_off, | ||
256 | &reason, 1, 1); | ||
257 | } | ||
251 | put_cpu(); | 258 | put_cpu(); |
252 | } | 259 | } |
253 | 260 | ||
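The tick-broadcast.c hunk adds a guard before the cross-CPU call: asking smp_call_function_single() to run on a CPU that is not in cpu_online_map is invalid, so the request is now logged and dropped. A condensed sketch of that guard, using a hypothetical helper name and the five-argument smp_call_function_single() of this tree:

/* run fn(arg) on 'cpu', but only if that CPU is actually online */
static void run_on_cpu(int cpu, void (*fn)(void *), void *arg)
{
        int this_cpu = get_cpu();

        if (!cpu_isset(cpu, cpu_online_map))
                printk(KERN_ERR "ignoring request for offline CPU #%d\n", cpu);
        else if (cpu == this_cpu)
                fn(arg);                                /* run locally */
        else
                smp_call_function_single(cpu, fn, arg, 1, 1);   /* remote, wait */

        put_cpu();
}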
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3483e6cb9549..52db9e3c526e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -167,9 +167,15 @@ void tick_nohz_stop_sched_tick(void) | |||
167 | goto end; | 167 | goto end; |
168 | 168 | ||
169 | cpu = smp_processor_id(); | 169 | cpu = smp_processor_id(); |
170 | if (unlikely(local_softirq_pending())) | 170 | if (unlikely(local_softirq_pending())) { |
171 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | 171 | static int ratelimit; |
172 | local_softirq_pending()); | 172 | |
173 | if (ratelimit < 10) { | ||
174 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | ||
175 | local_softirq_pending()); | ||
176 | ratelimit++; | ||
177 | } | ||
178 | } | ||
173 | 179 | ||
174 | now = ktime_get(); | 180 | now = ktime_get(); |
175 | /* | 181 | /* |
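The first tick-sched.c hunk rate-limits the softirq warning with a function-local static counter, so a CPU that repeatedly enters the idle path with softirqs pending can emit at most ten messages instead of flooding the log. The same pattern in isolation (illustrative name):

static void warn_softirq_pending(unsigned int pending)
{
        static int ratelimit;           /* per call site, not per CPU */

        if (ratelimit < 10) {
                printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", pending);
                ratelimit++;
        }
}

The counter is not atomic; a racing increment can at worst let a couple of extra messages through, which is acceptable for a diagnostic.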
@@ -241,6 +247,21 @@ void tick_nohz_stop_sched_tick(void) | |||
241 | if (cpu == tick_do_timer_cpu) | 247 | if (cpu == tick_do_timer_cpu) |
242 | tick_do_timer_cpu = -1; | 248 | tick_do_timer_cpu = -1; |
243 | 249 | ||
250 | ts->idle_sleeps++; | ||
251 | |||
252 | /* | ||
253 | * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that | ||
254 | * there is no timer pending or at least extremely far | ||
255 | * into the future (12 days for HZ=1000). In this case | ||
256 | * we simply stop the tick timer: | ||
257 | */ | ||
258 | if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { | ||
259 | ts->idle_expires.tv64 = KTIME_MAX; | ||
260 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | ||
261 | hrtimer_cancel(&ts->sched_timer); | ||
262 | goto out; | ||
263 | } | ||
264 | |||
244 | /* | 265 | /* |
245 | * calculate the expiry time for the next timer wheel | 266 | * calculate the expiry time for the next timer wheel |
246 | * timer | 267 | * timer |
@@ -248,7 +269,6 @@ void tick_nohz_stop_sched_tick(void) | |||
248 | expires = ktime_add_ns(last_update, tick_period.tv64 * | 269 | expires = ktime_add_ns(last_update, tick_period.tv64 * |
249 | delta_jiffies); | 270 | delta_jiffies); |
250 | ts->idle_expires = expires; | 271 | ts->idle_expires = expires; |
251 | ts->idle_sleeps++; | ||
252 | 272 | ||
253 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 273 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
254 | hrtimer_start(&ts->sched_timer, expires, | 274 | hrtimer_start(&ts->sched_timer, expires, |
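Taken together, the remaining tick-sched.c hunks count ts->idle_sleeps before any early exit and treat a delta of NEXT_TIMER_MAX_DELTA jiffies or more as "no timer pending": rather than programming the sched timer roughly 12 days out (at HZ=1000), the tick is cancelled outright. A condensed sketch of that decision, assuming delta_jiffies and last_update are computed as in the original function:

        ts->idle_sleeps++;

        if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
                /* nothing pending: switch the tick off completely */
                ts->idle_expires.tv64 = KTIME_MAX;
                if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
                        hrtimer_cancel(&ts->sched_timer);
        } else {
                /* re-arm the tick for the next pending timer wheel event */
                ktime_t expires = ktime_add_ns(last_update,
                                               tick_period.tv64 * delta_jiffies);

                ts->idle_expires = expires;
                if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
                        hrtimer_start(&ts->sched_timer, expires,
                                      HRTIMER_MODE_ABS);
        }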
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f9217bf644f6..3d1042f82a68 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -273,6 +273,8 @@ static int timekeeping_resume(struct sys_device *dev) | |||
273 | unsigned long flags; | 273 | unsigned long flags; |
274 | unsigned long now = read_persistent_clock(); | 274 | unsigned long now = read_persistent_clock(); |
275 | 275 | ||
276 | clocksource_resume(); | ||
277 | |||
276 | write_seqlock_irqsave(&xtime_lock, flags); | 278 | write_seqlock_irqsave(&xtime_lock, flags); |
277 | 279 | ||
278 | if (now && (now > timekeeping_suspend_time)) { | 280 | if (now && (now > timekeeping_suspend_time)) { |
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 868f1bceb07f..321693724ad7 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
@@ -117,21 +117,6 @@ static struct entry entries[MAX_ENTRIES]; | |||
117 | 117 | ||
118 | static atomic_t overflow_count; | 118 | static atomic_t overflow_count; |
119 | 119 | ||
120 | static void reset_entries(void) | ||
121 | { | ||
122 | nr_entries = 0; | ||
123 | memset(entries, 0, sizeof(entries)); | ||
124 | atomic_set(&overflow_count, 0); | ||
125 | } | ||
126 | |||
127 | static struct entry *alloc_entry(void) | ||
128 | { | ||
129 | if (nr_entries >= MAX_ENTRIES) | ||
130 | return NULL; | ||
131 | |||
132 | return entries + nr_entries++; | ||
133 | } | ||
134 | |||
135 | /* | 120 | /* |
136 | * The entries are in a hash-table, for fast lookup: | 121 | * The entries are in a hash-table, for fast lookup: |
137 | */ | 122 | */ |
@@ -149,6 +134,22 @@ static struct entry *alloc_entry(void) | |||
149 | 134 | ||
150 | static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; | 135 | static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; |
151 | 136 | ||
137 | static void reset_entries(void) | ||
138 | { | ||
139 | nr_entries = 0; | ||
140 | memset(entries, 0, sizeof(entries)); | ||
141 | memset(tstat_hash_table, 0, sizeof(tstat_hash_table)); | ||
142 | atomic_set(&overflow_count, 0); | ||
143 | } | ||
144 | |||
145 | static struct entry *alloc_entry(void) | ||
146 | { | ||
147 | if (nr_entries >= MAX_ENTRIES) | ||
148 | return NULL; | ||
149 | |||
150 | return entries + nr_entries++; | ||
151 | } | ||
152 | |||
152 | static int match_entries(struct entry *entry1, struct entry *entry2) | 153 | static int match_entries(struct entry *entry1, struct entry *entry2) |
153 | { | 154 | { |
154 | return entry1->timer == entry2->timer && | 155 | return entry1->timer == entry2->timer && |
@@ -202,12 +203,15 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm) | |||
202 | if (curr) { | 203 | if (curr) { |
203 | *curr = *entry; | 204 | *curr = *entry; |
204 | curr->count = 0; | 205 | curr->count = 0; |
206 | curr->next = NULL; | ||
205 | memcpy(curr->comm, comm, TASK_COMM_LEN); | 207 | memcpy(curr->comm, comm, TASK_COMM_LEN); |
208 | |||
209 | smp_mb(); /* Ensure that curr is initialized before insert */ | ||
210 | |||
206 | if (prev) | 211 | if (prev) |
207 | prev->next = curr; | 212 | prev->next = curr; |
208 | else | 213 | else |
209 | *head = curr; | 214 | *head = curr; |
210 | curr->next = NULL; | ||
211 | } | 215 | } |
212 | out_unlock: | 216 | out_unlock: |
213 | spin_unlock(&table_lock); | 217 | spin_unlock(&table_lock); |
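In tstat_lookup() the new entry is now fully initialised, including curr->next = NULL, before an smp_mb(), and only afterwards linked into the hash chain; the old code set curr->next after the entry was already reachable. The general publish pattern, sketched with a hypothetical node type:

struct node {
        void *key;
        struct node *next;
};

/* writer side, called with the table lock held */
static void publish(struct node **link, struct node *node, void *key)
{
        node->key  = key;
        node->next = NULL;

        smp_mb();       /* finish the node before it becomes reachable */

        *link = node;   /* link is &head or &prev->next */
}

A lock-less reader walking ->next can then never observe a half-initialised entry (strictly it would also need a matching read-side ordering; the hunk only adds the writer side).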
@@ -232,10 +236,15 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
232 | /* | 236 | /* |
233 | * It doesn't matter which lock we take: | 237 | * It doesn't matter which lock we take: |
234 | */ | 238 | */ |
235 | spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id()); | 239 | spinlock_t *lock; |
236 | struct entry *entry, input; | 240 | struct entry *entry, input; |
237 | unsigned long flags; | 241 | unsigned long flags; |
238 | 242 | ||
243 | if (likely(!active)) | ||
244 | return; | ||
245 | |||
246 | lock = &per_cpu(lookup_lock, raw_smp_processor_id()); | ||
247 | |||
239 | input.timer = timer; | 248 | input.timer = timer; |
240 | input.start_func = startf; | 249 | input.start_func = startf; |
241 | input.expire_func = timerf; | 250 | input.expire_func = timerf; |
@@ -360,6 +369,7 @@ static ssize_t tstats_write(struct file *file, const char __user *buf, | |||
360 | if (!active) { | 369 | if (!active) { |
361 | reset_entries(); | 370 | reset_entries(); |
362 | time_start = ktime_get(); | 371 | time_start = ktime_get(); |
372 | smp_mb(); | ||
363 | active = 1; | 373 | active = 1; |
364 | } | 374 | } |
365 | break; | 375 | break; |
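Two related timer_stats.c changes: timer_stats_update_stats() now returns before touching the per-CPU lookup_lock when collection is disabled, and tstats_write() issues smp_mb() after resetting the tables but before setting active = 1, so a CPU that sees the flag set also sees the freshly reset state. A sketch of the flag/barrier pairing with illustrative names:

static int collector_active;

/* slow path: (re)start collection, as in tstats_write() */
static void collector_start(void)
{
        reset_tables();                 /* hypothetical, like reset_entries() */
        smp_mb();                       /* reset must be visible before the flag */
        collector_active = 1;
}

/* hot path: cheap early exit keeps disabled collection nearly free */
static void collector_update(void *sample)
{
        if (likely(!collector_active))
                return;

        /* ... take the per-CPU lock and account the sample ... */
}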
diff --git a/kernel/timer.c b/kernel/timer.c index a6c580ac084b..1a69705c2fb9 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -666,7 +666,7 @@ static inline void __run_timers(tvec_base_t *base) | |||
666 | static unsigned long __next_timer_interrupt(tvec_base_t *base) | 666 | static unsigned long __next_timer_interrupt(tvec_base_t *base) |
667 | { | 667 | { |
668 | unsigned long timer_jiffies = base->timer_jiffies; | 668 | unsigned long timer_jiffies = base->timer_jiffies; |
669 | unsigned long expires = timer_jiffies + (LONG_MAX >> 1); | 669 | unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA; |
670 | int index, slot, array, found = 0; | 670 | int index, slot, array, found = 0; |
671 | struct timer_list *nte; | 671 | struct timer_list *nte; |
672 | tvec_t *varray[4]; | 672 | tvec_t *varray[4]; |
@@ -752,6 +752,14 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now, | |||
752 | 752 | ||
753 | tsdelta = ktime_to_timespec(hr_delta); | 753 | tsdelta = ktime_to_timespec(hr_delta); |
754 | delta = timespec_to_jiffies(&tsdelta); | 754 | delta = timespec_to_jiffies(&tsdelta); |
755 | |||
756 | /* | ||
757 | * Limit the delta to the max value, which is checked in | ||
758 | * tick_nohz_stop_sched_tick(): | ||
759 | */ | ||
760 | if (delta > NEXT_TIMER_MAX_DELTA) | ||
761 | delta = NEXT_TIMER_MAX_DELTA; | ||
762 | |||
755 | /* | 763 | /* |
756 | * Take rounding errors into account and make sure that it | 764 | * Take rounding errors into account and make sure that it |
757 | * expires in the next tick. Otherwise we go into an endless | 765 | * expires in the next tick. Otherwise we go into an endless |
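The timer.c hunks make NEXT_TIMER_MAX_DELTA the sentinel shared with the NOHZ code: __next_timer_interrupt() starts from it when nothing is queued, and cmp_next_hrtimer_event() clamps the hrtimer-derived delta to it, so the check in tick_nohz_stop_sched_tick() shown earlier always fires for the "nothing pending" case. The producer-side clamp in isolation:

        /* never report an event further out than the NOHZ code tests for */
        if (delta > NEXT_TIMER_MAX_DELTA)
                delta = NEXT_TIMER_MAX_DELTA;

At the time of this series NEXT_TIMER_MAX_DELTA lives in include/linux/timer.h (as ((1UL << 30) - 1), if memory serves), which at HZ=1000 is the "12 days" the tick-sched.c comment refers to.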
@@ -1499,8 +1507,6 @@ unregister_time_interpolator(struct time_interpolator *ti) | |||
1499 | prev = &curr->next; | 1507 | prev = &curr->next; |
1500 | } | 1508 | } |
1501 | 1509 | ||
1502 | clocksource_resume(); | ||
1503 | |||
1504 | write_seqlock_irqsave(&xtime_lock, flags); | 1510 | write_seqlock_irqsave(&xtime_lock, flags); |
1505 | if (ti == time_interpolator) { | 1511 | if (ti == time_interpolator) { |
1506 | /* we lost the best time-interpolator: */ | 1512 | /* we lost the best time-interpolator: */ |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fb56fedd5c02..3bebf73be976 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -47,7 +47,6 @@ struct cpu_workqueue_struct { | |||
47 | 47 | ||
48 | struct workqueue_struct *wq; | 48 | struct workqueue_struct *wq; |
49 | struct task_struct *thread; | 49 | struct task_struct *thread; |
50 | int should_stop; | ||
51 | 50 | ||
52 | int run_depth; /* Detect run_workqueue() recursion depth */ | 51 | int run_depth; /* Detect run_workqueue() recursion depth */ |
53 | } ____cacheline_aligned; | 52 | } ____cacheline_aligned; |
@@ -71,7 +70,13 @@ static LIST_HEAD(workqueues); | |||
71 | 70 | ||
72 | static int singlethread_cpu __read_mostly; | 71 | static int singlethread_cpu __read_mostly; |
73 | static cpumask_t cpu_singlethread_map __read_mostly; | 72 | static cpumask_t cpu_singlethread_map __read_mostly; |
74 | /* optimization, we could use cpu_possible_map */ | 73 | /* |
74 | * _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD | ||
75 | * flushes cwq->worklist. This means that flush_workqueue/wait_on_work | ||
76 | * which comes in between can't use for_each_online_cpu(). We could | ||
77 | * use cpu_possible_map; the cpumask below is more documentation | ||
78 | * than optimization. | ||
79 | */ | ||
75 | static cpumask_t cpu_populated_map __read_mostly; | 80 | static cpumask_t cpu_populated_map __read_mostly; |
76 | 81 | ||
77 | /* If it's single threaded, it isn't in the list of workqueues. */ | 82 | /* If it's single threaded, it isn't in the list of workqueues. */ |
@@ -272,24 +277,6 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) | |||
272 | spin_unlock_irq(&cwq->lock); | 277 | spin_unlock_irq(&cwq->lock); |
273 | } | 278 | } |
274 | 279 | ||
275 | /* | ||
276 | * NOTE: the caller must not touch *cwq if this func returns true | ||
277 | */ | ||
278 | static int cwq_should_stop(struct cpu_workqueue_struct *cwq) | ||
279 | { | ||
280 | int should_stop = cwq->should_stop; | ||
281 | |||
282 | if (unlikely(should_stop)) { | ||
283 | spin_lock_irq(&cwq->lock); | ||
284 | should_stop = cwq->should_stop && list_empty(&cwq->worklist); | ||
285 | if (should_stop) | ||
286 | cwq->thread = NULL; | ||
287 | spin_unlock_irq(&cwq->lock); | ||
288 | } | ||
289 | |||
290 | return should_stop; | ||
291 | } | ||
292 | |||
293 | static int worker_thread(void *__cwq) | 280 | static int worker_thread(void *__cwq) |
294 | { | 281 | { |
295 | struct cpu_workqueue_struct *cwq = __cwq; | 282 | struct cpu_workqueue_struct *cwq = __cwq; |
@@ -302,14 +289,15 @@ static int worker_thread(void *__cwq) | |||
302 | 289 | ||
303 | for (;;) { | 290 | for (;;) { |
304 | prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE); | 291 | prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE); |
305 | if (!freezing(current) && !cwq->should_stop | 292 | if (!freezing(current) && |
306 | && list_empty(&cwq->worklist)) | 293 | !kthread_should_stop() && |
294 | list_empty(&cwq->worklist)) | ||
307 | schedule(); | 295 | schedule(); |
308 | finish_wait(&cwq->more_work, &wait); | 296 | finish_wait(&cwq->more_work, &wait); |
309 | 297 | ||
310 | try_to_freeze(); | 298 | try_to_freeze(); |
311 | 299 | ||
312 | if (cwq_should_stop(cwq)) | 300 | if (kthread_should_stop()) |
313 | break; | 301 | break; |
314 | 302 | ||
315 | run_workqueue(cwq); | 303 | run_workqueue(cwq); |
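With cwq->should_stop gone, worker_thread() leans entirely on the kthread infrastructure: it sleeps unless it is being frozen, asked to stop, or has work queued, and the stop request arrives via kthread_stop() from cleanup_workqueue_thread() below. The canonical shape of such a loop, as a generic sketch (my_ctx and process_work() are hypothetical, not the workqueue code itself):

#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/wait.h>

struct my_ctx {
        wait_queue_head_t more_work;
        struct list_head  worklist;
};

void process_work(struct my_ctx *ctx);  /* hypothetical: drains ctx->worklist */

static int worker_fn(void *data)
{
        struct my_ctx *ctx = data;

        while (!kthread_should_stop()) {
                /* sleep until stopped or until work shows up */
                wait_event_interruptible(ctx->more_work,
                                         kthread_should_stop() ||
                                         !list_empty(&ctx->worklist));
                if (kthread_should_stop())
                        break;

                process_work(ctx);
        }
        return 0;
}

/*
 * Lifecycle:
 *      task = kthread_run(worker_fn, ctx, "my-worker");
 *      ...
 *      kthread_stop(task);     sets the stop flag, wakes the thread, waits
 */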
@@ -340,18 +328,21 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, | |||
340 | insert_work(cwq, &barr->work, tail); | 328 | insert_work(cwq, &barr->work, tail); |
341 | } | 329 | } |
342 | 330 | ||
343 | static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) | 331 | static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) |
344 | { | 332 | { |
333 | int active; | ||
334 | |||
345 | if (cwq->thread == current) { | 335 | if (cwq->thread == current) { |
346 | /* | 336 | /* |
347 | * Probably keventd trying to flush its own queue. So simply run | 337 | * Probably keventd trying to flush its own queue. So simply run |
348 | * it by hand rather than deadlocking. | 338 | * it by hand rather than deadlocking. |
349 | */ | 339 | */ |
350 | run_workqueue(cwq); | 340 | run_workqueue(cwq); |
341 | active = 1; | ||
351 | } else { | 342 | } else { |
352 | struct wq_barrier barr; | 343 | struct wq_barrier barr; |
353 | int active = 0; | ||
354 | 344 | ||
345 | active = 0; | ||
355 | spin_lock_irq(&cwq->lock); | 346 | spin_lock_irq(&cwq->lock); |
356 | if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) { | 347 | if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) { |
357 | insert_wq_barrier(cwq, &barr, 1); | 348 | insert_wq_barrier(cwq, &barr, 1); |
@@ -362,6 +353,8 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) | |||
362 | if (active) | 353 | if (active) |
363 | wait_for_completion(&barr.done); | 354 | wait_for_completion(&barr.done); |
364 | } | 355 | } |
356 | |||
357 | return active; | ||
365 | } | 358 | } |
366 | 359 | ||
367 | /** | 360 | /** |
@@ -674,7 +667,6 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | |||
674 | return PTR_ERR(p); | 667 | return PTR_ERR(p); |
675 | 668 | ||
676 | cwq->thread = p; | 669 | cwq->thread = p; |
677 | cwq->should_stop = 0; | ||
678 | 670 | ||
679 | return 0; | 671 | return 0; |
680 | } | 672 | } |
@@ -740,29 +732,27 @@ EXPORT_SYMBOL_GPL(__create_workqueue); | |||
740 | 732 | ||
741 | static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) | 733 | static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) |
742 | { | 734 | { |
743 | struct wq_barrier barr; | 735 | /* |
744 | int alive = 0; | 736 | * Our caller is either destroy_workqueue() or CPU_DEAD, |
745 | 737 | * workqueue_mutex protects cwq->thread | |
746 | spin_lock_irq(&cwq->lock); | 738 | */ |
747 | if (cwq->thread != NULL) { | 739 | if (cwq->thread == NULL) |
748 | insert_wq_barrier(cwq, &barr, 1); | 740 | return; |
749 | cwq->should_stop = 1; | ||
750 | alive = 1; | ||
751 | } | ||
752 | spin_unlock_irq(&cwq->lock); | ||
753 | 741 | ||
754 | if (alive) { | 742 | /* |
755 | wait_for_completion(&barr.done); | 743 | * If the caller is CPU_DEAD the single flush_cpu_workqueue() |
744 | * is not enough, a concurrent flush_workqueue() can insert a | ||
745 | * barrier after us. | ||
746 | * When ->worklist becomes empty it is safe to exit because no | ||
747 | * more work_structs can be queued on this cwq: flush_workqueue | ||
748 | * checks list_empty(), and a "normal" queue_work() can't use | ||
749 | * a dead CPU. | ||
750 | */ | ||
751 | while (flush_cpu_workqueue(cwq)) | ||
752 | ; | ||
756 | 753 | ||
757 | while (unlikely(cwq->thread != NULL)) | 754 | kthread_stop(cwq->thread); |
758 | cpu_relax(); | 755 | cwq->thread = NULL; |
759 | /* | ||
760 | * Wait until cwq->thread unlocks cwq->lock, | ||
761 | * it won't touch *cwq after that. | ||
762 | */ | ||
763 | smp_rmb(); | ||
764 | spin_unlock_wait(&cwq->lock); | ||
765 | } | ||
766 | } | 756 | } |
767 | 757 | ||
768 | /** | 758 | /** |
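The cleanup_workqueue_thread() rewrite above boils worker shutdown down to two steps: flush repeatedly until flush_cpu_workqueue() reports the queue was already idle (a flush racing in from another CPU may keep inserting barriers, hence the loop), then kthread_stop() the idle worker. The same shutdown shape in miniature, assuming a hypothetical flush_once() that returns non-zero whenever it actually had to wait for work:

static void shutdown_worker(struct my_queue *q) /* my_queue is illustrative */
{
        if (q->thread == NULL)
                return;

        /*
         * Drain until a flush finds nothing to do; only then is the
         * queue guaranteed empty, because no new work can be queued
         * on a queue that is being destroyed (or on a dead CPU).
         */
        while (flush_once(q))
                ;

        kthread_stop(q->thread);        /* sets the stop flag, wakes, waits */
        q->thread = NULL;
}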