about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2011-05-26 07:48:30 -0400
committer: Ingo Molnar <mingo@elte.hu> 2011-05-26 07:48:39 -0400
commit: 1102c660dd35725a11c7ca9365c237f2f42f6b30 (patch)
tree: cd32d3053b30050182218e0d36b4aed7459c48de /kernel
parent: 6e9101aeec39961308176e0f59e73ac5d37d243a (diff)
parent: 4db70f73e56961b9bcdfd0c36c62847a18b7dbb5 (diff)
Merge branch 'linus' into perf/urgent
Merge reason: Linus applied an overlapping commit:

  5f2e8e2b0bf0: kernel/watchdog.c: Use proper ANSI C prototypes

So merge it in to make sure we can iterate the file without conflicts.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Kconfig.locks 2
-rw-r--r-- kernel/auditsc.c 27
-rw-r--r-- kernel/capability.c 4
-rw-r--r-- kernel/compat.c 55
-rw-r--r-- kernel/cred.c 6
-rw-r--r-- kernel/exit.c 110
-rw-r--r-- kernel/fork.c 42
-rw-r--r-- kernel/hrtimer.c 162
-rw-r--r-- kernel/irq/proc.c 54
-rw-r--r-- kernel/kmod.c 100
-rw-r--r-- kernel/module.c 4
-rw-r--r-- kernel/mutex.c 25
-rw-r--r-- kernel/nsproxy.c 42
-rw-r--r-- kernel/pm_qos_params.c 2
-rw-r--r-- kernel/posix-cpu-timers.c 4
-rw-r--r-- kernel/posix-timers.c 27
-rw-r--r-- kernel/printk.c 87
-rw-r--r-- kernel/ptrace.c 120
-rw-r--r-- kernel/rcutiny.c 1
-rw-r--r-- kernel/rcutree.c 1
-rw-r--r-- kernel/sched.c 41
-rw-r--r-- kernel/sched_fair.c 52
-rw-r--r-- kernel/signal.c 684
-rw-r--r-- kernel/sys_ni.c 9
-rw-r--r-- kernel/sysctl.c 8
-rw-r--r-- kernel/time/alarmtimer.c 16
-rw-r--r-- kernel/time/clockevents.c 2
-rw-r--r-- kernel/time/clocksource.c 2
-rw-r--r-- kernel/time/tick-broadcast.c 16
-rw-r--r-- kernel/time/timekeeping.c 17
-rw-r--r-- kernel/utsname.c 39
-rw-r--r-- kernel/workqueue.c 4
32 files changed, 1224 insertions, 541 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 88c92fb44618..5068e2a4e75f 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -199,4 +199,4 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE
199 def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE 199 def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
200 200
201config MUTEX_SPIN_ON_OWNER 201config MUTEX_SPIN_ON_OWNER
202 def_bool SMP && !DEBUG_MUTEXES && !HAVE_DEFAULT_NO_SPIN_MUTEXES 202 def_bool SMP && !DEBUG_MUTEXES
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b33513a08beb..00d79df03e76 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -443,17 +443,25 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree)
443 443
444/* Determine if any context name data matches a rule's watch data */ 444/* Determine if any context name data matches a rule's watch data */
445/* Compare a task_struct with an audit_rule. Return 1 on match, 0 445/* Compare a task_struct with an audit_rule. Return 1 on match, 0
446 * otherwise. */ 446 * otherwise.
447 *
448 * If task_creation is true, this is an explicit indication that we are
449 * filtering a task rule at task creation time. This and tsk == current are
450 * the only situations where tsk->cred may be accessed without an rcu read lock.
451 */
447static int audit_filter_rules(struct task_struct *tsk, 452static int audit_filter_rules(struct task_struct *tsk,
448 struct audit_krule *rule, 453 struct audit_krule *rule,
449 struct audit_context *ctx, 454 struct audit_context *ctx,
450 struct audit_names *name, 455 struct audit_names *name,
451 enum audit_state *state) 456 enum audit_state *state,
457 bool task_creation)
452{ 458{
453 const struct cred *cred = get_task_cred(tsk); 459 const struct cred *cred;
454 int i, j, need_sid = 1; 460 int i, j, need_sid = 1;
455 u32 sid; 461 u32 sid;
456 462
463 cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation);
464
457 for (i = 0; i < rule->field_count; i++) { 465 for (i = 0; i < rule->field_count; i++) {
458 struct audit_field *f = &rule->fields[i]; 466 struct audit_field *f = &rule->fields[i];
459 int result = 0; 467 int result = 0;
@@ -637,10 +645,8 @@ static int audit_filter_rules(struct task_struct *tsk,
637 break; 645 break;
638 } 646 }
639 647
640 if (!result) { 648 if (!result)
641 put_cred(cred);
642 return 0; 649 return 0;
643 }
644 } 650 }
645 651
646 if (ctx) { 652 if (ctx) {
@@ -656,7 +662,6 @@ static int audit_filter_rules(struct task_struct *tsk,
656 case AUDIT_NEVER: *state = AUDIT_DISABLED; break; 662 case AUDIT_NEVER: *state = AUDIT_DISABLED; break;
657 case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; 663 case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break;
658 } 664 }
659 put_cred(cred);
660 return 1; 665 return 1;
661} 666}
662 667
@@ -671,7 +676,8 @@ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
671 676
672 rcu_read_lock(); 677 rcu_read_lock();
673 list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { 678 list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) {
674 if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) { 679 if (audit_filter_rules(tsk, &e->rule, NULL, NULL,
680 &state, true)) {
675 if (state == AUDIT_RECORD_CONTEXT) 681 if (state == AUDIT_RECORD_CONTEXT)
676 *key = kstrdup(e->rule.filterkey, GFP_ATOMIC); 682 *key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
677 rcu_read_unlock(); 683 rcu_read_unlock();
@@ -705,7 +711,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
705 list_for_each_entry_rcu(e, list, list) { 711 list_for_each_entry_rcu(e, list, list) {
706 if ((e->rule.mask[word] & bit) == bit && 712 if ((e->rule.mask[word] & bit) == bit &&
707 audit_filter_rules(tsk, &e->rule, ctx, NULL, 713 audit_filter_rules(tsk, &e->rule, ctx, NULL,
708 &state)) { 714 &state, false)) {
709 rcu_read_unlock(); 715 rcu_read_unlock();
710 ctx->current_state = state; 716 ctx->current_state = state;
711 return state; 717 return state;
@@ -743,7 +749,8 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
743 749
744 list_for_each_entry_rcu(e, list, list) { 750 list_for_each_entry_rcu(e, list, list) {
745 if ((e->rule.mask[word] & bit) == bit && 751 if ((e->rule.mask[word] & bit) == bit &&
746 audit_filter_rules(tsk, &e->rule, ctx, n, &state)) { 752 audit_filter_rules(tsk, &e->rule, ctx, n,
753 &state, false)) {
747 rcu_read_unlock(); 754 rcu_read_unlock();
748 ctx->current_state = state; 755 ctx->current_state = state;
749 return; 756 return;
diff --git a/kernel/capability.c b/kernel/capability.c
index 32a80e08ff4b..283c529f8b1c 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -22,12 +22,8 @@
22 */ 22 */
23 23
24const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; 24const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
25const kernel_cap_t __cap_full_set = CAP_FULL_SET;
26const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET;
27 25
28EXPORT_SYMBOL(__cap_empty_set); 26EXPORT_SYMBOL(__cap_empty_set);
29EXPORT_SYMBOL(__cap_full_set);
30EXPORT_SYMBOL(__cap_init_eff_set);
31 27
32int file_caps_enabled = 1; 28int file_caps_enabled = 1;
33 29
diff --git a/kernel/compat.c b/kernel/compat.c
index 38b1d2c1cbe8..fc9eb093acd5 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -293,6 +293,8 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
293 return compat_jiffies_to_clock_t(jiffies); 293 return compat_jiffies_to_clock_t(jiffies);
294} 294}
295 295
296#ifdef __ARCH_WANT_SYS_SIGPENDING
297
296/* 298/*
297 * Assumption: old_sigset_t and compat_old_sigset_t are both 299 * Assumption: old_sigset_t and compat_old_sigset_t are both
298 * types that can be passed to put_user()/get_user(). 300 * types that can be passed to put_user()/get_user().
@@ -312,6 +314,10 @@ asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set)
312 return ret; 314 return ret;
313} 315}
314 316
317#endif
318
319#ifdef __ARCH_WANT_SYS_SIGPROCMASK
320
315asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, 321asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
316 compat_old_sigset_t __user *oset) 322 compat_old_sigset_t __user *oset)
317{ 323{
@@ -333,6 +339,8 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set,
333 return ret; 339 return ret;
334} 340}
335 341
342#endif
343
336asmlinkage long compat_sys_setrlimit(unsigned int resource, 344asmlinkage long compat_sys_setrlimit(unsigned int resource,
337 struct compat_rlimit __user *rlim) 345 struct compat_rlimit __user *rlim)
338{ 346{
@@ -890,10 +898,9 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
890{ 898{
891 compat_sigset_t s32; 899 compat_sigset_t s32;
892 sigset_t s; 900 sigset_t s;
893 int sig;
894 struct timespec t; 901 struct timespec t;
895 siginfo_t info; 902 siginfo_t info;
896 long ret, timeout = 0; 903 long ret;
897 904
898 if (sigsetsize != sizeof(sigset_t)) 905 if (sigsetsize != sizeof(sigset_t))
899 return -EINVAL; 906 return -EINVAL;
@@ -901,51 +908,19 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
901 if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t))) 908 if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
902 return -EFAULT; 909 return -EFAULT;
903 sigset_from_compat(&s, &s32); 910 sigset_from_compat(&s, &s32);
904 sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP));
905 signotset(&s);
906 911
907 if (uts) { 912 if (uts) {
908 if (get_compat_timespec (&t, uts)) 913 if (get_compat_timespec(&t, uts))
909 return -EFAULT; 914 return -EFAULT;
910 if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0
911 || t.tv_sec < 0)
912 return -EINVAL;
913 } 915 }
914 916
915 spin_lock_irq(&current->sighand->siglock); 917 ret = do_sigtimedwait(&s, &info, uts ? &t : NULL);
916 sig = dequeue_signal(current, &s, &info);
917 if (!sig) {
918 timeout = MAX_SCHEDULE_TIMEOUT;
919 if (uts)
920 timeout = timespec_to_jiffies(&t)
921 +(t.tv_sec || t.tv_nsec);
922 if (timeout) {
923 current->real_blocked = current->blocked;
924 sigandsets(&current->blocked, &current->blocked, &s);
925
926 recalc_sigpending();
927 spin_unlock_irq(&current->sighand->siglock);
928
929 timeout = schedule_timeout_interruptible(timeout);
930
931 spin_lock_irq(&current->sighand->siglock);
932 sig = dequeue_signal(current, &s, &info);
933 current->blocked = current->real_blocked;
934 siginitset(&current->real_blocked, 0);
935 recalc_sigpending();
936 }
937 }
938 spin_unlock_irq(&current->sighand->siglock);
939 918
940 if (sig) { 919 if (ret > 0 && uinfo) {
941 ret = sig; 920 if (copy_siginfo_to_user32(uinfo, &info))
942 if (uinfo) { 921 ret = -EFAULT;
943 if (copy_siginfo_to_user32(uinfo, &info))
944 ret = -EFAULT;
945 }
946 }else {
947 ret = timeout?-EINTR:-EAGAIN;
948 } 922 }
923
949 return ret; 924 return ret;
950 925
951} 926}
diff --git a/kernel/cred.c b/kernel/cred.c
index 8093c16b84b1..e12c8af793f8 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -49,10 +49,10 @@ struct cred init_cred = {
49 .magic = CRED_MAGIC, 49 .magic = CRED_MAGIC,
50#endif 50#endif
51 .securebits = SECUREBITS_DEFAULT, 51 .securebits = SECUREBITS_DEFAULT,
52 .cap_inheritable = CAP_INIT_INH_SET, 52 .cap_inheritable = CAP_EMPTY_SET,
53 .cap_permitted = CAP_FULL_SET, 53 .cap_permitted = CAP_FULL_SET,
54 .cap_effective = CAP_INIT_EFF_SET, 54 .cap_effective = CAP_FULL_SET,
55 .cap_bset = CAP_INIT_BSET, 55 .cap_bset = CAP_FULL_SET,
56 .user = INIT_USER, 56 .user = INIT_USER,
57 .user_ns = &init_user_ns, 57 .user_ns = &init_user_ns,
58 .group_info = &init_groups, 58 .group_info = &init_groups,
diff --git a/kernel/exit.c b/kernel/exit.c
index 8dd874181542..20a406471525 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1377,11 +1377,23 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
1377 return NULL; 1377 return NULL;
1378} 1378}
1379 1379
1380/* 1380/**
1381 * Handle sys_wait4 work for one task in state TASK_STOPPED. We hold 1381 * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED
1382 * read_lock(&tasklist_lock) on entry. If we return zero, we still hold 1382 * @wo: wait options
1383 * the lock and this task is uninteresting. If we return nonzero, we have 1383 * @ptrace: is the wait for ptrace
1384 * released the lock and the system call should return. 1384 * @p: task to wait for
1385 *
1386 * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
1387 *
1388 * CONTEXT:
1389 * read_lock(&tasklist_lock), which is released if return value is
1390 * non-zero. Also, grabs and releases @p->sighand->siglock.
1391 *
1392 * RETURNS:
1393 * 0 if wait condition didn't exist and search for other wait conditions
1394 * should continue. Non-zero return, -errno on failure and @p's pid on
1395 * success, implies that tasklist_lock is released and wait condition
1396 * search should terminate.
1385 */ 1397 */
1386static int wait_task_stopped(struct wait_opts *wo, 1398static int wait_task_stopped(struct wait_opts *wo,
1387 int ptrace, struct task_struct *p) 1399 int ptrace, struct task_struct *p)
@@ -1397,6 +1409,9 @@ static int wait_task_stopped(struct wait_opts *wo,
1397 if (!ptrace && !(wo->wo_flags & WUNTRACED)) 1409 if (!ptrace && !(wo->wo_flags & WUNTRACED))
1398 return 0; 1410 return 0;
1399 1411
1412 if (!task_stopped_code(p, ptrace))
1413 return 0;
1414
1400 exit_code = 0; 1415 exit_code = 0;
1401 spin_lock_irq(&p->sighand->siglock); 1416 spin_lock_irq(&p->sighand->siglock);
1402 1417
@@ -1538,33 +1553,84 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
1538 return 0; 1553 return 0;
1539 } 1554 }
1540 1555
1541 if (likely(!ptrace) && unlikely(task_ptrace(p))) { 1556 /* dead body doesn't have much to contribute */
1557 if (p->exit_state == EXIT_DEAD)
1558 return 0;
1559
1560 /* slay zombie? */
1561 if (p->exit_state == EXIT_ZOMBIE) {
1562 /*
1563 * A zombie ptracee is only visible to its ptracer.
1564 * Notification and reaping will be cascaded to the real
1565 * parent when the ptracer detaches.
1566 */
1567 if (likely(!ptrace) && unlikely(task_ptrace(p))) {
1568 /* it will become visible, clear notask_error */
1569 wo->notask_error = 0;
1570 return 0;
1571 }
1572
1573 /* we don't reap group leaders with subthreads */
1574 if (!delay_group_leader(p))
1575 return wait_task_zombie(wo, p);
1576
1542 /* 1577 /*
1543 * This child is hidden by ptrace. 1578 * Allow access to stopped/continued state via zombie by
1544 * We aren't allowed to see it now, but eventually we will. 1579 * falling through. Clearing of notask_error is complex.
1580 *
1581 * When !@ptrace:
1582 *
1583 * If WEXITED is set, notask_error should naturally be
1584 * cleared. If not, subset of WSTOPPED|WCONTINUED is set,
1585 * so, if there are live subthreads, there are events to
1586 * wait for. If all subthreads are dead, it's still safe
1587 * to clear - this function will be called again in finite
1588 * amount time once all the subthreads are released and
1589 * will then return without clearing.
1590 *
1591 * When @ptrace:
1592 *
1593 * Stopped state is per-task and thus can't change once the
1594 * target task dies. Only continued and exited can happen.
1595 * Clear notask_error if WCONTINUED | WEXITED.
1596 */
1597 if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
1598 wo->notask_error = 0;
1599 } else {
1600 /*
1601 * If @p is ptraced by a task in its real parent's group,
1602 * hide group stop/continued state when looking at @p as
1603 * the real parent; otherwise, a single stop can be
1604 * reported twice as group and ptrace stops.
1605 *
1606 * If a ptracer wants to distinguish the two events for its
1607 * own children, it should create a separate process which
1608 * takes the role of real parent.
1609 */
1610 if (likely(!ptrace) && task_ptrace(p) &&
1611 same_thread_group(p->parent, p->real_parent))
1612 return 0;
1613
1614 /*
1615 * @p is alive and it's gonna stop, continue or exit, so
1616 * there always is something to wait for.
1545 */ 1617 */
1546 wo->notask_error = 0; 1618 wo->notask_error = 0;
1547 return 0;
1548 } 1619 }
1549 1620
1550 if (p->exit_state == EXIT_DEAD)
1551 return 0;
1552
1553 /* 1621 /*
1554 * We don't reap group leaders with subthreads. 1622 * Wait for stopped. Depending on @ptrace, different stopped state
1623 * is used and the two don't interact with each other.
1555 */ 1624 */
1556 if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) 1625 ret = wait_task_stopped(wo, ptrace, p);
1557 return wait_task_zombie(wo, p); 1626 if (ret)
1627 return ret;
1558 1628
1559 /* 1629 /*
1560 * It's stopped or running now, so it might 1630 * Wait for continued. There's only one continued state and the
1561 * later continue, exit, or stop again. 1631 * ptracer can consume it which can confuse the real parent. Don't
1632 * use WCONTINUED from ptracer. You don't need or want it.
1562 */ 1633 */
1563 wo->notask_error = 0;
1564
1565 if (task_stopped_code(p, ptrace))
1566 return wait_task_stopped(wo, ptrace, p);
1567
1568 return wait_task_continued(wo, p); 1634 return wait_task_continued(wo, p);
1569} 1635}
1570 1636
diff --git a/kernel/fork.c b/kernel/fork.c
index 2b44d82b8237..8e7e135d0817 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -383,15 +383,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
383 get_file(file); 383 get_file(file);
384 if (tmp->vm_flags & VM_DENYWRITE) 384 if (tmp->vm_flags & VM_DENYWRITE)
385 atomic_dec(&inode->i_writecount); 385 atomic_dec(&inode->i_writecount);
386 spin_lock(&mapping->i_mmap_lock); 386 mutex_lock(&mapping->i_mmap_mutex);
387 if (tmp->vm_flags & VM_SHARED) 387 if (tmp->vm_flags & VM_SHARED)
388 mapping->i_mmap_writable++; 388 mapping->i_mmap_writable++;
389 tmp->vm_truncate_count = mpnt->vm_truncate_count;
390 flush_dcache_mmap_lock(mapping); 389 flush_dcache_mmap_lock(mapping);
391 /* insert tmp into the share list, just after mpnt */ 390 /* insert tmp into the share list, just after mpnt */
392 vma_prio_tree_add(tmp, mpnt); 391 vma_prio_tree_add(tmp, mpnt);
393 flush_dcache_mmap_unlock(mapping); 392 flush_dcache_mmap_unlock(mapping);
394 spin_unlock(&mapping->i_mmap_lock); 393 mutex_unlock(&mapping->i_mmap_mutex);
395 } 394 }
396 395
397 /* 396 /*
@@ -486,6 +485,20 @@ static void mm_init_aio(struct mm_struct *mm)
486#endif 485#endif
487} 486}
488 487
488int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
489{
490#ifdef CONFIG_CPUMASK_OFFSTACK
491 if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
492 return -ENOMEM;
493
494 if (oldmm)
495 cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
496 else
497 memset(mm_cpumask(mm), 0, cpumask_size());
498#endif
499 return 0;
500}
501
489static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) 502static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
490{ 503{
491 atomic_set(&mm->mm_users, 1); 504 atomic_set(&mm->mm_users, 1);
@@ -522,10 +535,20 @@ struct mm_struct * mm_alloc(void)
522 struct mm_struct * mm; 535 struct mm_struct * mm;
523 536
524 mm = allocate_mm(); 537 mm = allocate_mm();
525 if (mm) { 538 if (!mm)
526 memset(mm, 0, sizeof(*mm)); 539 return NULL;
527 mm = mm_init(mm, current); 540
541 memset(mm, 0, sizeof(*mm));
542 mm = mm_init(mm, current);
543 if (!mm)
544 return NULL;
545
546 if (mm_init_cpumask(mm, NULL)) {
547 mm_free_pgd(mm);
548 free_mm(mm);
549 return NULL;
528 } 550 }
551
529 return mm; 552 return mm;
530} 553}
531 554
@@ -537,6 +560,7 @@ struct mm_struct * mm_alloc(void)
537void __mmdrop(struct mm_struct *mm) 560void __mmdrop(struct mm_struct *mm)
538{ 561{
539 BUG_ON(mm == &init_mm); 562 BUG_ON(mm == &init_mm);
563 free_cpumask_var(mm->cpu_vm_mask_var);
540 mm_free_pgd(mm); 564 mm_free_pgd(mm);
541 destroy_context(mm); 565 destroy_context(mm);
542 mmu_notifier_mm_destroy(mm); 566 mmu_notifier_mm_destroy(mm);
@@ -691,6 +715,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
691 if (!mm_init(mm, tsk)) 715 if (!mm_init(mm, tsk))
692 goto fail_nomem; 716 goto fail_nomem;
693 717
718 if (mm_init_cpumask(mm, oldmm))
719 goto fail_nocpumask;
720
694 if (init_new_context(tsk, mm)) 721 if (init_new_context(tsk, mm))
695 goto fail_nocontext; 722 goto fail_nocontext;
696 723
@@ -717,6 +744,9 @@ fail_nomem:
717 return NULL; 744 return NULL;
718 745
719fail_nocontext: 746fail_nocontext:
747 free_cpumask_var(mm->cpu_vm_mask_var);
748
749fail_nocpumask:
720 /* 750 /*
721 * If init_new_context() failed, we cannot use mmput() to free the mm 751 * If init_new_context() failed, we cannot use mmput() to free the mm
722 * because it calls destroy_context() 752 * because it calls destroy_context()
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index dbbbf7d43080..a9205e32a059 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -64,17 +64,20 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
64 .clock_base = 64 .clock_base =
65 { 65 {
66 { 66 {
67 .index = CLOCK_REALTIME, 67 .index = HRTIMER_BASE_MONOTONIC,
68 .get_time = &ktime_get_real, 68 .clockid = CLOCK_MONOTONIC,
69 .get_time = &ktime_get,
69 .resolution = KTIME_LOW_RES, 70 .resolution = KTIME_LOW_RES,
70 }, 71 },
71 { 72 {
72 .index = CLOCK_MONOTONIC, 73 .index = HRTIMER_BASE_REALTIME,
73 .get_time = &ktime_get, 74 .clockid = CLOCK_REALTIME,
75 .get_time = &ktime_get_real,
74 .resolution = KTIME_LOW_RES, 76 .resolution = KTIME_LOW_RES,
75 }, 77 },
76 { 78 {
77 .index = CLOCK_BOOTTIME, 79 .index = HRTIMER_BASE_BOOTTIME,
80 .clockid = CLOCK_BOOTTIME,
78 .get_time = &ktime_get_boottime, 81 .get_time = &ktime_get_boottime,
79 .resolution = KTIME_LOW_RES, 82 .resolution = KTIME_LOW_RES,
80 }, 83 },
@@ -196,7 +199,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
196 struct hrtimer_cpu_base *new_cpu_base; 199 struct hrtimer_cpu_base *new_cpu_base;
197 int this_cpu = smp_processor_id(); 200 int this_cpu = smp_processor_id();
198 int cpu = hrtimer_get_target(this_cpu, pinned); 201 int cpu = hrtimer_get_target(this_cpu, pinned);
199 int basenum = hrtimer_clockid_to_base(base->index); 202 int basenum = base->index;
200 203
201again: 204again:
202 new_cpu_base = &per_cpu(hrtimer_bases, cpu); 205 new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -621,66 +624,6 @@ static int hrtimer_reprogram(struct hrtimer *timer,
621 return res; 624 return res;
622} 625}
623 626
624
625/*
626 * Retrigger next event is called after clock was set
627 *
628 * Called with interrupts disabled via on_each_cpu()
629 */
630static void retrigger_next_event(void *arg)
631{
632 struct hrtimer_cpu_base *base;
633 struct timespec realtime_offset, wtm, sleep;
634
635 if (!hrtimer_hres_active())
636 return;
637
638 get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm,
639 &sleep);
640 set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
641
642 base = &__get_cpu_var(hrtimer_bases);
643
644 /* Adjust CLOCK_REALTIME offset */
645 raw_spin_lock(&base->lock);
646 base->clock_base[HRTIMER_BASE_REALTIME].offset =
647 timespec_to_ktime(realtime_offset);
648 base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
649 timespec_to_ktime(sleep);
650
651 hrtimer_force_reprogram(base, 0);
652 raw_spin_unlock(&base->lock);
653}
654
655/*
656 * Clock realtime was set
657 *
658 * Change the offset of the realtime clock vs. the monotonic
659 * clock.
660 *
661 * We might have to reprogram the high resolution timer interrupt. On
662 * SMP we call the architecture specific code to retrigger _all_ high
663 * resolution timer interrupts. On UP we just disable interrupts and
664 * call the high resolution interrupt code.
665 */
666void clock_was_set(void)
667{
668 /* Retrigger the CPU local events everywhere */
669 on_each_cpu(retrigger_next_event, NULL, 1);
670}
671
672/*
673 * During resume we might have to reprogram the high resolution timer
674 * interrupt (on the local CPU):
675 */
676void hres_timers_resume(void)
677{
678 WARN_ONCE(!irqs_disabled(),
679 KERN_INFO "hres_timers_resume() called with IRQs enabled!");
680
681 retrigger_next_event(NULL);
682}
683
684/* 627/*
685 * Initialize the high resolution related parts of cpu_base 628 * Initialize the high resolution related parts of cpu_base
686 */ 629 */
@@ -715,11 +658,39 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
715} 658}
716 659
717/* 660/*
661 * Retrigger next event is called after clock was set
662 *
663 * Called with interrupts disabled via on_each_cpu()
664 */
665static void retrigger_next_event(void *arg)
666{
667 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
668 struct timespec realtime_offset, xtim, wtm, sleep;
669
670 if (!hrtimer_hres_active())
671 return;
672
673 /* Optimized out for !HIGH_RES */
674 get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
675 set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
676
677 /* Adjust CLOCK_REALTIME offset */
678 raw_spin_lock(&base->lock);
679 base->clock_base[HRTIMER_BASE_REALTIME].offset =
680 timespec_to_ktime(realtime_offset);
681 base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
682 timespec_to_ktime(sleep);
683
684 hrtimer_force_reprogram(base, 0);
685 raw_spin_unlock(&base->lock);
686}
687
688/*
718 * Switch to high resolution mode 689 * Switch to high resolution mode
719 */ 690 */
720static int hrtimer_switch_to_hres(void) 691static int hrtimer_switch_to_hres(void)
721{ 692{
722 int cpu = smp_processor_id(); 693 int i, cpu = smp_processor_id();
723 struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); 694 struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
724 unsigned long flags; 695 unsigned long flags;
725 696
@@ -735,9 +706,8 @@ static int hrtimer_switch_to_hres(void)
735 return 0; 706 return 0;
736 } 707 }
737 base->hres_active = 1; 708 base->hres_active = 1;
738 base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES; 709 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
739 base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES; 710 base->clock_base[i].resolution = KTIME_HIGH_RES;
740 base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES;
741 711
742 tick_setup_sched_timer(); 712 tick_setup_sched_timer();
743 713
@@ -761,9 +731,43 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
761 return 0; 731 return 0;
762} 732}
763static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } 733static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
734static inline void retrigger_next_event(void *arg) { }
764 735
765#endif /* CONFIG_HIGH_RES_TIMERS */ 736#endif /* CONFIG_HIGH_RES_TIMERS */
766 737
738/*
739 * Clock realtime was set
740 *
741 * Change the offset of the realtime clock vs. the monotonic
742 * clock.
743 *
744 * We might have to reprogram the high resolution timer interrupt. On
745 * SMP we call the architecture specific code to retrigger _all_ high
746 * resolution timer interrupts. On UP we just disable interrupts and
747 * call the high resolution interrupt code.
748 */
749void clock_was_set(void)
750{
751#ifdef CONFIG_HIGH_RES_TIMERS
752 /* Retrigger the CPU local events everywhere */
753 on_each_cpu(retrigger_next_event, NULL, 1);
754#endif
755 timerfd_clock_was_set();
756}
757
758/*
759 * During resume we might have to reprogram the high resolution timer
760 * interrupt (on the local CPU):
761 */
762void hrtimers_resume(void)
763{
764 WARN_ONCE(!irqs_disabled(),
765 KERN_INFO "hrtimers_resume() called with IRQs enabled!");
766
767 retrigger_next_event(NULL);
768 timerfd_clock_was_set();
769}
770
767static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) 771static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
768{ 772{
769#ifdef CONFIG_TIMER_STATS 773#ifdef CONFIG_TIMER_STATS
@@ -856,6 +860,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
856 debug_activate(timer); 860 debug_activate(timer);
857 861
858 timerqueue_add(&base->active, &timer->node); 862 timerqueue_add(&base->active, &timer->node);
863 base->cpu_base->active_bases |= 1 << base->index;
859 864
860 /* 865 /*
861 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the 866 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
@@ -897,6 +902,8 @@ static void __remove_hrtimer(struct hrtimer *timer,
897#endif 902#endif
898 } 903 }
899 timerqueue_del(&base->active, &timer->node); 904 timerqueue_del(&base->active, &timer->node);
905 if (!timerqueue_getnext(&base->active))
906 base->cpu_base->active_bases &= ~(1 << base->index);
900out: 907out:
901 timer->state = newstate; 908 timer->state = newstate;
902} 909}
@@ -1234,7 +1241,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
1234void hrtimer_interrupt(struct clock_event_device *dev) 1241void hrtimer_interrupt(struct clock_event_device *dev)
1235{ 1242{
1236 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); 1243 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1237 struct hrtimer_clock_base *base;
1238 ktime_t expires_next, now, entry_time, delta; 1244 ktime_t expires_next, now, entry_time, delta;
1239 int i, retries = 0; 1245 int i, retries = 0;
1240 1246
@@ -1256,12 +1262,15 @@ retry:
1256 */ 1262 */
1257 cpu_base->expires_next.tv64 = KTIME_MAX; 1263 cpu_base->expires_next.tv64 = KTIME_MAX;
1258 1264
1259 base = cpu_base->clock_base;
1260
1261 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 1265 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1262 ktime_t basenow; 1266 struct hrtimer_clock_base *base;
1263 struct timerqueue_node *node; 1267 struct timerqueue_node *node;
1268 ktime_t basenow;
1269
1270 if (!(cpu_base->active_bases & (1 << i)))
1271 continue;
1264 1272
1273 base = cpu_base->clock_base + i;
1265 basenow = ktime_add(now, base->offset); 1274 basenow = ktime_add(now, base->offset);
1266 1275
1267 while ((node = timerqueue_getnext(&base->active))) { 1276 while ((node = timerqueue_getnext(&base->active))) {
@@ -1294,7 +1303,6 @@ retry:
1294 1303
1295 __run_hrtimer(timer, &basenow); 1304 __run_hrtimer(timer, &basenow);
1296 } 1305 }
1297 base++;
1298 } 1306 }
1299 1307
1300 /* 1308 /*
@@ -1525,7 +1533,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
1525 struct timespec __user *rmtp; 1533 struct timespec __user *rmtp;
1526 int ret = 0; 1534 int ret = 0;
1527 1535
1528 hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, 1536 hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
1529 HRTIMER_MODE_ABS); 1537 HRTIMER_MODE_ABS);
1530 hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); 1538 hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
1531 1539
@@ -1577,7 +1585,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
1577 1585
1578 restart = &current_thread_info()->restart_block; 1586 restart = &current_thread_info()->restart_block;
1579 restart->fn = hrtimer_nanosleep_restart; 1587 restart->fn = hrtimer_nanosleep_restart;
1580 restart->nanosleep.index = t.timer.base->index; 1588 restart->nanosleep.clockid = t.timer.base->clockid;
1581 restart->nanosleep.rmtp = rmtp; 1589 restart->nanosleep.rmtp = rmtp;
1582 restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); 1590 restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
1583 1591
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 834899f2500f..64e3df6ab1ef 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;
19 19
20#ifdef CONFIG_SMP 20#ifdef CONFIG_SMP
21 21
22static int irq_affinity_proc_show(struct seq_file *m, void *v) 22static int show_irq_affinity(int type, struct seq_file *m, void *v)
23{ 23{
24 struct irq_desc *desc = irq_to_desc((long)m->private); 24 struct irq_desc *desc = irq_to_desc((long)m->private);
25 const struct cpumask *mask = desc->irq_data.affinity; 25 const struct cpumask *mask = desc->irq_data.affinity;
@@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
28 if (irqd_is_setaffinity_pending(&desc->irq_data)) 28 if (irqd_is_setaffinity_pending(&desc->irq_data))
29 mask = desc->pending_mask; 29 mask = desc->pending_mask;
30#endif 30#endif
31 seq_cpumask(m, mask); 31 if (type)
32 seq_cpumask_list(m, mask);
33 else
34 seq_cpumask(m, mask);
32 seq_putc(m, '\n'); 35 seq_putc(m, '\n');
33 return 0; 36 return 0;
34} 37}
@@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
59#endif 62#endif
60 63
61int no_irq_affinity; 64int no_irq_affinity;
62static ssize_t irq_affinity_proc_write(struct file *file, 65static int irq_affinity_proc_show(struct seq_file *m, void *v)
66{
67 return show_irq_affinity(0, m, v);
68}
69
70static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
71{
72 return show_irq_affinity(1, m, v);
73}
74
75
76static ssize_t write_irq_affinity(int type, struct file *file,
63 const char __user *buffer, size_t count, loff_t *pos) 77 const char __user *buffer, size_t count, loff_t *pos)
64{ 78{
65 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; 79 unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
@@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(struct file *file,
72 if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) 86 if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
73 return -ENOMEM; 87 return -ENOMEM;
74 88
75 err = cpumask_parse_user(buffer, count, new_value); 89 if (type)
90 err = cpumask_parselist_user(buffer, count, new_value);
91 else
92 err = cpumask_parse_user(buffer, count, new_value);
76 if (err) 93 if (err)
77 goto free_cpumask; 94 goto free_cpumask;
78 95
@@ -100,11 +117,28 @@ free_cpumask:
100 return err; 117 return err;
101} 118}
102 119
120static ssize_t irq_affinity_proc_write(struct file *file,
121 const char __user *buffer, size_t count, loff_t *pos)
122{
123 return write_irq_affinity(0, file, buffer, count, pos);
124}
125
126static ssize_t irq_affinity_list_proc_write(struct file *file,
127 const char __user *buffer, size_t count, loff_t *pos)
128{
129 return write_irq_affinity(1, file, buffer, count, pos);
130}
131
103static int irq_affinity_proc_open(struct inode *inode, struct file *file) 132static int irq_affinity_proc_open(struct inode *inode, struct file *file)
104{ 133{
105 return single_open(file, irq_affinity_proc_show, PDE(inode)->data); 134 return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
106} 135}
107 136
137static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
138{
139 return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data);
140}
141
108static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) 142static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
109{ 143{
110 return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); 144 return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
@@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = {
125 .release = single_release, 159 .release = single_release,
126}; 160};
127 161
162static const struct file_operations irq_affinity_list_proc_fops = {
163 .open = irq_affinity_list_proc_open,
164 .read = seq_read,
165 .llseek = seq_lseek,
166 .release = single_release,
167 .write = irq_affinity_list_proc_write,
168};
169
128static int default_affinity_show(struct seq_file *m, void *v) 170static int default_affinity_show(struct seq_file *m, void *v)
129{ 171{
130 seq_cpumask(m, irq_default_affinity); 172 seq_cpumask(m, irq_default_affinity);
@@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
289 proc_create_data("affinity_hint", 0400, desc->dir, 331 proc_create_data("affinity_hint", 0400, desc->dir,
290 &irq_affinity_hint_proc_fops, (void *)(long)irq); 332 &irq_affinity_hint_proc_fops, (void *)(long)irq);
291 333
334 /* create /proc/irq/<irq>/smp_affinity_list */
335 proc_create_data("smp_affinity_list", 0600, desc->dir,
336 &irq_affinity_list_proc_fops, (void *)(long)irq);
337
292 proc_create_data("node", 0444, desc->dir, 338 proc_create_data("node", 0444, desc->dir,
293 &irq_node_proc_fops, (void *)(long)irq); 339 &irq_node_proc_fops, (void *)(long)irq);
294#endif 340#endif
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 5ae0ff38425f..ad6a81c58b44 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -25,6 +25,7 @@
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/completion.h> 27#include <linux/completion.h>
28#include <linux/cred.h>
28#include <linux/file.h> 29#include <linux/file.h>
29#include <linux/fdtable.h> 30#include <linux/fdtable.h>
30#include <linux/workqueue.h> 31#include <linux/workqueue.h>
@@ -43,6 +44,13 @@ extern int max_threads;
43 44
44static struct workqueue_struct *khelper_wq; 45static struct workqueue_struct *khelper_wq;
45 46
47#define CAP_BSET (void *)1
48#define CAP_PI (void *)2
49
50static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
51static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
52static DEFINE_SPINLOCK(umh_sysctl_lock);
53
46#ifdef CONFIG_MODULES 54#ifdef CONFIG_MODULES
47 55
48/* 56/*
@@ -132,6 +140,7 @@ EXPORT_SYMBOL(__request_module);
132static int ____call_usermodehelper(void *data) 140static int ____call_usermodehelper(void *data)
133{ 141{
134 struct subprocess_info *sub_info = data; 142 struct subprocess_info *sub_info = data;
143 struct cred *new;
135 int retval; 144 int retval;
136 145
137 spin_lock_irq(&current->sighand->siglock); 146 spin_lock_irq(&current->sighand->siglock);
@@ -153,6 +162,19 @@ static int ____call_usermodehelper(void *data)
153 goto fail; 162 goto fail;
154 } 163 }
155 164
165 retval = -ENOMEM;
166 new = prepare_kernel_cred(current);
167 if (!new)
168 goto fail;
169
170 spin_lock(&umh_sysctl_lock);
171 new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
172 new->cap_inheritable = cap_intersect(usermodehelper_inheritable,
173 new->cap_inheritable);
174 spin_unlock(&umh_sysctl_lock);
175
176 commit_creds(new);
177
156 retval = kernel_execve(sub_info->path, 178 retval = kernel_execve(sub_info->path,
157 (const char *const *)sub_info->argv, 179 (const char *const *)sub_info->argv,
158 (const char *const *)sub_info->envp); 180 (const char *const *)sub_info->envp);
@@ -420,6 +442,84 @@ unlock:
420} 442}
421EXPORT_SYMBOL(call_usermodehelper_exec); 443EXPORT_SYMBOL(call_usermodehelper_exec);
422 444
445static int proc_cap_handler(struct ctl_table *table, int write,
446 void __user *buffer, size_t *lenp, loff_t *ppos)
447{
448 struct ctl_table t;
449 unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
450 kernel_cap_t new_cap;
451 int err, i;
452
453 if (write && (!capable(CAP_SETPCAP) ||
454 !capable(CAP_SYS_MODULE)))
455 return -EPERM;
456
457 /*
458 * convert from the global kernel_cap_t to the ulong array to print to
459 * userspace if this is a read.
460 */
461 spin_lock(&umh_sysctl_lock);
462 for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) {
463 if (table->data == CAP_BSET)
464 cap_array[i] = usermodehelper_bset.cap[i];
465 else if (table->data == CAP_PI)
466 cap_array[i] = usermodehelper_inheritable.cap[i];
467 else
468 BUG();
469 }
470 spin_unlock(&umh_sysctl_lock);
471
472 t = *table;
473 t.data = &cap_array;
474
475 /*
476 * actually read or write and array of ulongs from userspace. Remember
477 * these are least significant 32 bits first
478 */
479 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
480 if (err < 0)
481 return err;
482
483 /*
484 * convert from the sysctl array of ulongs to the kernel_cap_t
485 * internal representation
486 */
487 for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)
488 new_cap.cap[i] = cap_array[i];
489
490 /*
491 * Drop everything not in the new_cap (but don't add things)
492 */
493 spin_lock(&umh_sysctl_lock);
494 if (write) {
495 if (table->data == CAP_BSET)
496 usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap);
497 if (table->data == CAP_PI)
498 usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap);
499 }
500 spin_unlock(&umh_sysctl_lock);
501
502 return 0;
503}
504
505struct ctl_table usermodehelper_table[] = {
506 {
507 .procname = "bset",
508 .data = CAP_BSET,
509 .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
510 .mode = 0600,
511 .proc_handler = proc_cap_handler,
512 },
513 {
514 .procname = "inheritable",
515 .data = CAP_PI,
516 .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
517 .mode = 0600,
518 .proc_handler = proc_cap_handler,
519 },
520 { }
521};
522
423void __init usermodehelper_init(void) 523void __init usermodehelper_init(void)
424{ 524{
425 khelper_wq = create_singlethread_workqueue("khelper"); 525 khelper_wq = create_singlethread_workqueue("khelper");
diff --git a/kernel/module.c b/kernel/module.c
index 22879725678d..795bdc7f5c3f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2812,7 +2812,7 @@ static struct module *load_module(void __user *umod,
2812 } 2812 }
2813 2813
2814 /* This has to be done once we're sure module name is unique. */ 2814 /* This has to be done once we're sure module name is unique. */
2815 if (!mod->taints) 2815 if (!mod->taints || mod->taints == (1U<<TAINT_CRAP))
2816 dynamic_debug_setup(info.debug, info.num_debug); 2816 dynamic_debug_setup(info.debug, info.num_debug);
2817 2817
2818 /* Find duplicate symbols */ 2818 /* Find duplicate symbols */
@@ -2849,7 +2849,7 @@ static struct module *load_module(void __user *umod,
2849 module_bug_cleanup(mod); 2849 module_bug_cleanup(mod);
2850 2850
2851 ddebug: 2851 ddebug:
2852 if (!mod->taints) 2852 if (!mod->taints || mod->taints == (1U<<TAINT_CRAP))
2853 dynamic_debug_remove(info.debug); 2853 dynamic_debug_remove(info.debug);
2854 unlock: 2854 unlock:
2855 mutex_unlock(&module_mutex); 2855 mutex_unlock(&module_mutex);
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 2c938e2337cd..d607ed5dd441 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -131,14 +131,14 @@ EXPORT_SYMBOL(mutex_unlock);
131 */ 131 */
132static inline int __sched 132static inline int __sched
133__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, 133__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
134 unsigned long ip) 134 struct lockdep_map *nest_lock, unsigned long ip)
135{ 135{
136 struct task_struct *task = current; 136 struct task_struct *task = current;
137 struct mutex_waiter waiter; 137 struct mutex_waiter waiter;
138 unsigned long flags; 138 unsigned long flags;
139 139
140 preempt_disable(); 140 preempt_disable();
141 mutex_acquire(&lock->dep_map, subclass, 0, ip); 141 mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
142 142
143#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 143#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
144 /* 144 /*
@@ -269,16 +269,25 @@ void __sched
269mutex_lock_nested(struct mutex *lock, unsigned int subclass) 269mutex_lock_nested(struct mutex *lock, unsigned int subclass)
270{ 270{
271 might_sleep(); 271 might_sleep();
272 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_); 272 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
273} 273}
274 274
275EXPORT_SYMBOL_GPL(mutex_lock_nested); 275EXPORT_SYMBOL_GPL(mutex_lock_nested);
276 276
277void __sched
278_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
279{
280 might_sleep();
281 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_);
282}
283
284EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
285
277int __sched 286int __sched
278mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) 287mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
279{ 288{
280 might_sleep(); 289 might_sleep();
281 return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_); 290 return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
282} 291}
283EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); 292EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
284 293
@@ -287,7 +296,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
287{ 296{
288 might_sleep(); 297 might_sleep();
289 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 298 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
290 subclass, _RET_IP_); 299 subclass, NULL, _RET_IP_);
291} 300}
292 301
293EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); 302EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -393,7 +402,7 @@ __mutex_lock_slowpath(atomic_t *lock_count)
393{ 402{
394 struct mutex *lock = container_of(lock_count, struct mutex, count); 403 struct mutex *lock = container_of(lock_count, struct mutex, count);
395 404
396 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); 405 __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
397} 406}
398 407
399static noinline int __sched 408static noinline int __sched
@@ -401,7 +410,7 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count)
401{ 410{
402 struct mutex *lock = container_of(lock_count, struct mutex, count); 411 struct mutex *lock = container_of(lock_count, struct mutex, count);
403 412
404 return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_); 413 return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
405} 414}
406 415
407static noinline int __sched 416static noinline int __sched
@@ -409,7 +418,7 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count)
409{ 418{
410 struct mutex *lock = container_of(lock_count, struct mutex, count); 419 struct mutex *lock = container_of(lock_count, struct mutex, count);
411 420
412 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_); 421 return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
413} 422}
414#endif 423#endif
415 424
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index a05d191ffdd9..5424e37673ed 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -22,6 +22,9 @@
22#include <linux/pid_namespace.h> 22#include <linux/pid_namespace.h>
23#include <net/net_namespace.h> 23#include <net/net_namespace.h>
24#include <linux/ipc_namespace.h> 24#include <linux/ipc_namespace.h>
25#include <linux/proc_fs.h>
26#include <linux/file.h>
27#include <linux/syscalls.h>
25 28
26static struct kmem_cache *nsproxy_cachep; 29static struct kmem_cache *nsproxy_cachep;
27 30
@@ -233,6 +236,45 @@ void exit_task_namespaces(struct task_struct *p)
233 switch_task_namespaces(p, NULL); 236 switch_task_namespaces(p, NULL);
234} 237}
235 238
239SYSCALL_DEFINE2(setns, int, fd, int, nstype)
240{
241 const struct proc_ns_operations *ops;
242 struct task_struct *tsk = current;
243 struct nsproxy *new_nsproxy;
244 struct proc_inode *ei;
245 struct file *file;
246 int err;
247
248 if (!capable(CAP_SYS_ADMIN))
249 return -EPERM;
250
251 file = proc_ns_fget(fd);
252 if (IS_ERR(file))
253 return PTR_ERR(file);
254
255 err = -EINVAL;
256 ei = PROC_I(file->f_dentry->d_inode);
257 ops = ei->ns_ops;
258 if (nstype && (ops->type != nstype))
259 goto out;
260
261 new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
262 if (IS_ERR(new_nsproxy)) {
263 err = PTR_ERR(new_nsproxy);
264 goto out;
265 }
266
267 err = ops->install(new_nsproxy, ei->ns);
268 if (err) {
269 free_nsproxy(new_nsproxy);
270 goto out;
271 }
272 switch_task_namespaces(tsk, new_nsproxy);
273out:
274 fput(file);
275 return err;
276}
277
236static int __init nsproxy_cache_init(void) 278static int __init nsproxy_cache_init(void)
237{ 279{
238 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 280 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 0da058bff8eb..beb184689af9 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -385,7 +385,7 @@ static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
385 s32 value; 385 s32 value;
386 unsigned long flags; 386 unsigned long flags;
387 struct pm_qos_object *o; 387 struct pm_qos_object *o;
388 struct pm_qos_request_list *pm_qos_req = filp->private_data;; 388 struct pm_qos_request_list *pm_qos_req = filp->private_data;
389 389
390 if (!pm_qos_req) 390 if (!pm_qos_req)
391 return -EINVAL; 391 return -EINVAL;
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 0791b13df7bf..58f405b581e7 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1514,7 +1514,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1514 return -EFAULT; 1514 return -EFAULT;
1515 1515
1516 restart_block->fn = posix_cpu_nsleep_restart; 1516 restart_block->fn = posix_cpu_nsleep_restart;
1517 restart_block->nanosleep.index = which_clock; 1517 restart_block->nanosleep.clockid = which_clock;
1518 restart_block->nanosleep.rmtp = rmtp; 1518 restart_block->nanosleep.rmtp = rmtp;
1519 restart_block->nanosleep.expires = timespec_to_ns(rqtp); 1519 restart_block->nanosleep.expires = timespec_to_ns(rqtp);
1520 } 1520 }
@@ -1523,7 +1523,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1523 1523
1524static long posix_cpu_nsleep_restart(struct restart_block *restart_block) 1524static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1525{ 1525{
1526 clockid_t which_clock = restart_block->nanosleep.index; 1526 clockid_t which_clock = restart_block->nanosleep.clockid;
1527 struct timespec t; 1527 struct timespec t;
1528 struct itimerspec it; 1528 struct itimerspec it;
1529 int error; 1529 int error;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index e5498d7405c3..4556182527f3 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void)
491 return tmr; 491 return tmr;
492} 492}
493 493
494static void k_itimer_rcu_free(struct rcu_head *head)
495{
496 struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
497
498 kmem_cache_free(posix_timers_cache, tmr);
499}
500
494#define IT_ID_SET 1 501#define IT_ID_SET 1
495#define IT_ID_NOT_SET 0 502#define IT_ID_NOT_SET 0
496static void release_posix_timer(struct k_itimer *tmr, int it_id_set) 503static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
@@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
503 } 510 }
504 put_pid(tmr->it_pid); 511 put_pid(tmr->it_pid);
505 sigqueue_free(tmr->sigq); 512 sigqueue_free(tmr->sigq);
506 kmem_cache_free(posix_timers_cache, tmr); 513 call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
507} 514}
508 515
509static struct k_clock *clockid_to_kclock(const clockid_t id) 516static struct k_clock *clockid_to_kclock(const clockid_t id)
@@ -631,22 +638,18 @@ out:
631static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) 638static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
632{ 639{
633 struct k_itimer *timr; 640 struct k_itimer *timr;
634 /* 641
635 * Watch out here. We do a irqsave on the idr_lock and pass the 642 rcu_read_lock();
636 * flags part over to the timer lock. Must not let interrupts in
637 * while we are moving the lock.
638 */
639 spin_lock_irqsave(&idr_lock, *flags);
640 timr = idr_find(&posix_timers_id, (int)timer_id); 643 timr = idr_find(&posix_timers_id, (int)timer_id);
641 if (timr) { 644 if (timr) {
642 spin_lock(&timr->it_lock); 645 spin_lock_irqsave(&timr->it_lock, *flags);
643 if (timr->it_signal == current->signal) { 646 if (timr->it_signal == current->signal) {
644 spin_unlock(&idr_lock); 647 rcu_read_unlock();
645 return timr; 648 return timr;
646 } 649 }
647 spin_unlock(&timr->it_lock); 650 spin_unlock_irqrestore(&timr->it_lock, *flags);
648 } 651 }
649 spin_unlock_irqrestore(&idr_lock, *flags); 652 rcu_read_unlock();
650 653
651 return NULL; 654 return NULL;
652} 655}
@@ -1056,7 +1059,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
1056 */ 1059 */
1057long clock_nanosleep_restart(struct restart_block *restart_block) 1060long clock_nanosleep_restart(struct restart_block *restart_block)
1058{ 1061{
1059 clockid_t which_clock = restart_block->nanosleep.index; 1062 clockid_t which_clock = restart_block->nanosleep.clockid;
1060 struct k_clock *kc = clockid_to_kclock(which_clock); 1063 struct k_clock *kc = clockid_to_kclock(which_clock);
1061 1064
1062 if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) 1065 if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
diff --git a/kernel/printk.c b/kernel/printk.c
index da8ca817eae3..35185392173f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -31,6 +31,7 @@
31#include <linux/smp.h> 31#include <linux/smp.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/memblock.h>
34#include <linux/syscalls.h> 35#include <linux/syscalls.h>
35#include <linux/kexec.h> 36#include <linux/kexec.h>
36#include <linux/kdb.h> 37#include <linux/kdb.h>
@@ -167,46 +168,74 @@ void log_buf_kexec_setup(void)
167} 168}
168#endif 169#endif
169 170
171/* requested log_buf_len from kernel cmdline */
172static unsigned long __initdata new_log_buf_len;
173
174/* save requested log_buf_len since it's too early to process it */
170static int __init log_buf_len_setup(char *str) 175static int __init log_buf_len_setup(char *str)
171{ 176{
172 unsigned size = memparse(str, &str); 177 unsigned size = memparse(str, &str);
173 unsigned long flags;
174 178
175 if (size) 179 if (size)
176 size = roundup_pow_of_two(size); 180 size = roundup_pow_of_two(size);
177 if (size > log_buf_len) { 181 if (size > log_buf_len)
178 unsigned start, dest_idx, offset; 182 new_log_buf_len = size;
179 char *new_log_buf;
180 183
181 new_log_buf = alloc_bootmem(size); 184 return 0;
182 if (!new_log_buf) { 185}
183 printk(KERN_WARNING "log_buf_len: allocation failed\n"); 186early_param("log_buf_len", log_buf_len_setup);
184 goto out;
185 }
186 187
187 spin_lock_irqsave(&logbuf_lock, flags); 188void __init setup_log_buf(int early)
188 log_buf_len = size; 189{
189 log_buf = new_log_buf; 190 unsigned long flags;
190 191 unsigned start, dest_idx, offset;
191 offset = start = min(con_start, log_start); 192 char *new_log_buf;
192 dest_idx = 0; 193 int free;
193 while (start != log_end) { 194
194 log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; 195 if (!new_log_buf_len)
195 start++; 196 return;
196 dest_idx++; 197
197 } 198 if (early) {
198 log_start -= offset; 199 unsigned long mem;
199 con_start -= offset;
200 log_end -= offset;
201 spin_unlock_irqrestore(&logbuf_lock, flags);
202 200
203 printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); 201 mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
202 if (mem == MEMBLOCK_ERROR)
203 return;
204 new_log_buf = __va(mem);
205 } else {
206 new_log_buf = alloc_bootmem_nopanic(new_log_buf_len);
204 } 207 }
205out:
206 return 1;
207}
208 208
209__setup("log_buf_len=", log_buf_len_setup); 209 if (unlikely(!new_log_buf)) {
210 pr_err("log_buf_len: %ld bytes not available\n",
211 new_log_buf_len);
212 return;
213 }
214
215 spin_lock_irqsave(&logbuf_lock, flags);
216 log_buf_len = new_log_buf_len;
217 log_buf = new_log_buf;
218 new_log_buf_len = 0;
219 free = __LOG_BUF_LEN - log_end;
220
221 offset = start = min(con_start, log_start);
222 dest_idx = 0;
223 while (start != log_end) {
224 unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1);
225
226 log_buf[dest_idx] = __log_buf[log_idx_mask];
227 start++;
228 dest_idx++;
229 }
230 log_start -= offset;
231 con_start -= offset;
232 log_end -= offset;
233 spin_unlock_irqrestore(&logbuf_lock, flags);
234
235 pr_info("log_buf_len: %d\n", log_buf_len);
236 pr_info("early log buf free: %d(%d%%)\n",
237 free, (free * 100) / __LOG_BUF_LEN);
238}
210 239
211#ifdef CONFIG_BOOT_PRINTK_DELAY 240#ifdef CONFIG_BOOT_PRINTK_DELAY
212 241
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index dc7ab65f3b36..2df115790cd9 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -38,35 +38,33 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
38 child->parent = new_parent; 38 child->parent = new_parent;
39} 39}
40 40
41/* 41/**
42 * Turn a tracing stop into a normal stop now, since with no tracer there 42 * __ptrace_unlink - unlink ptracee and restore its execution state
43 * would be no way to wake it up with SIGCONT or SIGKILL. If there was a 43 * @child: ptracee to be unlinked
44 * signal sent that would resume the child, but didn't because it was in
45 * TASK_TRACED, resume it now.
46 * Requires that irqs be disabled.
47 */
48static void ptrace_untrace(struct task_struct *child)
49{
50 spin_lock(&child->sighand->siglock);
51 if (task_is_traced(child)) {
52 /*
53 * If the group stop is completed or in progress,
54 * this thread was already counted as stopped.
55 */
56 if (child->signal->flags & SIGNAL_STOP_STOPPED ||
57 child->signal->group_stop_count)
58 __set_task_state(child, TASK_STOPPED);
59 else
60 signal_wake_up(child, 1);
61 }
62 spin_unlock(&child->sighand->siglock);
63}
64
65/*
66 * unptrace a task: move it back to its original parent and
67 * remove it from the ptrace list.
68 * 44 *
69 * Must be called with the tasklist lock write-held. 45 * Remove @child from the ptrace list, move it back to the original parent,
46 * and restore the execution state so that it conforms to the group stop
47 * state.
48 *
49 * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer
50 * exiting. For PTRACE_DETACH, unless the ptracee has been killed between
51 * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED.
52 * If the ptracer is exiting, the ptracee can be in any state.
53 *
54 * After detach, the ptracee should be in a state which conforms to the
55 * group stop. If the group is stopped or in the process of stopping, the
56 * ptracee should be put into TASK_STOPPED; otherwise, it should be woken
57 * up from TASK_TRACED.
58 *
59 * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED,
60 * it goes through TRACED -> RUNNING -> STOPPED transition which is similar
61 * to but in the opposite direction of what happens while attaching to a
62 * stopped task. However, in this direction, the intermediate RUNNING
63 * state is not hidden even from the current ptracer and if it immediately
64 * re-attaches and performs a WNOHANG wait(2), it may fail.
65 *
66 * CONTEXT:
67 * write_lock_irq(tasklist_lock)
70 */ 68 */
71void __ptrace_unlink(struct task_struct *child) 69void __ptrace_unlink(struct task_struct *child)
72{ 70{
@@ -76,8 +74,27 @@ void __ptrace_unlink(struct task_struct *child)
76 child->parent = child->real_parent; 74 child->parent = child->real_parent;
77 list_del_init(&child->ptrace_entry); 75 list_del_init(&child->ptrace_entry);
78 76
79 if (task_is_traced(child)) 77 spin_lock(&child->sighand->siglock);
80 ptrace_untrace(child); 78
79 /*
80 * Reinstate GROUP_STOP_PENDING if group stop is in effect and
81 * @child isn't dead.
82 */
83 if (!(child->flags & PF_EXITING) &&
84 (child->signal->flags & SIGNAL_STOP_STOPPED ||
85 child->signal->group_stop_count))
86 child->group_stop |= GROUP_STOP_PENDING;
87
88 /*
89 * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick
90 * @child in the butt. Note that @resume should be used iff @child
91 * is in TASK_TRACED; otherwise, we might unduly disrupt
92 * TASK_KILLABLE sleeps.
93 */
94 if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child))
95 signal_wake_up(child, task_is_traced(child));
96
97 spin_unlock(&child->sighand->siglock);
81} 98}
82 99
83/* 100/*
@@ -96,16 +113,14 @@ int ptrace_check_attach(struct task_struct *child, int kill)
96 */ 113 */
97 read_lock(&tasklist_lock); 114 read_lock(&tasklist_lock);
98 if ((child->ptrace & PT_PTRACED) && child->parent == current) { 115 if ((child->ptrace & PT_PTRACED) && child->parent == current) {
99 ret = 0;
100 /* 116 /*
101 * child->sighand can't be NULL, release_task() 117 * child->sighand can't be NULL, release_task()
102 * does ptrace_unlink() before __exit_signal(). 118 * does ptrace_unlink() before __exit_signal().
103 */ 119 */
104 spin_lock_irq(&child->sighand->siglock); 120 spin_lock_irq(&child->sighand->siglock);
105 if (task_is_stopped(child)) 121 WARN_ON_ONCE(task_is_stopped(child));
106 child->state = TASK_TRACED; 122 if (task_is_traced(child) || kill)
107 else if (!task_is_traced(child) && !kill) 123 ret = 0;
108 ret = -ESRCH;
109 spin_unlock_irq(&child->sighand->siglock); 124 spin_unlock_irq(&child->sighand->siglock);
110 } 125 }
111 read_unlock(&tasklist_lock); 126 read_unlock(&tasklist_lock);
@@ -169,6 +184,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
169 184
170static int ptrace_attach(struct task_struct *task) 185static int ptrace_attach(struct task_struct *task)
171{ 186{
187 bool wait_trap = false;
172 int retval; 188 int retval;
173 189
174 audit_ptrace(task); 190 audit_ptrace(task);
@@ -208,12 +224,42 @@ static int ptrace_attach(struct task_struct *task)
208 __ptrace_link(task, current); 224 __ptrace_link(task, current);
209 send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); 225 send_sig_info(SIGSTOP, SEND_SIG_FORCED, task);
210 226
227 spin_lock(&task->sighand->siglock);
228
229 /*
230 * If the task is already STOPPED, set GROUP_STOP_PENDING and
231 * TRAPPING, and kick it so that it transits to TRACED. TRAPPING
232 * will be cleared if the child completes the transition or any
233 * event which clears the group stop states happens. We'll wait
234 * for the transition to complete before returning from this
235 * function.
236 *
237 * This hides STOPPED -> RUNNING -> TRACED transition from the
238 * attaching thread but a different thread in the same group can
239 * still observe the transient RUNNING state. IOW, if another
240 * thread's WNOHANG wait(2) on the stopped tracee races against
241 * ATTACH, the wait(2) may fail due to the transient RUNNING.
242 *
243 * The following task_is_stopped() test is safe as both transitions
244 * in and out of STOPPED are protected by siglock.
245 */
246 if (task_is_stopped(task)) {
247 task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING;
248 signal_wake_up(task, 1);
249 wait_trap = true;
250 }
251
252 spin_unlock(&task->sighand->siglock);
253
211 retval = 0; 254 retval = 0;
212unlock_tasklist: 255unlock_tasklist:
213 write_unlock_irq(&tasklist_lock); 256 write_unlock_irq(&tasklist_lock);
214unlock_creds: 257unlock_creds:
215 mutex_unlock(&task->signal->cred_guard_mutex); 258 mutex_unlock(&task->signal->cred_guard_mutex);
216out: 259out:
260 if (wait_trap)
261 wait_event(current->signal->wait_chldexit,
262 !(task->group_stop & GROUP_STOP_TRAPPING));
217 return retval; 263 return retval;
218} 264}
219 265
@@ -316,8 +362,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
316 if (child->ptrace) { 362 if (child->ptrace) {
317 child->exit_code = data; 363 child->exit_code = data;
318 dead = __ptrace_detach(current, child); 364 dead = __ptrace_detach(current, child);
319 if (!child->exit_state)
320 wake_up_state(child, TASK_TRACED | TASK_STOPPED);
321 } 365 }
322 write_unlock_irq(&tasklist_lock); 366 write_unlock_irq(&tasklist_lock);
323 367
@@ -518,7 +562,7 @@ static int ptrace_resume(struct task_struct *child, long request,
518 } 562 }
519 563
520 child->exit_code = data; 564 child->exit_code = data;
521 wake_up_process(child); 565 wake_up_state(child, __TASK_TRACED);
522 566
523 return 0; 567 return 0;
524} 568}
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 421abfd3641d..7bbac7d0f5ab 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -35,6 +35,7 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/time.h> 36#include <linux/time.h>
37#include <linux/cpu.h> 37#include <linux/cpu.h>
38#include <linux/prefetch.h>
38 39
39/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ 40/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
40static struct task_struct *rcu_kthread_task; 41static struct task_struct *rcu_kthread_task;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e486f7c3ffb8..f07d2f03181a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -49,6 +49,7 @@
49#include <linux/kernel_stat.h> 49#include <linux/kernel_stat.h>
50#include <linux/wait.h> 50#include <linux/wait.h>
51#include <linux/kthread.h> 51#include <linux/kthread.h>
52#include <linux/prefetch.h>
52 53
53#include "rcutree.h" 54#include "rcutree.h"
54 55
diff --git a/kernel/sched.c b/kernel/sched.c
index c62acf45d3b9..2d12893b8b0f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -293,7 +293,7 @@ static DEFINE_SPINLOCK(task_group_lock);
293 * limitation from this.) 293 * limitation from this.)
294 */ 294 */
295#define MIN_SHARES 2 295#define MIN_SHARES 2
296#define MAX_SHARES (1UL << 18) 296#define MAX_SHARES (1UL << (18 + SCHED_LOAD_RESOLUTION))
297 297
298static int root_task_group_load = ROOT_TASK_GROUP_LOAD; 298static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
299#endif 299#endif
@@ -1330,13 +1330,25 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
1330{ 1330{
1331 u64 tmp; 1331 u64 tmp;
1332 1332
1333 tmp = (u64)delta_exec * weight; 1333 /*
1334 * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
1335 * entities since MIN_SHARES = 2. Treat weight as 1 if less than
1336 * 2^SCHED_LOAD_RESOLUTION.
1337 */
1338 if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
1339 tmp = (u64)delta_exec * scale_load_down(weight);
1340 else
1341 tmp = (u64)delta_exec;
1334 1342
1335 if (!lw->inv_weight) { 1343 if (!lw->inv_weight) {
1336 if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) 1344 unsigned long w = scale_load_down(lw->weight);
1345
1346 if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
1337 lw->inv_weight = 1; 1347 lw->inv_weight = 1;
1348 else if (unlikely(!w))
1349 lw->inv_weight = WMULT_CONST;
1338 else 1350 else
1339 lw->inv_weight = WMULT_CONST / lw->weight; 1351 lw->inv_weight = WMULT_CONST / w;
1340 } 1352 }
1341 1353
1342 /* 1354 /*
@@ -1778,17 +1790,20 @@ static void dec_nr_running(struct rq *rq)
1778 1790
1779static void set_load_weight(struct task_struct *p) 1791static void set_load_weight(struct task_struct *p)
1780{ 1792{
1793 int prio = p->static_prio - MAX_RT_PRIO;
1794 struct load_weight *load = &p->se.load;
1795
1781 /* 1796 /*
1782 * SCHED_IDLE tasks get minimal weight: 1797 * SCHED_IDLE tasks get minimal weight:
1783 */ 1798 */
1784 if (p->policy == SCHED_IDLE) { 1799 if (p->policy == SCHED_IDLE) {
1785 p->se.load.weight = WEIGHT_IDLEPRIO; 1800 load->weight = scale_load(WEIGHT_IDLEPRIO);
1786 p->se.load.inv_weight = WMULT_IDLEPRIO; 1801 load->inv_weight = WMULT_IDLEPRIO;
1787 return; 1802 return;
1788 } 1803 }
1789 1804
1790 p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO]; 1805 load->weight = scale_load(prio_to_weight[prio]);
1791 p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; 1806 load->inv_weight = prio_to_wmult[prio];
1792} 1807}
1793 1808
1794static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) 1809static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -2564,7 +2579,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
2564{ 2579{
2565 struct rq *rq = cpu_rq(cpu); 2580 struct rq *rq = cpu_rq(cpu);
2566 2581
2567#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) 2582#if defined(CONFIG_SMP)
2568 if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { 2583 if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
2569 ttwu_queue_remote(p, cpu); 2584 ttwu_queue_remote(p, cpu);
2570 return; 2585 return;
@@ -6527,7 +6542,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6527 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); 6542 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
6528 6543
6529 printk(KERN_CONT " %s", str); 6544 printk(KERN_CONT " %s", str);
6530 if (group->cpu_power != SCHED_LOAD_SCALE) { 6545 if (group->cpu_power != SCHED_POWER_SCALE) {
6531 printk(KERN_CONT " (cpu_power = %d)", 6546 printk(KERN_CONT " (cpu_power = %d)",
6532 group->cpu_power); 6547 group->cpu_power);
6533 } 6548 }
@@ -7902,7 +7917,7 @@ void __init sched_init(void)
7902#ifdef CONFIG_SMP 7917#ifdef CONFIG_SMP
7903 rq->sd = NULL; 7918 rq->sd = NULL;
7904 rq->rd = NULL; 7919 rq->rd = NULL;
7905 rq->cpu_power = SCHED_LOAD_SCALE; 7920 rq->cpu_power = SCHED_POWER_SCALE;
7906 rq->post_schedule = 0; 7921 rq->post_schedule = 0;
7907 rq->active_balance = 0; 7922 rq->active_balance = 0;
7908 rq->next_balance = jiffies; 7923 rq->next_balance = jiffies;
@@ -8806,14 +8821,14 @@ cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
8806static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, 8821static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
8807 u64 shareval) 8822 u64 shareval)
8808{ 8823{
8809 return sched_group_set_shares(cgroup_tg(cgrp), shareval); 8824 return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
8810} 8825}
8811 8826
8812static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) 8827static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
8813{ 8828{
8814 struct task_group *tg = cgroup_tg(cgrp); 8829 struct task_group *tg = cgroup_tg(cgrp);
8815 8830
8816 return (u64) tg->shares; 8831 return (u64) scale_load_down(tg->shares);
8817} 8832}
8818#endif /* CONFIG_FAIR_GROUP_SCHED */ 8833#endif /* CONFIG_FAIR_GROUP_SCHED */
8819 8834
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 37f22626225e..e32a9b70ee9c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1584,7 +1584,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
1584 } 1584 }
1585 1585
1586 /* Adjust by relative CPU power of the group */ 1586 /* Adjust by relative CPU power of the group */
1587 avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; 1587 avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power;
1588 1588
1589 if (local_group) { 1589 if (local_group) {
1590 this_load = avg_load; 1590 this_load = avg_load;
@@ -1722,7 +1722,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
1722 nr_running += cpu_rq(i)->cfs.nr_running; 1722 nr_running += cpu_rq(i)->cfs.nr_running;
1723 } 1723 }
1724 1724
1725 capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); 1725 capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
1726 1726
1727 if (tmp->flags & SD_POWERSAVINGS_BALANCE) 1727 if (tmp->flags & SD_POWERSAVINGS_BALANCE)
1728 nr_running /= 2; 1728 nr_running /= 2;
@@ -2570,7 +2570,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
2570 2570
2571unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) 2571unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
2572{ 2572{
2573 return SCHED_LOAD_SCALE; 2573 return SCHED_POWER_SCALE;
2574} 2574}
2575 2575
2576unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) 2576unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
@@ -2607,10 +2607,10 @@ unsigned long scale_rt_power(int cpu)
2607 available = total - rq->rt_avg; 2607 available = total - rq->rt_avg;
2608 } 2608 }
2609 2609
2610 if (unlikely((s64)total < SCHED_LOAD_SCALE)) 2610 if (unlikely((s64)total < SCHED_POWER_SCALE))
2611 total = SCHED_LOAD_SCALE; 2611 total = SCHED_POWER_SCALE;
2612 2612
2613 total >>= SCHED_LOAD_SHIFT; 2613 total >>= SCHED_POWER_SHIFT;
2614 2614
2615 return div_u64(available, total); 2615 return div_u64(available, total);
2616} 2616}
@@ -2618,7 +2618,7 @@ unsigned long scale_rt_power(int cpu)
2618static void update_cpu_power(struct sched_domain *sd, int cpu) 2618static void update_cpu_power(struct sched_domain *sd, int cpu)
2619{ 2619{
2620 unsigned long weight = sd->span_weight; 2620 unsigned long weight = sd->span_weight;
2621 unsigned long power = SCHED_LOAD_SCALE; 2621 unsigned long power = SCHED_POWER_SCALE;
2622 struct sched_group *sdg = sd->groups; 2622 struct sched_group *sdg = sd->groups;
2623 2623
2624 if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { 2624 if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
@@ -2627,7 +2627,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
2627 else 2627 else
2628 power *= default_scale_smt_power(sd, cpu); 2628 power *= default_scale_smt_power(sd, cpu);
2629 2629
2630 power >>= SCHED_LOAD_SHIFT; 2630 power >>= SCHED_POWER_SHIFT;
2631 } 2631 }
2632 2632
2633 sdg->cpu_power_orig = power; 2633 sdg->cpu_power_orig = power;
@@ -2637,10 +2637,10 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
2637 else 2637 else
2638 power *= default_scale_freq_power(sd, cpu); 2638 power *= default_scale_freq_power(sd, cpu);
2639 2639
2640 power >>= SCHED_LOAD_SHIFT; 2640 power >>= SCHED_POWER_SHIFT;
2641 2641
2642 power *= scale_rt_power(cpu); 2642 power *= scale_rt_power(cpu);
2643 power >>= SCHED_LOAD_SHIFT; 2643 power >>= SCHED_POWER_SHIFT;
2644 2644
2645 if (!power) 2645 if (!power)
2646 power = 1; 2646 power = 1;
@@ -2682,7 +2682,7 @@ static inline int
2682fix_small_capacity(struct sched_domain *sd, struct sched_group *group) 2682fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
2683{ 2683{
2684 /* 2684 /*
2685 * Only siblings can have significantly less than SCHED_LOAD_SCALE 2685 * Only siblings can have significantly less than SCHED_POWER_SCALE
2686 */ 2686 */
2687 if (!(sd->flags & SD_SHARE_CPUPOWER)) 2687 if (!(sd->flags & SD_SHARE_CPUPOWER))
2688 return 0; 2688 return 0;
@@ -2770,7 +2770,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2770 } 2770 }
2771 2771
2772 /* Adjust by relative CPU power of the group */ 2772 /* Adjust by relative CPU power of the group */
2773 sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power; 2773 sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power;
2774 2774
2775 /* 2775 /*
2776 * Consider the group unbalanced when the imbalance is larger 2776 * Consider the group unbalanced when the imbalance is larger
@@ -2787,7 +2787,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2787 if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) 2787 if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
2788 sgs->group_imb = 1; 2788 sgs->group_imb = 1;
2789 2789
2790 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); 2790 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power,
2791 SCHED_POWER_SCALE);
2791 if (!sgs->group_capacity) 2792 if (!sgs->group_capacity)
2792 sgs->group_capacity = fix_small_capacity(sd, group); 2793 sgs->group_capacity = fix_small_capacity(sd, group);
2793 sgs->group_weight = group->group_weight; 2794 sgs->group_weight = group->group_weight;
@@ -2961,7 +2962,7 @@ static int check_asym_packing(struct sched_domain *sd,
2961 return 0; 2962 return 0;
2962 2963
2963 *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power, 2964 *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
2964 SCHED_LOAD_SCALE); 2965 SCHED_POWER_SCALE);
2965 return 1; 2966 return 1;
2966} 2967}
2967 2968
@@ -2990,7 +2991,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
2990 cpu_avg_load_per_task(this_cpu); 2991 cpu_avg_load_per_task(this_cpu);
2991 2992
2992 scaled_busy_load_per_task = sds->busiest_load_per_task 2993 scaled_busy_load_per_task = sds->busiest_load_per_task
2993 * SCHED_LOAD_SCALE; 2994 * SCHED_POWER_SCALE;
2994 scaled_busy_load_per_task /= sds->busiest->cpu_power; 2995 scaled_busy_load_per_task /= sds->busiest->cpu_power;
2995 2996
2996 if (sds->max_load - sds->this_load + scaled_busy_load_per_task >= 2997 if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
@@ -3009,10 +3010,10 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
3009 min(sds->busiest_load_per_task, sds->max_load); 3010 min(sds->busiest_load_per_task, sds->max_load);
3010 pwr_now += sds->this->cpu_power * 3011 pwr_now += sds->this->cpu_power *
3011 min(sds->this_load_per_task, sds->this_load); 3012 min(sds->this_load_per_task, sds->this_load);
3012 pwr_now /= SCHED_LOAD_SCALE; 3013 pwr_now /= SCHED_POWER_SCALE;
3013 3014
3014 /* Amount of load we'd subtract */ 3015 /* Amount of load we'd subtract */
3015 tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / 3016 tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
3016 sds->busiest->cpu_power; 3017 sds->busiest->cpu_power;
3017 if (sds->max_load > tmp) 3018 if (sds->max_load > tmp)
3018 pwr_move += sds->busiest->cpu_power * 3019 pwr_move += sds->busiest->cpu_power *
@@ -3020,15 +3021,15 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
3020 3021
3021 /* Amount of load we'd add */ 3022 /* Amount of load we'd add */
3022 if (sds->max_load * sds->busiest->cpu_power < 3023 if (sds->max_load * sds->busiest->cpu_power <
3023 sds->busiest_load_per_task * SCHED_LOAD_SCALE) 3024 sds->busiest_load_per_task * SCHED_POWER_SCALE)
3024 tmp = (sds->max_load * sds->busiest->cpu_power) / 3025 tmp = (sds->max_load * sds->busiest->cpu_power) /
3025 sds->this->cpu_power; 3026 sds->this->cpu_power;
3026 else 3027 else
3027 tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / 3028 tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
3028 sds->this->cpu_power; 3029 sds->this->cpu_power;
3029 pwr_move += sds->this->cpu_power * 3030 pwr_move += sds->this->cpu_power *
3030 min(sds->this_load_per_task, sds->this_load + tmp); 3031 min(sds->this_load_per_task, sds->this_load + tmp);
3031 pwr_move /= SCHED_LOAD_SCALE; 3032 pwr_move /= SCHED_POWER_SCALE;
3032 3033
3033 /* Move if we gain throughput */ 3034 /* Move if we gain throughput */
3034 if (pwr_move > pwr_now) 3035 if (pwr_move > pwr_now)
@@ -3070,7 +3071,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
3070 load_above_capacity = (sds->busiest_nr_running - 3071 load_above_capacity = (sds->busiest_nr_running -
3071 sds->busiest_group_capacity); 3072 sds->busiest_group_capacity);
3072 3073
3073 load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_LOAD_SCALE); 3074 load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
3074 3075
3075 load_above_capacity /= sds->busiest->cpu_power; 3076 load_above_capacity /= sds->busiest->cpu_power;
3076 } 3077 }
@@ -3090,7 +3091,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
3090 /* How much load to actually move to equalise the imbalance */ 3091 /* How much load to actually move to equalise the imbalance */
3091 *imbalance = min(max_pull * sds->busiest->cpu_power, 3092 *imbalance = min(max_pull * sds->busiest->cpu_power,
3092 (sds->avg_load - sds->this_load) * sds->this->cpu_power) 3093 (sds->avg_load - sds->this_load) * sds->this->cpu_power)
3093 / SCHED_LOAD_SCALE; 3094 / SCHED_POWER_SCALE;
3094 3095
3095 /* 3096 /*
3096 * if *imbalance is less than the average load per runnable task 3097 * if *imbalance is less than the average load per runnable task
@@ -3159,7 +3160,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3159 if (!sds.busiest || sds.busiest_nr_running == 0) 3160 if (!sds.busiest || sds.busiest_nr_running == 0)
3160 goto out_balanced; 3161 goto out_balanced;
3161 3162
3162 sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; 3163 sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr;
3163 3164
3164 /* 3165 /*
3165 * If the busiest group is imbalanced the below checks don't 3166 * If the busiest group is imbalanced the below checks don't
@@ -3238,7 +3239,8 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
3238 3239
3239 for_each_cpu(i, sched_group_cpus(group)) { 3240 for_each_cpu(i, sched_group_cpus(group)) {
3240 unsigned long power = power_of(i); 3241 unsigned long power = power_of(i);
3241 unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); 3242 unsigned long capacity = DIV_ROUND_CLOSEST(power,
3243 SCHED_POWER_SCALE);
3242 unsigned long wl; 3244 unsigned long wl;
3243 3245
3244 if (!capacity) 3246 if (!capacity)
@@ -3263,7 +3265,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
3263 * the load can be moved away from the cpu that is potentially 3265 * the load can be moved away from the cpu that is potentially
3264 * running at a lower capacity. 3266 * running at a lower capacity.
3265 */ 3267 */
3266 wl = (wl * SCHED_LOAD_SCALE) / power; 3268 wl = (wl * SCHED_POWER_SCALE) / power;
3267 3269
3268 if (wl > max_load) { 3270 if (wl > max_load) {
3269 max_load = wl; 3271 max_load = wl;
diff --git a/kernel/signal.c b/kernel/signal.c
index 7165af5f1b11..86c32b884f8e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
124 124
125static int recalc_sigpending_tsk(struct task_struct *t) 125static int recalc_sigpending_tsk(struct task_struct *t)
126{ 126{
127 if (t->signal->group_stop_count > 0 || 127 if ((t->group_stop & GROUP_STOP_PENDING) ||
128 PENDING(&t->pending, &t->blocked) || 128 PENDING(&t->pending, &t->blocked) ||
129 PENDING(&t->signal->shared_pending, &t->blocked)) { 129 PENDING(&t->signal->shared_pending, &t->blocked)) {
130 set_tsk_thread_flag(t, TIF_SIGPENDING); 130 set_tsk_thread_flag(t, TIF_SIGPENDING);
@@ -223,6 +223,83 @@ static inline void print_dropped_signal(int sig)
223 current->comm, current->pid, sig); 223 current->comm, current->pid, sig);
224} 224}
225 225
226/**
227 * task_clear_group_stop_trapping - clear group stop trapping bit
228 * @task: target task
229 *
230 * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us. Clear it
231 * and wake up the ptracer. Note that we don't need any further locking.
232 * @task->siglock guarantees that @task->parent points to the ptracer.
233 *
234 * CONTEXT:
235 * Must be called with @task->sighand->siglock held.
236 */
237static void task_clear_group_stop_trapping(struct task_struct *task)
238{
239 if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) {
240 task->group_stop &= ~GROUP_STOP_TRAPPING;
241 __wake_up_sync_key(&task->parent->signal->wait_chldexit,
242 TASK_UNINTERRUPTIBLE, 1, task);
243 }
244}
245
246/**
247 * task_clear_group_stop_pending - clear pending group stop
248 * @task: target task
249 *
250 * Clear group stop states for @task.
251 *
252 * CONTEXT:
253 * Must be called with @task->sighand->siglock held.
254 */
255void task_clear_group_stop_pending(struct task_struct *task)
256{
257 task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME |
258 GROUP_STOP_DEQUEUED);
259}
260
261/**
262 * task_participate_group_stop - participate in a group stop
263 * @task: task participating in a group stop
264 *
265 * @task has GROUP_STOP_PENDING set and is participating in a group stop.
266 * Group stop states are cleared and the group stop count is consumed if
267 * %GROUP_STOP_CONSUME was set. If the consumption completes the group
268 * stop, the appropriate %SIGNAL_* flags are set.
269 *
270 * CONTEXT:
271 * Must be called with @task->sighand->siglock held.
272 *
273 * RETURNS:
274 * %true if group stop completion should be notified to the parent, %false
275 * otherwise.
276 */
277static bool task_participate_group_stop(struct task_struct *task)
278{
279 struct signal_struct *sig = task->signal;
280 bool consume = task->group_stop & GROUP_STOP_CONSUME;
281
282 WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING));
283
284 task_clear_group_stop_pending(task);
285
286 if (!consume)
287 return false;
288
289 if (!WARN_ON_ONCE(sig->group_stop_count == 0))
290 sig->group_stop_count--;
291
292 /*
293 * Tell the caller to notify completion iff we are entering into a
294 * fresh group stop. Read comment in do_signal_stop() for details.
295 */
296 if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
297 sig->flags = SIGNAL_STOP_STOPPED;
298 return true;
299 }
300 return false;
301}
302
226/* 303/*
227 * allocate a new signal queue record 304 * allocate a new signal queue record
228 * - this may be called without locks if and only if t == current, otherwise an 305 * - this may be called without locks if and only if t == current, otherwise an
@@ -527,7 +604,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
527 * is to alert stop-signal processing code when another 604 * is to alert stop-signal processing code when another
528 * processor has come along and cleared the flag. 605 * processor has come along and cleared the flag.
529 */ 606 */
530 tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; 607 current->group_stop |= GROUP_STOP_DEQUEUED;
531 } 608 }
532 if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { 609 if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
533 /* 610 /*
@@ -592,7 +669,7 @@ static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
592 if (sigisemptyset(&m)) 669 if (sigisemptyset(&m))
593 return 0; 670 return 0;
594 671
595 signandsets(&s->signal, &s->signal, mask); 672 sigandnsets(&s->signal, &s->signal, mask);
596 list_for_each_entry_safe(q, n, &s->list, list) { 673 list_for_each_entry_safe(q, n, &s->list, list) {
597 if (sigismember(mask, q->info.si_signo)) { 674 if (sigismember(mask, q->info.si_signo)) {
598 list_del_init(&q->list); 675 list_del_init(&q->list);
@@ -727,34 +804,14 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
727 } else if (sig == SIGCONT) { 804 } else if (sig == SIGCONT) {
728 unsigned int why; 805 unsigned int why;
729 /* 806 /*
730 * Remove all stop signals from all queues, 807 * Remove all stop signals from all queues, wake all threads.
731 * and wake all threads.
732 */ 808 */
733 rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); 809 rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending);
734 t = p; 810 t = p;
735 do { 811 do {
736 unsigned int state; 812 task_clear_group_stop_pending(t);
737 rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); 813 rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
738 /* 814 wake_up_state(t, __TASK_STOPPED);
739 * If there is a handler for SIGCONT, we must make
740 * sure that no thread returns to user mode before
741 * we post the signal, in case it was the only
742 * thread eligible to run the signal handler--then
743 * it must not do anything between resuming and
744 * running the handler. With the TIF_SIGPENDING
745 * flag set, the thread will pause and acquire the
746 * siglock that we hold now and until we've queued
747 * the pending signal.
748 *
749 * Wake up the stopped thread _after_ setting
750 * TIF_SIGPENDING
751 */
752 state = __TASK_STOPPED;
753 if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
754 set_tsk_thread_flag(t, TIF_SIGPENDING);
755 state |= TASK_INTERRUPTIBLE;
756 }
757 wake_up_state(t, state);
758 } while_each_thread(p, t); 815 } while_each_thread(p, t);
759 816
760 /* 817 /*
@@ -780,13 +837,6 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
780 signal->flags = why | SIGNAL_STOP_CONTINUED; 837 signal->flags = why | SIGNAL_STOP_CONTINUED;
781 signal->group_stop_count = 0; 838 signal->group_stop_count = 0;
782 signal->group_exit_code = 0; 839 signal->group_exit_code = 0;
783 } else {
784 /*
785 * We are not stopped, but there could be a stop
786 * signal in the middle of being processed after
787 * being removed from the queue. Clear that too.
788 */
789 signal->flags &= ~SIGNAL_STOP_DEQUEUED;
790 } 840 }
791 } 841 }
792 842
@@ -875,6 +925,7 @@ static void complete_signal(int sig, struct task_struct *p, int group)
875 signal->group_stop_count = 0; 925 signal->group_stop_count = 0;
876 t = p; 926 t = p;
877 do { 927 do {
928 task_clear_group_stop_pending(t);
878 sigaddset(&t->pending.signal, SIGKILL); 929 sigaddset(&t->pending.signal, SIGKILL);
879 signal_wake_up(t, 1); 930 signal_wake_up(t, 1);
880 } while_each_thread(p, t); 931 } while_each_thread(p, t);
@@ -1109,6 +1160,7 @@ int zap_other_threads(struct task_struct *p)
1109 p->signal->group_stop_count = 0; 1160 p->signal->group_stop_count = 0;
1110 1161
1111 while_each_thread(p, t) { 1162 while_each_thread(p, t) {
1163 task_clear_group_stop_pending(t);
1112 count++; 1164 count++;
1113 1165
1114 /* Don't bother with already dead threads */ 1166 /* Don't bother with already dead threads */
@@ -1536,16 +1588,30 @@ int do_notify_parent(struct task_struct *tsk, int sig)
1536 return ret; 1588 return ret;
1537} 1589}
1538 1590
1539static void do_notify_parent_cldstop(struct task_struct *tsk, int why) 1591/**
1592 * do_notify_parent_cldstop - notify parent of stopped/continued state change
1593 * @tsk: task reporting the state change
1594 * @for_ptracer: the notification is for ptracer
1595 * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report
1596 *
1597 * Notify @tsk's parent that the stopped/continued state has changed. If
1598 * @for_ptracer is %false, @tsk's group leader notifies to its real parent.
1599 * If %true, @tsk reports to @tsk->parent which should be the ptracer.
1600 *
1601 * CONTEXT:
1602 * Must be called with tasklist_lock at least read locked.
1603 */
1604static void do_notify_parent_cldstop(struct task_struct *tsk,
1605 bool for_ptracer, int why)
1540{ 1606{
1541 struct siginfo info; 1607 struct siginfo info;
1542 unsigned long flags; 1608 unsigned long flags;
1543 struct task_struct *parent; 1609 struct task_struct *parent;
1544 struct sighand_struct *sighand; 1610 struct sighand_struct *sighand;
1545 1611
1546 if (task_ptrace(tsk)) 1612 if (for_ptracer) {
1547 parent = tsk->parent; 1613 parent = tsk->parent;
1548 else { 1614 } else {
1549 tsk = tsk->group_leader; 1615 tsk = tsk->group_leader;
1550 parent = tsk->real_parent; 1616 parent = tsk->real_parent;
1551 } 1617 }
@@ -1621,6 +1687,15 @@ static int sigkill_pending(struct task_struct *tsk)
1621} 1687}
1622 1688
1623/* 1689/*
1690 * Test whether the target task of the usual cldstop notification - the
1691 * real_parent of @child - is in the same group as the ptracer.
1692 */
1693static bool real_parent_is_ptracer(struct task_struct *child)
1694{
1695 return same_thread_group(child->parent, child->real_parent);
1696}
1697
1698/*
1624 * This must be called with current->sighand->siglock held. 1699 * This must be called with current->sighand->siglock held.
1625 * 1700 *
1626 * This should be the path for all ptrace stops. 1701 * This should be the path for all ptrace stops.
@@ -1631,10 +1706,12 @@ static int sigkill_pending(struct task_struct *tsk)
1631 * If we actually decide not to stop at all because the tracer 1706 * If we actually decide not to stop at all because the tracer
1632 * is gone, we keep current->exit_code unless clear_code. 1707 * is gone, we keep current->exit_code unless clear_code.
1633 */ 1708 */
1634static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) 1709static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
1635 __releases(&current->sighand->siglock) 1710 __releases(&current->sighand->siglock)
1636 __acquires(&current->sighand->siglock) 1711 __acquires(&current->sighand->siglock)
1637{ 1712{
1713 bool gstop_done = false;
1714
1638 if (arch_ptrace_stop_needed(exit_code, info)) { 1715 if (arch_ptrace_stop_needed(exit_code, info)) {
1639 /* 1716 /*
1640 * The arch code has something special to do before a 1717 * The arch code has something special to do before a
@@ -1655,21 +1732,49 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1655 } 1732 }
1656 1733
1657 /* 1734 /*
1658 * If there is a group stop in progress, 1735 * If @why is CLD_STOPPED, we're trapping to participate in a group
1659 * we must participate in the bookkeeping. 1736 * stop. Do the bookkeeping. Note that if SIGCONT was delievered
1737 * while siglock was released for the arch hook, PENDING could be
1738 * clear now. We act as if SIGCONT is received after TASK_TRACED
1739 * is entered - ignore it.
1660 */ 1740 */
1661 if (current->signal->group_stop_count > 0) 1741 if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING))
1662 --current->signal->group_stop_count; 1742 gstop_done = task_participate_group_stop(current);
1663 1743
1664 current->last_siginfo = info; 1744 current->last_siginfo = info;
1665 current->exit_code = exit_code; 1745 current->exit_code = exit_code;
1666 1746
1667 /* Let the debugger run. */ 1747 /*
1668 __set_current_state(TASK_TRACED); 1748 * TRACED should be visible before TRAPPING is cleared; otherwise,
1749 * the tracer might fail do_wait().
1750 */
1751 set_current_state(TASK_TRACED);
1752
1753 /*
1754 * We're committing to trapping. Clearing GROUP_STOP_TRAPPING and
1755 * transition to TASK_TRACED should be atomic with respect to
1756 * siglock. This hsould be done after the arch hook as siglock is
1757 * released and regrabbed across it.
1758 */
1759 task_clear_group_stop_trapping(current);
1760
1669 spin_unlock_irq(&current->sighand->siglock); 1761 spin_unlock_irq(&current->sighand->siglock);
1670 read_lock(&tasklist_lock); 1762 read_lock(&tasklist_lock);
1671 if (may_ptrace_stop()) { 1763 if (may_ptrace_stop()) {
1672 do_notify_parent_cldstop(current, CLD_TRAPPED); 1764 /*
1765 * Notify parents of the stop.
1766 *
1767 * While ptraced, there are two parents - the ptracer and
1768 * the real_parent of the group_leader. The ptracer should
1769 * know about every stop while the real parent is only
1770 * interested in the completion of group stop. The states
1771 * for the two don't interact with each other. Notify
1772 * separately unless they're gonna be duplicates.
1773 */
1774 do_notify_parent_cldstop(current, true, why);
1775 if (gstop_done && !real_parent_is_ptracer(current))
1776 do_notify_parent_cldstop(current, false, why);
1777
1673 /* 1778 /*
1674 * Don't want to allow preemption here, because 1779 * Don't want to allow preemption here, because
1675 * sys_ptrace() needs this task to be inactive. 1780 * sys_ptrace() needs this task to be inactive.
@@ -1684,7 +1789,16 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1684 /* 1789 /*
1685 * By the time we got the lock, our tracer went away. 1790 * By the time we got the lock, our tracer went away.
1686 * Don't drop the lock yet, another tracer may come. 1791 * Don't drop the lock yet, another tracer may come.
1792 *
1793 * If @gstop_done, the ptracer went away between group stop
1794 * completion and here. During detach, it would have set
1795 * GROUP_STOP_PENDING on us and we'll re-enter TASK_STOPPED
1796 * in do_signal_stop() on return, so notifying the real
1797 * parent of the group stop completion is enough.
1687 */ 1798 */
1799 if (gstop_done)
1800 do_notify_parent_cldstop(current, false, why);
1801
1688 __set_current_state(TASK_RUNNING); 1802 __set_current_state(TASK_RUNNING);
1689 if (clear_code) 1803 if (clear_code)
1690 current->exit_code = 0; 1804 current->exit_code = 0;
@@ -1728,7 +1842,7 @@ void ptrace_notify(int exit_code)
1728 1842
1729 /* Let the debugger run. */ 1843 /* Let the debugger run. */
1730 spin_lock_irq(&current->sighand->siglock); 1844 spin_lock_irq(&current->sighand->siglock);
1731 ptrace_stop(exit_code, 1, &info); 1845 ptrace_stop(exit_code, CLD_TRAPPED, 1, &info);
1732 spin_unlock_irq(&current->sighand->siglock); 1846 spin_unlock_irq(&current->sighand->siglock);
1733} 1847}
1734 1848
@@ -1741,66 +1855,115 @@ void ptrace_notify(int exit_code)
1741static int do_signal_stop(int signr) 1855static int do_signal_stop(int signr)
1742{ 1856{
1743 struct signal_struct *sig = current->signal; 1857 struct signal_struct *sig = current->signal;
1744 int notify;
1745 1858
1746 if (!sig->group_stop_count) { 1859 if (!(current->group_stop & GROUP_STOP_PENDING)) {
1860 unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME;
1747 struct task_struct *t; 1861 struct task_struct *t;
1748 1862
1749 if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || 1863 /* signr will be recorded in task->group_stop for retries */
1864 WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK);
1865
1866 if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) ||
1750 unlikely(signal_group_exit(sig))) 1867 unlikely(signal_group_exit(sig)))
1751 return 0; 1868 return 0;
1752 /* 1869 /*
1753 * There is no group stop already in progress. 1870 * There is no group stop already in progress. We must
1754 * We must initiate one now. 1871 * initiate one now.
1872 *
1873 * While ptraced, a task may be resumed while group stop is
1874 * still in effect and then receive a stop signal and
1875 * initiate another group stop. This deviates from the
1876 * usual behavior as two consecutive stop signals can't
1877 * cause two group stops when !ptraced. That is why we
1878 * also check !task_is_stopped(t) below.
1879 *
1880 * The condition can be distinguished by testing whether
1881 * SIGNAL_STOP_STOPPED is already set. Don't generate
1882 * group_exit_code in such case.
1883 *
1884 * This is not necessary for SIGNAL_STOP_CONTINUED because
1885 * an intervening stop signal is required to cause two
1886 * continued events regardless of ptrace.
1755 */ 1887 */
1756 sig->group_exit_code = signr; 1888 if (!(sig->flags & SIGNAL_STOP_STOPPED))
1889 sig->group_exit_code = signr;
1890 else
1891 WARN_ON_ONCE(!task_ptrace(current));
1757 1892
1893 current->group_stop &= ~GROUP_STOP_SIGMASK;
1894 current->group_stop |= signr | gstop;
1758 sig->group_stop_count = 1; 1895 sig->group_stop_count = 1;
1759 for (t = next_thread(current); t != current; t = next_thread(t)) 1896 for (t = next_thread(current); t != current;
1897 t = next_thread(t)) {
1898 t->group_stop &= ~GROUP_STOP_SIGMASK;
1760 /* 1899 /*
1761 * Setting state to TASK_STOPPED for a group 1900 * Setting state to TASK_STOPPED for a group
1762 * stop is always done with the siglock held, 1901 * stop is always done with the siglock held,
1763 * so this check has no races. 1902 * so this check has no races.
1764 */ 1903 */
1765 if (!(t->flags & PF_EXITING) && 1904 if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) {
1766 !task_is_stopped_or_traced(t)) { 1905 t->group_stop |= signr | gstop;
1767 sig->group_stop_count++; 1906 sig->group_stop_count++;
1768 signal_wake_up(t, 0); 1907 signal_wake_up(t, 0);
1769 } 1908 }
1909 }
1770 } 1910 }
1771 /* 1911retry:
1772 * If there are no other threads in the group, or if there is 1912 if (likely(!task_ptrace(current))) {
1773 * a group stop in progress and we are the last to stop, report 1913 int notify = 0;
1774 * to the parent. When ptraced, every thread reports itself. 1914
1775 */ 1915 /*
1776 notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0; 1916 * If there are no other threads in the group, or if there
1777 notify = tracehook_notify_jctl(notify, CLD_STOPPED); 1917 * is a group stop in progress and we are the last to stop,
1778 /* 1918 * report to the parent.
1779 * tracehook_notify_jctl() can drop and reacquire siglock, so 1919 */
1780 * we keep ->group_stop_count != 0 before the call. If SIGCONT 1920 if (task_participate_group_stop(current))
1781 * or SIGKILL comes in between ->group_stop_count == 0. 1921 notify = CLD_STOPPED;
1782 */ 1922
1783 if (sig->group_stop_count) {
1784 if (!--sig->group_stop_count)
1785 sig->flags = SIGNAL_STOP_STOPPED;
1786 current->exit_code = sig->group_exit_code;
1787 __set_current_state(TASK_STOPPED); 1923 __set_current_state(TASK_STOPPED);
1924 spin_unlock_irq(&current->sighand->siglock);
1925
1926 /*
1927 * Notify the parent of the group stop completion. Because
1928 * we're not holding either the siglock or tasklist_lock
1929 * here, ptracer may attach inbetween; however, this is for
1930 * group stop and should always be delivered to the real
1931 * parent of the group leader. The new ptracer will get
1932 * its notification when this task transitions into
1933 * TASK_TRACED.
1934 */
1935 if (notify) {
1936 read_lock(&tasklist_lock);
1937 do_notify_parent_cldstop(current, false, notify);
1938 read_unlock(&tasklist_lock);
1939 }
1940
1941 /* Now we don't run again until woken by SIGCONT or SIGKILL */
1942 schedule();
1943
1944 spin_lock_irq(&current->sighand->siglock);
1945 } else {
1946 ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK,
1947 CLD_STOPPED, 0, NULL);
1948 current->exit_code = 0;
1788 } 1949 }
1789 spin_unlock_irq(&current->sighand->siglock);
1790 1950
1791 if (notify) { 1951 /*
1792 read_lock(&tasklist_lock); 1952 * GROUP_STOP_PENDING could be set if another group stop has
1793 do_notify_parent_cldstop(current, notify); 1953 * started since being woken up or ptrace wants us to transit
1794 read_unlock(&tasklist_lock); 1954 * between TASK_STOPPED and TRACED. Retry group stop.
1955 */
1956 if (current->group_stop & GROUP_STOP_PENDING) {
1957 WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK));
1958 goto retry;
1795 } 1959 }
1796 1960
1797 /* Now we don't run again until woken by SIGCONT or SIGKILL */ 1961 /* PTRACE_ATTACH might have raced with task killing, clear trapping */
1798 do { 1962 task_clear_group_stop_trapping(current);
1799 schedule(); 1963
1800 } while (try_to_freeze()); 1964 spin_unlock_irq(&current->sighand->siglock);
1801 1965
1802 tracehook_finish_jctl(); 1966 tracehook_finish_jctl();
1803 current->exit_code = 0;
1804 1967
1805 return 1; 1968 return 1;
1806} 1969}
@@ -1814,7 +1977,7 @@ static int ptrace_signal(int signr, siginfo_t *info,
1814 ptrace_signal_deliver(regs, cookie); 1977 ptrace_signal_deliver(regs, cookie);
1815 1978
1816 /* Let the debugger run. */ 1979 /* Let the debugger run. */
1817 ptrace_stop(signr, 0, info); 1980 ptrace_stop(signr, CLD_TRAPPED, 0, info);
1818 1981
1819 /* We're back. Did the debugger cancel the sig? */ 1982 /* We're back. Did the debugger cancel the sig? */
1820 signr = current->exit_code; 1983 signr = current->exit_code;
@@ -1869,18 +2032,36 @@ relock:
1869 * the CLD_ si_code into SIGNAL_CLD_MASK bits. 2032 * the CLD_ si_code into SIGNAL_CLD_MASK bits.
1870 */ 2033 */
1871 if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { 2034 if (unlikely(signal->flags & SIGNAL_CLD_MASK)) {
1872 int why = (signal->flags & SIGNAL_STOP_CONTINUED) 2035 struct task_struct *leader;
1873 ? CLD_CONTINUED : CLD_STOPPED; 2036 int why;
2037
2038 if (signal->flags & SIGNAL_CLD_CONTINUED)
2039 why = CLD_CONTINUED;
2040 else
2041 why = CLD_STOPPED;
2042
1874 signal->flags &= ~SIGNAL_CLD_MASK; 2043 signal->flags &= ~SIGNAL_CLD_MASK;
1875 2044
1876 why = tracehook_notify_jctl(why, CLD_CONTINUED);
1877 spin_unlock_irq(&sighand->siglock); 2045 spin_unlock_irq(&sighand->siglock);
1878 2046
1879 if (why) { 2047 /*
1880 read_lock(&tasklist_lock); 2048 * Notify the parent that we're continuing. This event is
1881 do_notify_parent_cldstop(current->group_leader, why); 2049 * always per-process and doesn't make whole lot of sense
1882 read_unlock(&tasklist_lock); 2050 * for ptracers, who shouldn't consume the state via
1883 } 2051 * wait(2) either, but, for backward compatibility, notify
2052 * the ptracer of the group leader too unless it's gonna be
2053 * a duplicate.
2054 */
2055 read_lock(&tasklist_lock);
2056
2057 do_notify_parent_cldstop(current, false, why);
2058
2059 leader = current->group_leader;
2060 if (task_ptrace(leader) && !real_parent_is_ptracer(leader))
2061 do_notify_parent_cldstop(leader, true, why);
2062
2063 read_unlock(&tasklist_lock);
2064
1884 goto relock; 2065 goto relock;
1885 } 2066 }
1886 2067
@@ -1897,8 +2078,8 @@ relock:
1897 if (unlikely(signr != 0)) 2078 if (unlikely(signr != 0))
1898 ka = return_ka; 2079 ka = return_ka;
1899 else { 2080 else {
1900 if (unlikely(signal->group_stop_count > 0) && 2081 if (unlikely(current->group_stop &
1901 do_signal_stop(0)) 2082 GROUP_STOP_PENDING) && do_signal_stop(0))
1902 goto relock; 2083 goto relock;
1903 2084
1904 signr = dequeue_signal(current, &current->blocked, 2085 signr = dequeue_signal(current, &current->blocked,
@@ -2017,10 +2198,42 @@ relock:
2017 return signr; 2198 return signr;
2018} 2199}
2019 2200
2201/*
2202 * It could be that complete_signal() picked us to notify about the
2203 * group-wide signal. Other threads should be notified now to take
2204 * the shared signals in @which since we will not.
2205 */
2206static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which)
2207{
2208 sigset_t retarget;
2209 struct task_struct *t;
2210
2211 sigandsets(&retarget, &tsk->signal->shared_pending.signal, which);
2212 if (sigisemptyset(&retarget))
2213 return;
2214
2215 t = tsk;
2216 while_each_thread(tsk, t) {
2217 if (t->flags & PF_EXITING)
2218 continue;
2219
2220 if (!has_pending_signals(&retarget, &t->blocked))
2221 continue;
2222 /* Remove the signals this thread can handle. */
2223 sigandsets(&retarget, &retarget, &t->blocked);
2224
2225 if (!signal_pending(t))
2226 signal_wake_up(t, 0);
2227
2228 if (sigisemptyset(&retarget))
2229 break;
2230 }
2231}
2232
2020void exit_signals(struct task_struct *tsk) 2233void exit_signals(struct task_struct *tsk)
2021{ 2234{
2022 int group_stop = 0; 2235 int group_stop = 0;
2023 struct task_struct *t; 2236 sigset_t unblocked;
2024 2237
2025 if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { 2238 if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) {
2026 tsk->flags |= PF_EXITING; 2239 tsk->flags |= PF_EXITING;
@@ -2036,26 +2249,23 @@ void exit_signals(struct task_struct *tsk)
2036 if (!signal_pending(tsk)) 2249 if (!signal_pending(tsk))
2037 goto out; 2250 goto out;
2038 2251
2039 /* 2252 unblocked = tsk->blocked;
2040 * It could be that __group_complete_signal() choose us to 2253 signotset(&unblocked);
2041 * notify about group-wide signal. Another thread should be 2254 retarget_shared_pending(tsk, &unblocked);
2042 * woken now to take the signal since we will not.
2043 */
2044 for (t = tsk; (t = next_thread(t)) != tsk; )
2045 if (!signal_pending(t) && !(t->flags & PF_EXITING))
2046 recalc_sigpending_and_wake(t);
2047 2255
2048 if (unlikely(tsk->signal->group_stop_count) && 2256 if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) &&
2049 !--tsk->signal->group_stop_count) { 2257 task_participate_group_stop(tsk))
2050 tsk->signal->flags = SIGNAL_STOP_STOPPED; 2258 group_stop = CLD_STOPPED;
2051 group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
2052 }
2053out: 2259out:
2054 spin_unlock_irq(&tsk->sighand->siglock); 2260 spin_unlock_irq(&tsk->sighand->siglock);
2055 2261
2262 /*
2263 * If group stop has completed, deliver the notification. This
2264 * should always go to the real parent of the group leader.
2265 */
2056 if (unlikely(group_stop)) { 2266 if (unlikely(group_stop)) {
2057 read_lock(&tasklist_lock); 2267 read_lock(&tasklist_lock);
2058 do_notify_parent_cldstop(tsk, group_stop); 2268 do_notify_parent_cldstop(tsk, false, group_stop);
2059 read_unlock(&tasklist_lock); 2269 read_unlock(&tasklist_lock);
2060 } 2270 }
2061} 2271}
@@ -2089,11 +2299,33 @@ long do_no_restart_syscall(struct restart_block *param)
2089 return -EINTR; 2299 return -EINTR;
2090} 2300}
2091 2301
2092/* 2302static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset)
2093 * We don't need to get the kernel lock - this is all local to this 2303{
2094 * particular thread.. (and that's good, because this is _heavily_ 2304 if (signal_pending(tsk) && !thread_group_empty(tsk)) {
2095 * used by various programs) 2305 sigset_t newblocked;
2306 /* A set of now blocked but previously unblocked signals. */
2307 sigandnsets(&newblocked, newset, &current->blocked);
2308 retarget_shared_pending(tsk, &newblocked);
2309 }
2310 tsk->blocked = *newset;
2311 recalc_sigpending();
2312}
2313
2314/**
2315 * set_current_blocked - change current->blocked mask
2316 * @newset: new mask
2317 *
2318 * It is wrong to change ->blocked directly, this helper should be used
2319 * to ensure the process can't miss a shared signal we are going to block.
2096 */ 2320 */
2321void set_current_blocked(const sigset_t *newset)
2322{
2323 struct task_struct *tsk = current;
2324
2325 spin_lock_irq(&tsk->sighand->siglock);
2326 __set_task_blocked(tsk, newset);
2327 spin_unlock_irq(&tsk->sighand->siglock);
2328}
2097 2329
2098/* 2330/*
2099 * This is also useful for kernel threads that want to temporarily 2331 * This is also useful for kernel threads that want to temporarily
@@ -2105,30 +2337,29 @@ long do_no_restart_syscall(struct restart_block *param)
2105 */ 2337 */
2106int sigprocmask(int how, sigset_t *set, sigset_t *oldset) 2338int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
2107{ 2339{
2108 int error; 2340 struct task_struct *tsk = current;
2341 sigset_t newset;
2109 2342
2110 spin_lock_irq(&current->sighand->siglock); 2343 /* Lockless, only current can change ->blocked, never from irq */
2111 if (oldset) 2344 if (oldset)
2112 *oldset = current->blocked; 2345 *oldset = tsk->blocked;
2113 2346
2114 error = 0;
2115 switch (how) { 2347 switch (how) {
2116 case SIG_BLOCK: 2348 case SIG_BLOCK:
2117 sigorsets(&current->blocked, &current->blocked, set); 2349 sigorsets(&newset, &tsk->blocked, set);
2118 break; 2350 break;
2119 case SIG_UNBLOCK: 2351 case SIG_UNBLOCK:
2120 signandsets(&current->blocked, &current->blocked, set); 2352 sigandnsets(&newset, &tsk->blocked, set);
2121 break; 2353 break;
2122 case SIG_SETMASK: 2354 case SIG_SETMASK:
2123 current->blocked = *set; 2355 newset = *set;
2124 break; 2356 break;
2125 default: 2357 default:
2126 error = -EINVAL; 2358 return -EINVAL;
2127 } 2359 }
2128 recalc_sigpending();
2129 spin_unlock_irq(&current->sighand->siglock);
2130 2360
2131 return error; 2361 set_current_blocked(&newset);
2362 return 0;
2132} 2363}
2133 2364
2134/** 2365/**
@@ -2138,40 +2369,34 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
2138 * @oset: previous value of signal mask if non-null 2369 * @oset: previous value of signal mask if non-null
2139 * @sigsetsize: size of sigset_t type 2370 * @sigsetsize: size of sigset_t type
2140 */ 2371 */
2141SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, 2372SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset,
2142 sigset_t __user *, oset, size_t, sigsetsize) 2373 sigset_t __user *, oset, size_t, sigsetsize)
2143{ 2374{
2144 int error = -EINVAL;
2145 sigset_t old_set, new_set; 2375 sigset_t old_set, new_set;
2376 int error;
2146 2377
2147 /* XXX: Don't preclude handling different sized sigset_t's. */ 2378 /* XXX: Don't preclude handling different sized sigset_t's. */
2148 if (sigsetsize != sizeof(sigset_t)) 2379 if (sigsetsize != sizeof(sigset_t))
2149 goto out; 2380 return -EINVAL;
2150 2381
2151 if (set) { 2382 old_set = current->blocked;
2152 error = -EFAULT; 2383
2153 if (copy_from_user(&new_set, set, sizeof(*set))) 2384 if (nset) {
2154 goto out; 2385 if (copy_from_user(&new_set, nset, sizeof(sigset_t)))
2386 return -EFAULT;
2155 sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); 2387 sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
2156 2388
2157 error = sigprocmask(how, &new_set, &old_set); 2389 error = sigprocmask(how, &new_set, NULL);
2158 if (error) 2390 if (error)
2159 goto out; 2391 return error;
2160 if (oset) 2392 }
2161 goto set_old;
2162 } else if (oset) {
2163 spin_lock_irq(&current->sighand->siglock);
2164 old_set = current->blocked;
2165 spin_unlock_irq(&current->sighand->siglock);
2166 2393
2167 set_old: 2394 if (oset) {
2168 error = -EFAULT; 2395 if (copy_to_user(oset, &old_set, sizeof(sigset_t)))
2169 if (copy_to_user(oset, &old_set, sizeof(*oset))) 2396 return -EFAULT;
2170 goto out;
2171 } 2397 }
2172 error = 0; 2398
2173out: 2399 return 0;
2174 return error;
2175} 2400}
2176 2401
2177long do_sigpending(void __user *set, unsigned long sigsetsize) 2402long do_sigpending(void __user *set, unsigned long sigsetsize)
@@ -2284,6 +2509,66 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
2284#endif 2509#endif
2285 2510
2286/** 2511/**
2512 * do_sigtimedwait - wait for queued signals specified in @which
2513 * @which: queued signals to wait for
2514 * @info: if non-null, the signal's siginfo is returned here
2515 * @ts: upper bound on process time suspension
2516 */
2517int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
2518 const struct timespec *ts)
2519{
2520 struct task_struct *tsk = current;
2521 long timeout = MAX_SCHEDULE_TIMEOUT;
2522 sigset_t mask = *which;
2523 int sig;
2524
2525 if (ts) {
2526 if (!timespec_valid(ts))
2527 return -EINVAL;
2528 timeout = timespec_to_jiffies(ts);
2529 /*
2530 * We can be close to the next tick, add another one
2531 * to ensure we will wait at least the time asked for.
2532 */
2533 if (ts->tv_sec || ts->tv_nsec)
2534 timeout++;
2535 }
2536
2537 /*
2538 * Invert the set of allowed signals to get those we want to block.
2539 */
2540 sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
2541 signotset(&mask);
2542
2543 spin_lock_irq(&tsk->sighand->siglock);
2544 sig = dequeue_signal(tsk, &mask, info);
2545 if (!sig && timeout) {
2546 /*
2547 * None ready, temporarily unblock those we're interested
2548 * while we are sleeping in so that we'll be awakened when
2549 * they arrive. Unblocking is always fine, we can avoid
2550 * set_current_blocked().
2551 */
2552 tsk->real_blocked = tsk->blocked;
2553 sigandsets(&tsk->blocked, &tsk->blocked, &mask);
2554 recalc_sigpending();
2555 spin_unlock_irq(&tsk->sighand->siglock);
2556
2557 timeout = schedule_timeout_interruptible(timeout);
2558
2559 spin_lock_irq(&tsk->sighand->siglock);
2560 __set_task_blocked(tsk, &tsk->real_blocked);
2561 siginitset(&tsk->real_blocked, 0);
2562 sig = dequeue_signal(tsk, &mask, info);
2563 }
2564 spin_unlock_irq(&tsk->sighand->siglock);
2565
2566 if (sig)
2567 return sig;
2568 return timeout ? -EINTR : -EAGAIN;
2569}
2570
2571/**
2287 * sys_rt_sigtimedwait - synchronously wait for queued signals specified 2572 * sys_rt_sigtimedwait - synchronously wait for queued signals specified
2288 * in @uthese 2573 * in @uthese
2289 * @uthese: queued signals to wait for 2574 * @uthese: queued signals to wait for
@@ -2295,11 +2580,10 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
2295 siginfo_t __user *, uinfo, const struct timespec __user *, uts, 2580 siginfo_t __user *, uinfo, const struct timespec __user *, uts,
2296 size_t, sigsetsize) 2581 size_t, sigsetsize)
2297{ 2582{
2298 int ret, sig;
2299 sigset_t these; 2583 sigset_t these;
2300 struct timespec ts; 2584 struct timespec ts;
2301 siginfo_t info; 2585 siginfo_t info;
2302 long timeout = 0; 2586 int ret;
2303 2587
2304 /* XXX: Don't preclude handling different sized sigset_t's. */ 2588 /* XXX: Don't preclude handling different sized sigset_t's. */
2305 if (sigsetsize != sizeof(sigset_t)) 2589 if (sigsetsize != sizeof(sigset_t))
@@ -2308,61 +2592,16 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
2308 if (copy_from_user(&these, uthese, sizeof(these))) 2592 if (copy_from_user(&these, uthese, sizeof(these)))
2309 return -EFAULT; 2593 return -EFAULT;
2310 2594
2311 /*
2312 * Invert the set of allowed signals to get those we
2313 * want to block.
2314 */
2315 sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP));
2316 signotset(&these);
2317
2318 if (uts) { 2595 if (uts) {
2319 if (copy_from_user(&ts, uts, sizeof(ts))) 2596 if (copy_from_user(&ts, uts, sizeof(ts)))
2320 return -EFAULT; 2597 return -EFAULT;
2321 if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0
2322 || ts.tv_sec < 0)
2323 return -EINVAL;
2324 } 2598 }
2325 2599
2326 spin_lock_irq(&current->sighand->siglock); 2600 ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL);
2327 sig = dequeue_signal(current, &these, &info);
2328 if (!sig) {
2329 timeout = MAX_SCHEDULE_TIMEOUT;
2330 if (uts)
2331 timeout = (timespec_to_jiffies(&ts)
2332 + (ts.tv_sec || ts.tv_nsec));
2333
2334 if (timeout) {
2335 /*
2336 * None ready -- temporarily unblock those we're
2337 * interested while we are sleeping in so that we'll
2338 * be awakened when they arrive.
2339 */
2340 current->real_blocked = current->blocked;
2341 sigandsets(&current->blocked, &current->blocked, &these);
2342 recalc_sigpending();
2343 spin_unlock_irq(&current->sighand->siglock);
2344
2345 timeout = schedule_timeout_interruptible(timeout);
2346
2347 spin_lock_irq(&current->sighand->siglock);
2348 sig = dequeue_signal(current, &these, &info);
2349 current->blocked = current->real_blocked;
2350 siginitset(&current->real_blocked, 0);
2351 recalc_sigpending();
2352 }
2353 }
2354 spin_unlock_irq(&current->sighand->siglock);
2355 2601
2356 if (sig) { 2602 if (ret > 0 && uinfo) {
2357 ret = sig; 2603 if (copy_siginfo_to_user(uinfo, &info))
2358 if (uinfo) { 2604 ret = -EFAULT;
2359 if (copy_siginfo_to_user(uinfo, &info))
2360 ret = -EFAULT;
2361 }
2362 } else {
2363 ret = -EAGAIN;
2364 if (timeout)
2365 ret = -EINTR;
2366 } 2605 }
2367 2606
2368 return ret; 2607 return ret;
@@ -2650,60 +2889,51 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
2650/** 2889/**
2651 * sys_sigprocmask - examine and change blocked signals 2890 * sys_sigprocmask - examine and change blocked signals
2652 * @how: whether to add, remove, or set signals 2891 * @how: whether to add, remove, or set signals
2653 * @set: signals to add or remove (if non-null) 2892 * @nset: signals to add or remove (if non-null)
2654 * @oset: previous value of signal mask if non-null 2893 * @oset: previous value of signal mask if non-null
2655 * 2894 *
2656 * Some platforms have their own version with special arguments; 2895 * Some platforms have their own version with special arguments;
2657 * others support only sys_rt_sigprocmask. 2896 * others support only sys_rt_sigprocmask.
2658 */ 2897 */
2659 2898
2660SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, 2899SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
2661 old_sigset_t __user *, oset) 2900 old_sigset_t __user *, oset)
2662{ 2901{
2663 int error;
2664 old_sigset_t old_set, new_set; 2902 old_sigset_t old_set, new_set;
2903 sigset_t new_blocked;
2665 2904
2666 if (set) { 2905 old_set = current->blocked.sig[0];
2667 error = -EFAULT; 2906
2668 if (copy_from_user(&new_set, set, sizeof(*set))) 2907 if (nset) {
2669 goto out; 2908 if (copy_from_user(&new_set, nset, sizeof(*nset)))
2909 return -EFAULT;
2670 new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); 2910 new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
2671 2911
2672 spin_lock_irq(&current->sighand->siglock); 2912 new_blocked = current->blocked;
2673 old_set = current->blocked.sig[0];
2674 2913
2675 error = 0;
2676 switch (how) { 2914 switch (how) {
2677 default:
2678 error = -EINVAL;
2679 break;
2680 case SIG_BLOCK: 2915 case SIG_BLOCK:
2681 sigaddsetmask(&current->blocked, new_set); 2916 sigaddsetmask(&new_blocked, new_set);
2682 break; 2917 break;
2683 case SIG_UNBLOCK: 2918 case SIG_UNBLOCK:
2684 sigdelsetmask(&current->blocked, new_set); 2919 sigdelsetmask(&new_blocked, new_set);
2685 break; 2920 break;
2686 case SIG_SETMASK: 2921 case SIG_SETMASK:
2687 current->blocked.sig[0] = new_set; 2922 new_blocked.sig[0] = new_set;
2688 break; 2923 break;
2924 default:
2925 return -EINVAL;
2689 } 2926 }
2690 2927
2691 recalc_sigpending(); 2928 set_current_blocked(&new_blocked);
2692 spin_unlock_irq(&current->sighand->siglock); 2929 }
2693 if (error) 2930
2694 goto out; 2931 if (oset) {
2695 if (oset)
2696 goto set_old;
2697 } else if (oset) {
2698 old_set = current->blocked.sig[0];
2699 set_old:
2700 error = -EFAULT;
2701 if (copy_to_user(oset, &old_set, sizeof(*oset))) 2932 if (copy_to_user(oset, &old_set, sizeof(*oset)))
2702 goto out; 2933 return -EFAULT;
2703 } 2934 }
2704 error = 0; 2935
2705out: 2936 return 0;
2706 return error;
2707} 2937}
2708#endif /* __ARCH_WANT_SYS_SIGPROCMASK */ 2938#endif /* __ARCH_WANT_SYS_SIGPROCMASK */
2709 2939
@@ -2793,8 +3023,10 @@ SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler)
2793 3023
2794SYSCALL_DEFINE0(pause) 3024SYSCALL_DEFINE0(pause)
2795{ 3025{
2796 current->state = TASK_INTERRUPTIBLE; 3026 while (!signal_pending(current)) {
2797 schedule(); 3027 current->state = TASK_INTERRUPTIBLE;
3028 schedule();
3029 }
2798 return -ERESTARTNOHAND; 3030 return -ERESTARTNOHAND;
2799} 3031}
2800 3032
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 25cc41cd8f33..62cbc8877fef 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt);
46cond_syscall(compat_sys_getsockopt); 46cond_syscall(compat_sys_getsockopt);
47cond_syscall(sys_shutdown); 47cond_syscall(sys_shutdown);
48cond_syscall(sys_sendmsg); 48cond_syscall(sys_sendmsg);
49cond_syscall(sys_sendmmsg);
49cond_syscall(compat_sys_sendmsg); 50cond_syscall(compat_sys_sendmsg);
51cond_syscall(compat_sys_sendmmsg);
50cond_syscall(sys_recvmsg); 52cond_syscall(sys_recvmsg);
51cond_syscall(sys_recvmmsg); 53cond_syscall(sys_recvmmsg);
52cond_syscall(compat_sys_recvmsg); 54cond_syscall(compat_sys_recvmsg);
@@ -69,15 +71,22 @@ cond_syscall(compat_sys_epoll_pwait);
69cond_syscall(sys_semget); 71cond_syscall(sys_semget);
70cond_syscall(sys_semop); 72cond_syscall(sys_semop);
71cond_syscall(sys_semtimedop); 73cond_syscall(sys_semtimedop);
74cond_syscall(compat_sys_semtimedop);
72cond_syscall(sys_semctl); 75cond_syscall(sys_semctl);
76cond_syscall(compat_sys_semctl);
73cond_syscall(sys_msgget); 77cond_syscall(sys_msgget);
74cond_syscall(sys_msgsnd); 78cond_syscall(sys_msgsnd);
79cond_syscall(compat_sys_msgsnd);
75cond_syscall(sys_msgrcv); 80cond_syscall(sys_msgrcv);
81cond_syscall(compat_sys_msgrcv);
76cond_syscall(sys_msgctl); 82cond_syscall(sys_msgctl);
83cond_syscall(compat_sys_msgctl);
77cond_syscall(sys_shmget); 84cond_syscall(sys_shmget);
78cond_syscall(sys_shmat); 85cond_syscall(sys_shmat);
86cond_syscall(compat_sys_shmat);
79cond_syscall(sys_shmdt); 87cond_syscall(sys_shmdt);
80cond_syscall(sys_shmctl); 88cond_syscall(sys_shmctl);
89cond_syscall(compat_sys_shmctl);
81cond_syscall(sys_mq_open); 90cond_syscall(sys_mq_open);
82cond_syscall(sys_mq_unlink); 91cond_syscall(sys_mq_unlink);
83cond_syscall(sys_mq_timedsend); 92cond_syscall(sys_mq_timedsend);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3dd0c46fa3bb..4fc92445a29c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -56,6 +56,7 @@
56#include <linux/kprobes.h> 56#include <linux/kprobes.h>
57#include <linux/pipe_fs_i.h> 57#include <linux/pipe_fs_i.h>
58#include <linux/oom.h> 58#include <linux/oom.h>
59#include <linux/kmod.h>
59 60
60#include <asm/uaccess.h> 61#include <asm/uaccess.h>
61#include <asm/processor.h> 62#include <asm/processor.h>
@@ -616,6 +617,11 @@ static struct ctl_table kern_table[] = {
616 .child = random_table, 617 .child = random_table,
617 }, 618 },
618 { 619 {
620 .procname = "usermodehelper",
621 .mode = 0555,
622 .child = usermodehelper_table,
623 },
624 {
619 .procname = "overflowuid", 625 .procname = "overflowuid",
620 .data = &overflowuid, 626 .data = &overflowuid,
621 .maxlen = sizeof(int), 627 .maxlen = sizeof(int),
@@ -1500,7 +1506,7 @@ static struct ctl_table fs_table[] = {
1500 1506
1501static struct ctl_table debug_table[] = { 1507static struct ctl_table debug_table[] = {
1502#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ 1508#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
1503 defined(CONFIG_S390) 1509 defined(CONFIG_S390) || defined(CONFIG_TILE)
1504 { 1510 {
1505 .procname = "exception-trace", 1511 .procname = "exception-trace",
1506 .data = &show_unhandled_signals, 1512 .data = &show_unhandled_signals,
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 9265014cb4db..2d966244ea60 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -494,7 +494,7 @@ static int update_rmtp(ktime_t exp, enum alarmtimer_type type,
494 */ 494 */
495static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) 495static long __sched alarm_timer_nsleep_restart(struct restart_block *restart)
496{ 496{
497 enum alarmtimer_type type = restart->nanosleep.index; 497 enum alarmtimer_type type = restart->nanosleep.clockid;
498 ktime_t exp; 498 ktime_t exp;
499 struct timespec __user *rmtp; 499 struct timespec __user *rmtp;
500 struct alarm alarm; 500 struct alarm alarm;
@@ -573,7 +573,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
573 573
574 restart = &current_thread_info()->restart_block; 574 restart = &current_thread_info()->restart_block;
575 restart->fn = alarm_timer_nsleep_restart; 575 restart->fn = alarm_timer_nsleep_restart;
576 restart->nanosleep.index = type; 576 restart->nanosleep.clockid = type;
577 restart->nanosleep.expires = exp.tv64; 577 restart->nanosleep.expires = exp.tv64;
578 restart->nanosleep.rmtp = rmtp; 578 restart->nanosleep.rmtp = rmtp;
579 ret = -ERESTART_RESTARTBLOCK; 579 ret = -ERESTART_RESTARTBLOCK;
@@ -669,12 +669,20 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr)
669 */ 669 */
670static int __init alarmtimer_init_late(void) 670static int __init alarmtimer_init_late(void)
671{ 671{
672 struct device *dev;
672 char *str; 673 char *str;
673 674
674 /* Find an rtc device and init the rtc_timer */ 675 /* Find an rtc device and init the rtc_timer */
675 class_find_device(rtc_class, NULL, &str, has_wakealarm); 676 dev = class_find_device(rtc_class, NULL, &str, has_wakealarm);
676 if (str) 677 /* If we have a device then str is valid. See has_wakealarm() */
678 if (dev) {
677 rtcdev = rtc_class_open(str); 679 rtcdev = rtc_class_open(str);
680 /*
681 * Drop the reference we got in class_find_device,
682 * rtc_open takes its own.
683 */
684 put_device(dev);
685 }
678 if (!rtcdev) { 686 if (!rtcdev) {
679 printk(KERN_WARNING "No RTC device found, ALARM timers will" 687 printk(KERN_WARNING "No RTC device found, ALARM timers will"
680 " not wake from suspend"); 688 " not wake from suspend");
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 22a9da9a9c96..c027d4f602f1 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -197,7 +197,7 @@ EXPORT_SYMBOL_GPL(clockevents_register_device);
197static void clockevents_config(struct clock_event_device *dev, 197static void clockevents_config(struct clock_event_device *dev,
198 u32 freq) 198 u32 freq)
199{ 199{
200 unsigned long sec; 200 u64 sec;
201 201
202 if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) 202 if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
203 return; 203 return;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index d9d5f8c885f6..1c95fd677328 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -639,7 +639,7 @@ static void clocksource_enqueue(struct clocksource *cs)
639 */ 639 */
640void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) 640void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
641{ 641{
642 unsigned long sec; 642 u64 sec;
643 643
644 /* 644 /*
645 * Calc the maximum number of seconds which we can run before 645 * Calc the maximum number of seconds which we can run before
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 723c7637e55a..c7218d132738 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -456,23 +456,27 @@ void tick_broadcast_oneshot_control(unsigned long reason)
456 unsigned long flags; 456 unsigned long flags;
457 int cpu; 457 int cpu;
458 458
459 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
460
461 /* 459 /*
462 * Periodic mode does not care about the enter/exit of power 460 * Periodic mode does not care about the enter/exit of power
463 * states 461 * states
464 */ 462 */
465 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) 463 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
466 goto out; 464 return;
467 465
468 bc = tick_broadcast_device.evtdev; 466 /*
467 * We are called with preemption disabled from the depth of the
468 * idle code, so we can't be moved away.
469 */
469 cpu = smp_processor_id(); 470 cpu = smp_processor_id();
470 td = &per_cpu(tick_cpu_device, cpu); 471 td = &per_cpu(tick_cpu_device, cpu);
471 dev = td->evtdev; 472 dev = td->evtdev;
472 473
473 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) 474 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
474 goto out; 475 return;
476
477 bc = tick_broadcast_device.evtdev;
475 478
479 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
476 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { 480 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
477 if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { 481 if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
478 cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask()); 482 cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
@@ -489,8 +493,6 @@ void tick_broadcast_oneshot_control(unsigned long reason)
489 tick_program_event(dev->next_event, 1); 493 tick_program_event(dev->next_event, 1);
490 } 494 }
491 } 495 }
492
493out:
494 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 496 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
495} 497}
496 498
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8e6a05a5915a..342408cf68dd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -680,7 +680,7 @@ static void timekeeping_resume(void)
680 clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); 680 clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
681 681
682 /* Resume hrtimers */ 682 /* Resume hrtimers */
683 hres_timers_resume(); 683 hrtimers_resume();
684} 684}
685 685
686static int timekeeping_suspend(void) 686static int timekeeping_suspend(void)
@@ -1099,6 +1099,21 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1099} 1099}
1100 1100
1101/** 1101/**
1102 * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
1103 */
1104ktime_t ktime_get_monotonic_offset(void)
1105{
1106 unsigned long seq;
1107 struct timespec wtom;
1108
1109 do {
1110 seq = read_seqbegin(&xtime_lock);
1111 wtom = wall_to_monotonic;
1112 } while (read_seqretry(&xtime_lock, seq));
1113 return timespec_to_ktime(wtom);
1114}
1115
1116/**
1102 * xtime_update() - advances the timekeeping infrastructure 1117 * xtime_update() - advances the timekeeping infrastructure
1103 * @ticks: number of ticks, that have elapsed since the last call. 1118 * @ticks: number of ticks, that have elapsed since the last call.
1104 * 1119 *
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 44646179eaba..bff131b9510a 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -15,6 +15,7 @@
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
18#include <linux/proc_fs.h>
18 19
19static struct uts_namespace *create_uts_ns(void) 20static struct uts_namespace *create_uts_ns(void)
20{ 21{
@@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref)
79 put_user_ns(ns->user_ns); 80 put_user_ns(ns->user_ns);
80 kfree(ns); 81 kfree(ns);
81} 82}
83
84static void *utsns_get(struct task_struct *task)
85{
86 struct uts_namespace *ns = NULL;
87 struct nsproxy *nsproxy;
88
89 rcu_read_lock();
90 nsproxy = task_nsproxy(task);
91 if (nsproxy) {
92 ns = nsproxy->uts_ns;
93 get_uts_ns(ns);
94 }
95 rcu_read_unlock();
96
97 return ns;
98}
99
100static void utsns_put(void *ns)
101{
102 put_uts_ns(ns);
103}
104
105static int utsns_install(struct nsproxy *nsproxy, void *ns)
106{
107 get_uts_ns(ns);
108 put_uts_ns(nsproxy->uts_ns);
109 nsproxy->uts_ns = ns;
110 return 0;
111}
112
113const struct proc_ns_operations utsns_operations = {
114 .name = "uts",
115 .type = CLONE_NEWUTS,
116 .get = utsns_get,
117 .put = utsns_put,
118 .install = utsns_install,
119};
120
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e3378e8d3a5c..0400553f0d04 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2866,9 +2866,7 @@ static int alloc_cwqs(struct workqueue_struct *wq)
2866 } 2866 }
2867 } 2867 }
2868 2868
2869 /* just in case, make sure it's actually aligned 2869 /* just in case, make sure it's actually aligned */
2870 * - this is affected by PERCPU() alignment in vmlinux.lds.S
2871 */
2872 BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); 2870 BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
2873 return wq->cpu_wq.v ? 0 : -ENOMEM; 2871 return wq->cpu_wq.v ? 0 : -ENOMEM;
2874} 2872}