diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Kconfig.locks | 2 | ||||
-rw-r--r-- | kernel/auditsc.c | 27 | ||||
-rw-r--r-- | kernel/capability.c | 4 | ||||
-rw-r--r-- | kernel/compat.c | 55 | ||||
-rw-r--r-- | kernel/cred.c | 6 | ||||
-rw-r--r-- | kernel/exit.c | 110 | ||||
-rw-r--r-- | kernel/fork.c | 42 | ||||
-rw-r--r-- | kernel/hrtimer.c | 162 | ||||
-rw-r--r-- | kernel/irq/proc.c | 54 | ||||
-rw-r--r-- | kernel/kmod.c | 100 | ||||
-rw-r--r-- | kernel/module.c | 4 | ||||
-rw-r--r-- | kernel/mutex.c | 25 | ||||
-rw-r--r-- | kernel/nsproxy.c | 42 | ||||
-rw-r--r-- | kernel/pm_qos_params.c | 2 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 4 | ||||
-rw-r--r-- | kernel/posix-timers.c | 27 | ||||
-rw-r--r-- | kernel/printk.c | 87 | ||||
-rw-r--r-- | kernel/ptrace.c | 120 | ||||
-rw-r--r-- | kernel/rcutiny.c | 1 | ||||
-rw-r--r-- | kernel/rcutree.c | 1 | ||||
-rw-r--r-- | kernel/sched.c | 41 | ||||
-rw-r--r-- | kernel/sched_fair.c | 52 | ||||
-rw-r--r-- | kernel/signal.c | 684 | ||||
-rw-r--r-- | kernel/sys_ni.c | 9 | ||||
-rw-r--r-- | kernel/sysctl.c | 8 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 16 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 2 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 2 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 16 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 17 | ||||
-rw-r--r-- | kernel/utsname.c | 39 | ||||
-rw-r--r-- | kernel/workqueue.c | 4 |
32 files changed, 1224 insertions, 541 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 88c92fb44618..5068e2a4e75f 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks | |||
@@ -199,4 +199,4 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE | |||
199 | def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE | 199 | def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE |
200 | 200 | ||
201 | config MUTEX_SPIN_ON_OWNER | 201 | config MUTEX_SPIN_ON_OWNER |
202 | def_bool SMP && !DEBUG_MUTEXES && !HAVE_DEFAULT_NO_SPIN_MUTEXES | 202 | def_bool SMP && !DEBUG_MUTEXES |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index b33513a08beb..00d79df03e76 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -443,17 +443,25 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree) | |||
443 | 443 | ||
444 | /* Determine if any context name data matches a rule's watch data */ | 444 | /* Determine if any context name data matches a rule's watch data */ |
445 | /* Compare a task_struct with an audit_rule. Return 1 on match, 0 | 445 | /* Compare a task_struct with an audit_rule. Return 1 on match, 0 |
446 | * otherwise. */ | 446 | * otherwise. |
447 | * | ||
448 | * If task_creation is true, this is an explicit indication that we are | ||
449 | * filtering a task rule at task creation time. This and tsk == current are | ||
450 | * the only situations where tsk->cred may be accessed without an rcu read lock. | ||
451 | */ | ||
447 | static int audit_filter_rules(struct task_struct *tsk, | 452 | static int audit_filter_rules(struct task_struct *tsk, |
448 | struct audit_krule *rule, | 453 | struct audit_krule *rule, |
449 | struct audit_context *ctx, | 454 | struct audit_context *ctx, |
450 | struct audit_names *name, | 455 | struct audit_names *name, |
451 | enum audit_state *state) | 456 | enum audit_state *state, |
457 | bool task_creation) | ||
452 | { | 458 | { |
453 | const struct cred *cred = get_task_cred(tsk); | 459 | const struct cred *cred; |
454 | int i, j, need_sid = 1; | 460 | int i, j, need_sid = 1; |
455 | u32 sid; | 461 | u32 sid; |
456 | 462 | ||
463 | cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation); | ||
464 | |||
457 | for (i = 0; i < rule->field_count; i++) { | 465 | for (i = 0; i < rule->field_count; i++) { |
458 | struct audit_field *f = &rule->fields[i]; | 466 | struct audit_field *f = &rule->fields[i]; |
459 | int result = 0; | 467 | int result = 0; |
@@ -637,10 +645,8 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
637 | break; | 645 | break; |
638 | } | 646 | } |
639 | 647 | ||
640 | if (!result) { | 648 | if (!result) |
641 | put_cred(cred); | ||
642 | return 0; | 649 | return 0; |
643 | } | ||
644 | } | 650 | } |
645 | 651 | ||
646 | if (ctx) { | 652 | if (ctx) { |
@@ -656,7 +662,6 @@ static int audit_filter_rules(struct task_struct *tsk, | |||
656 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; | 662 | case AUDIT_NEVER: *state = AUDIT_DISABLED; break; |
657 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; | 663 | case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; |
658 | } | 664 | } |
659 | put_cred(cred); | ||
660 | return 1; | 665 | return 1; |
661 | } | 666 | } |
662 | 667 | ||
@@ -671,7 +676,8 @@ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key) | |||
671 | 676 | ||
672 | rcu_read_lock(); | 677 | rcu_read_lock(); |
673 | list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { | 678 | list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { |
674 | if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) { | 679 | if (audit_filter_rules(tsk, &e->rule, NULL, NULL, |
680 | &state, true)) { | ||
675 | if (state == AUDIT_RECORD_CONTEXT) | 681 | if (state == AUDIT_RECORD_CONTEXT) |
676 | *key = kstrdup(e->rule.filterkey, GFP_ATOMIC); | 682 | *key = kstrdup(e->rule.filterkey, GFP_ATOMIC); |
677 | rcu_read_unlock(); | 683 | rcu_read_unlock(); |
@@ -705,7 +711,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, | |||
705 | list_for_each_entry_rcu(e, list, list) { | 711 | list_for_each_entry_rcu(e, list, list) { |
706 | if ((e->rule.mask[word] & bit) == bit && | 712 | if ((e->rule.mask[word] & bit) == bit && |
707 | audit_filter_rules(tsk, &e->rule, ctx, NULL, | 713 | audit_filter_rules(tsk, &e->rule, ctx, NULL, |
708 | &state)) { | 714 | &state, false)) { |
709 | rcu_read_unlock(); | 715 | rcu_read_unlock(); |
710 | ctx->current_state = state; | 716 | ctx->current_state = state; |
711 | return state; | 717 | return state; |
@@ -743,7 +749,8 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) | |||
743 | 749 | ||
744 | list_for_each_entry_rcu(e, list, list) { | 750 | list_for_each_entry_rcu(e, list, list) { |
745 | if ((e->rule.mask[word] & bit) == bit && | 751 | if ((e->rule.mask[word] & bit) == bit && |
746 | audit_filter_rules(tsk, &e->rule, ctx, n, &state)) { | 752 | audit_filter_rules(tsk, &e->rule, ctx, n, |
753 | &state, false)) { | ||
747 | rcu_read_unlock(); | 754 | rcu_read_unlock(); |
748 | ctx->current_state = state; | 755 | ctx->current_state = state; |
749 | return; | 756 | return; |
diff --git a/kernel/capability.c b/kernel/capability.c index 32a80e08ff4b..283c529f8b1c 100644 --- a/kernel/capability.c +++ b/kernel/capability.c | |||
@@ -22,12 +22,8 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; | 24 | const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; |
25 | const kernel_cap_t __cap_full_set = CAP_FULL_SET; | ||
26 | const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET; | ||
27 | 25 | ||
28 | EXPORT_SYMBOL(__cap_empty_set); | 26 | EXPORT_SYMBOL(__cap_empty_set); |
29 | EXPORT_SYMBOL(__cap_full_set); | ||
30 | EXPORT_SYMBOL(__cap_init_eff_set); | ||
31 | 27 | ||
32 | int file_caps_enabled = 1; | 28 | int file_caps_enabled = 1; |
33 | 29 | ||
diff --git a/kernel/compat.c b/kernel/compat.c index 38b1d2c1cbe8..fc9eb093acd5 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -293,6 +293,8 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) | |||
293 | return compat_jiffies_to_clock_t(jiffies); | 293 | return compat_jiffies_to_clock_t(jiffies); |
294 | } | 294 | } |
295 | 295 | ||
296 | #ifdef __ARCH_WANT_SYS_SIGPENDING | ||
297 | |||
296 | /* | 298 | /* |
297 | * Assumption: old_sigset_t and compat_old_sigset_t are both | 299 | * Assumption: old_sigset_t and compat_old_sigset_t are both |
298 | * types that can be passed to put_user()/get_user(). | 300 | * types that can be passed to put_user()/get_user(). |
@@ -312,6 +314,10 @@ asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set) | |||
312 | return ret; | 314 | return ret; |
313 | } | 315 | } |
314 | 316 | ||
317 | #endif | ||
318 | |||
319 | #ifdef __ARCH_WANT_SYS_SIGPROCMASK | ||
320 | |||
315 | asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, | 321 | asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, |
316 | compat_old_sigset_t __user *oset) | 322 | compat_old_sigset_t __user *oset) |
317 | { | 323 | { |
@@ -333,6 +339,8 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, | |||
333 | return ret; | 339 | return ret; |
334 | } | 340 | } |
335 | 341 | ||
342 | #endif | ||
343 | |||
336 | asmlinkage long compat_sys_setrlimit(unsigned int resource, | 344 | asmlinkage long compat_sys_setrlimit(unsigned int resource, |
337 | struct compat_rlimit __user *rlim) | 345 | struct compat_rlimit __user *rlim) |
338 | { | 346 | { |
@@ -890,10 +898,9 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, | |||
890 | { | 898 | { |
891 | compat_sigset_t s32; | 899 | compat_sigset_t s32; |
892 | sigset_t s; | 900 | sigset_t s; |
893 | int sig; | ||
894 | struct timespec t; | 901 | struct timespec t; |
895 | siginfo_t info; | 902 | siginfo_t info; |
896 | long ret, timeout = 0; | 903 | long ret; |
897 | 904 | ||
898 | if (sigsetsize != sizeof(sigset_t)) | 905 | if (sigsetsize != sizeof(sigset_t)) |
899 | return -EINVAL; | 906 | return -EINVAL; |
@@ -901,51 +908,19 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, | |||
901 | if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t))) | 908 | if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t))) |
902 | return -EFAULT; | 909 | return -EFAULT; |
903 | sigset_from_compat(&s, &s32); | 910 | sigset_from_compat(&s, &s32); |
904 | sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP)); | ||
905 | signotset(&s); | ||
906 | 911 | ||
907 | if (uts) { | 912 | if (uts) { |
908 | if (get_compat_timespec (&t, uts)) | 913 | if (get_compat_timespec(&t, uts)) |
909 | return -EFAULT; | 914 | return -EFAULT; |
910 | if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 | ||
911 | || t.tv_sec < 0) | ||
912 | return -EINVAL; | ||
913 | } | 915 | } |
914 | 916 | ||
915 | spin_lock_irq(&current->sighand->siglock); | 917 | ret = do_sigtimedwait(&s, &info, uts ? &t : NULL); |
916 | sig = dequeue_signal(current, &s, &info); | ||
917 | if (!sig) { | ||
918 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
919 | if (uts) | ||
920 | timeout = timespec_to_jiffies(&t) | ||
921 | +(t.tv_sec || t.tv_nsec); | ||
922 | if (timeout) { | ||
923 | current->real_blocked = current->blocked; | ||
924 | sigandsets(&current->blocked, &current->blocked, &s); | ||
925 | |||
926 | recalc_sigpending(); | ||
927 | spin_unlock_irq(&current->sighand->siglock); | ||
928 | |||
929 | timeout = schedule_timeout_interruptible(timeout); | ||
930 | |||
931 | spin_lock_irq(&current->sighand->siglock); | ||
932 | sig = dequeue_signal(current, &s, &info); | ||
933 | current->blocked = current->real_blocked; | ||
934 | siginitset(&current->real_blocked, 0); | ||
935 | recalc_sigpending(); | ||
936 | } | ||
937 | } | ||
938 | spin_unlock_irq(&current->sighand->siglock); | 918 | ||
939 | 918 | ||
940 | if (sig) { | 919 | if (ret > 0 && uinfo) { |
941 | ret = sig; | 920 | if (copy_siginfo_to_user32(uinfo, &info)) |
942 | if (uinfo) { | 921 | ret = -EFAULT; |
943 | if (copy_siginfo_to_user32(uinfo, &info)) | ||
944 | ret = -EFAULT; | ||
945 | } | ||
946 | }else { | ||
947 | ret = timeout?-EINTR:-EAGAIN; | ||
948 | } | 922 | } |
923 | |||
949 | return ret; | 924 | return ret; |
950 | 925 | ||
951 | } | 926 | } |
diff --git a/kernel/cred.c b/kernel/cred.c index 8093c16b84b1..e12c8af793f8 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -49,10 +49,10 @@ struct cred init_cred = { | |||
49 | .magic = CRED_MAGIC, | 49 | .magic = CRED_MAGIC, |
50 | #endif | 50 | #endif |
51 | .securebits = SECUREBITS_DEFAULT, | 51 | .securebits = SECUREBITS_DEFAULT, |
52 | .cap_inheritable = CAP_INIT_INH_SET, | 52 | .cap_inheritable = CAP_EMPTY_SET, |
53 | .cap_permitted = CAP_FULL_SET, | 53 | .cap_permitted = CAP_FULL_SET, |
54 | .cap_effective = CAP_INIT_EFF_SET, | 54 | .cap_effective = CAP_FULL_SET, |
55 | .cap_bset = CAP_INIT_BSET, | 55 | .cap_bset = CAP_FULL_SET, |
56 | .user = INIT_USER, | 56 | .user = INIT_USER, |
57 | .user_ns = &init_user_ns, | 57 | .user_ns = &init_user_ns, |
58 | .group_info = &init_groups, | 58 | .group_info = &init_groups, |
diff --git a/kernel/exit.c b/kernel/exit.c index 8dd874181542..20a406471525 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -1377,11 +1377,23 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace) | |||
1377 | return NULL; | 1377 | return NULL; |
1378 | } | 1378 | } |
1379 | 1379 | ||
1380 | /* | 1380 | /** |
1381 | * Handle sys_wait4 work for one task in state TASK_STOPPED. We hold | 1381 | * wait_task_stopped - Wait for %TASK_STOPPED or %TASK_TRACED |
1382 | * read_lock(&tasklist_lock) on entry. If we return zero, we still hold | 1382 | * @wo: wait options |
1383 | * the lock and this task is uninteresting. If we return nonzero, we have | 1383 | * @ptrace: is the wait for ptrace |
1384 | * released the lock and the system call should return. | 1384 | * @p: task to wait for |
1385 | * | ||
1386 | * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED. | ||
1387 | * | ||
1388 | * CONTEXT: | ||
1389 | * read_lock(&tasklist_lock), which is released if return value is | ||
1390 | * non-zero. Also, grabs and releases @p->sighand->siglock. | ||
1391 | * | ||
1392 | * RETURNS: | ||
1393 | * 0 if wait condition didn't exist and search for other wait conditions | ||
1394 | * should continue. Non-zero return, -errno on failure and @p's pid on | ||
1395 | * success, implies that tasklist_lock is released and wait condition | ||
1396 | * search should terminate. | ||
1385 | */ | 1397 | */ |
1386 | static int wait_task_stopped(struct wait_opts *wo, | 1398 | static int wait_task_stopped(struct wait_opts *wo, |
1387 | int ptrace, struct task_struct *p) | 1399 | int ptrace, struct task_struct *p) |
@@ -1397,6 +1409,9 @@ static int wait_task_stopped(struct wait_opts *wo, | |||
1397 | if (!ptrace && !(wo->wo_flags & WUNTRACED)) | 1409 | if (!ptrace && !(wo->wo_flags & WUNTRACED)) |
1398 | return 0; | 1410 | return 0; |
1399 | 1411 | ||
1412 | if (!task_stopped_code(p, ptrace)) | ||
1413 | return 0; | ||
1414 | |||
1400 | exit_code = 0; | 1415 | exit_code = 0; |
1401 | spin_lock_irq(&p->sighand->siglock); | 1416 | spin_lock_irq(&p->sighand->siglock); |
1402 | 1417 | ||
@@ -1538,33 +1553,84 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, | |||
1538 | return 0; | 1553 | return 0; |
1539 | } | 1554 | } |
1540 | 1555 | ||
1541 | if (likely(!ptrace) && unlikely(task_ptrace(p))) { | 1556 | /* dead body doesn't have much to contribute */ |
1557 | if (p->exit_state == EXIT_DEAD) | ||
1558 | return 0; | ||
1559 | |||
1560 | /* slay zombie? */ | ||
1561 | if (p->exit_state == EXIT_ZOMBIE) { | ||
1562 | /* | ||
1563 | * A zombie ptracee is only visible to its ptracer. | ||
1564 | * Notification and reaping will be cascaded to the real | ||
1565 | * parent when the ptracer detaches. | ||
1566 | */ | ||
1567 | if (likely(!ptrace) && unlikely(task_ptrace(p))) { | ||
1568 | /* it will become visible, clear notask_error */ | ||
1569 | wo->notask_error = 0; | ||
1570 | return 0; | ||
1571 | } | ||
1572 | |||
1573 | /* we don't reap group leaders with subthreads */ | ||
1574 | if (!delay_group_leader(p)) | ||
1575 | return wait_task_zombie(wo, p); | ||
1576 | |||
1542 | /* | 1577 | /* |
1543 | * This child is hidden by ptrace. | 1578 | * Allow access to stopped/continued state via zombie by |
1544 | * We aren't allowed to see it now, but eventually we will. | 1579 | * falling through. Clearing of notask_error is complex. |
1580 | * | ||
1581 | * When !@ptrace: | ||
1582 | * | ||
1583 | * If WEXITED is set, notask_error should naturally be | ||
1584 | * cleared. If not, subset of WSTOPPED|WCONTINUED is set, | ||
1585 | * so, if there are live subthreads, there are events to | ||
1586 | * wait for. If all subthreads are dead, it's still safe | ||
1587 | * to clear - this function will be called again in finite | ||
1588 | * amount time once all the subthreads are released and | ||
1589 | * will then return without clearing. | ||
1590 | * | ||
1591 | * When @ptrace: | ||
1592 | * | ||
1593 | * Stopped state is per-task and thus can't change once the | ||
1594 | * target task dies. Only continued and exited can happen. | ||
1595 | * Clear notask_error if WCONTINUED | WEXITED. | ||
1596 | */ | ||
1597 | if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED))) | ||
1598 | wo->notask_error = 0; | ||
1599 | } else { | ||
1600 | /* | ||
1601 | * If @p is ptraced by a task in its real parent's group, | ||
1602 | * hide group stop/continued state when looking at @p as | ||
1603 | * the real parent; otherwise, a single stop can be | ||
1604 | * reported twice as group and ptrace stops. | ||
1605 | * | ||
1606 | * If a ptracer wants to distinguish the two events for its | ||
1607 | * own children, it should create a separate process which | ||
1608 | * takes the role of real parent. | ||
1609 | */ | ||
1610 | if (likely(!ptrace) && task_ptrace(p) && | ||
1611 | same_thread_group(p->parent, p->real_parent)) | ||
1612 | return 0; | ||
1613 | |||
1614 | /* | ||
1615 | * @p is alive and it's gonna stop, continue or exit, so | ||
1616 | * there always is something to wait for. | ||
1545 | */ | 1617 | */ |
1546 | wo->notask_error = 0; | 1618 | wo->notask_error = 0; |
1547 | return 0; | ||
1548 | } | 1619 | } |
1549 | 1620 | ||
1550 | if (p->exit_state == EXIT_DEAD) | ||
1551 | return 0; | ||
1552 | |||
1553 | /* | 1621 | /* |
1554 | * We don't reap group leaders with subthreads. | 1622 | * Wait for stopped. Depending on @ptrace, different stopped state |
1623 | * is used and the two don't interact with each other. | ||
1555 | */ | 1624 | */ |
1556 | if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p)) | 1625 | ret = wait_task_stopped(wo, ptrace, p); |
1557 | return wait_task_zombie(wo, p); | 1626 | if (ret) |
1627 | return ret; | ||
1558 | 1628 | ||
1559 | /* | 1629 | /* |
1560 | * It's stopped or running now, so it might | 1630 | * Wait for continued. There's only one continued state and the |
1561 | * later continue, exit, or stop again. | 1631 | * ptracer can consume it which can confuse the real parent. Don't |
1632 | * use WCONTINUED from ptracer. You don't need or want it. | ||
1562 | */ | 1633 | */ |
1563 | wo->notask_error = 0; | ||
1564 | |||
1565 | if (task_stopped_code(p, ptrace)) | ||
1566 | return wait_task_stopped(wo, ptrace, p); | ||
1567 | |||
1568 | return wait_task_continued(wo, p); | 1634 | return wait_task_continued(wo, p); |
1569 | } | 1635 | } |
1570 | 1636 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 2b44d82b8237..8e7e135d0817 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -383,15 +383,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
383 | get_file(file); | 383 | get_file(file); |
384 | if (tmp->vm_flags & VM_DENYWRITE) | 384 | if (tmp->vm_flags & VM_DENYWRITE) |
385 | atomic_dec(&inode->i_writecount); | 385 | atomic_dec(&inode->i_writecount); |
386 | spin_lock(&mapping->i_mmap_lock); | 386 | mutex_lock(&mapping->i_mmap_mutex); |
387 | if (tmp->vm_flags & VM_SHARED) | 387 | if (tmp->vm_flags & VM_SHARED) |
388 | mapping->i_mmap_writable++; | 388 | mapping->i_mmap_writable++; |
389 | tmp->vm_truncate_count = mpnt->vm_truncate_count; | ||
390 | flush_dcache_mmap_lock(mapping); | 389 | flush_dcache_mmap_lock(mapping); |
391 | /* insert tmp into the share list, just after mpnt */ | 390 | /* insert tmp into the share list, just after mpnt */ |
392 | vma_prio_tree_add(tmp, mpnt); | 391 | vma_prio_tree_add(tmp, mpnt); |
393 | flush_dcache_mmap_unlock(mapping); | 392 | flush_dcache_mmap_unlock(mapping); |
394 | spin_unlock(&mapping->i_mmap_lock); | 393 | mutex_unlock(&mapping->i_mmap_mutex); |
395 | } | 394 | } |
396 | 395 | ||
397 | /* | 396 | /* |
@@ -486,6 +485,20 @@ static void mm_init_aio(struct mm_struct *mm) | |||
486 | #endif | 485 | #endif |
487 | } | 486 | } |
488 | 487 | ||
488 | int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm) | ||
489 | { | ||
490 | #ifdef CONFIG_CPUMASK_OFFSTACK | ||
491 | if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL)) | ||
492 | return -ENOMEM; | ||
493 | |||
494 | if (oldmm) | ||
495 | cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm)); | ||
496 | else | ||
497 | memset(mm_cpumask(mm), 0, cpumask_size()); | ||
498 | #endif | ||
499 | return 0; | ||
500 | } | ||
501 | |||
489 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | 502 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) |
490 | { | 503 | { |
491 | atomic_set(&mm->mm_users, 1); | 504 | atomic_set(&mm->mm_users, 1); |
@@ -522,10 +535,20 @@ struct mm_struct * mm_alloc(void) | |||
522 | struct mm_struct * mm; | 535 | struct mm_struct * mm; |
523 | 536 | ||
524 | mm = allocate_mm(); | 537 | mm = allocate_mm(); |
525 | if (mm) { | 538 | if (!mm) |
526 | memset(mm, 0, sizeof(*mm)); | 539 | return NULL; |
527 | mm = mm_init(mm, current); | 540 | |
541 | memset(mm, 0, sizeof(*mm)); | ||
542 | mm = mm_init(mm, current); | ||
543 | if (!mm) | ||
544 | return NULL; | ||
545 | |||
546 | if (mm_init_cpumask(mm, NULL)) { | ||
547 | mm_free_pgd(mm); | ||
548 | free_mm(mm); | ||
549 | return NULL; | ||
528 | } | 550 | } |
551 | |||
529 | return mm; | 552 | return mm; |
530 | } | 553 | } |
531 | 554 | ||
@@ -537,6 +560,7 @@ struct mm_struct * mm_alloc(void) | |||
537 | void __mmdrop(struct mm_struct *mm) | 560 | void __mmdrop(struct mm_struct *mm) |
538 | { | 561 | { |
539 | BUG_ON(mm == &init_mm); | 562 | BUG_ON(mm == &init_mm); |
563 | free_cpumask_var(mm->cpu_vm_mask_var); | ||
540 | mm_free_pgd(mm); | 564 | mm_free_pgd(mm); |
541 | destroy_context(mm); | 565 | destroy_context(mm); |
542 | mmu_notifier_mm_destroy(mm); | 566 | mmu_notifier_mm_destroy(mm); |
@@ -691,6 +715,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
691 | if (!mm_init(mm, tsk)) | 715 | if (!mm_init(mm, tsk)) |
692 | goto fail_nomem; | 716 | goto fail_nomem; |
693 | 717 | ||
718 | if (mm_init_cpumask(mm, oldmm)) | ||
719 | goto fail_nocpumask; | ||
720 | |||
694 | if (init_new_context(tsk, mm)) | 721 | if (init_new_context(tsk, mm)) |
695 | goto fail_nocontext; | 722 | goto fail_nocontext; |
696 | 723 | ||
@@ -717,6 +744,9 @@ fail_nomem: | |||
717 | return NULL; | 744 | return NULL; |
718 | 745 | ||
719 | fail_nocontext: | 746 | fail_nocontext: |
747 | free_cpumask_var(mm->cpu_vm_mask_var); | ||
748 | |||
749 | fail_nocpumask: | ||
720 | /* | 750 | /* |
721 | * If init_new_context() failed, we cannot use mmput() to free the mm | 751 | * If init_new_context() failed, we cannot use mmput() to free the mm |
722 | * because it calls destroy_context() | 752 | * because it calls destroy_context() |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index dbbbf7d43080..a9205e32a059 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -64,17 +64,20 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = | |||
64 | .clock_base = | 64 | .clock_base = |
65 | { | 65 | { |
66 | { | 66 | { |
67 | .index = CLOCK_REALTIME, | 67 | .index = HRTIMER_BASE_MONOTONIC, |
68 | .get_time = &ktime_get_real, | 68 | .clockid = CLOCK_MONOTONIC, |
69 | .get_time = &ktime_get, | ||
69 | .resolution = KTIME_LOW_RES, | 70 | .resolution = KTIME_LOW_RES, |
70 | }, | 71 | }, |
71 | { | 72 | { |
72 | .index = CLOCK_MONOTONIC, | 73 | .index = HRTIMER_BASE_REALTIME, |
73 | .get_time = &ktime_get, | 74 | .clockid = CLOCK_REALTIME, |
75 | .get_time = &ktime_get_real, | ||
74 | .resolution = KTIME_LOW_RES, | 76 | .resolution = KTIME_LOW_RES, |
75 | }, | 77 | }, |
76 | { | 78 | { |
77 | .index = CLOCK_BOOTTIME, | 79 | .index = HRTIMER_BASE_BOOTTIME, |
80 | .clockid = CLOCK_BOOTTIME, | ||
78 | .get_time = &ktime_get_boottime, | 81 | .get_time = &ktime_get_boottime, |
79 | .resolution = KTIME_LOW_RES, | 82 | .resolution = KTIME_LOW_RES, |
80 | }, | 83 | }, |
@@ -196,7 +199,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, | |||
196 | struct hrtimer_cpu_base *new_cpu_base; | 199 | struct hrtimer_cpu_base *new_cpu_base; |
197 | int this_cpu = smp_processor_id(); | 200 | int this_cpu = smp_processor_id(); |
198 | int cpu = hrtimer_get_target(this_cpu, pinned); | 201 | int cpu = hrtimer_get_target(this_cpu, pinned); |
199 | int basenum = hrtimer_clockid_to_base(base->index); | 202 | int basenum = base->index; |
200 | 203 | ||
201 | again: | 204 | again: |
202 | new_cpu_base = &per_cpu(hrtimer_bases, cpu); | 205 | new_cpu_base = &per_cpu(hrtimer_bases, cpu); |
@@ -621,66 +624,6 @@ static int hrtimer_reprogram(struct hrtimer *timer, | |||
621 | return res; | 624 | return res; |
622 | } | 625 | } |
623 | 626 | ||
624 | |||
625 | /* | ||
626 | * Retrigger next event is called after clock was set | ||
627 | * | ||
628 | * Called with interrupts disabled via on_each_cpu() | ||
629 | */ | ||
630 | static void retrigger_next_event(void *arg) | ||
631 | { | ||
632 | struct hrtimer_cpu_base *base; | ||
633 | struct timespec realtime_offset, wtm, sleep; | ||
634 | |||
635 | if (!hrtimer_hres_active()) | ||
636 | return; | ||
637 | |||
638 | get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm, | ||
639 | &sleep); | ||
640 | set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); | ||
641 | |||
642 | base = &__get_cpu_var(hrtimer_bases); | ||
643 | |||
644 | /* Adjust CLOCK_REALTIME offset */ | ||
645 | raw_spin_lock(&base->lock); | ||
646 | base->clock_base[HRTIMER_BASE_REALTIME].offset = | ||
647 | timespec_to_ktime(realtime_offset); | ||
648 | base->clock_base[HRTIMER_BASE_BOOTTIME].offset = | ||
649 | timespec_to_ktime(sleep); | ||
650 | |||
651 | hrtimer_force_reprogram(base, 0); | ||
652 | raw_spin_unlock(&base->lock); | ||
653 | } | ||
654 | |||
655 | /* | ||
656 | * Clock realtime was set | ||
657 | * | ||
658 | * Change the offset of the realtime clock vs. the monotonic | ||
659 | * clock. | ||
660 | * | ||
661 | * We might have to reprogram the high resolution timer interrupt. On | ||
662 | * SMP we call the architecture specific code to retrigger _all_ high | ||
663 | * resolution timer interrupts. On UP we just disable interrupts and | ||
664 | * call the high resolution interrupt code. | ||
665 | */ | ||
666 | void clock_was_set(void) | ||
667 | { | ||
668 | /* Retrigger the CPU local events everywhere */ | ||
669 | on_each_cpu(retrigger_next_event, NULL, 1); | ||
670 | } | ||
671 | |||
672 | /* | ||
673 | * During resume we might have to reprogram the high resolution timer | ||
674 | * interrupt (on the local CPU): | ||
675 | */ | ||
676 | void hres_timers_resume(void) | ||
677 | { | ||
678 | WARN_ONCE(!irqs_disabled(), | ||
679 | KERN_INFO "hres_timers_resume() called with IRQs enabled!"); | ||
680 | |||
681 | retrigger_next_event(NULL); | ||
682 | } | ||
683 | |||
684 | /* | 627 | /* |
685 | * Initialize the high resolution related parts of cpu_base | 628 | * Initialize the high resolution related parts of cpu_base |
686 | */ | 629 | */ |
@@ -715,11 +658,39 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
715 | } | 658 | } |
716 | 659 | ||
717 | /* | 660 | /* |
661 | * Retrigger next event is called after clock was set | ||
662 | * | ||
663 | * Called with interrupts disabled via on_each_cpu() | ||
664 | */ | ||
665 | static void retrigger_next_event(void *arg) | ||
666 | { | ||
667 | struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); | ||
668 | struct timespec realtime_offset, xtim, wtm, sleep; | ||
669 | |||
670 | if (!hrtimer_hres_active()) | ||
671 | return; | ||
672 | |||
673 | /* Optimized out for !HIGH_RES */ | ||
674 | get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); | ||
675 | set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); | ||
676 | |||
677 | /* Adjust CLOCK_REALTIME offset */ | ||
678 | raw_spin_lock(&base->lock); | ||
679 | base->clock_base[HRTIMER_BASE_REALTIME].offset = | ||
680 | timespec_to_ktime(realtime_offset); | ||
681 | base->clock_base[HRTIMER_BASE_BOOTTIME].offset = | ||
682 | timespec_to_ktime(sleep); | ||
683 | |||
684 | hrtimer_force_reprogram(base, 0); | ||
685 | raw_spin_unlock(&base->lock); | ||
686 | } | ||
687 | |||
688 | /* | ||
718 | * Switch to high resolution mode | 689 | * Switch to high resolution mode |
719 | */ | 690 | */ |
720 | static int hrtimer_switch_to_hres(void) | 691 | static int hrtimer_switch_to_hres(void) |
721 | { | 692 | { |
722 | int cpu = smp_processor_id(); | 693 | int i, cpu = smp_processor_id(); |
723 | struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); | 694 | struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); |
724 | unsigned long flags; | 695 | unsigned long flags; |
725 | 696 | ||
@@ -735,9 +706,8 @@ static int hrtimer_switch_to_hres(void) | |||
735 | return 0; | 706 | return 0; |
736 | } | 707 | } |
737 | base->hres_active = 1; | 708 | base->hres_active = 1; |
738 | base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES; | 709 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) |
739 | base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES; | 710 | base->clock_base[i].resolution = KTIME_HIGH_RES; |
740 | base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES; | ||
741 | 711 | ||
742 | tick_setup_sched_timer(); | 712 | tick_setup_sched_timer(); |
743 | 713 | ||
@@ -761,9 +731,43 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
761 | return 0; | 731 | return 0; |
762 | } | 732 | } |
763 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | 733 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } |
734 | static inline void retrigger_next_event(void *arg) { } | ||
764 | 735 | ||
765 | #endif /* CONFIG_HIGH_RES_TIMERS */ | 736 | #endif /* CONFIG_HIGH_RES_TIMERS */ |
766 | 737 | ||
738 | /* | ||
739 | * Clock realtime was set | ||
740 | * | ||
741 | * Change the offset of the realtime clock vs. the monotonic | ||
742 | * clock. | ||
743 | * | ||
744 | * We might have to reprogram the high resolution timer interrupt. On | ||
745 | * SMP we call the architecture specific code to retrigger _all_ high | ||
746 | * resolution timer interrupts. On UP we just disable interrupts and | ||
747 | * call the high resolution interrupt code. | ||
748 | */ | ||
749 | void clock_was_set(void) | ||
750 | { | ||
751 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
752 | /* Retrigger the CPU local events everywhere */ | ||
753 | on_each_cpu(retrigger_next_event, NULL, 1); | ||
754 | #endif | ||
755 | timerfd_clock_was_set(); | ||
756 | } | ||
757 | |||
758 | /* | ||
759 | * During resume we might have to reprogram the high resolution timer | ||
760 | * interrupt (on the local CPU): | ||
761 | */ | ||
762 | void hrtimers_resume(void) | ||
763 | { | ||
764 | WARN_ONCE(!irqs_disabled(), | ||
765 | KERN_INFO "hrtimers_resume() called with IRQs enabled!"); | ||
766 | |||
767 | retrigger_next_event(NULL); | ||
768 | timerfd_clock_was_set(); | ||
769 | } | ||
770 | |||
767 | static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) | 771 | static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) |
768 | { | 772 | { |
769 | #ifdef CONFIG_TIMER_STATS | 773 | #ifdef CONFIG_TIMER_STATS |
@@ -856,6 +860,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, | |||
856 | debug_activate(timer); | 860 | debug_activate(timer); |
857 | 861 | ||
858 | timerqueue_add(&base->active, &timer->node); | 862 | timerqueue_add(&base->active, &timer->node); |
863 | base->cpu_base->active_bases |= 1 << base->index; | ||
859 | 864 | ||
860 | /* | 865 | /* |
861 | * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the | 866 | * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the |
@@ -897,6 +902,8 @@ static void __remove_hrtimer(struct hrtimer *timer, | |||
897 | #endif | 902 | #endif |
898 | } | 903 | } |
899 | timerqueue_del(&base->active, &timer->node); | 904 | timerqueue_del(&base->active, &timer->node); |
905 | if (!timerqueue_getnext(&base->active)) | ||
906 | base->cpu_base->active_bases &= ~(1 << base->index); | ||
900 | out: | 907 | out: |
901 | timer->state = newstate; | 908 | timer->state = newstate; |
902 | } | 909 | } |
@@ -1234,7 +1241,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) | |||
1234 | void hrtimer_interrupt(struct clock_event_device *dev) | 1241 | void hrtimer_interrupt(struct clock_event_device *dev) |
1235 | { | 1242 | { |
1236 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | 1243 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
1237 | struct hrtimer_clock_base *base; | ||
1238 | ktime_t expires_next, now, entry_time, delta; | 1244 | ktime_t expires_next, now, entry_time, delta; |
1239 | int i, retries = 0; | 1245 | int i, retries = 0; |
1240 | 1246 | ||
@@ -1256,12 +1262,15 @@ retry: | |||
1256 | */ | 1262 | */ |
1257 | cpu_base->expires_next.tv64 = KTIME_MAX; | 1263 | cpu_base->expires_next.tv64 = KTIME_MAX; |
1258 | 1264 | ||
1259 | base = cpu_base->clock_base; | ||
1260 | |||
1261 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | 1265 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
1262 | ktime_t basenow; | 1266 | struct hrtimer_clock_base *base; |
1263 | struct timerqueue_node *node; | 1267 | struct timerqueue_node *node; |
1268 | ktime_t basenow; | ||
1269 | |||
1270 | if (!(cpu_base->active_bases & (1 << i))) | ||
1271 | continue; | ||
1264 | 1272 | ||
1273 | base = cpu_base->clock_base + i; | ||
1265 | basenow = ktime_add(now, base->offset); | 1274 | basenow = ktime_add(now, base->offset); |
1266 | 1275 | ||
1267 | while ((node = timerqueue_getnext(&base->active))) { | 1276 | while ((node = timerqueue_getnext(&base->active))) { |
@@ -1294,7 +1303,6 @@ retry: | |||
1294 | 1303 | ||
1295 | __run_hrtimer(timer, &basenow); | 1304 | __run_hrtimer(timer, &basenow); |
1296 | } | 1305 | } |
1297 | base++; | ||
1298 | } | 1306 | } |
1299 | 1307 | ||
1300 | /* | 1308 | /* |
@@ -1525,7 +1533,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |||
1525 | struct timespec __user *rmtp; | 1533 | struct timespec __user *rmtp; |
1526 | int ret = 0; | 1534 | int ret = 0; |
1527 | 1535 | ||
1528 | hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, | 1536 | hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, |
1529 | HRTIMER_MODE_ABS); | 1537 | HRTIMER_MODE_ABS); |
1530 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); | 1538 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); |
1531 | 1539 | ||
@@ -1577,7 +1585,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |||
1577 | 1585 | ||
1578 | restart = &current_thread_info()->restart_block; | 1586 | restart = &current_thread_info()->restart_block; |
1579 | restart->fn = hrtimer_nanosleep_restart; | 1587 | restart->fn = hrtimer_nanosleep_restart; |
1580 | restart->nanosleep.index = t.timer.base->index; | 1588 | restart->nanosleep.clockid = t.timer.base->clockid; |
1581 | restart->nanosleep.rmtp = rmtp; | 1589 | restart->nanosleep.rmtp = rmtp; |
1582 | restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); | 1590 | restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); |
1583 | 1591 | ||
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 834899f2500f..64e3df6ab1ef 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir; | |||
19 | 19 | ||
20 | #ifdef CONFIG_SMP | 20 | #ifdef CONFIG_SMP |
21 | 21 | ||
22 | static int irq_affinity_proc_show(struct seq_file *m, void *v) | 22 | static int show_irq_affinity(int type, struct seq_file *m, void *v) |
23 | { | 23 | { |
24 | struct irq_desc *desc = irq_to_desc((long)m->private); | 24 | struct irq_desc *desc = irq_to_desc((long)m->private); |
25 | const struct cpumask *mask = desc->irq_data.affinity; | 25 | const struct cpumask *mask = desc->irq_data.affinity; |
@@ -28,7 +28,10 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v) | |||
28 | if (irqd_is_setaffinity_pending(&desc->irq_data)) | 28 | if (irqd_is_setaffinity_pending(&desc->irq_data)) |
29 | mask = desc->pending_mask; | 29 | mask = desc->pending_mask; |
30 | #endif | 30 | #endif |
31 | seq_cpumask(m, mask); | 31 | if (type) |
32 | seq_cpumask_list(m, mask); | ||
33 | else | ||
34 | seq_cpumask(m, mask); | ||
32 | seq_putc(m, '\n'); | 35 | seq_putc(m, '\n'); |
33 | return 0; | 36 | return 0; |
34 | } | 37 | } |
@@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) | |||
59 | #endif | 62 | #endif |
60 | 63 | ||
61 | int no_irq_affinity; | 64 | int no_irq_affinity; |
62 | static ssize_t irq_affinity_proc_write(struct file *file, | 65 | static int irq_affinity_proc_show(struct seq_file *m, void *v) |
66 | { | ||
67 | return show_irq_affinity(0, m, v); | ||
68 | } | ||
69 | |||
70 | static int irq_affinity_list_proc_show(struct seq_file *m, void *v) | ||
71 | { | ||
72 | return show_irq_affinity(1, m, v); | ||
73 | } | ||
74 | |||
75 | |||
76 | static ssize_t write_irq_affinity(int type, struct file *file, | ||
63 | const char __user *buffer, size_t count, loff_t *pos) | 77 | const char __user *buffer, size_t count, loff_t *pos) |
64 | { | 78 | { |
65 | unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; | 79 | unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; |
@@ -72,7 +86,10 @@ static ssize_t irq_affinity_proc_write(struct file *file, | |||
72 | if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) | 86 | if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) |
73 | return -ENOMEM; | 87 | return -ENOMEM; |
74 | 88 | ||
75 | err = cpumask_parse_user(buffer, count, new_value); | 89 | if (type) |
90 | err = cpumask_parselist_user(buffer, count, new_value); | ||
91 | else | ||
92 | err = cpumask_parse_user(buffer, count, new_value); | ||
76 | if (err) | 93 | if (err) |
77 | goto free_cpumask; | 94 | goto free_cpumask; |
78 | 95 | ||
@@ -100,11 +117,28 @@ free_cpumask: | |||
100 | return err; | 117 | return err; |
101 | } | 118 | } |
102 | 119 | ||
120 | static ssize_t irq_affinity_proc_write(struct file *file, | ||
121 | const char __user *buffer, size_t count, loff_t *pos) | ||
122 | { | ||
123 | return write_irq_affinity(0, file, buffer, count, pos); | ||
124 | } | ||
125 | |||
126 | static ssize_t irq_affinity_list_proc_write(struct file *file, | ||
127 | const char __user *buffer, size_t count, loff_t *pos) | ||
128 | { | ||
129 | return write_irq_affinity(1, file, buffer, count, pos); | ||
130 | } | ||
131 | |||
103 | static int irq_affinity_proc_open(struct inode *inode, struct file *file) | 132 | static int irq_affinity_proc_open(struct inode *inode, struct file *file) |
104 | { | 133 | { |
105 | return single_open(file, irq_affinity_proc_show, PDE(inode)->data); | 134 | return single_open(file, irq_affinity_proc_show, PDE(inode)->data); |
106 | } | 135 | } |
107 | 136 | ||
137 | static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) | ||
138 | { | ||
139 | return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data); | ||
140 | } | ||
141 | |||
108 | static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) | 142 | static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) |
109 | { | 143 | { |
110 | return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); | 144 | return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); |
@@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = { | |||
125 | .release = single_release, | 159 | .release = single_release, |
126 | }; | 160 | }; |
127 | 161 | ||
162 | static const struct file_operations irq_affinity_list_proc_fops = { | ||
163 | .open = irq_affinity_list_proc_open, | ||
164 | .read = seq_read, | ||
165 | .llseek = seq_lseek, | ||
166 | .release = single_release, | ||
167 | .write = irq_affinity_list_proc_write, | ||
168 | }; | ||
169 | |||
128 | static int default_affinity_show(struct seq_file *m, void *v) | 170 | static int default_affinity_show(struct seq_file *m, void *v) |
129 | { | 171 | { |
130 | seq_cpumask(m, irq_default_affinity); | 172 | seq_cpumask(m, irq_default_affinity); |
@@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
289 | proc_create_data("affinity_hint", 0400, desc->dir, | 331 | proc_create_data("affinity_hint", 0400, desc->dir, |
290 | &irq_affinity_hint_proc_fops, (void *)(long)irq); | 332 | &irq_affinity_hint_proc_fops, (void *)(long)irq); |
291 | 333 | ||
334 | /* create /proc/irq/<irq>/smp_affinity_list */ | ||
335 | proc_create_data("smp_affinity_list", 0600, desc->dir, | ||
336 | &irq_affinity_list_proc_fops, (void *)(long)irq); | ||
337 | |||
292 | proc_create_data("node", 0444, desc->dir, | 338 | proc_create_data("node", 0444, desc->dir, |
293 | &irq_node_proc_fops, (void *)(long)irq); | 339 | &irq_node_proc_fops, (void *)(long)irq); |
294 | #endif | 340 | #endif |
diff --git a/kernel/kmod.c b/kernel/kmod.c index 5ae0ff38425f..ad6a81c58b44 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/kmod.h> | 25 | #include <linux/kmod.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/completion.h> | 27 | #include <linux/completion.h> |
28 | #include <linux/cred.h> | ||
28 | #include <linux/file.h> | 29 | #include <linux/file.h> |
29 | #include <linux/fdtable.h> | 30 | #include <linux/fdtable.h> |
30 | #include <linux/workqueue.h> | 31 | #include <linux/workqueue.h> |
@@ -43,6 +44,13 @@ extern int max_threads; | |||
43 | 44 | ||
44 | static struct workqueue_struct *khelper_wq; | 45 | static struct workqueue_struct *khelper_wq; |
45 | 46 | ||
47 | #define CAP_BSET (void *)1 | ||
48 | #define CAP_PI (void *)2 | ||
49 | |||
50 | static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; | ||
51 | static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; | ||
52 | static DEFINE_SPINLOCK(umh_sysctl_lock); | ||
53 | |||
46 | #ifdef CONFIG_MODULES | 54 | #ifdef CONFIG_MODULES |
47 | 55 | ||
48 | /* | 56 | /* |
@@ -132,6 +140,7 @@ EXPORT_SYMBOL(__request_module); | |||
132 | static int ____call_usermodehelper(void *data) | 140 | static int ____call_usermodehelper(void *data) |
133 | { | 141 | { |
134 | struct subprocess_info *sub_info = data; | 142 | struct subprocess_info *sub_info = data; |
143 | struct cred *new; | ||
135 | int retval; | 144 | int retval; |
136 | 145 | ||
137 | spin_lock_irq(&current->sighand->siglock); | 146 | spin_lock_irq(&current->sighand->siglock); |
@@ -153,6 +162,19 @@ static int ____call_usermodehelper(void *data) | |||
153 | goto fail; | 162 | goto fail; |
154 | } | 163 | } |
155 | 164 | ||
165 | retval = -ENOMEM; | ||
166 | new = prepare_kernel_cred(current); | ||
167 | if (!new) | ||
168 | goto fail; | ||
169 | |||
170 | spin_lock(&umh_sysctl_lock); | ||
171 | new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); | ||
172 | new->cap_inheritable = cap_intersect(usermodehelper_inheritable, | ||
173 | new->cap_inheritable); | ||
174 | spin_unlock(&umh_sysctl_lock); | ||
175 | |||
176 | commit_creds(new); | ||
177 | |||
156 | retval = kernel_execve(sub_info->path, | 178 | retval = kernel_execve(sub_info->path, |
157 | (const char *const *)sub_info->argv, | 179 | (const char *const *)sub_info->argv, |
158 | (const char *const *)sub_info->envp); | 180 | (const char *const *)sub_info->envp); |
@@ -420,6 +442,84 @@ unlock: | |||
420 | } | 442 | } |
421 | EXPORT_SYMBOL(call_usermodehelper_exec); | 443 | EXPORT_SYMBOL(call_usermodehelper_exec); |
422 | 444 | ||
445 | static int proc_cap_handler(struct ctl_table *table, int write, | ||
446 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
447 | { | ||
448 | struct ctl_table t; | ||
449 | unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; | ||
450 | kernel_cap_t new_cap; | ||
451 | int err, i; | ||
452 | |||
453 | if (write && (!capable(CAP_SETPCAP) || | ||
454 | !capable(CAP_SYS_MODULE))) | ||
455 | return -EPERM; | ||
456 | |||
457 | /* | ||
458 | * convert from the global kernel_cap_t to the ulong array to print to | ||
459 | * userspace if this is a read. | ||
460 | */ | ||
461 | spin_lock(&umh_sysctl_lock); | ||
462 | for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { | ||
463 | if (table->data == CAP_BSET) | ||
464 | cap_array[i] = usermodehelper_bset.cap[i]; | ||
465 | else if (table->data == CAP_PI) | ||
466 | cap_array[i] = usermodehelper_inheritable.cap[i]; | ||
467 | else | ||
468 | BUG(); | ||
469 | } | ||
470 | spin_unlock(&umh_sysctl_lock); | ||
471 | |||
472 | t = *table; | ||
473 | t.data = &cap_array; | ||
474 | |||
475 | /* | ||
476 | * actually read or write and array of ulongs from userspace. Remember | ||
477 | * these are least significant 32 bits first | ||
478 | */ | ||
479 | err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); | ||
480 | if (err < 0) | ||
481 | return err; | ||
482 | |||
483 | /* | ||
484 | * convert from the sysctl array of ulongs to the kernel_cap_t | ||
485 | * internal representation | ||
486 | */ | ||
487 | for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) | ||
488 | new_cap.cap[i] = cap_array[i]; | ||
489 | |||
490 | /* | ||
491 | * Drop everything not in the new_cap (but don't add things) | ||
492 | */ | ||
493 | spin_lock(&umh_sysctl_lock); | ||
494 | if (write) { | ||
495 | if (table->data == CAP_BSET) | ||
496 | usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); | ||
497 | if (table->data == CAP_PI) | ||
498 | usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); | ||
499 | } | ||
500 | spin_unlock(&umh_sysctl_lock); | ||
501 | |||
502 | return 0; | ||
503 | } | ||
504 | |||
505 | struct ctl_table usermodehelper_table[] = { | ||
506 | { | ||
507 | .procname = "bset", | ||
508 | .data = CAP_BSET, | ||
509 | .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), | ||
510 | .mode = 0600, | ||
511 | .proc_handler = proc_cap_handler, | ||
512 | }, | ||
513 | { | ||
514 | .procname = "inheritable", | ||
515 | .data = CAP_PI, | ||
516 | .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), | ||
517 | .mode = 0600, | ||
518 | .proc_handler = proc_cap_handler, | ||
519 | }, | ||
520 | { } | ||
521 | }; | ||
522 | |||
423 | void __init usermodehelper_init(void) | 523 | void __init usermodehelper_init(void) |
424 | { | 524 | { |
425 | khelper_wq = create_singlethread_workqueue("khelper"); | 525 | khelper_wq = create_singlethread_workqueue("khelper"); |
diff --git a/kernel/module.c b/kernel/module.c index 22879725678d..795bdc7f5c3f 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2812,7 +2812,7 @@ static struct module *load_module(void __user *umod, | |||
2812 | } | 2812 | } |
2813 | 2813 | ||
2814 | /* This has to be done once we're sure module name is unique. */ | 2814 | /* This has to be done once we're sure module name is unique. */ |
2815 | if (!mod->taints) | 2815 | if (!mod->taints || mod->taints == (1U<<TAINT_CRAP)) |
2816 | dynamic_debug_setup(info.debug, info.num_debug); | 2816 | dynamic_debug_setup(info.debug, info.num_debug); |
2817 | 2817 | ||
2818 | /* Find duplicate symbols */ | 2818 | /* Find duplicate symbols */ |
@@ -2849,7 +2849,7 @@ static struct module *load_module(void __user *umod, | |||
2849 | module_bug_cleanup(mod); | 2849 | module_bug_cleanup(mod); |
2850 | 2850 | ||
2851 | ddebug: | 2851 | ddebug: |
2852 | if (!mod->taints) | 2852 | if (!mod->taints || mod->taints == (1U<<TAINT_CRAP)) |
2853 | dynamic_debug_remove(info.debug); | 2853 | dynamic_debug_remove(info.debug); |
2854 | unlock: | 2854 | unlock: |
2855 | mutex_unlock(&module_mutex); | 2855 | mutex_unlock(&module_mutex); |
diff --git a/kernel/mutex.c b/kernel/mutex.c index 2c938e2337cd..d607ed5dd441 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -131,14 +131,14 @@ EXPORT_SYMBOL(mutex_unlock); | |||
131 | */ | 131 | */ |
132 | static inline int __sched | 132 | static inline int __sched |
133 | __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | 133 | __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, |
134 | unsigned long ip) | 134 | struct lockdep_map *nest_lock, unsigned long ip) |
135 | { | 135 | { |
136 | struct task_struct *task = current; | 136 | struct task_struct *task = current; |
137 | struct mutex_waiter waiter; | 137 | struct mutex_waiter waiter; |
138 | unsigned long flags; | 138 | unsigned long flags; |
139 | 139 | ||
140 | preempt_disable(); | 140 | preempt_disable(); |
141 | mutex_acquire(&lock->dep_map, subclass, 0, ip); | 141 | mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); |
142 | 142 | ||
143 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 143 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
144 | /* | 144 | /* |
@@ -269,16 +269,25 @@ void __sched | |||
269 | mutex_lock_nested(struct mutex *lock, unsigned int subclass) | 269 | mutex_lock_nested(struct mutex *lock, unsigned int subclass) |
270 | { | 270 | { |
271 | might_sleep(); | 271 | might_sleep(); |
272 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_); | 272 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); |
273 | } | 273 | } |
274 | 274 | ||
275 | EXPORT_SYMBOL_GPL(mutex_lock_nested); | 275 | EXPORT_SYMBOL_GPL(mutex_lock_nested); |
276 | 276 | ||
277 | void __sched | ||
278 | _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) | ||
279 | { | ||
280 | might_sleep(); | ||
281 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_); | ||
282 | } | ||
283 | |||
284 | EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); | ||
285 | |||
277 | int __sched | 286 | int __sched |
278 | mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) | 287 | mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) |
279 | { | 288 | { |
280 | might_sleep(); | 289 | might_sleep(); |
281 | return __mutex_lock_common(lock, TASK_KILLABLE, subclass, _RET_IP_); | 290 | return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); |
282 | } | 291 | } |
283 | EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); | 292 | EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); |
284 | 293 | ||
@@ -287,7 +296,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) | |||
287 | { | 296 | { |
288 | might_sleep(); | 297 | might_sleep(); |
289 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, | 298 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, |
290 | subclass, _RET_IP_); | 299 | subclass, NULL, _RET_IP_); |
291 | } | 300 | } |
292 | 301 | ||
293 | EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); | 302 | EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); |
@@ -393,7 +402,7 @@ __mutex_lock_slowpath(atomic_t *lock_count) | |||
393 | { | 402 | { |
394 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 403 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
395 | 404 | ||
396 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); | 405 | __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); |
397 | } | 406 | } |
398 | 407 | ||
399 | static noinline int __sched | 408 | static noinline int __sched |
@@ -401,7 +410,7 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count) | |||
401 | { | 410 | { |
402 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 411 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
403 | 412 | ||
404 | return __mutex_lock_common(lock, TASK_KILLABLE, 0, _RET_IP_); | 413 | return __mutex_lock_common(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); |
405 | } | 414 | } |
406 | 415 | ||
407 | static noinline int __sched | 416 | static noinline int __sched |
@@ -409,7 +418,7 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count) | |||
409 | { | 418 | { |
410 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 419 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
411 | 420 | ||
412 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_); | 421 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); |
413 | } | 422 | } |
414 | #endif | 423 | #endif |
415 | 424 | ||
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index a05d191ffdd9..5424e37673ed 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -22,6 +22,9 @@ | |||
22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
23 | #include <net/net_namespace.h> | 23 | #include <net/net_namespace.h> |
24 | #include <linux/ipc_namespace.h> | 24 | #include <linux/ipc_namespace.h> |
25 | #include <linux/proc_fs.h> | ||
26 | #include <linux/file.h> | ||
27 | #include <linux/syscalls.h> | ||
25 | 28 | ||
26 | static struct kmem_cache *nsproxy_cachep; | 29 | static struct kmem_cache *nsproxy_cachep; |
27 | 30 | ||
@@ -233,6 +236,45 @@ void exit_task_namespaces(struct task_struct *p) | |||
233 | switch_task_namespaces(p, NULL); | 236 | switch_task_namespaces(p, NULL); |
234 | } | 237 | } |
235 | 238 | ||
239 | SYSCALL_DEFINE2(setns, int, fd, int, nstype) | ||
240 | { | ||
241 | const struct proc_ns_operations *ops; | ||
242 | struct task_struct *tsk = current; | ||
243 | struct nsproxy *new_nsproxy; | ||
244 | struct proc_inode *ei; | ||
245 | struct file *file; | ||
246 | int err; | ||
247 | |||
248 | if (!capable(CAP_SYS_ADMIN)) | ||
249 | return -EPERM; | ||
250 | |||
251 | file = proc_ns_fget(fd); | ||
252 | if (IS_ERR(file)) | ||
253 | return PTR_ERR(file); | ||
254 | |||
255 | err = -EINVAL; | ||
256 | ei = PROC_I(file->f_dentry->d_inode); | ||
257 | ops = ei->ns_ops; | ||
258 | if (nstype && (ops->type != nstype)) | ||
259 | goto out; | ||
260 | |||
261 | new_nsproxy = create_new_namespaces(0, tsk, tsk->fs); | ||
262 | if (IS_ERR(new_nsproxy)) { | ||
263 | err = PTR_ERR(new_nsproxy); | ||
264 | goto out; | ||
265 | } | ||
266 | |||
267 | err = ops->install(new_nsproxy, ei->ns); | ||
268 | if (err) { | ||
269 | free_nsproxy(new_nsproxy); | ||
270 | goto out; | ||
271 | } | ||
272 | switch_task_namespaces(tsk, new_nsproxy); | ||
273 | out: | ||
274 | fput(file); | ||
275 | return err; | ||
276 | } | ||
277 | |||
236 | static int __init nsproxy_cache_init(void) | 278 | static int __init nsproxy_cache_init(void) |
237 | { | 279 | { |
238 | nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); | 280 | nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); |
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 0da058bff8eb..beb184689af9 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c | |||
@@ -385,7 +385,7 @@ static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, | |||
385 | s32 value; | 385 | s32 value; |
386 | unsigned long flags; | 386 | unsigned long flags; |
387 | struct pm_qos_object *o; | 387 | struct pm_qos_object *o; |
388 | struct pm_qos_request_list *pm_qos_req = filp->private_data;; | 388 | struct pm_qos_request_list *pm_qos_req = filp->private_data; |
389 | 389 | ||
390 | if (!pm_qos_req) | 390 | if (!pm_qos_req) |
391 | return -EINVAL; | 391 | return -EINVAL; |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 0791b13df7bf..58f405b581e7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -1514,7 +1514,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, | |||
1514 | return -EFAULT; | 1514 | return -EFAULT; |
1515 | 1515 | ||
1516 | restart_block->fn = posix_cpu_nsleep_restart; | 1516 | restart_block->fn = posix_cpu_nsleep_restart; |
1517 | restart_block->nanosleep.index = which_clock; | 1517 | restart_block->nanosleep.clockid = which_clock; |
1518 | restart_block->nanosleep.rmtp = rmtp; | 1518 | restart_block->nanosleep.rmtp = rmtp; |
1519 | restart_block->nanosleep.expires = timespec_to_ns(rqtp); | 1519 | restart_block->nanosleep.expires = timespec_to_ns(rqtp); |
1520 | } | 1520 | } |
@@ -1523,7 +1523,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, | |||
1523 | 1523 | ||
1524 | static long posix_cpu_nsleep_restart(struct restart_block *restart_block) | 1524 | static long posix_cpu_nsleep_restart(struct restart_block *restart_block) |
1525 | { | 1525 | { |
1526 | clockid_t which_clock = restart_block->nanosleep.index; | 1526 | clockid_t which_clock = restart_block->nanosleep.clockid; |
1527 | struct timespec t; | 1527 | struct timespec t; |
1528 | struct itimerspec it; | 1528 | struct itimerspec it; |
1529 | int error; | 1529 | int error; |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index e5498d7405c3..4556182527f3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void) | |||
491 | return tmr; | 491 | return tmr; |
492 | } | 492 | } |
493 | 493 | ||
494 | static void k_itimer_rcu_free(struct rcu_head *head) | ||
495 | { | ||
496 | struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); | ||
497 | |||
498 | kmem_cache_free(posix_timers_cache, tmr); | ||
499 | } | ||
500 | |||
494 | #define IT_ID_SET 1 | 501 | #define IT_ID_SET 1 |
495 | #define IT_ID_NOT_SET 0 | 502 | #define IT_ID_NOT_SET 0 |
496 | static void release_posix_timer(struct k_itimer *tmr, int it_id_set) | 503 | static void release_posix_timer(struct k_itimer *tmr, int it_id_set) |
@@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) | |||
503 | } | 510 | } |
504 | put_pid(tmr->it_pid); | 511 | put_pid(tmr->it_pid); |
505 | sigqueue_free(tmr->sigq); | 512 | sigqueue_free(tmr->sigq); |
506 | kmem_cache_free(posix_timers_cache, tmr); | 513 | call_rcu(&tmr->it.rcu, k_itimer_rcu_free); |
507 | } | 514 | } |
508 | 515 | ||
509 | static struct k_clock *clockid_to_kclock(const clockid_t id) | 516 | static struct k_clock *clockid_to_kclock(const clockid_t id) |
@@ -631,22 +638,18 @@ out: | |||
631 | static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) | 638 | static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) |
632 | { | 639 | { |
633 | struct k_itimer *timr; | 640 | struct k_itimer *timr; |
634 | /* | 641 | |
635 | * Watch out here. We do a irqsave on the idr_lock and pass the | 642 | rcu_read_lock(); |
636 | * flags part over to the timer lock. Must not let interrupts in | ||
637 | * while we are moving the lock. | ||
638 | */ | ||
639 | spin_lock_irqsave(&idr_lock, *flags); | ||
640 | timr = idr_find(&posix_timers_id, (int)timer_id); | 643 | timr = idr_find(&posix_timers_id, (int)timer_id); |
641 | if (timr) { | 644 | if (timr) { |
642 | spin_lock(&timr->it_lock); | 645 | spin_lock_irqsave(&timr->it_lock, *flags); |
643 | if (timr->it_signal == current->signal) { | 646 | if (timr->it_signal == current->signal) { |
644 | spin_unlock(&idr_lock); | 647 | rcu_read_unlock(); |
645 | return timr; | 648 | return timr; |
646 | } | 649 | } |
647 | spin_unlock(&timr->it_lock); | 650 | spin_unlock_irqrestore(&timr->it_lock, *flags); |
648 | } | 651 | } |
649 | spin_unlock_irqrestore(&idr_lock, *flags); | 652 | rcu_read_unlock(); |
650 | 653 | ||
651 | return NULL; | 654 | return NULL; |
652 | } | 655 | } |
@@ -1056,7 +1059,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, | |||
1056 | */ | 1059 | */ |
1057 | long clock_nanosleep_restart(struct restart_block *restart_block) | 1060 | long clock_nanosleep_restart(struct restart_block *restart_block) |
1058 | { | 1061 | { |
1059 | clockid_t which_clock = restart_block->nanosleep.index; | 1062 | clockid_t which_clock = restart_block->nanosleep.clockid; |
1060 | struct k_clock *kc = clockid_to_kclock(which_clock); | 1063 | struct k_clock *kc = clockid_to_kclock(which_clock); |
1061 | 1064 | ||
1062 | if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) | 1065 | if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) |
diff --git a/kernel/printk.c b/kernel/printk.c index da8ca817eae3..35185392173f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/smp.h> | 31 | #include <linux/smp.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/bootmem.h> | 33 | #include <linux/bootmem.h> |
34 | #include <linux/memblock.h> | ||
34 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
35 | #include <linux/kexec.h> | 36 | #include <linux/kexec.h> |
36 | #include <linux/kdb.h> | 37 | #include <linux/kdb.h> |
@@ -167,46 +168,74 @@ void log_buf_kexec_setup(void) | |||
167 | } | 168 | } |
168 | #endif | 169 | #endif |
169 | 170 | ||
171 | /* requested log_buf_len from kernel cmdline */ | ||
172 | static unsigned long __initdata new_log_buf_len; | ||
173 | |||
174 | /* save requested log_buf_len since it's too early to process it */ | ||
170 | static int __init log_buf_len_setup(char *str) | 175 | static int __init log_buf_len_setup(char *str) |
171 | { | 176 | { |
172 | unsigned size = memparse(str, &str); | 177 | unsigned size = memparse(str, &str); |
173 | unsigned long flags; | ||
174 | 178 | ||
175 | if (size) | 179 | if (size) |
176 | size = roundup_pow_of_two(size); | 180 | size = roundup_pow_of_two(size); |
177 | if (size > log_buf_len) { | 181 | if (size > log_buf_len) |
178 | unsigned start, dest_idx, offset; | 182 | new_log_buf_len = size; |
179 | char *new_log_buf; | ||
180 | 183 | ||
181 | new_log_buf = alloc_bootmem(size); | 184 | return 0; |
182 | if (!new_log_buf) { | 185 | } |
183 | printk(KERN_WARNING "log_buf_len: allocation failed\n"); | 186 | early_param("log_buf_len", log_buf_len_setup); |
184 | goto out; | ||
185 | } | ||
186 | 187 | ||
187 | spin_lock_irqsave(&logbuf_lock, flags); | 188 | void __init setup_log_buf(int early) |
188 | log_buf_len = size; | 189 | { |
189 | log_buf = new_log_buf; | 190 | unsigned long flags; |
190 | 191 | unsigned start, dest_idx, offset; | |
191 | offset = start = min(con_start, log_start); | 192 | char *new_log_buf; |
192 | dest_idx = 0; | 193 | int free; |
193 | while (start != log_end) { | 194 | |
194 | log_buf[dest_idx] = __log_buf[start & (__LOG_BUF_LEN - 1)]; | 195 | if (!new_log_buf_len) |
195 | start++; | 196 | return; |
196 | dest_idx++; | 197 | |
197 | } | 198 | if (early) { |
198 | log_start -= offset; | 199 | unsigned long mem; |
199 | con_start -= offset; | ||
200 | log_end -= offset; | ||
201 | spin_unlock_irqrestore(&logbuf_lock, flags); | ||
202 | 200 | ||
203 | printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len); | 201 | mem = memblock_alloc(new_log_buf_len, PAGE_SIZE); |
202 | if (mem == MEMBLOCK_ERROR) | ||
203 | return; | ||
204 | new_log_buf = __va(mem); | ||
205 | } else { | ||
206 | new_log_buf = alloc_bootmem_nopanic(new_log_buf_len); | ||
204 | } | 207 | } |
205 | out: | ||
206 | return 1; | ||
207 | } | ||
208 | 208 | ||
209 | __setup("log_buf_len=", log_buf_len_setup); | 209 | if (unlikely(!new_log_buf)) { |
210 | pr_err("log_buf_len: %ld bytes not available\n", | ||
211 | new_log_buf_len); | ||
212 | return; | ||
213 | } | ||
214 | |||
215 | spin_lock_irqsave(&logbuf_lock, flags); | ||
216 | log_buf_len = new_log_buf_len; | ||
217 | log_buf = new_log_buf; | ||
218 | new_log_buf_len = 0; | ||
219 | free = __LOG_BUF_LEN - log_end; | ||
220 | |||
221 | offset = start = min(con_start, log_start); | ||
222 | dest_idx = 0; | ||
223 | while (start != log_end) { | ||
224 | unsigned log_idx_mask = start & (__LOG_BUF_LEN - 1); | ||
225 | |||
226 | log_buf[dest_idx] = __log_buf[log_idx_mask]; | ||
227 | start++; | ||
228 | dest_idx++; | ||
229 | } | ||
230 | log_start -= offset; | ||
231 | con_start -= offset; | ||
232 | log_end -= offset; | ||
233 | spin_unlock_irqrestore(&logbuf_lock, flags); | ||
234 | |||
235 | pr_info("log_buf_len: %d\n", log_buf_len); | ||
236 | pr_info("early log buf free: %d(%d%%)\n", | ||
237 | free, (free * 100) / __LOG_BUF_LEN); | ||
238 | } | ||
210 | 239 | ||
211 | #ifdef CONFIG_BOOT_PRINTK_DELAY | 240 | #ifdef CONFIG_BOOT_PRINTK_DELAY |
212 | 241 | ||
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index dc7ab65f3b36..2df115790cd9 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -38,35 +38,33 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) | |||
38 | child->parent = new_parent; | 38 | child->parent = new_parent; |
39 | } | 39 | } |
40 | 40 | ||
41 | /* | 41 | /** |
42 | * Turn a tracing stop into a normal stop now, since with no tracer there | 42 | * __ptrace_unlink - unlink ptracee and restore its execution state |
43 | * would be no way to wake it up with SIGCONT or SIGKILL. If there was a | 43 | * @child: ptracee to be unlinked |
44 | * signal sent that would resume the child, but didn't because it was in | ||
45 | * TASK_TRACED, resume it now. | ||
46 | * Requires that irqs be disabled. | ||
47 | */ | ||
48 | static void ptrace_untrace(struct task_struct *child) | ||
49 | { | ||
50 | spin_lock(&child->sighand->siglock); | ||
51 | if (task_is_traced(child)) { | ||
52 | /* | ||
53 | * If the group stop is completed or in progress, | ||
54 | * this thread was already counted as stopped. | ||
55 | */ | ||
56 | if (child->signal->flags & SIGNAL_STOP_STOPPED || | ||
57 | child->signal->group_stop_count) | ||
58 | __set_task_state(child, TASK_STOPPED); | ||
59 | else | ||
60 | signal_wake_up(child, 1); | ||
61 | } | ||
62 | spin_unlock(&child->sighand->siglock); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * unptrace a task: move it back to its original parent and | ||
67 | * remove it from the ptrace list. | ||
68 | * | 44 | * |
69 | * Must be called with the tasklist lock write-held. | 45 | * Remove @child from the ptrace list, move it back to the original parent, |
46 | * and restore the execution state so that it conforms to the group stop | ||
47 | * state. | ||
48 | * | ||
49 | * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer | ||
50 | * exiting. For PTRACE_DETACH, unless the ptracee has been killed between | ||
51 | * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED. | ||
52 | * If the ptracer is exiting, the ptracee can be in any state. | ||
53 | * | ||
54 | * After detach, the ptracee should be in a state which conforms to the | ||
55 | * group stop. If the group is stopped or in the process of stopping, the | ||
56 | * ptracee should be put into TASK_STOPPED; otherwise, it should be woken | ||
57 | * up from TASK_TRACED. | ||
58 | * | ||
59 | * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED, | ||
60 | * it goes through TRACED -> RUNNING -> STOPPED transition which is similar | ||
61 | * to but in the opposite direction of what happens while attaching to a | ||
62 | * stopped task. However, in this direction, the intermediate RUNNING | ||
63 | * state is not hidden even from the current ptracer and if it immediately | ||
64 | * re-attaches and performs a WNOHANG wait(2), it may fail. | ||
65 | * | ||
66 | * CONTEXT: | ||
67 | * write_lock_irq(tasklist_lock) | ||
70 | */ | 68 | */ |
71 | void __ptrace_unlink(struct task_struct *child) | 69 | void __ptrace_unlink(struct task_struct *child) |
72 | { | 70 | { |
@@ -76,8 +74,27 @@ void __ptrace_unlink(struct task_struct *child) | |||
76 | child->parent = child->real_parent; | 74 | child->parent = child->real_parent; |
77 | list_del_init(&child->ptrace_entry); | 75 | list_del_init(&child->ptrace_entry); |
78 | 76 | ||
79 | if (task_is_traced(child)) | 77 | spin_lock(&child->sighand->siglock); |
80 | ptrace_untrace(child); | 78 | |
79 | /* | ||
80 | * Reinstate GROUP_STOP_PENDING if group stop is in effect and | ||
81 | * @child isn't dead. | ||
82 | */ | ||
83 | if (!(child->flags & PF_EXITING) && | ||
84 | (child->signal->flags & SIGNAL_STOP_STOPPED || | ||
85 | child->signal->group_stop_count)) | ||
86 | child->group_stop |= GROUP_STOP_PENDING; | ||
87 | |||
88 | /* | ||
89 | * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick | ||
90 | * @child in the butt. Note that @resume should be used iff @child | ||
91 | * is in TASK_TRACED; otherwise, we might unduly disrupt | ||
92 | * TASK_KILLABLE sleeps. | ||
93 | */ | ||
94 | if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child)) | ||
95 | signal_wake_up(child, task_is_traced(child)); | ||
96 | |||
97 | spin_unlock(&child->sighand->siglock); | ||
81 | } | 98 | } |
82 | 99 | ||
83 | /* | 100 | /* |
@@ -96,16 +113,14 @@ int ptrace_check_attach(struct task_struct *child, int kill) | |||
96 | */ | 113 | */ |
97 | read_lock(&tasklist_lock); | 114 | read_lock(&tasklist_lock); |
98 | if ((child->ptrace & PT_PTRACED) && child->parent == current) { | 115 | if ((child->ptrace & PT_PTRACED) && child->parent == current) { |
99 | ret = 0; | ||
100 | /* | 116 | /* |
101 | * child->sighand can't be NULL, release_task() | 117 | * child->sighand can't be NULL, release_task() |
102 | * does ptrace_unlink() before __exit_signal(). | 118 | * does ptrace_unlink() before __exit_signal(). |
103 | */ | 119 | */ |
104 | spin_lock_irq(&child->sighand->siglock); | 120 | spin_lock_irq(&child->sighand->siglock); |
105 | if (task_is_stopped(child)) | 121 | WARN_ON_ONCE(task_is_stopped(child)); |
106 | child->state = TASK_TRACED; | 122 | if (task_is_traced(child) || kill) |
107 | else if (!task_is_traced(child) && !kill) | 123 | ret = 0; |
108 | ret = -ESRCH; | ||
109 | spin_unlock_irq(&child->sighand->siglock); | 124 | spin_unlock_irq(&child->sighand->siglock); |
110 | } | 125 | } |
111 | read_unlock(&tasklist_lock); | 126 | read_unlock(&tasklist_lock); |
@@ -169,6 +184,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
169 | 184 | ||
170 | static int ptrace_attach(struct task_struct *task) | 185 | static int ptrace_attach(struct task_struct *task) |
171 | { | 186 | { |
187 | bool wait_trap = false; | ||
172 | int retval; | 188 | int retval; |
173 | 189 | ||
174 | audit_ptrace(task); | 190 | audit_ptrace(task); |
@@ -208,12 +224,42 @@ static int ptrace_attach(struct task_struct *task) | |||
208 | __ptrace_link(task, current); | 224 | __ptrace_link(task, current); |
209 | send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); | 225 | send_sig_info(SIGSTOP, SEND_SIG_FORCED, task); |
210 | 226 | ||
227 | spin_lock(&task->sighand->siglock); | ||
228 | |||
229 | /* | ||
230 | * If the task is already STOPPED, set GROUP_STOP_PENDING and | ||
231 | * TRAPPING, and kick it so that it transits to TRACED. TRAPPING | ||
232 | * will be cleared if the child completes the transition or any | ||
233 | * event which clears the group stop states happens. We'll wait | ||
234 | * for the transition to complete before returning from this | ||
235 | * function. | ||
236 | * | ||
237 | * This hides STOPPED -> RUNNING -> TRACED transition from the | ||
238 | * attaching thread but a different thread in the same group can | ||
239 | * still observe the transient RUNNING state. IOW, if another | ||
240 | * thread's WNOHANG wait(2) on the stopped tracee races against | ||
241 | * ATTACH, the wait(2) may fail due to the transient RUNNING. | ||
242 | * | ||
243 | * The following task_is_stopped() test is safe as both transitions | ||
244 | * in and out of STOPPED are protected by siglock. | ||
245 | */ | ||
246 | if (task_is_stopped(task)) { | ||
247 | task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING; | ||
248 | signal_wake_up(task, 1); | ||
249 | wait_trap = true; | ||
250 | } | ||
251 | |||
252 | spin_unlock(&task->sighand->siglock); | ||
253 | |||
211 | retval = 0; | 254 | retval = 0; |
212 | unlock_tasklist: | 255 | unlock_tasklist: |
213 | write_unlock_irq(&tasklist_lock); | 256 | write_unlock_irq(&tasklist_lock); |
214 | unlock_creds: | 257 | unlock_creds: |
215 | mutex_unlock(&task->signal->cred_guard_mutex); | 258 | mutex_unlock(&task->signal->cred_guard_mutex); |
216 | out: | 259 | out: |
260 | if (wait_trap) | ||
261 | wait_event(current->signal->wait_chldexit, | ||
262 | !(task->group_stop & GROUP_STOP_TRAPPING)); | ||
217 | return retval; | 263 | return retval; |
218 | } | 264 | } |
219 | 265 | ||
@@ -316,8 +362,6 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) | |||
316 | if (child->ptrace) { | 362 | if (child->ptrace) { |
317 | child->exit_code = data; | 363 | child->exit_code = data; |
318 | dead = __ptrace_detach(current, child); | 364 | dead = __ptrace_detach(current, child); |
319 | if (!child->exit_state) | ||
320 | wake_up_state(child, TASK_TRACED | TASK_STOPPED); | ||
321 | } | 365 | } |
322 | write_unlock_irq(&tasklist_lock); | 366 | write_unlock_irq(&tasklist_lock); |
323 | 367 | ||
@@ -518,7 +562,7 @@ static int ptrace_resume(struct task_struct *child, long request, | |||
518 | } | 562 | } |
519 | 563 | ||
520 | child->exit_code = data; | 564 | child->exit_code = data; |
521 | wake_up_process(child); | 565 | wake_up_state(child, __TASK_TRACED); |
522 | 566 | ||
523 | return 0; | 567 | return 0; |
524 | } | 568 | } |
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 421abfd3641d..7bbac7d0f5ab 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/time.h> | 36 | #include <linux/time.h> |
37 | #include <linux/cpu.h> | 37 | #include <linux/cpu.h> |
38 | #include <linux/prefetch.h> | ||
38 | 39 | ||
39 | /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ | 40 | /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ |
40 | static struct task_struct *rcu_kthread_task; | 41 | static struct task_struct *rcu_kthread_task; |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e486f7c3ffb8..f07d2f03181a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/kernel_stat.h> | 49 | #include <linux/kernel_stat.h> |
50 | #include <linux/wait.h> | 50 | #include <linux/wait.h> |
51 | #include <linux/kthread.h> | 51 | #include <linux/kthread.h> |
52 | #include <linux/prefetch.h> | ||
52 | 53 | ||
53 | #include "rcutree.h" | 54 | #include "rcutree.h" |
54 | 55 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index c62acf45d3b9..2d12893b8b0f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -293,7 +293,7 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
293 | * limitation from this.) | 293 | * limitation from this.) |
294 | */ | 294 | */ |
295 | #define MIN_SHARES 2 | 295 | #define MIN_SHARES 2 |
296 | #define MAX_SHARES (1UL << 18) | 296 | #define MAX_SHARES (1UL << (18 + SCHED_LOAD_RESOLUTION)) |
297 | 297 | ||
298 | static int root_task_group_load = ROOT_TASK_GROUP_LOAD; | 298 | static int root_task_group_load = ROOT_TASK_GROUP_LOAD; |
299 | #endif | 299 | #endif |
@@ -1330,13 +1330,25 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
1330 | { | 1330 | { |
1331 | u64 tmp; | 1331 | u64 tmp; |
1332 | 1332 | ||
1333 | tmp = (u64)delta_exec * weight; | 1333 | /* |
1334 | * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched | ||
1335 | * entities since MIN_SHARES = 2. Treat weight as 1 if less than | ||
1336 | * 2^SCHED_LOAD_RESOLUTION. | ||
1337 | */ | ||
1338 | if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION))) | ||
1339 | tmp = (u64)delta_exec * scale_load_down(weight); | ||
1340 | else | ||
1341 | tmp = (u64)delta_exec; | ||
1334 | 1342 | ||
1335 | if (!lw->inv_weight) { | 1343 | if (!lw->inv_weight) { |
1336 | if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) | 1344 | unsigned long w = scale_load_down(lw->weight); |
1345 | |||
1346 | if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST)) | ||
1337 | lw->inv_weight = 1; | 1347 | lw->inv_weight = 1; |
1348 | else if (unlikely(!w)) | ||
1349 | lw->inv_weight = WMULT_CONST; | ||
1338 | else | 1350 | else |
1339 | lw->inv_weight = WMULT_CONST / lw->weight; | 1351 | lw->inv_weight = WMULT_CONST / w; |
1340 | } | 1352 | } |
1341 | 1353 | ||
1342 | /* | 1354 | /* |
@@ -1778,17 +1790,20 @@ static void dec_nr_running(struct rq *rq) | |||
1778 | 1790 | ||
1779 | static void set_load_weight(struct task_struct *p) | 1791 | static void set_load_weight(struct task_struct *p) |
1780 | { | 1792 | { |
1793 | int prio = p->static_prio - MAX_RT_PRIO; | ||
1794 | struct load_weight *load = &p->se.load; | ||
1795 | |||
1781 | /* | 1796 | /* |
1782 | * SCHED_IDLE tasks get minimal weight: | 1797 | * SCHED_IDLE tasks get minimal weight: |
1783 | */ | 1798 | */ |
1784 | if (p->policy == SCHED_IDLE) { | 1799 | if (p->policy == SCHED_IDLE) { |
1785 | p->se.load.weight = WEIGHT_IDLEPRIO; | 1800 | load->weight = scale_load(WEIGHT_IDLEPRIO); |
1786 | p->se.load.inv_weight = WMULT_IDLEPRIO; | 1801 | load->inv_weight = WMULT_IDLEPRIO; |
1787 | return; | 1802 | return; |
1788 | } | 1803 | } |
1789 | 1804 | ||
1790 | p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO]; | 1805 | load->weight = scale_load(prio_to_weight[prio]); |
1791 | p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; | 1806 | load->inv_weight = prio_to_wmult[prio]; |
1792 | } | 1807 | } |
1793 | 1808 | ||
1794 | static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) | 1809 | static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) |
@@ -2564,7 +2579,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) | |||
2564 | { | 2579 | { |
2565 | struct rq *rq = cpu_rq(cpu); | 2580 | struct rq *rq = cpu_rq(cpu); |
2566 | 2581 | ||
2567 | #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) | 2582 | #if defined(CONFIG_SMP) |
2568 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { | 2583 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { |
2569 | ttwu_queue_remote(p, cpu); | 2584 | ttwu_queue_remote(p, cpu); |
2570 | return; | 2585 | return; |
@@ -6527,7 +6542,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
6527 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 6542 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); |
6528 | 6543 | ||
6529 | printk(KERN_CONT " %s", str); | 6544 | printk(KERN_CONT " %s", str); |
6530 | if (group->cpu_power != SCHED_LOAD_SCALE) { | 6545 | if (group->cpu_power != SCHED_POWER_SCALE) { |
6531 | printk(KERN_CONT " (cpu_power = %d)", | 6546 | printk(KERN_CONT " (cpu_power = %d)", |
6532 | group->cpu_power); | 6547 | group->cpu_power); |
6533 | } | 6548 | } |
@@ -7902,7 +7917,7 @@ void __init sched_init(void) | |||
7902 | #ifdef CONFIG_SMP | 7917 | #ifdef CONFIG_SMP |
7903 | rq->sd = NULL; | 7918 | rq->sd = NULL; |
7904 | rq->rd = NULL; | 7919 | rq->rd = NULL; |
7905 | rq->cpu_power = SCHED_LOAD_SCALE; | 7920 | rq->cpu_power = SCHED_POWER_SCALE; |
7906 | rq->post_schedule = 0; | 7921 | rq->post_schedule = 0; |
7907 | rq->active_balance = 0; | 7922 | rq->active_balance = 0; |
7908 | rq->next_balance = jiffies; | 7923 | rq->next_balance = jiffies; |
@@ -8806,14 +8821,14 @@ cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
8806 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, | 8821 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, |
8807 | u64 shareval) | 8822 | u64 shareval) |
8808 | { | 8823 | { |
8809 | return sched_group_set_shares(cgroup_tg(cgrp), shareval); | 8824 | return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval)); |
8810 | } | 8825 | } |
8811 | 8826 | ||
8812 | static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) | 8827 | static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) |
8813 | { | 8828 | { |
8814 | struct task_group *tg = cgroup_tg(cgrp); | 8829 | struct task_group *tg = cgroup_tg(cgrp); |
8815 | 8830 | ||
8816 | return (u64) tg->shares; | 8831 | return (u64) scale_load_down(tg->shares); |
8817 | } | 8832 | } |
8818 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 8833 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8819 | 8834 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 37f22626225e..e32a9b70ee9c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1584,7 +1584,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, | |||
1584 | } | 1584 | } |
1585 | 1585 | ||
1586 | /* Adjust by relative CPU power of the group */ | 1586 | /* Adjust by relative CPU power of the group */ |
1587 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; | 1587 | avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power; |
1588 | 1588 | ||
1589 | if (local_group) { | 1589 | if (local_group) { |
1590 | this_load = avg_load; | 1590 | this_load = avg_load; |
@@ -1722,7 +1722,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) | |||
1722 | nr_running += cpu_rq(i)->cfs.nr_running; | 1722 | nr_running += cpu_rq(i)->cfs.nr_running; |
1723 | } | 1723 | } |
1724 | 1724 | ||
1725 | capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); | 1725 | capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE); |
1726 | 1726 | ||
1727 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | 1727 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) |
1728 | nr_running /= 2; | 1728 | nr_running /= 2; |
@@ -2570,7 +2570,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, | |||
2570 | 2570 | ||
2571 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) | 2571 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) |
2572 | { | 2572 | { |
2573 | return SCHED_LOAD_SCALE; | 2573 | return SCHED_POWER_SCALE; |
2574 | } | 2574 | } |
2575 | 2575 | ||
2576 | unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) | 2576 | unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) |
@@ -2607,10 +2607,10 @@ unsigned long scale_rt_power(int cpu) | |||
2607 | available = total - rq->rt_avg; | 2607 | available = total - rq->rt_avg; |
2608 | } | 2608 | } |
2609 | 2609 | ||
2610 | if (unlikely((s64)total < SCHED_LOAD_SCALE)) | 2610 | if (unlikely((s64)total < SCHED_POWER_SCALE)) |
2611 | total = SCHED_LOAD_SCALE; | 2611 | total = SCHED_POWER_SCALE; |
2612 | 2612 | ||
2613 | total >>= SCHED_LOAD_SHIFT; | 2613 | total >>= SCHED_POWER_SHIFT; |
2614 | 2614 | ||
2615 | return div_u64(available, total); | 2615 | return div_u64(available, total); |
2616 | } | 2616 | } |
@@ -2618,7 +2618,7 @@ unsigned long scale_rt_power(int cpu) | |||
2618 | static void update_cpu_power(struct sched_domain *sd, int cpu) | 2618 | static void update_cpu_power(struct sched_domain *sd, int cpu) |
2619 | { | 2619 | { |
2620 | unsigned long weight = sd->span_weight; | 2620 | unsigned long weight = sd->span_weight; |
2621 | unsigned long power = SCHED_LOAD_SCALE; | 2621 | unsigned long power = SCHED_POWER_SCALE; |
2622 | struct sched_group *sdg = sd->groups; | 2622 | struct sched_group *sdg = sd->groups; |
2623 | 2623 | ||
2624 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { | 2624 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { |
@@ -2627,7 +2627,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
2627 | else | 2627 | else |
2628 | power *= default_scale_smt_power(sd, cpu); | 2628 | power *= default_scale_smt_power(sd, cpu); |
2629 | 2629 | ||
2630 | power >>= SCHED_LOAD_SHIFT; | 2630 | power >>= SCHED_POWER_SHIFT; |
2631 | } | 2631 | } |
2632 | 2632 | ||
2633 | sdg->cpu_power_orig = power; | 2633 | sdg->cpu_power_orig = power; |
@@ -2637,10 +2637,10 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
2637 | else | 2637 | else |
2638 | power *= default_scale_freq_power(sd, cpu); | 2638 | power *= default_scale_freq_power(sd, cpu); |
2639 | 2639 | ||
2640 | power >>= SCHED_LOAD_SHIFT; | 2640 | power >>= SCHED_POWER_SHIFT; |
2641 | 2641 | ||
2642 | power *= scale_rt_power(cpu); | 2642 | power *= scale_rt_power(cpu); |
2643 | power >>= SCHED_LOAD_SHIFT; | 2643 | power >>= SCHED_POWER_SHIFT; |
2644 | 2644 | ||
2645 | if (!power) | 2645 | if (!power) |
2646 | power = 1; | 2646 | power = 1; |
@@ -2682,7 +2682,7 @@ static inline int | |||
2682 | fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | 2682 | fix_small_capacity(struct sched_domain *sd, struct sched_group *group) |
2683 | { | 2683 | { |
2684 | /* | 2684 | /* |
2685 | * Only siblings can have significantly less than SCHED_LOAD_SCALE | 2685 | * Only siblings can have significantly less than SCHED_POWER_SCALE |
2686 | */ | 2686 | */ |
2687 | if (!(sd->flags & SD_SHARE_CPUPOWER)) | 2687 | if (!(sd->flags & SD_SHARE_CPUPOWER)) |
2688 | return 0; | 2688 | return 0; |
@@ -2770,7 +2770,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2770 | } | 2770 | } |
2771 | 2771 | ||
2772 | /* Adjust by relative CPU power of the group */ | 2772 | /* Adjust by relative CPU power of the group */ |
2773 | sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power; | 2773 | sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power; |
2774 | 2774 | ||
2775 | /* | 2775 | /* |
2776 | * Consider the group unbalanced when the imbalance is larger | 2776 | * Consider the group unbalanced when the imbalance is larger |
@@ -2787,7 +2787,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2787 | if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) | 2787 | if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) |
2788 | sgs->group_imb = 1; | 2788 | sgs->group_imb = 1; |
2789 | 2789 | ||
2790 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); | 2790 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, |
2791 | SCHED_POWER_SCALE); | ||
2791 | if (!sgs->group_capacity) | 2792 | if (!sgs->group_capacity) |
2792 | sgs->group_capacity = fix_small_capacity(sd, group); | 2793 | sgs->group_capacity = fix_small_capacity(sd, group); |
2793 | sgs->group_weight = group->group_weight; | 2794 | sgs->group_weight = group->group_weight; |
@@ -2961,7 +2962,7 @@ static int check_asym_packing(struct sched_domain *sd, | |||
2961 | return 0; | 2962 | return 0; |
2962 | 2963 | ||
2963 | *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power, | 2964 | *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power, |
2964 | SCHED_LOAD_SCALE); | 2965 | SCHED_POWER_SCALE); |
2965 | return 1; | 2966 | return 1; |
2966 | } | 2967 | } |
2967 | 2968 | ||
@@ -2990,7 +2991,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, | |||
2990 | cpu_avg_load_per_task(this_cpu); | 2991 | cpu_avg_load_per_task(this_cpu); |
2991 | 2992 | ||
2992 | scaled_busy_load_per_task = sds->busiest_load_per_task | 2993 | scaled_busy_load_per_task = sds->busiest_load_per_task |
2993 | * SCHED_LOAD_SCALE; | 2994 | * SCHED_POWER_SCALE; |
2994 | scaled_busy_load_per_task /= sds->busiest->cpu_power; | 2995 | scaled_busy_load_per_task /= sds->busiest->cpu_power; |
2995 | 2996 | ||
2996 | if (sds->max_load - sds->this_load + scaled_busy_load_per_task >= | 2997 | if (sds->max_load - sds->this_load + scaled_busy_load_per_task >= |
@@ -3009,10 +3010,10 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, | |||
3009 | min(sds->busiest_load_per_task, sds->max_load); | 3010 | min(sds->busiest_load_per_task, sds->max_load); |
3010 | pwr_now += sds->this->cpu_power * | 3011 | pwr_now += sds->this->cpu_power * |
3011 | min(sds->this_load_per_task, sds->this_load); | 3012 | min(sds->this_load_per_task, sds->this_load); |
3012 | pwr_now /= SCHED_LOAD_SCALE; | 3013 | pwr_now /= SCHED_POWER_SCALE; |
3013 | 3014 | ||
3014 | /* Amount of load we'd subtract */ | 3015 | /* Amount of load we'd subtract */ |
3015 | tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / | 3016 | tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / |
3016 | sds->busiest->cpu_power; | 3017 | sds->busiest->cpu_power; |
3017 | if (sds->max_load > tmp) | 3018 | if (sds->max_load > tmp) |
3018 | pwr_move += sds->busiest->cpu_power * | 3019 | pwr_move += sds->busiest->cpu_power * |
@@ -3020,15 +3021,15 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds, | |||
3020 | 3021 | ||
3021 | /* Amount of load we'd add */ | 3022 | /* Amount of load we'd add */ |
3022 | if (sds->max_load * sds->busiest->cpu_power < | 3023 | if (sds->max_load * sds->busiest->cpu_power < |
3023 | sds->busiest_load_per_task * SCHED_LOAD_SCALE) | 3024 | sds->busiest_load_per_task * SCHED_POWER_SCALE) |
3024 | tmp = (sds->max_load * sds->busiest->cpu_power) / | 3025 | tmp = (sds->max_load * sds->busiest->cpu_power) / |
3025 | sds->this->cpu_power; | 3026 | sds->this->cpu_power; |
3026 | else | 3027 | else |
3027 | tmp = (sds->busiest_load_per_task * SCHED_LOAD_SCALE) / | 3028 | tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) / |
3028 | sds->this->cpu_power; | 3029 | sds->this->cpu_power; |
3029 | pwr_move += sds->this->cpu_power * | 3030 | pwr_move += sds->this->cpu_power * |
3030 | min(sds->this_load_per_task, sds->this_load + tmp); | 3031 | min(sds->this_load_per_task, sds->this_load + tmp); |
3031 | pwr_move /= SCHED_LOAD_SCALE; | 3032 | pwr_move /= SCHED_POWER_SCALE; |
3032 | 3033 | ||
3033 | /* Move if we gain throughput */ | 3034 | /* Move if we gain throughput */ |
3034 | if (pwr_move > pwr_now) | 3035 | if (pwr_move > pwr_now) |
@@ -3070,7 +3071,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
3070 | load_above_capacity = (sds->busiest_nr_running - | 3071 | load_above_capacity = (sds->busiest_nr_running - |
3071 | sds->busiest_group_capacity); | 3072 | sds->busiest_group_capacity); |
3072 | 3073 | ||
3073 | load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_LOAD_SCALE); | 3074 | load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); |
3074 | 3075 | ||
3075 | load_above_capacity /= sds->busiest->cpu_power; | 3076 | load_above_capacity /= sds->busiest->cpu_power; |
3076 | } | 3077 | } |
@@ -3090,7 +3091,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
3090 | /* How much load to actually move to equalise the imbalance */ | 3091 | /* How much load to actually move to equalise the imbalance */ |
3091 | *imbalance = min(max_pull * sds->busiest->cpu_power, | 3092 | *imbalance = min(max_pull * sds->busiest->cpu_power, |
3092 | (sds->avg_load - sds->this_load) * sds->this->cpu_power) | 3093 | (sds->avg_load - sds->this_load) * sds->this->cpu_power) |
3093 | / SCHED_LOAD_SCALE; | 3094 | / SCHED_POWER_SCALE; |
3094 | 3095 | ||
3095 | /* | 3096 | /* |
3096 | * if *imbalance is less than the average load per runnable task | 3097 | * if *imbalance is less than the average load per runnable task |
@@ -3159,7 +3160,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3159 | if (!sds.busiest || sds.busiest_nr_running == 0) | 3160 | if (!sds.busiest || sds.busiest_nr_running == 0) |
3160 | goto out_balanced; | 3161 | goto out_balanced; |
3161 | 3162 | ||
3162 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; | 3163 | sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr; |
3163 | 3164 | ||
3164 | /* | 3165 | /* |
3165 | * If the busiest group is imbalanced the below checks don't | 3166 | * If the busiest group is imbalanced the below checks don't |
@@ -3238,7 +3239,8 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group, | |||
3238 | 3239 | ||
3239 | for_each_cpu(i, sched_group_cpus(group)) { | 3240 | for_each_cpu(i, sched_group_cpus(group)) { |
3240 | unsigned long power = power_of(i); | 3241 | unsigned long power = power_of(i); |
3241 | unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); | 3242 | unsigned long capacity = DIV_ROUND_CLOSEST(power, |
3243 | SCHED_POWER_SCALE); | ||
3242 | unsigned long wl; | 3244 | unsigned long wl; |
3243 | 3245 | ||
3244 | if (!capacity) | 3246 | if (!capacity) |
@@ -3263,7 +3265,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group, | |||
3263 | * the load can be moved away from the cpu that is potentially | 3265 | * the load can be moved away from the cpu that is potentially |
3264 | * running at a lower capacity. | 3266 | * running at a lower capacity. |
3265 | */ | 3267 | */ |
3266 | wl = (wl * SCHED_LOAD_SCALE) / power; | 3268 | wl = (wl * SCHED_POWER_SCALE) / power; |
3267 | 3269 | ||
3268 | if (wl > max_load) { | 3270 | if (wl > max_load) { |
3269 | max_load = wl; | 3271 | max_load = wl; |
diff --git a/kernel/signal.c b/kernel/signal.c index 7165af5f1b11..86c32b884f8e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) | |||
124 | 124 | ||
125 | static int recalc_sigpending_tsk(struct task_struct *t) | 125 | static int recalc_sigpending_tsk(struct task_struct *t) |
126 | { | 126 | { |
127 | if (t->signal->group_stop_count > 0 || | 127 | if ((t->group_stop & GROUP_STOP_PENDING) || |
128 | PENDING(&t->pending, &t->blocked) || | 128 | PENDING(&t->pending, &t->blocked) || |
129 | PENDING(&t->signal->shared_pending, &t->blocked)) { | 129 | PENDING(&t->signal->shared_pending, &t->blocked)) { |
130 | set_tsk_thread_flag(t, TIF_SIGPENDING); | 130 | set_tsk_thread_flag(t, TIF_SIGPENDING); |
@@ -223,6 +223,83 @@ static inline void print_dropped_signal(int sig) | |||
223 | current->comm, current->pid, sig); | 223 | current->comm, current->pid, sig); |
224 | } | 224 | } |
225 | 225 | ||
226 | /** | ||
227 | * task_clear_group_stop_trapping - clear group stop trapping bit | ||
228 | * @task: target task | ||
229 | * | ||
230 | * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us. Clear it | ||
231 | * and wake up the ptracer. Note that we don't need any further locking. | ||
232 | * @task->siglock guarantees that @task->parent points to the ptracer. | ||
233 | * | ||
234 | * CONTEXT: | ||
235 | * Must be called with @task->sighand->siglock held. | ||
236 | */ | ||
237 | static void task_clear_group_stop_trapping(struct task_struct *task) | ||
238 | { | ||
239 | if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) { | ||
240 | task->group_stop &= ~GROUP_STOP_TRAPPING; | ||
241 | __wake_up_sync_key(&task->parent->signal->wait_chldexit, | ||
242 | TASK_UNINTERRUPTIBLE, 1, task); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | /** | ||
247 | * task_clear_group_stop_pending - clear pending group stop | ||
248 | * @task: target task | ||
249 | * | ||
250 | * Clear group stop states for @task. | ||
251 | * | ||
252 | * CONTEXT: | ||
253 | * Must be called with @task->sighand->siglock held. | ||
254 | */ | ||
255 | void task_clear_group_stop_pending(struct task_struct *task) | ||
256 | { | ||
257 | task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME | | ||
258 | GROUP_STOP_DEQUEUED); | ||
259 | } | ||
260 | |||
261 | /** | ||
262 | * task_participate_group_stop - participate in a group stop | ||
263 | * @task: task participating in a group stop | ||
264 | * | ||
265 | * @task has GROUP_STOP_PENDING set and is participating in a group stop. | ||
266 | * Group stop states are cleared and the group stop count is consumed if | ||
267 | * %GROUP_STOP_CONSUME was set. If the consumption completes the group | ||
268 | * stop, the appropriate %SIGNAL_* flags are set. | ||
269 | * | ||
270 | * CONTEXT: | ||
271 | * Must be called with @task->sighand->siglock held. | ||
272 | * | ||
273 | * RETURNS: | ||
274 | * %true if group stop completion should be notified to the parent, %false | ||
275 | * otherwise. | ||
276 | */ | ||
277 | static bool task_participate_group_stop(struct task_struct *task) | ||
278 | { | ||
279 | struct signal_struct *sig = task->signal; | ||
280 | bool consume = task->group_stop & GROUP_STOP_CONSUME; | ||
281 | |||
282 | WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING)); | ||
283 | |||
284 | task_clear_group_stop_pending(task); | ||
285 | |||
286 | if (!consume) | ||
287 | return false; | ||
288 | |||
289 | if (!WARN_ON_ONCE(sig->group_stop_count == 0)) | ||
290 | sig->group_stop_count--; | ||
291 | |||
292 | /* | ||
293 | * Tell the caller to notify completion iff we are entering into a | ||
294 | * fresh group stop. Read comment in do_signal_stop() for details. | ||
295 | */ | ||
296 | if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { | ||
297 | sig->flags = SIGNAL_STOP_STOPPED; | ||
298 | return true; | ||
299 | } | ||
300 | return false; | ||
301 | } | ||
302 | |||
226 | /* | 303 | /* |
227 | * allocate a new signal queue record | 304 | * allocate a new signal queue record |
228 | * - this may be called without locks if and only if t == current, otherwise an | 305 | * - this may be called without locks if and only if t == current, otherwise an |
@@ -527,7 +604,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
527 | * is to alert stop-signal processing code when another | 604 | * is to alert stop-signal processing code when another |
528 | * processor has come along and cleared the flag. | 605 | * processor has come along and cleared the flag. |
529 | */ | 606 | */ |
530 | tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; | 607 | current->group_stop |= GROUP_STOP_DEQUEUED; |
531 | } | 608 | } |
532 | if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { | 609 | if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { |
533 | /* | 610 | /* |
@@ -592,7 +669,7 @@ static int rm_from_queue_full(sigset_t *mask, struct sigpending *s) | |||
592 | if (sigisemptyset(&m)) | 669 | if (sigisemptyset(&m)) |
593 | return 0; | 670 | return 0; |
594 | 671 | ||
595 | signandsets(&s->signal, &s->signal, mask); | 672 | sigandnsets(&s->signal, &s->signal, mask); |
596 | list_for_each_entry_safe(q, n, &s->list, list) { | 673 | list_for_each_entry_safe(q, n, &s->list, list) { |
597 | if (sigismember(mask, q->info.si_signo)) { | 674 | if (sigismember(mask, q->info.si_signo)) { |
598 | list_del_init(&q->list); | 675 | list_del_init(&q->list); |
@@ -727,34 +804,14 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) | |||
727 | } else if (sig == SIGCONT) { | 804 | } else if (sig == SIGCONT) { |
728 | unsigned int why; | 805 | unsigned int why; |
729 | /* | 806 | /* |
730 | * Remove all stop signals from all queues, | 807 | * Remove all stop signals from all queues, wake all threads. |
731 | * and wake all threads. | ||
732 | */ | 808 | */ |
733 | rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); | 809 | rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); |
734 | t = p; | 810 | t = p; |
735 | do { | 811 | do { |
736 | unsigned int state; | 812 | task_clear_group_stop_pending(t); |
737 | rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); | 813 | rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); |
738 | /* | 814 | wake_up_state(t, __TASK_STOPPED); |
739 | * If there is a handler for SIGCONT, we must make | ||
740 | * sure that no thread returns to user mode before | ||
741 | * we post the signal, in case it was the only | ||
742 | * thread eligible to run the signal handler--then | ||
743 | * it must not do anything between resuming and | ||
744 | * running the handler. With the TIF_SIGPENDING | ||
745 | * flag set, the thread will pause and acquire the | ||
746 | * siglock that we hold now and until we've queued | ||
747 | * the pending signal. | ||
748 | * | ||
749 | * Wake up the stopped thread _after_ setting | ||
750 | * TIF_SIGPENDING | ||
751 | */ | ||
752 | state = __TASK_STOPPED; | ||
753 | if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) { | ||
754 | set_tsk_thread_flag(t, TIF_SIGPENDING); | ||
755 | state |= TASK_INTERRUPTIBLE; | ||
756 | } | ||
757 | wake_up_state(t, state); | ||
758 | } while_each_thread(p, t); | 815 | } while_each_thread(p, t); |
759 | 816 | ||
760 | /* | 817 | /* |
@@ -780,13 +837,6 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) | |||
780 | signal->flags = why | SIGNAL_STOP_CONTINUED; | 837 | signal->flags = why | SIGNAL_STOP_CONTINUED; |
781 | signal->group_stop_count = 0; | 838 | signal->group_stop_count = 0; |
782 | signal->group_exit_code = 0; | 839 | signal->group_exit_code = 0; |
783 | } else { | ||
784 | /* | ||
785 | * We are not stopped, but there could be a stop | ||
786 | * signal in the middle of being processed after | ||
787 | * being removed from the queue. Clear that too. | ||
788 | */ | ||
789 | signal->flags &= ~SIGNAL_STOP_DEQUEUED; | ||
790 | } | 840 | } |
791 | } | 841 | } |
792 | 842 | ||
@@ -875,6 +925,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) | |||
875 | signal->group_stop_count = 0; | 925 | signal->group_stop_count = 0; |
876 | t = p; | 926 | t = p; |
877 | do { | 927 | do { |
928 | task_clear_group_stop_pending(t); | ||
878 | sigaddset(&t->pending.signal, SIGKILL); | 929 | sigaddset(&t->pending.signal, SIGKILL); |
879 | signal_wake_up(t, 1); | 930 | signal_wake_up(t, 1); |
880 | } while_each_thread(p, t); | 931 | } while_each_thread(p, t); |
@@ -1109,6 +1160,7 @@ int zap_other_threads(struct task_struct *p) | |||
1109 | p->signal->group_stop_count = 0; | 1160 | p->signal->group_stop_count = 0; |
1110 | 1161 | ||
1111 | while_each_thread(p, t) { | 1162 | while_each_thread(p, t) { |
1163 | task_clear_group_stop_pending(t); | ||
1112 | count++; | 1164 | count++; |
1113 | 1165 | ||
1114 | /* Don't bother with already dead threads */ | 1166 | /* Don't bother with already dead threads */ |
@@ -1536,16 +1588,30 @@ int do_notify_parent(struct task_struct *tsk, int sig) | |||
1536 | return ret; | 1588 | return ret; |
1537 | } | 1589 | } |
1538 | 1590 | ||
1539 | static void do_notify_parent_cldstop(struct task_struct *tsk, int why) | 1591 | /** |
1592 | * do_notify_parent_cldstop - notify parent of stopped/continued state change | ||
1593 | * @tsk: task reporting the state change | ||
1594 | * @for_ptracer: the notification is for ptracer | ||
1595 | * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report | ||
1596 | * | ||
1597 | * Notify @tsk's parent that the stopped/continued state has changed. If | ||
1598 | * @for_ptracer is %false, @tsk's group leader notifies to its real parent. | ||
1599 | * If %true, @tsk reports to @tsk->parent which should be the ptracer. | ||
1600 | * | ||
1601 | * CONTEXT: | ||
1602 | * Must be called with tasklist_lock at least read locked. | ||
1603 | */ | ||
1604 | static void do_notify_parent_cldstop(struct task_struct *tsk, | ||
1605 | bool for_ptracer, int why) | ||
1540 | { | 1606 | { |
1541 | struct siginfo info; | 1607 | struct siginfo info; |
1542 | unsigned long flags; | 1608 | unsigned long flags; |
1543 | struct task_struct *parent; | 1609 | struct task_struct *parent; |
1544 | struct sighand_struct *sighand; | 1610 | struct sighand_struct *sighand; |
1545 | 1611 | ||
1546 | if (task_ptrace(tsk)) | 1612 | if (for_ptracer) { |
1547 | parent = tsk->parent; | 1613 | parent = tsk->parent; |
1548 | else { | 1614 | } else { |
1549 | tsk = tsk->group_leader; | 1615 | tsk = tsk->group_leader; |
1550 | parent = tsk->real_parent; | 1616 | parent = tsk->real_parent; |
1551 | } | 1617 | } |
@@ -1621,6 +1687,15 @@ static int sigkill_pending(struct task_struct *tsk) | |||
1621 | } | 1687 | } |
1622 | 1688 | ||
1623 | /* | 1689 | /* |
1690 | * Test whether the target task of the usual cldstop notification - the | ||
1691 | * real_parent of @child - is in the same group as the ptracer. | ||
1692 | */ | ||
1693 | static bool real_parent_is_ptracer(struct task_struct *child) | ||
1694 | { | ||
1695 | return same_thread_group(child->parent, child->real_parent); | ||
1696 | } | ||
1697 | |||
1698 | /* | ||
1624 | * This must be called with current->sighand->siglock held. | 1699 | * This must be called with current->sighand->siglock held. |
1625 | * | 1700 | * |
1626 | * This should be the path for all ptrace stops. | 1701 | * This should be the path for all ptrace stops. |
@@ -1631,10 +1706,12 @@ static int sigkill_pending(struct task_struct *tsk) | |||
1631 | * If we actually decide not to stop at all because the tracer | 1706 | * If we actually decide not to stop at all because the tracer |
1632 | * is gone, we keep current->exit_code unless clear_code. | 1707 | * is gone, we keep current->exit_code unless clear_code. |
1633 | */ | 1708 | */ |
1634 | static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | 1709 | static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) |
1635 | __releases(&current->sighand->siglock) | 1710 | __releases(&current->sighand->siglock) |
1636 | __acquires(&current->sighand->siglock) | 1711 | __acquires(&current->sighand->siglock) |
1637 | { | 1712 | { |
1713 | bool gstop_done = false; | ||
1714 | |||
1638 | if (arch_ptrace_stop_needed(exit_code, info)) { | 1715 | if (arch_ptrace_stop_needed(exit_code, info)) { |
1639 | /* | 1716 | /* |
1640 | * The arch code has something special to do before a | 1717 | * The arch code has something special to do before a |
@@ -1655,21 +1732,49 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | |||
1655 | } | 1732 | } |
1656 | 1733 | ||
1657 | /* | 1734 | /* |
1658 | * If there is a group stop in progress, | 1735 | * If @why is CLD_STOPPED, we're trapping to participate in a group |
1659 | * we must participate in the bookkeeping. | 1736 | * stop. Do the bookkeeping. Note that if SIGCONT was delivered |
1737 | * while siglock was released for the arch hook, PENDING could be | ||
1738 | * clear now. We act as if SIGCONT is received after TASK_TRACED | ||
1739 | * is entered - ignore it. | ||
1660 | */ | 1740 | */ |
1661 | if (current->signal->group_stop_count > 0) | 1741 | if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING)) |
1662 | --current->signal->group_stop_count; | 1742 | gstop_done = task_participate_group_stop(current); |
1663 | 1743 | ||
1664 | current->last_siginfo = info; | 1744 | current->last_siginfo = info; |
1665 | current->exit_code = exit_code; | 1745 | current->exit_code = exit_code; |
1666 | 1746 | ||
1667 | /* Let the debugger run. */ | 1747 | /* |
1668 | __set_current_state(TASK_TRACED); | 1748 | * TRACED should be visible before TRAPPING is cleared; otherwise, |
1749 | * the tracer might fail do_wait(). | ||
1750 | */ | ||
1751 | set_current_state(TASK_TRACED); | ||
1752 | |||
1753 | /* | ||
1754 | * We're committing to trapping. Clearing GROUP_STOP_TRAPPING and | ||
1755 | * transition to TASK_TRACED should be atomic with respect to | ||
1756 | * siglock. This should be done after the arch hook as siglock is | ||
1757 | * released and regrabbed across it. | ||
1758 | */ | ||
1759 | task_clear_group_stop_trapping(current); | ||
1760 | |||
1669 | spin_unlock_irq(&current->sighand->siglock); | 1761 | spin_unlock_irq(&current->sighand->siglock); |
1670 | read_lock(&tasklist_lock); | 1762 | read_lock(&tasklist_lock); |
1671 | if (may_ptrace_stop()) { | 1763 | if (may_ptrace_stop()) { |
1672 | do_notify_parent_cldstop(current, CLD_TRAPPED); | 1764 | /* |
1765 | * Notify parents of the stop. | ||
1766 | * | ||
1767 | * While ptraced, there are two parents - the ptracer and | ||
1768 | * the real_parent of the group_leader. The ptracer should | ||
1769 | * know about every stop while the real parent is only | ||
1770 | * interested in the completion of group stop. The states | ||
1771 | * for the two don't interact with each other. Notify | ||
1772 | * separately unless they're gonna be duplicates. | ||
1773 | */ | ||
1774 | do_notify_parent_cldstop(current, true, why); | ||
1775 | if (gstop_done && !real_parent_is_ptracer(current)) | ||
1776 | do_notify_parent_cldstop(current, false, why); | ||
1777 | |||
1673 | /* | 1778 | /* |
1674 | * Don't want to allow preemption here, because | 1779 | * Don't want to allow preemption here, because |
1675 | * sys_ptrace() needs this task to be inactive. | 1780 | * sys_ptrace() needs this task to be inactive. |
@@ -1684,7 +1789,16 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | |||
1684 | /* | 1789 | /* |
1685 | * By the time we got the lock, our tracer went away. | 1790 | * By the time we got the lock, our tracer went away. |
1686 | * Don't drop the lock yet, another tracer may come. | 1791 | * Don't drop the lock yet, another tracer may come. |
1792 | * | ||
1793 | * If @gstop_done, the ptracer went away between group stop | ||
1794 | * completion and here. During detach, it would have set | ||
1795 | * GROUP_STOP_PENDING on us and we'll re-enter TASK_STOPPED | ||
1796 | * in do_signal_stop() on return, so notifying the real | ||
1797 | * parent of the group stop completion is enough. | ||
1687 | */ | 1798 | */ |
1799 | if (gstop_done) | ||
1800 | do_notify_parent_cldstop(current, false, why); | ||
1801 | |||
1688 | __set_current_state(TASK_RUNNING); | 1802 | __set_current_state(TASK_RUNNING); |
1689 | if (clear_code) | 1803 | if (clear_code) |
1690 | current->exit_code = 0; | 1804 | current->exit_code = 0; |
@@ -1728,7 +1842,7 @@ void ptrace_notify(int exit_code) | |||
1728 | 1842 | ||
1729 | /* Let the debugger run. */ | 1843 | /* Let the debugger run. */ |
1730 | spin_lock_irq(&current->sighand->siglock); | 1844 | spin_lock_irq(&current->sighand->siglock); |
1731 | ptrace_stop(exit_code, 1, &info); | 1845 | ptrace_stop(exit_code, CLD_TRAPPED, 1, &info); |
1732 | spin_unlock_irq(&current->sighand->siglock); | 1846 | spin_unlock_irq(&current->sighand->siglock); |
1733 | } | 1847 | } |
1734 | 1848 | ||
@@ -1741,66 +1855,115 @@ void ptrace_notify(int exit_code) | |||
1741 | static int do_signal_stop(int signr) | 1855 | static int do_signal_stop(int signr) |
1742 | { | 1856 | { |
1743 | struct signal_struct *sig = current->signal; | 1857 | struct signal_struct *sig = current->signal; |
1744 | int notify; | ||
1745 | 1858 | ||
1746 | if (!sig->group_stop_count) { | 1859 | if (!(current->group_stop & GROUP_STOP_PENDING)) { |
1860 | unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; | ||
1747 | struct task_struct *t; | 1861 | struct task_struct *t; |
1748 | 1862 | ||
1749 | if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || | 1863 | /* signr will be recorded in task->group_stop for retries */ |
1864 | WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK); | ||
1865 | |||
1866 | if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) || | ||
1750 | unlikely(signal_group_exit(sig))) | 1867 | unlikely(signal_group_exit(sig))) |
1751 | return 0; | 1868 | return 0; |
1752 | /* | 1869 | /* |
1753 | * There is no group stop already in progress. | 1870 | * There is no group stop already in progress. We must |
1754 | * We must initiate one now. | 1871 | * initiate one now. |
1872 | * | ||
1873 | * While ptraced, a task may be resumed while group stop is | ||
1874 | * still in effect and then receive a stop signal and | ||
1875 | * initiate another group stop. This deviates from the | ||
1876 | * usual behavior as two consecutive stop signals can't | ||
1877 | * cause two group stops when !ptraced. That is why we | ||
1878 | * also check !task_is_stopped(t) below. | ||
1879 | * | ||
1880 | * The condition can be distinguished by testing whether | ||
1881 | * SIGNAL_STOP_STOPPED is already set. Don't generate | ||
1882 | * group_exit_code in such case. | ||
1883 | * | ||
1884 | * This is not necessary for SIGNAL_STOP_CONTINUED because | ||
1885 | * an intervening stop signal is required to cause two | ||
1886 | * continued events regardless of ptrace. | ||
1755 | */ | 1887 | */ |
1756 | sig->group_exit_code = signr; | 1888 | if (!(sig->flags & SIGNAL_STOP_STOPPED)) |
1889 | sig->group_exit_code = signr; | ||
1890 | else | ||
1891 | WARN_ON_ONCE(!task_ptrace(current)); | ||
1757 | 1892 | ||
1893 | current->group_stop &= ~GROUP_STOP_SIGMASK; | ||
1894 | current->group_stop |= signr | gstop; | ||
1758 | sig->group_stop_count = 1; | 1895 | sig->group_stop_count = 1; |
1759 | for (t = next_thread(current); t != current; t = next_thread(t)) | 1896 | for (t = next_thread(current); t != current; |
1897 | t = next_thread(t)) { | ||
1898 | t->group_stop &= ~GROUP_STOP_SIGMASK; | ||
1760 | /* | 1899 | /* |
1761 | * Setting state to TASK_STOPPED for a group | 1900 | * Setting state to TASK_STOPPED for a group |
1762 | * stop is always done with the siglock held, | 1901 | * stop is always done with the siglock held, |
1763 | * so this check has no races. | 1902 | * so this check has no races. |
1764 | */ | 1903 | */ |
1765 | if (!(t->flags & PF_EXITING) && | 1904 | if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { |
1766 | !task_is_stopped_or_traced(t)) { | 1905 | t->group_stop |= signr | gstop; |
1767 | sig->group_stop_count++; | 1906 | sig->group_stop_count++; |
1768 | signal_wake_up(t, 0); | 1907 | signal_wake_up(t, 0); |
1769 | } | 1908 | } |
1909 | } | ||
1770 | } | 1910 | } |
1771 | /* | 1911 | retry: |
1772 | * If there are no other threads in the group, or if there is | 1912 | if (likely(!task_ptrace(current))) { |
1773 | * a group stop in progress and we are the last to stop, report | 1913 | int notify = 0; |
1774 | * to the parent. When ptraced, every thread reports itself. | 1914 | |
1775 | */ | 1915 | /* |
1776 | notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0; | 1916 | * If there are no other threads in the group, or if there |
1777 | notify = tracehook_notify_jctl(notify, CLD_STOPPED); | 1917 | * is a group stop in progress and we are the last to stop, |
1778 | /* | 1918 | * report to the parent. |
1779 | * tracehook_notify_jctl() can drop and reacquire siglock, so | 1919 | */ |
1780 | * we keep ->group_stop_count != 0 before the call. If SIGCONT | 1920 | if (task_participate_group_stop(current)) |
1781 | * or SIGKILL comes in between ->group_stop_count == 0. | 1921 | notify = CLD_STOPPED; |
1782 | */ | 1922 | |
1783 | if (sig->group_stop_count) { | ||
1784 | if (!--sig->group_stop_count) | ||
1785 | sig->flags = SIGNAL_STOP_STOPPED; | ||
1786 | current->exit_code = sig->group_exit_code; | ||
1787 | __set_current_state(TASK_STOPPED); | 1923 | __set_current_state(TASK_STOPPED); |
1924 | spin_unlock_irq(&current->sighand->siglock); | ||
1925 | |||
1926 | /* | ||
1927 | * Notify the parent of the group stop completion. Because | ||
1928 | * we're not holding either the siglock or tasklist_lock | ||
1929 | * here, ptracer may attach inbetween; however, this is for | ||
1930 | * group stop and should always be delivered to the real | ||
1931 | * parent of the group leader. The new ptracer will get | ||
1932 | * its notification when this task transitions into | ||
1933 | * TASK_TRACED. | ||
1934 | */ | ||
1935 | if (notify) { | ||
1936 | read_lock(&tasklist_lock); | ||
1937 | do_notify_parent_cldstop(current, false, notify); | ||
1938 | read_unlock(&tasklist_lock); | ||
1939 | } | ||
1940 | |||
1941 | /* Now we don't run again until woken by SIGCONT or SIGKILL */ | ||
1942 | schedule(); | ||
1943 | |||
1944 | spin_lock_irq(&current->sighand->siglock); | ||
1945 | } else { | ||
1946 | ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK, | ||
1947 | CLD_STOPPED, 0, NULL); | ||
1948 | current->exit_code = 0; | ||
1788 | } | 1949 | } |
1789 | spin_unlock_irq(&current->sighand->siglock); | ||
1790 | 1950 | ||
1791 | if (notify) { | 1951 | /* |
1792 | read_lock(&tasklist_lock); | 1952 | * GROUP_STOP_PENDING could be set if another group stop has |
1793 | do_notify_parent_cldstop(current, notify); | 1953 | * started since being woken up or ptrace wants us to transit |
1794 | read_unlock(&tasklist_lock); | 1954 | * between TASK_STOPPED and TRACED. Retry group stop. |
1955 | */ | ||
1956 | if (current->group_stop & GROUP_STOP_PENDING) { | ||
1957 | WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK)); | ||
1958 | goto retry; | ||
1795 | } | 1959 | } |
1796 | 1960 | ||
1797 | /* Now we don't run again until woken by SIGCONT or SIGKILL */ | 1961 | /* PTRACE_ATTACH might have raced with task killing, clear trapping */ |
1798 | do { | 1962 | task_clear_group_stop_trapping(current); |
1799 | schedule(); | 1963 | |
1800 | } while (try_to_freeze()); | 1964 | spin_unlock_irq(&current->sighand->siglock); |
1801 | 1965 | ||
1802 | tracehook_finish_jctl(); | 1966 | tracehook_finish_jctl(); |
1803 | current->exit_code = 0; | ||
1804 | 1967 | ||
1805 | return 1; | 1968 | return 1; |
1806 | } | 1969 | } |
@@ -1814,7 +1977,7 @@ static int ptrace_signal(int signr, siginfo_t *info, | |||
1814 | ptrace_signal_deliver(regs, cookie); | 1977 | ptrace_signal_deliver(regs, cookie); |
1815 | 1978 | ||
1816 | /* Let the debugger run. */ | 1979 | /* Let the debugger run. */ |
1817 | ptrace_stop(signr, 0, info); | 1980 | ptrace_stop(signr, CLD_TRAPPED, 0, info); |
1818 | 1981 | ||
1819 | /* We're back. Did the debugger cancel the sig? */ | 1982 | /* We're back. Did the debugger cancel the sig? */ |
1820 | signr = current->exit_code; | 1983 | signr = current->exit_code; |
@@ -1869,18 +2032,36 @@ relock: | |||
1869 | * the CLD_ si_code into SIGNAL_CLD_MASK bits. | 2032 | * the CLD_ si_code into SIGNAL_CLD_MASK bits. |
1870 | */ | 2033 | */ |
1871 | if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { | 2034 | if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { |
1872 | int why = (signal->flags & SIGNAL_STOP_CONTINUED) | 2035 | struct task_struct *leader; |
1873 | ? CLD_CONTINUED : CLD_STOPPED; | 2036 | int why; |
2037 | |||
2038 | if (signal->flags & SIGNAL_CLD_CONTINUED) | ||
2039 | why = CLD_CONTINUED; | ||
2040 | else | ||
2041 | why = CLD_STOPPED; | ||
2042 | |||
1874 | signal->flags &= ~SIGNAL_CLD_MASK; | 2043 | signal->flags &= ~SIGNAL_CLD_MASK; |
1875 | 2044 | ||
1876 | why = tracehook_notify_jctl(why, CLD_CONTINUED); | ||
1877 | spin_unlock_irq(&sighand->siglock); | 2045 | spin_unlock_irq(&sighand->siglock); |
1878 | 2046 | ||
1879 | if (why) { | 2047 | /* |
1880 | read_lock(&tasklist_lock); | 2048 | * Notify the parent that we're continuing. This event is |
1881 | do_notify_parent_cldstop(current->group_leader, why); | 2049 | * always per-process and doesn't make whole lot of sense |
1882 | read_unlock(&tasklist_lock); | 2050 | * for ptracers, who shouldn't consume the state via |
1883 | } | 2051 | * wait(2) either, but, for backward compatibility, notify |
2052 | * the ptracer of the group leader too unless it's gonna be | ||
2053 | * a duplicate. | ||
2054 | */ | ||
2055 | read_lock(&tasklist_lock); | ||
2056 | |||
2057 | do_notify_parent_cldstop(current, false, why); | ||
2058 | |||
2059 | leader = current->group_leader; | ||
2060 | if (task_ptrace(leader) && !real_parent_is_ptracer(leader)) | ||
2061 | do_notify_parent_cldstop(leader, true, why); | ||
2062 | |||
2063 | read_unlock(&tasklist_lock); | ||
2064 | |||
1884 | goto relock; | 2065 | goto relock; |
1885 | } | 2066 | } |
1886 | 2067 | ||
@@ -1897,8 +2078,8 @@ relock: | |||
1897 | if (unlikely(signr != 0)) | 2078 | if (unlikely(signr != 0)) |
1898 | ka = return_ka; | 2079 | ka = return_ka; |
1899 | else { | 2080 | else { |
1900 | if (unlikely(signal->group_stop_count > 0) && | 2081 | if (unlikely(current->group_stop & |
1901 | do_signal_stop(0)) | 2082 | GROUP_STOP_PENDING) && do_signal_stop(0)) |
1902 | goto relock; | 2083 | goto relock; |
1903 | 2084 | ||
1904 | signr = dequeue_signal(current, &current->blocked, | 2085 | signr = dequeue_signal(current, &current->blocked, |
@@ -2017,10 +2198,42 @@ relock: | |||
2017 | return signr; | 2198 | return signr; |
2018 | } | 2199 | } |
2019 | 2200 | ||
2201 | /* | ||
2202 | * It could be that complete_signal() picked us to notify about the | ||
2203 | * group-wide signal. Other threads should be notified now to take | ||
2204 | * the shared signals in @which since we will not. | ||
2205 | */ | ||
2206 | static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) | ||
2207 | { | ||
2208 | sigset_t retarget; | ||
2209 | struct task_struct *t; | ||
2210 | |||
2211 | sigandsets(&retarget, &tsk->signal->shared_pending.signal, which); | ||
2212 | if (sigisemptyset(&retarget)) | ||
2213 | return; | ||
2214 | |||
2215 | t = tsk; | ||
2216 | while_each_thread(tsk, t) { | ||
2217 | if (t->flags & PF_EXITING) | ||
2218 | continue; | ||
2219 | |||
2220 | if (!has_pending_signals(&retarget, &t->blocked)) | ||
2221 | continue; | ||
2222 | /* Remove the signals this thread can handle. */ | ||
2223 | sigandsets(&retarget, &retarget, &t->blocked); | ||
2224 | |||
2225 | if (!signal_pending(t)) | ||
2226 | signal_wake_up(t, 0); | ||
2227 | |||
2228 | if (sigisemptyset(&retarget)) | ||
2229 | break; | ||
2230 | } | ||
2231 | } | ||
2232 | |||
2020 | void exit_signals(struct task_struct *tsk) | 2233 | void exit_signals(struct task_struct *tsk) |
2021 | { | 2234 | { |
2022 | int group_stop = 0; | 2235 | int group_stop = 0; |
2023 | struct task_struct *t; | 2236 | sigset_t unblocked; |
2024 | 2237 | ||
2025 | if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { | 2238 | if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { |
2026 | tsk->flags |= PF_EXITING; | 2239 | tsk->flags |= PF_EXITING; |
@@ -2036,26 +2249,23 @@ void exit_signals(struct task_struct *tsk) | |||
2036 | if (!signal_pending(tsk)) | 2249 | if (!signal_pending(tsk)) |
2037 | goto out; | 2250 | goto out; |
2038 | 2251 | ||
2039 | /* | 2252 | unblocked = tsk->blocked; |
2040 | * It could be that __group_complete_signal() choose us to | 2253 | signotset(&unblocked); |
2041 | * notify about group-wide signal. Another thread should be | 2254 | retarget_shared_pending(tsk, &unblocked); |
2042 | * woken now to take the signal since we will not. | ||
2043 | */ | ||
2044 | for (t = tsk; (t = next_thread(t)) != tsk; ) | ||
2045 | if (!signal_pending(t) && !(t->flags & PF_EXITING)) | ||
2046 | recalc_sigpending_and_wake(t); | ||
2047 | 2255 | ||
2048 | if (unlikely(tsk->signal->group_stop_count) && | 2256 | if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) && |
2049 | !--tsk->signal->group_stop_count) { | 2257 | task_participate_group_stop(tsk)) |
2050 | tsk->signal->flags = SIGNAL_STOP_STOPPED; | 2258 | group_stop = CLD_STOPPED; |
2051 | group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED); | ||
2052 | } | ||
2053 | out: | 2259 | out: |
2054 | spin_unlock_irq(&tsk->sighand->siglock); | 2260 | spin_unlock_irq(&tsk->sighand->siglock); |
2055 | 2261 | ||
2262 | /* | ||
2263 | * If group stop has completed, deliver the notification. This | ||
2264 | * should always go to the real parent of the group leader. | ||
2265 | */ | ||
2056 | if (unlikely(group_stop)) { | 2266 | if (unlikely(group_stop)) { |
2057 | read_lock(&tasklist_lock); | 2267 | read_lock(&tasklist_lock); |
2058 | do_notify_parent_cldstop(tsk, group_stop); | 2268 | do_notify_parent_cldstop(tsk, false, group_stop); |
2059 | read_unlock(&tasklist_lock); | 2269 | read_unlock(&tasklist_lock); |
2060 | } | 2270 | } |
2061 | } | 2271 | } |
@@ -2089,11 +2299,33 @@ long do_no_restart_syscall(struct restart_block *param) | |||
2089 | return -EINTR; | 2299 | return -EINTR; |
2090 | } | 2300 | } |
2091 | 2301 | ||
2092 | /* | 2302 | static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) |
2093 | * We don't need to get the kernel lock - this is all local to this | 2303 | { |
2094 | * particular thread.. (and that's good, because this is _heavily_ | 2304 | if (signal_pending(tsk) && !thread_group_empty(tsk)) { |
2095 | * used by various programs) | 2305 | sigset_t newblocked; |
2306 | /* A set of now blocked but previously unblocked signals. */ | ||
2307 | sigandnsets(&newblocked, newset, &current->blocked); | ||
2308 | retarget_shared_pending(tsk, &newblocked); | ||
2309 | } | ||
2310 | tsk->blocked = *newset; | ||
2311 | recalc_sigpending(); | ||
2312 | } | ||
2313 | |||
2314 | /** | ||
2315 | * set_current_blocked - change current->blocked mask | ||
2316 | * @newset: new mask | ||
2317 | * | ||
2318 | * It is wrong to change ->blocked directly, this helper should be used | ||
2319 | * to ensure the process can't miss a shared signal we are going to block. | ||
2096 | */ | 2320 | */ |
2321 | void set_current_blocked(const sigset_t *newset) | ||
2322 | { | ||
2323 | struct task_struct *tsk = current; | ||
2324 | |||
2325 | spin_lock_irq(&tsk->sighand->siglock); | ||
2326 | __set_task_blocked(tsk, newset); | ||
2327 | spin_unlock_irq(&tsk->sighand->siglock); | ||
2328 | } | ||
2097 | 2329 | ||
2098 | /* | 2330 | /* |
2099 | * This is also useful for kernel threads that want to temporarily | 2331 | * This is also useful for kernel threads that want to temporarily |
@@ -2105,30 +2337,29 @@ long do_no_restart_syscall(struct restart_block *param) | |||
2105 | */ | 2337 | */ |
2106 | int sigprocmask(int how, sigset_t *set, sigset_t *oldset) | 2338 | int sigprocmask(int how, sigset_t *set, sigset_t *oldset) |
2107 | { | 2339 | { |
2108 | int error; | 2340 | struct task_struct *tsk = current; |
2341 | sigset_t newset; | ||
2109 | 2342 | ||
2110 | spin_lock_irq(&current->sighand->siglock); | 2343 | /* Lockless, only current can change ->blocked, never from irq */ |
2111 | if (oldset) | 2344 | if (oldset) |
2112 | *oldset = current->blocked; | 2345 | *oldset = tsk->blocked; |
2113 | 2346 | ||
2114 | error = 0; | ||
2115 | switch (how) { | 2347 | switch (how) { |
2116 | case SIG_BLOCK: | 2348 | case SIG_BLOCK: |
2117 | sigorsets(&current->blocked, &current->blocked, set); | 2349 | sigorsets(&newset, &tsk->blocked, set); |
2118 | break; | 2350 | break; |
2119 | case SIG_UNBLOCK: | 2351 | case SIG_UNBLOCK: |
2120 | signandsets(&current->blocked, &current->blocked, set); | 2352 | sigandnsets(&newset, &tsk->blocked, set); |
2121 | break; | 2353 | break; |
2122 | case SIG_SETMASK: | 2354 | case SIG_SETMASK: |
2123 | current->blocked = *set; | 2355 | newset = *set; |
2124 | break; | 2356 | break; |
2125 | default: | 2357 | default: |
2126 | error = -EINVAL; | 2358 | return -EINVAL; |
2127 | } | 2359 | } |
2128 | recalc_sigpending(); | ||
2129 | spin_unlock_irq(&current->sighand->siglock); | ||
2130 | 2360 | ||
2131 | return error; | 2361 | set_current_blocked(&newset); |
2362 | return 0; | ||
2132 | } | 2363 | } |
2133 | 2364 | ||
2134 | /** | 2365 | /** |
@@ -2138,40 +2369,34 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) | |||
2138 | * @oset: previous value of signal mask if non-null | 2369 | * @oset: previous value of signal mask if non-null |
2139 | * @sigsetsize: size of sigset_t type | 2370 | * @sigsetsize: size of sigset_t type |
2140 | */ | 2371 | */ |
2141 | SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, | 2372 | SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset, |
2142 | sigset_t __user *, oset, size_t, sigsetsize) | 2373 | sigset_t __user *, oset, size_t, sigsetsize) |
2143 | { | 2374 | { |
2144 | int error = -EINVAL; | ||
2145 | sigset_t old_set, new_set; | 2375 | sigset_t old_set, new_set; |
2376 | int error; | ||
2146 | 2377 | ||
2147 | /* XXX: Don't preclude handling different sized sigset_t's. */ | 2378 | /* XXX: Don't preclude handling different sized sigset_t's. */ |
2148 | if (sigsetsize != sizeof(sigset_t)) | 2379 | if (sigsetsize != sizeof(sigset_t)) |
2149 | goto out; | 2380 | return -EINVAL; |
2150 | 2381 | ||
2151 | if (set) { | 2382 | old_set = current->blocked; |
2152 | error = -EFAULT; | 2383 | |
2153 | if (copy_from_user(&new_set, set, sizeof(*set))) | 2384 | if (nset) { |
2154 | goto out; | 2385 | if (copy_from_user(&new_set, nset, sizeof(sigset_t))) |
2386 | return -EFAULT; | ||
2155 | sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); | 2387 | sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); |
2156 | 2388 | ||
2157 | error = sigprocmask(how, &new_set, &old_set); | 2389 | error = sigprocmask(how, &new_set, NULL); |
2158 | if (error) | 2390 | if (error) |
2159 | goto out; | 2391 | return error; |
2160 | if (oset) | 2392 | } |
2161 | goto set_old; | ||
2162 | } else if (oset) { | ||
2163 | spin_lock_irq(&current->sighand->siglock); | ||
2164 | old_set = current->blocked; | ||
2165 | spin_unlock_irq(&current->sighand->siglock); | ||
2166 | 2393 | ||
2167 | set_old: | 2394 | if (oset) { |
2168 | error = -EFAULT; | 2395 | if (copy_to_user(oset, &old_set, sizeof(sigset_t))) |
2169 | if (copy_to_user(oset, &old_set, sizeof(*oset))) | 2396 | return -EFAULT; |
2170 | goto out; | ||
2171 | } | 2397 | } |
2172 | error = 0; | 2398 | |
2173 | out: | 2399 | return 0; |
2174 | return error; | ||
2175 | } | 2400 | } |
2176 | 2401 | ||
2177 | long do_sigpending(void __user *set, unsigned long sigsetsize) | 2402 | long do_sigpending(void __user *set, unsigned long sigsetsize) |
@@ -2284,6 +2509,66 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) | |||
2284 | #endif | 2509 | #endif |
2285 | 2510 | ||
2286 | /** | 2511 | /** |
2512 | * do_sigtimedwait - wait for queued signals specified in @which | ||
2513 | * @which: queued signals to wait for | ||
2514 | * @info: if non-null, the signal's siginfo is returned here | ||
2515 | * @ts: upper bound on process time suspension | ||
2516 | */ | ||
2517 | int do_sigtimedwait(const sigset_t *which, siginfo_t *info, | ||
2518 | const struct timespec *ts) | ||
2519 | { | ||
2520 | struct task_struct *tsk = current; | ||
2521 | long timeout = MAX_SCHEDULE_TIMEOUT; | ||
2522 | sigset_t mask = *which; | ||
2523 | int sig; | ||
2524 | |||
2525 | if (ts) { | ||
2526 | if (!timespec_valid(ts)) | ||
2527 | return -EINVAL; | ||
2528 | timeout = timespec_to_jiffies(ts); | ||
2529 | /* | ||
2530 | * We can be close to the next tick, add another one | ||
2531 | * to ensure we will wait at least the time asked for. | ||
2532 | */ | ||
2533 | if (ts->tv_sec || ts->tv_nsec) | ||
2534 | timeout++; | ||
2535 | } | ||
2536 | |||
2537 | /* | ||
2538 | * Invert the set of allowed signals to get those we want to block. | ||
2539 | */ | ||
2540 | sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); | ||
2541 | signotset(&mask); | ||
2542 | |||
2543 | spin_lock_irq(&tsk->sighand->siglock); | ||
2544 | sig = dequeue_signal(tsk, &mask, info); | ||
2545 | if (!sig && timeout) { | ||
2546 | /* | ||
2547 | * None ready, temporarily unblock those we're interested | ||
2548 | * while we are sleeping in so that we'll be awakened when | ||
2549 | * they arrive. Unblocking is always fine, we can avoid | ||
2550 | * set_current_blocked(). | ||
2551 | */ | ||
2552 | tsk->real_blocked = tsk->blocked; | ||
2553 | sigandsets(&tsk->blocked, &tsk->blocked, &mask); | ||
2554 | recalc_sigpending(); | ||
2555 | spin_unlock_irq(&tsk->sighand->siglock); | ||
2556 | |||
2557 | timeout = schedule_timeout_interruptible(timeout); | ||
2558 | |||
2559 | spin_lock_irq(&tsk->sighand->siglock); | ||
2560 | __set_task_blocked(tsk, &tsk->real_blocked); | ||
2561 | siginitset(&tsk->real_blocked, 0); | ||
2562 | sig = dequeue_signal(tsk, &mask, info); | ||
2563 | } | ||
2564 | spin_unlock_irq(&tsk->sighand->siglock); | ||
2565 | |||
2566 | if (sig) | ||
2567 | return sig; | ||
2568 | return timeout ? -EINTR : -EAGAIN; | ||
2569 | } | ||
2570 | |||
2571 | /** | ||
2287 | * sys_rt_sigtimedwait - synchronously wait for queued signals specified | 2572 | * sys_rt_sigtimedwait - synchronously wait for queued signals specified |
2288 | * in @uthese | 2573 | * in @uthese |
2289 | * @uthese: queued signals to wait for | 2574 | * @uthese: queued signals to wait for |
@@ -2295,11 +2580,10 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | |||
2295 | siginfo_t __user *, uinfo, const struct timespec __user *, uts, | 2580 | siginfo_t __user *, uinfo, const struct timespec __user *, uts, |
2296 | size_t, sigsetsize) | 2581 | size_t, sigsetsize) |
2297 | { | 2582 | { |
2298 | int ret, sig; | ||
2299 | sigset_t these; | 2583 | sigset_t these; |
2300 | struct timespec ts; | 2584 | struct timespec ts; |
2301 | siginfo_t info; | 2585 | siginfo_t info; |
2302 | long timeout = 0; | 2586 | int ret; |
2303 | 2587 | ||
2304 | /* XXX: Don't preclude handling different sized sigset_t's. */ | 2588 | /* XXX: Don't preclude handling different sized sigset_t's. */ |
2305 | if (sigsetsize != sizeof(sigset_t)) | 2589 | if (sigsetsize != sizeof(sigset_t)) |
@@ -2308,61 +2592,16 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | |||
2308 | if (copy_from_user(&these, uthese, sizeof(these))) | 2592 | if (copy_from_user(&these, uthese, sizeof(these))) |
2309 | return -EFAULT; | 2593 | return -EFAULT; |
2310 | 2594 | ||
2311 | /* | ||
2312 | * Invert the set of allowed signals to get those we | ||
2313 | * want to block. | ||
2314 | */ | ||
2315 | sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP)); | ||
2316 | signotset(&these); | ||
2317 | |||
2318 | if (uts) { | 2595 | if (uts) { |
2319 | if (copy_from_user(&ts, uts, sizeof(ts))) | 2596 | if (copy_from_user(&ts, uts, sizeof(ts))) |
2320 | return -EFAULT; | 2597 | return -EFAULT; |
2321 | if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0 | ||
2322 | || ts.tv_sec < 0) | ||
2323 | return -EINVAL; | ||
2324 | } | 2598 | } |
2325 | 2599 | ||
2326 | spin_lock_irq(&current->sighand->siglock); | 2600 | ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); |
2327 | sig = dequeue_signal(current, &these, &info); | ||
2328 | if (!sig) { | ||
2329 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
2330 | if (uts) | ||
2331 | timeout = (timespec_to_jiffies(&ts) | ||
2332 | + (ts.tv_sec || ts.tv_nsec)); | ||
2333 | |||
2334 | if (timeout) { | ||
2335 | /* | ||
2336 | * None ready -- temporarily unblock those we're | ||
2337 | * interested while we are sleeping in so that we'll | ||
2338 | * be awakened when they arrive. | ||
2339 | */ | ||
2340 | current->real_blocked = current->blocked; | ||
2341 | sigandsets(&current->blocked, &current->blocked, &these); | ||
2342 | recalc_sigpending(); | ||
2343 | spin_unlock_irq(&current->sighand->siglock); | ||
2344 | |||
2345 | timeout = schedule_timeout_interruptible(timeout); | ||
2346 | |||
2347 | spin_lock_irq(&current->sighand->siglock); | ||
2348 | sig = dequeue_signal(current, &these, &info); | ||
2349 | current->blocked = current->real_blocked; | ||
2350 | siginitset(&current->real_blocked, 0); | ||
2351 | recalc_sigpending(); | ||
2352 | } | ||
2353 | } | ||
2354 | spin_unlock_irq(&current->sighand->siglock); | ||
2355 | 2601 | ||
2356 | if (sig) { | 2602 | if (ret > 0 && uinfo) { |
2357 | ret = sig; | 2603 | if (copy_siginfo_to_user(uinfo, &info)) |
2358 | if (uinfo) { | 2604 | ret = -EFAULT; |
2359 | if (copy_siginfo_to_user(uinfo, &info)) | ||
2360 | ret = -EFAULT; | ||
2361 | } | ||
2362 | } else { | ||
2363 | ret = -EAGAIN; | ||
2364 | if (timeout) | ||
2365 | ret = -EINTR; | ||
2366 | } | 2605 | } |
2367 | 2606 | ||
2368 | return ret; | 2607 | return ret; |
@@ -2650,60 +2889,51 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) | |||
2650 | /** | 2889 | /** |
2651 | * sys_sigprocmask - examine and change blocked signals | 2890 | * sys_sigprocmask - examine and change blocked signals |
2652 | * @how: whether to add, remove, or set signals | 2891 | * @how: whether to add, remove, or set signals |
2653 | * @set: signals to add or remove (if non-null) | 2892 | * @nset: signals to add or remove (if non-null) |
2654 | * @oset: previous value of signal mask if non-null | 2893 | * @oset: previous value of signal mask if non-null |
2655 | * | 2894 | * |
2656 | * Some platforms have their own version with special arguments; | 2895 | * Some platforms have their own version with special arguments; |
2657 | * others support only sys_rt_sigprocmask. | 2896 | * others support only sys_rt_sigprocmask. |
2658 | */ | 2897 | */ |
2659 | 2898 | ||
2660 | SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, | 2899 | SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, |
2661 | old_sigset_t __user *, oset) | 2900 | old_sigset_t __user *, oset) |
2662 | { | 2901 | { |
2663 | int error; | ||
2664 | old_sigset_t old_set, new_set; | 2902 | old_sigset_t old_set, new_set; |
2903 | sigset_t new_blocked; | ||
2665 | 2904 | ||
2666 | if (set) { | 2905 | old_set = current->blocked.sig[0]; |
2667 | error = -EFAULT; | 2906 | |
2668 | if (copy_from_user(&new_set, set, sizeof(*set))) | 2907 | if (nset) { |
2669 | goto out; | 2908 | if (copy_from_user(&new_set, nset, sizeof(*nset))) |
2909 | return -EFAULT; | ||
2670 | new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); | 2910 | new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); |
2671 | 2911 | ||
2672 | spin_lock_irq(&current->sighand->siglock); | 2912 | new_blocked = current->blocked; |
2673 | old_set = current->blocked.sig[0]; | ||
2674 | 2913 | ||
2675 | error = 0; | ||
2676 | switch (how) { | 2914 | switch (how) { |
2677 | default: | ||
2678 | error = -EINVAL; | ||
2679 | break; | ||
2680 | case SIG_BLOCK: | 2915 | case SIG_BLOCK: |
2681 | sigaddsetmask(&current->blocked, new_set); | 2916 | sigaddsetmask(&new_blocked, new_set); |
2682 | break; | 2917 | break; |
2683 | case SIG_UNBLOCK: | 2918 | case SIG_UNBLOCK: |
2684 | sigdelsetmask(&current->blocked, new_set); | 2919 | sigdelsetmask(&new_blocked, new_set); |
2685 | break; | 2920 | break; |
2686 | case SIG_SETMASK: | 2921 | case SIG_SETMASK: |
2687 | current->blocked.sig[0] = new_set; | 2922 | new_blocked.sig[0] = new_set; |
2688 | break; | 2923 | break; |
2924 | default: | ||
2925 | return -EINVAL; | ||
2689 | } | 2926 | } |
2690 | 2927 | ||
2691 | recalc_sigpending(); | 2928 | set_current_blocked(&new_blocked); |
2692 | spin_unlock_irq(&current->sighand->siglock); | 2929 | } |
2693 | if (error) | 2930 | |
2694 | goto out; | 2931 | if (oset) { |
2695 | if (oset) | ||
2696 | goto set_old; | ||
2697 | } else if (oset) { | ||
2698 | old_set = current->blocked.sig[0]; | ||
2699 | set_old: | ||
2700 | error = -EFAULT; | ||
2701 | if (copy_to_user(oset, &old_set, sizeof(*oset))) | 2932 | if (copy_to_user(oset, &old_set, sizeof(*oset))) |
2702 | goto out; | 2933 | return -EFAULT; |
2703 | } | 2934 | } |
2704 | error = 0; | 2935 | |
2705 | out: | 2936 | return 0; |
2706 | return error; | ||
2707 | } | 2937 | } |
2708 | #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ | 2938 | #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ |
2709 | 2939 | ||
@@ -2793,8 +3023,10 @@ SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) | |||
2793 | 3023 | ||
2794 | SYSCALL_DEFINE0(pause) | 3024 | SYSCALL_DEFINE0(pause) |
2795 | { | 3025 | { |
2796 | current->state = TASK_INTERRUPTIBLE; | 3026 | while (!signal_pending(current)) { |
2797 | schedule(); | 3027 | current->state = TASK_INTERRUPTIBLE; |
3028 | schedule(); | ||
3029 | } | ||
2798 | return -ERESTARTNOHAND; | 3030 | return -ERESTARTNOHAND; |
2799 | } | 3031 | } |
2800 | 3032 | ||
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 25cc41cd8f33..62cbc8877fef 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt); | |||
46 | cond_syscall(compat_sys_getsockopt); | 46 | cond_syscall(compat_sys_getsockopt); |
47 | cond_syscall(sys_shutdown); | 47 | cond_syscall(sys_shutdown); |
48 | cond_syscall(sys_sendmsg); | 48 | cond_syscall(sys_sendmsg); |
49 | cond_syscall(sys_sendmmsg); | ||
49 | cond_syscall(compat_sys_sendmsg); | 50 | cond_syscall(compat_sys_sendmsg); |
51 | cond_syscall(compat_sys_sendmmsg); | ||
50 | cond_syscall(sys_recvmsg); | 52 | cond_syscall(sys_recvmsg); |
51 | cond_syscall(sys_recvmmsg); | 53 | cond_syscall(sys_recvmmsg); |
52 | cond_syscall(compat_sys_recvmsg); | 54 | cond_syscall(compat_sys_recvmsg); |
@@ -69,15 +71,22 @@ cond_syscall(compat_sys_epoll_pwait); | |||
69 | cond_syscall(sys_semget); | 71 | cond_syscall(sys_semget); |
70 | cond_syscall(sys_semop); | 72 | cond_syscall(sys_semop); |
71 | cond_syscall(sys_semtimedop); | 73 | cond_syscall(sys_semtimedop); |
74 | cond_syscall(compat_sys_semtimedop); | ||
72 | cond_syscall(sys_semctl); | 75 | cond_syscall(sys_semctl); |
76 | cond_syscall(compat_sys_semctl); | ||
73 | cond_syscall(sys_msgget); | 77 | cond_syscall(sys_msgget); |
74 | cond_syscall(sys_msgsnd); | 78 | cond_syscall(sys_msgsnd); |
79 | cond_syscall(compat_sys_msgsnd); | ||
75 | cond_syscall(sys_msgrcv); | 80 | cond_syscall(sys_msgrcv); |
81 | cond_syscall(compat_sys_msgrcv); | ||
76 | cond_syscall(sys_msgctl); | 82 | cond_syscall(sys_msgctl); |
83 | cond_syscall(compat_sys_msgctl); | ||
77 | cond_syscall(sys_shmget); | 84 | cond_syscall(sys_shmget); |
78 | cond_syscall(sys_shmat); | 85 | cond_syscall(sys_shmat); |
86 | cond_syscall(compat_sys_shmat); | ||
79 | cond_syscall(sys_shmdt); | 87 | cond_syscall(sys_shmdt); |
80 | cond_syscall(sys_shmctl); | 88 | cond_syscall(sys_shmctl); |
89 | cond_syscall(compat_sys_shmctl); | ||
81 | cond_syscall(sys_mq_open); | 90 | cond_syscall(sys_mq_open); |
82 | cond_syscall(sys_mq_unlink); | 91 | cond_syscall(sys_mq_unlink); |
83 | cond_syscall(sys_mq_timedsend); | 92 | cond_syscall(sys_mq_timedsend); |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3dd0c46fa3bb..4fc92445a29c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <linux/kprobes.h> | 56 | #include <linux/kprobes.h> |
57 | #include <linux/pipe_fs_i.h> | 57 | #include <linux/pipe_fs_i.h> |
58 | #include <linux/oom.h> | 58 | #include <linux/oom.h> |
59 | #include <linux/kmod.h> | ||
59 | 60 | ||
60 | #include <asm/uaccess.h> | 61 | #include <asm/uaccess.h> |
61 | #include <asm/processor.h> | 62 | #include <asm/processor.h> |
@@ -616,6 +617,11 @@ static struct ctl_table kern_table[] = { | |||
616 | .child = random_table, | 617 | .child = random_table, |
617 | }, | 618 | }, |
618 | { | 619 | { |
620 | .procname = "usermodehelper", | ||
621 | .mode = 0555, | ||
622 | .child = usermodehelper_table, | ||
623 | }, | ||
624 | { | ||
619 | .procname = "overflowuid", | 625 | .procname = "overflowuid", |
620 | .data = &overflowuid, | 626 | .data = &overflowuid, |
621 | .maxlen = sizeof(int), | 627 | .maxlen = sizeof(int), |
@@ -1500,7 +1506,7 @@ static struct ctl_table fs_table[] = { | |||
1500 | 1506 | ||
1501 | static struct ctl_table debug_table[] = { | 1507 | static struct ctl_table debug_table[] = { |
1502 | #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ | 1508 | #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ |
1503 | defined(CONFIG_S390) | 1509 | defined(CONFIG_S390) || defined(CONFIG_TILE) |
1504 | { | 1510 | { |
1505 | .procname = "exception-trace", | 1511 | .procname = "exception-trace", |
1506 | .data = &show_unhandled_signals, | 1512 | .data = &show_unhandled_signals, |
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 9265014cb4db..2d966244ea60 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c | |||
@@ -494,7 +494,7 @@ static int update_rmtp(ktime_t exp, enum alarmtimer_type type, | |||
494 | */ | 494 | */ |
495 | static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) | 495 | static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) |
496 | { | 496 | { |
497 | enum alarmtimer_type type = restart->nanosleep.index; | 497 | enum alarmtimer_type type = restart->nanosleep.clockid; |
498 | ktime_t exp; | 498 | ktime_t exp; |
499 | struct timespec __user *rmtp; | 499 | struct timespec __user *rmtp; |
500 | struct alarm alarm; | 500 | struct alarm alarm; |
@@ -573,7 +573,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, | |||
573 | 573 | ||
574 | restart = &current_thread_info()->restart_block; | 574 | restart = &current_thread_info()->restart_block; |
575 | restart->fn = alarm_timer_nsleep_restart; | 575 | restart->fn = alarm_timer_nsleep_restart; |
576 | restart->nanosleep.index = type; | 576 | restart->nanosleep.clockid = type; |
577 | restart->nanosleep.expires = exp.tv64; | 577 | restart->nanosleep.expires = exp.tv64; |
578 | restart->nanosleep.rmtp = rmtp; | 578 | restart->nanosleep.rmtp = rmtp; |
579 | ret = -ERESTART_RESTARTBLOCK; | 579 | ret = -ERESTART_RESTARTBLOCK; |
@@ -669,12 +669,20 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr) | |||
669 | */ | 669 | */ |
670 | static int __init alarmtimer_init_late(void) | 670 | static int __init alarmtimer_init_late(void) |
671 | { | 671 | { |
672 | struct device *dev; | ||
672 | char *str; | 673 | char *str; |
673 | 674 | ||
674 | /* Find an rtc device and init the rtc_timer */ | 675 | /* Find an rtc device and init the rtc_timer */ |
675 | class_find_device(rtc_class, NULL, &str, has_wakealarm); | 676 | dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); |
676 | if (str) | 677 | /* If we have a device then str is valid. See has_wakealarm() */ |
678 | if (dev) { | ||
677 | rtcdev = rtc_class_open(str); | 679 | rtcdev = rtc_class_open(str); |
680 | /* | ||
681 | * Drop the reference we got in class_find_device, | ||
682 | * rtc_open takes its own. | ||
683 | */ | ||
684 | put_device(dev); | ||
685 | } | ||
678 | if (!rtcdev) { | 686 | if (!rtcdev) { |
679 | printk(KERN_WARNING "No RTC device found, ALARM timers will" | 687 | printk(KERN_WARNING "No RTC device found, ALARM timers will" |
680 | " not wake from suspend"); | 688 | " not wake from suspend"); |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 22a9da9a9c96..c027d4f602f1 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -197,7 +197,7 @@ EXPORT_SYMBOL_GPL(clockevents_register_device); | |||
197 | static void clockevents_config(struct clock_event_device *dev, | 197 | static void clockevents_config(struct clock_event_device *dev, |
198 | u32 freq) | 198 | u32 freq) |
199 | { | 199 | { |
200 | unsigned long sec; | 200 | u64 sec; |
201 | 201 | ||
202 | if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) | 202 | if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) |
203 | return; | 203 | return; |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d9d5f8c885f6..1c95fd677328 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -639,7 +639,7 @@ static void clocksource_enqueue(struct clocksource *cs) | |||
639 | */ | 639 | */ |
640 | void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) | 640 | void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) |
641 | { | 641 | { |
642 | unsigned long sec; | 642 | u64 sec; |
643 | 643 | ||
644 | /* | 644 | /* |
645 | * Calc the maximum number of seconds which we can run before | 645 | * Calc the maximum number of seconds which we can run before |
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 723c7637e55a..c7218d132738 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -456,23 +456,27 @@ void tick_broadcast_oneshot_control(unsigned long reason) | |||
456 | unsigned long flags; | 456 | unsigned long flags; |
457 | int cpu; | 457 | int cpu; |
458 | 458 | ||
459 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
460 | |||
461 | /* | 459 | /* |
462 | * Periodic mode does not care about the enter/exit of power | 460 | * Periodic mode does not care about the enter/exit of power |
463 | * states | 461 | * states |
464 | */ | 462 | */ |
465 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | 463 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) |
466 | goto out; | 464 | return; |
467 | 465 | ||
468 | bc = tick_broadcast_device.evtdev; | 466 | /* |
467 | * We are called with preemtion disabled from the depth of the | ||
468 | * idle code, so we can't be moved away. | ||
469 | */ | ||
469 | cpu = smp_processor_id(); | 470 | cpu = smp_processor_id(); |
470 | td = &per_cpu(tick_cpu_device, cpu); | 471 | td = &per_cpu(tick_cpu_device, cpu); |
471 | dev = td->evtdev; | 472 | dev = td->evtdev; |
472 | 473 | ||
473 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) | 474 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) |
474 | goto out; | 475 | return; |
476 | |||
477 | bc = tick_broadcast_device.evtdev; | ||
475 | 478 | ||
479 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
476 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { | 480 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { |
477 | if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { | 481 | if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { |
478 | cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask()); | 482 | cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask()); |
@@ -489,8 +493,6 @@ void tick_broadcast_oneshot_control(unsigned long reason) | |||
489 | tick_program_event(dev->next_event, 1); | 493 | tick_program_event(dev->next_event, 1); |
490 | } | 494 | } |
491 | } | 495 | } |
492 | |||
493 | out: | ||
494 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 496 | raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
495 | } | 497 | } |
496 | 498 | ||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8e6a05a5915a..342408cf68dd 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -680,7 +680,7 @@ static void timekeeping_resume(void) | |||
680 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); | 680 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); |
681 | 681 | ||
682 | /* Resume hrtimers */ | 682 | /* Resume hrtimers */ |
683 | hres_timers_resume(); | 683 | hrtimers_resume(); |
684 | } | 684 | } |
685 | 685 | ||
686 | static int timekeeping_suspend(void) | 686 | static int timekeeping_suspend(void) |
@@ -1099,6 +1099,21 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, | |||
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | /** | 1101 | /** |
1102 | * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format | ||
1103 | */ | ||
1104 | ktime_t ktime_get_monotonic_offset(void) | ||
1105 | { | ||
1106 | unsigned long seq; | ||
1107 | struct timespec wtom; | ||
1108 | |||
1109 | do { | ||
1110 | seq = read_seqbegin(&xtime_lock); | ||
1111 | wtom = wall_to_monotonic; | ||
1112 | } while (read_seqretry(&xtime_lock, seq)); | ||
1113 | return timespec_to_ktime(wtom); | ||
1114 | } | ||
1115 | |||
1116 | /** | ||
1102 | * xtime_update() - advances the timekeeping infrastructure | 1117 | * xtime_update() - advances the timekeeping infrastructure |
1103 | * @ticks: number of ticks, that have elapsed since the last call. | 1118 | * @ticks: number of ticks, that have elapsed since the last call. |
1104 | * | 1119 | * |
diff --git a/kernel/utsname.c b/kernel/utsname.c index 44646179eaba..bff131b9510a 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/user_namespace.h> | 17 | #include <linux/user_namespace.h> |
18 | #include <linux/proc_fs.h> | ||
18 | 19 | ||
19 | static struct uts_namespace *create_uts_ns(void) | 20 | static struct uts_namespace *create_uts_ns(void) |
20 | { | 21 | { |
@@ -79,3 +80,41 @@ void free_uts_ns(struct kref *kref) | |||
79 | put_user_ns(ns->user_ns); | 80 | put_user_ns(ns->user_ns); |
80 | kfree(ns); | 81 | kfree(ns); |
81 | } | 82 | } |
83 | |||
84 | static void *utsns_get(struct task_struct *task) | ||
85 | { | ||
86 | struct uts_namespace *ns = NULL; | ||
87 | struct nsproxy *nsproxy; | ||
88 | |||
89 | rcu_read_lock(); | ||
90 | nsproxy = task_nsproxy(task); | ||
91 | if (nsproxy) { | ||
92 | ns = nsproxy->uts_ns; | ||
93 | get_uts_ns(ns); | ||
94 | } | ||
95 | rcu_read_unlock(); | ||
96 | |||
97 | return ns; | ||
98 | } | ||
99 | |||
100 | static void utsns_put(void *ns) | ||
101 | { | ||
102 | put_uts_ns(ns); | ||
103 | } | ||
104 | |||
105 | static int utsns_install(struct nsproxy *nsproxy, void *ns) | ||
106 | { | ||
107 | get_uts_ns(ns); | ||
108 | put_uts_ns(nsproxy->uts_ns); | ||
109 | nsproxy->uts_ns = ns; | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | const struct proc_ns_operations utsns_operations = { | ||
114 | .name = "uts", | ||
115 | .type = CLONE_NEWUTS, | ||
116 | .get = utsns_get, | ||
117 | .put = utsns_put, | ||
118 | .install = utsns_install, | ||
119 | }; | ||
120 | |||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e3378e8d3a5c..0400553f0d04 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -2866,9 +2866,7 @@ static int alloc_cwqs(struct workqueue_struct *wq) | |||
2866 | } | 2866 | } |
2867 | } | 2867 | } |
2868 | 2868 | ||
2869 | /* just in case, make sure it's actually aligned | 2869 | /* just in case, make sure it's actually aligned */ |
2870 | * - this is affected by PERCPU() alignment in vmlinux.lds.S | ||
2871 | */ | ||
2872 | BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); | 2870 | BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); |
2873 | return wq->cpu_wq.v ? 0 : -ENOMEM; | 2871 | return wq->cpu_wq.v ? 0 : -ENOMEM; |
2874 | } | 2872 | } |