diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 10 | ||||
-rw-r--r-- | kernel/cgroup.c | 3 | ||||
-rw-r--r-- | kernel/compat.c | 17 | ||||
-rw-r--r-- | kernel/cred.c | 27 | ||||
-rw-r--r-- | kernel/events/core.c | 2 | ||||
-rw-r--r-- | kernel/exit.c | 12 | ||||
-rw-r--r-- | kernel/fork.c | 73 | ||||
-rw-r--r-- | kernel/irq/manage.c | 2 | ||||
-rw-r--r-- | kernel/modsign_certificate.S | 19 | ||||
-rw-r--r-- | kernel/modsign_pubkey.c | 6 | ||||
-rw-r--r-- | kernel/module.c | 444 | ||||
-rw-r--r-- | kernel/nsproxy.c | 36 | ||||
-rw-r--r-- | kernel/pid.c | 62 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 113 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 3 | ||||
-rw-r--r-- | kernel/printk.c | 40 | ||||
-rw-r--r-- | kernel/ptrace.c | 13 | ||||
-rw-r--r-- | kernel/res_counter.c | 20 | ||||
-rw-r--r-- | kernel/sched/core.c | 10 | ||||
-rw-r--r-- | kernel/sched/fair.c | 5 | ||||
-rw-r--r-- | kernel/signal.c | 2 | ||||
-rw-r--r-- | kernel/sys_ni.c | 1 | ||||
-rw-r--r-- | kernel/sysctl_binary.c | 2 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace.c | 60 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 8 | ||||
-rw-r--r-- | kernel/user.c | 2 | ||||
-rw-r--r-- | kernel/user_namespace.c | 147 | ||||
-rw-r--r-- | kernel/utsname.c | 34 | ||||
-rw-r--r-- | kernel/watchdog.c | 24 |
31 files changed, 806 insertions, 399 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index ac0d533eb7de..6c072b6da239 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -54,7 +54,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |||
54 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o | 54 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o |
55 | obj-$(CONFIG_UID16) += uid16.o | 55 | obj-$(CONFIG_UID16) += uid16.o |
56 | obj-$(CONFIG_MODULES) += module.o | 56 | obj-$(CONFIG_MODULES) += module.o |
57 | obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o | 57 | obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o |
58 | obj-$(CONFIG_KALLSYMS) += kallsyms.o | 58 | obj-$(CONFIG_KALLSYMS) += kallsyms.o |
59 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o | 59 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o |
60 | obj-$(CONFIG_KEXEC) += kexec.o | 60 | obj-$(CONFIG_KEXEC) += kexec.o |
@@ -137,10 +137,14 @@ ifeq ($(CONFIG_MODULE_SIG),y) | |||
137 | # | 137 | # |
138 | # Pull the signing certificate and any extra certificates into the kernel | 138 | # Pull the signing certificate and any extra certificates into the kernel |
139 | # | 139 | # |
140 | |||
141 | quiet_cmd_touch = TOUCH $@ | ||
142 | cmd_touch = touch $@ | ||
143 | |||
140 | extra_certificates: | 144 | extra_certificates: |
141 | touch $@ | 145 | $(call cmd,touch) |
142 | 146 | ||
143 | kernel/modsign_pubkey.o: signing_key.x509 extra_certificates | 147 | kernel/modsign_certificate.o: signing_key.x509 extra_certificates |
144 | 148 | ||
145 | ############################################################################### | 149 | ############################################################################### |
146 | # | 150 | # |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f34c41bfaa37..4855892798fd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -1333,7 +1333,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1333 | if (ret) | 1333 | if (ret) |
1334 | goto out_unlock; | 1334 | goto out_unlock; |
1335 | 1335 | ||
1336 | /* See feature-removal-schedule.txt */ | ||
1337 | if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent) | 1336 | if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent) |
1338 | pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", | 1337 | pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", |
1339 | task_tgid_nr(current), current->comm); | 1338 | task_tgid_nr(current), current->comm); |
@@ -3409,7 +3408,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
3409 | { | 3408 | { |
3410 | struct cgroup_pidlist *l; | 3409 | struct cgroup_pidlist *l; |
3411 | /* don't need task_nsproxy() if we're looking at ourself */ | 3410 | /* don't need task_nsproxy() if we're looking at ourself */ |
3412 | struct pid_namespace *ns = current->nsproxy->pid_ns; | 3411 | struct pid_namespace *ns = task_active_pid_ns(current); |
3413 | 3412 | ||
3414 | /* | 3413 | /* |
3415 | * We can't drop the pidlist_mutex before taking the l->mutex in case | 3414 | * We can't drop the pidlist_mutex before taking the l->mutex in case |
diff --git a/kernel/compat.c b/kernel/compat.c index c28a306ae05c..f6150e92dfc9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -1215,6 +1215,23 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info) | |||
1215 | return 0; | 1215 | return 0; |
1216 | } | 1216 | } |
1217 | 1217 | ||
1218 | #ifdef __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL | ||
1219 | asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, | ||
1220 | struct compat_timespec __user *interval) | ||
1221 | { | ||
1222 | struct timespec t; | ||
1223 | int ret; | ||
1224 | mm_segment_t old_fs = get_fs(); | ||
1225 | |||
1226 | set_fs(KERNEL_DS); | ||
1227 | ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); | ||
1228 | set_fs(old_fs); | ||
1229 | if (put_compat_timespec(&t, interval)) | ||
1230 | return -EFAULT; | ||
1231 | return ret; | ||
1232 | } | ||
1233 | #endif /* __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL */ | ||
1234 | |||
1218 | /* | 1235 | /* |
1219 | * Allocate user-space memory for the duration of a single system call, | 1236 | * Allocate user-space memory for the duration of a single system call, |
1220 | * in order to marshall parameters inside a compat thunk. | 1237 | * in order to marshall parameters inside a compat thunk. |
diff --git a/kernel/cred.c b/kernel/cred.c index 8888afb846e9..e0573a43c7df 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -372,6 +372,31 @@ error_put: | |||
372 | return ret; | 372 | return ret; |
373 | } | 373 | } |
374 | 374 | ||
375 | static bool cred_cap_issubset(const struct cred *set, const struct cred *subset) | ||
376 | { | ||
377 | const struct user_namespace *set_ns = set->user_ns; | ||
378 | const struct user_namespace *subset_ns = subset->user_ns; | ||
379 | |||
380 | /* If the two credentials are in the same user namespace see if | ||
381 | * the capabilities of subset are a subset of set. | ||
382 | */ | ||
383 | if (set_ns == subset_ns) | ||
384 | return cap_issubset(subset->cap_permitted, set->cap_permitted); | ||
385 | |||
386 | /* The credentials are in a different user namespaces | ||
387 | * therefore one is a subset of the other only if a set is an | ||
388 | * ancestor of subset and set->euid is owner of subset or one | ||
389 | * of subsets ancestors. | ||
390 | */ | ||
391 | for (;subset_ns != &init_user_ns; subset_ns = subset_ns->parent) { | ||
392 | if ((set_ns == subset_ns->parent) && | ||
393 | uid_eq(subset_ns->owner, set->euid)) | ||
394 | return true; | ||
395 | } | ||
396 | |||
397 | return false; | ||
398 | } | ||
399 | |||
375 | /** | 400 | /** |
376 | * commit_creds - Install new credentials upon the current task | 401 | * commit_creds - Install new credentials upon the current task |
377 | * @new: The credentials to be assigned | 402 | * @new: The credentials to be assigned |
@@ -410,7 +435,7 @@ int commit_creds(struct cred *new) | |||
410 | !gid_eq(old->egid, new->egid) || | 435 | !gid_eq(old->egid, new->egid) || |
411 | !uid_eq(old->fsuid, new->fsuid) || | 436 | !uid_eq(old->fsuid, new->fsuid) || |
412 | !gid_eq(old->fsgid, new->fsgid) || | 437 | !gid_eq(old->fsgid, new->fsgid) || |
413 | !cap_issubset(new->cap_permitted, old->cap_permitted)) { | 438 | !cred_cap_issubset(old, new)) { |
414 | if (task->mm) | 439 | if (task->mm) |
415 | set_dumpable(task->mm, suid_dumpable); | 440 | set_dumpable(task->mm, suid_dumpable); |
416 | task->pdeath_signal = 0; | 441 | task->pdeath_signal = 0; |
diff --git a/kernel/events/core.c b/kernel/events/core.c index f9ff5493171d..301079d06f24 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -6155,7 +6155,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
6155 | 6155 | ||
6156 | event->parent = parent_event; | 6156 | event->parent = parent_event; |
6157 | 6157 | ||
6158 | event->ns = get_pid_ns(current->nsproxy->pid_ns); | 6158 | event->ns = get_pid_ns(task_active_pid_ns(current)); |
6159 | event->id = atomic64_inc_return(&perf_event_id); | 6159 | event->id = atomic64_inc_return(&perf_event_id); |
6160 | 6160 | ||
6161 | event->state = PERF_EVENT_STATE_INACTIVE; | 6161 | event->state = PERF_EVENT_STATE_INACTIVE; |
diff --git a/kernel/exit.c b/kernel/exit.c index 50d2e93c36ea..b4df21937216 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -72,18 +72,6 @@ static void __unhash_process(struct task_struct *p, bool group_dead) | |||
72 | list_del_rcu(&p->tasks); | 72 | list_del_rcu(&p->tasks); |
73 | list_del_init(&p->sibling); | 73 | list_del_init(&p->sibling); |
74 | __this_cpu_dec(process_counts); | 74 | __this_cpu_dec(process_counts); |
75 | /* | ||
76 | * If we are the last child process in a pid namespace to be | ||
77 | * reaped, notify the reaper sleeping zap_pid_ns_processes(). | ||
78 | */ | ||
79 | if (IS_ENABLED(CONFIG_PID_NS)) { | ||
80 | struct task_struct *parent = p->real_parent; | ||
81 | |||
82 | if ((task_active_pid_ns(parent)->child_reaper == parent) && | ||
83 | list_empty(&parent->children) && | ||
84 | (parent->flags & PF_EXITING)) | ||
85 | wake_up_process(parent); | ||
86 | } | ||
87 | } | 75 | } |
88 | list_del_rcu(&p->thread_group); | 76 | list_del_rcu(&p->thread_group); |
89 | } | 77 | } |
diff --git a/kernel/fork.c b/kernel/fork.c index 115d6c2e4cca..85f6d536608d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti) | |||
146 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, | 146 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, |
147 | int node) | 147 | int node) |
148 | { | 148 | { |
149 | struct page *page = alloc_pages_node(node, THREADINFO_GFP, | 149 | struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED, |
150 | THREAD_SIZE_ORDER); | 150 | THREAD_SIZE_ORDER); |
151 | 151 | ||
152 | return page ? page_address(page) : NULL; | 152 | return page ? page_address(page) : NULL; |
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, | |||
154 | 154 | ||
155 | static inline void free_thread_info(struct thread_info *ti) | 155 | static inline void free_thread_info(struct thread_info *ti) |
156 | { | 156 | { |
157 | free_pages((unsigned long)ti, THREAD_SIZE_ORDER); | 157 | free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); |
158 | } | 158 | } |
159 | # else | 159 | # else |
160 | static struct kmem_cache *thread_info_cache; | 160 | static struct kmem_cache *thread_info_cache; |
@@ -1044,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
1044 | atomic_set(&sig->live, 1); | 1044 | atomic_set(&sig->live, 1); |
1045 | atomic_set(&sig->sigcnt, 1); | 1045 | atomic_set(&sig->sigcnt, 1); |
1046 | init_waitqueue_head(&sig->wait_chldexit); | 1046 | init_waitqueue_head(&sig->wait_chldexit); |
1047 | if (clone_flags & CLONE_NEWPID) | ||
1048 | sig->flags |= SIGNAL_UNKILLABLE; | ||
1049 | sig->curr_target = tsk; | 1047 | sig->curr_target = tsk; |
1050 | init_sigpending(&sig->shared_pending); | 1048 | init_sigpending(&sig->shared_pending); |
1051 | INIT_LIST_HEAD(&sig->posix_timers); | 1049 | INIT_LIST_HEAD(&sig->posix_timers); |
@@ -1438,8 +1436,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1438 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); | 1436 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); |
1439 | 1437 | ||
1440 | if (thread_group_leader(p)) { | 1438 | if (thread_group_leader(p)) { |
1441 | if (is_child_reaper(pid)) | 1439 | if (is_child_reaper(pid)) { |
1442 | p->nsproxy->pid_ns->child_reaper = p; | 1440 | ns_of_pid(pid)->child_reaper = p; |
1441 | p->signal->flags |= SIGNAL_UNKILLABLE; | ||
1442 | } | ||
1443 | 1443 | ||
1444 | p->signal->leader_pid = pid; | 1444 | p->signal->leader_pid = pid; |
1445 | p->signal->tty = tty_kref_get(current->signal->tty); | 1445 | p->signal->tty = tty_kref_get(current->signal->tty); |
@@ -1473,8 +1473,6 @@ bad_fork_cleanup_io: | |||
1473 | if (p->io_context) | 1473 | if (p->io_context) |
1474 | exit_io_context(p); | 1474 | exit_io_context(p); |
1475 | bad_fork_cleanup_namespaces: | 1475 | bad_fork_cleanup_namespaces: |
1476 | if (unlikely(clone_flags & CLONE_NEWPID)) | ||
1477 | pid_ns_release_proc(p->nsproxy->pid_ns); | ||
1478 | exit_task_namespaces(p); | 1476 | exit_task_namespaces(p); |
1479 | bad_fork_cleanup_mm: | 1477 | bad_fork_cleanup_mm: |
1480 | if (p->mm) | 1478 | if (p->mm) |
@@ -1554,15 +1552,9 @@ long do_fork(unsigned long clone_flags, | |||
1554 | * Do some preliminary argument and permissions checking before we | 1552 | * Do some preliminary argument and permissions checking before we |
1555 | * actually start allocating stuff | 1553 | * actually start allocating stuff |
1556 | */ | 1554 | */ |
1557 | if (clone_flags & CLONE_NEWUSER) { | 1555 | if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) { |
1558 | if (clone_flags & CLONE_THREAD) | 1556 | if (clone_flags & (CLONE_THREAD|CLONE_PARENT)) |
1559 | return -EINVAL; | 1557 | return -EINVAL; |
1560 | /* hopefully this check will go away when userns support is | ||
1561 | * complete | ||
1562 | */ | ||
1563 | if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || | ||
1564 | !capable(CAP_SETGID)) | ||
1565 | return -EPERM; | ||
1566 | } | 1558 | } |
1567 | 1559 | ||
1568 | /* | 1560 | /* |
@@ -1724,7 +1716,8 @@ static int check_unshare_flags(unsigned long unshare_flags) | |||
1724 | { | 1716 | { |
1725 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | 1717 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| |
1726 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | 1718 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
1727 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | 1719 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| |
1720 | CLONE_NEWUSER|CLONE_NEWPID)) | ||
1728 | return -EINVAL; | 1721 | return -EINVAL; |
1729 | /* | 1722 | /* |
1730 | * Not implemented, but pretend it works if there is nothing to | 1723 | * Not implemented, but pretend it works if there is nothing to |
@@ -1791,19 +1784,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1791 | { | 1784 | { |
1792 | struct fs_struct *fs, *new_fs = NULL; | 1785 | struct fs_struct *fs, *new_fs = NULL; |
1793 | struct files_struct *fd, *new_fd = NULL; | 1786 | struct files_struct *fd, *new_fd = NULL; |
1787 | struct cred *new_cred = NULL; | ||
1794 | struct nsproxy *new_nsproxy = NULL; | 1788 | struct nsproxy *new_nsproxy = NULL; |
1795 | int do_sysvsem = 0; | 1789 | int do_sysvsem = 0; |
1796 | int err; | 1790 | int err; |
1797 | 1791 | ||
1798 | err = check_unshare_flags(unshare_flags); | 1792 | /* |
1799 | if (err) | 1793 | * If unsharing a user namespace must also unshare the thread. |
1800 | goto bad_unshare_out; | 1794 | */ |
1801 | 1795 | if (unshare_flags & CLONE_NEWUSER) | |
1796 | unshare_flags |= CLONE_THREAD; | ||
1797 | /* | ||
1798 | * If unsharing a pid namespace must also unshare the thread. | ||
1799 | */ | ||
1800 | if (unshare_flags & CLONE_NEWPID) | ||
1801 | unshare_flags |= CLONE_THREAD; | ||
1802 | /* | ||
1803 | * If unsharing a thread from a thread group, must also unshare vm. | ||
1804 | */ | ||
1805 | if (unshare_flags & CLONE_THREAD) | ||
1806 | unshare_flags |= CLONE_VM; | ||
1807 | /* | ||
1808 | * If unsharing vm, must also unshare signal handlers. | ||
1809 | */ | ||
1810 | if (unshare_flags & CLONE_VM) | ||
1811 | unshare_flags |= CLONE_SIGHAND; | ||
1802 | /* | 1812 | /* |
1803 | * If unsharing namespace, must also unshare filesystem information. | 1813 | * If unsharing namespace, must also unshare filesystem information. |
1804 | */ | 1814 | */ |
1805 | if (unshare_flags & CLONE_NEWNS) | 1815 | if (unshare_flags & CLONE_NEWNS) |
1806 | unshare_flags |= CLONE_FS; | 1816 | unshare_flags |= CLONE_FS; |
1817 | |||
1818 | err = check_unshare_flags(unshare_flags); | ||
1819 | if (err) | ||
1820 | goto bad_unshare_out; | ||
1807 | /* | 1821 | /* |
1808 | * CLONE_NEWIPC must also detach from the undolist: after switching | 1822 | * CLONE_NEWIPC must also detach from the undolist: after switching |
1809 | * to a new ipc namespace, the semaphore arrays from the old | 1823 | * to a new ipc namespace, the semaphore arrays from the old |
@@ -1817,11 +1831,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1817 | err = unshare_fd(unshare_flags, &new_fd); | 1831 | err = unshare_fd(unshare_flags, &new_fd); |
1818 | if (err) | 1832 | if (err) |
1819 | goto bad_unshare_cleanup_fs; | 1833 | goto bad_unshare_cleanup_fs; |
1820 | err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); | 1834 | err = unshare_userns(unshare_flags, &new_cred); |
1821 | if (err) | 1835 | if (err) |
1822 | goto bad_unshare_cleanup_fd; | 1836 | goto bad_unshare_cleanup_fd; |
1837 | err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | ||
1838 | new_cred, new_fs); | ||
1839 | if (err) | ||
1840 | goto bad_unshare_cleanup_cred; | ||
1823 | 1841 | ||
1824 | if (new_fs || new_fd || do_sysvsem || new_nsproxy) { | 1842 | if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { |
1825 | if (do_sysvsem) { | 1843 | if (do_sysvsem) { |
1826 | /* | 1844 | /* |
1827 | * CLONE_SYSVSEM is equivalent to sys_exit(). | 1845 | * CLONE_SYSVSEM is equivalent to sys_exit(). |
@@ -1854,11 +1872,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1854 | } | 1872 | } |
1855 | 1873 | ||
1856 | task_unlock(current); | 1874 | task_unlock(current); |
1875 | |||
1876 | if (new_cred) { | ||
1877 | /* Install the new user namespace */ | ||
1878 | commit_creds(new_cred); | ||
1879 | new_cred = NULL; | ||
1880 | } | ||
1857 | } | 1881 | } |
1858 | 1882 | ||
1859 | if (new_nsproxy) | 1883 | if (new_nsproxy) |
1860 | put_nsproxy(new_nsproxy); | 1884 | put_nsproxy(new_nsproxy); |
1861 | 1885 | ||
1886 | bad_unshare_cleanup_cred: | ||
1887 | if (new_cred) | ||
1888 | put_cred(new_cred); | ||
1862 | bad_unshare_cleanup_fd: | 1889 | bad_unshare_cleanup_fd: |
1863 | if (new_fd) | 1890 | if (new_fd) |
1864 | put_files_struct(new_fd); | 1891 | put_files_struct(new_fd); |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 35c70c9e24d8..e49a288fa479 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -818,7 +818,7 @@ static void irq_thread_dtor(struct callback_head *unused) | |||
818 | action = kthread_data(tsk); | 818 | action = kthread_data(tsk); |
819 | 819 | ||
820 | pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", | 820 | pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", |
821 | tsk->comm ? tsk->comm : "", tsk->pid, action->irq); | 821 | tsk->comm, tsk->pid, action->irq); |
822 | 822 | ||
823 | 823 | ||
824 | desc = irq_to_desc(action->irq); | 824 | desc = irq_to_desc(action->irq); |
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S new file mode 100644 index 000000000000..246b4c6e6135 --- /dev/null +++ b/kernel/modsign_certificate.S | |||
@@ -0,0 +1,19 @@ | |||
1 | /* SYMBOL_PREFIX defined on commandline from CONFIG_SYMBOL_PREFIX */ | ||
2 | #ifndef SYMBOL_PREFIX | ||
3 | #define ASM_SYMBOL(sym) sym | ||
4 | #else | ||
5 | #define PASTE2(x,y) x##y | ||
6 | #define PASTE(x,y) PASTE2(x,y) | ||
7 | #define ASM_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym) | ||
8 | #endif | ||
9 | |||
10 | #define GLOBAL(name) \ | ||
11 | .globl ASM_SYMBOL(name); \ | ||
12 | ASM_SYMBOL(name): | ||
13 | |||
14 | .section ".init.data","aw" | ||
15 | |||
16 | GLOBAL(modsign_certificate_list) | ||
17 | .incbin "signing_key.x509" | ||
18 | .incbin "extra_certificates" | ||
19 | GLOBAL(modsign_certificate_list_end) | ||
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c index 767e559dfb10..045504fffbb2 100644 --- a/kernel/modsign_pubkey.c +++ b/kernel/modsign_pubkey.c | |||
@@ -20,12 +20,6 @@ struct key *modsign_keyring; | |||
20 | 20 | ||
21 | extern __initdata const u8 modsign_certificate_list[]; | 21 | extern __initdata const u8 modsign_certificate_list[]; |
22 | extern __initdata const u8 modsign_certificate_list_end[]; | 22 | extern __initdata const u8 modsign_certificate_list_end[]; |
23 | asm(".section .init.data,\"aw\"\n" | ||
24 | SYMBOL_PREFIX "modsign_certificate_list:\n" | ||
25 | ".incbin \"signing_key.x509\"\n" | ||
26 | ".incbin \"extra_certificates\"\n" | ||
27 | SYMBOL_PREFIX "modsign_certificate_list_end:" | ||
28 | ); | ||
29 | 23 | ||
30 | /* | 24 | /* |
31 | * We need to make sure ccache doesn't cache the .o file as it doesn't notice | 25 | * We need to make sure ccache doesn't cache the .o file as it doesn't notice |
diff --git a/kernel/module.c b/kernel/module.c index 6e48c3a43599..250092c1d57d 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/ftrace_event.h> | 21 | #include <linux/ftrace_event.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/kallsyms.h> | 23 | #include <linux/kallsyms.h> |
24 | #include <linux/file.h> | ||
24 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
25 | #include <linux/sysfs.h> | 26 | #include <linux/sysfs.h> |
26 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
@@ -28,6 +29,7 @@ | |||
28 | #include <linux/vmalloc.h> | 29 | #include <linux/vmalloc.h> |
29 | #include <linux/elf.h> | 30 | #include <linux/elf.h> |
30 | #include <linux/proc_fs.h> | 31 | #include <linux/proc_fs.h> |
32 | #include <linux/security.h> | ||
31 | #include <linux/seq_file.h> | 33 | #include <linux/seq_file.h> |
32 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
33 | #include <linux/fcntl.h> | 35 | #include <linux/fcntl.h> |
@@ -59,6 +61,7 @@ | |||
59 | #include <linux/pfn.h> | 61 | #include <linux/pfn.h> |
60 | #include <linux/bsearch.h> | 62 | #include <linux/bsearch.h> |
61 | #include <linux/fips.h> | 63 | #include <linux/fips.h> |
64 | #include <uapi/linux/module.h> | ||
62 | #include "module-internal.h" | 65 | #include "module-internal.h" |
63 | 66 | ||
64 | #define CREATE_TRACE_POINTS | 67 | #define CREATE_TRACE_POINTS |
@@ -372,9 +375,6 @@ static bool check_symbol(const struct symsearch *syms, | |||
372 | printk(KERN_WARNING "Symbol %s is being used " | 375 | printk(KERN_WARNING "Symbol %s is being used " |
373 | "by a non-GPL module, which will not " | 376 | "by a non-GPL module, which will not " |
374 | "be allowed in the future\n", fsa->name); | 377 | "be allowed in the future\n", fsa->name); |
375 | printk(KERN_WARNING "Please see the file " | ||
376 | "Documentation/feature-removal-schedule.txt " | ||
377 | "in the kernel source tree for more details.\n"); | ||
378 | } | 378 | } |
379 | } | 379 | } |
380 | 380 | ||
@@ -2282,7 +2282,7 @@ static void layout_symtab(struct module *mod, struct load_info *info) | |||
2282 | Elf_Shdr *symsect = info->sechdrs + info->index.sym; | 2282 | Elf_Shdr *symsect = info->sechdrs + info->index.sym; |
2283 | Elf_Shdr *strsect = info->sechdrs + info->index.str; | 2283 | Elf_Shdr *strsect = info->sechdrs + info->index.str; |
2284 | const Elf_Sym *src; | 2284 | const Elf_Sym *src; |
2285 | unsigned int i, nsrc, ndst, strtab_size; | 2285 | unsigned int i, nsrc, ndst, strtab_size = 0; |
2286 | 2286 | ||
2287 | /* Put symbol section at end of init part of module. */ | 2287 | /* Put symbol section at end of init part of module. */ |
2288 | symsect->sh_flags |= SHF_ALLOC; | 2288 | symsect->sh_flags |= SHF_ALLOC; |
@@ -2293,9 +2293,6 @@ static void layout_symtab(struct module *mod, struct load_info *info) | |||
2293 | src = (void *)info->hdr + symsect->sh_offset; | 2293 | src = (void *)info->hdr + symsect->sh_offset; |
2294 | nsrc = symsect->sh_size / sizeof(*src); | 2294 | nsrc = symsect->sh_size / sizeof(*src); |
2295 | 2295 | ||
2296 | /* strtab always starts with a nul, so offset 0 is the empty string. */ | ||
2297 | strtab_size = 1; | ||
2298 | |||
2299 | /* Compute total space required for the core symbols' strtab. */ | 2296 | /* Compute total space required for the core symbols' strtab. */ |
2300 | for (ndst = i = 0; i < nsrc; i++) { | 2297 | for (ndst = i = 0; i < nsrc; i++) { |
2301 | if (i == 0 || | 2298 | if (i == 0 || |
@@ -2337,7 +2334,6 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) | |||
2337 | mod->core_symtab = dst = mod->module_core + info->symoffs; | 2334 | mod->core_symtab = dst = mod->module_core + info->symoffs; |
2338 | mod->core_strtab = s = mod->module_core + info->stroffs; | 2335 | mod->core_strtab = s = mod->module_core + info->stroffs; |
2339 | src = mod->symtab; | 2336 | src = mod->symtab; |
2340 | *s++ = 0; | ||
2341 | for (ndst = i = 0; i < mod->num_symtab; i++) { | 2337 | for (ndst = i = 0; i < mod->num_symtab; i++) { |
2342 | if (i == 0 || | 2338 | if (i == 0 || |
2343 | is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { | 2339 | is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { |
@@ -2378,7 +2374,7 @@ static void dynamic_debug_remove(struct _ddebug *debug) | |||
2378 | 2374 | ||
2379 | void * __weak module_alloc(unsigned long size) | 2375 | void * __weak module_alloc(unsigned long size) |
2380 | { | 2376 | { |
2381 | return size == 0 ? NULL : vmalloc_exec(size); | 2377 | return vmalloc_exec(size); |
2382 | } | 2378 | } |
2383 | 2379 | ||
2384 | static void *module_alloc_update_bounds(unsigned long size) | 2380 | static void *module_alloc_update_bounds(unsigned long size) |
@@ -2425,18 +2421,17 @@ static inline void kmemleak_load_module(const struct module *mod, | |||
2425 | #endif | 2421 | #endif |
2426 | 2422 | ||
2427 | #ifdef CONFIG_MODULE_SIG | 2423 | #ifdef CONFIG_MODULE_SIG |
2428 | static int module_sig_check(struct load_info *info, | 2424 | static int module_sig_check(struct load_info *info) |
2429 | const void *mod, unsigned long *_len) | ||
2430 | { | 2425 | { |
2431 | int err = -ENOKEY; | 2426 | int err = -ENOKEY; |
2432 | unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; | 2427 | const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; |
2433 | unsigned long len = *_len; | 2428 | const void *mod = info->hdr; |
2434 | 2429 | ||
2435 | if (len > markerlen && | 2430 | if (info->len > markerlen && |
2436 | memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { | 2431 | memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { |
2437 | /* We truncate the module to discard the signature */ | 2432 | /* We truncate the module to discard the signature */ |
2438 | *_len -= markerlen; | 2433 | info->len -= markerlen; |
2439 | err = mod_verify_sig(mod, _len); | 2434 | err = mod_verify_sig(mod, &info->len); |
2440 | } | 2435 | } |
2441 | 2436 | ||
2442 | if (!err) { | 2437 | if (!err) { |
@@ -2454,59 +2449,107 @@ static int module_sig_check(struct load_info *info, | |||
2454 | return err; | 2449 | return err; |
2455 | } | 2450 | } |
2456 | #else /* !CONFIG_MODULE_SIG */ | 2451 | #else /* !CONFIG_MODULE_SIG */ |
2457 | static int module_sig_check(struct load_info *info, | 2452 | static int module_sig_check(struct load_info *info) |
2458 | void *mod, unsigned long *len) | ||
2459 | { | 2453 | { |
2460 | return 0; | 2454 | return 0; |
2461 | } | 2455 | } |
2462 | #endif /* !CONFIG_MODULE_SIG */ | 2456 | #endif /* !CONFIG_MODULE_SIG */ |
2463 | 2457 | ||
2464 | /* Sets info->hdr, info->len and info->sig_ok. */ | 2458 | /* Sanity checks against invalid binaries, wrong arch, weird elf version. */ |
2465 | static int copy_and_check(struct load_info *info, | 2459 | static int elf_header_check(struct load_info *info) |
2466 | const void __user *umod, unsigned long len, | 2460 | { |
2467 | const char __user *uargs) | 2461 | if (info->len < sizeof(*(info->hdr))) |
2462 | return -ENOEXEC; | ||
2463 | |||
2464 | if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0 | ||
2465 | || info->hdr->e_type != ET_REL | ||
2466 | || !elf_check_arch(info->hdr) | ||
2467 | || info->hdr->e_shentsize != sizeof(Elf_Shdr)) | ||
2468 | return -ENOEXEC; | ||
2469 | |||
2470 | if (info->hdr->e_shoff >= info->len | ||
2471 | || (info->hdr->e_shnum * sizeof(Elf_Shdr) > | ||
2472 | info->len - info->hdr->e_shoff)) | ||
2473 | return -ENOEXEC; | ||
2474 | |||
2475 | return 0; | ||
2476 | } | ||
2477 | |||
2478 | /* Sets info->hdr and info->len. */ | ||
2479 | static int copy_module_from_user(const void __user *umod, unsigned long len, | ||
2480 | struct load_info *info) | ||
2468 | { | 2481 | { |
2469 | int err; | 2482 | int err; |
2470 | Elf_Ehdr *hdr; | ||
2471 | 2483 | ||
2472 | if (len < sizeof(*hdr)) | 2484 | info->len = len; |
2485 | if (info->len < sizeof(*(info->hdr))) | ||
2473 | return -ENOEXEC; | 2486 | return -ENOEXEC; |
2474 | 2487 | ||
2488 | err = security_kernel_module_from_file(NULL); | ||
2489 | if (err) | ||
2490 | return err; | ||
2491 | |||
2475 | /* Suck in entire file: we'll want most of it. */ | 2492 | /* Suck in entire file: we'll want most of it. */ |
2476 | if ((hdr = vmalloc(len)) == NULL) | 2493 | info->hdr = vmalloc(info->len); |
2494 | if (!info->hdr) | ||
2477 | return -ENOMEM; | 2495 | return -ENOMEM; |
2478 | 2496 | ||
2479 | if (copy_from_user(hdr, umod, len) != 0) { | 2497 | if (copy_from_user(info->hdr, umod, info->len) != 0) { |
2480 | err = -EFAULT; | 2498 | vfree(info->hdr); |
2481 | goto free_hdr; | 2499 | return -EFAULT; |
2482 | } | 2500 | } |
2483 | 2501 | ||
2484 | err = module_sig_check(info, hdr, &len); | 2502 | return 0; |
2503 | } | ||
2504 | |||
2505 | /* Sets info->hdr and info->len. */ | ||
2506 | static int copy_module_from_fd(int fd, struct load_info *info) | ||
2507 | { | ||
2508 | struct file *file; | ||
2509 | int err; | ||
2510 | struct kstat stat; | ||
2511 | loff_t pos; | ||
2512 | ssize_t bytes = 0; | ||
2513 | |||
2514 | file = fget(fd); | ||
2515 | if (!file) | ||
2516 | return -ENOEXEC; | ||
2517 | |||
2518 | err = security_kernel_module_from_file(file); | ||
2485 | if (err) | 2519 | if (err) |
2486 | goto free_hdr; | 2520 | goto out; |
2487 | 2521 | ||
2488 | /* Sanity checks against insmoding binaries or wrong arch, | 2522 | err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat); |
2489 | weird elf version */ | 2523 | if (err) |
2490 | if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 | 2524 | goto out; |
2491 | || hdr->e_type != ET_REL | ||
2492 | || !elf_check_arch(hdr) | ||
2493 | || hdr->e_shentsize != sizeof(Elf_Shdr)) { | ||
2494 | err = -ENOEXEC; | ||
2495 | goto free_hdr; | ||
2496 | } | ||
2497 | 2525 | ||
2498 | if (hdr->e_shoff >= len || | 2526 | if (stat.size > INT_MAX) { |
2499 | hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) { | 2527 | err = -EFBIG; |
2500 | err = -ENOEXEC; | 2528 | goto out; |
2501 | goto free_hdr; | 2529 | } |
2530 | info->hdr = vmalloc(stat.size); | ||
2531 | if (!info->hdr) { | ||
2532 | err = -ENOMEM; | ||
2533 | goto out; | ||
2502 | } | 2534 | } |
2503 | 2535 | ||
2504 | info->hdr = hdr; | 2536 | pos = 0; |
2505 | info->len = len; | 2537 | while (pos < stat.size) { |
2506 | return 0; | 2538 | bytes = kernel_read(file, pos, (char *)(info->hdr) + pos, |
2539 | stat.size - pos); | ||
2540 | if (bytes < 0) { | ||
2541 | vfree(info->hdr); | ||
2542 | err = bytes; | ||
2543 | goto out; | ||
2544 | } | ||
2545 | if (bytes == 0) | ||
2546 | break; | ||
2547 | pos += bytes; | ||
2548 | } | ||
2549 | info->len = pos; | ||
2507 | 2550 | ||
2508 | free_hdr: | 2551 | out: |
2509 | vfree(hdr); | 2552 | fput(file); |
2510 | return err; | 2553 | return err; |
2511 | } | 2554 | } |
2512 | 2555 | ||
@@ -2515,7 +2558,7 @@ static void free_copy(struct load_info *info) | |||
2515 | vfree(info->hdr); | 2558 | vfree(info->hdr); |
2516 | } | 2559 | } |
2517 | 2560 | ||
2518 | static int rewrite_section_headers(struct load_info *info) | 2561 | static int rewrite_section_headers(struct load_info *info, int flags) |
2519 | { | 2562 | { |
2520 | unsigned int i; | 2563 | unsigned int i; |
2521 | 2564 | ||
@@ -2543,7 +2586,10 @@ static int rewrite_section_headers(struct load_info *info) | |||
2543 | } | 2586 | } |
2544 | 2587 | ||
2545 | /* Track but don't keep modinfo and version sections. */ | 2588 | /* Track but don't keep modinfo and version sections. */ |
2546 | info->index.vers = find_sec(info, "__versions"); | 2589 | if (flags & MODULE_INIT_IGNORE_MODVERSIONS) |
2590 | info->index.vers = 0; /* Pretend no __versions section! */ | ||
2591 | else | ||
2592 | info->index.vers = find_sec(info, "__versions"); | ||
2547 | info->index.info = find_sec(info, ".modinfo"); | 2593 | info->index.info = find_sec(info, ".modinfo"); |
2548 | info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; | 2594 | info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; |
2549 | info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; | 2595 | info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; |
@@ -2558,7 +2604,7 @@ static int rewrite_section_headers(struct load_info *info) | |||
2558 | * Return the temporary module pointer (we'll replace it with the final | 2604 | * Return the temporary module pointer (we'll replace it with the final |
2559 | * one when we move the module sections around). | 2605 | * one when we move the module sections around). |
2560 | */ | 2606 | */ |
2561 | static struct module *setup_load_info(struct load_info *info) | 2607 | static struct module *setup_load_info(struct load_info *info, int flags) |
2562 | { | 2608 | { |
2563 | unsigned int i; | 2609 | unsigned int i; |
2564 | int err; | 2610 | int err; |
@@ -2569,7 +2615,7 @@ static struct module *setup_load_info(struct load_info *info) | |||
2569 | info->secstrings = (void *)info->hdr | 2615 | info->secstrings = (void *)info->hdr |
2570 | + info->sechdrs[info->hdr->e_shstrndx].sh_offset; | 2616 | + info->sechdrs[info->hdr->e_shstrndx].sh_offset; |
2571 | 2617 | ||
2572 | err = rewrite_section_headers(info); | 2618 | err = rewrite_section_headers(info, flags); |
2573 | if (err) | 2619 | if (err) |
2574 | return ERR_PTR(err); | 2620 | return ERR_PTR(err); |
2575 | 2621 | ||
@@ -2607,11 +2653,14 @@ static struct module *setup_load_info(struct load_info *info) | |||
2607 | return mod; | 2653 | return mod; |
2608 | } | 2654 | } |
2609 | 2655 | ||
2610 | static int check_modinfo(struct module *mod, struct load_info *info) | 2656 | static int check_modinfo(struct module *mod, struct load_info *info, int flags) |
2611 | { | 2657 | { |
2612 | const char *modmagic = get_modinfo(info, "vermagic"); | 2658 | const char *modmagic = get_modinfo(info, "vermagic"); |
2613 | int err; | 2659 | int err; |
2614 | 2660 | ||
2661 | if (flags & MODULE_INIT_IGNORE_VERMAGIC) | ||
2662 | modmagic = NULL; | ||
2663 | |||
2615 | /* This is allowed: modprobe --force will invalidate it. */ | 2664 | /* This is allowed: modprobe --force will invalidate it. */ |
2616 | if (!modmagic) { | 2665 | if (!modmagic) { |
2617 | err = try_to_force_load(mod, "bad vermagic"); | 2666 | err = try_to_force_load(mod, "bad vermagic"); |
@@ -2741,20 +2790,23 @@ static int move_module(struct module *mod, struct load_info *info) | |||
2741 | memset(ptr, 0, mod->core_size); | 2790 | memset(ptr, 0, mod->core_size); |
2742 | mod->module_core = ptr; | 2791 | mod->module_core = ptr; |
2743 | 2792 | ||
2744 | ptr = module_alloc_update_bounds(mod->init_size); | 2793 | if (mod->init_size) { |
2745 | /* | 2794 | ptr = module_alloc_update_bounds(mod->init_size); |
2746 | * The pointer to this block is stored in the module structure | 2795 | /* |
2747 | * which is inside the block. This block doesn't need to be | 2796 | * The pointer to this block is stored in the module structure |
2748 | * scanned as it contains data and code that will be freed | 2797 | * which is inside the block. This block doesn't need to be |
2749 | * after the module is initialized. | 2798 | * scanned as it contains data and code that will be freed |
2750 | */ | 2799 | * after the module is initialized. |
2751 | kmemleak_ignore(ptr); | 2800 | */ |
2752 | if (!ptr && mod->init_size) { | 2801 | kmemleak_ignore(ptr); |
2753 | module_free(mod, mod->module_core); | 2802 | if (!ptr) { |
2754 | return -ENOMEM; | 2803 | module_free(mod, mod->module_core); |
2755 | } | 2804 | return -ENOMEM; |
2756 | memset(ptr, 0, mod->init_size); | 2805 | } |
2757 | mod->module_init = ptr; | 2806 | memset(ptr, 0, mod->init_size); |
2807 | mod->module_init = ptr; | ||
2808 | } else | ||
2809 | mod->module_init = NULL; | ||
2758 | 2810 | ||
2759 | /* Transfer each section which specifies SHF_ALLOC */ | 2811 | /* Transfer each section which specifies SHF_ALLOC */ |
2760 | pr_debug("final section addresses:\n"); | 2812 | pr_debug("final section addresses:\n"); |
@@ -2847,18 +2899,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr, | |||
2847 | return 0; | 2899 | return 0; |
2848 | } | 2900 | } |
2849 | 2901 | ||
2850 | static struct module *layout_and_allocate(struct load_info *info) | 2902 | static struct module *layout_and_allocate(struct load_info *info, int flags) |
2851 | { | 2903 | { |
2852 | /* Module within temporary copy. */ | 2904 | /* Module within temporary copy. */ |
2853 | struct module *mod; | 2905 | struct module *mod; |
2854 | Elf_Shdr *pcpusec; | 2906 | Elf_Shdr *pcpusec; |
2855 | int err; | 2907 | int err; |
2856 | 2908 | ||
2857 | mod = setup_load_info(info); | 2909 | mod = setup_load_info(info, flags); |
2858 | if (IS_ERR(mod)) | 2910 | if (IS_ERR(mod)) |
2859 | return mod; | 2911 | return mod; |
2860 | 2912 | ||
2861 | err = check_modinfo(mod, info); | 2913 | err = check_modinfo(mod, info, flags); |
2862 | if (err) | 2914 | if (err) |
2863 | return ERR_PTR(err); | 2915 | return ERR_PTR(err); |
2864 | 2916 | ||
@@ -2945,33 +2997,124 @@ static bool finished_loading(const char *name) | |||
2945 | return ret; | 2997 | return ret; |
2946 | } | 2998 | } |
2947 | 2999 | ||
3000 | /* Call module constructors. */ | ||
3001 | static void do_mod_ctors(struct module *mod) | ||
3002 | { | ||
3003 | #ifdef CONFIG_CONSTRUCTORS | ||
3004 | unsigned long i; | ||
3005 | |||
3006 | for (i = 0; i < mod->num_ctors; i++) | ||
3007 | mod->ctors[i](); | ||
3008 | #endif | ||
3009 | } | ||
3010 | |||
3011 | /* This is where the real work happens */ | ||
3012 | static int do_init_module(struct module *mod) | ||
3013 | { | ||
3014 | int ret = 0; | ||
3015 | |||
3016 | blocking_notifier_call_chain(&module_notify_list, | ||
3017 | MODULE_STATE_COMING, mod); | ||
3018 | |||
3019 | /* Set RO and NX regions for core */ | ||
3020 | set_section_ro_nx(mod->module_core, | ||
3021 | mod->core_text_size, | ||
3022 | mod->core_ro_size, | ||
3023 | mod->core_size); | ||
3024 | |||
3025 | /* Set RO and NX regions for init */ | ||
3026 | set_section_ro_nx(mod->module_init, | ||
3027 | mod->init_text_size, | ||
3028 | mod->init_ro_size, | ||
3029 | mod->init_size); | ||
3030 | |||
3031 | do_mod_ctors(mod); | ||
3032 | /* Start the module */ | ||
3033 | if (mod->init != NULL) | ||
3034 | ret = do_one_initcall(mod->init); | ||
3035 | if (ret < 0) { | ||
3036 | /* Init routine failed: abort. Try to protect us from | ||
3037 | buggy refcounters. */ | ||
3038 | mod->state = MODULE_STATE_GOING; | ||
3039 | synchronize_sched(); | ||
3040 | module_put(mod); | ||
3041 | blocking_notifier_call_chain(&module_notify_list, | ||
3042 | MODULE_STATE_GOING, mod); | ||
3043 | free_module(mod); | ||
3044 | wake_up_all(&module_wq); | ||
3045 | return ret; | ||
3046 | } | ||
3047 | if (ret > 0) { | ||
3048 | printk(KERN_WARNING | ||
3049 | "%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" | ||
3050 | "%s: loading module anyway...\n", | ||
3051 | __func__, mod->name, ret, | ||
3052 | __func__); | ||
3053 | dump_stack(); | ||
3054 | } | ||
3055 | |||
3056 | /* Now it's a first class citizen! */ | ||
3057 | mod->state = MODULE_STATE_LIVE; | ||
3058 | blocking_notifier_call_chain(&module_notify_list, | ||
3059 | MODULE_STATE_LIVE, mod); | ||
3060 | |||
3061 | /* We need to finish all async code before the module init sequence is done */ | ||
3062 | async_synchronize_full(); | ||
3063 | |||
3064 | mutex_lock(&module_mutex); | ||
3065 | /* Drop initial reference. */ | ||
3066 | module_put(mod); | ||
3067 | trim_init_extable(mod); | ||
3068 | #ifdef CONFIG_KALLSYMS | ||
3069 | mod->num_symtab = mod->core_num_syms; | ||
3070 | mod->symtab = mod->core_symtab; | ||
3071 | mod->strtab = mod->core_strtab; | ||
3072 | #endif | ||
3073 | unset_module_init_ro_nx(mod); | ||
3074 | module_free(mod, mod->module_init); | ||
3075 | mod->module_init = NULL; | ||
3076 | mod->init_size = 0; | ||
3077 | mod->init_ro_size = 0; | ||
3078 | mod->init_text_size = 0; | ||
3079 | mutex_unlock(&module_mutex); | ||
3080 | wake_up_all(&module_wq); | ||
3081 | |||
3082 | return 0; | ||
3083 | } | ||
3084 | |||
3085 | static int may_init_module(void) | ||
3086 | { | ||
3087 | if (!capable(CAP_SYS_MODULE) || modules_disabled) | ||
3088 | return -EPERM; | ||
3089 | |||
3090 | return 0; | ||
3091 | } | ||
3092 | |||
2948 | /* Allocate and load the module: note that size of section 0 is always | 3093 | /* Allocate and load the module: note that size of section 0 is always |
2949 | zero, and we rely on this for optional sections. */ | 3094 | zero, and we rely on this for optional sections. */ |
2950 | static struct module *load_module(void __user *umod, | 3095 | static int load_module(struct load_info *info, const char __user *uargs, |
2951 | unsigned long len, | 3096 | int flags) |
2952 | const char __user *uargs) | ||
2953 | { | 3097 | { |
2954 | struct load_info info = { NULL, }; | ||
2955 | struct module *mod, *old; | 3098 | struct module *mod, *old; |
2956 | long err; | 3099 | long err; |
2957 | 3100 | ||
2958 | pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", | 3101 | err = module_sig_check(info); |
2959 | umod, len, uargs); | 3102 | if (err) |
3103 | goto free_copy; | ||
2960 | 3104 | ||
2961 | /* Copy in the blobs from userspace, check they are vaguely sane. */ | 3105 | err = elf_header_check(info); |
2962 | err = copy_and_check(&info, umod, len, uargs); | ||
2963 | if (err) | 3106 | if (err) |
2964 | return ERR_PTR(err); | 3107 | goto free_copy; |
2965 | 3108 | ||
2966 | /* Figure out module layout, and allocate all the memory. */ | 3109 | /* Figure out module layout, and allocate all the memory. */ |
2967 | mod = layout_and_allocate(&info); | 3110 | mod = layout_and_allocate(info, flags); |
2968 | if (IS_ERR(mod)) { | 3111 | if (IS_ERR(mod)) { |
2969 | err = PTR_ERR(mod); | 3112 | err = PTR_ERR(mod); |
2970 | goto free_copy; | 3113 | goto free_copy; |
2971 | } | 3114 | } |
2972 | 3115 | ||
2973 | #ifdef CONFIG_MODULE_SIG | 3116 | #ifdef CONFIG_MODULE_SIG |
2974 | mod->sig_ok = info.sig_ok; | 3117 | mod->sig_ok = info->sig_ok; |
2975 | if (!mod->sig_ok) | 3118 | if (!mod->sig_ok) |
2976 | add_taint_module(mod, TAINT_FORCED_MODULE); | 3119 | add_taint_module(mod, TAINT_FORCED_MODULE); |
2977 | #endif | 3120 | #endif |
@@ -2983,25 +3126,25 @@ static struct module *load_module(void __user *umod, | |||
2983 | 3126 | ||
2984 | /* Now we've got everything in the final locations, we can | 3127 | /* Now we've got everything in the final locations, we can |
2985 | * find optional sections. */ | 3128 | * find optional sections. */ |
2986 | find_module_sections(mod, &info); | 3129 | find_module_sections(mod, info); |
2987 | 3130 | ||
2988 | err = check_module_license_and_versions(mod); | 3131 | err = check_module_license_and_versions(mod); |
2989 | if (err) | 3132 | if (err) |
2990 | goto free_unload; | 3133 | goto free_unload; |
2991 | 3134 | ||
2992 | /* Set up MODINFO_ATTR fields */ | 3135 | /* Set up MODINFO_ATTR fields */ |
2993 | setup_modinfo(mod, &info); | 3136 | setup_modinfo(mod, info); |
2994 | 3137 | ||
2995 | /* Fix up syms, so that st_value is a pointer to location. */ | 3138 | /* Fix up syms, so that st_value is a pointer to location. */ |
2996 | err = simplify_symbols(mod, &info); | 3139 | err = simplify_symbols(mod, info); |
2997 | if (err < 0) | 3140 | if (err < 0) |
2998 | goto free_modinfo; | 3141 | goto free_modinfo; |
2999 | 3142 | ||
3000 | err = apply_relocations(mod, &info); | 3143 | err = apply_relocations(mod, info); |
3001 | if (err < 0) | 3144 | if (err < 0) |
3002 | goto free_modinfo; | 3145 | goto free_modinfo; |
3003 | 3146 | ||
3004 | err = post_relocation(mod, &info); | 3147 | err = post_relocation(mod, info); |
3005 | if (err < 0) | 3148 | if (err < 0) |
3006 | goto free_modinfo; | 3149 | goto free_modinfo; |
3007 | 3150 | ||
@@ -3041,14 +3184,14 @@ again: | |||
3041 | } | 3184 | } |
3042 | 3185 | ||
3043 | /* This has to be done once we're sure module name is unique. */ | 3186 | /* This has to be done once we're sure module name is unique. */ |
3044 | dynamic_debug_setup(info.debug, info.num_debug); | 3187 | dynamic_debug_setup(info->debug, info->num_debug); |
3045 | 3188 | ||
3046 | /* Find duplicate symbols */ | 3189 | /* Find duplicate symbols */ |
3047 | err = verify_export_symbols(mod); | 3190 | err = verify_export_symbols(mod); |
3048 | if (err < 0) | 3191 | if (err < 0) |
3049 | goto ddebug; | 3192 | goto ddebug; |
3050 | 3193 | ||
3051 | module_bug_finalize(info.hdr, info.sechdrs, mod); | 3194 | module_bug_finalize(info->hdr, info->sechdrs, mod); |
3052 | list_add_rcu(&mod->list, &modules); | 3195 | list_add_rcu(&mod->list, &modules); |
3053 | mutex_unlock(&module_mutex); | 3196 | mutex_unlock(&module_mutex); |
3054 | 3197 | ||
@@ -3059,16 +3202,17 @@ again: | |||
3059 | goto unlink; | 3202 | goto unlink; |
3060 | 3203 | ||
3061 | /* Link in to syfs. */ | 3204 | /* Link in to syfs. */ |
3062 | err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp); | 3205 | err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp); |
3063 | if (err < 0) | 3206 | if (err < 0) |
3064 | goto unlink; | 3207 | goto unlink; |
3065 | 3208 | ||
3066 | /* Get rid of temporary copy. */ | 3209 | /* Get rid of temporary copy. */ |
3067 | free_copy(&info); | 3210 | free_copy(info); |
3068 | 3211 | ||
3069 | /* Done! */ | 3212 | /* Done! */ |
3070 | trace_module_load(mod); | 3213 | trace_module_load(mod); |
3071 | return mod; | 3214 | |
3215 | return do_init_module(mod); | ||
3072 | 3216 | ||
3073 | unlink: | 3217 | unlink: |
3074 | mutex_lock(&module_mutex); | 3218 | mutex_lock(&module_mutex); |
@@ -3077,7 +3221,7 @@ again: | |||
3077 | module_bug_cleanup(mod); | 3221 | module_bug_cleanup(mod); |
3078 | wake_up_all(&module_wq); | 3222 | wake_up_all(&module_wq); |
3079 | ddebug: | 3223 | ddebug: |
3080 | dynamic_debug_remove(info.debug); | 3224 | dynamic_debug_remove(info->debug); |
3081 | unlock: | 3225 | unlock: |
3082 | mutex_unlock(&module_mutex); | 3226 | mutex_unlock(&module_mutex); |
3083 | synchronize_sched(); | 3227 | synchronize_sched(); |
@@ -3089,106 +3233,52 @@ again: | |||
3089 | free_unload: | 3233 | free_unload: |
3090 | module_unload_free(mod); | 3234 | module_unload_free(mod); |
3091 | free_module: | 3235 | free_module: |
3092 | module_deallocate(mod, &info); | 3236 | module_deallocate(mod, info); |
3093 | free_copy: | 3237 | free_copy: |
3094 | free_copy(&info); | 3238 | free_copy(info); |
3095 | return ERR_PTR(err); | 3239 | return err; |
3096 | } | ||
3097 | |||
3098 | /* Call module constructors. */ | ||
3099 | static void do_mod_ctors(struct module *mod) | ||
3100 | { | ||
3101 | #ifdef CONFIG_CONSTRUCTORS | ||
3102 | unsigned long i; | ||
3103 | |||
3104 | for (i = 0; i < mod->num_ctors; i++) | ||
3105 | mod->ctors[i](); | ||
3106 | #endif | ||
3107 | } | 3240 | } |
3108 | 3241 | ||
3109 | /* This is where the real work happens */ | ||
3110 | SYSCALL_DEFINE3(init_module, void __user *, umod, | 3242 | SYSCALL_DEFINE3(init_module, void __user *, umod, |
3111 | unsigned long, len, const char __user *, uargs) | 3243 | unsigned long, len, const char __user *, uargs) |
3112 | { | 3244 | { |
3113 | struct module *mod; | 3245 | int err; |
3114 | int ret = 0; | 3246 | struct load_info info = { }; |
3115 | 3247 | ||
3116 | /* Must have permission */ | 3248 | err = may_init_module(); |
3117 | if (!capable(CAP_SYS_MODULE) || modules_disabled) | 3249 | if (err) |
3118 | return -EPERM; | 3250 | return err; |
3119 | 3251 | ||
3120 | /* Do all the hard work */ | 3252 | pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n", |
3121 | mod = load_module(umod, len, uargs); | 3253 | umod, len, uargs); |
3122 | if (IS_ERR(mod)) | ||
3123 | return PTR_ERR(mod); | ||
3124 | 3254 | ||
3125 | blocking_notifier_call_chain(&module_notify_list, | 3255 | err = copy_module_from_user(umod, len, &info); |
3126 | MODULE_STATE_COMING, mod); | 3256 | if (err) |
3257 | return err; | ||
3127 | 3258 | ||
3128 | /* Set RO and NX regions for core */ | 3259 | return load_module(&info, uargs, 0); |
3129 | set_section_ro_nx(mod->module_core, | 3260 | } |
3130 | mod->core_text_size, | ||
3131 | mod->core_ro_size, | ||
3132 | mod->core_size); | ||
3133 | 3261 | ||
3134 | /* Set RO and NX regions for init */ | 3262 | SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) |
3135 | set_section_ro_nx(mod->module_init, | 3263 | { |
3136 | mod->init_text_size, | 3264 | int err; |
3137 | mod->init_ro_size, | 3265 | struct load_info info = { }; |
3138 | mod->init_size); | ||
3139 | 3266 | ||
3140 | do_mod_ctors(mod); | 3267 | err = may_init_module(); |
3141 | /* Start the module */ | 3268 | if (err) |
3142 | if (mod->init != NULL) | 3269 | return err; |
3143 | ret = do_one_initcall(mod->init); | ||
3144 | if (ret < 0) { | ||
3145 | /* Init routine failed: abort. Try to protect us from | ||
3146 | buggy refcounters. */ | ||
3147 | mod->state = MODULE_STATE_GOING; | ||
3148 | synchronize_sched(); | ||
3149 | module_put(mod); | ||
3150 | blocking_notifier_call_chain(&module_notify_list, | ||
3151 | MODULE_STATE_GOING, mod); | ||
3152 | free_module(mod); | ||
3153 | wake_up_all(&module_wq); | ||
3154 | return ret; | ||
3155 | } | ||
3156 | if (ret > 0) { | ||
3157 | printk(KERN_WARNING | ||
3158 | "%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n" | ||
3159 | "%s: loading module anyway...\n", | ||
3160 | __func__, mod->name, ret, | ||
3161 | __func__); | ||
3162 | dump_stack(); | ||
3163 | } | ||
3164 | 3270 | ||
3165 | /* Now it's a first class citizen! */ | 3271 | pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); |
3166 | mod->state = MODULE_STATE_LIVE; | ||
3167 | blocking_notifier_call_chain(&module_notify_list, | ||
3168 | MODULE_STATE_LIVE, mod); | ||
3169 | 3272 | ||
3170 | /* We need to finish all async code before the module init sequence is done */ | 3273 | if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS |
3171 | async_synchronize_full(); | 3274 | |MODULE_INIT_IGNORE_VERMAGIC)) |
3275 | return -EINVAL; | ||
3172 | 3276 | ||
3173 | mutex_lock(&module_mutex); | 3277 | err = copy_module_from_fd(fd, &info); |
3174 | /* Drop initial reference. */ | 3278 | if (err) |
3175 | module_put(mod); | 3279 | return err; |
3176 | trim_init_extable(mod); | ||
3177 | #ifdef CONFIG_KALLSYMS | ||
3178 | mod->num_symtab = mod->core_num_syms; | ||
3179 | mod->symtab = mod->core_symtab; | ||
3180 | mod->strtab = mod->core_strtab; | ||
3181 | #endif | ||
3182 | unset_module_init_ro_nx(mod); | ||
3183 | module_free(mod, mod->module_init); | ||
3184 | mod->module_init = NULL; | ||
3185 | mod->init_size = 0; | ||
3186 | mod->init_ro_size = 0; | ||
3187 | mod->init_text_size = 0; | ||
3188 | mutex_unlock(&module_mutex); | ||
3189 | wake_up_all(&module_wq); | ||
3190 | 3280 | ||
3191 | return 0; | 3281 | return load_module(&info, uargs, flags); |
3192 | } | 3282 | } |
3193 | 3283 | ||
3194 | static inline int within(unsigned long addr, void *start, unsigned long size) | 3284 | static inline int within(unsigned long addr, void *start, unsigned long size) |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 7e1c3de1ce45..78e2ecb20165 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -57,7 +57,8 @@ static inline struct nsproxy *create_nsproxy(void) | |||
57 | * leave it to the caller to do proper locking and attach it to task. | 57 | * leave it to the caller to do proper locking and attach it to task. |
58 | */ | 58 | */ |
59 | static struct nsproxy *create_new_namespaces(unsigned long flags, | 59 | static struct nsproxy *create_new_namespaces(unsigned long flags, |
60 | struct task_struct *tsk, struct fs_struct *new_fs) | 60 | struct task_struct *tsk, struct user_namespace *user_ns, |
61 | struct fs_struct *new_fs) | ||
61 | { | 62 | { |
62 | struct nsproxy *new_nsp; | 63 | struct nsproxy *new_nsp; |
63 | int err; | 64 | int err; |
@@ -66,31 +67,31 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, | |||
66 | if (!new_nsp) | 67 | if (!new_nsp) |
67 | return ERR_PTR(-ENOMEM); | 68 | return ERR_PTR(-ENOMEM); |
68 | 69 | ||
69 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); | 70 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs); |
70 | if (IS_ERR(new_nsp->mnt_ns)) { | 71 | if (IS_ERR(new_nsp->mnt_ns)) { |
71 | err = PTR_ERR(new_nsp->mnt_ns); | 72 | err = PTR_ERR(new_nsp->mnt_ns); |
72 | goto out_ns; | 73 | goto out_ns; |
73 | } | 74 | } |
74 | 75 | ||
75 | new_nsp->uts_ns = copy_utsname(flags, tsk); | 76 | new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns); |
76 | if (IS_ERR(new_nsp->uts_ns)) { | 77 | if (IS_ERR(new_nsp->uts_ns)) { |
77 | err = PTR_ERR(new_nsp->uts_ns); | 78 | err = PTR_ERR(new_nsp->uts_ns); |
78 | goto out_uts; | 79 | goto out_uts; |
79 | } | 80 | } |
80 | 81 | ||
81 | new_nsp->ipc_ns = copy_ipcs(flags, tsk); | 82 | new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns); |
82 | if (IS_ERR(new_nsp->ipc_ns)) { | 83 | if (IS_ERR(new_nsp->ipc_ns)) { |
83 | err = PTR_ERR(new_nsp->ipc_ns); | 84 | err = PTR_ERR(new_nsp->ipc_ns); |
84 | goto out_ipc; | 85 | goto out_ipc; |
85 | } | 86 | } |
86 | 87 | ||
87 | new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); | 88 | new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns); |
88 | if (IS_ERR(new_nsp->pid_ns)) { | 89 | if (IS_ERR(new_nsp->pid_ns)) { |
89 | err = PTR_ERR(new_nsp->pid_ns); | 90 | err = PTR_ERR(new_nsp->pid_ns); |
90 | goto out_pid; | 91 | goto out_pid; |
91 | } | 92 | } |
92 | 93 | ||
93 | new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns); | 94 | new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns); |
94 | if (IS_ERR(new_nsp->net_ns)) { | 95 | if (IS_ERR(new_nsp->net_ns)) { |
95 | err = PTR_ERR(new_nsp->net_ns); | 96 | err = PTR_ERR(new_nsp->net_ns); |
96 | goto out_net; | 97 | goto out_net; |
@@ -122,6 +123,7 @@ out_ns: | |||
122 | int copy_namespaces(unsigned long flags, struct task_struct *tsk) | 123 | int copy_namespaces(unsigned long flags, struct task_struct *tsk) |
123 | { | 124 | { |
124 | struct nsproxy *old_ns = tsk->nsproxy; | 125 | struct nsproxy *old_ns = tsk->nsproxy; |
126 | struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); | ||
125 | struct nsproxy *new_ns; | 127 | struct nsproxy *new_ns; |
126 | int err = 0; | 128 | int err = 0; |
127 | 129 | ||
@@ -134,7 +136,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) | |||
134 | CLONE_NEWPID | CLONE_NEWNET))) | 136 | CLONE_NEWPID | CLONE_NEWNET))) |
135 | return 0; | 137 | return 0; |
136 | 138 | ||
137 | if (!capable(CAP_SYS_ADMIN)) { | 139 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) { |
138 | err = -EPERM; | 140 | err = -EPERM; |
139 | goto out; | 141 | goto out; |
140 | } | 142 | } |
@@ -151,7 +153,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) | |||
151 | goto out; | 153 | goto out; |
152 | } | 154 | } |
153 | 155 | ||
154 | new_ns = create_new_namespaces(flags, tsk, tsk->fs); | 156 | new_ns = create_new_namespaces(flags, tsk, |
157 | task_cred_xxx(tsk, user_ns), tsk->fs); | ||
155 | if (IS_ERR(new_ns)) { | 158 | if (IS_ERR(new_ns)) { |
156 | err = PTR_ERR(new_ns); | 159 | err = PTR_ERR(new_ns); |
157 | goto out; | 160 | goto out; |
@@ -183,19 +186,21 @@ void free_nsproxy(struct nsproxy *ns) | |||
183 | * On success, returns the new nsproxy. | 186 | * On success, returns the new nsproxy. |
184 | */ | 187 | */ |
185 | int unshare_nsproxy_namespaces(unsigned long unshare_flags, | 188 | int unshare_nsproxy_namespaces(unsigned long unshare_flags, |
186 | struct nsproxy **new_nsp, struct fs_struct *new_fs) | 189 | struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs) |
187 | { | 190 | { |
191 | struct user_namespace *user_ns; | ||
188 | int err = 0; | 192 | int err = 0; |
189 | 193 | ||
190 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | | 194 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
191 | CLONE_NEWNET))) | 195 | CLONE_NEWNET | CLONE_NEWPID))) |
192 | return 0; | 196 | return 0; |
193 | 197 | ||
194 | if (!capable(CAP_SYS_ADMIN)) | 198 | user_ns = new_cred ? new_cred->user_ns : current_user_ns(); |
199 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) | ||
195 | return -EPERM; | 200 | return -EPERM; |
196 | 201 | ||
197 | *new_nsp = create_new_namespaces(unshare_flags, current, | 202 | *new_nsp = create_new_namespaces(unshare_flags, current, user_ns, |
198 | new_fs ? new_fs : current->fs); | 203 | new_fs ? new_fs : current->fs); |
199 | if (IS_ERR(*new_nsp)) { | 204 | if (IS_ERR(*new_nsp)) { |
200 | err = PTR_ERR(*new_nsp); | 205 | err = PTR_ERR(*new_nsp); |
201 | goto out; | 206 | goto out; |
@@ -241,9 +246,6 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) | |||
241 | struct file *file; | 246 | struct file *file; |
242 | int err; | 247 | int err; |
243 | 248 | ||
244 | if (!capable(CAP_SYS_ADMIN)) | ||
245 | return -EPERM; | ||
246 | |||
247 | file = proc_ns_fget(fd); | 249 | file = proc_ns_fget(fd); |
248 | if (IS_ERR(file)) | 250 | if (IS_ERR(file)) |
249 | return PTR_ERR(file); | 251 | return PTR_ERR(file); |
@@ -254,7 +256,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) | |||
254 | if (nstype && (ops->type != nstype)) | 256 | if (nstype && (ops->type != nstype)) |
255 | goto out; | 257 | goto out; |
256 | 258 | ||
257 | new_nsproxy = create_new_namespaces(0, tsk, tsk->fs); | 259 | new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); |
258 | if (IS_ERR(new_nsproxy)) { | 260 | if (IS_ERR(new_nsproxy)) { |
259 | err = PTR_ERR(new_nsproxy); | 261 | err = PTR_ERR(new_nsproxy); |
260 | goto out; | 262 | goto out; |
diff --git a/kernel/pid.c b/kernel/pid.c index fd996c1ed9f8..36aa02ff17d6 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/pid_namespace.h> | 36 | #include <linux/pid_namespace.h> |
37 | #include <linux/init_task.h> | 37 | #include <linux/init_task.h> |
38 | #include <linux/syscalls.h> | 38 | #include <linux/syscalls.h> |
39 | #include <linux/proc_fs.h> | ||
39 | 40 | ||
40 | #define pid_hashfn(nr, ns) \ | 41 | #define pid_hashfn(nr, ns) \ |
41 | hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) | 42 | hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) |
@@ -78,24 +79,11 @@ struct pid_namespace init_pid_ns = { | |||
78 | .last_pid = 0, | 79 | .last_pid = 0, |
79 | .level = 0, | 80 | .level = 0, |
80 | .child_reaper = &init_task, | 81 | .child_reaper = &init_task, |
82 | .user_ns = &init_user_ns, | ||
83 | .proc_inum = PROC_PID_INIT_INO, | ||
81 | }; | 84 | }; |
82 | EXPORT_SYMBOL_GPL(init_pid_ns); | 85 | EXPORT_SYMBOL_GPL(init_pid_ns); |
83 | 86 | ||
84 | int is_container_init(struct task_struct *tsk) | ||
85 | { | ||
86 | int ret = 0; | ||
87 | struct pid *pid; | ||
88 | |||
89 | rcu_read_lock(); | ||
90 | pid = task_pid(tsk); | ||
91 | if (pid != NULL && pid->numbers[pid->level].nr == 1) | ||
92 | ret = 1; | ||
93 | rcu_read_unlock(); | ||
94 | |||
95 | return ret; | ||
96 | } | ||
97 | EXPORT_SYMBOL(is_container_init); | ||
98 | |||
99 | /* | 87 | /* |
100 | * Note: disable interrupts while the pidmap_lock is held as an | 88 | * Note: disable interrupts while the pidmap_lock is held as an |
101 | * interrupt might come in and do read_lock(&tasklist_lock). | 89 | * interrupt might come in and do read_lock(&tasklist_lock). |
@@ -269,8 +257,24 @@ void free_pid(struct pid *pid) | |||
269 | unsigned long flags; | 257 | unsigned long flags; |
270 | 258 | ||
271 | spin_lock_irqsave(&pidmap_lock, flags); | 259 | spin_lock_irqsave(&pidmap_lock, flags); |
272 | for (i = 0; i <= pid->level; i++) | 260 | for (i = 0; i <= pid->level; i++) { |
273 | hlist_del_rcu(&pid->numbers[i].pid_chain); | 261 | struct upid *upid = pid->numbers + i; |
262 | struct pid_namespace *ns = upid->ns; | ||
263 | hlist_del_rcu(&upid->pid_chain); | ||
264 | switch(--ns->nr_hashed) { | ||
265 | case 1: | ||
266 | /* When all that is left in the pid namespace | ||
267 | * is the reaper wake up the reaper. The reaper | ||
268 | * may be sleeping in zap_pid_ns_processes(). | ||
269 | */ | ||
270 | wake_up_process(ns->child_reaper); | ||
271 | break; | ||
272 | case 0: | ||
273 | ns->nr_hashed = -1; | ||
274 | schedule_work(&ns->proc_work); | ||
275 | break; | ||
276 | } | ||
277 | } | ||
274 | spin_unlock_irqrestore(&pidmap_lock, flags); | 278 | spin_unlock_irqrestore(&pidmap_lock, flags); |
275 | 279 | ||
276 | for (i = 0; i <= pid->level; i++) | 280 | for (i = 0; i <= pid->level; i++) |
@@ -292,6 +296,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) | |||
292 | goto out; | 296 | goto out; |
293 | 297 | ||
294 | tmp = ns; | 298 | tmp = ns; |
299 | pid->level = ns->level; | ||
295 | for (i = ns->level; i >= 0; i--) { | 300 | for (i = ns->level; i >= 0; i--) { |
296 | nr = alloc_pidmap(tmp); | 301 | nr = alloc_pidmap(tmp); |
297 | if (nr < 0) | 302 | if (nr < 0) |
@@ -302,22 +307,32 @@ struct pid *alloc_pid(struct pid_namespace *ns) | |||
302 | tmp = tmp->parent; | 307 | tmp = tmp->parent; |
303 | } | 308 | } |
304 | 309 | ||
310 | if (unlikely(is_child_reaper(pid))) { | ||
311 | if (pid_ns_prepare_proc(ns)) | ||
312 | goto out_free; | ||
313 | } | ||
314 | |||
305 | get_pid_ns(ns); | 315 | get_pid_ns(ns); |
306 | pid->level = ns->level; | ||
307 | atomic_set(&pid->count, 1); | 316 | atomic_set(&pid->count, 1); |
308 | for (type = 0; type < PIDTYPE_MAX; ++type) | 317 | for (type = 0; type < PIDTYPE_MAX; ++type) |
309 | INIT_HLIST_HEAD(&pid->tasks[type]); | 318 | INIT_HLIST_HEAD(&pid->tasks[type]); |
310 | 319 | ||
311 | upid = pid->numbers + ns->level; | 320 | upid = pid->numbers + ns->level; |
312 | spin_lock_irq(&pidmap_lock); | 321 | spin_lock_irq(&pidmap_lock); |
313 | for ( ; upid >= pid->numbers; --upid) | 322 | if (ns->nr_hashed < 0) |
323 | goto out_unlock; | ||
324 | for ( ; upid >= pid->numbers; --upid) { | ||
314 | hlist_add_head_rcu(&upid->pid_chain, | 325 | hlist_add_head_rcu(&upid->pid_chain, |
315 | &pid_hash[pid_hashfn(upid->nr, upid->ns)]); | 326 | &pid_hash[pid_hashfn(upid->nr, upid->ns)]); |
327 | upid->ns->nr_hashed++; | ||
328 | } | ||
316 | spin_unlock_irq(&pidmap_lock); | 329 | spin_unlock_irq(&pidmap_lock); |
317 | 330 | ||
318 | out: | 331 | out: |
319 | return pid; | 332 | return pid; |
320 | 333 | ||
334 | out_unlock: | ||
335 | spin_unlock(&pidmap_lock); | ||
321 | out_free: | 336 | out_free: |
322 | while (++i <= ns->level) | 337 | while (++i <= ns->level) |
323 | free_pidmap(pid->numbers + i); | 338 | free_pidmap(pid->numbers + i); |
@@ -344,7 +359,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns); | |||
344 | 359 | ||
345 | struct pid *find_vpid(int nr) | 360 | struct pid *find_vpid(int nr) |
346 | { | 361 | { |
347 | return find_pid_ns(nr, current->nsproxy->pid_ns); | 362 | return find_pid_ns(nr, task_active_pid_ns(current)); |
348 | } | 363 | } |
349 | EXPORT_SYMBOL_GPL(find_vpid); | 364 | EXPORT_SYMBOL_GPL(find_vpid); |
350 | 365 | ||
@@ -428,7 +443,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | |||
428 | 443 | ||
429 | struct task_struct *find_task_by_vpid(pid_t vnr) | 444 | struct task_struct *find_task_by_vpid(pid_t vnr) |
430 | { | 445 | { |
431 | return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns); | 446 | return find_task_by_pid_ns(vnr, task_active_pid_ns(current)); |
432 | } | 447 | } |
433 | 448 | ||
434 | struct pid *get_task_pid(struct task_struct *task, enum pid_type type) | 449 | struct pid *get_task_pid(struct task_struct *task, enum pid_type type) |
@@ -483,7 +498,7 @@ EXPORT_SYMBOL_GPL(pid_nr_ns); | |||
483 | 498 | ||
484 | pid_t pid_vnr(struct pid *pid) | 499 | pid_t pid_vnr(struct pid *pid) |
485 | { | 500 | { |
486 | return pid_nr_ns(pid, current->nsproxy->pid_ns); | 501 | return pid_nr_ns(pid, task_active_pid_ns(current)); |
487 | } | 502 | } |
488 | EXPORT_SYMBOL_GPL(pid_vnr); | 503 | EXPORT_SYMBOL_GPL(pid_vnr); |
489 | 504 | ||
@@ -494,7 +509,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, | |||
494 | 509 | ||
495 | rcu_read_lock(); | 510 | rcu_read_lock(); |
496 | if (!ns) | 511 | if (!ns) |
497 | ns = current->nsproxy->pid_ns; | 512 | ns = task_active_pid_ns(current); |
498 | if (likely(pid_alive(task))) { | 513 | if (likely(pid_alive(task))) { |
499 | if (type != PIDTYPE_PID) | 514 | if (type != PIDTYPE_PID) |
500 | task = task->group_leader; | 515 | task = task->group_leader; |
@@ -569,6 +584,7 @@ void __init pidmap_init(void) | |||
569 | /* Reserve PID 0. We never call free_pidmap(0) */ | 584 | /* Reserve PID 0. We never call free_pidmap(0) */ |
570 | set_bit(0, init_pid_ns.pidmap[0].page); | 585 | set_bit(0, init_pid_ns.pidmap[0].page); |
571 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); | 586 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); |
587 | init_pid_ns.nr_hashed = 1; | ||
572 | 588 | ||
573 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, | 589 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, |
574 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); | 590 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7b07cc0dfb75..fdbd0cdf271a 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/pid.h> | 11 | #include <linux/pid.h> |
12 | #include <linux/pid_namespace.h> | 12 | #include <linux/pid_namespace.h> |
13 | #include <linux/user_namespace.h> | ||
13 | #include <linux/syscalls.h> | 14 | #include <linux/syscalls.h> |
14 | #include <linux/err.h> | 15 | #include <linux/err.h> |
15 | #include <linux/acct.h> | 16 | #include <linux/acct.h> |
@@ -71,10 +72,17 @@ err_alloc: | |||
71 | return NULL; | 72 | return NULL; |
72 | } | 73 | } |
73 | 74 | ||
75 | static void proc_cleanup_work(struct work_struct *work) | ||
76 | { | ||
77 | struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); | ||
78 | pid_ns_release_proc(ns); | ||
79 | } | ||
80 | |||
74 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ | 81 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ |
75 | #define MAX_PID_NS_LEVEL 32 | 82 | #define MAX_PID_NS_LEVEL 32 |
76 | 83 | ||
77 | static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) | 84 | static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, |
85 | struct pid_namespace *parent_pid_ns) | ||
78 | { | 86 | { |
79 | struct pid_namespace *ns; | 87 | struct pid_namespace *ns; |
80 | unsigned int level = parent_pid_ns->level + 1; | 88 | unsigned int level = parent_pid_ns->level + 1; |
@@ -99,9 +107,15 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
99 | if (ns->pid_cachep == NULL) | 107 | if (ns->pid_cachep == NULL) |
100 | goto out_free_map; | 108 | goto out_free_map; |
101 | 109 | ||
110 | err = proc_alloc_inum(&ns->proc_inum); | ||
111 | if (err) | ||
112 | goto out_free_map; | ||
113 | |||
102 | kref_init(&ns->kref); | 114 | kref_init(&ns->kref); |
103 | ns->level = level; | 115 | ns->level = level; |
104 | ns->parent = get_pid_ns(parent_pid_ns); | 116 | ns->parent = get_pid_ns(parent_pid_ns); |
117 | ns->user_ns = get_user_ns(user_ns); | ||
118 | INIT_WORK(&ns->proc_work, proc_cleanup_work); | ||
105 | 119 | ||
106 | set_bit(0, ns->pidmap[0].page); | 120 | set_bit(0, ns->pidmap[0].page); |
107 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | 121 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); |
@@ -109,14 +123,8 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
109 | for (i = 1; i < PIDMAP_ENTRIES; i++) | 123 | for (i = 1; i < PIDMAP_ENTRIES; i++) |
110 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | 124 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
111 | 125 | ||
112 | err = pid_ns_prepare_proc(ns); | ||
113 | if (err) | ||
114 | goto out_put_parent_pid_ns; | ||
115 | |||
116 | return ns; | 126 | return ns; |
117 | 127 | ||
118 | out_put_parent_pid_ns: | ||
119 | put_pid_ns(parent_pid_ns); | ||
120 | out_free_map: | 128 | out_free_map: |
121 | kfree(ns->pidmap[0].page); | 129 | kfree(ns->pidmap[0].page); |
122 | out_free: | 130 | out_free: |
@@ -129,18 +137,21 @@ static void destroy_pid_namespace(struct pid_namespace *ns) | |||
129 | { | 137 | { |
130 | int i; | 138 | int i; |
131 | 139 | ||
140 | proc_free_inum(ns->proc_inum); | ||
132 | for (i = 0; i < PIDMAP_ENTRIES; i++) | 141 | for (i = 0; i < PIDMAP_ENTRIES; i++) |
133 | kfree(ns->pidmap[i].page); | 142 | kfree(ns->pidmap[i].page); |
143 | put_user_ns(ns->user_ns); | ||
134 | kmem_cache_free(pid_ns_cachep, ns); | 144 | kmem_cache_free(pid_ns_cachep, ns); |
135 | } | 145 | } |
136 | 146 | ||
137 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | 147 | struct pid_namespace *copy_pid_ns(unsigned long flags, |
148 | struct user_namespace *user_ns, struct pid_namespace *old_ns) | ||
138 | { | 149 | { |
139 | if (!(flags & CLONE_NEWPID)) | 150 | if (!(flags & CLONE_NEWPID)) |
140 | return get_pid_ns(old_ns); | 151 | return get_pid_ns(old_ns); |
141 | if (flags & (CLONE_THREAD|CLONE_PARENT)) | 152 | if (task_active_pid_ns(current) != old_ns) |
142 | return ERR_PTR(-EINVAL); | 153 | return ERR_PTR(-EINVAL); |
143 | return create_pid_namespace(old_ns); | 154 | return create_pid_namespace(user_ns, old_ns); |
144 | } | 155 | } |
145 | 156 | ||
146 | static void free_pid_ns(struct kref *kref) | 157 | static void free_pid_ns(struct kref *kref) |
@@ -211,22 +222,15 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
211 | 222 | ||
212 | /* | 223 | /* |
213 | * sys_wait4() above can't reap the TASK_DEAD children. | 224 | * sys_wait4() above can't reap the TASK_DEAD children. |
214 | * Make sure they all go away, see __unhash_process(). | 225 | * Make sure they all go away, see free_pid(). |
215 | */ | 226 | */ |
216 | for (;;) { | 227 | for (;;) { |
217 | bool need_wait = false; | 228 | set_current_state(TASK_UNINTERRUPTIBLE); |
218 | 229 | if (pid_ns->nr_hashed == 1) | |
219 | read_lock(&tasklist_lock); | ||
220 | if (!list_empty(&current->children)) { | ||
221 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
222 | need_wait = true; | ||
223 | } | ||
224 | read_unlock(&tasklist_lock); | ||
225 | |||
226 | if (!need_wait) | ||
227 | break; | 230 | break; |
228 | schedule(); | 231 | schedule(); |
229 | } | 232 | } |
233 | __set_current_state(TASK_RUNNING); | ||
230 | 234 | ||
231 | if (pid_ns->reboot) | 235 | if (pid_ns->reboot) |
232 | current->signal->group_exit_code = pid_ns->reboot; | 236 | current->signal->group_exit_code = pid_ns->reboot; |
@@ -239,9 +243,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
239 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, | 243 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, |
240 | void __user *buffer, size_t *lenp, loff_t *ppos) | 244 | void __user *buffer, size_t *lenp, loff_t *ppos) |
241 | { | 245 | { |
246 | struct pid_namespace *pid_ns = task_active_pid_ns(current); | ||
242 | struct ctl_table tmp = *table; | 247 | struct ctl_table tmp = *table; |
243 | 248 | ||
244 | if (write && !capable(CAP_SYS_ADMIN)) | 249 | if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) |
245 | return -EPERM; | 250 | return -EPERM; |
246 | 251 | ||
247 | /* | 252 | /* |
@@ -250,7 +255,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, | |||
250 | * it should synchronize its usage with external means. | 255 | * it should synchronize its usage with external means. |
251 | */ | 256 | */ |
252 | 257 | ||
253 | tmp.data = &current->nsproxy->pid_ns->last_pid; | 258 | tmp.data = &pid_ns->last_pid; |
254 | return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); | 259 | return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); |
255 | } | 260 | } |
256 | 261 | ||
@@ -299,6 +304,68 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) | |||
299 | return 0; | 304 | return 0; |
300 | } | 305 | } |
301 | 306 | ||
307 | static void *pidns_get(struct task_struct *task) | ||
308 | { | ||
309 | struct pid_namespace *ns; | ||
310 | |||
311 | rcu_read_lock(); | ||
312 | ns = get_pid_ns(task_active_pid_ns(task)); | ||
313 | rcu_read_unlock(); | ||
314 | |||
315 | return ns; | ||
316 | } | ||
317 | |||
318 | static void pidns_put(void *ns) | ||
319 | { | ||
320 | put_pid_ns(ns); | ||
321 | } | ||
322 | |||
323 | static int pidns_install(struct nsproxy *nsproxy, void *ns) | ||
324 | { | ||
325 | struct pid_namespace *active = task_active_pid_ns(current); | ||
326 | struct pid_namespace *ancestor, *new = ns; | ||
327 | |||
328 | if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || | ||
329 | !nsown_capable(CAP_SYS_ADMIN)) | ||
330 | return -EPERM; | ||
331 | |||
332 | /* | ||
333 | * Only allow entering the current active pid namespace | ||
334 | * or a child of the current active pid namespace. | ||
335 | * | ||
336 | * This is required for fork to return a usable pid value and | ||
337 | * this maintains the property that processes and their | ||
338 | * children can not escape their current pid namespace. | ||
339 | */ | ||
340 | if (new->level < active->level) | ||
341 | return -EINVAL; | ||
342 | |||
343 | ancestor = new; | ||
344 | while (ancestor->level > active->level) | ||
345 | ancestor = ancestor->parent; | ||
346 | if (ancestor != active) | ||
347 | return -EINVAL; | ||
348 | |||
349 | put_pid_ns(nsproxy->pid_ns); | ||
350 | nsproxy->pid_ns = get_pid_ns(new); | ||
351 | return 0; | ||
352 | } | ||
353 | |||
354 | static unsigned int pidns_inum(void *ns) | ||
355 | { | ||
356 | struct pid_namespace *pid_ns = ns; | ||
357 | return pid_ns->proc_inum; | ||
358 | } | ||
359 | |||
360 | const struct proc_ns_operations pidns_operations = { | ||
361 | .name = "pid", | ||
362 | .type = CLONE_NEWPID, | ||
363 | .get = pidns_get, | ||
364 | .put = pidns_put, | ||
365 | .install = pidns_install, | ||
366 | .inum = pidns_inum, | ||
367 | }; | ||
368 | |||
302 | static __init int pid_namespaces_init(void) | 369 | static __init int pid_namespaces_init(void) |
303 | { | 370 | { |
304 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | 371 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index d73840271dce..a278cad1d5d6 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | #include <linux/kernel_stat.h> | 10 | #include <linux/kernel_stat.h> |
11 | #include <trace/events/timer.h> | 11 | #include <trace/events/timer.h> |
12 | #include <linux/random.h> | ||
12 | 13 | ||
13 | /* | 14 | /* |
14 | * Called after updating RLIMIT_CPU to run cpu timer and update | 15 | * Called after updating RLIMIT_CPU to run cpu timer and update |
@@ -470,6 +471,8 @@ static void cleanup_timers(struct list_head *head, | |||
470 | */ | 471 | */ |
471 | void posix_cpu_timers_exit(struct task_struct *tsk) | 472 | void posix_cpu_timers_exit(struct task_struct *tsk) |
472 | { | 473 | { |
474 | add_device_randomness((const void*) &tsk->se.sum_exec_runtime, | ||
475 | sizeof(unsigned long long)); | ||
473 | cleanup_timers(tsk->cpu_timers, | 476 | cleanup_timers(tsk->cpu_timers, |
474 | tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); | 477 | tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); |
475 | 478 | ||
diff --git a/kernel/printk.c b/kernel/printk.c index 22e070f3470a..19c0d7bcf24a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -747,6 +747,21 @@ void __init setup_log_buf(int early) | |||
747 | free, (free * 100) / __LOG_BUF_LEN); | 747 | free, (free * 100) / __LOG_BUF_LEN); |
748 | } | 748 | } |
749 | 749 | ||
750 | static bool __read_mostly ignore_loglevel; | ||
751 | |||
752 | static int __init ignore_loglevel_setup(char *str) | ||
753 | { | ||
754 | ignore_loglevel = 1; | ||
755 | printk(KERN_INFO "debug: ignoring loglevel setting.\n"); | ||
756 | |||
757 | return 0; | ||
758 | } | ||
759 | |||
760 | early_param("ignore_loglevel", ignore_loglevel_setup); | ||
761 | module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); | ||
762 | MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" | ||
763 | "print all kernel messages to the console."); | ||
764 | |||
750 | #ifdef CONFIG_BOOT_PRINTK_DELAY | 765 | #ifdef CONFIG_BOOT_PRINTK_DELAY |
751 | 766 | ||
752 | static int boot_delay; /* msecs delay after each printk during bootup */ | 767 | static int boot_delay; /* msecs delay after each printk during bootup */ |
@@ -770,13 +785,15 @@ static int __init boot_delay_setup(char *str) | |||
770 | } | 785 | } |
771 | __setup("boot_delay=", boot_delay_setup); | 786 | __setup("boot_delay=", boot_delay_setup); |
772 | 787 | ||
773 | static void boot_delay_msec(void) | 788 | static void boot_delay_msec(int level) |
774 | { | 789 | { |
775 | unsigned long long k; | 790 | unsigned long long k; |
776 | unsigned long timeout; | 791 | unsigned long timeout; |
777 | 792 | ||
778 | if (boot_delay == 0 || system_state != SYSTEM_BOOTING) | 793 | if ((boot_delay == 0 || system_state != SYSTEM_BOOTING) |
794 | || (level >= console_loglevel && !ignore_loglevel)) { | ||
779 | return; | 795 | return; |
796 | } | ||
780 | 797 | ||
781 | k = (unsigned long long)loops_per_msec * boot_delay; | 798 | k = (unsigned long long)loops_per_msec * boot_delay; |
782 | 799 | ||
@@ -795,7 +812,7 @@ static void boot_delay_msec(void) | |||
795 | } | 812 | } |
796 | } | 813 | } |
797 | #else | 814 | #else |
798 | static inline void boot_delay_msec(void) | 815 | static inline void boot_delay_msec(int level) |
799 | { | 816 | { |
800 | } | 817 | } |
801 | #endif | 818 | #endif |
@@ -1238,21 +1255,6 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) | |||
1238 | return do_syslog(type, buf, len, SYSLOG_FROM_CALL); | 1255 | return do_syslog(type, buf, len, SYSLOG_FROM_CALL); |
1239 | } | 1256 | } |
1240 | 1257 | ||
1241 | static bool __read_mostly ignore_loglevel; | ||
1242 | |||
1243 | static int __init ignore_loglevel_setup(char *str) | ||
1244 | { | ||
1245 | ignore_loglevel = 1; | ||
1246 | printk(KERN_INFO "debug: ignoring loglevel setting.\n"); | ||
1247 | |||
1248 | return 0; | ||
1249 | } | ||
1250 | |||
1251 | early_param("ignore_loglevel", ignore_loglevel_setup); | ||
1252 | module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); | ||
1253 | MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" | ||
1254 | "print all kernel messages to the console."); | ||
1255 | |||
1256 | /* | 1258 | /* |
1257 | * Call the console drivers, asking them to write out | 1259 | * Call the console drivers, asking them to write out |
1258 | * log_buf[start] to log_buf[end - 1]. | 1260 | * log_buf[start] to log_buf[end - 1]. |
@@ -1498,7 +1500,7 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1498 | int this_cpu; | 1500 | int this_cpu; |
1499 | int printed_len = 0; | 1501 | int printed_len = 0; |
1500 | 1502 | ||
1501 | boot_delay_msec(); | 1503 | boot_delay_msec(level); |
1502 | printk_delay(); | 1504 | printk_delay(); |
1503 | 1505 | ||
1504 | /* This stops the holder of console_sem just where we want him */ | 1506 | /* This stops the holder of console_sem just where we want him */ |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1f5e55dda955..1599157336a6 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -215,8 +215,12 @@ ok: | |||
215 | smp_rmb(); | 215 | smp_rmb(); |
216 | if (task->mm) | 216 | if (task->mm) |
217 | dumpable = get_dumpable(task->mm); | 217 | dumpable = get_dumpable(task->mm); |
218 | if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode)) | 218 | rcu_read_lock(); |
219 | if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { | ||
220 | rcu_read_unlock(); | ||
219 | return -EPERM; | 221 | return -EPERM; |
222 | } | ||
223 | rcu_read_unlock(); | ||
220 | 224 | ||
221 | return security_ptrace_access_check(task, mode); | 225 | return security_ptrace_access_check(task, mode); |
222 | } | 226 | } |
@@ -280,8 +284,10 @@ static int ptrace_attach(struct task_struct *task, long request, | |||
280 | 284 | ||
281 | if (seize) | 285 | if (seize) |
282 | flags |= PT_SEIZED; | 286 | flags |= PT_SEIZED; |
283 | if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) | 287 | rcu_read_lock(); |
288 | if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) | ||
284 | flags |= PT_PTRACE_CAP; | 289 | flags |= PT_PTRACE_CAP; |
290 | rcu_read_unlock(); | ||
285 | task->ptrace = flags; | 291 | task->ptrace = flags; |
286 | 292 | ||
287 | __ptrace_link(task, current); | 293 | __ptrace_link(task, current); |
@@ -457,6 +463,9 @@ void exit_ptrace(struct task_struct *tracer) | |||
457 | return; | 463 | return; |
458 | 464 | ||
459 | list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { | 465 | list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { |
466 | if (unlikely(p->ptrace & PT_EXITKILL)) | ||
467 | send_sig_info(SIGKILL, SEND_SIG_FORCED, p); | ||
468 | |||
460 | if (__ptrace_detach(tracer, p)) | 469 | if (__ptrace_detach(tracer, p)) |
461 | list_add(&p->ptrace_entry, &ptrace_dead); | 470 | list_add(&p->ptrace_entry, &ptrace_dead); |
462 | } | 471 | } |
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 3920d593e63c..ff55247e7049 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -86,33 +86,39 @@ int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, | |||
86 | return __res_counter_charge(counter, val, limit_fail_at, true); | 86 | return __res_counter_charge(counter, val, limit_fail_at, true); |
87 | } | 87 | } |
88 | 88 | ||
89 | void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) | 89 | u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) |
90 | { | 90 | { |
91 | if (WARN_ON(counter->usage < val)) | 91 | if (WARN_ON(counter->usage < val)) |
92 | val = counter->usage; | 92 | val = counter->usage; |
93 | 93 | ||
94 | counter->usage -= val; | 94 | counter->usage -= val; |
95 | return counter->usage; | ||
95 | } | 96 | } |
96 | 97 | ||
97 | void res_counter_uncharge_until(struct res_counter *counter, | 98 | u64 res_counter_uncharge_until(struct res_counter *counter, |
98 | struct res_counter *top, | 99 | struct res_counter *top, |
99 | unsigned long val) | 100 | unsigned long val) |
100 | { | 101 | { |
101 | unsigned long flags; | 102 | unsigned long flags; |
102 | struct res_counter *c; | 103 | struct res_counter *c; |
104 | u64 ret = 0; | ||
103 | 105 | ||
104 | local_irq_save(flags); | 106 | local_irq_save(flags); |
105 | for (c = counter; c != top; c = c->parent) { | 107 | for (c = counter; c != top; c = c->parent) { |
108 | u64 r; | ||
106 | spin_lock(&c->lock); | 109 | spin_lock(&c->lock); |
107 | res_counter_uncharge_locked(c, val); | 110 | r = res_counter_uncharge_locked(c, val); |
111 | if (c == counter) | ||
112 | ret = r; | ||
108 | spin_unlock(&c->lock); | 113 | spin_unlock(&c->lock); |
109 | } | 114 | } |
110 | local_irq_restore(flags); | 115 | local_irq_restore(flags); |
116 | return ret; | ||
111 | } | 117 | } |
112 | 118 | ||
113 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) | 119 | u64 res_counter_uncharge(struct res_counter *counter, unsigned long val) |
114 | { | 120 | { |
115 | res_counter_uncharge_until(counter, NULL, val); | 121 | return res_counter_uncharge_until(counter, NULL, val); |
116 | } | 122 | } |
117 | 123 | ||
118 | static inline unsigned long long * | 124 | static inline unsigned long long * |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c1fb82104bfb..257002c13bb0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -4097,8 +4097,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
4097 | goto out_free_cpus_allowed; | 4097 | goto out_free_cpus_allowed; |
4098 | } | 4098 | } |
4099 | retval = -EPERM; | 4099 | retval = -EPERM; |
4100 | if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE)) | 4100 | if (!check_same_owner(p)) { |
4101 | goto out_unlock; | 4101 | rcu_read_lock(); |
4102 | if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { | ||
4103 | rcu_read_unlock(); | ||
4104 | goto out_unlock; | ||
4105 | } | ||
4106 | rcu_read_unlock(); | ||
4107 | } | ||
4102 | 4108 | ||
4103 | retval = security_task_setscheduler(p); | 4109 | retval = security_task_setscheduler(p); |
4104 | if (retval) | 4110 | if (retval) |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4603d6cb9e25..5eea8707234a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -793,8 +793,11 @@ unsigned int sysctl_numa_balancing_scan_delay = 1000; | |||
793 | 793 | ||
794 | static void task_numa_placement(struct task_struct *p) | 794 | static void task_numa_placement(struct task_struct *p) |
795 | { | 795 | { |
796 | int seq = ACCESS_ONCE(p->mm->numa_scan_seq); | 796 | int seq; |
797 | 797 | ||
798 | if (!p->mm) /* for example, ksmd faulting in a user's mm */ | ||
799 | return; | ||
800 | seq = ACCESS_ONCE(p->mm->numa_scan_seq); | ||
798 | if (p->numa_scan_seq == seq) | 801 | if (p->numa_scan_seq == seq) |
799 | return; | 802 | return; |
800 | p->numa_scan_seq = seq; | 803 | p->numa_scan_seq = seq; |
diff --git a/kernel/signal.c b/kernel/signal.c index a49c7f36ceb3..580a91e63471 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1753,7 +1753,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, | |||
1753 | * see comment in do_notify_parent() about the following 4 lines | 1753 | * see comment in do_notify_parent() about the following 4 lines |
1754 | */ | 1754 | */ |
1755 | rcu_read_lock(); | 1755 | rcu_read_lock(); |
1756 | info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); | 1756 | info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent)); |
1757 | info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); | 1757 | info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); |
1758 | rcu_read_unlock(); | 1758 | rcu_read_unlock(); |
1759 | 1759 | ||
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index dbff751e4086..395084d4ce16 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -25,6 +25,7 @@ cond_syscall(sys_swapoff); | |||
25 | cond_syscall(sys_kexec_load); | 25 | cond_syscall(sys_kexec_load); |
26 | cond_syscall(compat_sys_kexec_load); | 26 | cond_syscall(compat_sys_kexec_load); |
27 | cond_syscall(sys_init_module); | 27 | cond_syscall(sys_init_module); |
28 | cond_syscall(sys_finit_module); | ||
28 | cond_syscall(sys_delete_module); | 29 | cond_syscall(sys_delete_module); |
29 | cond_syscall(sys_socketpair); | 30 | cond_syscall(sys_socketpair); |
30 | cond_syscall(sys_bind); | 31 | cond_syscall(sys_bind); |
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 65bdcf198d4e..5a6384450501 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -1344,7 +1344,7 @@ static ssize_t binary_sysctl(const int *name, int nlen, | |||
1344 | goto out_putname; | 1344 | goto out_putname; |
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | mnt = current->nsproxy->pid_ns->proc_mnt; | 1347 | mnt = task_active_pid_ns(current)->proc_mnt; |
1348 | file = file_open_root(mnt->mnt_root, mnt, pathname, flags); | 1348 | file = file_open_root(mnt->mnt_root, mnt, pathname, flags); |
1349 | result = PTR_ERR(file); | 1349 | result = PTR_ERR(file); |
1350 | if (IS_ERR(file)) | 1350 | if (IS_ERR(file)) |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index afd092de45b7..3ffe4c5ad3f3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -2675,12 +2675,12 @@ ftrace_notrace_open(struct inode *inode, struct file *file) | |||
2675 | } | 2675 | } |
2676 | 2676 | ||
2677 | loff_t | 2677 | loff_t |
2678 | ftrace_regex_lseek(struct file *file, loff_t offset, int origin) | 2678 | ftrace_regex_lseek(struct file *file, loff_t offset, int whence) |
2679 | { | 2679 | { |
2680 | loff_t ret; | 2680 | loff_t ret; |
2681 | 2681 | ||
2682 | if (file->f_mode & FMODE_READ) | 2682 | if (file->f_mode & FMODE_READ) |
2683 | ret = seq_lseek(file, offset, origin); | 2683 | ret = seq_lseek(file, offset, whence); |
2684 | else | 2684 | else |
2685 | file->f_pos = ret = 1; | 2685 | file->f_pos = ret = 1; |
2686 | 2686 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 61e081b4ba11..e5125677efa0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -3034,6 +3034,31 @@ static void set_buffer_entries(struct trace_array *tr, unsigned long val) | |||
3034 | tr->data[cpu]->entries = val; | 3034 | tr->data[cpu]->entries = val; |
3035 | } | 3035 | } |
3036 | 3036 | ||
3037 | /* resize @tr's buffer to the size of @size_tr's entries */ | ||
3038 | static int resize_buffer_duplicate_size(struct trace_array *tr, | ||
3039 | struct trace_array *size_tr, int cpu_id) | ||
3040 | { | ||
3041 | int cpu, ret = 0; | ||
3042 | |||
3043 | if (cpu_id == RING_BUFFER_ALL_CPUS) { | ||
3044 | for_each_tracing_cpu(cpu) { | ||
3045 | ret = ring_buffer_resize(tr->buffer, | ||
3046 | size_tr->data[cpu]->entries, cpu); | ||
3047 | if (ret < 0) | ||
3048 | break; | ||
3049 | tr->data[cpu]->entries = size_tr->data[cpu]->entries; | ||
3050 | } | ||
3051 | } else { | ||
3052 | ret = ring_buffer_resize(tr->buffer, | ||
3053 | size_tr->data[cpu_id]->entries, cpu_id); | ||
3054 | if (ret == 0) | ||
3055 | tr->data[cpu_id]->entries = | ||
3056 | size_tr->data[cpu_id]->entries; | ||
3057 | } | ||
3058 | |||
3059 | return ret; | ||
3060 | } | ||
3061 | |||
3037 | static int __tracing_resize_ring_buffer(unsigned long size, int cpu) | 3062 | static int __tracing_resize_ring_buffer(unsigned long size, int cpu) |
3038 | { | 3063 | { |
3039 | int ret; | 3064 | int ret; |
@@ -3058,23 +3083,8 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu) | |||
3058 | 3083 | ||
3059 | ret = ring_buffer_resize(max_tr.buffer, size, cpu); | 3084 | ret = ring_buffer_resize(max_tr.buffer, size, cpu); |
3060 | if (ret < 0) { | 3085 | if (ret < 0) { |
3061 | int r = 0; | 3086 | int r = resize_buffer_duplicate_size(&global_trace, |
3062 | 3087 | &global_trace, cpu); | |
3063 | if (cpu == RING_BUFFER_ALL_CPUS) { | ||
3064 | int i; | ||
3065 | for_each_tracing_cpu(i) { | ||
3066 | r = ring_buffer_resize(global_trace.buffer, | ||
3067 | global_trace.data[i]->entries, | ||
3068 | i); | ||
3069 | if (r < 0) | ||
3070 | break; | ||
3071 | } | ||
3072 | } else { | ||
3073 | r = ring_buffer_resize(global_trace.buffer, | ||
3074 | global_trace.data[cpu]->entries, | ||
3075 | cpu); | ||
3076 | } | ||
3077 | |||
3078 | if (r < 0) { | 3088 | if (r < 0) { |
3079 | /* | 3089 | /* |
3080 | * AARGH! We are left with different | 3090 | * AARGH! We are left with different |
@@ -3212,17 +3222,11 @@ static int tracing_set_tracer(const char *buf) | |||
3212 | 3222 | ||
3213 | topts = create_trace_option_files(t); | 3223 | topts = create_trace_option_files(t); |
3214 | if (t->use_max_tr) { | 3224 | if (t->use_max_tr) { |
3215 | int cpu; | ||
3216 | /* we need to make per cpu buffer sizes equivalent */ | 3225 | /* we need to make per cpu buffer sizes equivalent */ |
3217 | for_each_tracing_cpu(cpu) { | 3226 | ret = resize_buffer_duplicate_size(&max_tr, &global_trace, |
3218 | ret = ring_buffer_resize(max_tr.buffer, | 3227 | RING_BUFFER_ALL_CPUS); |
3219 | global_trace.data[cpu]->entries, | 3228 | if (ret < 0) |
3220 | cpu); | 3229 | goto out; |
3221 | if (ret < 0) | ||
3222 | goto out; | ||
3223 | max_tr.data[cpu]->entries = | ||
3224 | global_trace.data[cpu]->entries; | ||
3225 | } | ||
3226 | } | 3230 | } |
3227 | 3231 | ||
3228 | if (t->init) { | 3232 | if (t->init) { |
@@ -4271,13 +4275,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
4271 | return -ENOMEM; | 4275 | return -ENOMEM; |
4272 | 4276 | ||
4273 | if (*ppos & (PAGE_SIZE - 1)) { | 4277 | if (*ppos & (PAGE_SIZE - 1)) { |
4274 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); | ||
4275 | ret = -EINVAL; | 4278 | ret = -EINVAL; |
4276 | goto out; | 4279 | goto out; |
4277 | } | 4280 | } |
4278 | 4281 | ||
4279 | if (len & (PAGE_SIZE - 1)) { | 4282 | if (len & (PAGE_SIZE - 1)) { |
4280 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); | ||
4281 | if (len < PAGE_SIZE) { | 4283 | if (len < PAGE_SIZE) { |
4282 | ret = -EINVAL; | 4284 | ret = -EINVAL; |
4283 | goto out; | 4285 | goto out; |
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 0c1b165778e5..42ca822fc701 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -33,7 +33,6 @@ static unsigned long max_stack_size; | |||
33 | static arch_spinlock_t max_stack_lock = | 33 | static arch_spinlock_t max_stack_lock = |
34 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; | 34 | (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; |
35 | 35 | ||
36 | static int stack_trace_disabled __read_mostly; | ||
37 | static DEFINE_PER_CPU(int, trace_active); | 36 | static DEFINE_PER_CPU(int, trace_active); |
38 | static DEFINE_MUTEX(stack_sysctl_mutex); | 37 | static DEFINE_MUTEX(stack_sysctl_mutex); |
39 | 38 | ||
@@ -116,9 +115,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, | |||
116 | { | 115 | { |
117 | int cpu; | 116 | int cpu; |
118 | 117 | ||
119 | if (unlikely(!ftrace_enabled || stack_trace_disabled)) | ||
120 | return; | ||
121 | |||
122 | preempt_disable_notrace(); | 118 | preempt_disable_notrace(); |
123 | 119 | ||
124 | cpu = raw_smp_processor_id(); | 120 | cpu = raw_smp_processor_id(); |
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 9614db8b0f8c..c86e6d4f67fb 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/uaccess.h> | 22 | #include <linux/uaccess.h> |
23 | #include <linux/uprobes.h> | 23 | #include <linux/uprobes.h> |
24 | #include <linux/namei.h> | 24 | #include <linux/namei.h> |
25 | #include <linux/string.h> | ||
25 | 26 | ||
26 | #include "trace_probe.h" | 27 | #include "trace_probe.h" |
27 | 28 | ||
@@ -263,16 +264,15 @@ static int create_trace_uprobe(int argc, char **argv) | |||
263 | 264 | ||
264 | /* setup a probe */ | 265 | /* setup a probe */ |
265 | if (!event) { | 266 | if (!event) { |
266 | char *tail = strrchr(filename, '/'); | 267 | char *tail; |
267 | char *ptr; | 268 | char *ptr; |
268 | 269 | ||
269 | ptr = kstrdup((tail ? tail + 1 : filename), GFP_KERNEL); | 270 | tail = kstrdup(kbasename(filename), GFP_KERNEL); |
270 | if (!ptr) { | 271 | if (!tail) { |
271 | ret = -ENOMEM; | 272 | ret = -ENOMEM; |
272 | goto fail_address_parse; | 273 | goto fail_address_parse; |
273 | } | 274 | } |
274 | 275 | ||
275 | tail = ptr; | ||
276 | ptr = strpbrk(tail, ".-_"); | 276 | ptr = strpbrk(tail, ".-_"); |
277 | if (ptr) | 277 | if (ptr) |
278 | *ptr = '\0'; | 278 | *ptr = '\0'; |
diff --git a/kernel/user.c b/kernel/user.c index 750acffbe9ec..33acb5e53a5f 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/export.h> | 17 | #include <linux/export.h> |
18 | #include <linux/user_namespace.h> | 18 | #include <linux/user_namespace.h> |
19 | #include <linux/proc_fs.h> | ||
19 | 20 | ||
20 | /* | 21 | /* |
21 | * userns count is 1 for root user, 1 for init_uts_ns, | 22 | * userns count is 1 for root user, 1 for init_uts_ns, |
@@ -51,6 +52,7 @@ struct user_namespace init_user_ns = { | |||
51 | }, | 52 | }, |
52 | .owner = GLOBAL_ROOT_UID, | 53 | .owner = GLOBAL_ROOT_UID, |
53 | .group = GLOBAL_ROOT_GID, | 54 | .group = GLOBAL_ROOT_GID, |
55 | .proc_inum = PROC_USER_INIT_INO, | ||
54 | }; | 56 | }; |
55 | EXPORT_SYMBOL_GPL(init_user_ns); | 57 | EXPORT_SYMBOL_GPL(init_user_ns); |
56 | 58 | ||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 456a6b9fba34..2b042c42fbc4 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/nsproxy.h> | 9 | #include <linux/nsproxy.h> |
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <linux/user_namespace.h> | 11 | #include <linux/user_namespace.h> |
12 | #include <linux/proc_fs.h> | ||
12 | #include <linux/highuid.h> | 13 | #include <linux/highuid.h> |
13 | #include <linux/cred.h> | 14 | #include <linux/cred.h> |
14 | #include <linux/securebits.h> | 15 | #include <linux/securebits.h> |
@@ -26,6 +27,24 @@ static struct kmem_cache *user_ns_cachep __read_mostly; | |||
26 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, | 27 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, |
27 | struct uid_gid_map *map); | 28 | struct uid_gid_map *map); |
28 | 29 | ||
30 | static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) | ||
31 | { | ||
32 | /* Start with the same capabilities as init but useless for doing | ||
33 | * anything as the capabilities are bound to the new user namespace. | ||
34 | */ | ||
35 | cred->securebits = SECUREBITS_DEFAULT; | ||
36 | cred->cap_inheritable = CAP_EMPTY_SET; | ||
37 | cred->cap_permitted = CAP_FULL_SET; | ||
38 | cred->cap_effective = CAP_FULL_SET; | ||
39 | cred->cap_bset = CAP_FULL_SET; | ||
40 | #ifdef CONFIG_KEYS | ||
41 | key_put(cred->request_key_auth); | ||
42 | cred->request_key_auth = NULL; | ||
43 | #endif | ||
44 | /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ | ||
45 | cred->user_ns = user_ns; | ||
46 | } | ||
47 | |||
29 | /* | 48 | /* |
30 | * Create a new user namespace, deriving the creator from the user in the | 49 | * Create a new user namespace, deriving the creator from the user in the |
31 | * passed credentials, and replacing that user with the new root user for the | 50 | * passed credentials, and replacing that user with the new root user for the |
@@ -39,6 +58,7 @@ int create_user_ns(struct cred *new) | |||
39 | struct user_namespace *ns, *parent_ns = new->user_ns; | 58 | struct user_namespace *ns, *parent_ns = new->user_ns; |
40 | kuid_t owner = new->euid; | 59 | kuid_t owner = new->euid; |
41 | kgid_t group = new->egid; | 60 | kgid_t group = new->egid; |
61 | int ret; | ||
42 | 62 | ||
43 | /* The creator needs a mapping in the parent user namespace | 63 | /* The creator needs a mapping in the parent user namespace |
44 | * or else we won't be able to reasonably tell userspace who | 64 | * or else we won't be able to reasonably tell userspace who |
@@ -52,38 +72,45 @@ int create_user_ns(struct cred *new) | |||
52 | if (!ns) | 72 | if (!ns) |
53 | return -ENOMEM; | 73 | return -ENOMEM; |
54 | 74 | ||
75 | ret = proc_alloc_inum(&ns->proc_inum); | ||
76 | if (ret) { | ||
77 | kmem_cache_free(user_ns_cachep, ns); | ||
78 | return ret; | ||
79 | } | ||
80 | |||
55 | kref_init(&ns->kref); | 81 | kref_init(&ns->kref); |
82 | /* Leave the new->user_ns reference with the new user namespace. */ | ||
56 | ns->parent = parent_ns; | 83 | ns->parent = parent_ns; |
57 | ns->owner = owner; | 84 | ns->owner = owner; |
58 | ns->group = group; | 85 | ns->group = group; |
59 | 86 | ||
60 | /* Start with the same capabilities as init but useless for doing | 87 | set_cred_user_ns(new, ns); |
61 | * anything as the capabilities are bound to the new user namespace. | ||
62 | */ | ||
63 | new->securebits = SECUREBITS_DEFAULT; | ||
64 | new->cap_inheritable = CAP_EMPTY_SET; | ||
65 | new->cap_permitted = CAP_FULL_SET; | ||
66 | new->cap_effective = CAP_FULL_SET; | ||
67 | new->cap_bset = CAP_FULL_SET; | ||
68 | #ifdef CONFIG_KEYS | ||
69 | key_put(new->request_key_auth); | ||
70 | new->request_key_auth = NULL; | ||
71 | #endif | ||
72 | /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ | ||
73 | |||
74 | /* Leave the new->user_ns reference with the new user namespace. */ | ||
75 | /* Leave the reference to our user_ns with the new cred. */ | ||
76 | new->user_ns = ns; | ||
77 | 88 | ||
78 | return 0; | 89 | return 0; |
79 | } | 90 | } |
80 | 91 | ||
92 | int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) | ||
93 | { | ||
94 | struct cred *cred; | ||
95 | |||
96 | if (!(unshare_flags & CLONE_NEWUSER)) | ||
97 | return 0; | ||
98 | |||
99 | cred = prepare_creds(); | ||
100 | if (!cred) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | *new_cred = cred; | ||
104 | return create_user_ns(cred); | ||
105 | } | ||
106 | |||
81 | void free_user_ns(struct kref *kref) | 107 | void free_user_ns(struct kref *kref) |
82 | { | 108 | { |
83 | struct user_namespace *parent, *ns = | 109 | struct user_namespace *parent, *ns = |
84 | container_of(kref, struct user_namespace, kref); | 110 | container_of(kref, struct user_namespace, kref); |
85 | 111 | ||
86 | parent = ns->parent; | 112 | parent = ns->parent; |
113 | proc_free_inum(ns->proc_inum); | ||
87 | kmem_cache_free(user_ns_cachep, ns); | 114 | kmem_cache_free(user_ns_cachep, ns); |
88 | put_user_ns(parent); | 115 | put_user_ns(parent); |
89 | } | 116 | } |
@@ -372,7 +399,7 @@ static int uid_m_show(struct seq_file *seq, void *v) | |||
372 | struct user_namespace *lower_ns; | 399 | struct user_namespace *lower_ns; |
373 | uid_t lower; | 400 | uid_t lower; |
374 | 401 | ||
375 | lower_ns = current_user_ns(); | 402 | lower_ns = seq_user_ns(seq); |
376 | if ((lower_ns == ns) && lower_ns->parent) | 403 | if ((lower_ns == ns) && lower_ns->parent) |
377 | lower_ns = lower_ns->parent; | 404 | lower_ns = lower_ns->parent; |
378 | 405 | ||
@@ -393,7 +420,7 @@ static int gid_m_show(struct seq_file *seq, void *v) | |||
393 | struct user_namespace *lower_ns; | 420 | struct user_namespace *lower_ns; |
394 | gid_t lower; | 421 | gid_t lower; |
395 | 422 | ||
396 | lower_ns = current_user_ns(); | 423 | lower_ns = seq_user_ns(seq); |
397 | if ((lower_ns == ns) && lower_ns->parent) | 424 | if ((lower_ns == ns) && lower_ns->parent) |
398 | lower_ns = lower_ns->parent; | 425 | lower_ns = lower_ns->parent; |
399 | 426 | ||
@@ -669,10 +696,14 @@ ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t siz | |||
669 | { | 696 | { |
670 | struct seq_file *seq = file->private_data; | 697 | struct seq_file *seq = file->private_data; |
671 | struct user_namespace *ns = seq->private; | 698 | struct user_namespace *ns = seq->private; |
699 | struct user_namespace *seq_ns = seq_user_ns(seq); | ||
672 | 700 | ||
673 | if (!ns->parent) | 701 | if (!ns->parent) |
674 | return -EPERM; | 702 | return -EPERM; |
675 | 703 | ||
704 | if ((seq_ns != ns) && (seq_ns != ns->parent)) | ||
705 | return -EPERM; | ||
706 | |||
676 | return map_write(file, buf, size, ppos, CAP_SETUID, | 707 | return map_write(file, buf, size, ppos, CAP_SETUID, |
677 | &ns->uid_map, &ns->parent->uid_map); | 708 | &ns->uid_map, &ns->parent->uid_map); |
678 | } | 709 | } |
@@ -681,10 +712,14 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t siz | |||
681 | { | 712 | { |
682 | struct seq_file *seq = file->private_data; | 713 | struct seq_file *seq = file->private_data; |
683 | struct user_namespace *ns = seq->private; | 714 | struct user_namespace *ns = seq->private; |
715 | struct user_namespace *seq_ns = seq_user_ns(seq); | ||
684 | 716 | ||
685 | if (!ns->parent) | 717 | if (!ns->parent) |
686 | return -EPERM; | 718 | return -EPERM; |
687 | 719 | ||
720 | if ((seq_ns != ns) && (seq_ns != ns->parent)) | ||
721 | return -EPERM; | ||
722 | |||
688 | return map_write(file, buf, size, ppos, CAP_SETGID, | 723 | return map_write(file, buf, size, ppos, CAP_SETGID, |
689 | &ns->gid_map, &ns->parent->gid_map); | 724 | &ns->gid_map, &ns->parent->gid_map); |
690 | } | 725 | } |
@@ -709,6 +744,21 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t | |||
709 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, | 744 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, |
710 | struct uid_gid_map *new_map) | 745 | struct uid_gid_map *new_map) |
711 | { | 746 | { |
747 | /* Allow mapping to your own filesystem ids */ | ||
748 | if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { | ||
749 | u32 id = new_map->extent[0].lower_first; | ||
750 | if (cap_setid == CAP_SETUID) { | ||
751 | kuid_t uid = make_kuid(ns->parent, id); | ||
752 | if (uid_eq(uid, current_fsuid())) | ||
753 | return true; | ||
754 | } | ||
755 | else if (cap_setid == CAP_SETGID) { | ||
756 | kgid_t gid = make_kgid(ns->parent, id); | ||
757 | if (gid_eq(gid, current_fsgid())) | ||
758 | return true; | ||
759 | } | ||
760 | } | ||
761 | |||
712 | /* Allow anyone to set a mapping that doesn't require privilege */ | 762 | /* Allow anyone to set a mapping that doesn't require privilege */ |
713 | if (!cap_valid(cap_setid)) | 763 | if (!cap_valid(cap_setid)) |
714 | return true; | 764 | return true; |
@@ -722,6 +772,65 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, | |||
722 | return false; | 772 | return false; |
723 | } | 773 | } |
724 | 774 | ||
775 | static void *userns_get(struct task_struct *task) | ||
776 | { | ||
777 | struct user_namespace *user_ns; | ||
778 | |||
779 | rcu_read_lock(); | ||
780 | user_ns = get_user_ns(__task_cred(task)->user_ns); | ||
781 | rcu_read_unlock(); | ||
782 | |||
783 | return user_ns; | ||
784 | } | ||
785 | |||
786 | static void userns_put(void *ns) | ||
787 | { | ||
788 | put_user_ns(ns); | ||
789 | } | ||
790 | |||
791 | static int userns_install(struct nsproxy *nsproxy, void *ns) | ||
792 | { | ||
793 | struct user_namespace *user_ns = ns; | ||
794 | struct cred *cred; | ||
795 | |||
796 | /* Don't allow gaining capabilities by reentering | ||
797 | * the same user namespace. | ||
798 | */ | ||
799 | if (user_ns == current_user_ns()) | ||
800 | return -EINVAL; | ||
801 | |||
802 | /* Threaded processes may not enter a different user namespace */ | ||
803 | if (atomic_read(¤t->mm->mm_users) > 1) | ||
804 | return -EINVAL; | ||
805 | |||
806 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) | ||
807 | return -EPERM; | ||
808 | |||
809 | cred = prepare_creds(); | ||
810 | if (!cred) | ||
811 | return -ENOMEM; | ||
812 | |||
813 | put_user_ns(cred->user_ns); | ||
814 | set_cred_user_ns(cred, get_user_ns(user_ns)); | ||
815 | |||
816 | return commit_creds(cred); | ||
817 | } | ||
818 | |||
819 | static unsigned int userns_inum(void *ns) | ||
820 | { | ||
821 | struct user_namespace *user_ns = ns; | ||
822 | return user_ns->proc_inum; | ||
823 | } | ||
824 | |||
825 | const struct proc_ns_operations userns_operations = { | ||
826 | .name = "user", | ||
827 | .type = CLONE_NEWUSER, | ||
828 | .get = userns_get, | ||
829 | .put = userns_put, | ||
830 | .install = userns_install, | ||
831 | .inum = userns_inum, | ||
832 | }; | ||
833 | |||
725 | static __init int user_namespaces_init(void) | 834 | static __init int user_namespaces_init(void) |
726 | { | 835 | { |
727 | user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); | 836 | user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); |
diff --git a/kernel/utsname.c b/kernel/utsname.c index 679d97a5d3fd..08b197e8c485 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -32,18 +32,25 @@ static struct uts_namespace *create_uts_ns(void) | |||
32 | * @old_ns: namespace to clone | 32 | * @old_ns: namespace to clone |
33 | * Return NULL on error (failure to kmalloc), new ns otherwise | 33 | * Return NULL on error (failure to kmalloc), new ns otherwise |
34 | */ | 34 | */ |
35 | static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, | 35 | static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, |
36 | struct uts_namespace *old_ns) | 36 | struct uts_namespace *old_ns) |
37 | { | 37 | { |
38 | struct uts_namespace *ns; | 38 | struct uts_namespace *ns; |
39 | int err; | ||
39 | 40 | ||
40 | ns = create_uts_ns(); | 41 | ns = create_uts_ns(); |
41 | if (!ns) | 42 | if (!ns) |
42 | return ERR_PTR(-ENOMEM); | 43 | return ERR_PTR(-ENOMEM); |
43 | 44 | ||
45 | err = proc_alloc_inum(&ns->proc_inum); | ||
46 | if (err) { | ||
47 | kfree(ns); | ||
48 | return ERR_PTR(err); | ||
49 | } | ||
50 | |||
44 | down_read(&uts_sem); | 51 | down_read(&uts_sem); |
45 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 52 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
46 | ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); | 53 | ns->user_ns = get_user_ns(user_ns); |
47 | up_read(&uts_sem); | 54 | up_read(&uts_sem); |
48 | return ns; | 55 | return ns; |
49 | } | 56 | } |
@@ -55,9 +62,8 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, | |||
55 | * versa. | 62 | * versa. |
56 | */ | 63 | */ |
57 | struct uts_namespace *copy_utsname(unsigned long flags, | 64 | struct uts_namespace *copy_utsname(unsigned long flags, |
58 | struct task_struct *tsk) | 65 | struct user_namespace *user_ns, struct uts_namespace *old_ns) |
59 | { | 66 | { |
60 | struct uts_namespace *old_ns = tsk->nsproxy->uts_ns; | ||
61 | struct uts_namespace *new_ns; | 67 | struct uts_namespace *new_ns; |
62 | 68 | ||
63 | BUG_ON(!old_ns); | 69 | BUG_ON(!old_ns); |
@@ -66,7 +72,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, | |||
66 | if (!(flags & CLONE_NEWUTS)) | 72 | if (!(flags & CLONE_NEWUTS)) |
67 | return old_ns; | 73 | return old_ns; |
68 | 74 | ||
69 | new_ns = clone_uts_ns(tsk, old_ns); | 75 | new_ns = clone_uts_ns(user_ns, old_ns); |
70 | 76 | ||
71 | put_uts_ns(old_ns); | 77 | put_uts_ns(old_ns); |
72 | return new_ns; | 78 | return new_ns; |
@@ -78,6 +84,7 @@ void free_uts_ns(struct kref *kref) | |||
78 | 84 | ||
79 | ns = container_of(kref, struct uts_namespace, kref); | 85 | ns = container_of(kref, struct uts_namespace, kref); |
80 | put_user_ns(ns->user_ns); | 86 | put_user_ns(ns->user_ns); |
87 | proc_free_inum(ns->proc_inum); | ||
81 | kfree(ns); | 88 | kfree(ns); |
82 | } | 89 | } |
83 | 90 | ||
@@ -102,19 +109,32 @@ static void utsns_put(void *ns) | |||
102 | put_uts_ns(ns); | 109 | put_uts_ns(ns); |
103 | } | 110 | } |
104 | 111 | ||
105 | static int utsns_install(struct nsproxy *nsproxy, void *ns) | 112 | static int utsns_install(struct nsproxy *nsproxy, void *new) |
106 | { | 113 | { |
114 | struct uts_namespace *ns = new; | ||
115 | |||
116 | if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) || | ||
117 | !nsown_capable(CAP_SYS_ADMIN)) | ||
118 | return -EPERM; | ||
119 | |||
107 | get_uts_ns(ns); | 120 | get_uts_ns(ns); |
108 | put_uts_ns(nsproxy->uts_ns); | 121 | put_uts_ns(nsproxy->uts_ns); |
109 | nsproxy->uts_ns = ns; | 122 | nsproxy->uts_ns = ns; |
110 | return 0; | 123 | return 0; |
111 | } | 124 | } |
112 | 125 | ||
126 | static unsigned int utsns_inum(void *vp) | ||
127 | { | ||
128 | struct uts_namespace *ns = vp; | ||
129 | |||
130 | return ns->proc_inum; | ||
131 | } | ||
132 | |||
113 | const struct proc_ns_operations utsns_operations = { | 133 | const struct proc_ns_operations utsns_operations = { |
114 | .name = "uts", | 134 | .name = "uts", |
115 | .type = CLONE_NEWUTS, | 135 | .type = CLONE_NEWUTS, |
116 | .get = utsns_get, | 136 | .get = utsns_get, |
117 | .put = utsns_put, | 137 | .put = utsns_put, |
118 | .install = utsns_install, | 138 | .install = utsns_install, |
139 | .inum = utsns_inum, | ||
119 | }; | 140 | }; |
120 | |||
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index c8c21be11ab4..75a2ab3d0b02 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -31,6 +31,7 @@ | |||
31 | int watchdog_enabled = 1; | 31 | int watchdog_enabled = 1; |
32 | int __read_mostly watchdog_thresh = 10; | 32 | int __read_mostly watchdog_thresh = 10; |
33 | static int __read_mostly watchdog_disabled; | 33 | static int __read_mostly watchdog_disabled; |
34 | static u64 __read_mostly sample_period; | ||
34 | 35 | ||
35 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); | 36 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); |
36 | static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); | 37 | static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); |
@@ -116,7 +117,7 @@ static unsigned long get_timestamp(int this_cpu) | |||
116 | return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ | 117 | return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ |
117 | } | 118 | } |
118 | 119 | ||
119 | static u64 get_sample_period(void) | 120 | static void set_sample_period(void) |
120 | { | 121 | { |
121 | /* | 122 | /* |
122 | * convert watchdog_thresh from seconds to ns | 123 | * convert watchdog_thresh from seconds to ns |
@@ -125,7 +126,7 @@ static u64 get_sample_period(void) | |||
125 | * and hard thresholds) to increment before the | 126 | * and hard thresholds) to increment before the |
126 | * hardlockup detector generates a warning | 127 | * hardlockup detector generates a warning |
127 | */ | 128 | */ |
128 | return get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); | 129 | sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); |
129 | } | 130 | } |
130 | 131 | ||
131 | /* Commands for resetting the watchdog */ | 132 | /* Commands for resetting the watchdog */ |
@@ -275,7 +276,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
275 | wake_up_process(__this_cpu_read(softlockup_watchdog)); | 276 | wake_up_process(__this_cpu_read(softlockup_watchdog)); |
276 | 277 | ||
277 | /* .. and repeat */ | 278 | /* .. and repeat */ |
278 | hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period())); | 279 | hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); |
279 | 280 | ||
280 | if (touch_ts == 0) { | 281 | if (touch_ts == 0) { |
281 | if (unlikely(__this_cpu_read(softlockup_touch_sync))) { | 282 | if (unlikely(__this_cpu_read(softlockup_touch_sync))) { |
@@ -343,6 +344,10 @@ static void watchdog_enable(unsigned int cpu) | |||
343 | { | 344 | { |
344 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | 345 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); |
345 | 346 | ||
347 | /* kick off the timer for the hardlockup detector */ | ||
348 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
349 | hrtimer->function = watchdog_timer_fn; | ||
350 | |||
346 | if (!watchdog_enabled) { | 351 | if (!watchdog_enabled) { |
347 | kthread_park(current); | 352 | kthread_park(current); |
348 | return; | 353 | return; |
@@ -351,12 +356,8 @@ static void watchdog_enable(unsigned int cpu) | |||
351 | /* Enable the perf event */ | 356 | /* Enable the perf event */ |
352 | watchdog_nmi_enable(cpu); | 357 | watchdog_nmi_enable(cpu); |
353 | 358 | ||
354 | /* kick off the timer for the hardlockup detector */ | ||
355 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
356 | hrtimer->function = watchdog_timer_fn; | ||
357 | |||
358 | /* done here because hrtimer_start can only pin to smp_processor_id() */ | 359 | /* done here because hrtimer_start can only pin to smp_processor_id() */ |
359 | hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()), | 360 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), |
360 | HRTIMER_MODE_REL_PINNED); | 361 | HRTIMER_MODE_REL_PINNED); |
361 | 362 | ||
362 | /* initialize timestamp */ | 363 | /* initialize timestamp */ |
@@ -368,9 +369,6 @@ static void watchdog_disable(unsigned int cpu) | |||
368 | { | 369 | { |
369 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | 370 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); |
370 | 371 | ||
371 | if (!watchdog_enabled) | ||
372 | return; | ||
373 | |||
374 | watchdog_set_prio(SCHED_NORMAL, 0); | 372 | watchdog_set_prio(SCHED_NORMAL, 0); |
375 | hrtimer_cancel(hrtimer); | 373 | hrtimer_cancel(hrtimer); |
376 | /* disable the perf event */ | 374 | /* disable the perf event */ |
@@ -386,7 +384,7 @@ static int watchdog_should_run(unsigned int cpu) | |||
386 | /* | 384 | /* |
387 | * The watchdog thread function - touches the timestamp. | 385 | * The watchdog thread function - touches the timestamp. |
388 | * | 386 | * |
389 | * It only runs once every get_sample_period() seconds (4 seconds by | 387 | * It only runs once every sample_period seconds (4 seconds by |
390 | * default) to reset the softlockup timestamp. If this gets delayed | 388 | * default) to reset the softlockup timestamp. If this gets delayed |
391 | * for more than 2*watchdog_thresh seconds then the debug-printout | 389 | * for more than 2*watchdog_thresh seconds then the debug-printout |
392 | * triggers in watchdog_timer_fn(). | 390 | * triggers in watchdog_timer_fn(). |
@@ -519,6 +517,7 @@ int proc_dowatchdog(struct ctl_table *table, int write, | |||
519 | if (ret || !write) | 517 | if (ret || !write) |
520 | return ret; | 518 | return ret; |
521 | 519 | ||
520 | set_sample_period(); | ||
522 | if (watchdog_enabled && watchdog_thresh) | 521 | if (watchdog_enabled && watchdog_thresh) |
523 | watchdog_enable_all_cpus(); | 522 | watchdog_enable_all_cpus(); |
524 | else | 523 | else |
@@ -540,6 +539,7 @@ static struct smp_hotplug_thread watchdog_threads = { | |||
540 | 539 | ||
541 | void __init lockup_detector_init(void) | 540 | void __init lockup_detector_init(void) |
542 | { | 541 | { |
542 | set_sample_period(); | ||
543 | if (smpboot_register_percpu_thread(&watchdog_threads)) { | 543 | if (smpboot_register_percpu_thread(&watchdog_threads)) { |
544 | pr_err("Failed to create watchdog threads, disabled\n"); | 544 | pr_err("Failed to create watchdog threads, disabled\n"); |
545 | watchdog_disabled = -ENODEV; | 545 | watchdog_disabled = -ENODEV; |