about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-07-17 11:58:04 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-07-17 11:58:04 -0400
commit 57a8ec387e1441ea5e1232bc0749fb99a8cba7e7 (patch)
tree b5fb03fc6bc5754de8b5b1f8b0e4f36d67c8315c /kernel
parent 0a8ad0ffa4d80a544f6cbff703bf6394339afcdf (diff)
parent 43e11fa2d1d3b6e35629fa556eb7d571edba2010 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:

 "VM:
   - z3fold fixes and enhancements by Henry Burns and Vitaly Wool
   - more accurate reclaimed slab caches calculations by Yafang Shao
   - fix MAP_UNINITIALIZED UAPI symbol to not depend on config, by Christoph Hellwig
   - !CONFIG_MMU fixes by Christoph Hellwig
   - new novmcoredd parameter to omit device dumps from vmcore, by Kairui Song
   - new test_meminit module for testing heap and pagealloc initialization, by Alexander Potapenko
   - ioremap improvements for huge mappings, by Anshuman Khandual
   - generalize kprobe page fault handling, by Anshuman Khandual
   - device-dax hotplug fixes and improvements, by Pavel Tatashin
   - enable synchronous DAX fault on powerpc, by Aneesh Kumar K.V
   - add pte_devmap() support for arm64, by Robin Murphy
   - unify locked_vm accounting with a helper, by Daniel Jordan
   - several misc fixes

  core/lib:
   - new typeof_member() macro including some users, by Alexey Dobriyan
   - make BIT() and GENMASK() available in asm, by Masahiro Yamada
   - changed LIST_POISON2 on x86_64 to 0xdead000000000122 for better code generation, by Alexey Dobriyan
   - rbtree code size optimizations, by Michel Lespinasse
   - convert struct pid count to refcount_t, by Joel Fernandes

  get_maintainer.pl:
   - add --no-moderated switch to skip moderated ML's, by Joe Perches

  misc:
   - ptrace PTRACE_GET_SYSCALL_INFO interface
   - coda updates
   - gdb scripts, various"

[ Using merge message suggestion from Vlastimil Babka, with some editing - Linus ]

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (100 commits)
  fs/select.c: use struct_size() in kmalloc()
  mm: add account_locked_vm utility function
  arm64: mm: implement pte_devmap support
  mm: introduce ARCH_HAS_PTE_DEVMAP
  mm: clean up is_device_*_page() definitions
  mm/mmap: move common defines to mman-common.h
  mm: move MAP_SYNC to asm-generic/mman-common.h
  device-dax: "Hotremove" persistent memory that is used like normal RAM
  mm/hotplug: make remove_memory() interface usable
  device-dax: fix memory and resource leak if hotplug fails
  include/linux/lz4.h: fix spelling and copy-paste errors in documentation
  ipc/mqueue.c: only perform resource calculation if user valid
  include/asm-generic/bug.h: fix "cut here" for WARN_ON for __WARN_TAINT architectures
  scripts/gdb: add helpers to find and list devices
  scripts/gdb: add lx-genpd-summary command
  drivers/pps/pps.c: clear offset flags in PPS_SETPARAMS ioctl
  kernel/pid.c: convert struct pid count to refcount_t
  drivers/rapidio/devices/rio_mport_cdev.c: NUL terminate some strings
  select: shift restore_saved_sigmask_unless() into poll_select_copy_remaining()
  select: change do_poll() to return -ERESTARTNOHAND rather than -EINTR
  ...
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/pid.c 9
-rw-r--r-- kernel/ptrace.c 101
-rw-r--r-- kernel/signal.c 69
-rw-r--r-- kernel/sysctl.c 18
4 files changed, 132 insertions, 65 deletions
diff --git a/kernel/pid.c b/kernel/pid.c
index 16263b526560..0a9f2e437217 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -37,14 +37,14 @@
37#include <linux/init_task.h> 37#include <linux/init_task.h>
38#include <linux/syscalls.h> 38#include <linux/syscalls.h>
39#include <linux/proc_ns.h> 39#include <linux/proc_ns.h>
40#include <linux/proc_fs.h> 40#include <linux/refcount.h>
41#include <linux/anon_inodes.h> 41#include <linux/anon_inodes.h>
42#include <linux/sched/signal.h> 42#include <linux/sched/signal.h>
43#include <linux/sched/task.h> 43#include <linux/sched/task.h>
44#include <linux/idr.h> 44#include <linux/idr.h>
45 45
46struct pid init_struct_pid = { 46struct pid init_struct_pid = {
47 .count = ATOMIC_INIT(1), 47 .count = REFCOUNT_INIT(1),
48 .tasks = { 48 .tasks = {
49 { .first = NULL }, 49 { .first = NULL },
50 { .first = NULL }, 50 { .first = NULL },
@@ -108,8 +108,7 @@ void put_pid(struct pid *pid)
108 return; 108 return;
109 109
110 ns = pid->numbers[pid->level].ns; 110 ns = pid->numbers[pid->level].ns;
111 if ((atomic_read(&pid->count) == 1) || 111 if (refcount_dec_and_test(&pid->count)) {
112 atomic_dec_and_test(&pid->count)) {
113 kmem_cache_free(ns->pid_cachep, pid); 112 kmem_cache_free(ns->pid_cachep, pid);
114 put_pid_ns(ns); 113 put_pid_ns(ns);
115 } 114 }
@@ -212,7 +211,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
212 } 211 }
213 212
214 get_pid_ns(ns); 213 get_pid_ns(ns);
215 atomic_set(&pid->count, 1); 214 refcount_set(&pid->count, 1);
216 for (type = 0; type < PIDTYPE_MAX; ++type) 215 for (type = 0; type < PIDTYPE_MAX; ++type)
217 INIT_HLIST_HEAD(&pid->tasks[type]); 216 INIT_HLIST_HEAD(&pid->tasks[type]);
218 217
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 83a531cea2f3..cb9ddcc08119 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -32,6 +32,8 @@
32#include <linux/compat.h> 32#include <linux/compat.h>
33#include <linux/sched/signal.h> 33#include <linux/sched/signal.h>
34 34
35#include <asm/syscall.h> /* for syscall_get_* */
36
35/* 37/*
36 * Access another process' address space via ptrace. 38 * Access another process' address space via ptrace.
37 * Source/target buffer must be kernel space, 39 * Source/target buffer must be kernel space,
@@ -897,7 +899,100 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
897 * to ensure no machine forgets it. 899 * to ensure no machine forgets it.
898 */ 900 */
899EXPORT_SYMBOL_GPL(task_user_regset_view); 901EXPORT_SYMBOL_GPL(task_user_regset_view);
900#endif 902
903static unsigned long
904ptrace_get_syscall_info_entry(struct task_struct *child, struct pt_regs *regs,
905 struct ptrace_syscall_info *info)
906{
907 unsigned long args[ARRAY_SIZE(info->entry.args)];
908 int i;
909
910 info->op = PTRACE_SYSCALL_INFO_ENTRY;
911 info->entry.nr = syscall_get_nr(child, regs);
912 syscall_get_arguments(child, regs, args);
913 for (i = 0; i < ARRAY_SIZE(args); i++)
914 info->entry.args[i] = args[i];
915
916 /* args is the last field in struct ptrace_syscall_info.entry */
917 return offsetofend(struct ptrace_syscall_info, entry.args);
918}
919
920static unsigned long
921ptrace_get_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs,
922 struct ptrace_syscall_info *info)
923{
924 /*
925 * As struct ptrace_syscall_info.entry is currently a subset
926 * of struct ptrace_syscall_info.seccomp, it makes sense to
927 * initialize that subset using ptrace_get_syscall_info_entry().
928 * This can be reconsidered in the future if these structures
929 * diverge significantly enough.
930 */
931 ptrace_get_syscall_info_entry(child, regs, info);
932 info->op = PTRACE_SYSCALL_INFO_SECCOMP;
933 info->seccomp.ret_data = child->ptrace_message;
934
935 /* ret_data is the last field in struct ptrace_syscall_info.seccomp */
936 return offsetofend(struct ptrace_syscall_info, seccomp.ret_data);
937}
938
939static unsigned long
940ptrace_get_syscall_info_exit(struct task_struct *child, struct pt_regs *regs,
941 struct ptrace_syscall_info *info)
942{
943 info->op = PTRACE_SYSCALL_INFO_EXIT;
944 info->exit.rval = syscall_get_error(child, regs);
945 info->exit.is_error = !!info->exit.rval;
946 if (!info->exit.is_error)
947 info->exit.rval = syscall_get_return_value(child, regs);
948
949 /* is_error is the last field in struct ptrace_syscall_info.exit */
950 return offsetofend(struct ptrace_syscall_info, exit.is_error);
951}
952
953static int
954ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size,
955 void __user *datavp)
956{
957 struct pt_regs *regs = task_pt_regs(child);
958 struct ptrace_syscall_info info = {
959 .op = PTRACE_SYSCALL_INFO_NONE,
960 .arch = syscall_get_arch(child),
961 .instruction_pointer = instruction_pointer(regs),
962 .stack_pointer = user_stack_pointer(regs),
963 };
964 unsigned long actual_size = offsetof(struct ptrace_syscall_info, entry);
965 unsigned long write_size;
966
967 /*
968 * This does not need lock_task_sighand() to access
969 * child->last_siginfo because ptrace_freeze_traced()
970 * called earlier by ptrace_check_attach() ensures that
971 * the tracee cannot go away and clear its last_siginfo.
972 */
973 switch (child->last_siginfo ? child->last_siginfo->si_code : 0) {
974 case SIGTRAP | 0x80:
975 switch (child->ptrace_message) {
976 case PTRACE_EVENTMSG_SYSCALL_ENTRY:
977 actual_size = ptrace_get_syscall_info_entry(child, regs,
978 &info);
979 break;
980 case PTRACE_EVENTMSG_SYSCALL_EXIT:
981 actual_size = ptrace_get_syscall_info_exit(child, regs,
982 &info);
983 break;
984 }
985 break;
986 case SIGTRAP | (PTRACE_EVENT_SECCOMP << 8):
987 actual_size = ptrace_get_syscall_info_seccomp(child, regs,
988 &info);
989 break;
990 }
991
992 write_size = min(actual_size, user_size);
993 return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size;
994}
995#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
901 996
902int ptrace_request(struct task_struct *child, long request, 997int ptrace_request(struct task_struct *child, long request,
903 unsigned long addr, unsigned long data) 998 unsigned long addr, unsigned long data)
@@ -1114,6 +1209,10 @@ int ptrace_request(struct task_struct *child, long request,
1114 ret = __put_user(kiov.iov_len, &uiov->iov_len); 1209 ret = __put_user(kiov.iov_len, &uiov->iov_len);
1115 break; 1210 break;
1116 } 1211 }
1212
1213 case PTRACE_GET_SYSCALL_INFO:
1214 ret = ptrace_get_syscall_info(child, addr, datavp);
1215 break;
1117#endif 1216#endif
1118 1217
1119 case PTRACE_SECCOMP_GET_FILTER: 1218 case PTRACE_SECCOMP_GET_FILTER:
diff --git a/kernel/signal.c b/kernel/signal.c
index dabe100d2091..91b789dd6e72 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2951,80 +2951,49 @@ EXPORT_SYMBOL(sigprocmask);
2951 * 2951 *
2952 * This is useful for syscalls such as ppoll, pselect, io_pgetevents and 2952 * This is useful for syscalls such as ppoll, pselect, io_pgetevents and
2953 * epoll_pwait where a new sigmask is passed from userland for the syscalls. 2953 * epoll_pwait where a new sigmask is passed from userland for the syscalls.
2954 *
2955 * Note that it does set_restore_sigmask() in advance, so it must be always
2956 * paired with restore_saved_sigmask_unless() before return from syscall.
2954 */ 2957 */
2955int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, 2958int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize)
2956 sigset_t *oldset, size_t sigsetsize)
2957{ 2959{
2958 if (!usigmask) 2960 sigset_t kmask;
2959 return 0;
2960 2961
2962 if (!umask)
2963 return 0;
2961 if (sigsetsize != sizeof(sigset_t)) 2964 if (sigsetsize != sizeof(sigset_t))
2962 return -EINVAL; 2965 return -EINVAL;
2963 if (copy_from_user(set, usigmask, sizeof(sigset_t))) 2966 if (copy_from_user(&kmask, umask, sizeof(sigset_t)))
2964 return -EFAULT; 2967 return -EFAULT;
2965 2968
2966 *oldset = current->blocked; 2969 set_restore_sigmask();
2967 set_current_blocked(set); 2970 current->saved_sigmask = current->blocked;
2971 set_current_blocked(&kmask);
2968 2972
2969 return 0; 2973 return 0;
2970} 2974}
2971EXPORT_SYMBOL(set_user_sigmask);
2972 2975
2973#ifdef CONFIG_COMPAT 2976#ifdef CONFIG_COMPAT
2974int set_compat_user_sigmask(const compat_sigset_t __user *usigmask, 2977int set_compat_user_sigmask(const compat_sigset_t __user *umask,
2975 sigset_t *set, sigset_t *oldset,
2976 size_t sigsetsize) 2978 size_t sigsetsize)
2977{ 2979{
2978 if (!usigmask) 2980 sigset_t kmask;
2979 return 0;
2980 2981
2982 if (!umask)
2983 return 0;
2981 if (sigsetsize != sizeof(compat_sigset_t)) 2984 if (sigsetsize != sizeof(compat_sigset_t))
2982 return -EINVAL; 2985 return -EINVAL;
2983 if (get_compat_sigset(set, usigmask)) 2986 if (get_compat_sigset(&kmask, umask))
2984 return -EFAULT; 2987 return -EFAULT;
2985 2988
2986 *oldset = current->blocked; 2989 set_restore_sigmask();
2987 set_current_blocked(set); 2990 current->saved_sigmask = current->blocked;
2991 set_current_blocked(&kmask);
2988 2992
2989 return 0; 2993 return 0;
2990} 2994}
2991EXPORT_SYMBOL(set_compat_user_sigmask);
2992#endif 2995#endif
2993 2996
2994/*
2995 * restore_user_sigmask:
2996 * usigmask: sigmask passed in from userland.
2997 * sigsaved: saved sigmask when the syscall started and changed the sigmask to
2998 * usigmask.
2999 *
3000 * This is useful for syscalls such as ppoll, pselect, io_pgetevents and
3001 * epoll_pwait where a new sigmask is passed in from userland for the syscalls.
3002 */
3003void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved,
3004 bool interrupted)
3005{
3006
3007 if (!usigmask)
3008 return;
3009 /*
3010 * When signals are pending, do not restore them here.
3011 * Restoring sigmask here can lead to delivering signals that the above
3012 * syscalls are intended to block because of the sigmask passed in.
3013 */
3014 if (interrupted) {
3015 current->saved_sigmask = *sigsaved;
3016 set_restore_sigmask();
3017 return;
3018 }
3019
3020 /*
3021 * This is needed because the fast syscall return path does not restore
3022 * saved_sigmask when signals are not pending.
3023 */
3024 set_current_blocked(sigsaved);
3025}
3026EXPORT_SYMBOL(restore_user_sigmask);
3027
3028/** 2997/**
3029 * sys_rt_sigprocmask - change the list of currently blocked signals 2998 * sys_rt_sigprocmask - change the list of currently blocked signals
3030 * @how: whether to add, remove, or set signals 2999 * @how: whether to add, remove, or set signals
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1c1ad1e14f21..43186ccfa139 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -188,17 +188,17 @@ extern int no_unaligned_warning;
188 * enum sysctl_writes_mode - supported sysctl write modes 188 * enum sysctl_writes_mode - supported sysctl write modes
189 * 189 *
190 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value 190 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
191 * to be written, and multiple writes on the same sysctl file descriptor 191 * to be written, and multiple writes on the same sysctl file descriptor
192 * will rewrite the sysctl value, regardless of file position. No warning 192 * will rewrite the sysctl value, regardless of file position. No warning
193 * is issued when the initial position is not 0. 193 * is issued when the initial position is not 0.
194 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is 194 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
195 * not 0. 195 * not 0.
196 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at 196 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
197 * file position 0 and the value must be fully contained in the buffer 197 * file position 0 and the value must be fully contained in the buffer
198 * sent to the write syscall. If dealing with strings respect the file 198 * sent to the write syscall. If dealing with strings respect the file
199 * position, but restrict this to the max length of the buffer, anything 199 * position, but restrict this to the max length of the buffer, anything
200 * passed the max lenght will be ignored. Multiple writes will append 200 * passed the max length will be ignored. Multiple writes will append
201 * to the buffer. 201 * to the buffer.
202 * 202 *
203 * These write modes control how current file position affects the behavior of 203 * These write modes control how current file position affects the behavior of
204 * updating sysctl values through the proc interface on each write. 204 * updating sysctl values through the proc interface on each write.