diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-17 11:58:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-17 11:58:04 -0400 |
commit | 57a8ec387e1441ea5e1232bc0749fb99a8cba7e7 (patch) | |
tree | b5fb03fc6bc5754de8b5b1f8b0e4f36d67c8315c /kernel | |
parent | 0a8ad0ffa4d80a544f6cbff703bf6394339afcdf (diff) | |
parent | 43e11fa2d1d3b6e35629fa556eb7d571edba2010 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
"VM:
- z3fold fixes and enhancements by Henry Burns and Vitaly Wool
- more accurate reclaimed slab caches calculations by Yafang Shao
- fix MAP_UNINITIALIZED UAPI symbol to not depend on config, by
Christoph Hellwig
- !CONFIG_MMU fixes by Christoph Hellwig
- new novmcoredd parameter to omit device dumps from vmcore, by
Kairui Song
- new test_meminit module for testing heap and pagealloc
initialization, by Alexander Potapenko
- ioremap improvements for huge mappings, by Anshuman Khandual
- generalize kprobe page fault handling, by Anshuman Khandual
- device-dax hotplug fixes and improvements, by Pavel Tatashin
- enable synchronous DAX fault on powerpc, by Aneesh Kumar K.V
- add pte_devmap() support for arm64, by Robin Murphy
- unify locked_vm accounting with a helper, by Daniel Jordan
- several misc fixes
core/lib:
- new typeof_member() macro including some users, by Alexey Dobriyan
- make BIT() and GENMASK() available in asm, by Masahiro Yamada
- changed LIST_POISON2 on x86_64 to 0xdead000000000122 for better
code generation, by Alexey Dobriyan
- rbtree code size optimizations, by Michel Lespinasse
- convert struct pid count to refcount_t, by Joel Fernandes
get_maintainer.pl:
- add --no-moderated switch to skip moderated mailing lists, by Joe Perches
misc:
- ptrace PTRACE_GET_SYSCALL_INFO interface
- coda updates
- gdb scripts, various"
[ Using merge message suggestion from Vlastimil Babka, with some editing - Linus ]
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (100 commits)
fs/select.c: use struct_size() in kmalloc()
mm: add account_locked_vm utility function
arm64: mm: implement pte_devmap support
mm: introduce ARCH_HAS_PTE_DEVMAP
mm: clean up is_device_*_page() definitions
mm/mmap: move common defines to mman-common.h
mm: move MAP_SYNC to asm-generic/mman-common.h
device-dax: "Hotremove" persistent memory that is used like normal RAM
mm/hotplug: make remove_memory() interface usable
device-dax: fix memory and resource leak if hotplug fails
include/linux/lz4.h: fix spelling and copy-paste errors in documentation
ipc/mqueue.c: only perform resource calculation if user valid
include/asm-generic/bug.h: fix "cut here" for WARN_ON for __WARN_TAINT architectures
scripts/gdb: add helpers to find and list devices
scripts/gdb: add lx-genpd-summary command
drivers/pps/pps.c: clear offset flags in PPS_SETPARAMS ioctl
kernel/pid.c: convert struct pid count to refcount_t
drivers/rapidio/devices/rio_mport_cdev.c: NUL terminate some strings
select: shift restore_saved_sigmask_unless() into poll_select_copy_remaining()
select: change do_poll() to return -ERESTARTNOHAND rather than -EINTR
...
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/pid.c | 9 | ||||
-rw-r--r-- | kernel/ptrace.c | 101 | ||||
-rw-r--r-- | kernel/signal.c | 69 | ||||
-rw-r--r-- | kernel/sysctl.c | 18 |
4 files changed, 132 insertions, 65 deletions
diff --git a/kernel/pid.c b/kernel/pid.c index 16263b526560..0a9f2e437217 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -37,14 +37,14 @@ | |||
37 | #include <linux/init_task.h> | 37 | #include <linux/init_task.h> |
38 | #include <linux/syscalls.h> | 38 | #include <linux/syscalls.h> |
39 | #include <linux/proc_ns.h> | 39 | #include <linux/proc_ns.h> |
40 | #include <linux/proc_fs.h> | 40 | #include <linux/refcount.h> |
41 | #include <linux/anon_inodes.h> | 41 | #include <linux/anon_inodes.h> |
42 | #include <linux/sched/signal.h> | 42 | #include <linux/sched/signal.h> |
43 | #include <linux/sched/task.h> | 43 | #include <linux/sched/task.h> |
44 | #include <linux/idr.h> | 44 | #include <linux/idr.h> |
45 | 45 | ||
46 | struct pid init_struct_pid = { | 46 | struct pid init_struct_pid = { |
47 | .count = ATOMIC_INIT(1), | 47 | .count = REFCOUNT_INIT(1), |
48 | .tasks = { | 48 | .tasks = { |
49 | { .first = NULL }, | 49 | { .first = NULL }, |
50 | { .first = NULL }, | 50 | { .first = NULL }, |
@@ -108,8 +108,7 @@ void put_pid(struct pid *pid) | |||
108 | return; | 108 | return; |
109 | 109 | ||
110 | ns = pid->numbers[pid->level].ns; | 110 | ns = pid->numbers[pid->level].ns; |
111 | if ((atomic_read(&pid->count) == 1) || | 111 | if (refcount_dec_and_test(&pid->count)) { |
112 | atomic_dec_and_test(&pid->count)) { | ||
113 | kmem_cache_free(ns->pid_cachep, pid); | 112 | kmem_cache_free(ns->pid_cachep, pid); |
114 | put_pid_ns(ns); | 113 | put_pid_ns(ns); |
115 | } | 114 | } |
@@ -212,7 +211,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) | |||
212 | } | 211 | } |
213 | 212 | ||
214 | get_pid_ns(ns); | 213 | get_pid_ns(ns); |
215 | atomic_set(&pid->count, 1); | 214 | refcount_set(&pid->count, 1); |
216 | for (type = 0; type < PIDTYPE_MAX; ++type) | 215 | for (type = 0; type < PIDTYPE_MAX; ++type) |
217 | INIT_HLIST_HEAD(&pid->tasks[type]); | 216 | INIT_HLIST_HEAD(&pid->tasks[type]); |
218 | 217 | ||
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 83a531cea2f3..cb9ddcc08119 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -32,6 +32,8 @@ | |||
32 | #include <linux/compat.h> | 32 | #include <linux/compat.h> |
33 | #include <linux/sched/signal.h> | 33 | #include <linux/sched/signal.h> |
34 | 34 | ||
35 | #include <asm/syscall.h> /* for syscall_get_* */ | ||
36 | |||
35 | /* | 37 | /* |
36 | * Access another process' address space via ptrace. | 38 | * Access another process' address space via ptrace. |
37 | * Source/target buffer must be kernel space, | 39 | * Source/target buffer must be kernel space, |
@@ -897,7 +899,100 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, | |||
897 | * to ensure no machine forgets it. | 899 | * to ensure no machine forgets it. |
898 | */ | 900 | */ |
899 | EXPORT_SYMBOL_GPL(task_user_regset_view); | 901 | EXPORT_SYMBOL_GPL(task_user_regset_view); |
900 | #endif | 902 | |
903 | static unsigned long | ||
904 | ptrace_get_syscall_info_entry(struct task_struct *child, struct pt_regs *regs, | ||
905 | struct ptrace_syscall_info *info) | ||
906 | { | ||
907 | unsigned long args[ARRAY_SIZE(info->entry.args)]; | ||
908 | int i; | ||
909 | |||
910 | info->op = PTRACE_SYSCALL_INFO_ENTRY; | ||
911 | info->entry.nr = syscall_get_nr(child, regs); | ||
912 | syscall_get_arguments(child, regs, args); | ||
913 | for (i = 0; i < ARRAY_SIZE(args); i++) | ||
914 | info->entry.args[i] = args[i]; | ||
915 | |||
916 | /* args is the last field in struct ptrace_syscall_info.entry */ | ||
917 | return offsetofend(struct ptrace_syscall_info, entry.args); | ||
918 | } | ||
919 | |||
920 | static unsigned long | ||
921 | ptrace_get_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs, | ||
922 | struct ptrace_syscall_info *info) | ||
923 | { | ||
924 | /* | ||
925 | * As struct ptrace_syscall_info.entry is currently a subset | ||
926 | * of struct ptrace_syscall_info.seccomp, it makes sense to | ||
927 | * initialize that subset using ptrace_get_syscall_info_entry(). | ||
928 | * This can be reconsidered in the future if these structures | ||
929 | * diverge significantly enough. | ||
930 | */ | ||
931 | ptrace_get_syscall_info_entry(child, regs, info); | ||
932 | info->op = PTRACE_SYSCALL_INFO_SECCOMP; | ||
933 | info->seccomp.ret_data = child->ptrace_message; | ||
934 | |||
935 | /* ret_data is the last field in struct ptrace_syscall_info.seccomp */ | ||
936 | return offsetofend(struct ptrace_syscall_info, seccomp.ret_data); | ||
937 | } | ||
938 | |||
939 | static unsigned long | ||
940 | ptrace_get_syscall_info_exit(struct task_struct *child, struct pt_regs *regs, | ||
941 | struct ptrace_syscall_info *info) | ||
942 | { | ||
943 | info->op = PTRACE_SYSCALL_INFO_EXIT; | ||
944 | info->exit.rval = syscall_get_error(child, regs); | ||
945 | info->exit.is_error = !!info->exit.rval; | ||
946 | if (!info->exit.is_error) | ||
947 | info->exit.rval = syscall_get_return_value(child, regs); | ||
948 | |||
949 | /* is_error is the last field in struct ptrace_syscall_info.exit */ | ||
950 | return offsetofend(struct ptrace_syscall_info, exit.is_error); | ||
951 | } | ||
952 | |||
953 | static int | ||
954 | ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size, | ||
955 | void __user *datavp) | ||
956 | { | ||
957 | struct pt_regs *regs = task_pt_regs(child); | ||
958 | struct ptrace_syscall_info info = { | ||
959 | .op = PTRACE_SYSCALL_INFO_NONE, | ||
960 | .arch = syscall_get_arch(child), | ||
961 | .instruction_pointer = instruction_pointer(regs), | ||
962 | .stack_pointer = user_stack_pointer(regs), | ||
963 | }; | ||
964 | unsigned long actual_size = offsetof(struct ptrace_syscall_info, entry); | ||
965 | unsigned long write_size; | ||
966 | |||
967 | /* | ||
968 | * This does not need lock_task_sighand() to access | ||
969 | * child->last_siginfo because ptrace_freeze_traced() | ||
970 | * called earlier by ptrace_check_attach() ensures that | ||
971 | * the tracee cannot go away and clear its last_siginfo. | ||
972 | */ | ||
973 | switch (child->last_siginfo ? child->last_siginfo->si_code : 0) { | ||
974 | case SIGTRAP | 0x80: | ||
975 | switch (child->ptrace_message) { | ||
976 | case PTRACE_EVENTMSG_SYSCALL_ENTRY: | ||
977 | actual_size = ptrace_get_syscall_info_entry(child, regs, | ||
978 | &info); | ||
979 | break; | ||
980 | case PTRACE_EVENTMSG_SYSCALL_EXIT: | ||
981 | actual_size = ptrace_get_syscall_info_exit(child, regs, | ||
982 | &info); | ||
983 | break; | ||
984 | } | ||
985 | break; | ||
986 | case SIGTRAP | (PTRACE_EVENT_SECCOMP << 8): | ||
987 | actual_size = ptrace_get_syscall_info_seccomp(child, regs, | ||
988 | &info); | ||
989 | break; | ||
990 | } | ||
991 | |||
992 | write_size = min(actual_size, user_size); | ||
993 | return copy_to_user(datavp, &info, write_size) ? -EFAULT : actual_size; | ||
994 | } | ||
995 | #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ | ||
901 | 996 | ||
902 | int ptrace_request(struct task_struct *child, long request, | 997 | int ptrace_request(struct task_struct *child, long request, |
903 | unsigned long addr, unsigned long data) | 998 | unsigned long addr, unsigned long data) |
@@ -1114,6 +1209,10 @@ int ptrace_request(struct task_struct *child, long request, | |||
1114 | ret = __put_user(kiov.iov_len, &uiov->iov_len); | 1209 | ret = __put_user(kiov.iov_len, &uiov->iov_len); |
1115 | break; | 1210 | break; |
1116 | } | 1211 | } |
1212 | |||
1213 | case PTRACE_GET_SYSCALL_INFO: | ||
1214 | ret = ptrace_get_syscall_info(child, addr, datavp); | ||
1215 | break; | ||
1117 | #endif | 1216 | #endif |
1118 | 1217 | ||
1119 | case PTRACE_SECCOMP_GET_FILTER: | 1218 | case PTRACE_SECCOMP_GET_FILTER: |
diff --git a/kernel/signal.c b/kernel/signal.c index dabe100d2091..91b789dd6e72 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -2951,80 +2951,49 @@ EXPORT_SYMBOL(sigprocmask); | |||
2951 | * | 2951 | * |
2952 | * This is useful for syscalls such as ppoll, pselect, io_pgetevents and | 2952 | * This is useful for syscalls such as ppoll, pselect, io_pgetevents and |
2953 | * epoll_pwait where a new sigmask is passed from userland for the syscalls. | 2953 | * epoll_pwait where a new sigmask is passed from userland for the syscalls. |
2954 | * | ||
2955 | * Note that it does set_restore_sigmask() in advance, so it must be always | ||
2956 | * paired with restore_saved_sigmask_unless() before return from syscall. | ||
2954 | */ | 2957 | */ |
2955 | int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, | 2958 | int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize) |
2956 | sigset_t *oldset, size_t sigsetsize) | ||
2957 | { | 2959 | { |
2958 | if (!usigmask) | 2960 | sigset_t kmask; |
2959 | return 0; | ||
2960 | 2961 | ||
2962 | if (!umask) | ||
2963 | return 0; | ||
2961 | if (sigsetsize != sizeof(sigset_t)) | 2964 | if (sigsetsize != sizeof(sigset_t)) |
2962 | return -EINVAL; | 2965 | return -EINVAL; |
2963 | if (copy_from_user(set, usigmask, sizeof(sigset_t))) | 2966 | if (copy_from_user(&kmask, umask, sizeof(sigset_t))) |
2964 | return -EFAULT; | 2967 | return -EFAULT; |
2965 | 2968 | ||
2966 | *oldset = current->blocked; | 2969 | set_restore_sigmask(); |
2967 | set_current_blocked(set); | 2970 | current->saved_sigmask = current->blocked; |
2971 | set_current_blocked(&kmask); | ||
2968 | 2972 | ||
2969 | return 0; | 2973 | return 0; |
2970 | } | 2974 | } |
2971 | EXPORT_SYMBOL(set_user_sigmask); | ||
2972 | 2975 | ||
2973 | #ifdef CONFIG_COMPAT | 2976 | #ifdef CONFIG_COMPAT |
2974 | int set_compat_user_sigmask(const compat_sigset_t __user *usigmask, | 2977 | int set_compat_user_sigmask(const compat_sigset_t __user *umask, |
2975 | sigset_t *set, sigset_t *oldset, | ||
2976 | size_t sigsetsize) | 2978 | size_t sigsetsize) |
2977 | { | 2979 | { |
2978 | if (!usigmask) | 2980 | sigset_t kmask; |
2979 | return 0; | ||
2980 | 2981 | ||
2982 | if (!umask) | ||
2983 | return 0; | ||
2981 | if (sigsetsize != sizeof(compat_sigset_t)) | 2984 | if (sigsetsize != sizeof(compat_sigset_t)) |
2982 | return -EINVAL; | 2985 | return -EINVAL; |
2983 | if (get_compat_sigset(set, usigmask)) | 2986 | if (get_compat_sigset(&kmask, umask)) |
2984 | return -EFAULT; | 2987 | return -EFAULT; |
2985 | 2988 | ||
2986 | *oldset = current->blocked; | 2989 | set_restore_sigmask(); |
2987 | set_current_blocked(set); | 2990 | current->saved_sigmask = current->blocked; |
2991 | set_current_blocked(&kmask); | ||
2988 | 2992 | ||
2989 | return 0; | 2993 | return 0; |
2990 | } | 2994 | } |
2991 | EXPORT_SYMBOL(set_compat_user_sigmask); | ||
2992 | #endif | 2995 | #endif |
2993 | 2996 | ||
2994 | /* | ||
2995 | * restore_user_sigmask: | ||
2996 | * usigmask: sigmask passed in from userland. | ||
2997 | * sigsaved: saved sigmask when the syscall started and changed the sigmask to | ||
2998 | * usigmask. | ||
2999 | * | ||
3000 | * This is useful for syscalls such as ppoll, pselect, io_pgetevents and | ||
3001 | * epoll_pwait where a new sigmask is passed in from userland for the syscalls. | ||
3002 | */ | ||
3003 | void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved, | ||
3004 | bool interrupted) | ||
3005 | { | ||
3006 | |||
3007 | if (!usigmask) | ||
3008 | return; | ||
3009 | /* | ||
3010 | * When signals are pending, do not restore them here. | ||
3011 | * Restoring sigmask here can lead to delivering signals that the above | ||
3012 | * syscalls are intended to block because of the sigmask passed in. | ||
3013 | */ | ||
3014 | if (interrupted) { | ||
3015 | current->saved_sigmask = *sigsaved; | ||
3016 | set_restore_sigmask(); | ||
3017 | return; | ||
3018 | } | ||
3019 | |||
3020 | /* | ||
3021 | * This is needed because the fast syscall return path does not restore | ||
3022 | * saved_sigmask when signals are not pending. | ||
3023 | */ | ||
3024 | set_current_blocked(sigsaved); | ||
3025 | } | ||
3026 | EXPORT_SYMBOL(restore_user_sigmask); | ||
3027 | |||
3028 | /** | 2997 | /** |
3029 | * sys_rt_sigprocmask - change the list of currently blocked signals | 2998 | * sys_rt_sigprocmask - change the list of currently blocked signals |
3030 | * @how: whether to add, remove, or set signals | 2999 | * @how: whether to add, remove, or set signals |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1c1ad1e14f21..43186ccfa139 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -188,17 +188,17 @@ extern int no_unaligned_warning; | |||
188 | * enum sysctl_writes_mode - supported sysctl write modes | 188 | * enum sysctl_writes_mode - supported sysctl write modes |
189 | * | 189 | * |
190 | * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value | 190 | * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value |
191 | * to be written, and multiple writes on the same sysctl file descriptor | 191 | * to be written, and multiple writes on the same sysctl file descriptor |
192 | * will rewrite the sysctl value, regardless of file position. No warning | 192 | * will rewrite the sysctl value, regardless of file position. No warning |
193 | * is issued when the initial position is not 0. | 193 | * is issued when the initial position is not 0. |
194 | * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is | 194 | * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is |
195 | * not 0. | 195 | * not 0. |
196 | * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at | 196 | * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at |
197 | * file position 0 and the value must be fully contained in the buffer | 197 | * file position 0 and the value must be fully contained in the buffer |
198 | * sent to the write syscall. If dealing with strings respect the file | 198 | * sent to the write syscall. If dealing with strings respect the file |
199 | * position, but restrict this to the max length of the buffer, anything | 199 | * position, but restrict this to the max length of the buffer, anything |
200 | * passed the max lenght will be ignored. Multiple writes will append | 200 | * passed the max length will be ignored. Multiple writes will append |
201 | * to the buffer. | 201 | * to the buffer. |
202 | * | 202 | * |
203 | * These write modes control how current file position affects the behavior of | 203 | * These write modes control how current file position affects the behavior of |
204 | * updating sysctl values through the proc interface on each write. | 204 | * updating sysctl values through the proc interface on each write. |