summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Xu <peterx@redhat.com>2019-05-13 20:16:41 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-14 12:47:45 -0400
commitcefdca0a86be517bc390fc4541e3674b8e7803b0 (patch)
treef85716c23f5e1356c8e5213162489a04d40b06f9
parentf0fd50504a54f5548eb666dc16ddf8394e44e4b7 (diff)
userfaultfd/sysctl: add vm.unprivileged_userfaultfd
Userfaultfd can be misused to make it easier to exploit existing use-after-free (and similar) bugs that might otherwise only make a short window or race condition available. By using userfaultfd to stall a kernel thread, a malicious program can keep some state that it wrote, stable for an extended period, which it can then access using an existing exploit. While it doesn't cause the exploit itself, and while it's not the only thing that can stall a kernel thread when accessing a memory location, it's one of the few that never needs privilege. We can add a flag, allowing userfaultfd to be restricted, so that in general it won't be useable by arbitrary user programs, but in environments that require userfaultfd it can be turned back on. Add a global sysctl knob "vm.unprivileged_userfaultfd" to control whether userfaultfd is allowed by unprivileged users. When this is set to zero, only privileged users (root user, or users with the CAP_SYS_PTRACE capability) will be able to use the userfaultfd syscalls. Andrea said: : The only difference between the bpf sysctl and the userfaultfd sysctl : this way is that the bpf sysctl adds the CAP_SYS_ADMIN capability : requirement, while userfaultfd adds the CAP_SYS_PTRACE requirement, : because the userfaultfd monitor is more likely to need CAP_SYS_PTRACE : already if it's doing other kind of tracking on processes runtime, in : addition of userfaultfd. In other words both syscalls works only for : root, when the two sysctl are opt-in set to 1. 
[dgilbert@redhat.com: changelog additions] [akpm@linux-foundation.org: documentation tweak, per Mike] Link: http://lkml.kernel.org/r/20190319030722.12441-2-peterx@redhat.com Signed-off-by: Peter Xu <peterx@redhat.com> Suggested-by: Andrea Arcangeli <aarcange@redhat.com> Suggested-by: Mike Rapoport <rppt@linux.ibm.com> Reviewed-by: Mike Rapoport <rppt@linux.ibm.com> Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Luis Chamberlain <mcgrof@kernel.org> Cc: Maxime Coquelin <maxime.coquelin@redhat.com> Cc: Maya Gokhale <gokhale2@llnl.gov> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Pavel Emelyanov <xemul@virtuozzo.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Martin Cracauer <cracauer@cons.org> Cc: Denis Plotnikov <dplotnikov@virtuozzo.com> Cc: Marty McFadden <mcfadden8@llnl.gov> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Kees Cook <keescook@chromium.org> Cc: Mel Gorman <mgorman@suse.de> Cc: "Kirill A . Shutemov" <kirill@shutemov.name> Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/sysctl/vm.txt12
-rw-r--r--fs/userfaultfd.c5
-rw-r--r--include/linux/userfaultfd_k.h2
-rw-r--r--kernel/sysctl.c12
4 files changed, 31 insertions, 0 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 3f13d8599337..749322060f10 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -61,6 +61,7 @@ Currently, these files are in /proc/sys/vm:
61- stat_refresh 61- stat_refresh
62- numa_stat 62- numa_stat
63- swappiness 63- swappiness
64- unprivileged_userfaultfd
64- user_reserve_kbytes 65- user_reserve_kbytes
65- vfs_cache_pressure 66- vfs_cache_pressure
66- watermark_boost_factor 67- watermark_boost_factor
@@ -818,6 +819,17 @@ The default value is 60.
818 819
819============================================================== 820==============================================================
820 821
822unprivileged_userfaultfd
823
824This flag controls whether unprivileged users can use the userfaultfd
825system calls. Set this to 1 to allow unprivileged users to use the
826userfaultfd system calls, or set this to 0 to restrict userfaultfd to only
827privileged users (with CAP_SYS_PTRACE capability).
828
829The default value is 1.
830
831==============================================================
832
821- user_reserve_kbytes 833- user_reserve_kbytes
822 834
823When overcommit_memory is set to 2, "never overcommit" mode, reserve 835When overcommit_memory is set to 2, "never overcommit" mode, reserve
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f5de1e726356..3b30301c90ec 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -30,6 +30,8 @@
30#include <linux/security.h> 30#include <linux/security.h>
31#include <linux/hugetlb.h> 31#include <linux/hugetlb.h>
32 32
33int sysctl_unprivileged_userfaultfd __read_mostly = 1;
34
33static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; 35static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
34 36
35enum userfaultfd_state { 37enum userfaultfd_state {
@@ -1930,6 +1932,9 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
1930 struct userfaultfd_ctx *ctx; 1932 struct userfaultfd_ctx *ctx;
1931 int fd; 1933 int fd;
1932 1934
1935 if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE))
1936 return -EPERM;
1937
1933 BUG_ON(!current->mm); 1938 BUG_ON(!current->mm);
1934 1939
1935 /* Check the UFFD_* constants for consistency. */ 1940 /* Check the UFFD_* constants for consistency. */
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 37c9eba75c98..ac9d71e24b81 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -28,6 +28,8 @@
28#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) 28#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
29#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) 29#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
30 30
31extern int sysctl_unprivileged_userfaultfd;
32
31extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); 33extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
32 34
33extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, 35extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 599510a3355e..ba158f61aab4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -66,6 +66,7 @@
66#include <linux/kexec.h> 66#include <linux/kexec.h>
67#include <linux/bpf.h> 67#include <linux/bpf.h>
68#include <linux/mount.h> 68#include <linux/mount.h>
69#include <linux/userfaultfd_k.h>
69 70
70#include "../lib/kstrtox.h" 71#include "../lib/kstrtox.h"
71 72
@@ -1720,6 +1721,17 @@ static struct ctl_table vm_table[] = {
1720 .extra2 = (void *)&mmap_rnd_compat_bits_max, 1721 .extra2 = (void *)&mmap_rnd_compat_bits_max,
1721 }, 1722 },
1722#endif 1723#endif
1724#ifdef CONFIG_USERFAULTFD
1725 {
1726 .procname = "unprivileged_userfaultfd",
1727 .data = &sysctl_unprivileged_userfaultfd,
1728 .maxlen = sizeof(sysctl_unprivileged_userfaultfd),
1729 .mode = 0644,
1730 .proc_handler = proc_dointvec_minmax,
1731 .extra1 = &zero,
1732 .extra2 = &one,
1733 },
1734#endif
1723 { } 1735 { }
1724}; 1736};
1725 1737