aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mlock.c
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.com>2016-05-23 19:25:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-23 20:04:14 -0400
commitdc0ef0df7b6a90892ec41933212ac701152a254c (patch)
treeaf3ab3813d51334e8af5762e0bd4580bc33ea764 /mm/mlock.c
parente10af1328b13554dee3de91b713496704cb5822e (diff)
mm: make mmap_sem for write waits killable for mm syscalls
This is a follow up work for oom_reaper [1]. As the async OOM killing depends on oom_sem for read we would really appreciate it if a holder for write didn't stand in the way. This patchset changes many of the down_write calls to be killable to help those cases when the writer is blocked and waiting for readers to release the lock and so help __oom_reap_task to process the oom victim. Most of the patches are really trivial because the lock is held from shallow syscall paths where we can return EINTR trivially and allow the current task to die (note that EINTR will never get to the userspace as the task has a fatal signal pending). Others seem to be easy as well as the callers are already handling fatal errors and bail and return to userspace, which should be sufficient to handle the failure gracefully. I am not familiar with all those code paths so a deeper review is really appreciated. As this work is touching more areas which are not directly connected I have tried to keep the CC list as small as possible and people who I believed would be familiar are CCed only on the specific patches (all should have received the cover though). This patchset is based on linux-next and it depends on down_write_killable for rw_semaphores which got merged into the tip locking/rwsem branch and it is merged into this next tree. I guess it would be easiest to route these patches via mmotm because of the dependency on the tip tree but if the respective maintainers prefer another way I have no objections. I haven't covered all the down_write(mm->mmap_sem) instances here $ git grep "down_write(.*\<mmap_sem\>)" next/master | wc -l 98 $ git grep "down_write(.*\<mmap_sem\>)" | wc -l 62 I have tried to cover those which should be relatively easy to review in this series because this alone should be a nice improvement. Other places can be changed on top.
[0] http://lkml.kernel.org/r/1456752417-9626-1-git-send-email-mhocko@kernel.org [1] http://lkml.kernel.org/r/1452094975-551-1-git-send-email-mhocko@kernel.org [2] http://lkml.kernel.org/r/1456750705-7141-1-git-send-email-mhocko@kernel.org This patch (of 18): This is the first step in making mmap_sem write waiters killable. It focuses on the trivial ones which are taking the lock early after entering the syscall and they are not changing state before. Therefore it is very easy to change them to use down_write_killable and immediately return with -EINTR. This will allow the waiter to pass away without blocking the mmap_sem which might be required to make a forward progress. E.g. the oom reaper will need the lock for reading to dismantle the OOM victim address space. The only tricky function in this patch is vm_mmap_pgoff which has many call sites via vm_mmap. To reduce the risk keep vm_mmap with the original non-killable semantic for now. vm_munmap callers do not bother checking the return value so open code it into the munmap syscall path for now for simplicity. Signed-off-by: Michal Hocko <mhocko@suse.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Mel Gorman <mgorman@suse.de> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Hugh Dickins <hughd@google.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: David Rientjes <rientjes@google.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mlock.c')
-rw-r--r--mm/mlock.c16
1 files changed, 10 insertions, 6 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index 96f001041928..ef8dc9f395c4 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -617,7 +617,7 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
617 return error; 617 return error;
618} 618}
619 619
620static int do_mlock(unsigned long start, size_t len, vm_flags_t flags) 620static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
621{ 621{
622 unsigned long locked; 622 unsigned long locked;
623 unsigned long lock_limit; 623 unsigned long lock_limit;
@@ -635,7 +635,8 @@ static int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
635 lock_limit >>= PAGE_SHIFT; 635 lock_limit >>= PAGE_SHIFT;
636 locked = len >> PAGE_SHIFT; 636 locked = len >> PAGE_SHIFT;
637 637
638 down_write(&current->mm->mmap_sem); 638 if (down_write_killable(&current->mm->mmap_sem))
639 return -EINTR;
639 640
640 locked += current->mm->locked_vm; 641 locked += current->mm->locked_vm;
641 642
@@ -678,7 +679,8 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
678 len = PAGE_ALIGN(len + (offset_in_page(start))); 679 len = PAGE_ALIGN(len + (offset_in_page(start)));
679 start &= PAGE_MASK; 680 start &= PAGE_MASK;
680 681
681 down_write(&current->mm->mmap_sem); 682 if (down_write_killable(&current->mm->mmap_sem))
683 return -EINTR;
682 ret = apply_vma_lock_flags(start, len, 0); 684 ret = apply_vma_lock_flags(start, len, 0);
683 up_write(&current->mm->mmap_sem); 685 up_write(&current->mm->mmap_sem);
684 686
@@ -748,9 +750,10 @@ SYSCALL_DEFINE1(mlockall, int, flags)
748 lock_limit = rlimit(RLIMIT_MEMLOCK); 750 lock_limit = rlimit(RLIMIT_MEMLOCK);
749 lock_limit >>= PAGE_SHIFT; 751 lock_limit >>= PAGE_SHIFT;
750 752
751 ret = -ENOMEM; 753 if (down_write_killable(&current->mm->mmap_sem))
752 down_write(&current->mm->mmap_sem); 754 return -EINTR;
753 755
756 ret = -ENOMEM;
754 if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || 757 if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
755 capable(CAP_IPC_LOCK)) 758 capable(CAP_IPC_LOCK))
756 ret = apply_mlockall_flags(flags); 759 ret = apply_mlockall_flags(flags);
@@ -765,7 +768,8 @@ SYSCALL_DEFINE0(munlockall)
765{ 768{
766 int ret; 769 int ret;
767 770
768 down_write(&current->mm->mmap_sem); 771 if (down_write_killable(&current->mm->mmap_sem))
772 return -EINTR;
769 ret = apply_mlockall_flags(0); 773 ret = apply_mlockall_flags(0);
770 up_write(&current->mm->mmap_sem); 774 up_write(&current->mm->mmap_sem);
771 return ret; 775 return ret;