aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Guo <wei.guo.simon@gmail.com>2016-10-07 19:59:36 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-10-07 21:46:28 -0400
commit0cf2f6f6dc605e587d2c1120f295934c77e810e8 (patch)
tree9c8bdb773bf8edafab427f42f4c649b41048a9f7
parent9254990fb9f0f15f25605748da20cfbeced7c816 (diff)
mm: mlock: check against vma for actual mlock() size
In do_mlock(), the check against locked memory limitation has a hole which will fail the following cases at step 3): 1) User has a memory chunk from addressA with 50k, and user mem lock rlimit is 64k. 2) mlock(addressA, 30k) 3) mlock(addressA, 40k) The 3rd step should have been allowed since the 40k request is intersected with the previous 30k at step 2), and the 3rd step is actually for mlock on the extra 10k memory. This patch checks vma to calculate the actual "new" mlock size, if necessary, and adjusts the logic to fix this issue. [akpm@linux-foundation.org: clean up comment layout] [wei.guo.simon@gmail.com: correct a typo in count_mm_mlocked_page_nr()] Link: http://lkml.kernel.org/r/1473325970-11393-2-git-send-email-wei.guo.simon@gmail.com Link: http://lkml.kernel.org/r/1472554781-9835-2-git-send-email-wei.guo.simon@gmail.com Signed-off-by: Simon Guo <wei.guo.simon@gmail.com> Cc: Alexey Klimov <klimov.linux@gmail.com> Cc: Eric B Munson <emunson@akamai.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Simon Guo <wei.guo.simon@gmail.com> Cc: Thierry Reding <treding@nvidia.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/mlock.c49
1 files changed, 49 insertions, 0 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index 14645be06e30..b1fec89bd1c5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -617,6 +617,45 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
617 return error; 617 return error;
618} 618}
619 619
620/*
621 * Go through vma areas and sum size of mlocked
622 * vma pages, as return value.
623 * Note deferred memory locking case(mlock2(,,MLOCK_ONFAULT)
624 * is also counted.
625 * Return value: previously mlocked page counts
626 */
627static int count_mm_mlocked_page_nr(struct mm_struct *mm,
628 unsigned long start, size_t len)
629{
630 struct vm_area_struct *vma;
631 int count = 0;
632
633 if (mm == NULL)
634 mm = current->mm;
635
636 vma = find_vma(mm, start);
637 if (vma == NULL)
638 vma = mm->mmap;
639
640 for (; vma ; vma = vma->vm_next) {
641 if (start >= vma->vm_end)
642 continue;
643 if (start + len <= vma->vm_start)
644 break;
645 if (vma->vm_flags & VM_LOCKED) {
646 if (start > vma->vm_start)
647 count -= (start - vma->vm_start);
648 if (start + len < vma->vm_end) {
649 count += start + len - vma->vm_start;
650 break;
651 }
652 count += vma->vm_end - vma->vm_start;
653 }
654 }
655
656 return count >> PAGE_SHIFT;
657}
658
620static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags) 659static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
621{ 660{
622 unsigned long locked; 661 unsigned long locked;
@@ -639,6 +678,16 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
639 return -EINTR; 678 return -EINTR;
640 679
641 locked += current->mm->locked_vm; 680 locked += current->mm->locked_vm;
681 if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
682 /*
683 * It is possible that the regions requested intersect with
684 * previously mlocked areas, that part area in "mm->locked_vm"
685 * should not be counted to new mlock increment count. So check
686 * and adjust locked count if necessary.
687 */
688 locked -= count_mm_mlocked_page_nr(current->mm,
689 start, len);
690 }
642 691
643 /* check against resource limits */ 692 /* check against resource limits */
644 if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) 693 if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))