1 files changed, 66 insertions, 62 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index 45eb650b9654..bd6f0e466f6c 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -139,49 +139,36 @@ static void munlock_vma_page(struct page *page)
 }
 /**
- * __mlock_vma_pages_range() -  mlock/munlock a range of pages in the vma.
+ * __mlock_vma_pages_range() -  mlock a range of pages in the vma.
 * @vma:   target vma
 * @start: start address
 * @end:   end address
- * @mlock: 0 indicate munlock, otherwise mlock.
 *
- * If @mlock == 0, unlock an mlocked range;
+ * This takes care of making the pages present too.
- * else mlock the range of pages.  This takes care of making the pages present ,
- * too.
 *
 * return 0 on success, negative error code on error.
 *
 * vma->vm_mm->mmap_sem must be held for at least read.
 */
 static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-                                   unsigned long start, unsigned long end,
+                                    unsigned long start, unsigned long end)
-                                   int mlock)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long addr = start;
        struct page *pages[16]; /* 16 gives a reasonable batch */
        int nr_pages = (end - start) / PAGE_SIZE;
        int ret = 0;
-        int gup_flags = 0;
+        int gup_flags;
        VM_BUG_ON(start & ~PAGE_MASK);
        VM_BUG_ON(end   & ~PAGE_MASK);
        VM_BUG_ON(start < vma->vm_start);
        VM_BUG_ON(end   > vma->vm_end);
-        VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
+        VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
-                  (atomic_read(&mm->mm_users) != 0));
-        /*
-         * mlock:   don't page populate if vma has PROT_NONE permission.
-         * munlock: always do munlock although the vma has PROT_NONE
-         *          permission, or SIGKILL is pending.
-         */
-        if (!mlock)
-                gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
-                             GUP_FLAGS_IGNORE_SIGKILL;
+        gup_flags = FOLL_TOUCH | FOLL_GET;
        if (vma->vm_flags & VM_WRITE)
-                gup_flags |= GUP_FLAGS_WRITE;
+                gup_flags |= FOLL_WRITE;
        while (nr_pages > 0) {
                int i;
@@ -201,51 +188,45 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
                 * This can happen for, e.g., VM_NONLINEAR regions before
                 * a page has been allocated and mapped at a given offset,
                 * or for addresses that map beyond end of a file.
-                 * We'll mlock the the pages if/when they get faulted in.
+                 * We'll mlock the pages if/when they get faulted in.
                 */
                if (ret < 0)
                        break;
-                if (ret == 0) {
-                        /*
-                         * We know the vma is there, so the only time
-                         * we cannot get a single page should be an
-                         * error (ret < 0) case.
-                         */
-                        WARN_ON(1);
-                        break;
-                }
                lru_add_drain();        /* push cached pages to LRU */
                for (i = 0; i < ret; i++) {
                        struct page *page = pages[i];
-                        lock_page(page);
-                        /*
-                         * Because we lock page here and migration is blocked
-                         * by the elevated reference, we need only check for
-                         * page truncation (file-cache only).
-                         */
                        if (page->mapping) {
-                                if (mlock)
+                                /*
+                                 * That preliminary check is mainly to avoid
+                                 * the pointless overhead of lock_page on the
+                                 * ZERO_PAGE: which might bounce very badly if
+                                 * there is contention.  However, we're still
+                                 * dirtying its cacheline with get/put_page:
+                                 * we'll add another __get_user_pages flag to
+                                 * avoid it if that case turns out to matter.
+                                 */
+                                lock_page(page);
+                                /*
+                                 * Because we lock page here and migration is
+                                 * blocked by the elevated reference, we need
+                                 * only check for file-cache page truncation.
+                                 */
+                                if (page->mapping)
                                        mlock_vma_page(page);
-                                else
+                                unlock_page(page);
-                                        munlock_vma_page(page);
                        }
-                        unlock_page(page);
+                        put_page(page); /* ref from get_user_pages() */
-                        put_page(page);         /* ref from get_user_pages() */
-                        /*
-                         * here we assume that get_user_pages() has given us
-                         * a list of virtually contiguous pages.
-                         */
-                        addr += PAGE_SIZE;      /* for next get_user_pages() */
-                        nr_pages--;
                }
+                addr += ret * PAGE_SIZE;
+                nr_pages -= ret;
                ret = 0;
        }
-        return ret;     /* count entire vma as locked_vm */
+        return ret;     /* 0 or negative error code */
 }
 /*
@@ -289,7 +270,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
                        is_vm_hugetlb_page(vma) ||
                        vma == get_gate_vma(current))) {
-                __mlock_vma_pages_range(vma, start, end, 1);
+                __mlock_vma_pages_range(vma, start, end);
                /* Hide errors from mmap() and other callers */
                return 0;
@@ -310,7 +291,6 @@ no_mlock:
        return nr_pages;                /* error or pages NOT mlocked */
 }
 /*
 * munlock_vma_pages_range() - munlock all pages in the vma range.'
 * @vma - vma containing range to be munlock()ed.
@@ -330,10 +310,38 @@ no_mlock:
 * free them.  This will result in freeing mlocked pages.
 */
 void munlock_vma_pages_range(struct vm_area_struct *vma,
-                           unsigned long start, unsigned long end)
+                             unsigned long start, unsigned long end)
 {
+        unsigned long addr;
+        lru_add_drain();
        vma->vm_flags &= ~VM_LOCKED;
-        __mlock_vma_pages_range(vma, start, end, 0);
+        for (addr = start; addr < end; addr += PAGE_SIZE) {
+                struct page *page;
+                /*
+                 * Although FOLL_DUMP is intended for get_dump_page(),
+                 * it just so happens that its special treatment of the
+                 * ZERO_PAGE (returning an error instead of doing get_page)
+                 * suits munlock very well (and if somehow an abnormal page
+                 * has sneaked into the range, we won't oops here: great).
+                 */
+                page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
+                if (page && !IS_ERR(page)) {
+                        lock_page(page);
+                        /*
+                         * Like in __mlock_vma_pages_range(),
+                         * because we lock page here and migration is
+                         * blocked by the elevated reference, we need
+                         * only check for file-cache page truncation.
+                         */
+                        if (page->mapping)
+                                munlock_vma_page(page);
+                        unlock_page(page);
+                        put_page(page);
+                }
+                cond_resched();
+        }
 }
 /*
@@ -400,18 +408,14 @@ success:
         * It's okay if try_to_unmap_one unmaps a page just after we
         * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
         */
-        vma->vm_flags = newflags;
        if (lock) {
-                ret = __mlock_vma_pages_range(vma, start, end, 1);
+                vma->vm_flags = newflags;
+                ret = __mlock_vma_pages_range(vma, start, end);
-                if (ret > 0) {
+                if (ret < 0)
-                        mm->locked_vm -= ret;
+                        ret = __mlock_posix_error_return(ret);
-                        ret = 0;
-                } else
-                        ret = __mlock_posix_error_return(ret); /* translate if needed */
        } else {
-                __mlock_vma_pages_range(vma, start, end, 0);
+                munlock_vma_pages_range(vma, start, end);
        }
 out:

diff --git a/mm/mlock.c b/mm/mlock.c index 45eb650b9654..bd6f0e466f6c 100644 --- a/mm/mlock.c +++ b/mm/mlock.c
@@ -139,49 +139,36 @@ static void munlock_vma_page(struct page *page)
139	}	139	}
140		140
141	/**	141	/**
142	* __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma.	142	* __mlock_vma_pages_range() - mlock a range of pages in the vma.
143	* @vma: target vma	143	* @vma: target vma
144	* @start: start address	144	* @start: start address
145	* @end: end address	145	* @end: end address
146	* @mlock: 0 indicate munlock, otherwise mlock.
147	*	146	*
148	* If @mlock == 0, unlock an mlocked range;	147	* This takes care of making the pages present too.
149	* else mlock the range of pages. This takes care of making the pages present ,
150	* too.
151	*	148	*
152	* return 0 on success, negative error code on error.	149	* return 0 on success, negative error code on error.
153	*	150	*
154	* vma->vm_mm->mmap_sem must be held for at least read.	151	* vma->vm_mm->mmap_sem must be held for at least read.
155	*/	152	*/
156	static long __mlock_vma_pages_range(struct vm_area_struct *vma,	153	static long __mlock_vma_pages_range(struct vm_area_struct *vma,
157	unsigned long start, unsigned long end,	154	unsigned long start, unsigned long end)
158	int mlock)
159	{	155	{
160	struct mm_struct *mm = vma->vm_mm;	156	struct mm_struct *mm = vma->vm_mm;
161	unsigned long addr = start;	157	unsigned long addr = start;
162	struct page *pages[16]; /* 16 gives a reasonable batch */	158	struct page *pages[16]; /* 16 gives a reasonable batch */
163	int nr_pages = (end - start) / PAGE_SIZE;	159	int nr_pages = (end - start) / PAGE_SIZE;
164	int ret = 0;	160	int ret = 0;
165	int gup_flags = 0;	161	int gup_flags;
166		162
167	VM_BUG_ON(start & ~PAGE_MASK);	163	VM_BUG_ON(start & ~PAGE_MASK);
168	VM_BUG_ON(end & ~PAGE_MASK);	164	VM_BUG_ON(end & ~PAGE_MASK);
169	VM_BUG_ON(start < vma->vm_start);	165	VM_BUG_ON(start < vma->vm_start);
170	VM_BUG_ON(end > vma->vm_end);	166	VM_BUG_ON(end > vma->vm_end);
171	VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&	167	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
172	(atomic_read(&mm->mm_users) != 0));
173
174	/*
175	* mlock: don't page populate if vma has PROT_NONE permission.
176	* munlock: always do munlock although the vma has PROT_NONE
177	* permission, or SIGKILL is pending.
178	*/
179	if (!mlock)
180	gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
181	GUP_FLAGS_IGNORE_SIGKILL;
182		168
		169	gup_flags = FOLL_TOUCH | FOLL_GET;
183	if (vma->vm_flags & VM_WRITE)	170	if (vma->vm_flags & VM_WRITE)
184	gup_flags |= GUP_FLAGS_WRITE;	171	gup_flags |= FOLL_WRITE;
185		172
186	while (nr_pages > 0) {	173	while (nr_pages > 0) {
187	int i;	174	int i;
@@ -201,51 +188,45 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
201	* This can happen for, e.g., VM_NONLINEAR regions before	188	* This can happen for, e.g., VM_NONLINEAR regions before
202	* a page has been allocated and mapped at a given offset,	189	* a page has been allocated and mapped at a given offset,
203	* or for addresses that map beyond end of a file.	190	* or for addresses that map beyond end of a file.
204	* We'll mlock the the pages if/when they get faulted in.	191	* We'll mlock the pages if/when they get faulted in.
205	*/	192	*/
206	if (ret < 0)	193	if (ret < 0)
207	break;	194	break;
208	if (ret == 0) {
209	/*
210	* We know the vma is there, so the only time
211	* we cannot get a single page should be an
212	* error (ret < 0) case.
213	*/
214	WARN_ON(1);
215	break;
216	}
217		195
218	lru_add_drain(); /* push cached pages to LRU */	196	lru_add_drain(); /* push cached pages to LRU */
219		197
220	for (i = 0; i < ret; i++) {	198	for (i = 0; i < ret; i++) {
221	struct page *page = pages[i];	199	struct page *page = pages[i];
222		200
223	lock_page(page);
224	/*
225	* Because we lock page here and migration is blocked
226	* by the elevated reference, we need only check for
227	* page truncation (file-cache only).
228	*/
229	if (page->mapping) {	201	if (page->mapping) {
230	if (mlock)	202	/*
		203	* That preliminary check is mainly to avoid
		204	* the pointless overhead of lock_page on the
		205	* ZERO_PAGE: which might bounce very badly if
		206	* there is contention. However, we're still
		207	* dirtying its cacheline with get/put_page:
		208	* we'll add another __get_user_pages flag to
		209	* avoid it if that case turns out to matter.
		210	*/
		211	lock_page(page);
		212	/*
		213	* Because we lock page here and migration is
		214	* blocked by the elevated reference, we need
		215	* only check for file-cache page truncation.
		216	*/
		217	if (page->mapping)
231	mlock_vma_page(page);	218	mlock_vma_page(page);
232	else	219	unlock_page(page);
233	munlock_vma_page(page);
234	}	220	}
235	unlock_page(page);	221	put_page(page); /* ref from get_user_pages() */
236	put_page(page); /* ref from get_user_pages() */
237
238	/*
239	* here we assume that get_user_pages() has given us
240	* a list of virtually contiguous pages.
241	*/
242	addr += PAGE_SIZE; /* for next get_user_pages() */
243	nr_pages--;
244	}	222	}
		223
		224	addr += ret * PAGE_SIZE;
		225	nr_pages -= ret;
245	ret = 0;	226	ret = 0;
246	}	227	}
247		228
248	return ret; /* count entire vma as locked_vm */	229	return ret; /* 0 or negative error code */
249	}	230	}
250		231
251	/*	232	/*
@@ -289,7 +270,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
289	is_vm_hugetlb_page(vma) ||	270	is_vm_hugetlb_page(vma) ||
290	vma == get_gate_vma(current))) {	271	vma == get_gate_vma(current))) {
291		272
292	__mlock_vma_pages_range(vma, start, end, 1);	273	__mlock_vma_pages_range(vma, start, end);
293		274
294	/* Hide errors from mmap() and other callers */	275	/* Hide errors from mmap() and other callers */
295	return 0;	276	return 0;
@@ -310,7 +291,6 @@ no_mlock:
310	return nr_pages; /* error or pages NOT mlocked */	291	return nr_pages; /* error or pages NOT mlocked */
311	}	292	}
312		293
313
314	/*	294	/*
315	* munlock_vma_pages_range() - munlock all pages in the vma range.'	295	* munlock_vma_pages_range() - munlock all pages in the vma range.'
316	* @vma - vma containing range to be munlock()ed.	296	* @vma - vma containing range to be munlock()ed.
@@ -330,10 +310,38 @@ no_mlock:
330	* free them. This will result in freeing mlocked pages.	310	* free them. This will result in freeing mlocked pages.
331	*/	311	*/
332	void munlock_vma_pages_range(struct vm_area_struct *vma,	312	void munlock_vma_pages_range(struct vm_area_struct *vma,
333	unsigned long start, unsigned long end)	313	unsigned long start, unsigned long end)
334	{	314	{
		315	unsigned long addr;
		316
		317	lru_add_drain();
335	vma->vm_flags &= ~VM_LOCKED;	318	vma->vm_flags &= ~VM_LOCKED;
336	__mlock_vma_pages_range(vma, start, end, 0);	319
		320	for (addr = start; addr < end; addr += PAGE_SIZE) {
		321	struct page *page;
		322	/*
		323	* Although FOLL_DUMP is intended for get_dump_page(),
		324	* it just so happens that its special treatment of the
		325	* ZERO_PAGE (returning an error instead of doing get_page)
		326	* suits munlock very well (and if somehow an abnormal page
		327	* has sneaked into the range, we won't oops here: great).
		328	*/
		329	page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
		330	if (page && !IS_ERR(page)) {
		331	lock_page(page);
		332	/*
		333	* Like in __mlock_vma_pages_range(),
		334	* because we lock page here and migration is
		335	* blocked by the elevated reference, we need
		336	* only check for file-cache page truncation.
		337	*/
		338	if (page->mapping)
		339	munlock_vma_page(page);
		340	unlock_page(page);
		341	put_page(page);
		342	}
		343	cond_resched();
		344	}
337	}	345	}
338		346
339	/*	347	/*
@@ -400,18 +408,14 @@ success:
400	* It's okay if try_to_unmap_one unmaps a page just after we	408	* It's okay if try_to_unmap_one unmaps a page just after we
401	* set VM_LOCKED, __mlock_vma_pages_range will bring it back.	409	* set VM_LOCKED, __mlock_vma_pages_range will bring it back.
402	*/	410	*/
403	vma->vm_flags = newflags;
404		411
405	if (lock) {	412	if (lock) {
406	ret = __mlock_vma_pages_range(vma, start, end, 1);	413	vma->vm_flags = newflags;
407		414	ret = __mlock_vma_pages_range(vma, start, end);
408	if (ret > 0) {	415	if (ret < 0)
409	mm->locked_vm -= ret;	416	ret = __mlock_posix_error_return(ret);
410	ret = 0;
411	} else
412	ret = __mlock_posix_error_return(ret); /* translate if needed */
413	} else {	417	} else {
414	__mlock_vma_pages_range(vma, start, end, 0);	418	munlock_vma_pages_range(vma, start, end);
415	}	419	}
416		420
417	out:	421	out: