author     Michel Lespinasse <walken@google.com>             2011-01-13 18:46:11 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2011-01-13 20:32:36 -0500
commit     110d74a921f4d272b47ef6104fcf937df808f4c8
tree       a2f1705e049f06e1cf8cbaf7d6b3261f0b46b6ab /mm/mlock.c
parent     fed067da46ad3b9acedaf794a5f05d0bc153280b
mm: add FOLL_MLOCK follow_page flag.
Move the code to mlock pages from __mlock_vma_pages_range() to
follow_page().
This allows __mlock_vma_pages_range() to stop breaking the work down into
16-page batches.
An additional motivation for doing this within the present patch series is
that it'll make it easier for a later change to drop mmap_sem when
blocking on disk (we'd like to be able to resume at the page that was read
from disk instead of at the start of a 16-page batch).
Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mlock.c')
-rw-r--r--  mm/mlock.c  65
1 file changed, 5 insertions(+), 60 deletions(-)
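Note that the diffstat is limited to mm/mlock.c, so neither the FOLL_MLOCK definition (added to include/linux/mm.h next to the other FOLL_* flags) nor the matching hunk in follow_page() (mm/memory.c) appears below. As a rough sketch of where the per-page mlock logic removed from __mlock_vma_pages_range() ends up, the follow_page() side looks roughly like the following; the flag value, the parameter name, and the exact placement are assumptions, and the body simply mirrors the code deleted in the diff below:

/* include/linux/mm.h (sketch; the bit value shown is an assumption) */
#define FOLL_MLOCK      0x40    /* mark present pages as mlocked */

/* mm/memory.c, inside follow_page(), after the page has been looked up
 * (sketch mirroring the per-page code removed from mm/mlock.c below) */
if (flags & FOLL_MLOCK) {
        /*
         * The preliminary page->mapping check mainly avoids the pointless
         * overhead of lock_page on the ZERO_PAGE, which could bounce badly
         * under contention.
         */
        if (page->mapping) {
                lru_add_drain();        /* push cached pages to LRU */
                lock_page(page);
                /*
                 * With the page locked and migration blocked by the elevated
                 * reference count, only file-cache truncation needs to be
                 * re-checked before mlocking.
                 */
                if (page->mapping)
                        mlock_vma_page(page);
                unlock_page(page);
        }
}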
diff --git a/mm/mlock.c b/mm/mlock.c
index 67b3dd8616dc..25cc9e88c540 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -159,10 +159,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 {
         struct mm_struct *mm = vma->vm_mm;
         unsigned long addr = start;
-        struct page *pages[16];         /* 16 gives a reasonable batch */
         int nr_pages = (end - start) / PAGE_SIZE;
-        int ret = 0;
         int gup_flags;
+        int ret;
 
         VM_BUG_ON(start & ~PAGE_MASK);
         VM_BUG_ON(end & ~PAGE_MASK);
@@ -170,7 +169,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
         VM_BUG_ON(end > vma->vm_end);
         VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-        gup_flags = FOLL_TOUCH | FOLL_GET;
+        gup_flags = FOLL_TOUCH | FOLL_MLOCK;
         /*
          * We want to touch writable mappings with a write fault in order
          * to break COW, except for shared mappings because these don't COW
@@ -185,63 +184,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
                 nr_pages--;
         }
 
-        while (nr_pages > 0) {
-                int i;
-
-                cond_resched();
-
-                /*
-                 * get_user_pages makes pages present if we are
-                 * setting mlock. and this extra reference count will
-                 * disable migration of this page.  However, page may
-                 * still be truncated out from under us.
-                 */
-                ret = __get_user_pages(current, mm, addr,
-                                min_t(int, nr_pages, ARRAY_SIZE(pages)),
-                                gup_flags, pages, NULL);
-                /*
-                 * This can happen for, e.g., VM_NONLINEAR regions before
-                 * a page has been allocated and mapped at a given offset,
-                 * or for addresses that map beyond end of a file.
-                 * We'll mlock the pages if/when they get faulted in.
-                 */
-                if (ret < 0)
-                        break;
-
-                lru_add_drain();        /* push cached pages to LRU */
-
-                for (i = 0; i < ret; i++) {
-                        struct page *page = pages[i];
-
-                        if (page->mapping) {
-                                /*
-                                 * That preliminary check is mainly to avoid
-                                 * the pointless overhead of lock_page on the
-                                 * ZERO_PAGE: which might bounce very badly if
-                                 * there is contention.  However, we're still
-                                 * dirtying its cacheline with get/put_page:
-                                 * we'll add another __get_user_pages flag to
-                                 * avoid it if that case turns out to matter.
-                                 */
-                                lock_page(page);
-                                /*
-                                 * Because we lock page here and migration is
-                                 * blocked by the elevated reference, we need
-                                 * only check for file-cache page truncation.
-                                 */
-                                if (page->mapping)
-                                        mlock_vma_page(page);
-                                unlock_page(page);
-                        }
-                        put_page(page);         /* ref from get_user_pages() */
-                }
-
-                addr += ret * PAGE_SIZE;
-                nr_pages -= ret;
-                ret = 0;
-        }
-
-        return ret;     /* 0 or negative error code */
+        ret = __get_user_pages(current, mm, addr, nr_pages, gup_flags,
+                               NULL, NULL);
+        return max(ret, 0);     /* 0 or negative error code */
 }
 
 /*
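Since the diff removes far more than it adds, it may help to see the whole of __mlock_vma_pages_range() as this patch leaves it. The sketch below is assembled from the new (right-hand) side of the hunks above; the handful of lines that fall between the hunks are not shown in this diffstat-limited view, so the VM_BUG_ON on vma->vm_start, the FOLL_WRITE selection, and the stack_guard_page() adjustment are reconstructed assumptions, not quoted text:

/* Sketch of the post-patch function; lines marked "assumed" are
 * reconstructed between-hunk context, not quoted from the diff above. */
static long __mlock_vma_pages_range(struct vm_area_struct *vma,
                                    unsigned long start, unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long addr = start;
        int nr_pages = (end - start) / PAGE_SIZE;
        int gup_flags;
        int ret;

        VM_BUG_ON(start & ~PAGE_MASK);
        VM_BUG_ON(end & ~PAGE_MASK);
        VM_BUG_ON(start < vma->vm_start);       /* assumed */
        VM_BUG_ON(end > vma->vm_end);
        VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));

        gup_flags = FOLL_TOUCH | FOLL_MLOCK;
        /*
         * We want to touch writable mappings with a write fault in order
         * to break COW, except for shared mappings because these don't COW
         * and we would not want to dirty them for nothing.
         */
        if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
                gup_flags |= FOLL_WRITE;        /* assumed */

        /* Skip the stack guard page (assumed between-hunk context) */
        if (stack_guard_page(vma, start)) {
                addr += PAGE_SIZE;
                nr_pages--;
        }

        ret = __get_user_pages(current, mm, addr, nr_pages, gup_flags,
                               NULL, NULL);
        return max(ret, 0);     /* 0 or negative error code */
}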