aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michel Lespinasse <walken@google.com> 2011-01-13 18:46:11 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-01-13 20:32:36 -0500
commit 110d74a921f4d272b47ef6104fcf937df808f4c8 (patch)
tree a2f1705e049f06e1cf8cbaf7d6b3261f0b46b6ab
parent fed067da46ad3b9acedaf794a5f05d0bc153280b (diff)
mm: add FOLL_MLOCK follow_page flag.
Move the code to mlock pages from __mlock_vma_pages_range() to follow_page(). This allows __mlock_vma_pages_range() to not have to break down work into 16-page batches. An additional motivation for doing this within the present patch series is that it'll make it easier for a later change to drop mmap_sem when blocking on disk (we'd like to be able to resume at the page that was read from disk instead of at the start of a 16-page batch). Signed-off-by: Michel Lespinasse <walken@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Nick Piggin <npiggin@kernel.dk> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/mm.h 1
-rw-r--r-- mm/memory.c 22
-rw-r--r-- mm/mlock.c 65
3 files changed, 28 insertions, 60 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 721f451c302..9ade803bcc1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1415,6 +1415,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
1415#define FOLL_GET 0x04 /* do get_page on page */ 1415#define FOLL_GET 0x04 /* do get_page on page */
1416#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ 1416#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */
1417#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ 1417#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
1418#define FOLL_MLOCK 0x40 /* mark page as mlocked */
1418 1419
1419typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, 1420typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
1420 void *data); 1421 void *data);
diff --git a/mm/memory.c b/mm/memory.c
index b8f97b8575b..15e1f19a3b1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1310,6 +1310,28 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
1310 */ 1310 */
1311 mark_page_accessed(page); 1311 mark_page_accessed(page);
1312 } 1312 }
1313 if (flags & FOLL_MLOCK) {
1314 /*
1315 * The preliminary mapping check is mainly to avoid the
1316 * pointless overhead of lock_page on the ZERO_PAGE
1317 * which might bounce very badly if there is contention.
1318 *
1319 * If the page is already locked, we don't need to
1320 * handle it now - vmscan will handle it later if and
1321 * when it attempts to reclaim the page.
1322 */
1323 if (page->mapping && trylock_page(page)) {
1324 lru_add_drain(); /* push cached pages to LRU */
1325 /*
1326 * Because we lock page here and migration is
1327 * blocked by the pte's page reference, we need
1328 * only check for file-cache page truncation.
1329 */
1330 if (page->mapping)
1331 mlock_vma_page(page);
1332 unlock_page(page);
1333 }
1334 }
1313unlock: 1335unlock:
1314 pte_unmap_unlock(ptep, ptl); 1336 pte_unmap_unlock(ptep, ptl);
1315out: 1337out:
diff --git a/mm/mlock.c b/mm/mlock.c
index 67b3dd8616d..25cc9e88c54 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -159,10 +159,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
159{ 159{
160 struct mm_struct *mm = vma->vm_mm; 160 struct mm_struct *mm = vma->vm_mm;
161 unsigned long addr = start; 161 unsigned long addr = start;
162 struct page *pages[16]; /* 16 gives a reasonable batch */
163 int nr_pages = (end - start) / PAGE_SIZE; 162 int nr_pages = (end - start) / PAGE_SIZE;
164 int ret = 0;
165 int gup_flags; 163 int gup_flags;
164 int ret;
166 165
167 VM_BUG_ON(start & ~PAGE_MASK); 166 VM_BUG_ON(start & ~PAGE_MASK);
168 VM_BUG_ON(end & ~PAGE_MASK); 167 VM_BUG_ON(end & ~PAGE_MASK);
@@ -170,7 +169,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
170 VM_BUG_ON(end > vma->vm_end); 169 VM_BUG_ON(end > vma->vm_end);
171 VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); 170 VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
172 171
173 gup_flags = FOLL_TOUCH | FOLL_GET; 172 gup_flags = FOLL_TOUCH | FOLL_MLOCK;
174 /* 173 /*
175 * We want to touch writable mappings with a write fault in order 174 * We want to touch writable mappings with a write fault in order
176 * to break COW, except for shared mappings because these don't COW 175 * to break COW, except for shared mappings because these don't COW
@@ -185,63 +184,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
185 nr_pages--; 184 nr_pages--;
186 } 185 }
187 186
188 while (nr_pages > 0) { 187 ret = __get_user_pages(current, mm, addr, nr_pages, gup_flags,
189 int i; 188 NULL, NULL);
190 189 return max(ret, 0); /* 0 or negative error code */
191 cond_resched();
192
193 /*
194 * get_user_pages makes pages present if we are
195 * setting mlock. and this extra reference count will
196 * disable migration of this page. However, page may
197 * still be truncated out from under us.
198 */
199 ret = __get_user_pages(current, mm, addr,
200 min_t(int, nr_pages, ARRAY_SIZE(pages)),
201 gup_flags, pages, NULL);
202 /*
203 * This can happen for, e.g., VM_NONLINEAR regions before
204 * a page has been allocated and mapped at a given offset,
205 * or for addresses that map beyond end of a file.
206 * We'll mlock the pages if/when they get faulted in.
207 */
208 if (ret < 0)
209 break;
210
211 lru_add_drain(); /* push cached pages to LRU */
212
213 for (i = 0; i < ret; i++) {
214 struct page *page = pages[i];
215
216 if (page->mapping) {
217 /*
218 * That preliminary check is mainly to avoid
219 * the pointless overhead of lock_page on the
220 * ZERO_PAGE: which might bounce very badly if
221 * there is contention. However, we're still
222 * dirtying its cacheline with get/put_page:
223 * we'll add another __get_user_pages flag to
224 * avoid it if that case turns out to matter.
225 */
226 lock_page(page);
227 /*
228 * Because we lock page here and migration is
229 * blocked by the elevated reference, we need
230 * only check for file-cache page truncation.
231 */
232 if (page->mapping)
233 mlock_vma_page(page);
234 unlock_page(page);
235 }
236 put_page(page); /* ref from get_user_pages() */
237 }
238
239 addr += ret * PAGE_SIZE;
240 nr_pages -= ret;
241 ret = 0;
242 }
243
244 return ret; /* 0 or negative error code */
245} 190}
246 191
247/* 192/*