diff options
author | Michel Lespinasse <walken@google.com> | 2011-01-13 18:46:11 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 20:32:36 -0500 |
commit | 110d74a921f4d272b47ef6104fcf937df808f4c8 (patch) | |
tree | a2f1705e049f06e1cf8cbaf7d6b3261f0b46b6ab | |
parent | fed067da46ad3b9acedaf794a5f05d0bc153280b (diff) |
mm: add FOLL_MLOCK follow_page flag.
Move the code to mlock pages from __mlock_vma_pages_range() to
follow_page().
This allows __mlock_vma_pages_range() to not have to break down work into
16-page batches.
An additional motivation for doing this within the present patch series is
that it'll make it easier for a later change to drop mmap_sem when
blocking on disk (we'd like to be able to resume at the page that was read
from disk instead of at the start of a 16-page batch).
Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/mm.h | 1 | ||||
-rw-r--r-- | mm/memory.c | 22 | ||||
-rw-r--r-- | mm/mlock.c | 65 |
3 files changed, 28 insertions, 60 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 721f451c302..9ade803bcc1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1415,6 +1415,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, | |||
1415 | #define FOLL_GET 0x04 /* do get_page on page */ | 1415 | #define FOLL_GET 0x04 /* do get_page on page */ |
1416 | #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ | 1416 | #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ |
1417 | #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ | 1417 | #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ |
1418 | #define FOLL_MLOCK 0x40 /* mark page as mlocked */ | ||
1418 | 1419 | ||
1419 | typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, | 1420 | typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, |
1420 | void *data); | 1421 | void *data); |
diff --git a/mm/memory.c b/mm/memory.c index b8f97b8575b..15e1f19a3b1 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1310,6 +1310,28 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, | |||
1310 | */ | 1310 | */ |
1311 | mark_page_accessed(page); | 1311 | mark_page_accessed(page); |
1312 | } | 1312 | } |
1313 | if (flags & FOLL_MLOCK) { | ||
1314 | /* | ||
1315 | * The preliminary mapping check is mainly to avoid the | ||
1316 | * pointless overhead of lock_page on the ZERO_PAGE | ||
1317 | * which might bounce very badly if there is contention. | ||
1318 | * | ||
1319 | * If the page is already locked, we don't need to | ||
1320 | * handle it now - vmscan will handle it later if and | ||
1321 | * when it attempts to reclaim the page. | ||
1322 | */ | ||
1323 | if (page->mapping && trylock_page(page)) { | ||
1324 | lru_add_drain(); /* push cached pages to LRU */ | ||
1325 | /* | ||
1326 | * Because we lock page here and migration is | ||
1327 | * blocked by the pte's page reference, we need | ||
1328 | * only check for file-cache page truncation. | ||
1329 | */ | ||
1330 | if (page->mapping) | ||
1331 | mlock_vma_page(page); | ||
1332 | unlock_page(page); | ||
1333 | } | ||
1334 | } | ||
1313 | unlock: | 1335 | unlock: |
1314 | pte_unmap_unlock(ptep, ptl); | 1336 | pte_unmap_unlock(ptep, ptl); |
1315 | out: | 1337 | out: |
diff --git a/mm/mlock.c b/mm/mlock.c index 67b3dd8616d..25cc9e88c54 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -159,10 +159,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, | |||
159 | { | 159 | { |
160 | struct mm_struct *mm = vma->vm_mm; | 160 | struct mm_struct *mm = vma->vm_mm; |
161 | unsigned long addr = start; | 161 | unsigned long addr = start; |
162 | struct page *pages[16]; /* 16 gives a reasonable batch */ | ||
163 | int nr_pages = (end - start) / PAGE_SIZE; | 162 | int nr_pages = (end - start) / PAGE_SIZE; |
164 | int ret = 0; | ||
165 | int gup_flags; | 163 | int gup_flags; |
164 | int ret; | ||
166 | 165 | ||
167 | VM_BUG_ON(start & ~PAGE_MASK); | 166 | VM_BUG_ON(start & ~PAGE_MASK); |
168 | VM_BUG_ON(end & ~PAGE_MASK); | 167 | VM_BUG_ON(end & ~PAGE_MASK); |
@@ -170,7 +169,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, | |||
170 | VM_BUG_ON(end > vma->vm_end); | 169 | VM_BUG_ON(end > vma->vm_end); |
171 | VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); | 170 | VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); |
172 | 171 | ||
173 | gup_flags = FOLL_TOUCH | FOLL_GET; | 172 | gup_flags = FOLL_TOUCH | FOLL_MLOCK; |
174 | /* | 173 | /* |
175 | * We want to touch writable mappings with a write fault in order | 174 | * We want to touch writable mappings with a write fault in order |
176 | * to break COW, except for shared mappings because these don't COW | 175 | * to break COW, except for shared mappings because these don't COW |
@@ -185,63 +184,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, | |||
185 | nr_pages--; | 184 | nr_pages--; |
186 | } | 185 | } |
187 | 186 | ||
188 | while (nr_pages > 0) { | 187 | ret = __get_user_pages(current, mm, addr, nr_pages, gup_flags, |
189 | int i; | 188 | NULL, NULL); |
190 | 189 | return max(ret, 0); /* 0 or negative error code */ | |
191 | cond_resched(); | ||
192 | |||
193 | /* | ||
194 | * get_user_pages makes pages present if we are | ||
195 | * setting mlock. and this extra reference count will | ||
196 | * disable migration of this page. However, page may | ||
197 | * still be truncated out from under us. | ||
198 | */ | ||
199 | ret = __get_user_pages(current, mm, addr, | ||
200 | min_t(int, nr_pages, ARRAY_SIZE(pages)), | ||
201 | gup_flags, pages, NULL); | ||
202 | /* | ||
203 | * This can happen for, e.g., VM_NONLINEAR regions before | ||
204 | * a page has been allocated and mapped at a given offset, | ||
205 | * or for addresses that map beyond end of a file. | ||
206 | * We'll mlock the pages if/when they get faulted in. | ||
207 | */ | ||
208 | if (ret < 0) | ||
209 | break; | ||
210 | |||
211 | lru_add_drain(); /* push cached pages to LRU */ | ||
212 | |||
213 | for (i = 0; i < ret; i++) { | ||
214 | struct page *page = pages[i]; | ||
215 | |||
216 | if (page->mapping) { | ||
217 | /* | ||
218 | * That preliminary check is mainly to avoid | ||
219 | * the pointless overhead of lock_page on the | ||
220 | * ZERO_PAGE: which might bounce very badly if | ||
221 | * there is contention. However, we're still | ||
222 | * dirtying its cacheline with get/put_page: | ||
223 | * we'll add another __get_user_pages flag to | ||
224 | * avoid it if that case turns out to matter. | ||
225 | */ | ||
226 | lock_page(page); | ||
227 | /* | ||
228 | * Because we lock page here and migration is | ||
229 | * blocked by the elevated reference, we need | ||
230 | * only check for file-cache page truncation. | ||
231 | */ | ||
232 | if (page->mapping) | ||
233 | mlock_vma_page(page); | ||
234 | unlock_page(page); | ||
235 | } | ||
236 | put_page(page); /* ref from get_user_pages() */ | ||
237 | } | ||
238 | |||
239 | addr += ret * PAGE_SIZE; | ||
240 | nr_pages -= ret; | ||
241 | ret = 0; | ||
242 | } | ||
243 | |||
244 | return ret; /* 0 or negative error code */ | ||
245 | } | 190 | } |
246 | 191 | ||
247 | /* | 192 | /* |