author     Shaohua Li <shli@kernel.org>  2013-02-22 19:32:31 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-02-23 20:50:10 -0500
commit     1998cc048901109a29924380b8e91bc049b32951 (patch)
tree       71c6875e8a9a6da14899f74e2dd3168d99374bd5 /mm/madvise.c
parent     a394cb8ee632ec5edce20309901ec66767497a43 (diff)
mm: make madvise(MADV_WILLNEED) support swap file prefetch
Make madvise(MADV_WILLNEED) support swap file prefetch. If memory has been swapped out, this syscall can do swap-in prefetch. It has no impact if the memory isn't swapped out.

[akpm@linux-foundation.org: fix CONFIG_SWAP=n build]
[sasha.levin@oracle.com: fix BUG on madvise early failure]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/madvise.c')
-rw-r--r--   mm/madvise.c   105
1 file changed, 101 insertions(+), 4 deletions(-)
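Before the diff itself, here is a minimal userspace sketch of the behaviour this patch enables; the mapping size, fill pattern, and error handling below are illustrative assumptions, not taken from the patch. With this change, madvise(MADV_WILLNEED) on an anonymous mapping starts asynchronous swap-in readahead for pages currently in swap, where previously the hint only prefetched file-backed pages.

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
        size_t len = 64 << 20;          /* 64 MiB anonymous mapping (arbitrary size) */
        char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (buf == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        memset(buf, 0xaa, len);         /* populate; under memory pressure these pages may be swapped out */

        /*
         * Hint that the whole range will be needed soon.  With this patch
         * the kernel starts asynchronous swap-in readahead for any pages
         * of the range that are in swap; before it, the hint had no effect
         * on anonymous memory.
         */
        if (madvise(buf, len, MADV_WILLNEED))
                perror("madvise");

        /* Later accesses are now more likely to hit already-resident pages. */
        munmap(buf, len);
        return 0;
}

Note that the call only schedules I/O and does not wait for completion; the commit also wraps the madvise loop in a block plug (blk_start_plug/blk_finish_plug) so the readahead requests can be batched and merged.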
diff --git a/mm/madvise.c b/mm/madvise.c
index 03dfa5c7adb3..c58c94b56c3d 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -16,6 +16,9 @@
 #include <linux/ksm.h>
 #include <linux/fs.h>
 #include <linux/file.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>

 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -131,6 +134,84 @@ out:
         return error;
 }

+#ifdef CONFIG_SWAP
+static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
+        unsigned long end, struct mm_walk *walk)
+{
+        pte_t *orig_pte;
+        struct vm_area_struct *vma = walk->private;
+        unsigned long index;
+
+        if (pmd_none_or_trans_huge_or_clear_bad(pmd))
+                return 0;
+
+        for (index = start; index != end; index += PAGE_SIZE) {
+                pte_t pte;
+                swp_entry_t entry;
+                struct page *page;
+                spinlock_t *ptl;
+
+                orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
+                pte = *(orig_pte + ((index - start) / PAGE_SIZE));
+                pte_unmap_unlock(orig_pte, ptl);
+
+                if (pte_present(pte) || pte_none(pte) || pte_file(pte))
+                        continue;
+                entry = pte_to_swp_entry(pte);
+                if (unlikely(non_swap_entry(entry)))
+                        continue;
+
+                page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
+                                             vma, index);
+                if (page)
+                        page_cache_release(page);
+        }
+
+        return 0;
+}
+
+static void force_swapin_readahead(struct vm_area_struct *vma,
+                unsigned long start, unsigned long end)
+{
+        struct mm_walk walk = {
+                .mm = vma->vm_mm,
+                .pmd_entry = swapin_walk_pmd_entry,
+                .private = vma,
+        };
+
+        walk_page_range(start, end, &walk);
+
+        lru_add_drain();        /* Push any new pages onto the LRU now */
+}
+
+static void force_shm_swapin_readahead(struct vm_area_struct *vma,
+                unsigned long start, unsigned long end,
+                struct address_space *mapping)
+{
+        pgoff_t index;
+        struct page *page;
+        swp_entry_t swap;
+
+        for (; start < end; start += PAGE_SIZE) {
+                index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+
+                page = find_get_page(mapping, index);
+                if (!radix_tree_exceptional_entry(page)) {
+                        if (page)
+                                page_cache_release(page);
+                        continue;
+                }
+                swap = radix_to_swp_entry(page);
+                page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
+                                             NULL, 0);
+                if (page)
+                        page_cache_release(page);
+        }
+
+        lru_add_drain();        /* Push any new pages onto the LRU now */
+}
+#endif  /* CONFIG_SWAP */
+
 /*
  * Schedule all required I/O operations.  Do not wait for completion.
  */
@@ -140,6 +221,18 @@ static long madvise_willneed(struct vm_area_struct * vma,
 {
         struct file *file = vma->vm_file;

+#ifdef CONFIG_SWAP
+        if (!file || mapping_cap_swap_backed(file->f_mapping)) {
+                *prev = vma;
+                if (!file)
+                        force_swapin_readahead(vma, start, end);
+                else
+                        force_shm_swapin_readahead(vma, start, end,
+                                file->f_mapping);
+                return 0;
+        }
+#endif
+
         if (!file)
                 return -EBADF;

@@ -371,6 +464,7 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
         int error = -EINVAL;
         int write;
         size_t len;
+        struct blk_plug plug;

 #ifdef CONFIG_MEMORY_FAILURE
         if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
@@ -410,18 +504,19 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
         if (vma && start > vma->vm_start)
                 prev = vma;

+        blk_start_plug(&plug);
         for (;;) {
                 /* Still start < end. */
                 error = -ENOMEM;
                 if (!vma)
-                        goto out;
+                        goto out_plug;

                 /* Here start < (end|vma->vm_end). */
                 if (start < vma->vm_start) {
                         unmapped_error = -ENOMEM;
                         start = vma->vm_start;
                         if (start >= end)
-                                goto out;
+                                goto out_plug;
                 }

                 /* Here vma->vm_start <= start < (end|vma->vm_end) */
@@ -432,18 +527,20 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
                 /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
                 error = madvise_vma(vma, &prev, start, tmp, behavior);
                 if (error)
-                        goto out;
+                        goto out_plug;
                 start = tmp;
                 if (prev && start < prev->vm_end)
                         start = prev->vm_end;
                 error = unmapped_error;
                 if (start >= end)
-                        goto out;
+                        goto out_plug;
                 if (prev)
                         vma = prev->vm_next;
                 else    /* madvise_remove dropped mmap_sem */
                         vma = find_vma(current->mm, start);
         }
+out_plug:
+        blk_finish_plug(&plug);
 out:
         if (write)
                 up_write(&current->mm->mmap_sem);