diff options
author | Shaohua Li <shli@kernel.org> | 2013-02-22 19:32:31 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-23 20:50:10 -0500 |
commit | 1998cc048901109a29924380b8e91bc049b32951 (patch) | |
tree | 71c6875e8a9a6da14899f74e2dd3168d99374bd5 /mm/madvise.c | |
parent | a394cb8ee632ec5edce20309901ec66767497a43 (diff) |
mm: make madvise(MADV_WILLNEED) support swap file prefetch
Make madvise(MADV_WILLNEED) support swap file prefetch. If memory is
swapped out, this syscall can do swap-in prefetch. It has no impact if the
memory isn't swapped out.
[akpm@linux-foundation.org: fix CONFIG_SWAP=n build]
[sasha.levin@oracle.com: fix BUG on madvise early failure]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/madvise.c')
-rw-r--r-- | mm/madvise.c | 105 |
1 file changed, 101 insertions, 4 deletions
diff --git a/mm/madvise.c b/mm/madvise.c index 03dfa5c7adb3..c58c94b56c3d 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -16,6 +16,9 @@ | |||
16 | #include <linux/ksm.h> | 16 | #include <linux/ksm.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/file.h> | 18 | #include <linux/file.h> |
19 | #include <linux/blkdev.h> | ||
20 | #include <linux/swap.h> | ||
21 | #include <linux/swapops.h> | ||
19 | 22 | ||
20 | /* | 23 | /* |
21 | * Any behaviour which results in changes to the vma->vm_flags needs to | 24 | * Any behaviour which results in changes to the vma->vm_flags needs to |
@@ -131,6 +134,84 @@ out: | |||
131 | return error; | 134 | return error; |
132 | } | 135 | } |
133 | 136 | ||
137 | #ifdef CONFIG_SWAP | ||
138 | static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, | ||
139 | unsigned long end, struct mm_walk *walk) | ||
140 | { | ||
141 | pte_t *orig_pte; | ||
142 | struct vm_area_struct *vma = walk->private; | ||
143 | unsigned long index; | ||
144 | |||
145 | if (pmd_none_or_trans_huge_or_clear_bad(pmd)) | ||
146 | return 0; | ||
147 | |||
148 | for (index = start; index != end; index += PAGE_SIZE) { | ||
149 | pte_t pte; | ||
150 | swp_entry_t entry; | ||
151 | struct page *page; | ||
152 | spinlock_t *ptl; | ||
153 | |||
154 | orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl); | ||
155 | pte = *(orig_pte + ((index - start) / PAGE_SIZE)); | ||
156 | pte_unmap_unlock(orig_pte, ptl); | ||
157 | |||
158 | if (pte_present(pte) || pte_none(pte) || pte_file(pte)) | ||
159 | continue; | ||
160 | entry = pte_to_swp_entry(pte); | ||
161 | if (unlikely(non_swap_entry(entry))) | ||
162 | continue; | ||
163 | |||
164 | page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, | ||
165 | vma, index); | ||
166 | if (page) | ||
167 | page_cache_release(page); | ||
168 | } | ||
169 | |||
170 | return 0; | ||
171 | } | ||
172 | |||
173 | static void force_swapin_readahead(struct vm_area_struct *vma, | ||
174 | unsigned long start, unsigned long end) | ||
175 | { | ||
176 | struct mm_walk walk = { | ||
177 | .mm = vma->vm_mm, | ||
178 | .pmd_entry = swapin_walk_pmd_entry, | ||
179 | .private = vma, | ||
180 | }; | ||
181 | |||
182 | walk_page_range(start, end, &walk); | ||
183 | |||
184 | lru_add_drain(); /* Push any new pages onto the LRU now */ | ||
185 | } | ||
186 | |||
187 | static void force_shm_swapin_readahead(struct vm_area_struct *vma, | ||
188 | unsigned long start, unsigned long end, | ||
189 | struct address_space *mapping) | ||
190 | { | ||
191 | pgoff_t index; | ||
192 | struct page *page; | ||
193 | swp_entry_t swap; | ||
194 | |||
195 | for (; start < end; start += PAGE_SIZE) { | ||
196 | index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
197 | |||
198 | page = find_get_page(mapping, index); | ||
199 | if (!radix_tree_exceptional_entry(page)) { | ||
200 | if (page) | ||
201 | page_cache_release(page); | ||
202 | continue; | ||
203 | } | ||
204 | swap = radix_to_swp_entry(page); | ||
205 | page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE, | ||
206 | NULL, 0); | ||
207 | if (page) | ||
208 | page_cache_release(page); | ||
209 | } | ||
210 | |||
211 | lru_add_drain(); /* Push any new pages onto the LRU now */ | ||
212 | } | ||
213 | #endif /* CONFIG_SWAP */ | ||
214 | |||
134 | /* | 215 | /* |
135 | * Schedule all required I/O operations. Do not wait for completion. | 216 | * Schedule all required I/O operations. Do not wait for completion. |
136 | */ | 217 | */ |
@@ -140,6 +221,18 @@ static long madvise_willneed(struct vm_area_struct * vma, | |||
140 | { | 221 | { |
141 | struct file *file = vma->vm_file; | 222 | struct file *file = vma->vm_file; |
142 | 223 | ||
224 | #ifdef CONFIG_SWAP | ||
225 | if (!file || mapping_cap_swap_backed(file->f_mapping)) { | ||
226 | *prev = vma; | ||
227 | if (!file) | ||
228 | force_swapin_readahead(vma, start, end); | ||
229 | else | ||
230 | force_shm_swapin_readahead(vma, start, end, | ||
231 | file->f_mapping); | ||
232 | return 0; | ||
233 | } | ||
234 | #endif | ||
235 | |||
143 | if (!file) | 236 | if (!file) |
144 | return -EBADF; | 237 | return -EBADF; |
145 | 238 | ||
@@ -371,6 +464,7 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) | |||
371 | int error = -EINVAL; | 464 | int error = -EINVAL; |
372 | int write; | 465 | int write; |
373 | size_t len; | 466 | size_t len; |
467 | struct blk_plug plug; | ||
374 | 468 | ||
375 | #ifdef CONFIG_MEMORY_FAILURE | 469 | #ifdef CONFIG_MEMORY_FAILURE |
376 | if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) | 470 | if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) |
@@ -410,18 +504,19 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) | |||
410 | if (vma && start > vma->vm_start) | 504 | if (vma && start > vma->vm_start) |
411 | prev = vma; | 505 | prev = vma; |
412 | 506 | ||
507 | blk_start_plug(&plug); | ||
413 | for (;;) { | 508 | for (;;) { |
414 | /* Still start < end. */ | 509 | /* Still start < end. */ |
415 | error = -ENOMEM; | 510 | error = -ENOMEM; |
416 | if (!vma) | 511 | if (!vma) |
417 | goto out; | 512 | goto out_plug; |
418 | 513 | ||
419 | /* Here start < (end|vma->vm_end). */ | 514 | /* Here start < (end|vma->vm_end). */ |
420 | if (start < vma->vm_start) { | 515 | if (start < vma->vm_start) { |
421 | unmapped_error = -ENOMEM; | 516 | unmapped_error = -ENOMEM; |
422 | start = vma->vm_start; | 517 | start = vma->vm_start; |
423 | if (start >= end) | 518 | if (start >= end) |
424 | goto out; | 519 | goto out_plug; |
425 | } | 520 | } |
426 | 521 | ||
427 | /* Here vma->vm_start <= start < (end|vma->vm_end) */ | 522 | /* Here vma->vm_start <= start < (end|vma->vm_end) */ |
@@ -432,18 +527,20 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) | |||
432 | /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ | 527 | /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ |
433 | error = madvise_vma(vma, &prev, start, tmp, behavior); | 528 | error = madvise_vma(vma, &prev, start, tmp, behavior); |
434 | if (error) | 529 | if (error) |
435 | goto out; | 530 | goto out_plug; |
436 | start = tmp; | 531 | start = tmp; |
437 | if (prev && start < prev->vm_end) | 532 | if (prev && start < prev->vm_end) |
438 | start = prev->vm_end; | 533 | start = prev->vm_end; |
439 | error = unmapped_error; | 534 | error = unmapped_error; |
440 | if (start >= end) | 535 | if (start >= end) |
441 | goto out; | 536 | goto out_plug; |
442 | if (prev) | 537 | if (prev) |
443 | vma = prev->vm_next; | 538 | vma = prev->vm_next; |
444 | else /* madvise_remove dropped mmap_sem */ | 539 | else /* madvise_remove dropped mmap_sem */ |
445 | vma = find_vma(current->mm, start); | 540 | vma = find_vma(current->mm, start); |
446 | } | 541 | } |
542 | out_plug: | ||
543 | blk_finish_plug(&plug); | ||
447 | out: | 544 | out: |
448 | if (write) | 545 | if (write) |
449 | up_write(¤t->mm->mmap_sem); | 546 | up_write(¤t->mm->mmap_sem); |