author    Johannes Weiner <hannes@cmpxchg.org>  2014-04-03 17:47:46 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-04-03 19:21:00 -0400
commit    0cd6144aadd2afd19d1aca880153530c52957604
tree      529df1dc75d6a58eff057dde5feb07cecf6ba527 /mm
parent    e7b563bb2a6f4d974208da46200784b9c5b5a47e
mm + fs: prepare for non-page entries in page cache radix trees
shmem mappings already contain exceptional entries where swap slot
information is remembered.

To be able to store eviction information for regular page cache, prepare
every site dealing with the radix trees directly to handle entries other
than pages.

The common lookup functions will filter out non-page entries and return
NULL for page cache holes, just as before.  But provide a raw version of
the API which returns non-page entries as well, and switch shmem over to
use it.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Metin Doslu <metin@citusdata.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ozgun Erdogan <ozgun@citusdata.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <klamm@yandex-team.ru>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
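[Editor's note] The split between the filtering and the raw lookups is easiest to see from the caller's side. Below is a minimal sketch, not part of the patch, of how a caller might use the two APIs after this change; the helper names lookup_for_io() and probe_entry() are invented for illustration, while find_get_page(), find_get_entry(), radix_tree_exceptional_entry() and page_cache_release() are the interfaces the diff below actually touches.

/*
 * Sketch only: find_get_page() keeps its old behaviour and returns NULL
 * both for holes and for non-page entries, while find_get_entry() may
 * also hand back an exceptional (shadow/swap) entry that the caller
 * must check for explicitly.
 */
static struct page *lookup_for_io(struct address_space *mapping, pgoff_t index)
{
	/* Filtered lookup: a real page with a reference held, or NULL. */
	return find_get_page(mapping, index);
}

static bool probe_entry(struct address_space *mapping, pgoff_t index)
{
	struct page *page = find_get_entry(mapping, index);	/* raw lookup */

	if (!page)
		return false;		/* true page cache hole */
	if (radix_tree_exceptional_entry(page))
		return true;		/* non-page entry, no reference taken */
	page_cache_release(page);	/* find_get_entry() took a reference */
	return true;
}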
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c   | 202
-rw-r--r--  mm/mincore.c   |  20
-rw-r--r--  mm/readahead.c |   2
-rw-r--r--  mm/shmem.c     |  99
-rw-r--r--  mm/swap.c      |  51
-rw-r--r--  mm/truncate.c  |  74
6 files changed, 325 insertions(+), 123 deletions(-)
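[Editor's note] Most of the churn below follows a single caller pattern: pagevec loops switch from pagevec_lookup() to pagevec_lookup_entries(), read indices from the separate indices[] array (exceptional entries carry no page->index), skip or clear exceptional entries, and prune them with pagevec_remove_exceptionals() before any page-only pagevec operation. A condensed sketch of that loop shape follows; walk_range() is a made-up name, and the body elides the real per-page work done in mm/truncate.c.

/* Illustrative only: the loop shape repeated in mm/truncate.c. */
static void walk_range(struct address_space *mapping, pgoff_t start, pgoff_t end)
{
	pgoff_t indices[PAGEVEC_SIZE];
	struct pagevec pvec;
	pgoff_t index = start;
	int i;

	pagevec_init(&pvec, 0);
	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
			indices)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* Shadow entries have no page->index; use indices[]. */
			index = indices[i];
			if (index > end)
				break;

			if (radix_tree_exceptional_entry(page)) {
				/* Not a page: drop or record the entry. */
				clear_exceptional_entry(mapping, index, page);
				continue;
			}

			/* ... operate on the real page here ... */
		}
		/* Strip non-pages before page-only pagevec operations. */
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}
}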
diff --git a/mm/filemap.c b/mm/filemap.c
index 40115c6c0791..efc63876477f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -446,6 +446,29 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_page);
 
+static int page_cache_tree_insert(struct address_space *mapping,
+				  struct page *page)
+{
+	void **slot;
+	int error;
+
+	slot = radix_tree_lookup_slot(&mapping->page_tree, page->index);
+	if (slot) {
+		void *p;
+
+		p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		if (!radix_tree_exceptional_entry(p))
+			return -EEXIST;
+		radix_tree_replace_slot(slot, page);
+		mapping->nrpages++;
+		return 0;
+	}
+	error = radix_tree_insert(&mapping->page_tree, page->index, page);
+	if (!error)
+		mapping->nrpages++;
+	return error;
+}
+
 /**
  * add_to_page_cache_locked - add a locked page to the pagecache
  * @page: page to add
@@ -480,11 +503,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	page->index = offset;
 
 	spin_lock_irq(&mapping->tree_lock);
-	error = radix_tree_insert(&mapping->page_tree, offset, page);
+	error = page_cache_tree_insert(mapping, page);
 	radix_tree_preload_end();
 	if (unlikely(error))
 		goto err_insert;
-	mapping->nrpages++;
 	__inc_zone_page_state(page, NR_FILE_PAGES);
 	spin_unlock_irq(&mapping->tree_lock);
 	trace_mm_filemap_add_to_page_cache(page);
@@ -712,7 +734,10 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
 	unsigned long i;
 
 	for (i = 0; i < max_scan; i++) {
-		if (!radix_tree_lookup(&mapping->page_tree, index))
+		struct page *page;
+
+		page = radix_tree_lookup(&mapping->page_tree, index);
+		if (!page || radix_tree_exceptional_entry(page))
 			break;
 		index++;
 		if (index == 0)
@@ -750,7 +775,10 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
 	unsigned long i;
 
 	for (i = 0; i < max_scan; i++) {
-		if (!radix_tree_lookup(&mapping->page_tree, index))
+		struct page *page;
+
+		page = radix_tree_lookup(&mapping->page_tree, index);
+		if (!page || radix_tree_exceptional_entry(page))
 			break;
 		index--;
 		if (index == ULONG_MAX)
@@ -762,14 +790,19 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
 EXPORT_SYMBOL(page_cache_prev_hole);
 
 /**
- * find_get_page - find and get a page reference
+ * find_get_entry - find and get a page cache entry
  * @mapping: the address_space to search
- * @offset: the page index
+ * @offset: the page cache index
+ *
+ * Looks up the page cache slot at @mapping & @offset. If there is a
+ * page cache page, it is returned with an increased refcount.
  *
- * Is there a pagecache struct page at the given (mapping, offset) tuple?
- * If yes, increment its refcount and return it; if no, return NULL.
+ * If the slot holds a shadow entry of a previously evicted page, it
+ * is returned.
+ *
+ * Otherwise, %NULL is returned.
  */
-struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
 	void **pagep;
 	struct page *page;
@@ -810,24 +843,50 @@ out:
 
 	return page;
 }
-EXPORT_SYMBOL(find_get_page);
+EXPORT_SYMBOL(find_get_entry);
 
 /**
- * find_lock_page - locate, pin and lock a pagecache page
+ * find_get_page - find and get a page reference
  * @mapping: the address_space to search
  * @offset: the page index
  *
- * Locates the desired pagecache page, locks it, increments its reference
- * count and returns its address.
+ * Looks up the page cache slot at @mapping & @offset. If there is a
+ * page cache page, it is returned with an increased refcount.
  *
- * Returns zero if the page was not present. find_lock_page() may sleep.
+ * Otherwise, %NULL is returned.
  */
-struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
+{
+	struct page *page = find_get_entry(mapping, offset);
+
+	if (radix_tree_exceptional_entry(page))
+		page = NULL;
+	return page;
+}
+EXPORT_SYMBOL(find_get_page);
+
+/**
+ * find_lock_entry - locate, pin and lock a page cache entry
+ * @mapping: the address_space to search
+ * @offset: the page cache index
+ *
+ * Looks up the page cache slot at @mapping & @offset. If there is a
+ * page cache page, it is returned locked and with an increased
+ * refcount.
+ *
+ * If the slot holds a shadow entry of a previously evicted page, it
+ * is returned.
+ *
+ * Otherwise, %NULL is returned.
+ *
+ * find_lock_entry() may sleep.
+ */
+struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
 {
 	struct page *page;
 
 repeat:
-	page = find_get_page(mapping, offset);
+	page = find_get_entry(mapping, offset);
 	if (page && !radix_tree_exception(page)) {
 		lock_page(page);
 		/* Has the page been truncated? */
@@ -840,6 +899,29 @@ repeat:
 	}
 	return page;
 }
+EXPORT_SYMBOL(find_lock_entry);
+
+/**
+ * find_lock_page - locate, pin and lock a pagecache page
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * Looks up the page cache slot at @mapping & @offset. If there is a
+ * page cache page, it is returned locked and with an increased
+ * refcount.
+ *
+ * Otherwise, %NULL is returned.
+ *
+ * find_lock_page() may sleep.
+ */
+struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
+{
+	struct page *page = find_lock_entry(mapping, offset);
+
+	if (radix_tree_exceptional_entry(page))
+		page = NULL;
+	return page;
+}
 EXPORT_SYMBOL(find_lock_page);
 
 /**
@@ -848,16 +930,18 @@ EXPORT_SYMBOL(find_lock_page);
  * @index: the page's index into the mapping
  * @gfp_mask: page allocation mode
  *
- * Locates a page in the pagecache. If the page is not present, a new page
- * is allocated using @gfp_mask and is added to the pagecache and to the VM's
- * LRU list. The returned page is locked and has its reference count
- * incremented.
+ * Looks up the page cache slot at @mapping & @offset. If there is a
+ * page cache page, it is returned locked and with an increased
+ * refcount.
+ *
+ * If the page is not present, a new page is allocated using @gfp_mask
+ * and added to the page cache and the VM's LRU list. The page is
+ * returned locked and with an increased refcount.
  *
- * find_or_create_page() may sleep, even if @gfp_flags specifies an atomic
- * allocation!
+ * On memory exhaustion, %NULL is returned.
  *
- * find_or_create_page() returns the desired page's address, or zero on
- * memory exhaustion.
+ * find_or_create_page() may sleep, even if @gfp_flags specifies an
+ * atomic allocation!
  */
 struct page *find_or_create_page(struct address_space *mapping,
 		pgoff_t index, gfp_t gfp_mask)
@@ -890,6 +974,76 @@ repeat:
 EXPORT_SYMBOL(find_or_create_page);
 
 /**
+ * find_get_entries - gang pagecache lookup
+ * @mapping:	The address_space to search
+ * @start:	The starting page cache index
+ * @nr_entries:	The maximum number of entries
+ * @entries:	Where the resulting entries are placed
+ * @indices:	The cache indices corresponding to the entries in @entries
+ *
+ * find_get_entries() will search for and return a group of up to
+ * @nr_entries entries in the mapping. The entries are placed at
+ * @entries. find_get_entries() takes a reference against any actual
+ * pages it returns.
+ *
+ * The search returns a group of mapping-contiguous page cache entries
+ * with ascending indexes. There may be holes in the indices due to
+ * not-present pages.
+ *
+ * Any shadow entries of evicted pages are included in the returned
+ * array.
+ *
+ * find_get_entries() returns the number of pages and shadow entries
+ * which were found.
+ */
+unsigned find_get_entries(struct address_space *mapping,
+			  pgoff_t start, unsigned int nr_entries,
+			  struct page **entries, pgoff_t *indices)
+{
+	void **slot;
+	unsigned int ret = 0;
+	struct radix_tree_iter iter;
+
+	if (!nr_entries)
+		return 0;
+
+	rcu_read_lock();
+restart:
+	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot(slot);
+		if (unlikely(!page))
+			continue;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page))
+				goto restart;
+			/*
+			 * Otherwise, we must be storing a swap entry
+			 * here as an exceptional entry: so return it
+			 * without attempting to raise page count.
+			 */
+			goto export;
+		}
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *slot)) {
+			page_cache_release(page);
+			goto repeat;
+		}
+export:
+		indices[ret] = iter.index;
+		entries[ret] = page;
+		if (++ret == nr_entries)
+			break;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
  * find_get_pages - gang pagecache lookup
  * @mapping:	The address_space to search
  * @start:	The starting page index
diff --git a/mm/mincore.c b/mm/mincore.c
index 101623378fbf..725c80961048 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -70,13 +70,21 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 	 * any other file mapping (ie. marked !present and faulted in with
 	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
 	 */
-	page = find_get_page(mapping, pgoff);
 #ifdef CONFIG_SWAP
-	/* shmem/tmpfs may return swap: account for swapcache page too. */
-	if (radix_tree_exceptional_entry(page)) {
-		swp_entry_t swap = radix_to_swp_entry(page);
-		page = find_get_page(swap_address_space(swap), swap.val);
-	}
+	if (shmem_mapping(mapping)) {
+		page = find_get_entry(mapping, pgoff);
+		/*
+		 * shmem/tmpfs may return swap: account for swapcache
+		 * page too.
+		 */
+		if (radix_tree_exceptional_entry(page)) {
+			swp_entry_t swp = radix_to_swp_entry(page);
+			page = find_get_page(swap_address_space(swp), swp.val);
+		}
+	} else
+		page = find_get_page(mapping, pgoff);
+#else
+	page = find_get_page(mapping, pgoff);
 #endif
 	if (page) {
 		present = PageUptodate(page);
diff --git a/mm/readahead.c b/mm/readahead.c
index c62d85ace0cc..62c500a088a7 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -179,7 +179,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->page_tree, page_offset);
 		rcu_read_unlock();
-		if (page)
+		if (page && !radix_tree_exceptional_entry(page))
 			continue;
 
 		page = page_cache_alloc_readahead(mapping);
diff --git a/mm/shmem.c b/mm/shmem.c
index e470997010cd..a3ba988ec946 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -329,56 +329,6 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
 }
 
 /*
- * Like find_get_pages, but collecting swap entries as well as pages.
- */
-static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
-					pgoff_t start, unsigned int nr_pages,
-					struct page **pages, pgoff_t *indices)
-{
-	void **slot;
-	unsigned int ret = 0;
-	struct radix_tree_iter iter;
-
-	if (!nr_pages)
-		return 0;
-
-	rcu_read_lock();
-restart:
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-		struct page *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
-			continue;
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page))
-				goto restart;
-			/*
-			 * Otherwise, we must be storing a swap entry
-			 * here as an exceptional entry: so return it
-			 * without attempting to raise page count.
-			 */
-			goto export;
-		}
-		if (!page_cache_get_speculative(page))
-			goto repeat;
-
-		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			page_cache_release(page);
-			goto repeat;
-		}
-export:
-		indices[ret] = iter.index;
-		pages[ret] = page;
-		if (++ret == nr_pages)
-			break;
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
-/*
  * Remove swap entry from radix tree, free the swap and its page cache.
  */
 static int shmem_free_swap(struct address_space *mapping,
@@ -396,21 +346,6 @@ static int shmem_free_swap(struct address_space *mapping,
 }
 
 /*
- * Pagevec may contain swap entries, so shuffle up pages before releasing.
- */
-static void shmem_deswap_pagevec(struct pagevec *pvec)
-{
-	int i, j;
-
-	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-		if (!radix_tree_exceptional_entry(page))
-			pvec->pages[j++] = page;
-	}
-	pvec->nr = j;
-}
-
-/*
  * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
  */
 void shmem_unlock_mapping(struct address_space *mapping)
@@ -428,12 +363,12 @@ void shmem_unlock_mapping(struct address_space *mapping)
 		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
 		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
 		 */
-		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+		pvec.nr = find_get_entries(mapping, index,
 					PAGEVEC_SIZE, pvec.pages, indices);
 		if (!pvec.nr)
 			break;
 		index = indices[pvec.nr - 1] + 1;
-		shmem_deswap_pagevec(&pvec);
+		pagevec_remove_exceptionals(&pvec);
 		check_move_unevictable_pages(pvec.pages, pvec.nr);
 		pagevec_release(&pvec);
 		cond_resched();
@@ -465,9 +400,9 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 	pagevec_init(&pvec, 0);
 	index = start;
 	while (index < end) {
-		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+		pvec.nr = find_get_entries(mapping, index,
 				min(end - index, (pgoff_t)PAGEVEC_SIZE),
 				pvec.pages, indices);
 		if (!pvec.nr)
 			break;
 		mem_cgroup_uncharge_start();
@@ -496,7 +431,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			}
 			unlock_page(page);
 		}
-		shmem_deswap_pagevec(&pvec);
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
@@ -534,9 +469,10 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+
+		pvec.nr = find_get_entries(mapping, index,
 				min(end - index, (pgoff_t)PAGEVEC_SIZE),
 				pvec.pages, indices);
 		if (!pvec.nr) {
 			if (index == start || unfalloc)
 				break;
@@ -544,7 +480,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			continue;
 		}
 		if ((index == start || unfalloc) && indices[0] >= end) {
-			shmem_deswap_pagevec(&pvec);
+			pagevec_remove_exceptionals(&pvec);
 			pagevec_release(&pvec);
 			break;
 		}
@@ -573,7 +509,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			}
 			unlock_page(page);
 		}
-		shmem_deswap_pagevec(&pvec);
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
@@ -1079,7 +1015,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		return -EFBIG;
 repeat:
 	swap.val = 0;
-	page = find_lock_page(mapping, index);
+	page = find_lock_entry(mapping, index);
 	if (radix_tree_exceptional_entry(page)) {
 		swap = radix_to_swp_entry(page);
 		page = NULL;
@@ -1416,6 +1352,11 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 	return inode;
 }
 
+bool shmem_mapping(struct address_space *mapping)
+{
+	return mapping->backing_dev_info == &shmem_backing_dev_info;
+}
+
 #ifdef CONFIG_TMPFS
 static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_short_symlink_operations;
@@ -1728,7 +1669,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 	pagevec_init(&pvec, 0);
 	pvec.nr = 1;		/* start small: we may be there already */
 	while (!done) {
-		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+		pvec.nr = find_get_entries(mapping, index,
 					pvec.nr, pvec.pages, indices);
 		if (!pvec.nr) {
 			if (whence == SEEK_DATA)
@@ -1755,7 +1696,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 				break;
 			}
 		}
-		shmem_deswap_pagevec(&pvec);
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		pvec.nr = PAGEVEC_SIZE;
 		cond_resched();
diff --git a/mm/swap.c b/mm/swap.c
index 0092097b3f4c..c8048d71c642 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -948,6 +948,57 @@ void __pagevec_lru_add(struct pagevec *pvec)
 EXPORT_SYMBOL(__pagevec_lru_add);
 
 /**
+ * pagevec_lookup_entries - gang pagecache lookup
+ * @pvec:	Where the resulting entries are placed
+ * @mapping:	The address_space to search
+ * @start:	The starting entry index
+ * @nr_entries:	The maximum number of entries
+ * @indices:	The cache indices corresponding to the entries in @pvec
+ *
+ * pagevec_lookup_entries() will search for and return a group of up
+ * to @nr_entries pages and shadow entries in the mapping. All
+ * entries are placed in @pvec. pagevec_lookup_entries() takes a
+ * reference against actual pages in @pvec.
+ *
+ * The search returns a group of mapping-contiguous entries with
+ * ascending indexes. There may be holes in the indices due to
+ * not-present entries.
+ *
+ * pagevec_lookup_entries() returns the number of entries which were
+ * found.
+ */
+unsigned pagevec_lookup_entries(struct pagevec *pvec,
+				struct address_space *mapping,
+				pgoff_t start, unsigned nr_pages,
+				pgoff_t *indices)
+{
+	pvec->nr = find_get_entries(mapping, start, nr_pages,
+				    pvec->pages, indices);
+	return pagevec_count(pvec);
+}
+
+/**
+ * pagevec_remove_exceptionals - pagevec exceptionals pruning
+ * @pvec:	The pagevec to prune
+ *
+ * pagevec_lookup_entries() fills both pages and exceptional radix
+ * tree entries into the pagevec. This function prunes all
+ * exceptionals from @pvec without leaving holes, so that it can be
+ * passed on to page-only pagevec operations.
+ */
+void pagevec_remove_exceptionals(struct pagevec *pvec)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		if (!radix_tree_exceptional_entry(page))
+			pvec->pages[j++] = page;
+	}
+	pvec->nr = j;
+}
+
+/**
  * pagevec_lookup - gang pagecache lookup
  * @pvec:	Where the resulting pages are placed
  * @mapping:	The address_space to search
diff --git a/mm/truncate.c b/mm/truncate.c
index 353b683afd6e..2e84fe59190b 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -22,6 +22,22 @@
 #include <linux/cleancache.h>
 #include "internal.h"
 
+static void clear_exceptional_entry(struct address_space *mapping,
+				    pgoff_t index, void *entry)
+{
+	/* Handled by shmem itself */
+	if (shmem_mapping(mapping))
+		return;
+
+	spin_lock_irq(&mapping->tree_lock);
+	/*
+	 * Regular page slots are stabilized by the page lock even
+	 * without the tree itself locked. These unlocked entries
+	 * need verification under the tree lock.
+	 */
+	radix_tree_delete_item(&mapping->page_tree, index, entry);
+	spin_unlock_irq(&mapping->tree_lock);
+}
 
 /**
  * do_invalidatepage - invalidate part or all of a page
@@ -208,6 +224,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	unsigned int	partial_start;	/* inclusive */
 	unsigned int	partial_end;	/* exclusive */
 	struct pagevec	pvec;
+	pgoff_t		indices[PAGEVEC_SIZE];
 	pgoff_t		index;
 	int		i;
 
@@ -238,17 +255,23 @@ void truncate_inode_pages_range(struct address_space *mapping,
 
 	pagevec_init(&pvec, 0);
 	index = start;
-	while (index < end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
+	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE),
+			indices)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
 			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index >= end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				clear_exceptional_entry(mapping, index, page);
+				continue;
+			}
+
 			if (!trylock_page(page))
 				continue;
 			WARN_ON(page->index != index);
@@ -259,6 +282,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
@@ -307,14 +331,16 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
+		if (!pagevec_lookup_entries(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE),
+			indices)) {
 			if (index == start)
 				break;
 			index = start;
 			continue;
 		}
-		if (index == start && pvec.pages[0]->index >= end) {
+		if (index == start && indices[0] >= end) {
+			pagevec_remove_exceptionals(&pvec);
 			pagevec_release(&pvec);
 			break;
 		}
@@ -323,16 +349,22 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			struct page *page = pvec.pages[i];
 
 			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index >= end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				clear_exceptional_entry(mapping, index, page);
+				continue;
+			}
+
 			lock_page(page);
 			WARN_ON(page->index != index);
 			wait_on_page_writeback(page);
 			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
@@ -375,6 +407,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
+	pgoff_t indices[PAGEVEC_SIZE];
 	struct pagevec pvec;
 	pgoff_t index = start;
 	unsigned long ret;
@@ -390,17 +423,23 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 	 */
 
 	pagevec_init(&pvec, 0);
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+			indices)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
 			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				clear_exceptional_entry(mapping, index, page);
+				continue;
+			}
+
 			if (!trylock_page(page))
 				continue;
 			WARN_ON(page->index != index);
@@ -414,6 +453,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 				deactivate_page(page);
 			count += ret;
 		}
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
@@ -481,6 +521,7 @@ static int do_launder_page(struct address_space *mapping, struct page *page)
 int invalidate_inode_pages2_range(struct address_space *mapping,
 				  pgoff_t start, pgoff_t end)
 {
+	pgoff_t indices[PAGEVEC_SIZE];
 	struct pagevec pvec;
 	pgoff_t index;
 	int i;
@@ -491,17 +532,23 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	cleancache_invalidate_inode(mapping);
 	pagevec_init(&pvec, 0);
 	index = start;
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+			indices)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
 			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				clear_exceptional_entry(mapping, index, page);
+				continue;
+			}
+
 			lock_page(page);
 			WARN_ON(page->index != index);
 			if (page->mapping != mapping) {
@@ -539,6 +586,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				ret = ret2;
 			unlock_page(page);
 		}
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();