diff options
author | Johannes Weiner <hannes@cmpxchg.org> | 2014-04-03 17:47:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-03 19:21:00 -0400 |
commit | 0cd6144aadd2afd19d1aca880153530c52957604 (patch) | |
tree | 529df1dc75d6a58eff057dde5feb07cecf6ba527 /mm/filemap.c | |
parent | e7b563bb2a6f4d974208da46200784b9c5b5a47e (diff) |
mm + fs: prepare for non-page entries in page cache radix trees
shmem mappings already contain exceptional entries where swap slot
information is remembered.
To be able to store eviction information for regular page cache, prepare
every site dealing with the radix trees directly to handle entries other
than pages.
The common lookup functions will filter out non-page entries and return
NULL for page cache holes, just as before. But provide a raw version of
the API which returns non-page entries as well, and switch shmem over to
use it.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Metin Doslu <metin@citusdata.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ozgun Erdogan <ozgun@citusdata.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <klamm@yandex-team.ru>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 202 |
1 file changed, 178 insertions, 24 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 40115c6c0791..efc63876477f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -446,6 +446,29 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) | |||
446 | } | 446 | } |
447 | EXPORT_SYMBOL_GPL(replace_page_cache_page); | 447 | EXPORT_SYMBOL_GPL(replace_page_cache_page); |
448 | 448 | ||
449 | static int page_cache_tree_insert(struct address_space *mapping, | ||
450 | struct page *page) | ||
451 | { | ||
452 | void **slot; | ||
453 | int error; | ||
454 | |||
455 | slot = radix_tree_lookup_slot(&mapping->page_tree, page->index); | ||
456 | if (slot) { | ||
457 | void *p; | ||
458 | |||
459 | p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); | ||
460 | if (!radix_tree_exceptional_entry(p)) | ||
461 | return -EEXIST; | ||
462 | radix_tree_replace_slot(slot, page); | ||
463 | mapping->nrpages++; | ||
464 | return 0; | ||
465 | } | ||
466 | error = radix_tree_insert(&mapping->page_tree, page->index, page); | ||
467 | if (!error) | ||
468 | mapping->nrpages++; | ||
469 | return error; | ||
470 | } | ||
471 | |||
449 | /** | 472 | /** |
450 | * add_to_page_cache_locked - add a locked page to the pagecache | 473 | * add_to_page_cache_locked - add a locked page to the pagecache |
451 | * @page: page to add | 474 | * @page: page to add |
@@ -480,11 +503,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, | |||
480 | page->index = offset; | 503 | page->index = offset; |
481 | 504 | ||
482 | spin_lock_irq(&mapping->tree_lock); | 505 | spin_lock_irq(&mapping->tree_lock); |
483 | error = radix_tree_insert(&mapping->page_tree, offset, page); | 506 | error = page_cache_tree_insert(mapping, page); |
484 | radix_tree_preload_end(); | 507 | radix_tree_preload_end(); |
485 | if (unlikely(error)) | 508 | if (unlikely(error)) |
486 | goto err_insert; | 509 | goto err_insert; |
487 | mapping->nrpages++; | ||
488 | __inc_zone_page_state(page, NR_FILE_PAGES); | 510 | __inc_zone_page_state(page, NR_FILE_PAGES); |
489 | spin_unlock_irq(&mapping->tree_lock); | 511 | spin_unlock_irq(&mapping->tree_lock); |
490 | trace_mm_filemap_add_to_page_cache(page); | 512 | trace_mm_filemap_add_to_page_cache(page); |
@@ -712,7 +734,10 @@ pgoff_t page_cache_next_hole(struct address_space *mapping, | |||
712 | unsigned long i; | 734 | unsigned long i; |
713 | 735 | ||
714 | for (i = 0; i < max_scan; i++) { | 736 | for (i = 0; i < max_scan; i++) { |
715 | if (!radix_tree_lookup(&mapping->page_tree, index)) | 737 | struct page *page; |
738 | |||
739 | page = radix_tree_lookup(&mapping->page_tree, index); | ||
740 | if (!page || radix_tree_exceptional_entry(page)) | ||
716 | break; | 741 | break; |
717 | index++; | 742 | index++; |
718 | if (index == 0) | 743 | if (index == 0) |
@@ -750,7 +775,10 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping, | |||
750 | unsigned long i; | 775 | unsigned long i; |
751 | 776 | ||
752 | for (i = 0; i < max_scan; i++) { | 777 | for (i = 0; i < max_scan; i++) { |
753 | if (!radix_tree_lookup(&mapping->page_tree, index)) | 778 | struct page *page; |
779 | |||
780 | page = radix_tree_lookup(&mapping->page_tree, index); | ||
781 | if (!page || radix_tree_exceptional_entry(page)) | ||
754 | break; | 782 | break; |
755 | index--; | 783 | index--; |
756 | if (index == ULONG_MAX) | 784 | if (index == ULONG_MAX) |
@@ -762,14 +790,19 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping, | |||
762 | EXPORT_SYMBOL(page_cache_prev_hole); | 790 | EXPORT_SYMBOL(page_cache_prev_hole); |
763 | 791 | ||
764 | /** | 792 | /** |
765 | * find_get_page - find and get a page reference | 793 | * find_get_entry - find and get a page cache entry |
766 | * @mapping: the address_space to search | 794 | * @mapping: the address_space to search |
767 | * @offset: the page index | 795 | * @offset: the page cache index |
796 | * | ||
797 | * Looks up the page cache slot at @mapping & @offset. If there is a | ||
798 | * page cache page, it is returned with an increased refcount. | ||
768 | * | 799 | * |
769 | * Is there a pagecache struct page at the given (mapping, offset) tuple? | 800 | * If the slot holds a shadow entry of a previously evicted page, it |
770 | * If yes, increment its refcount and return it; if no, return NULL. | 801 | * is returned. |
802 | * | ||
803 | * Otherwise, %NULL is returned. | ||
771 | */ | 804 | */ |
772 | struct page *find_get_page(struct address_space *mapping, pgoff_t offset) | 805 | struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) |
773 | { | 806 | { |
774 | void **pagep; | 807 | void **pagep; |
775 | struct page *page; | 808 | struct page *page; |
@@ -810,24 +843,50 @@ out: | |||
810 | 843 | ||
811 | return page; | 844 | return page; |
812 | } | 845 | } |
813 | EXPORT_SYMBOL(find_get_page); | 846 | EXPORT_SYMBOL(find_get_entry); |
814 | 847 | ||
815 | /** | 848 | /** |
816 | * find_lock_page - locate, pin and lock a pagecache page | 849 | * find_get_page - find and get a page reference |
817 | * @mapping: the address_space to search | 850 | * @mapping: the address_space to search |
818 | * @offset: the page index | 851 | * @offset: the page index |
819 | * | 852 | * |
820 | * Locates the desired pagecache page, locks it, increments its reference | 853 | * Looks up the page cache slot at @mapping & @offset. If there is a |
821 | * count and returns its address. | 854 | * page cache page, it is returned with an increased refcount. |
822 | * | 855 | * |
823 | * Returns zero if the page was not present. find_lock_page() may sleep. | 856 | * Otherwise, %NULL is returned. |
824 | */ | 857 | */ |
825 | struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) | 858 | struct page *find_get_page(struct address_space *mapping, pgoff_t offset) |
859 | { | ||
860 | struct page *page = find_get_entry(mapping, offset); | ||
861 | |||
862 | if (radix_tree_exceptional_entry(page)) | ||
863 | page = NULL; | ||
864 | return page; | ||
865 | } | ||
866 | EXPORT_SYMBOL(find_get_page); | ||
867 | |||
868 | /** | ||
869 | * find_lock_entry - locate, pin and lock a page cache entry | ||
870 | * @mapping: the address_space to search | ||
871 | * @offset: the page cache index | ||
872 | * | ||
873 | * Looks up the page cache slot at @mapping & @offset. If there is a | ||
874 | * page cache page, it is returned locked and with an increased | ||
875 | * refcount. | ||
876 | * | ||
877 | * If the slot holds a shadow entry of a previously evicted page, it | ||
878 | * is returned. | ||
879 | * | ||
880 | * Otherwise, %NULL is returned. | ||
881 | * | ||
882 | * find_lock_entry() may sleep. | ||
883 | */ | ||
884 | struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset) | ||
826 | { | 885 | { |
827 | struct page *page; | 886 | struct page *page; |
828 | 887 | ||
829 | repeat: | 888 | repeat: |
830 | page = find_get_page(mapping, offset); | 889 | page = find_get_entry(mapping, offset); |
831 | if (page && !radix_tree_exception(page)) { | 890 | if (page && !radix_tree_exception(page)) { |
832 | lock_page(page); | 891 | lock_page(page); |
833 | /* Has the page been truncated? */ | 892 | /* Has the page been truncated? */ |
@@ -840,6 +899,29 @@ repeat: | |||
840 | } | 899 | } |
841 | return page; | 900 | return page; |
842 | } | 901 | } |
902 | EXPORT_SYMBOL(find_lock_entry); | ||
903 | |||
904 | /** | ||
905 | * find_lock_page - locate, pin and lock a pagecache page | ||
906 | * @mapping: the address_space to search | ||
907 | * @offset: the page index | ||
908 | * | ||
909 | * Looks up the page cache slot at @mapping & @offset. If there is a | ||
910 | * page cache page, it is returned locked and with an increased | ||
911 | * refcount. | ||
912 | * | ||
913 | * Otherwise, %NULL is returned. | ||
914 | * | ||
915 | * find_lock_page() may sleep. | ||
916 | */ | ||
917 | struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) | ||
918 | { | ||
919 | struct page *page = find_lock_entry(mapping, offset); | ||
920 | |||
921 | if (radix_tree_exceptional_entry(page)) | ||
922 | page = NULL; | ||
923 | return page; | ||
924 | } | ||
843 | EXPORT_SYMBOL(find_lock_page); | 925 | EXPORT_SYMBOL(find_lock_page); |
844 | 926 | ||
845 | /** | 927 | /** |
@@ -848,16 +930,18 @@ EXPORT_SYMBOL(find_lock_page); | |||
848 | * @index: the page's index into the mapping | 930 | * @index: the page's index into the mapping |
849 | * @gfp_mask: page allocation mode | 931 | * @gfp_mask: page allocation mode |
850 | * | 932 | * |
851 | * Locates a page in the pagecache. If the page is not present, a new page | 933 | * Looks up the page cache slot at @mapping & @index. If there is a |
852 | * is allocated using @gfp_mask and is added to the pagecache and to the VM's | 934 | * page cache page, it is returned locked and with an increased |
853 | * LRU list. The returned page is locked and has its reference count | 935 | * refcount. |
854 | * incremented. | 936 | * |
937 | * If the page is not present, a new page is allocated using @gfp_mask | ||
938 | * and added to the page cache and the VM's LRU list. The page is | ||
939 | * returned locked and with an increased refcount. | ||
855 | * | 940 | * |
856 | * find_or_create_page() may sleep, even if @gfp_flags specifies an atomic | 941 | * On memory exhaustion, %NULL is returned. |
857 | * allocation! | ||
858 | * | 942 | * |
859 | * find_or_create_page() returns the desired page's address, or zero on | 943 | * find_or_create_page() may sleep, even if @gfp_mask specifies an |
860 | * memory exhaustion. | 944 | * atomic allocation! |
861 | */ | 945 | */ |
862 | struct page *find_or_create_page(struct address_space *mapping, | 946 | struct page *find_or_create_page(struct address_space *mapping, |
863 | pgoff_t index, gfp_t gfp_mask) | 947 | pgoff_t index, gfp_t gfp_mask) |
@@ -890,6 +974,76 @@ repeat: | |||
890 | EXPORT_SYMBOL(find_or_create_page); | 974 | EXPORT_SYMBOL(find_or_create_page); |
891 | 975 | ||
892 | /** | 976 | /** |
977 | * find_get_entries - gang pagecache lookup | ||
978 | * @mapping: The address_space to search | ||
979 | * @start: The starting page cache index | ||
980 | * @nr_entries: The maximum number of entries | ||
981 | * @entries: Where the resulting entries are placed | ||
982 | * @indices: The cache indices corresponding to the entries in @entries | ||
983 | * | ||
984 | * find_get_entries() will search for and return a group of up to | ||
985 | * @nr_entries entries in the mapping. The entries are placed at | ||
986 | * @entries. find_get_entries() takes a reference against any actual | ||
987 | * pages it returns. | ||
988 | * | ||
989 | * The search returns a group of mapping-contiguous page cache entries | ||
990 | * with ascending indexes. There may be holes in the indices due to | ||
991 | * not-present pages. | ||
992 | * | ||
993 | * Any shadow entries of evicted pages are included in the returned | ||
994 | * array. | ||
995 | * | ||
996 | * find_get_entries() returns the number of pages and shadow entries | ||
997 | * which were found. | ||
998 | */ | ||
999 | unsigned find_get_entries(struct address_space *mapping, | ||
1000 | pgoff_t start, unsigned int nr_entries, | ||
1001 | struct page **entries, pgoff_t *indices) | ||
1002 | { | ||
1003 | void **slot; | ||
1004 | unsigned int ret = 0; | ||
1005 | struct radix_tree_iter iter; | ||
1006 | |||
1007 | if (!nr_entries) | ||
1008 | return 0; | ||
1009 | |||
1010 | rcu_read_lock(); | ||
1011 | restart: | ||
1012 | radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { | ||
1013 | struct page *page; | ||
1014 | repeat: | ||
1015 | page = radix_tree_deref_slot(slot); | ||
1016 | if (unlikely(!page)) | ||
1017 | continue; | ||
1018 | if (radix_tree_exception(page)) { | ||
1019 | if (radix_tree_deref_retry(page)) | ||
1020 | goto restart; | ||
1021 | /* | ||
1022 | * Otherwise, we must be storing a swap entry | ||
1023 | * here as an exceptional entry: so return it | ||
1024 | * without attempting to raise page count. | ||
1025 | */ | ||
1026 | goto export; | ||
1027 | } | ||
1028 | if (!page_cache_get_speculative(page)) | ||
1029 | goto repeat; | ||
1030 | |||
1031 | /* Has the page moved? */ | ||
1032 | if (unlikely(page != *slot)) { | ||
1033 | page_cache_release(page); | ||
1034 | goto repeat; | ||
1035 | } | ||
1036 | export: | ||
1037 | indices[ret] = iter.index; | ||
1038 | entries[ret] = page; | ||
1039 | if (++ret == nr_entries) | ||
1040 | break; | ||
1041 | } | ||
1042 | rcu_read_unlock(); | ||
1043 | return ret; | ||
1044 | } | ||
1045 | |||
1046 | /** | ||
893 | * find_get_pages - gang pagecache lookup | 1047 | * find_get_pages - gang pagecache lookup |
894 | * @mapping: The address_space to search | 1048 | * @mapping: The address_space to search |
895 | * @start: The starting page index | 1049 | * @start: The starting page index |