Diffstat (limited to 'mm/filemap.c')

 -rw-r--r--  mm/filemap.c | 137 +-
 1 file changed, 82 insertions(+), 55 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index f820e600f1ad..c0018f2d50e0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -9,7 +9,7 @@
  * most "normal" filesystems (but you don't /have/ to use this:
  * the NFS filesystem used to do this differently, for example)
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
@@ -33,7 +33,6 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
-#include <linux/mm_inline.h> /* for page_is_file_cache() */
 #include <linux/cleancache.h>
 #include "internal.h"
 
@@ -78,7 +77,7 @@
  * ->i_mutex (generic_file_buffered_write)
  * ->mmap_sem (fault_in_pages_readable->do_page_fault)
  *
- * inode_wb_list_lock
+ * bdi->wb.list_lock
  * sb_lock (fs/fs-writeback.c)
  * ->mapping->tree_lock (__sync_single_inode)
  *
@@ -96,9 +95,9 @@
  * ->zone.lru_lock (check_pte_range->isolate_lru_page)
  * ->private_lock (page_remove_rmap->set_page_dirty)
  * ->tree_lock (page_remove_rmap->set_page_dirty)
- * inode_wb_list_lock (page_remove_rmap->set_page_dirty)
+ * bdi.wb->list_lock (page_remove_rmap->set_page_dirty)
  * ->inode->i_lock (page_remove_rmap->set_page_dirty)
- * inode_wb_list_lock (zap_pte_range->set_page_dirty)
+ * bdi.wb->list_lock (zap_pte_range->set_page_dirty)
  * ->inode->i_lock (zap_pte_range->set_page_dirty)
  * ->private_lock (zap_pte_range->__set_page_dirty_buffers)
  *
@@ -128,6 +127,7 @@ void __delete_from_page_cache(struct page *page)
 
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
+	/* Leave page->index set: truncation lookup relies upon it */
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	if (PageSwapBacked(page))
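
The comment added here is the heart of the hunk: removal now clears page->mapping but deliberately leaves page->index intact, so a truncation pass that looked the page up earlier can still compare its index against the truncation point. A minimal userspace sketch of that invariant follows; struct page_m and the helper names are illustrative stand-ins, not kernel code.

    #include <assert.h>
    #include <stddef.h>

    /* Stand-in for struct page: only the fields this invariant touches. */
    struct page_m {
            void *mapping;          /* NULL once removed from the cache */
            unsigned long index;    /* deliberately preserved on removal */
    };

    static void delete_from_cache_m(struct page_m *page)
    {
            page->mapping = NULL;
            /* Leave page->index set: a later range check still works. */
    }

    int main(void)
    {
            struct page_m page = { .mapping = (void *)0x1, .index = 100 };
            unsigned long lstart = 50;      /* first index being truncated */

            delete_from_cache_m(&page);
            /* Truncation can still tell the page lay in the doomed range. */
            assert(page.mapping == NULL && page.index >= lstart);
            return 0;
    }
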
@@ -461,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	int error;
 
 	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(PageSwapBacked(page));
 
 	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & GFP_RECLAIM_MASK);
@@ -478,11 +479,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	if (likely(!error)) {
 		mapping->nrpages++;
 		__inc_zone_page_state(page, NR_FILE_PAGES);
-		if (PageSwapBacked(page))
-			__inc_zone_page_state(page, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
 	} else {
 		page->mapping = NULL;
+		/* Leave page->index set: truncation relies upon it */
 		spin_unlock_irq(&mapping->tree_lock);
 		mem_cgroup_uncharge_cache_page(page);
 		page_cache_release(page);
@@ -500,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 {
 	int ret;
 
-	/*
-	 * Splice_read and readahead add shmem/tmpfs pages into the page cache
-	 * before shmem_readpage has a chance to mark them as SwapBacked: they
-	 * need to go on the anon lru below, and mem_cgroup_cache_charge
-	 * (called in add_to_page_cache) needs to know where they're going too.
-	 */
-	if (mapping_cap_swap_backed(mapping))
-		SetPageSwapBacked(page);
-
 	ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	if (ret == 0) {
-		if (page_is_file_cache(page))
-			lru_cache_add_file(page);
-		else
-			lru_cache_add_anon(page);
-	}
+	if (ret == 0)
+		lru_cache_add_file(page);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
@@ -712,9 +699,16 @@ repeat:
 	page = radix_tree_deref_slot(pagep);
 	if (unlikely(!page))
 		goto out;
-	if (radix_tree_deref_retry(page))
-		goto repeat;
-
+	if (radix_tree_exception(page)) {
+		if (radix_tree_deref_retry(page))
+			goto repeat;
+		/*
+		 * Otherwise, shmem/tmpfs must be storing a swap entry
+		 * here as an exceptional entry: so return it without
+		 * attempting to raise page count.
+		 */
+		goto out;
+	}
 	if (!page_cache_get_speculative(page))
 		goto repeat;
 
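
find_get_page() can now hand back something that is not a struct page at all. The scheme this series relies on: real page pointers are at least word-aligned, so their low bits are always zero, while shmem stores a swap entry shifted up with a low tag bit set, and radix_tree_exception() merely tests those bits. Below is a self-contained userspace sketch of that encoding. The constants mirror what RADIX_TREE_EXCEPTIONAL_ENTRY and RADIX_TREE_EXCEPTIONAL_SHIFT are believed to be in this era's radix-tree.h; treat the exact values as assumptions.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EXCEPTIONAL_ENTRY       2UL     /* assumed tag bit */
    #define EXCEPTIONAL_SHIFT       2

    /* Encode a swap value as a non-page entry: shift it up, set the tag. */
    static void *swp_to_entry_m(unsigned long swap_val)
    {
            return (void *)((swap_val << EXCEPTIONAL_SHIFT) | EXCEPTIONAL_ENTRY);
    }

    /* The test radix_tree_exception() performs, modulo exact bit names. */
    static int entry_is_exception_m(void *entry)
    {
            return ((uintptr_t)entry & EXCEPTIONAL_ENTRY) != 0;
    }

    static unsigned long entry_to_swp_m(void *entry)
    {
            return (uintptr_t)entry >> EXCEPTIONAL_SHIFT;
    }

    int main(void)
    {
            void *entry = swp_to_entry_m(0x1234);

            /* struct page pointers are word-aligned, low bits clear, so a
             * real page can never satisfy this test. */
            assert(entry_is_exception_m(entry));
            assert(entry_to_swp_m(entry) == 0x1234);
            printf("entry %p decodes to swap value 0x%lx\n",
                   entry, entry_to_swp_m(entry));
            return 0;
    }
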
@@ -751,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
 
 repeat:
 	page = find_get_page(mapping, offset);
-	if (page) {
+	if (page && !radix_tree_exception(page)) {
 		lock_page(page);
 		/* Has the page been truncated? */
 		if (unlikely(page->mapping != mapping)) {
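
The extra test keeps find_lock_page() from calling lock_page() on a value that only looks like a pointer: exceptional entries are returned untouched for the caller (the shmem side of this series) to decode. A tiny userspace sketch of the shape of that guard, reusing the assumed tag bit from the sketch above:

    #include <stdint.h>

    #define EXCEPTIONAL_ENTRY       2UL     /* assumed tag bit, as above */

    struct page_m { int locked; };

    /* Hypothetical stand-in for the kernel's lock_page(). */
    static void lock_page_m(struct page_m *page) { page->locked = 1; }

    /* Only genuine page pointers get locked; tagged entries pass through. */
    static void *find_lock_entry_m(void *entry)
    {
            if (entry && !((uintptr_t)entry & EXCEPTIONAL_ENTRY))
                    lock_page_m(entry);
            return entry;
    }

    int main(void)
    {
            struct page_m page = { 0 };
            void *swap_entry = (void *)((0x1234UL << 2) | EXCEPTIONAL_ENTRY);

            find_lock_entry_m(&page);       /* real page: gets locked */
            find_lock_entry_m(swap_entry);  /* tagged entry: passed through */
            return page.locked ? 0 : 1;
    }
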
@@ -833,13 +827,14 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 {
 	unsigned int i;
 	unsigned int ret;
-	unsigned int nr_found;
+	unsigned int nr_found, nr_skip;
 
 	rcu_read_lock();
 restart:
 	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-				(void ***)pages, start, nr_pages);
+				(void ***)pages, NULL, start, nr_pages);
 	ret = 0;
+	nr_skip = 0;
 	for (i = 0; i < nr_found; i++) {
 		struct page *page;
 repeat:
@@ -847,13 +842,23 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page)) {
-			WARN_ON(start | i);
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				WARN_ON(start | i);
+				goto restart;
+			}
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so skip over it -
+			 * we only reach this from invalidate_mapping_pages().
+			 */
+			nr_skip++;
+			continue;
 		}
 
 		if (!page_cache_get_speculative(page))
@@ -873,7 +878,7 @@ repeat:
 	 * If all entries were removed before we could secure them,
 	 * try again, because callers stop trying once 0 is returned.
 	 */
-	if (unlikely(!ret && nr_found))
+	if (unlikely(!ret && nr_found > nr_skip))
 		goto restart;
 	rcu_read_unlock();
 	return ret;
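
Without nr_skip the final guard would livelock: over a range holding only swap entries, ret stays 0 while nr_found is nonzero, so the old test '!ret && nr_found' would goto restart forever. Discounting the skipped entries restores the intended meaning, that a real page was found but vanished before it could be pinned. In isolation the corrected predicate is just:

    /* Restart only if a real page (not a skipped swap entry) got away. */
    static int should_restart(unsigned int ret, unsigned int nr_found,
                              unsigned int nr_skip)
    {
            return !ret && nr_found > nr_skip;
    }
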
@@ -901,7 +906,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 	rcu_read_lock();
 restart:
 	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-				(void ***)pages, index, nr_pages);
+				(void ***)pages, NULL, index, nr_pages);
 	ret = 0;
 	for (i = 0; i < nr_found; i++) {
 		struct page *page;
@@ -910,12 +915,22 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page))
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				goto restart;
+			}
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so stop looking for
+			 * contiguous pages.
+			 */
+			break;
+		}
 
 		if (!page_cache_get_speculative(page))
 			goto repeat;
@@ -975,12 +990,21 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page))
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				goto restart;
+			}
+			/*
+			 * This function is never used on a shmem/tmpfs
+			 * mapping, so a swap entry won't be found here.
+			 */
+			BUG();
+		}
 
 		if (!page_cache_get_speculative(page))
 			goto repeat;
@@ -1792,7 +1816,7 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
 
 static struct page *__read_cache_page(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data,
 				gfp_t gfp)
 {
@@ -1823,7 +1847,7 @@ repeat:
 
 static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data,
 				gfp_t gfp)
 
@@ -1863,7 +1887,7 @@ out:
  * @mapping:	the page's address_space
  * @index:	the page index
  * @filler:	function to perform the read
- * @data:	destination for read data
+ * @data:	first arg to filler(data, page) function, often left as NULL
  *
  * Same as read_cache_page, but don't wait for page to become unlocked
 * after submitting it to the filler.
@@ -1875,7 +1899,7 @@
  */
 struct page *read_cache_page_async(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data)
 {
 	return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
@@ -1923,7 +1947,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
  * @mapping:	the page's address_space
  * @index:	the page index
  * @filler:	function to perform the read
- * @data:	destination for read data
+ * @data:	first arg to filler(data, page) function, often left as NULL
  *
  * Read into the page cache. If a page already exists, and PageUptodate() is
 * not set, try to fill the page then wait for it to become unlocked.
@@ -1932,7 +1956,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
  */
 struct page *read_cache_page(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data)
 {
 	return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
@@ -2091,6 +2115,7 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
 	} else {
 		const struct iovec *iov = i->iov;
 		size_t base = i->iov_offset;
+		unsigned long nr_segs = i->nr_segs;
 
 		/*
 		 * The !iov->iov_len check ensures we skip over unlikely
@@ -2106,11 +2131,13 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
 			base += copy;
 			if (iov->iov_len == base) {
 				iov++;
+				nr_segs--;
 				base = 0;
 			}
 		}
 		i->iov = iov;
 		i->iov_offset = base;
+		i->nr_segs = nr_segs;
 	}
 }
 EXPORT_SYMBOL(iov_iter_advance);
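
The two added lines fix stale bookkeeping rather than a crash: the loop already stepped iov past exhausted segments, but never shrank nr_segs to match, leaving the iterator inconsistent for any later code that trusts i->nr_segs. A simplified, runnable userspace model of the advance loop (zero-length-segment handling and the count field are omitted for brevity):

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <sys/uio.h>

    struct iov_iter_m {
            const struct iovec *iov;
            unsigned long nr_segs;
            size_t iov_offset;
    };

    static void iov_iter_advance_m(struct iov_iter_m *i, size_t bytes)
    {
            const struct iovec *iov = i->iov;
            size_t base = i->iov_offset;
            unsigned long nr_segs = i->nr_segs;

            while (bytes) {
                    size_t copy = iov->iov_len - base;

                    if (copy > bytes)
                            copy = bytes;
                    bytes -= copy;
                    base += copy;
                    if (iov->iov_len == base) {
                            iov++;
                            nr_segs--;      /* the fix: keep in step with iov++ */
                            base = 0;
                    }
            }
            i->iov = iov;
            i->iov_offset = base;
            i->nr_segs = nr_segs;
    }

    int main(void)
    {
            char a[8], b[8];
            struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
            struct iov_iter_m i = { iov, 2, 0 };

            iov_iter_advance_m(&i, 10);     /* all of a[] plus 2 bytes of b[] */
            assert(i.nr_segs == 1 && i.iov_offset == 2);
            printf("nr_segs=%lu iov_offset=%zu\n", i.nr_segs, i.iov_offset);
            return 0;
    }
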