Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	137
1 file changed, 82 insertions(+), 55 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index f820e600f1ad..c0018f2d50e0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -9,7 +9,7 @@
  * most "normal" filesystems (but you don't /have/ to use this:
  * the NFS filesystem used to do this differently, for example)
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
@@ -33,7 +33,6 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
-#include <linux/mm_inline.h> /* for page_is_file_cache() */
 #include <linux/cleancache.h>
 #include "internal.h"
 
@@ -78,7 +77,7 @@
  *  ->i_mutex			(generic_file_buffered_write)
  *    ->mmap_sem		(fault_in_pages_readable->do_page_fault)
  *
- *  inode_wb_list_lock
+ *  bdi->wb.list_lock
  *    sb_lock			(fs/fs-writeback.c)
  *    ->mapping->tree_lock	(__sync_single_inode)
  *
@@ -96,9 +95,9 @@
  *    ->zone.lru_lock		(check_pte_range->isolate_lru_page)
  *    ->private_lock		(page_remove_rmap->set_page_dirty)
  *    ->tree_lock		(page_remove_rmap->set_page_dirty)
- *    inode_wb_list_lock	(page_remove_rmap->set_page_dirty)
+ *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
  *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
- *    inode_wb_list_lock	(zap_pte_range->set_page_dirty)
+ *    bdi.wb->list_lock		(zap_pte_range->set_page_dirty)
  *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
  *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
  *
@@ -128,6 +127,7 @@ void __delete_from_page_cache(struct page *page)
 
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
+	/* Leave page->index set: truncation lookup relies upon it */
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	if (PageSwapBacked(page))
@@ -461,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	int error;
 
 	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(PageSwapBacked(page));
 
 	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & GFP_RECLAIM_MASK);
@@ -478,11 +479,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	if (likely(!error)) {
 		mapping->nrpages++;
 		__inc_zone_page_state(page, NR_FILE_PAGES);
-		if (PageSwapBacked(page))
-			__inc_zone_page_state(page, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
 	} else {
 		page->mapping = NULL;
+		/* Leave page->index set: truncation relies upon it */
 		spin_unlock_irq(&mapping->tree_lock);
 		mem_cgroup_uncharge_cache_page(page);
 		page_cache_release(page);
@@ -500,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 {
 	int ret;
 
-	/*
-	 * Splice_read and readahead add shmem/tmpfs pages into the page cache
-	 * before shmem_readpage has a chance to mark them as SwapBacked: they
-	 * need to go on the anon lru below, and mem_cgroup_cache_charge
-	 * (called in add_to_page_cache) needs to know where they're going too.
-	 */
-	if (mapping_cap_swap_backed(mapping))
-		SetPageSwapBacked(page);
-
 	ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	if (ret == 0) {
-		if (page_is_file_cache(page))
-			lru_cache_add_file(page);
-		else
-			lru_cache_add_anon(page);
-	}
+	if (ret == 0)
+		lru_cache_add_file(page);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
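
With shmem/tmpfs pages no longer taking this route, everything reaching add_to_page_cache_lru() is file-backed and goes straight onto the file LRU. A minimal caller-side sketch of the simplified contract, assuming a readahead-style path (the error handling shown is illustrative, not taken from this patch):

	struct page *page = page_cache_alloc_cold(mapping);
	int error;

	if (!page)
		return -ENOMEM;
	error = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
	if (error) {
		/* Someone else may have instantiated this index meanwhile. */
		page_cache_release(page);
		return error;
	}
	/* page is locked, on the file LRU, and ready to be filled */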
@@ -712,9 +699,16 @@ repeat:
 	page = radix_tree_deref_slot(pagep);
 	if (unlikely(!page))
 		goto out;
-	if (radix_tree_deref_retry(page))
-		goto repeat;
-
+	if (radix_tree_exception(page)) {
+		if (radix_tree_deref_retry(page))
+			goto repeat;
+		/*
+		 * Otherwise, shmem/tmpfs must be storing a swap entry
+		 * here as an exceptional entry: so return it without
+		 * attempting to raise page count.
+		 */
+		goto out;
+	}
 	if (!page_cache_get_speculative(page))
 		goto repeat;
 
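
radix_tree_exception() keys off the low bits of the slot value, bits that can never be set in a real page pointer, which is at least word-aligned. A rough sketch of the encoding shmem relies on here; treat the constant names (RADIX_TREE_EXCEPTIONAL_SHIFT, RADIX_TREE_EXCEPTIONAL_ENTRY) and helper shapes as assumptions rather than quotations from this tree:

/* Illustrative sketch of the exceptional-entry encoding. */
static inline void *swp_to_radix_entry(swp_entry_t entry)
{
	/* Shift the swap value up and set the exceptional tag bit. */
	unsigned long val = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;

	return (void *)(val | RADIX_TREE_EXCEPTIONAL_ENTRY);
}

static inline swp_entry_t radix_to_swp_entry(void *arg)
{
	/* Reverse of the above: strip the tag, recover the swap value. */
	swp_entry_t entry = {
		.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT,
	};

	return entry;
}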
@@ -751,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
 
 repeat:
 	page = find_get_page(mapping, offset);
-	if (page) {
+	if (page && !radix_tree_exception(page)) {
 		lock_page(page);
 		/* Has the page been truncated? */
 		if (unlikely(page->mapping != mapping)) {
@@ -833,13 +827,14 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 {
 	unsigned int i;
 	unsigned int ret;
-	unsigned int nr_found;
+	unsigned int nr_found, nr_skip;
 
 	rcu_read_lock();
 restart:
 	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-				(void ***)pages, start, nr_pages);
+				(void ***)pages, NULL, start, nr_pages);
 	ret = 0;
+	nr_skip = 0;
 	for (i = 0; i < nr_found; i++) {
 		struct page *page;
 repeat:
@@ -847,13 +842,23 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page)) {
-			WARN_ON(start | i);
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				WARN_ON(start | i);
+				goto restart;
+			}
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so skip over it -
+			 * we only reach this from invalidate_mapping_pages().
+			 */
+			nr_skip++;
+			continue;
 		}
 
 		if (!page_cache_get_speculative(page))
@@ -873,7 +878,7 @@ repeat:
 	 * If all entries were removed before we could secure them,
 	 * try again, because callers stop trying once 0 is returned.
 	 */
-	if (unlikely(!ret && nr_found))
+	if (unlikely(!ret && nr_found > nr_skip))
 		goto restart;
 	rcu_read_unlock();
 	return ret;
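
The extra nr_skip term prevents a livelock: exceptional entries are counted in nr_found but never added to ret, so a range holding only swap entries would otherwise satisfy !ret && nr_found on every pass and restart forever. A standalone model of the restart rule (names illustrative):

/*
 * Restart only if some slot held a real page that vanished between the
 * gang lookup and page_cache_get_speculative(): entries we deliberately
 * skipped must not count as "lost".
 */
static int should_restart(unsigned int ret, unsigned int nr_found,
			  unsigned int nr_skip)
{
	return ret == 0 && nr_found > nr_skip;
}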
@@ -901,7 +906,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 	rcu_read_lock();
 restart:
 	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-				(void ***)pages, index, nr_pages);
+				(void ***)pages, NULL, index, nr_pages);
 	ret = 0;
 	for (i = 0; i < nr_found; i++) {
 		struct page *page;
@@ -910,12 +915,22 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page))
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				goto restart;
+			}
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so stop looking for
+			 * contiguous pages.
+			 */
+			break;
+		}
 
 		if (!page_cache_get_speculative(page))
 			goto repeat;
@@ -975,12 +990,21 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page))
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				goto restart;
+			}
+			/*
+			 * This function is never used on a shmem/tmpfs
+			 * mapping, so a swap entry won't be found here.
+			 */
+			BUG();
+		}
 
 		if (!page_cache_get_speculative(page))
 			goto repeat;
@@ -1792,7 +1816,7 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
 
 static struct page *__read_cache_page(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data,
 				gfp_t gfp)
 {
@@ -1823,7 +1847,7 @@ repeat:
 
 static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data,
 				gfp_t gfp)
 
@@ -1863,7 +1887,7 @@ out:
  * @mapping:	the page's address_space
  * @index:	the page index
  * @filler:	function to perform the read
- * @data:	destination for read data
+ * @data:	first arg to filler(data, page) function, often left as NULL
  *
  * Same as read_cache_page, but don't wait for page to become unlocked
  * after submitting it to the filler.
@@ -1875,7 +1899,7 @@ out:
  */
 struct page *read_cache_page_async(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data)
 {
 	return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
@@ -1923,7 +1947,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
  * @mapping:	the page's address_space
  * @index:	the page index
  * @filler:	function to perform the read
- * @data:	destination for read data
+ * @data:	first arg to filler(data, page) function, often left as NULL
  *
  * Read into the page cache. If a page already exists, and PageUptodate() is
 * not set, try to fill the page then wait for it to become unlocked.
@@ -1932,7 +1956,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
  */
 struct page *read_cache_page(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data)
 {
 	return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
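
A sketch of how a caller might use read_cache_page() under the clarified kernel-doc, borrowing the mapping's own readpage as the filler and passing NULL through as data; this assumes a readpage implementation that tolerates a NULL file argument, as many do:

	struct page *page;

	page = read_cache_page(mapping, index,
			(int (*)(void *, struct page *))mapping->a_ops->readpage,
			NULL);
	if (IS_ERR(page))
		return PTR_ERR(page);
	/* page is uptodate and unlocked here; use its contents */
	page_cache_release(page);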
@@ -2091,6 +2115,7 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
 	} else {
 		const struct iovec *iov = i->iov;
 		size_t base = i->iov_offset;
+		unsigned long nr_segs = i->nr_segs;
 
 		/*
 		 * The !iov->iov_len check ensures we skip over unlikely
@@ -2106,11 +2131,13 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
 			base += copy;
 			if (iov->iov_len == base) {
 				iov++;
+				nr_segs--;
 				base = 0;
 			}
 		}
 		i->iov = iov;
 		i->iov_offset = base;
+		i->nr_segs = nr_segs;
 	}
 }
 EXPORT_SYMBOL(iov_iter_advance);
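
Both added lines keep i->nr_segs in step with the advancing i->iov pointer, so that later segment-count checks describe the iterator's true position. A self-contained model of the segment walk, with hypothetical miovec/miter types standing in for the kernel's iovec/iov_iter (assumes bytes does not exceed the data remaining):

#include <stddef.h>

struct miovec { void *base; size_t len; };

struct miter {
	const struct miovec *iov;	/* current segment */
	unsigned long nr_segs;		/* segments remaining, incl. current */
	size_t iov_offset;		/* offset into current segment */
};

static void miter_advance(struct miter *i, size_t bytes)
{
	const struct miovec *iov = i->iov;
	size_t base = i->iov_offset;
	unsigned long nr_segs = i->nr_segs;

	while (bytes) {
		size_t copy = iov->len - base;

		if (copy > bytes)
			copy = bytes;
		bytes -= copy;
		base += copy;
		if (iov->len == base) {	/* segment fully consumed */
			iov++;
			nr_segs--;	/* the fix: keep count in step */
			base = 0;
		}
	}
	i->iov = iov;
	i->iov_offset = base;
	i->nr_segs = nr_segs;
}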