Diffstat (limited to 'mm/filemap.c')
-rw-r--r--  mm/filemap.c  160
1 files changed, 82 insertions, 78 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index a8251a8d345..0eedbf85062 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,7 +33,6 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
-#include <linux/mm_inline.h> /* for page_is_file_cache() */
 #include <linux/cleancache.h>
 #include "internal.h"
 
@@ -78,10 +77,7 @@
  *  ->i_mutex                  (generic_file_buffered_write)
  *    ->mmap_sem               (fault_in_pages_readable->do_page_fault)
  *
- *  ->i_mutex
- *    ->i_alloc_sem            (various)
- *
- *  inode_wb_list_lock
+ *  bdi->wb.list_lock
  *    sb_lock                  (fs/fs-writeback.c)
  *    ->mapping->tree_lock     (__sync_single_inode)
  *
@@ -99,9 +95,9 @@
  *    ->zone.lru_lock          (check_pte_range->isolate_lru_page)
  *    ->private_lock           (page_remove_rmap->set_page_dirty)
  *    ->tree_lock              (page_remove_rmap->set_page_dirty)
- *    inode_wb_list_lock       (page_remove_rmap->set_page_dirty)
+ *    bdi.wb->list_lock        (page_remove_rmap->set_page_dirty)
  *    ->inode->i_lock          (page_remove_rmap->set_page_dirty)
- *    inode_wb_list_lock       (zap_pte_range->set_page_dirty)
+ *    bdi.wb->list_lock        (zap_pte_range->set_page_dirty)
  *    ->inode->i_lock          (zap_pte_range->set_page_dirty)
  *    ->private_lock           (zap_pte_range->__set_page_dirty_buffers)
  *
@@ -131,6 +127,7 @@ void __delete_from_page_cache(struct page *page)
 
         radix_tree_delete(&mapping->page_tree, page->index);
         page->mapping = NULL;
+        /* Leave page->index set: truncation lookup relies upon it */
         mapping->nrpages--;
         __dec_zone_page_state(page, NR_FILE_PAGES);
         if (PageSwapBacked(page))
@@ -396,24 +393,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 {
         int error;
-        struct mem_cgroup *memcg = NULL;
 
         VM_BUG_ON(!PageLocked(old));
         VM_BUG_ON(!PageLocked(new));
         VM_BUG_ON(new->mapping);
 
-        /*
-         * This is not page migration, but prepare_migration and
-         * end_migration does enough work for charge replacement.
-         *
-         * In the longer term we probably want a specialized function
-         * for moving the charge from old to new in a more efficient
-         * manner.
-         */
-        error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
-        if (error)
-                return error;
-
         error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
         if (!error) {
                 struct address_space *mapping = old->mapping;
@@ -435,13 +419,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
                 if (PageSwapBacked(new))
                         __inc_zone_page_state(new, NR_SHMEM);
                 spin_unlock_irq(&mapping->tree_lock);
+                /* mem_cgroup codes must not be called under tree_lock */
+                mem_cgroup_replace_page_cache(old, new);
                 radix_tree_preload_end();
                 if (freepage)
                         freepage(old);
                 page_cache_release(old);
-                mem_cgroup_end_migration(memcg, old, new, true);
-        } else {
-                mem_cgroup_end_migration(memcg, old, new, false);
         }
 
         return error;
@@ -464,6 +447,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
         int error;
 
         VM_BUG_ON(!PageLocked(page));
+        VM_BUG_ON(PageSwapBacked(page));
 
         error = mem_cgroup_cache_charge(page, current->mm,
                                         gfp_mask & GFP_RECLAIM_MASK);
@@ -481,11 +465,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
         if (likely(!error)) {
                 mapping->nrpages++;
                 __inc_zone_page_state(page, NR_FILE_PAGES);
-                if (PageSwapBacked(page))
-                        __inc_zone_page_state(page, NR_SHMEM);
                 spin_unlock_irq(&mapping->tree_lock);
         } else {
                 page->mapping = NULL;
+                /* Leave page->index set: truncation relies upon it */
                 spin_unlock_irq(&mapping->tree_lock);
                 mem_cgroup_uncharge_cache_page(page);
                 page_cache_release(page);
@@ -503,22 +486,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 {
         int ret;
 
-        /*
-         * Splice_read and readahead add shmem/tmpfs pages into the page cache
-         * before shmem_readpage has a chance to mark them as SwapBacked: they
-         * need to go on the anon lru below, and mem_cgroup_cache_charge
-         * (called in add_to_page_cache) needs to know where they're going too.
-         */
-        if (mapping_cap_swap_backed(mapping))
-                SetPageSwapBacked(page);
-
         ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-        if (ret == 0) {
-                if (page_is_file_cache(page))
-                        lru_cache_add_file(page);
-                else
-                        lru_cache_add_anon(page);
-        }
+        if (ret == 0)
+                lru_cache_add_file(page);
         return ret;
 }
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
@@ -715,9 +685,16 @@ repeat:
                 page = radix_tree_deref_slot(pagep);
                 if (unlikely(!page))
                         goto out;
-                if (radix_tree_deref_retry(page))
-                        goto repeat;
-
+                if (radix_tree_exception(page)) {
+                        if (radix_tree_deref_retry(page))
+                                goto repeat;
+                        /*
+                         * Otherwise, shmem/tmpfs must be storing a swap entry
+                         * here as an exceptional entry: so return it without
+                         * attempting to raise page count.
+                         */
+                        goto out;
+                }
                 if (!page_cache_get_speculative(page))
                         goto repeat;
 
@@ -754,7 +731,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
 
 repeat:
         page = find_get_page(mapping, offset);
-        if (page) {
+        if (page && !radix_tree_exception(page)) {
                 lock_page(page);
                 /* Has the page been truncated? */
                 if (unlikely(page->mapping != mapping)) {
@@ -836,13 +813,14 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 {
         unsigned int i;
         unsigned int ret;
-        unsigned int nr_found;
+        unsigned int nr_found, nr_skip;
 
         rcu_read_lock();
 restart:
         nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-                                (void ***)pages, start, nr_pages);
+                                (void ***)pages, NULL, start, nr_pages);
         ret = 0;
+        nr_skip = 0;
         for (i = 0; i < nr_found; i++) {
                 struct page *page;
 repeat:
@@ -850,13 +828,23 @@ repeat:
                 if (unlikely(!page))
                         continue;
 
-                /*
-                 * This can only trigger when the entry at index 0 moves out
-                 * of or back to the root: none yet gotten, safe to restart.
-                 */
-                if (radix_tree_deref_retry(page)) {
-                        WARN_ON(start | i);
-                        goto restart;
+                if (radix_tree_exception(page)) {
+                        if (radix_tree_deref_retry(page)) {
+                                /*
+                                 * Transient condition which can only trigger
+                                 * when entry at index 0 moves out of or back
+                                 * to root: none yet gotten, safe to restart.
+                                 */
+                                WARN_ON(start | i);
+                                goto restart;
+                        }
+                        /*
+                         * Otherwise, shmem/tmpfs must be storing a swap entry
+                         * here as an exceptional entry: so skip over it -
+                         * we only reach this from invalidate_mapping_pages().
+                         */
+                        nr_skip++;
+                        continue;
                 }
 
                 if (!page_cache_get_speculative(page))
@@ -876,7 +864,7 @@ repeat:
          * If all entries were removed before we could secure them,
          * try again, because callers stop trying once 0 is returned.
          */
-        if (unlikely(!ret && nr_found))
+        if (unlikely(!ret && nr_found > nr_skip))
                 goto restart;
         rcu_read_unlock();
         return ret;
@@ -904,7 +892,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
         rcu_read_lock();
 restart:
         nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-                                (void ***)pages, index, nr_pages);
+                                (void ***)pages, NULL, index, nr_pages);
         ret = 0;
         for (i = 0; i < nr_found; i++) {
                 struct page *page;
@@ -913,12 +901,22 @@ repeat:
                 if (unlikely(!page))
                         continue;
 
-                /*
-                 * This can only trigger when the entry at index 0 moves out
-                 * of or back to the root: none yet gotten, safe to restart.
-                 */
-                if (radix_tree_deref_retry(page))
-                        goto restart;
+                if (radix_tree_exception(page)) {
+                        if (radix_tree_deref_retry(page)) {
+                                /*
+                                 * Transient condition which can only trigger
+                                 * when entry at index 0 moves out of or back
+                                 * to root: none yet gotten, safe to restart.
+                                 */
+                                goto restart;
+                        }
+                        /*
+                         * Otherwise, shmem/tmpfs must be storing a swap entry
+                         * here as an exceptional entry: so stop looking for
+                         * contiguous pages.
+                         */
+                        break;
+                }
 
                 if (!page_cache_get_speculative(page))
                         goto repeat;
@@ -978,12 +976,21 @@ repeat:
                 if (unlikely(!page))
                         continue;
 
-                /*
-                 * This can only trigger when the entry at index 0 moves out
-                 * of or back to the root: none yet gotten, safe to restart.
-                 */
-                if (radix_tree_deref_retry(page))
-                        goto restart;
+                if (radix_tree_exception(page)) {
+                        if (radix_tree_deref_retry(page)) {
+                                /*
+                                 * Transient condition which can only trigger
+                                 * when entry at index 0 moves out of or back
+                                 * to root: none yet gotten, safe to restart.
+                                 */
+                                goto restart;
+                        }
+                        /*
+                         * This function is never used on a shmem/tmpfs
+                         * mapping, so a swap entry won't be found here.
+                         */
+                        BUG();
+                }
 
                 if (!page_cache_get_speculative(page))
                         goto repeat;
@@ -1795,7 +1802,7 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
 
 static struct page *__read_cache_page(struct address_space *mapping,
                                 pgoff_t index,
-                                int (*filler)(void *,struct page*),
+                                int (*filler)(void *, struct page *),
                                 void *data,
                                 gfp_t gfp)
 {
@@ -1807,7 +1814,7 @@ repeat:
                 page = __page_cache_alloc(gfp | __GFP_COLD);
                 if (!page)
                         return ERR_PTR(-ENOMEM);
-                err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+                err = add_to_page_cache_lru(page, mapping, index, gfp);
                 if (unlikely(err)) {
                         page_cache_release(page);
                         if (err == -EEXIST)
@@ -1826,7 +1833,7 @@ repeat:
 
 static struct page *do_read_cache_page(struct address_space *mapping,
                                 pgoff_t index,
-                                int (*filler)(void *,struct page*),
+                                int (*filler)(void *, struct page *),
                                 void *data,
                                 gfp_t gfp)
 
@@ -1866,7 +1873,7 @@ out:
  * @mapping:	the page's address_space
  * @index:	the page index
  * @filler:	function to perform the read
- * @data:	destination for read data
+ * @data:	first arg to filler(data, page) function, often left as NULL
  *
  * Same as read_cache_page, but don't wait for page to become unlocked
  * after submitting it to the filler.
@@ -1878,7 +1885,7 @@ out:
  */
 struct page *read_cache_page_async(struct address_space *mapping,
                                 pgoff_t index,
-                                int (*filler)(void *,struct page*),
+                                int (*filler)(void *, struct page *),
                                 void *data)
 {
         return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
@@ -1904,10 +1911,7 @@ static struct page *wait_on_page_read(struct page *page)
  * @gfp:	the page allocator flags to use if allocating
  *
  * This is the same as "read_mapping_page(mapping, index, NULL)", but with
- * any new page allocations done using the specified allocation flags. Note
- * that the Radix tree operations will still use GFP_KERNEL, so you can't
- * expect to do this atomically or anything like that - but you can pass in
- * other page requirements.
+ * any new page allocations done using the specified allocation flags.
  *
  * If the page does not get brought uptodate, return -EIO.
  */
@@ -1926,7 +1930,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
  * @mapping:	the page's address_space
  * @index:	the page index
  * @filler:	function to perform the read
- * @data:	destination for read data
+ * @data:	first arg to filler(data, page) function, often left as NULL
  *
  * Read into the page cache. If a page already exists, and PageUptodate() is
  * not set, try to fill the page then wait for it to become unlocked.
@@ -1935,7 +1939,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
  */
 struct page *read_cache_page(struct address_space *mapping,
                         pgoff_t index,
-                        int (*filler)(void *,struct page*),
+                        int (*filler)(void *, struct page *),
                         void *data)
 {
         return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
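
Note on the radix_tree_exception() checks introduced in the lookup hunks above: an "exceptional" entry is any slot value that is not a plain page pointer, which the radix tree marks by setting the low tag bits of the entry. The self-contained userspace sketch below is only an illustrative analogue of that low-bit tagging scheme, not kernel code; the macro names and bit values are assumptions modelled on include/linux/radix-tree.h of this era.

/* Illustrative analogue of radix-tree exceptional entries (userspace sketch).
 * Assumed names/values: RADIX_TREE_INDIRECT_PTR (bit 0), RADIX_TREE_EXCEPTIONAL_ENTRY (bit 1). */
#include <stdio.h>
#include <stdint.h>

#define RADIX_TREE_INDIRECT_PTR       1UL   /* assumed: internal tree node marker */
#define RADIX_TREE_EXCEPTIONAL_ENTRY  2UL   /* assumed: non-page payload marker   */
#define RADIX_TREE_EXCEPTIONAL_SHIFT  2

/* True for anything that is not a plain, pointer-aligned page pointer. */
static int radix_tree_exception(void *entry)
{
        return ((uintptr_t)entry &
                (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY)) != 0;
}

/* Pack a small value (standing in for a swap entry) into an exceptional slot. */
static void *make_exceptional(unsigned long value)
{
        return (void *)(uintptr_t)((value << RADIX_TREE_EXCEPTIONAL_SHIFT) |
                                   RADIX_TREE_EXCEPTIONAL_ENTRY);
}

static unsigned long exceptional_value(void *entry)
{
        return (uintptr_t)entry >> RADIX_TREE_EXCEPTIONAL_SHIFT;
}

int main(void)
{
        int page = 42;                             /* stands in for a struct page */
        void *slot_page = &page;                   /* ordinary page-cache slot    */
        void *slot_swap = make_exceptional(12345); /* shmem/tmpfs swap entry      */

        printf("page slot exceptional? %d\n", radix_tree_exception(slot_page));
        printf("swap slot exceptional? %d (value %lu)\n",
               radix_tree_exception(slot_swap), exceptional_value(slot_swap));
        return 0;
}

A set low bit is how the lookup paths patched above tell a shmem/tmpfs swap entry (returned, skipped, or treated as a stop/BUG depending on the caller) apart from a transient deref-retry slot, which simply restarts the RCU lookup.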