Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	160
1 file changed, 82 insertions, 78 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index a8251a8d345..0eedbf85062 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,7 +33,6 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
-#include <linux/mm_inline.h> /* for page_is_file_cache() */
 #include <linux/cleancache.h>
 #include "internal.h"
 
@@ -78,10 +77,7 @@
  *  ->i_mutex			(generic_file_buffered_write)
  *    ->mmap_sem		(fault_in_pages_readable->do_page_fault)
  *
- *  ->i_mutex
- *    ->i_alloc_sem		(various)
- *
- *  inode_wb_list_lock
+ *  bdi->wb.list_lock
  *    sb_lock			(fs/fs-writeback.c)
  *    ->mapping->tree_lock	(__sync_single_inode)
  *
@@ -99,9 +95,9 @@
  *    ->zone.lru_lock		(check_pte_range->isolate_lru_page)
  *    ->private_lock		(page_remove_rmap->set_page_dirty)
  *    ->tree_lock		(page_remove_rmap->set_page_dirty)
- *    inode_wb_list_lock	(page_remove_rmap->set_page_dirty)
+ *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
  *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
- *    inode_wb_list_lock	(zap_pte_range->set_page_dirty)
+ *    bdi.wb->list_lock		(zap_pte_range->set_page_dirty)
  *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
 *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
 *
@@ -131,6 +127,7 @@ void __delete_from_page_cache(struct page *page)
 
	radix_tree_delete(&mapping->page_tree, page->index);
	page->mapping = NULL;
+	/* Leave page->index set: truncation lookup relies upon it */
	mapping->nrpages--;
	__dec_zone_page_state(page, NR_FILE_PAGES);
	if (PageSwapBacked(page))
@@ -396,24 +393,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 {
 	int error;
-	struct mem_cgroup *memcg = NULL;
 
 	VM_BUG_ON(!PageLocked(old));
 	VM_BUG_ON(!PageLocked(new));
 	VM_BUG_ON(new->mapping);
 
-	/*
-	 * This is not page migration, but prepare_migration and
-	 * end_migration does enough work for charge replacement.
-	 *
-	 * In the longer term we probably want a specialized function
-	 * for moving the charge from old to new in a more efficient
-	 * manner.
-	 */
-	error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
-	if (error)
-		return error;
-
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (!error) {
 		struct address_space *mapping = old->mapping;
@@ -435,13 +419,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		if (PageSwapBacked(new))
 			__inc_zone_page_state(new, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
+		/* mem_cgroup codes must not be called under tree_lock */
+		mem_cgroup_replace_page_cache(old, new);
 		radix_tree_preload_end();
 		if (freepage)
 			freepage(old);
 		page_cache_release(old);
-		mem_cgroup_end_migration(memcg, old, new, true);
-	} else {
-		mem_cgroup_end_migration(memcg, old, new, false);
 	}
 
 	return error;
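The two hunks above replace the migration-style memcg charge handoff in replace_page_cache_page() with a single call to mem_cgroup_replace_page_cache(), made only after tree_lock has been dropped. For context, a minimal caller-side sketch of the function's contract as stated by the VM_BUG_ON checks; the variable names are illustrative, not from this patch:

	/*
	 * Sketch only: both pages are expected to be locked by the caller,
	 * and new_page must not yet belong to any mapping.
	 */
	error = replace_page_cache_page(old_page, new_page, GFP_KERNEL);
	if (error)
		return error;
	/* on success, new_page has taken over old_page's slot in the page cache */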
@@ -464,6 +447,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	int error;
 
 	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(PageSwapBacked(page));
 
 	error = mem_cgroup_cache_charge(page, current->mm,
 					gfp_mask & GFP_RECLAIM_MASK);
@@ -481,11 +465,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 	if (likely(!error)) {
 		mapping->nrpages++;
 		__inc_zone_page_state(page, NR_FILE_PAGES);
-		if (PageSwapBacked(page))
-			__inc_zone_page_state(page, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
 	} else {
 		page->mapping = NULL;
+		/* Leave page->index set: truncation relies upon it */
 		spin_unlock_irq(&mapping->tree_lock);
 		mem_cgroup_uncharge_cache_page(page);
 		page_cache_release(page);
@@ -503,22 +486,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 {
 	int ret;
 
-	/*
-	 * Splice_read and readahead add shmem/tmpfs pages into the page cache
-	 * before shmem_readpage has a chance to mark them as SwapBacked: they
-	 * need to go on the anon lru below, and mem_cgroup_cache_charge
-	 * (called in add_to_page_cache) needs to know where they're going too.
-	 */
-	if (mapping_cap_swap_backed(mapping))
-		SetPageSwapBacked(page);
-
 	ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	if (ret == 0) {
-		if (page_is_file_cache(page))
-			lru_cache_add_file(page);
-		else
-			lru_cache_add_anon(page);
-	}
+	if (ret == 0)
+		lru_cache_add_file(page);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
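With shmem/tmpfs pages no longer entering the page cache this way, add_to_page_cache_lru() only ever puts pages on the file LRU, and add_to_page_cache_locked() can assert !PageSwapBacked. A rough sketch of the usual allocate-then-add pattern, mirroring __read_cache_page() later in this diff (error handling abbreviated):

	page = __page_cache_alloc(mapping_gfp_mask(mapping) | __GFP_COLD);
	if (!page)
		return -ENOMEM;
	error = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
	if (error) {
		page_cache_release(page);
		return error;	/* -EEXIST if another thread added that index first */
	}
	/* page is now locked, charged, and on the file LRU */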
@@ -715,9 +685,16 @@ repeat:
 		page = radix_tree_deref_slot(pagep);
 		if (unlikely(!page))
 			goto out;
-		if (radix_tree_deref_retry(page))
-			goto repeat;
-
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page))
+				goto repeat;
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so return it without
+			 * attempting to raise page count.
+			 */
+			goto out;
+		}
 		if (!page_cache_get_speculative(page))
 			goto repeat;
 
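After this hunk, find_get_page() may hand back a shmem/tmpfs swap entry rather than a struct page pointer, so callers that are not prepared for that must test the returned value, as find_lock_page() does in the next hunk. A minimal caller-side sketch:

	page = find_get_page(mapping, offset);
	if (page && !radix_tree_exception(page)) {
		/* a real page: a reference was taken, drop it when done */
		page_cache_release(page);
	} else if (page) {
		/* exceptional (swap) entry: no reference was taken, nothing to drop */
	}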
@@ -754,7 +731,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
 
 repeat:
 	page = find_get_page(mapping, offset);
-	if (page) {
+	if (page && !radix_tree_exception(page)) {
 		lock_page(page);
 		/* Has the page been truncated? */
 		if (unlikely(page->mapping != mapping)) {
@@ -836,13 +813,14 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 {
 	unsigned int i;
 	unsigned int ret;
-	unsigned int nr_found;
+	unsigned int nr_found, nr_skip;
 
 	rcu_read_lock();
 restart:
 	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-				(void ***)pages, start, nr_pages);
+				(void ***)pages, NULL, start, nr_pages);
 	ret = 0;
+	nr_skip = 0;
 	for (i = 0; i < nr_found; i++) {
 		struct page *page;
 repeat:
@@ -850,13 +828,23 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page)) {
-			WARN_ON(start | i);
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				WARN_ON(start | i);
+				goto restart;
+			}
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so skip over it -
+			 * we only reach this from invalidate_mapping_pages().
+			 */
+			nr_skip++;
+			continue;
 		}
 
 		if (!page_cache_get_speculative(page))
@@ -876,7 +864,7 @@ repeat:
 	 * If all entries were removed before we could secure them,
 	 * try again, because callers stop trying once 0 is returned.
 	 */
-	if (unlikely(!ret && nr_found))
+	if (unlikely(!ret && nr_found > nr_skip))
 		goto restart;
 	rcu_read_unlock();
 	return ret;
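find_get_pages() now skips exceptional entries, and the new nr_skip count keeps a batch made up entirely of such entries from looping forever in the restart path. Callers are unaffected: they still receive only real, referenced pages. A sketch of the usual gang-lookup loop, assuming the conventional (mapping, start, nr_pages, pages) argument order:

	struct page *pages[PAGEVEC_SIZE];
	unsigned int nr, i;

	nr = find_get_pages(mapping, start, PAGEVEC_SIZE, pages);
	for (i = 0; i < nr; i++) {
		/* each returned page came back with a reference held */
		page_cache_release(pages[i]);
	}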
@@ -904,7 +892,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 	rcu_read_lock();
 restart:
 	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-				(void ***)pages, index, nr_pages);
+				(void ***)pages, NULL, index, nr_pages);
 	ret = 0;
 	for (i = 0; i < nr_found; i++) {
 		struct page *page;
@@ -913,12 +901,22 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page))
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				goto restart;
+			}
+			/*
+			 * Otherwise, shmem/tmpfs must be storing a swap entry
+			 * here as an exceptional entry: so stop looking for
+			 * contiguous pages.
+			 */
+			break;
+		}
 
 		if (!page_cache_get_speculative(page))
 			goto repeat;
@@ -978,12 +976,21 @@ repeat:
 		if (unlikely(!page))
 			continue;
 
-		/*
-		 * This can only trigger when the entry at index 0 moves out
-		 * of or back to the root: none yet gotten, safe to restart.
-		 */
-		if (radix_tree_deref_retry(page))
-			goto restart;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				goto restart;
+			}
+			/*
+			 * This function is never used on a shmem/tmpfs
+			 * mapping, so a swap entry won't be found here.
+			 */
+			BUG();
+		}
 
 		if (!page_cache_get_speculative(page))
 			goto repeat;
@@ -1795,7 +1802,7 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
 
 static struct page *__read_cache_page(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data,
 				gfp_t gfp)
 {
@@ -1807,7 +1814,7 @@ repeat:
 		page = __page_cache_alloc(gfp | __GFP_COLD);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
-		err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+		err = add_to_page_cache_lru(page, mapping, index, gfp);
 		if (unlikely(err)) {
 			page_cache_release(page);
 			if (err == -EEXIST)
@@ -1826,7 +1833,7 @@ repeat:
 
 static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data,
 				gfp_t gfp)
 
@@ -1866,7 +1873,7 @@ out:
  * @mapping:	the page's address_space
  * @index:	the page index
  * @filler:	function to perform the read
- * @data:	destination for read data
+ * @data:	first arg to filler(data, page) function, often left as NULL
  *
  * Same as read_cache_page, but don't wait for page to become unlocked
  * after submitting it to the filler.
@@ -1878,7 +1885,7 @@ out:
  */
 struct page *read_cache_page_async(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data)
 {
 	return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
@@ -1904,10 +1911,7 @@ static struct page *wait_on_page_read(struct page *page)
 * @gfp:	the page allocator flags to use if allocating
 *
 * This is the same as "read_mapping_page(mapping, index, NULL)", but with
-* any new page allocations done using the specified allocation flags. Note
-* that the Radix tree operations will still use GFP_KERNEL, so you can't
-* expect to do this atomically or anything like that - but you can pass in
-* other page requirements.
+* any new page allocations done using the specified allocation flags.
 *
 * If the page does not get brought uptodate, return -EIO.
 */
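The deleted caveat is now stale because __read_cache_page() above passes the caller's gfp mask down to add_to_page_cache_lru() instead of hard-coding GFP_KERNEL. Typical use, sketched; the GFP_NOFS choice is only an example:

	page = read_cache_page_gfp(mapping, index, GFP_NOFS);
	if (IS_ERR(page))
		return PTR_ERR(page);	/* e.g. -EIO if the page never became uptodate */
	/* use the uptodate page, then drop the reference */
	page_cache_release(page);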
@@ -1926,7 +1930,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
 * @mapping:	the page's address_space
 * @index:	the page index
 * @filler:	function to perform the read
-* @data:	destination for read data
+* @data:	first arg to filler(data, page) function, often left as NULL
 *
 * Read into the page cache. If a page already exists, and PageUptodate() is
 * not set, try to fill the page then wait for it to become unlocked.
@@ -1935,7 +1939,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
 */
 struct page *read_cache_page(struct address_space *mapping,
 				pgoff_t index,
-				int (*filler)(void *,struct page*),
+				int (*filler)(void *, struct page *),
 				void *data)
 {
 	return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
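read_cache_page() and read_cache_page_async() take a filler callback of the (now consistently spelled) type int (*)(void *, struct page *); the first argument is whatever the caller passed as @data, often NULL. A sketch with a hypothetical filler and helper, not taken from this patch:

	/* Hypothetical filler: fill the page and return 0 or an -errno. */
	static int my_filler(void *data, struct page *page)
	{
		struct my_dev *dev = data;	/* whatever was passed as @data */

		return my_dev_read_page(dev, page);	/* hypothetical helper */
	}

	page = read_cache_page(mapping, index, my_filler, dev);
	if (IS_ERR(page))
		return PTR_ERR(page);
	/* drop the reference with page_cache_release(page) when finished */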