path: root/mm/shmem.c
author    Hugh Dickins <hughd@google.com>    2011-08-03 19:21:24 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2011-08-03 20:25:23 -0400
commit    54af60421822bb9cb664dd5cd7aac46c01ccfcf8 (patch)
tree      b62d2b9ccc2b467ee5406bb0d3494db6a5f96582    /mm/shmem.c
parent    46f65ec15c6878a2b4a49f6e01b20b201b46a9e4 (diff)
tmpfs: convert shmem_getpage_gfp to radix-swap
Convert shmem_getpage_gfp(), the engine-room of shmem, to expect page or swap entry returned from radix tree by find_lock_page(). Whereas the repetitive old method proceeded mainly under info->lock, dropping and repeating whenever one of the conditions needed was not met, now we can proceed without it, leaving shmem_add_to_page_cache() to check for a race.

This way there is no need to preallocate a page, no need for an early radix_tree_preload(), no need for mem_cgroup_shmem_charge_fallback().

Move the error unwinding down to the bottom instead of repeating it throughout. ENOSPC handling is a little different from before: there is no longer any race between find_lock_page() and finding swap, but we can arrive at ENOSPC before calling shmem_recalc_inode(), which might occasionally discover freed space.

Be stricter to check i_size before returning. info->lock is used for little but alloced, swapped, i_blocks updates. Move i_blocks updates out from under the max_blocks check, so even an unlimited size=0 mount can show accurate du.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
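For readers new to the radix-swap scheme this series introduces, the sketch below (not part of the patch) shows the lookup pattern the new shmem_getpage_gfp() relies on: find_lock_page() may now return either a locked page or an exceptional radix-tree entry encoding a swap entry, and the caller separates the two before deciding whether to swap in or allocate. The helper name shmem_lookup_example() is hypothetical, for illustration only; radix_tree_exceptional_entry(), radix_to_swp_entry() and find_lock_page() are the same calls used in the diff below.

/* Minimal sketch, assuming the radix-swap helpers used by this series. */
#include <linux/pagemap.h>
#include <linux/radix-tree.h>
#include <linux/swap.h>
#include <linux/swapops.h>

/* Hypothetical helper: tell a cached page apart from a stored swap entry. */
static struct page *shmem_lookup_example(struct address_space *mapping,
					 pgoff_t index, swp_entry_t *swapp)
{
	struct page *page;

	swapp->val = 0;
	page = find_lock_page(mapping, index);
	if (radix_tree_exceptional_entry(page)) {
		/* Not a page pointer: decode the swap entry kept in the tree */
		*swapp = radix_to_swp_entry(page);
		page = NULL;
	}
	/*
	 * page != NULL    : locked, uptodate page cache page
	 * swapp->val != 0 : contents are out on swap, caller must swap in
	 * both zero       : hole, caller may allocate and add to the cache
	 */
	return page;
}

In the patch itself this logic is open-coded at the top of shmem_getpage_gfp(), under the repeat: label.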
Diffstat (limited to 'mm/shmem.c')
-rw-r--r--  mm/shmem.c | 259
1 file changed, 112 insertions(+), 147 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 174f97188e8a..92f01d7cc150 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -166,15 +166,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
 static LIST_HEAD(shmem_swaplist);
 static DEFINE_MUTEX(shmem_swaplist_mutex);
 
-static void shmem_free_blocks(struct inode *inode, long pages)
-{
-	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-	if (sbinfo->max_blocks) {
-		percpu_counter_add(&sbinfo->used_blocks, -pages);
-		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
-	}
-}
-
 static int shmem_reserve_inode(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
@@ -219,9 +210,12 @@ static void shmem_recalc_inode(struct inode *inode)
 
 	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
 	if (freed > 0) {
+		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+		if (sbinfo->max_blocks)
+			percpu_counter_add(&sbinfo->used_blocks, -freed);
 		info->alloced -= freed;
+		inode->i_blocks -= freed * BLOCKS_PER_PAGE;
 		shmem_unacct_blocks(info->flags, freed);
-		shmem_free_blocks(inode, freed);
 	}
 }
 
@@ -888,205 +882,180 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
 {
 	struct address_space *mapping = inode->i_mapping;
-	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_inode_info *info;
 	struct shmem_sb_info *sbinfo;
 	struct page *page;
-	struct page *prealloc_page = NULL;
 	swp_entry_t swap;
 	int error;
+	int once = 0;
 
 	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
 		return -EFBIG;
 repeat:
+	swap.val = 0;
 	page = find_lock_page(mapping, index);
-	if (page) {
+	if (radix_tree_exceptional_entry(page)) {
+		swap = radix_to_swp_entry(page);
+		page = NULL;
+	}
+
+	if (sgp != SGP_WRITE &&
+	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+		error = -EINVAL;
+		goto failed;
+	}
+
+	if (page || (sgp == SGP_READ && !swap.val)) {
 		/*
 		 * Once we can get the page lock, it must be uptodate:
 		 * if there were an error in reading back from swap,
 		 * the page would not be inserted into the filecache.
 		 */
-		BUG_ON(!PageUptodate(page));
-		goto done;
+		BUG_ON(page && !PageUptodate(page));
+		*pagep = page;
+		return 0;
 	}
 
 	/*
-	 * Try to preload while we can wait, to not make a habit of
-	 * draining atomic reserves; but don't latch on to this cpu.
+	 * Fast cache lookup did not find it:
+	 * bring it back from swap or allocate.
 	 */
-	error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
-	if (error)
-		goto out;
-	radix_tree_preload_end();
-
-	if (sgp != SGP_READ && !prealloc_page) {
-		prealloc_page = shmem_alloc_page(gfp, info, index);
-		if (prealloc_page) {
-			SetPageSwapBacked(prealloc_page);
-			if (mem_cgroup_cache_charge(prealloc_page,
-					current->mm, GFP_KERNEL)) {
-				page_cache_release(prealloc_page);
-				prealloc_page = NULL;
-			}
-		}
-	}
+	info = SHMEM_I(inode);
+	sbinfo = SHMEM_SB(inode->i_sb);
 
-	spin_lock(&info->lock);
-	shmem_recalc_inode(inode);
-	swap = shmem_get_swap(info, index);
 	if (swap.val) {
 		/* Look it up and read it in.. */
 		page = lookup_swap_cache(swap);
 		if (!page) {
-			spin_unlock(&info->lock);
 			/* here we actually do the io */
 			if (fault_type)
 				*fault_type |= VM_FAULT_MAJOR;
 			page = shmem_swapin(swap, gfp, info, index);
 			if (!page) {
-				swp_entry_t nswap = shmem_get_swap(info, index);
-				if (nswap.val == swap.val) {
-					error = -ENOMEM;
-					goto out;
-				}
-				goto repeat;
+				error = -ENOMEM;
+				goto failed;
 			}
-			wait_on_page_locked(page);
-			page_cache_release(page);
-			goto repeat;
 		}
 
 		/* We have to do this with page locked to prevent races */
-		if (!trylock_page(page)) {
-			spin_unlock(&info->lock);
-			wait_on_page_locked(page);
-			page_cache_release(page);
-			goto repeat;
-		}
-		if (PageWriteback(page)) {
-			spin_unlock(&info->lock);
-			wait_on_page_writeback(page);
-			unlock_page(page);
-			page_cache_release(page);
-			goto repeat;
-		}
+		lock_page(page);
 		if (!PageUptodate(page)) {
-			spin_unlock(&info->lock);
-			unlock_page(page);
-			page_cache_release(page);
 			error = -EIO;
-			goto out;
+			goto failed;
 		}
-
-		error = add_to_page_cache_locked(page, mapping,
-						index, GFP_NOWAIT);
-		if (error) {
-			spin_unlock(&info->lock);
-			if (error == -ENOMEM) {
-				/*
-				 * reclaim from proper memory cgroup and
-				 * call memcg's OOM if needed.
-				 */
-				error = mem_cgroup_shmem_charge_fallback(
-						page, current->mm, gfp);
-				if (error) {
-					unlock_page(page);
-					page_cache_release(page);
-					goto out;
-				}
-			}
-			unlock_page(page);
-			page_cache_release(page);
-			goto repeat;
+		wait_on_page_writeback(page);
+
+		/* Someone may have already done it for us */
+		if (page->mapping) {
+			if (page->mapping == mapping &&
+			    page->index == index)
+				goto done;
+			error = -EEXIST;
+			goto failed;
 		}
 
-		delete_from_swap_cache(page);
-		shmem_put_swap(info, index, (swp_entry_t){0});
+		error = shmem_add_to_page_cache(page, mapping, index,
+					gfp, swp_to_radix_entry(swap));
+		if (error)
+			goto failed;
+
+		spin_lock(&info->lock);
 		info->swapped--;
+		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
+
+		delete_from_swap_cache(page);
 		set_page_dirty(page);
 		swap_free(swap);
 
-	} else if (sgp == SGP_READ) {
-		page = find_get_page(mapping, index);
-		if (page && !trylock_page(page)) {
-			spin_unlock(&info->lock);
-			wait_on_page_locked(page);
-			page_cache_release(page);
-			goto repeat;
+	} else {
+		if (shmem_acct_block(info->flags)) {
+			error = -ENOSPC;
+			goto failed;
 		}
-		spin_unlock(&info->lock);
-
-	} else if (prealloc_page) {
-		sbinfo = SHMEM_SB(inode->i_sb);
 		if (sbinfo->max_blocks) {
 			if (percpu_counter_compare(&sbinfo->used_blocks,
-						sbinfo->max_blocks) >= 0 ||
-			    shmem_acct_block(info->flags))
-				goto nospace;
+						sbinfo->max_blocks) >= 0) {
+				error = -ENOSPC;
+				goto unacct;
+			}
 			percpu_counter_inc(&sbinfo->used_blocks);
-			inode->i_blocks += BLOCKS_PER_PAGE;
-		} else if (shmem_acct_block(info->flags))
-			goto nospace;
-
-		page = prealloc_page;
-		prealloc_page = NULL;
+		}
 
-		swap = shmem_get_swap(info, index);
-		if (swap.val)
-			mem_cgroup_uncharge_cache_page(page);
-		else
-			error = add_to_page_cache_lru(page, mapping,
-						index, GFP_NOWAIT);
-		/*
-		 * At add_to_page_cache_lru() failure,
-		 * uncharge will be done automatically.
-		 */
-		if (swap.val || error) {
-			shmem_unacct_blocks(info->flags, 1);
-			shmem_free_blocks(inode, 1);
-			spin_unlock(&info->lock);
-			page_cache_release(page);
-			goto repeat;
+		page = shmem_alloc_page(gfp, info, index);
+		if (!page) {
+			error = -ENOMEM;
+			goto decused;
 		}
 
+		SetPageSwapBacked(page);
+		__set_page_locked(page);
+		error = shmem_add_to_page_cache(page, mapping, index,
+							gfp, NULL);
+		if (error)
+			goto decused;
+		lru_cache_add_anon(page);
+
+		spin_lock(&info->lock);
 		info->alloced++;
+		inode->i_blocks += BLOCKS_PER_PAGE;
+		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
+
 		clear_highpage(page);
 		flush_dcache_page(page);
 		SetPageUptodate(page);
 		if (sgp == SGP_DIRTY)
 			set_page_dirty(page);
-
-	} else {
-		spin_unlock(&info->lock);
-		error = -ENOMEM;
-		goto out;
 	}
 done:
-	*pagep = page;
-	error = 0;
-out:
-	if (prealloc_page) {
-		mem_cgroup_uncharge_cache_page(prealloc_page);
-		page_cache_release(prealloc_page);
+	/* Perhaps the file has been truncated since we checked */
+	if (sgp != SGP_WRITE &&
+	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+		error = -EINVAL;
+		goto trunc;
 	}
-	return error;
+	*pagep = page;
+	return 0;
 
-nospace:
 	/*
-	 * Perhaps the page was brought in from swap between find_lock_page
-	 * and taking info->lock? We allow for that at add_to_page_cache_lru,
-	 * but must also avoid reporting a spurious ENOSPC while working on a
-	 * full tmpfs.
+	 * Error recovery.
 	 */
-	page = find_get_page(mapping, index);
+trunc:
+	ClearPageDirty(page);
+	delete_from_page_cache(page);
+	spin_lock(&info->lock);
+	info->alloced--;
+	inode->i_blocks -= BLOCKS_PER_PAGE;
 	spin_unlock(&info->lock);
+decused:
+	if (sbinfo->max_blocks)
+		percpu_counter_add(&sbinfo->used_blocks, -1);
+unacct:
+	shmem_unacct_blocks(info->flags, 1);
+failed:
+	if (swap.val && error != -EINVAL) {
+		struct page *test = find_get_page(mapping, index);
+		if (test && !radix_tree_exceptional_entry(test))
+			page_cache_release(test);
+		/* Have another try if the entry has changed */
+		if (test != swp_to_radix_entry(swap))
+			error = -EEXIST;
+	}
 	if (page) {
+		unlock_page(page);
 		page_cache_release(page);
+	}
+	if (error == -ENOSPC && !once++) {
+		info = SHMEM_I(inode);
+		spin_lock(&info->lock);
+		shmem_recalc_inode(inode);
+		spin_unlock(&info->lock);
 		goto repeat;
 	}
-	error = -ENOSPC;
-	goto out;
+	if (error == -EEXIST)
+		goto repeat;
+	return error;
 }
 
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -1095,9 +1064,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int error;
 	int ret = VM_FAULT_LOCKED;
 
-	if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-		return VM_FAULT_SIGBUS;
-
 	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
 	if (error)
 		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
@@ -2164,8 +2130,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 	if (config.max_inodes < inodes)
 		goto out;
 	/*
-	 * Those tests also disallow limited->unlimited while any are in
-	 * use, so i_blocks will always be zero when max_blocks is zero;
+	 * Those tests disallow limited->unlimited while any are in use;
 	 * but we must separately disallow unlimited->limited, because
 	 * in that case we have no record of how much is already in use.
 	 */