about summary refs log tree commit diff stats
path: root/mm/shmem.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- mm/shmem.c 149
1 files changed, 83 insertions, 66 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 8fa27e4e582a..dfc7069102ee 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_
852 852
853static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) 853static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
854{ 854{
855 struct inode *inode; 855 struct address_space *mapping;
856 unsigned long idx; 856 unsigned long idx;
857 unsigned long size; 857 unsigned long size;
858 unsigned long limit; 858 unsigned long limit;
@@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
875 if (size > SHMEM_NR_DIRECT) 875 if (size > SHMEM_NR_DIRECT)
876 size = SHMEM_NR_DIRECT; 876 size = SHMEM_NR_DIRECT;
877 offset = shmem_find_swp(entry, ptr, ptr+size); 877 offset = shmem_find_swp(entry, ptr, ptr+size);
878 if (offset >= 0) 878 if (offset >= 0) {
879 shmem_swp_balance_unmap();
879 goto found; 880 goto found;
881 }
880 if (!info->i_indirect) 882 if (!info->i_indirect)
881 goto lost2; 883 goto lost2;
882 884
@@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
914 if (size > ENTRIES_PER_PAGE) 916 if (size > ENTRIES_PER_PAGE)
915 size = ENTRIES_PER_PAGE; 917 size = ENTRIES_PER_PAGE;
916 offset = shmem_find_swp(entry, ptr, ptr+size); 918 offset = shmem_find_swp(entry, ptr, ptr+size);
917 shmem_swp_unmap(ptr);
918 if (offset >= 0) { 919 if (offset >= 0) {
919 shmem_dir_unmap(dir); 920 shmem_dir_unmap(dir);
920 goto found; 921 goto found;
921 } 922 }
923 shmem_swp_unmap(ptr);
922 } 924 }
923 } 925 }
924lost1: 926lost1:
@@ -928,8 +930,7 @@ lost2:
928 return 0; 930 return 0;
929found: 931found:
930 idx += offset; 932 idx += offset;
931 inode = igrab(&info->vfs_inode); 933 ptr += offset;
932 spin_unlock(&info->lock);
933 934
934 /* 935 /*
935 * Move _head_ to start search for next from here. 936 * Move _head_ to start search for next from here.
@@ -940,37 +941,18 @@ found:
940 */ 941 */
941 if (shmem_swaplist.next != &info->swaplist) 942 if (shmem_swaplist.next != &info->swaplist)
942 list_move_tail(&shmem_swaplist, &info->swaplist); 943 list_move_tail(&shmem_swaplist, &info->swaplist);
943 mutex_unlock(&shmem_swaplist_mutex);
944 944
945 error = 1;
946 if (!inode)
947 goto out;
948 /* 945 /*
949 * Charge page using GFP_KERNEL while we can wait. 946 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
950 * Charged back to the user(not to caller) when swap account is used. 947 * but also to hold up shmem_evict_inode(): so inode cannot be freed
951 * add_to_page_cache() will be called with GFP_NOWAIT. 948 * beneath us (pagelock doesn't help until the page is in pagecache).
952 */ 949 */
953 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); 950 mapping = info->vfs_inode.i_mapping;
954 if (error) 951 error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
955 goto out; 952 /* which does mem_cgroup_uncharge_cache_page on error */
956 error = radix_tree_preload(GFP_KERNEL);
957 if (error) {
958 mem_cgroup_uncharge_cache_page(page);
959 goto out;
960 }
961 error = 1;
962
963 spin_lock(&info->lock);
964 ptr = shmem_swp_entry(info, idx, NULL);
965 if (ptr && ptr->val == entry.val) {
966 error = add_to_page_cache_locked(page, inode->i_mapping,
967 idx, GFP_NOWAIT);
968 /* does mem_cgroup_uncharge_cache_page on error */
969 } else /* we must compensate for our precharge above */
970 mem_cgroup_uncharge_cache_page(page);
971 953
972 if (error == -EEXIST) { 954 if (error == -EEXIST) {
973 struct page *filepage = find_get_page(inode->i_mapping, idx); 955 struct page *filepage = find_get_page(mapping, idx);
974 error = 1; 956 error = 1;
975 if (filepage) { 957 if (filepage) {
976 /* 958 /*
@@ -990,14 +972,8 @@ found:
990 swap_free(entry); 972 swap_free(entry);
991 error = 1; /* not an error, but entry was found */ 973 error = 1; /* not an error, but entry was found */
992 } 974 }
993 if (ptr) 975 shmem_swp_unmap(ptr);
994 shmem_swp_unmap(ptr);
995 spin_unlock(&info->lock); 976 spin_unlock(&info->lock);
996 radix_tree_preload_end();
997out:
998 unlock_page(page);
999 page_cache_release(page);
1000 iput(inode); /* allows for NULL */
1001 return error; 977 return error;
1002} 978}
1003 979
@@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
1009 struct list_head *p, *next; 985 struct list_head *p, *next;
1010 struct shmem_inode_info *info; 986 struct shmem_inode_info *info;
1011 int found = 0; 987 int found = 0;
988 int error;
989
990 /*
991 * Charge page using GFP_KERNEL while we can wait, before taking
992 * the shmem_swaplist_mutex which might hold up shmem_writepage().
993 * Charged back to the user (not to caller) when swap account is used.
994 * add_to_page_cache() will be called with GFP_NOWAIT.
995 */
996 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
997 if (error)
998 goto out;
999 /*
1000 * Try to preload while we can wait, to not make a habit of
1001 * draining atomic reserves; but don't latch on to this cpu,
1002 * it's okay if sometimes we get rescheduled after this.
1003 */
1004 error = radix_tree_preload(GFP_KERNEL);
1005 if (error)
1006 goto uncharge;
1007 radix_tree_preload_end();
1012 1008
1013 mutex_lock(&shmem_swaplist_mutex); 1009 mutex_lock(&shmem_swaplist_mutex);
1014 list_for_each_safe(p, next, &shmem_swaplist) { 1010 list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
1016 found = shmem_unuse_inode(info, entry, page); 1012 found = shmem_unuse_inode(info, entry, page);
1017 cond_resched(); 1013 cond_resched();
1018 if (found) 1014 if (found)
1019 goto out; 1015 break;
1020 } 1016 }
1021 mutex_unlock(&shmem_swaplist_mutex); 1017 mutex_unlock(&shmem_swaplist_mutex);
1022 /* 1018
1023 * Can some race bring us here? We've been holding page lock, 1019uncharge:
1024 * so I think not; but would rather try again later than BUG() 1020 if (!found)
1025 */ 1021 mem_cgroup_uncharge_cache_page(page);
1022 if (found < 0)
1023 error = found;
1024out:
1026 unlock_page(page); 1025 unlock_page(page);
1027 page_cache_release(page); 1026 page_cache_release(page);
1028out: 1027 return error;
1029 return (found < 0) ? found : 0;
1030} 1028}
1031 1029
1032/* 1030/*
@@ -1064,7 +1062,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1064 else 1062 else
1065 swap.val = 0; 1063 swap.val = 0;
1066 1064
1065 /*
1066 * Add inode to shmem_unuse()'s list of swapped-out inodes,
1067 * if it's not already there. Do it now because we cannot take
1068 * mutex while holding spinlock, and must do so before the page
1069 * is moved to swap cache, when its pagelock no longer protects
1070 * the inode from eviction. But don't unlock the mutex until
1071 * we've taken the spinlock, because shmem_unuse_inode() will
1072 * prune a !swapped inode from the swaplist under both locks.
1073 */
1074 if (swap.val) {
1075 mutex_lock(&shmem_swaplist_mutex);
1076 if (list_empty(&info->swaplist))
1077 list_add_tail(&info->swaplist, &shmem_swaplist);
1078 }
1079
1067 spin_lock(&info->lock); 1080 spin_lock(&info->lock);
1081 if (swap.val)
1082 mutex_unlock(&shmem_swaplist_mutex);
1083
1068 if (index >= info->next_index) { 1084 if (index >= info->next_index) {
1069 BUG_ON(!(info->flags & SHMEM_TRUNCATE)); 1085 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
1070 goto unlock; 1086 goto unlock;
@@ -1084,21 +1100,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1084 delete_from_page_cache(page); 1100 delete_from_page_cache(page);
1085 shmem_swp_set(info, entry, swap.val); 1101 shmem_swp_set(info, entry, swap.val);
1086 shmem_swp_unmap(entry); 1102 shmem_swp_unmap(entry);
1087 if (list_empty(&info->swaplist))
1088 inode = igrab(inode);
1089 else
1090 inode = NULL;
1091 spin_unlock(&info->lock); 1103 spin_unlock(&info->lock);
1092 swap_shmem_alloc(swap); 1104 swap_shmem_alloc(swap);
1093 BUG_ON(page_mapped(page)); 1105 BUG_ON(page_mapped(page));
1094 swap_writepage(page, wbc); 1106 swap_writepage(page, wbc);
1095 if (inode) {
1096 mutex_lock(&shmem_swaplist_mutex);
1097 /* move instead of add in case we're racing */
1098 list_move_tail(&info->swaplist, &shmem_swaplist);
1099 mutex_unlock(&shmem_swaplist_mutex);
1100 iput(inode);
1101 }
1102 return 0; 1107 return 0;
1103 } 1108 }
1104 1109
@@ -1400,20 +1405,14 @@ repeat:
1400 if (sbinfo->max_blocks) { 1405 if (sbinfo->max_blocks) {
1401 if (percpu_counter_compare(&sbinfo->used_blocks, 1406 if (percpu_counter_compare(&sbinfo->used_blocks,
1402 sbinfo->max_blocks) >= 0 || 1407 sbinfo->max_blocks) >= 0 ||
1403 shmem_acct_block(info->flags)) { 1408 shmem_acct_block(info->flags))
1404 spin_unlock(&info->lock); 1409 goto nospace;
1405 error = -ENOSPC;
1406 goto failed;
1407 }
1408 percpu_counter_inc(&sbinfo->used_blocks); 1410 percpu_counter_inc(&sbinfo->used_blocks);
1409 spin_lock(&inode->i_lock); 1411 spin_lock(&inode->i_lock);
1410 inode->i_blocks += BLOCKS_PER_PAGE; 1412 inode->i_blocks += BLOCKS_PER_PAGE;
1411 spin_unlock(&inode->i_lock); 1413 spin_unlock(&inode->i_lock);
1412 } else if (shmem_acct_block(info->flags)) { 1414 } else if (shmem_acct_block(info->flags))
1413 spin_unlock(&info->lock); 1415 goto nospace;
1414 error = -ENOSPC;
1415 goto failed;
1416 }
1417 1416
1418 if (!filepage) { 1417 if (!filepage) {
1419 int ret; 1418 int ret;
@@ -1493,6 +1492,24 @@ done:
1493 error = 0; 1492 error = 0;
1494 goto out; 1493 goto out;
1495 1494
1495nospace:
1496 /*
1497 * Perhaps the page was brought in from swap between find_lock_page
1498 * and taking info->lock? We allow for that at add_to_page_cache_lru,
1499 * but must also avoid reporting a spurious ENOSPC while working on a
1500 * full tmpfs. (When filepage has been passed in to shmem_getpage, it
1501 * is already in page cache, which prevents this race from occurring.)
1502 */
1503 if (!filepage) {
1504 struct page *page = find_get_page(mapping, idx);
1505 if (page) {
1506 spin_unlock(&info->lock);
1507 page_cache_release(page);
1508 goto repeat;
1509 }
1510 }
1511 spin_unlock(&info->lock);
1512 error = -ENOSPC;
1496failed: 1513failed:
1497 if (*pagep != filepage) { 1514 if (*pagep != filepage) {
1498 unlock_page(filepage); 1515 unlock_page(filepage);