diff options
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- | mm/shmem.c | 149 |
1 files changed, 83 insertions, 66 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 8fa27e4e582a..dfc7069102ee 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_ | |||
852 | 852 | ||
853 | static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) | 853 | static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) |
854 | { | 854 | { |
855 | struct inode *inode; | 855 | struct address_space *mapping; |
856 | unsigned long idx; | 856 | unsigned long idx; |
857 | unsigned long size; | 857 | unsigned long size; |
858 | unsigned long limit; | 858 | unsigned long limit; |
@@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s | |||
875 | if (size > SHMEM_NR_DIRECT) | 875 | if (size > SHMEM_NR_DIRECT) |
876 | size = SHMEM_NR_DIRECT; | 876 | size = SHMEM_NR_DIRECT; |
877 | offset = shmem_find_swp(entry, ptr, ptr+size); | 877 | offset = shmem_find_swp(entry, ptr, ptr+size); |
878 | if (offset >= 0) | 878 | if (offset >= 0) { |
879 | shmem_swp_balance_unmap(); | ||
879 | goto found; | 880 | goto found; |
881 | } | ||
880 | if (!info->i_indirect) | 882 | if (!info->i_indirect) |
881 | goto lost2; | 883 | goto lost2; |
882 | 884 | ||
@@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s | |||
914 | if (size > ENTRIES_PER_PAGE) | 916 | if (size > ENTRIES_PER_PAGE) |
915 | size = ENTRIES_PER_PAGE; | 917 | size = ENTRIES_PER_PAGE; |
916 | offset = shmem_find_swp(entry, ptr, ptr+size); | 918 | offset = shmem_find_swp(entry, ptr, ptr+size); |
917 | shmem_swp_unmap(ptr); | ||
918 | if (offset >= 0) { | 919 | if (offset >= 0) { |
919 | shmem_dir_unmap(dir); | 920 | shmem_dir_unmap(dir); |
920 | goto found; | 921 | goto found; |
921 | } | 922 | } |
923 | shmem_swp_unmap(ptr); | ||
922 | } | 924 | } |
923 | } | 925 | } |
924 | lost1: | 926 | lost1: |
@@ -928,8 +930,7 @@ lost2: | |||
928 | return 0; | 930 | return 0; |
929 | found: | 931 | found: |
930 | idx += offset; | 932 | idx += offset; |
931 | inode = igrab(&info->vfs_inode); | 933 | ptr += offset; |
932 | spin_unlock(&info->lock); | ||
933 | 934 | ||
934 | /* | 935 | /* |
935 | * Move _head_ to start search for next from here. | 936 | * Move _head_ to start search for next from here. |
@@ -940,37 +941,18 @@ found: | |||
940 | */ | 941 | */ |
941 | if (shmem_swaplist.next != &info->swaplist) | 942 | if (shmem_swaplist.next != &info->swaplist) |
942 | list_move_tail(&shmem_swaplist, &info->swaplist); | 943 | list_move_tail(&shmem_swaplist, &info->swaplist); |
943 | mutex_unlock(&shmem_swaplist_mutex); | ||
944 | 944 | ||
945 | error = 1; | ||
946 | if (!inode) | ||
947 | goto out; | ||
948 | /* | 945 | /* |
949 | * Charge page using GFP_KERNEL while we can wait. | 946 | * We rely on shmem_swaplist_mutex, not only to protect the swaplist, |
950 | * Charged back to the user(not to caller) when swap account is used. | 947 | * but also to hold up shmem_evict_inode(): so inode cannot be freed |
951 | * add_to_page_cache() will be called with GFP_NOWAIT. | 948 | * beneath us (pagelock doesn't help until the page is in pagecache). |
952 | */ | 949 | */ |
953 | error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); | 950 | mapping = info->vfs_inode.i_mapping; |
954 | if (error) | 951 | error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); |
955 | goto out; | 952 | /* which does mem_cgroup_uncharge_cache_page on error */ |
956 | error = radix_tree_preload(GFP_KERNEL); | ||
957 | if (error) { | ||
958 | mem_cgroup_uncharge_cache_page(page); | ||
959 | goto out; | ||
960 | } | ||
961 | error = 1; | ||
962 | |||
963 | spin_lock(&info->lock); | ||
964 | ptr = shmem_swp_entry(info, idx, NULL); | ||
965 | if (ptr && ptr->val == entry.val) { | ||
966 | error = add_to_page_cache_locked(page, inode->i_mapping, | ||
967 | idx, GFP_NOWAIT); | ||
968 | /* does mem_cgroup_uncharge_cache_page on error */ | ||
969 | } else /* we must compensate for our precharge above */ | ||
970 | mem_cgroup_uncharge_cache_page(page); | ||
971 | 953 | ||
972 | if (error == -EEXIST) { | 954 | if (error == -EEXIST) { |
973 | struct page *filepage = find_get_page(inode->i_mapping, idx); | 955 | struct page *filepage = find_get_page(mapping, idx); |
974 | error = 1; | 956 | error = 1; |
975 | if (filepage) { | 957 | if (filepage) { |
976 | /* | 958 | /* |
@@ -990,14 +972,8 @@ found: | |||
990 | swap_free(entry); | 972 | swap_free(entry); |
991 | error = 1; /* not an error, but entry was found */ | 973 | error = 1; /* not an error, but entry was found */ |
992 | } | 974 | } |
993 | if (ptr) | 975 | shmem_swp_unmap(ptr); |
994 | shmem_swp_unmap(ptr); | ||
995 | spin_unlock(&info->lock); | 976 | spin_unlock(&info->lock); |
996 | radix_tree_preload_end(); | ||
997 | out: | ||
998 | unlock_page(page); | ||
999 | page_cache_release(page); | ||
1000 | iput(inode); /* allows for NULL */ | ||
1001 | return error; | 977 | return error; |
1002 | } | 978 | } |
1003 | 979 | ||
@@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page) | |||
1009 | struct list_head *p, *next; | 985 | struct list_head *p, *next; |
1010 | struct shmem_inode_info *info; | 986 | struct shmem_inode_info *info; |
1011 | int found = 0; | 987 | int found = 0; |
988 | int error; | ||
989 | |||
990 | /* | ||
991 | * Charge page using GFP_KERNEL while we can wait, before taking | ||
992 | * the shmem_swaplist_mutex which might hold up shmem_writepage(). | ||
993 | * Charged back to the user (not to caller) when swap account is used. | ||
994 | * add_to_page_cache() will be called with GFP_NOWAIT. | ||
995 | */ | ||
996 | error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); | ||
997 | if (error) | ||
998 | goto out; | ||
999 | /* | ||
1000 | * Try to preload while we can wait, to not make a habit of | ||
1001 | * draining atomic reserves; but don't latch on to this cpu, | ||
1002 | * it's okay if sometimes we get rescheduled after this. | ||
1003 | */ | ||
1004 | error = radix_tree_preload(GFP_KERNEL); | ||
1005 | if (error) | ||
1006 | goto uncharge; | ||
1007 | radix_tree_preload_end(); | ||
1012 | 1008 | ||
1013 | mutex_lock(&shmem_swaplist_mutex); | 1009 | mutex_lock(&shmem_swaplist_mutex); |
1014 | list_for_each_safe(p, next, &shmem_swaplist) { | 1010 | list_for_each_safe(p, next, &shmem_swaplist) { |
@@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page) | |||
1016 | found = shmem_unuse_inode(info, entry, page); | 1012 | found = shmem_unuse_inode(info, entry, page); |
1017 | cond_resched(); | 1013 | cond_resched(); |
1018 | if (found) | 1014 | if (found) |
1019 | goto out; | 1015 | break; |
1020 | } | 1016 | } |
1021 | mutex_unlock(&shmem_swaplist_mutex); | 1017 | mutex_unlock(&shmem_swaplist_mutex); |
1022 | /* | 1018 | |
1023 | * Can some race bring us here? We've been holding page lock, | 1019 | uncharge: |
1024 | * so I think not; but would rather try again later than BUG() | 1020 | if (!found) |
1025 | */ | 1021 | mem_cgroup_uncharge_cache_page(page); |
1022 | if (found < 0) | ||
1023 | error = found; | ||
1024 | out: | ||
1026 | unlock_page(page); | 1025 | unlock_page(page); |
1027 | page_cache_release(page); | 1026 | page_cache_release(page); |
1028 | out: | 1027 | return error; |
1029 | return (found < 0) ? found : 0; | ||
1030 | } | 1028 | } |
1031 | 1029 | ||
1032 | /* | 1030 | /* |
@@ -1064,7 +1062,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1064 | else | 1062 | else |
1065 | swap.val = 0; | 1063 | swap.val = 0; |
1066 | 1064 | ||
1065 | /* | ||
1066 | * Add inode to shmem_unuse()'s list of swapped-out inodes, | ||
1067 | * if it's not already there. Do it now because we cannot take | ||
1068 | * mutex while holding spinlock, and must do so before the page | ||
1069 | * is moved to swap cache, when its pagelock no longer protects | ||
1070 | * the inode from eviction. But don't unlock the mutex until | ||
1071 | * we've taken the spinlock, because shmem_unuse_inode() will | ||
1072 | * prune a !swapped inode from the swaplist under both locks. | ||
1073 | */ | ||
1074 | if (swap.val) { | ||
1075 | mutex_lock(&shmem_swaplist_mutex); | ||
1076 | if (list_empty(&info->swaplist)) | ||
1077 | list_add_tail(&info->swaplist, &shmem_swaplist); | ||
1078 | } | ||
1079 | |||
1067 | spin_lock(&info->lock); | 1080 | spin_lock(&info->lock); |
1081 | if (swap.val) | ||
1082 | mutex_unlock(&shmem_swaplist_mutex); | ||
1083 | |||
1068 | if (index >= info->next_index) { | 1084 | if (index >= info->next_index) { |
1069 | BUG_ON(!(info->flags & SHMEM_TRUNCATE)); | 1085 | BUG_ON(!(info->flags & SHMEM_TRUNCATE)); |
1070 | goto unlock; | 1086 | goto unlock; |
@@ -1084,21 +1100,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1084 | delete_from_page_cache(page); | 1100 | delete_from_page_cache(page); |
1085 | shmem_swp_set(info, entry, swap.val); | 1101 | shmem_swp_set(info, entry, swap.val); |
1086 | shmem_swp_unmap(entry); | 1102 | shmem_swp_unmap(entry); |
1087 | if (list_empty(&info->swaplist)) | ||
1088 | inode = igrab(inode); | ||
1089 | else | ||
1090 | inode = NULL; | ||
1091 | spin_unlock(&info->lock); | 1103 | spin_unlock(&info->lock); |
1092 | swap_shmem_alloc(swap); | 1104 | swap_shmem_alloc(swap); |
1093 | BUG_ON(page_mapped(page)); | 1105 | BUG_ON(page_mapped(page)); |
1094 | swap_writepage(page, wbc); | 1106 | swap_writepage(page, wbc); |
1095 | if (inode) { | ||
1096 | mutex_lock(&shmem_swaplist_mutex); | ||
1097 | /* move instead of add in case we're racing */ | ||
1098 | list_move_tail(&info->swaplist, &shmem_swaplist); | ||
1099 | mutex_unlock(&shmem_swaplist_mutex); | ||
1100 | iput(inode); | ||
1101 | } | ||
1102 | return 0; | 1107 | return 0; |
1103 | } | 1108 | } |
1104 | 1109 | ||
@@ -1400,20 +1405,14 @@ repeat: | |||
1400 | if (sbinfo->max_blocks) { | 1405 | if (sbinfo->max_blocks) { |
1401 | if (percpu_counter_compare(&sbinfo->used_blocks, | 1406 | if (percpu_counter_compare(&sbinfo->used_blocks, |
1402 | sbinfo->max_blocks) >= 0 || | 1407 | sbinfo->max_blocks) >= 0 || |
1403 | shmem_acct_block(info->flags)) { | 1408 | shmem_acct_block(info->flags)) |
1404 | spin_unlock(&info->lock); | 1409 | goto nospace; |
1405 | error = -ENOSPC; | ||
1406 | goto failed; | ||
1407 | } | ||
1408 | percpu_counter_inc(&sbinfo->used_blocks); | 1410 | percpu_counter_inc(&sbinfo->used_blocks); |
1409 | spin_lock(&inode->i_lock); | 1411 | spin_lock(&inode->i_lock); |
1410 | inode->i_blocks += BLOCKS_PER_PAGE; | 1412 | inode->i_blocks += BLOCKS_PER_PAGE; |
1411 | spin_unlock(&inode->i_lock); | 1413 | spin_unlock(&inode->i_lock); |
1412 | } else if (shmem_acct_block(info->flags)) { | 1414 | } else if (shmem_acct_block(info->flags)) |
1413 | spin_unlock(&info->lock); | 1415 | goto nospace; |
1414 | error = -ENOSPC; | ||
1415 | goto failed; | ||
1416 | } | ||
1417 | 1416 | ||
1418 | if (!filepage) { | 1417 | if (!filepage) { |
1419 | int ret; | 1418 | int ret; |
@@ -1493,6 +1492,24 @@ done: | |||
1493 | error = 0; | 1492 | error = 0; |
1494 | goto out; | 1493 | goto out; |
1495 | 1494 | ||
1495 | nospace: | ||
1496 | /* | ||
1497 | * Perhaps the page was brought in from swap between find_lock_page | ||
1498 | * and taking info->lock? We allow for that at add_to_page_cache_lru, | ||
1499 | * but must also avoid reporting a spurious ENOSPC while working on a | ||
1500 | * full tmpfs. (When filepage has been passed in to shmem_getpage, it | ||
1501 | * is already in page cache, which prevents this race from occurring.) | ||
1502 | */ | ||
1503 | if (!filepage) { | ||
1504 | struct page *page = find_get_page(mapping, idx); | ||
1505 | if (page) { | ||
1506 | spin_unlock(&info->lock); | ||
1507 | page_cache_release(page); | ||
1508 | goto repeat; | ||
1509 | } | ||
1510 | } | ||
1511 | spin_unlock(&info->lock); | ||
1512 | error = -ENOSPC; | ||
1496 | failed: | 1513 | failed: |
1497 | if (*pagep != filepage) { | 1514 | if (*pagep != filepage) { |
1498 | unlock_page(filepage); | 1515 | unlock_page(filepage); |