Diffstat (limited to 'mm/shmem.c')
-rw-r--r--	mm/shmem.c	149
1 file changed, 83 insertions, 66 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 8fa27e4e582a..dfc7069102ee 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_
 
 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 {
-	struct inode *inode;
+	struct address_space *mapping;
 	unsigned long idx;
 	unsigned long size;
 	unsigned long limit;
@@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 	if (size > SHMEM_NR_DIRECT)
 		size = SHMEM_NR_DIRECT;
 	offset = shmem_find_swp(entry, ptr, ptr+size);
-	if (offset >= 0)
+	if (offset >= 0) {
+		shmem_swp_balance_unmap();
 		goto found;
+	}
 	if (!info->i_indirect)
 		goto lost2;
 
@@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 			if (size > ENTRIES_PER_PAGE)
 				size = ENTRIES_PER_PAGE;
 			offset = shmem_find_swp(entry, ptr, ptr+size);
-			shmem_swp_unmap(ptr);
 			if (offset >= 0) {
 				shmem_dir_unmap(dir);
 				goto found;
 			}
+			shmem_swp_unmap(ptr);
 		}
 	}
 lost1:
@@ -928,8 +930,7 @@ lost2:
 	return 0;
 found:
 	idx += offset;
-	inode = igrab(&info->vfs_inode);
-	spin_unlock(&info->lock);
+	ptr += offset;
 
 	/*
 	 * Move _head_ to start search for next from here.
@@ -940,37 +941,18 @@ found:
 	 */
 	if (shmem_swaplist.next != &info->swaplist)
 		list_move_tail(&shmem_swaplist, &info->swaplist);
-	mutex_unlock(&shmem_swaplist_mutex);
 
-	error = 1;
-	if (!inode)
-		goto out;
 	/*
-	 * Charge page using GFP_KERNEL while we can wait.
-	 * Charged back to the user(not to caller) when swap account is used.
-	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
+	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
+	 * beneath us (pagelock doesn't help until the page is in pagecache).
 	 */
-	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
-	if (error)
-		goto out;
-	error = radix_tree_preload(GFP_KERNEL);
-	if (error) {
-		mem_cgroup_uncharge_cache_page(page);
-		goto out;
-	}
-	error = 1;
-
-	spin_lock(&info->lock);
-	ptr = shmem_swp_entry(info, idx, NULL);
-	if (ptr && ptr->val == entry.val) {
-		error = add_to_page_cache_locked(page, inode->i_mapping,
-						idx, GFP_NOWAIT);
-		/* does mem_cgroup_uncharge_cache_page on error */
-	} else	/* we must compensate for our precharge above */
-		mem_cgroup_uncharge_cache_page(page);
+	mapping = info->vfs_inode.i_mapping;
+	error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+	/* which does mem_cgroup_uncharge_cache_page on error */
 
 	if (error == -EEXIST) {
-		struct page *filepage = find_get_page(inode->i_mapping, idx);
+		struct page *filepage = find_get_page(mapping, idx);
 		error = 1;
 		if (filepage) {
 			/*
@@ -990,14 +972,8 @@ found:
 		swap_free(entry);
 		error = 1;	/* not an error, but entry was found */
 	}
-	if (ptr)
-		shmem_swp_unmap(ptr);
+	shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
-	radix_tree_preload_end();
-out:
-	unlock_page(page);
-	page_cache_release(page);
-	iput(inode);	/* allows for NULL */
 	return error;
 }
 
@@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 	struct list_head *p, *next;
 	struct shmem_inode_info *info;
 	int found = 0;
+	int error;
+
+	/*
+	 * Charge page using GFP_KERNEL while we can wait, before taking
+	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
+	 * Charged back to the user (not to caller) when swap account is used.
+	 * add_to_page_cache() will be called with GFP_NOWAIT.
+	 */
+	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+	if (error)
+		goto out;
+	/*
+	 * Try to preload while we can wait, to not make a habit of
+	 * draining atomic reserves; but don't latch on to this cpu,
+	 * it's okay if sometimes we get rescheduled after this.
+	 */
+	error = radix_tree_preload(GFP_KERNEL);
+	if (error)
+		goto uncharge;
+	radix_tree_preload_end();
 
 	mutex_lock(&shmem_swaplist_mutex);
 	list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 		found = shmem_unuse_inode(info, entry, page);
 		cond_resched();
 		if (found)
-			goto out;
+			break;
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
-	/*
-	 * Can some race bring us here? We've been holding page lock,
-	 * so I think not; but would rather try again later than BUG()
-	 */
+
+uncharge:
+	if (!found)
+		mem_cgroup_uncharge_cache_page(page);
+	if (found < 0)
+		error = found;
+out:
 	unlock_page(page);
 	page_cache_release(page);
-out:
-	return (found < 0) ? found : 0;
+	return error;
 }
 
 /*
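
Read together, the shmem_unuse() hunks above amount to: charge the page and preload the radix tree with GFP_KERNEL while sleeping is still allowed, only then take shmem_swaplist_mutex and walk the swaplist, and undo the charge if no inode claimed the page. Below is a condensed sketch of the resulting function, assembled from the new (right-hand) side of those hunks; it relies on kernel-internal helpers from mm/shmem.c of this era and is an illustration, not a standalone build:

int shmem_unuse(swp_entry_t entry, struct page *page)
{
	struct list_head *p, *next;
	struct shmem_inode_info *info;
	int found = 0;
	int error;

	/* charge and preload with GFP_KERNEL while we may still sleep */
	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
	if (error)
		goto out;
	error = radix_tree_preload(GFP_KERNEL);
	if (error)
		goto uncharge;
	radix_tree_preload_end();

	/* the mutex also holds off shmem_evict_inode() on each inode we visit */
	mutex_lock(&shmem_swaplist_mutex);
	list_for_each_safe(p, next, &shmem_swaplist) {
		info = list_entry(p, struct shmem_inode_info, swaplist);
		found = shmem_unuse_inode(info, entry, page);
		cond_resched();
		if (found)
			break;
	}
	mutex_unlock(&shmem_swaplist_mutex);

uncharge:
	if (!found)
		mem_cgroup_uncharge_cache_page(page);
	if (found < 0)
		error = found;
out:
	unlock_page(page);
	page_cache_release(page);
	return error;
}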
@@ -1064,7 +1062,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	else
 		swap.val = 0;
 
+	/*
+	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
+	 * if it's not already there. Do it now because we cannot take
+	 * mutex while holding spinlock, and must do so before the page
+	 * is moved to swap cache, when its pagelock no longer protects
+	 * the inode from eviction. But don't unlock the mutex until
+	 * we've taken the spinlock, because shmem_unuse_inode() will
+	 * prune a !swapped inode from the swaplist under both locks.
+	 */
+	if (swap.val) {
+		mutex_lock(&shmem_swaplist_mutex);
+		if (list_empty(&info->swaplist))
+			list_add_tail(&info->swaplist, &shmem_swaplist);
+	}
+
 	spin_lock(&info->lock);
+	if (swap.val)
+		mutex_unlock(&shmem_swaplist_mutex);
+
 	if (index >= info->next_index) {
 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
 		goto unlock;
@@ -1084,21 +1100,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		delete_from_page_cache(page);
 		shmem_swp_set(info, entry, swap.val);
 		shmem_swp_unmap(entry);
-		if (list_empty(&info->swaplist))
-			inode = igrab(inode);
-		else
-			inode = NULL;
 		spin_unlock(&info->lock);
 		swap_shmem_alloc(swap);
 		BUG_ON(page_mapped(page));
 		swap_writepage(page, wbc);
-		if (inode) {
-			mutex_lock(&shmem_swaplist_mutex);
-			/* move instead of add in case we're racing */
-			list_move_tail(&info->swaplist, &shmem_swaplist);
-			mutex_unlock(&shmem_swaplist_mutex);
-			iput(inode);
-		}
 		return 0;
 	}
 
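
In shmem_writepage() the ordering now does the work that the old igrab()/iput() dance used to do: the inode goes onto shmem_swaplist under the mutex before the page can reach swap cache, and the mutex is released only after info->lock is held, so shmem_unuse_inode() cannot prune the not-yet-swapped inode in between. A condensed sketch of the resulting sequence, taken from the new side of the two hunks above (excerpt only, not standalone):

	if (swap.val) {
		/*
		 * Register the inode for shmem_unuse() before the page can
		 * enter swap cache; a mutex cannot be taken under the
		 * info->lock spinlock, so take it first ...
		 */
		mutex_lock(&shmem_swaplist_mutex);
		if (list_empty(&info->swaplist))
			list_add_tail(&info->swaplist, &shmem_swaplist);
	}

	spin_lock(&info->lock);
	if (swap.val)
		/* ... and drop it only once info->lock is held */
		mutex_unlock(&shmem_swaplist_mutex);

With the inode already on the list at this point, the old block after swap_writepage() that took the mutex again to list_move_tail() the inode, and the igrab()/iput() pair around it, are no longer needed.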
@@ -1400,20 +1405,14 @@ repeat:
 		if (sbinfo->max_blocks) {
 			if (percpu_counter_compare(&sbinfo->used_blocks,
 						sbinfo->max_blocks) >= 0 ||
-			    shmem_acct_block(info->flags)) {
-				spin_unlock(&info->lock);
-				error = -ENOSPC;
-				goto failed;
-			}
+			    shmem_acct_block(info->flags))
+				goto nospace;
 			percpu_counter_inc(&sbinfo->used_blocks);
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += BLOCKS_PER_PAGE;
 			spin_unlock(&inode->i_lock);
-		} else if (shmem_acct_block(info->flags)) {
-			spin_unlock(&info->lock);
-			error = -ENOSPC;
-			goto failed;
-		}
+		} else if (shmem_acct_block(info->flags))
+			goto nospace;
 
 		if (!filepage) {
 			int ret;
@@ -1493,6 +1492,24 @@ done:
 	error = 0;
 	goto out;
 
+nospace:
+	/*
+	 * Perhaps the page was brought in from swap between find_lock_page
+	 * and taking info->lock? We allow for that at add_to_page_cache_lru,
+	 * but must also avoid reporting a spurious ENOSPC while working on a
+	 * full tmpfs. (When filepage has been passed in to shmem_getpage, it
+	 * is already in page cache, which prevents this race from occurring.)
+	 */
+	if (!filepage) {
+		struct page *page = find_get_page(mapping, idx);
+		if (page) {
+			spin_unlock(&info->lock);
+			page_cache_release(page);
+			goto repeat;
+		}
+	}
+	spin_unlock(&info->lock);
+	error = -ENOSPC;
 failed:
 	if (*pagep != filepage) {
 		unlock_page(filepage);
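
The last two hunks route both block-accounting failures in shmem_getpage() to a shared nospace: exit, which re-checks the page cache before reporting ENOSPC: if the page was brought in from swap between find_lock_page() and taking info->lock, the lookup is retried instead of returning a spurious error on a full tmpfs. A condensed sketch of the new exit path, taken from the added lines above (excerpt only, not standalone):

nospace:
	/* the page may have arrived via swap while info->lock was not yet held */
	if (!filepage) {
		struct page *page = find_get_page(mapping, idx);
		if (page) {
			spin_unlock(&info->lock);
			page_cache_release(page);
			goto repeat;	/* not a true ENOSPC: redo the lookup */
		}
	}
	spin_unlock(&info->lock);
	error = -ENOSPC;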
