aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hugh Dickins <hugh@veritas.com> 2008-02-05 01:28:55 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-05 12:44:16 -0500
commit 1b1b32f2c6f6bb32535d2da62075b51c980880eb (patch)
tree 686aac685a4c04f085dc17cc1a05910149a04933
parent b409f9fcf04692c0f603d28c73d2e3dfed27bf54 (diff)
tmpfs: fix shmem_swaplist races
Intensive swapoff testing shows shmem_unuse spinning on an entry in shmem_swaplist pointing to itself: how does that come about? Days pass... First guess is this: shmem_delete_inode tests list_empty without taking the global mutex (so the swapping case doesn't slow down the common case); but there's an instant in shmem_unuse_inode's list_move_tail when the list entry may appear empty (a rare case, because it's actually moving the head not the list member). So there's a danger of leaving the inode on the swaplist when it's freed, then reinitialized to point to itself when reused. Fix that by skipping the list_move_tail when it's a no-op, which happens to plug this. But this same spinning then surfaces on another machine. Ah, I'd never suspected it, but shmem_writepage's swaplist manipulation is unsafe: though we still hold page lock, which would hold off inode deletion if the page were in pagecache, it doesn't hold off once it's in swapcache (free_swap_and_cache doesn't wait on locked pages). Hmm: we could put the inode on swaplist earlier, but then shmem_unuse_inode could never prune unswapped inodes. Fix this with an igrab before dropping info->lock, as in shmem_unuse_inode; though I am a little uneasy about the iput which has to follow - it works, and I see nothing wrong with it, but it is surprising that shmem inode deletion may now occur below shmem_writepage. Revisit this fix later? And while we're looking at these races: the way shmem_unuse tests swapped without holding info->lock looks unsafe, if we've more than one swap area: a racing shmem_writepage on another page of the same inode could be putting it in swapcache, just as we're deciding to remove the inode from swaplist - there's a danger of going on swap without being listed, so a later swapoff would hang, being unable to locate the entry. Move that test and removal down into shmem_unuse_inode, once info->lock is held.
Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/shmem.c 37
1 files changed, 25 insertions, 12 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 530c5033d028..ee9024483f60 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -833,6 +833,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
833 idx = 0; 833 idx = 0;
834 ptr = info->i_direct; 834 ptr = info->i_direct;
835 spin_lock(&info->lock); 835 spin_lock(&info->lock);
836 if (!info->swapped) {
837 list_del_init(&info->swaplist);
838 goto lost2;
839 }
836 limit = info->next_index; 840 limit = info->next_index;
837 size = limit; 841 size = limit;
838 if (size > SHMEM_NR_DIRECT) 842 if (size > SHMEM_NR_DIRECT)
@@ -894,8 +898,15 @@ found:
894 inode = igrab(&info->vfs_inode); 898 inode = igrab(&info->vfs_inode);
895 spin_unlock(&info->lock); 899 spin_unlock(&info->lock);
896 900
897 /* move head to start search for next from here */ 901 /*
898 list_move_tail(&shmem_swaplist, &info->swaplist); 902 * Move _head_ to start search for next from here.
903 * But be careful: shmem_delete_inode checks list_empty without taking
904 * mutex, and there's an instant in list_move_tail when info->swaplist
905 * would appear empty, if it were the only one on shmem_swaplist. We
906 * could avoid doing it if inode NULL; or use this minor optimization.
907 */
908 if (shmem_swaplist.next != &info->swaplist)
909 list_move_tail(&shmem_swaplist, &info->swaplist);
899 mutex_unlock(&shmem_swaplist_mutex); 910 mutex_unlock(&shmem_swaplist_mutex);
900 911
901 error = 1; 912 error = 1;
@@ -955,10 +966,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
955 mutex_lock(&shmem_swaplist_mutex); 966 mutex_lock(&shmem_swaplist_mutex);
956 list_for_each_safe(p, next, &shmem_swaplist) { 967 list_for_each_safe(p, next, &shmem_swaplist) {
957 info = list_entry(p, struct shmem_inode_info, swaplist); 968 info = list_entry(p, struct shmem_inode_info, swaplist);
958 if (info->swapped) 969 found = shmem_unuse_inode(info, entry, page);
959 found = shmem_unuse_inode(info, entry, page);
960 else
961 list_del_init(&info->swaplist);
962 cond_resched(); 970 cond_resched();
963 if (found) 971 if (found)
964 goto out; 972 goto out;
@@ -1021,18 +1029,23 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1021 remove_from_page_cache(page); 1029 remove_from_page_cache(page);
1022 shmem_swp_set(info, entry, swap.val); 1030 shmem_swp_set(info, entry, swap.val);
1023 shmem_swp_unmap(entry); 1031 shmem_swp_unmap(entry);
1032 if (list_empty(&info->swaplist))
1033 inode = igrab(inode);
1034 else
1035 inode = NULL;
1024 spin_unlock(&info->lock); 1036 spin_unlock(&info->lock);
1025 if (list_empty(&info->swaplist)) {
1026 mutex_lock(&shmem_swaplist_mutex);
1027 /* move instead of add in case we're racing */
1028 list_move_tail(&info->swaplist, &shmem_swaplist);
1029 mutex_unlock(&shmem_swaplist_mutex);
1030 }
1031 swap_duplicate(swap); 1037 swap_duplicate(swap);
1032 BUG_ON(page_mapped(page)); 1038 BUG_ON(page_mapped(page));
1033 page_cache_release(page); /* pagecache ref */ 1039 page_cache_release(page); /* pagecache ref */
1034 set_page_dirty(page); 1040 set_page_dirty(page);
1035 unlock_page(page); 1041 unlock_page(page);
1042 if (inode) {
1043 mutex_lock(&shmem_swaplist_mutex);
1044 /* move instead of add in case we're racing */
1045 list_move_tail(&info->swaplist, &shmem_swaplist);
1046 mutex_unlock(&shmem_swaplist_mutex);
1047 iput(inode);
1048 }
1036 return 0; 1049 return 0;
1037 } 1050 }
1038 1051