diff options
author | Hugh Dickins <hugh@veritas.com> | 2008-02-05 01:28:55 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-05 12:44:16 -0500 |
commit | 1b1b32f2c6f6bb32535d2da62075b51c980880eb (patch) | |
tree | 686aac685a4c04f085dc17cc1a05910149a04933 /mm | |
parent | b409f9fcf04692c0f603d28c73d2e3dfed27bf54 (diff) |
tmpfs: fix shmem_swaplist races
Intensive swapoff testing shows shmem_unuse spinning on an entry in
shmem_swaplist pointing to itself: how does that come about? Days pass...
First guess is this: shmem_delete_inode tests list_empty without taking the
global mutex (so the swapping case doesn't slow down the common case); but
there's an instant in shmem_unuse_inode's list_move_tail when the list entry
may appear empty (a rare case, because it's actually moving the head not the
list member). So there's a danger of leaving the inode on the swaplist
when it's freed, then reinitialized to point to itself when reused. Fix that
by skipping the list_move_tail when it's a no-op, which happens to plug this.
But this same spinning then surfaces on another machine. Ah, I'd never
suspected it, but shmem_writepage's swaplist manipulation is unsafe: though we
still hold page lock, which would hold off inode deletion if the page were in
pagecache, it doesn't hold off once it's in swapcache (free_swap_and_cache
doesn't wait on locked pages). Hmm: we could put the inode on swaplist
earlier, but then shmem_unuse_inode could never prune unswapped inodes.
Fix this with an igrab before dropping info->lock, as in shmem_unuse_inode;
though I am a little uneasy about the iput which has to follow - it works, and
I see nothing wrong with it, but it is surprising that shmem inode deletion
may now occur below shmem_writepage. Revisit this fix later?
And while we're looking at these races: the way shmem_unuse tests swapped
without holding info->lock looks unsafe, if we've more than one swap area: a
racing shmem_writepage on another page of the same inode could be putting it
in swapcache, just as we're deciding to remove the inode from swaplist -
there's a danger of going on swap without being listed, so a later swapoff
would hang, being unable to locate the entry. Move that test and removal down
into shmem_unuse_inode, once info->lock is held.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/shmem.c | 37 |
1 files changed, 25 insertions, 12 deletions
diff --git a/mm/shmem.c b/mm/shmem.c index 530c5033d028..ee9024483f60 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -833,6 +833,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s | |||
833 | idx = 0; | 833 | idx = 0; |
834 | ptr = info->i_direct; | 834 | ptr = info->i_direct; |
835 | spin_lock(&info->lock); | 835 | spin_lock(&info->lock); |
836 | if (!info->swapped) { | ||
837 | list_del_init(&info->swaplist); | ||
838 | goto lost2; | ||
839 | } | ||
836 | limit = info->next_index; | 840 | limit = info->next_index; |
837 | size = limit; | 841 | size = limit; |
838 | if (size > SHMEM_NR_DIRECT) | 842 | if (size > SHMEM_NR_DIRECT) |
@@ -894,8 +898,15 @@ found: | |||
894 | inode = igrab(&info->vfs_inode); | 898 | inode = igrab(&info->vfs_inode); |
895 | spin_unlock(&info->lock); | 899 | spin_unlock(&info->lock); |
896 | 900 | ||
897 | /* move head to start search for next from here */ | 901 | /* |
898 | list_move_tail(&shmem_swaplist, &info->swaplist); | 902 | * Move _head_ to start search for next from here. |
903 | * But be careful: shmem_delete_inode checks list_empty without taking | ||
904 | * mutex, and there's an instant in list_move_tail when info->swaplist | ||
905 | * would appear empty, if it were the only one on shmem_swaplist. We | ||
906 | * could avoid doing it if inode NULL; or use this minor optimization. | ||
907 | */ | ||
908 | if (shmem_swaplist.next != &info->swaplist) | ||
909 | list_move_tail(&shmem_swaplist, &info->swaplist); | ||
899 | mutex_unlock(&shmem_swaplist_mutex); | 910 | mutex_unlock(&shmem_swaplist_mutex); |
900 | 911 | ||
901 | error = 1; | 912 | error = 1; |
@@ -955,10 +966,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page) | |||
955 | mutex_lock(&shmem_swaplist_mutex); | 966 | mutex_lock(&shmem_swaplist_mutex); |
956 | list_for_each_safe(p, next, &shmem_swaplist) { | 967 | list_for_each_safe(p, next, &shmem_swaplist) { |
957 | info = list_entry(p, struct shmem_inode_info, swaplist); | 968 | info = list_entry(p, struct shmem_inode_info, swaplist); |
958 | if (info->swapped) | 969 | found = shmem_unuse_inode(info, entry, page); |
959 | found = shmem_unuse_inode(info, entry, page); | ||
960 | else | ||
961 | list_del_init(&info->swaplist); | ||
962 | cond_resched(); | 970 | cond_resched(); |
963 | if (found) | 971 | if (found) |
964 | goto out; | 972 | goto out; |
@@ -1021,18 +1029,23 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
1021 | remove_from_page_cache(page); | 1029 | remove_from_page_cache(page); |
1022 | shmem_swp_set(info, entry, swap.val); | 1030 | shmem_swp_set(info, entry, swap.val); |
1023 | shmem_swp_unmap(entry); | 1031 | shmem_swp_unmap(entry); |
1032 | if (list_empty(&info->swaplist)) | ||
1033 | inode = igrab(inode); | ||
1034 | else | ||
1035 | inode = NULL; | ||
1024 | spin_unlock(&info->lock); | 1036 | spin_unlock(&info->lock); |
1025 | if (list_empty(&info->swaplist)) { | ||
1026 | mutex_lock(&shmem_swaplist_mutex); | ||
1027 | /* move instead of add in case we're racing */ | ||
1028 | list_move_tail(&info->swaplist, &shmem_swaplist); | ||
1029 | mutex_unlock(&shmem_swaplist_mutex); | ||
1030 | } | ||
1031 | swap_duplicate(swap); | 1037 | swap_duplicate(swap); |
1032 | BUG_ON(page_mapped(page)); | 1038 | BUG_ON(page_mapped(page)); |
1033 | page_cache_release(page); /* pagecache ref */ | 1039 | page_cache_release(page); /* pagecache ref */ |
1034 | set_page_dirty(page); | 1040 | set_page_dirty(page); |
1035 | unlock_page(page); | 1041 | unlock_page(page); |
1042 | if (inode) { | ||
1043 | mutex_lock(&shmem_swaplist_mutex); | ||
1044 | /* move instead of add in case we're racing */ | ||
1045 | list_move_tail(&info->swaplist, &shmem_swaplist); | ||
1046 | mutex_unlock(&shmem_swaplist_mutex); | ||
1047 | iput(inode); | ||
1048 | } | ||
1036 | return 0; | 1049 | return 0; |
1037 | } | 1050 | } |
1038 | 1051 | ||