author     Hugh Dickins <hughd@google.com>                  2019-04-18 20:50:13 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2019-04-19 12:46:04 -0400
commit     af53d3e9e04024885de5b4fda51e5fa362ae2bd8 (patch)
tree       ab2a56b0bafc76d99dcd2924f22f0fed76f4ae2e /mm/shmem.c
parent     64165b1affc5bc16231ac971e66aae7d68d57f2c (diff)
mm: swapoff: shmem_unuse() stop eviction without igrab()
The igrab() in shmem_unuse() looks good, but we forgot that it gives no
protection against concurrent unmounting: a point made by Konstantin
Khlebnikov eight years ago, and then fixed in 2.6.39 by 778dd893ae78
("tmpfs: fix race between umount and swapoff"). The current 5.1-rc
swapoff is liable to hit "VFS: Busy inodes after unmount of tmpfs.
Self-destruct in 5 seconds. Have a nice day..." followed by GPF.
Once again, give up on using igrab(); but don't go back to making such
heavy-handed use of shmem_swaplist_mutex as last time: that would spoil
the new design, and I expect it could deadlock inside shmem_swapin_page().
Instead, have shmem_unuse() raise a "stop_eviction" count in the shmem-
specific inode, and have shmem_evict_inode() wait for that count to drop
to 0. Call it "stop_eviction" rather than "swapoff_busy" because it can
be put to other uses later (the huge tmpfs patches expect to use it).
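In condensed form, the handshake added below pairs an atomic count with the
kernel's wait_var_event()/wake_up_var() helpers. The following sketch is
illustrative only (the surrounding swaplist walk and error handling of the
real patch are omitted); see the diff further down for the actual code:

	/* swapoff side, in shmem_unuse(): pin this inode against eviction */
	atomic_inc(&info->stop_eviction);
	mutex_unlock(&shmem_swaplist_mutex);
	/* ... scan &info->vfs_inode for swap entries; may sleep ... */
	mutex_lock(&shmem_swaplist_mutex);
	if (atomic_dec_and_test(&info->stop_eviction))
		wake_up_var(&info->stop_eviction);

	/* eviction side, in shmem_evict_inode(): wait until no scan is in flight */
	wait_var_event(&info->stop_eviction,
		       !atomic_read(&info->stop_eviction));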
That simplifies shmem_unuse(), protecting it from both unlink and
unmount; and in practice lets it locate all the swap in its first try.
But do not rely on that: there is still a theoretical case in which
shmem_writepage() might have been preempted after its get_swap_page(),
before making the swap entry visible to swapoff.
[hughd@google.com: remove incorrect list_del()]
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904091133570.1898@eggly.anvils
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1904081259400.1523@eggly.anvils
Fixes: b56a2d8af914 ("mm: rid swapoff of quadratic complexity")
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: "Alex Xu (Hello71)" <alex_y_xu@yahoo.ca>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Kelley Nielsen <kelleynnn@gmail.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vineeth Pillai <vpillai@digitalocean.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/shmem.c')
-rw-r--r--  mm/shmem.c | 40 ++++++++++++++++++----------------------
1 file changed, 18 insertions(+), 22 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 859e8628071f..2275a0ff7c30 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1081,9 +1081,14 @@ static void shmem_evict_inode(struct inode *inode)
 			}
 			spin_unlock(&sbinfo->shrinklist_lock);
 		}
-		if (!list_empty(&info->swaplist)) {
+		while (!list_empty(&info->swaplist)) {
+			/* Wait while shmem_unuse() is scanning this inode... */
+			wait_var_event(&info->stop_eviction,
+				       !atomic_read(&info->stop_eviction));
 			mutex_lock(&shmem_swaplist_mutex);
-			list_del_init(&info->swaplist);
+			/* ...but beware of the race if we peeked too early */
+			if (!atomic_read(&info->stop_eviction))
+				list_del_init(&info->swaplist);
 			mutex_unlock(&shmem_swaplist_mutex);
 		}
 	}
@@ -1227,36 +1232,27 @@ int shmem_unuse(unsigned int type, bool frontswap,
 		unsigned long *fs_pages_to_unuse)
 {
 	struct shmem_inode_info *info, *next;
-	struct inode *inode;
-	struct inode *prev_inode = NULL;
 	int error = 0;
 
 	if (list_empty(&shmem_swaplist))
 		return 0;
 
 	mutex_lock(&shmem_swaplist_mutex);
-
-	/*
-	 * The extra refcount on the inode is necessary to safely dereference
-	 * p->next after re-acquiring the lock. New shmem inodes with swap
-	 * get added to the end of the list and we will scan them all.
-	 */
 	list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
 		if (!info->swapped) {
 			list_del_init(&info->swaplist);
 			continue;
 		}
-
-		inode = igrab(&info->vfs_inode);
-		if (!inode)
-			continue;
-
+		/*
+		 * Drop the swaplist mutex while searching the inode for swap;
+		 * but before doing so, make sure shmem_evict_inode() will not
+		 * remove placeholder inode from swaplist, nor let it be freed
+		 * (igrab() would protect from unlink, but not from unmount).
+		 */
+		atomic_inc(&info->stop_eviction);
 		mutex_unlock(&shmem_swaplist_mutex);
-		if (prev_inode)
-			iput(prev_inode);
-		prev_inode = inode;
 
-		error = shmem_unuse_inode(inode, type, frontswap,
+		error = shmem_unuse_inode(&info->vfs_inode, type, frontswap,
 					  fs_pages_to_unuse);
 		cond_resched();
 
@@ -1264,14 +1260,13 @@ int shmem_unuse(unsigned int type, bool frontswap,
 		next = list_next_entry(info, swaplist);
 		if (!info->swapped)
 			list_del_init(&info->swaplist);
+		if (atomic_dec_and_test(&info->stop_eviction))
+			wake_up_var(&info->stop_eviction);
 		if (error)
 			break;
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
 
-	if (prev_inode)
-		iput(prev_inode);
-
 	return error;
 }
 
@@ -2238,6 +2233,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 	info = SHMEM_I(inode);
 	memset(info, 0, (char *)inode - (char *)info);
 	spin_lock_init(&info->lock);
+	atomic_set(&info->stop_eviction, 0);
 	info->seals = F_SEAL_SEAL;
 	info->flags = flags & VM_NORESERVE;
 	INIT_LIST_HEAD(&info->shrinklist);