aboutsummaryrefslogtreecommitdiffstats
path: root/fs/inode.c
diff options
context:
space:
mode:
authorMiklos Szeredi <mszeredi@suse.cz>2011-02-23 07:49:47 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2011-02-23 22:52:52 -0500
commit2aa15890f3c191326678f1bd68af61ec6b8753ec (patch)
tree347f5fdcd0678b12be92f266cd2a5e7a74749403 /fs/inode.c
parent78794b2cdeac37ac1fd950fc9c4454b56d88ac03 (diff)
mm: prevent concurrent unmap_mapping_range() on the same inode
Michael Leun reported that running parallel opens on a fuse filesystem can trigger a "kernel BUG at mm/truncate.c:475" Gurudas Pai reported the same bug on NFS. The reason is, unmap_mapping_range() is not prepared for more than one concurrent invocation per inode. For example: thread1: going through a big range, stops in the middle of a vma and stores the restart address in vm_truncate_count. thread2: comes in with a small (e.g. single page) unmap request on the same vma, somewhere before restart_address, finds that the vma was already unmapped up to the restart address and happily returns without doing anything. Another scenario would be two big unmap requests, both having to restart the unmapping and each one setting vm_truncate_count to its own value. This could go on forever without any of them being able to finish. Truncate and hole punching already serialize with i_mutex. Other callers of unmap_mapping_range() do not, and it's difficult to get i_mutex protection for all callers. In particular ->d_revalidate(), which calls invalidate_inode_pages2_range() in fuse, may be called with or without i_mutex. This patch adds a new mutex to 'struct address_space' to prevent running multiple concurrent unmap_mapping_range() on the same mapping. [ We'll hopefully get rid of all this with the upcoming mm preemptibility series by Peter Zijlstra, the "mm: Remove i_mmap_mutex lockbreak" patch in particular. But that is for 2.6.39 ] Signed-off-by: Miklos Szeredi <mszeredi@suse.cz> Reported-by: Michael Leun <lkml20101129@newton.leun.net> Reported-by: Gurudas Pai <gurudas.pai@oracle.com> Tested-by: Gurudas Pai <gurudas.pai@oracle.com> Acked-by: Hugh Dickins <hughd@google.com> Cc: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/inode.c')
-rw-r--r--fs/inode.c22
1 files changed, 15 insertions, 7 deletions
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..9c2b795ccc93 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
295 call_rcu(&inode->i_rcu, i_callback); 295 call_rcu(&inode->i_rcu, i_callback);
296} 296}
297 297
298void address_space_init_once(struct address_space *mapping)
299{
300 memset(mapping, 0, sizeof(*mapping));
301 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
302 spin_lock_init(&mapping->tree_lock);
303 spin_lock_init(&mapping->i_mmap_lock);
304 INIT_LIST_HEAD(&mapping->private_list);
305 spin_lock_init(&mapping->private_lock);
306 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
307 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
308 mutex_init(&mapping->unmap_mutex);
309}
310EXPORT_SYMBOL(address_space_init_once);
311
298/* 312/*
299 * These are initializations that only need to be done 313 * These are initializations that only need to be done
300 * once, because the fields are idempotent across use 314 * once, because the fields are idempotent across use
@@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
308 INIT_LIST_HEAD(&inode->i_devices); 322 INIT_LIST_HEAD(&inode->i_devices);
309 INIT_LIST_HEAD(&inode->i_wb_list); 323 INIT_LIST_HEAD(&inode->i_wb_list);
310 INIT_LIST_HEAD(&inode->i_lru); 324 INIT_LIST_HEAD(&inode->i_lru);
311 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 325 address_space_init_once(&inode->i_data);
312 spin_lock_init(&inode->i_data.tree_lock);
313 spin_lock_init(&inode->i_data.i_mmap_lock);
314 INIT_LIST_HEAD(&inode->i_data.private_list);
315 spin_lock_init(&inode->i_data.private_lock);
316 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
317 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
318 i_size_ordered_init(inode); 326 i_size_ordered_init(inode);
319#ifdef CONFIG_FSNOTIFY 327#ifdef CONFIG_FSNOTIFY
320 INIT_HLIST_HEAD(&inode->i_fsnotify_marks); 328 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);