aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2011-06-27 19:18:10 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-06-27 21:00:13 -0400
commit: 08142579b6ca35883c1ed066a2681de6f6917062 (patch)
tree: 00735ed37753533f3b645714770b4fb036b5f7e0
parent: 9b679320a5fbf46454011e5c62e0b8991b0956d1 (diff)
mm: fix assertion mapping->nrpages == 0 in end_writeback()
Under heavy memory and filesystem load, users observe the assertion
mapping->nrpages == 0 in end_writeback() trigger. This can be caused by
page reclaim reclaiming the last page from a mapping in the following
race:

    CPU0                                CPU1
  ...
  shrink_page_list()
    __remove_mapping()
      __delete_from_page_cache()
        radix_tree_delete()
                                        evict_inode()
                                          truncate_inode_pages()
                                            truncate_inode_pages_range()
                                              pagevec_lookup() - finds nothing
                                          end_writeback()
                                            mapping->nrpages != 0 -> BUG
        page->mapping = NULL
        mapping->nrpages--

Fix the problem by doing a reliable check of mapping->nrpages under
mapping->tree_lock in end_writeback().

Analyzed by Jay <jinshan.xiong@whamcloud.com>, lost in LKML, and dug out
by Miklos Szeredi <mszeredi@suse.de>.

Cc: Jay <jinshan.xiong@whamcloud.com>
Cc: Miklos Szeredi <mszeredi@suse.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- fs/inode.c 7
-rw-r--r-- include/linux/fs.h 1
-rw-r--r-- mm/truncate.c 5
3 files changed, 13 insertions, 0 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 0f7e88a7803f..43566d17d1b8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash);
423void end_writeback(struct inode *inode) 423void end_writeback(struct inode *inode)
424{ 424{
425 might_sleep(); 425 might_sleep();
426 /*
427 * We have to cycle tree_lock here because reclaim can be still in the
428 * process of removing the last page (in __delete_from_page_cache())
429 * and we must not free mapping under it.
430 */
431 spin_lock_irq(&inode->i_data.tree_lock);
426 BUG_ON(inode->i_data.nrpages); 432 BUG_ON(inode->i_data.nrpages);
433 spin_unlock_irq(&inode->i_data.tree_lock);
427 BUG_ON(!list_empty(&inode->i_data.private_list)); 434 BUG_ON(!list_empty(&inode->i_data.private_list));
428 BUG_ON(!(inode->i_state & I_FREEING)); 435 BUG_ON(!(inode->i_state & I_FREEING));
429 BUG_ON(inode->i_state & I_CLEAR); 436 BUG_ON(inode->i_state & I_CLEAR);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e73e2e9ae33..b5b979247863 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -639,6 +639,7 @@ struct address_space {
639 struct prio_tree_root i_mmap; /* tree of private and shared mappings */ 639 struct prio_tree_root i_mmap; /* tree of private and shared mappings */
640 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ 640 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
641 struct mutex i_mmap_mutex; /* protect tree, count, list */ 641 struct mutex i_mmap_mutex; /* protect tree, count, list */
642 /* Protected by tree_lock together with the radix tree */
642 unsigned long nrpages; /* number of total pages */ 643 unsigned long nrpages; /* number of total pages */
643 pgoff_t writeback_index;/* writeback starts here */ 644 pgoff_t writeback_index;/* writeback starts here */
644 const struct address_space_operations *a_ops; /* methods */ 645 const struct address_space_operations *a_ops; /* methods */
diff --git a/mm/truncate.c b/mm/truncate.c
index 29a9b8a5a31a..e13f22efaad7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -304,6 +304,11 @@ EXPORT_SYMBOL(truncate_inode_pages_range);
304 * @lstart: offset from which to truncate 304 * @lstart: offset from which to truncate
305 * 305 *
306 * Called under (and serialised by) inode->i_mutex. 306 * Called under (and serialised by) inode->i_mutex.
307 *
308 * Note: When this function returns, there can be a page in the process of
309 * deletion (inside __delete_from_page_cache()) in the specified range. Thus
310 * mapping->nrpages can be non-zero when this function returns even after
311 * truncation of the whole mapping.
307 */ 312 */
308void truncate_inode_pages(struct address_space *mapping, loff_t lstart) 313void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
309{ 314{