diff options
author | Jan Kara <jack@suse.cz> | 2011-06-27 19:18:10 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-06-27 21:00:13 -0400 |
commit | 08142579b6ca35883c1ed066a2681de6f6917062 (patch) | |
tree | 00735ed37753533f3b645714770b4fb036b5f7e0 | |
parent | 9b679320a5fbf46454011e5c62e0b8991b0956d1 (diff) |
mm: fix assertion mapping->nrpages == 0 in end_writeback()
Under heavy memory and filesystem load, users observe the assertion
mapping->nrpages == 0 in end_writeback() trigger. This can be caused by
page reclaim reclaiming the last page from a mapping in the following
race:
        CPU0                                    CPU1
  ...
  shrink_page_list()
    __remove_mapping()
      __delete_from_page_cache()
        radix_tree_delete()
                                        evict_inode()
                                          truncate_inode_pages()
                                            truncate_inode_pages_range()
                                              pagevec_lookup() - finds nothing
                                          end_writeback()
                                            mapping->nrpages != 0 -> BUG
        page->mapping = NULL
        mapping->nrpages--
Fix the problem by doing a reliable check of mapping->nrpages under
mapping->tree_lock in end_writeback().
Analyzed by Jay <jinshan.xiong@whamcloud.com>, lost in LKML, and dug out
by Miklos Szeredi <mszeredi@suse.de>.
Cc: Jay <jinshan.xiong@whamcloud.com>
Cc: Miklos Szeredi <mszeredi@suse.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/inode.c | 7 | ||||
-rw-r--r-- | include/linux/fs.h | 1 | ||||
-rw-r--r-- | mm/truncate.c | 5 |
3 files changed, 13 insertions, 0 deletions
diff --git a/fs/inode.c b/fs/inode.c index 0f7e88a7803f..43566d17d1b8 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash); | |||
423 | void end_writeback(struct inode *inode) | 423 | void end_writeback(struct inode *inode) |
424 | { | 424 | { |
425 | might_sleep(); | 425 | might_sleep(); |
426 | /* | ||
427 | * We have to cycle tree_lock here because reclaim can be still in the | ||
428 | * process of removing the last page (in __delete_from_page_cache()) | ||
429 | * and we must not free mapping under it. | ||
430 | */ | ||
431 | spin_lock_irq(&inode->i_data.tree_lock); | ||
426 | BUG_ON(inode->i_data.nrpages); | 432 | BUG_ON(inode->i_data.nrpages); |
433 | spin_unlock_irq(&inode->i_data.tree_lock); | ||
427 | BUG_ON(!list_empty(&inode->i_data.private_list)); | 434 | BUG_ON(!list_empty(&inode->i_data.private_list)); |
428 | BUG_ON(!(inode->i_state & I_FREEING)); | 435 | BUG_ON(!(inode->i_state & I_FREEING)); |
429 | BUG_ON(inode->i_state & I_CLEAR); | 436 | BUG_ON(inode->i_state & I_CLEAR); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e73e2e9ae33..b5b979247863 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -639,6 +639,7 @@ struct address_space { | |||
639 | struct prio_tree_root i_mmap; /* tree of private and shared mappings */ | 639 | struct prio_tree_root i_mmap; /* tree of private and shared mappings */ |
640 | struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ | 640 | struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ |
641 | struct mutex i_mmap_mutex; /* protect tree, count, list */ | 641 | struct mutex i_mmap_mutex; /* protect tree, count, list */ |
642 | /* Protected by tree_lock together with the radix tree */ | ||
642 | unsigned long nrpages; /* number of total pages */ | 643 | unsigned long nrpages; /* number of total pages */ |
643 | pgoff_t writeback_index;/* writeback starts here */ | 644 | pgoff_t writeback_index;/* writeback starts here */ |
644 | const struct address_space_operations *a_ops; /* methods */ | 645 | const struct address_space_operations *a_ops; /* methods */ |
diff --git a/mm/truncate.c b/mm/truncate.c index 29a9b8a5a31a..e13f22efaad7 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -304,6 +304,11 @@ EXPORT_SYMBOL(truncate_inode_pages_range); | |||
304 | * @lstart: offset from which to truncate | 304 | * @lstart: offset from which to truncate |
305 | * | 305 | * |
306 | * Called under (and serialised by) inode->i_mutex. | 306 | * Called under (and serialised by) inode->i_mutex. |
307 | * | ||
308 | * Note: When this function returns, there can be a page in the process of | ||
309 | * deletion (inside __delete_from_page_cache()) in the specified range. Thus | ||
310 | * mapping->nrpages can be non-zero when this function returns even after | ||
311 | * truncation of the whole mapping. | ||
307 | */ | 312 | */ |
308 | void truncate_inode_pages(struct address_space *mapping, loff_t lstart) | 313 | void truncate_inode_pages(struct address_space *mapping, loff_t lstart) |
309 | { | 314 | { |