aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2008-02-08 07:21:59 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-08 12:22:42 -0500
commit535ee2fbf79ab52d26bce3d2e127c9007503581e (patch)
tree9d7cdbd98e481ec2fb6d5ff6eebf65f4d422ec8c /fs
parentf6f21c81464ce52dbeec921bdc2e8b288c491920 (diff)
buffer_head: fix private_list handling
There are two possible races in handling of private_list in buffer cache. 1) When fsync_buffers_list() processes a private_list, it clears b_assoc_mapping and moves buffer to its private list. Now drop_buffers() comes, sees a buffer is on list so it calls __remove_assoc_queue() which complains about b_assoc_mapping being cleared (as it cannot propagate possible IO error). This race has been actually observed in the wild. 2) When fsync_buffers_list() processes a private_list, mark_buffer_dirty_inode() can be called on bh which is already on the private list of fsync_buffers_list(). As buffer is on some list (note that the check is performed without private_lock), it is not readded to the mapping's private_list and after fsync_buffers_list() finishes, we have a dirty buffer which should be on private_list but it isn't. This race has not been reported, probably because most (but not all) callers of mark_buffer_dirty_inode() hold i_mutex and thus are serialized with fsync(). Fix these issues by not clearing b_assoc_map when fsync_buffers_list() moves buffer to a dedicated list and by reinserting buffer in private_list when it is found dirty after we have submitted buffer for IO. We also change the tests whether a buffer is on a private list from !list_empty(&bh->b_assoc_buffers) to bh->b_assoc_map so that they are single word reads and hence lockless checks are safe. Signed-off-by: Jan Kara <jack@suse.cz> Cc: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/buffer.c23
1 files changed, 19 insertions, 4 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 6f0bddddcf4a..3ebccf4aa7e3 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -678,7 +678,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
678 } else { 678 } else {
679 BUG_ON(mapping->assoc_mapping != buffer_mapping); 679 BUG_ON(mapping->assoc_mapping != buffer_mapping);
680 } 680 }
681 if (list_empty(&bh->b_assoc_buffers)) { 681 if (!bh->b_assoc_map) {
682 spin_lock(&buffer_mapping->private_lock); 682 spin_lock(&buffer_mapping->private_lock);
683 list_move_tail(&bh->b_assoc_buffers, 683 list_move_tail(&bh->b_assoc_buffers,
684 &mapping->private_list); 684 &mapping->private_list);
@@ -794,6 +794,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
794{ 794{
795 struct buffer_head *bh; 795 struct buffer_head *bh;
796 struct list_head tmp; 796 struct list_head tmp;
797 struct address_space *mapping;
797 int err = 0, err2; 798 int err = 0, err2;
798 799
799 INIT_LIST_HEAD(&tmp); 800 INIT_LIST_HEAD(&tmp);
@@ -801,9 +802,14 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
801 spin_lock(lock); 802 spin_lock(lock);
802 while (!list_empty(list)) { 803 while (!list_empty(list)) {
803 bh = BH_ENTRY(list->next); 804 bh = BH_ENTRY(list->next);
805 mapping = bh->b_assoc_map;
804 __remove_assoc_queue(bh); 806 __remove_assoc_queue(bh);
807 /* Avoid race with mark_buffer_dirty_inode() which does
808 * a lockless check and we rely on seeing the dirty bit */
809 smp_mb();
805 if (buffer_dirty(bh) || buffer_locked(bh)) { 810 if (buffer_dirty(bh) || buffer_locked(bh)) {
806 list_add(&bh->b_assoc_buffers, &tmp); 811 list_add(&bh->b_assoc_buffers, &tmp);
812 bh->b_assoc_map = mapping;
807 if (buffer_dirty(bh)) { 813 if (buffer_dirty(bh)) {
808 get_bh(bh); 814 get_bh(bh);
809 spin_unlock(lock); 815 spin_unlock(lock);
@@ -822,8 +828,17 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
822 828
823 while (!list_empty(&tmp)) { 829 while (!list_empty(&tmp)) {
824 bh = BH_ENTRY(tmp.prev); 830 bh = BH_ENTRY(tmp.prev);
825 list_del_init(&bh->b_assoc_buffers);
826 get_bh(bh); 831 get_bh(bh);
832 mapping = bh->b_assoc_map;
833 __remove_assoc_queue(bh);
834 /* Avoid race with mark_buffer_dirty_inode() which does
835 * a lockless check and we rely on seeing the dirty bit */
836 smp_mb();
837 if (buffer_dirty(bh)) {
838 list_add(&bh->b_assoc_buffers,
839 &bh->b_assoc_map->private_list);
840 bh->b_assoc_map = mapping;
841 }
827 spin_unlock(lock); 842 spin_unlock(lock);
828 wait_on_buffer(bh); 843 wait_on_buffer(bh);
829 if (!buffer_uptodate(bh)) 844 if (!buffer_uptodate(bh))
@@ -1195,7 +1210,7 @@ void __brelse(struct buffer_head * buf)
1195void __bforget(struct buffer_head *bh) 1210void __bforget(struct buffer_head *bh)
1196{ 1211{
1197 clear_buffer_dirty(bh); 1212 clear_buffer_dirty(bh);
1198 if (!list_empty(&bh->b_assoc_buffers)) { 1213 if (bh->b_assoc_map) {
1199 struct address_space *buffer_mapping = bh->b_page->mapping; 1214 struct address_space *buffer_mapping = bh->b_page->mapping;
1200 1215
1201 spin_lock(&buffer_mapping->private_lock); 1216 spin_lock(&buffer_mapping->private_lock);
@@ -3022,7 +3037,7 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3022 do { 3037 do {
3023 struct buffer_head *next = bh->b_this_page; 3038 struct buffer_head *next = bh->b_this_page;
3024 3039
3025 if (!list_empty(&bh->b_assoc_buffers)) 3040 if (bh->b_assoc_map)
3026 __remove_assoc_queue(bh); 3041 __remove_assoc_queue(bh);
3027 bh = next; 3042 bh = next;
3028 } while (bh != head); 3043 } while (bh != head);